java.io.ByteArrayInputStream Scala Examples

The following examples show how to use java.io.ByteArrayInputStream. Each example is taken from an open-source project; the source file, project, and license are noted above the code.
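As a quick orientation before the project examples, here is a minimal, self-contained sketch of the pattern they all share: wrapping an in-memory byte array in a ByteArrayInputStream so it can be consumed by any API that expects an InputStream. The object name and the sample string are purely illustrative and do not come from any of the projects below.

import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets

object ByteArrayInputStreamExample extends App {
  // Wrap an in-memory byte array so it can be read as an InputStream.
  val bytes = "hello, stream".getBytes(StandardCharsets.UTF_8)
  val in = new ByteArrayInputStream(bytes)

  // Drain the stream back into a String; Source handles buffering and decoding.
  val roundTripped = scala.io.Source.fromInputStream(in, "UTF-8").mkString
  println(roundTripped) // prints: hello, stream

  // close() is a no-op for ByteArrayInputStream, but keeps the usual stream contract.
  in.close()
}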
Example 1
Source File: AvroDataToCatalyst.scala From spark-schema-registry with Apache License 2.0
package com.hortonworks.spark.registry.avro

import java.io.ByteArrayInputStream

import com.hortonworks.registries.schemaregistry.{SchemaVersionInfo, SchemaVersionKey}
import com.hortonworks.registries.schemaregistry.client.SchemaRegistryClient
import com.hortonworks.registries.schemaregistry.serdes.avro.AvroSnapshotDeserializer
import org.apache.avro.Schema
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, UnaryExpression}
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.types.{BinaryType, DataType}

import scala.collection.JavaConverters._


case class AvroDataToCatalyst(child: Expression, schemaName: String, version: Option[Int], config: Map[String, Object])
  extends UnaryExpression with ExpectsInputTypes {

  override def inputTypes = Seq(BinaryType)

  @transient private lazy val srDeser: AvroSnapshotDeserializer = {
    val obj = new AvroSnapshotDeserializer()
    obj.init(config.asJava)
    obj
  }

  @transient private lazy val srSchema = fetchSchemaVersionInfo(schemaName, version)

  @transient private lazy val avroSchema = new Schema.Parser().parse(srSchema.getSchemaText)

  override lazy val dataType: DataType = SchemaConverters.toSqlType(avroSchema).dataType

  @transient private lazy val avroDeser = new AvroDeserializer(avroSchema, dataType)

  override def nullable: Boolean = true

  override def nullSafeEval(input: Any): Any = {
    val binary = input.asInstanceOf[Array[Byte]]
    val row = avroDeser.deserialize(srDeser.deserialize(new ByteArrayInputStream(binary), srSchema.getVersion))
    val result = row match {
      case r: InternalRow => r.copy()
      case _ => row
    }
    result
  }

  override def simpleString: String = {
    s"from_sr(${child.sql}, ${dataType.simpleString})"
  }

  override def sql: String = {
    s"from_sr(${child.sql}, ${dataType.catalogString})"
  }

  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val expr = ctx.addReferenceObj("this", this)
    defineCodeGen(ctx, ev, input =>
      s"(${ctx.boxedType(dataType)})$expr.nullSafeEval($input)")
  }

  private def fetchSchemaVersionInfo(schemaName: String, version: Option[Int]): SchemaVersionInfo = {
    val srClient = new SchemaRegistryClient(config.asJava)
    version.map(v => srClient.getSchemaVersionInfo(new SchemaVersionKey(schemaName, v)))
      .getOrElse(srClient.getLatestSchemaVersionInfo(schemaName))
  }

} 
Example 2
Source File: HDFSCredentialProvider.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.deploy.yarn.security

import java.io.{ByteArrayInputStream, DataInputStream}

import scala.collection.JavaConverters._

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier
import org.apache.hadoop.mapred.Master
import org.apache.hadoop.security.Credentials

import org.apache.spark.{SparkConf, SparkException}
import org.apache.spark.deploy.yarn.config._
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config._

private[security] class HDFSCredentialProvider extends ServiceCredentialProvider with Logging {
  // Token renewal interval. This value is set on the first call; None means no token
  // renewer was specified, so the renewal interval cannot be determined.
  private var tokenRenewalInterval: Option[Long] = null

  override val serviceName: String = "hdfs"

  override def obtainCredentials(
      hadoopConf: Configuration,
      sparkConf: SparkConf,
      creds: Credentials): Option[Long] = {
    // NameNode to access, used to get tokens from different FileSystems
    nnsToAccess(hadoopConf, sparkConf).foreach { dst =>
      val dstFs = dst.getFileSystem(hadoopConf)
      logInfo("getting token for namenode: " + dst)
      dstFs.addDelegationTokens(getTokenRenewer(hadoopConf), creds)
    }

    // Get the token renewal interval if it is not set. It will only be called once.
    if (tokenRenewalInterval == null) {
      tokenRenewalInterval = getTokenRenewalInterval(hadoopConf, sparkConf)
    }

    // Get the time of next renewal.
    tokenRenewalInterval.map { interval =>
      creds.getAllTokens.asScala
        .filter(_.getKind == DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
        .map { t =>
          val identifier = new DelegationTokenIdentifier()
          identifier.readFields(new DataInputStream(new ByteArrayInputStream(t.getIdentifier)))
          identifier.getIssueDate + interval
      }.foldLeft(0L)(math.max)
    }
  }

  private def getTokenRenewalInterval(
      hadoopConf: Configuration, sparkConf: SparkConf): Option[Long] = {
    // We cannot use the tokens generated with renewer yarn. Trying to renew
    // those will fail with an access control issue. So create new tokens with the logged in
    // user as renewer.
    sparkConf.get(PRINCIPAL).map { renewer =>
      val creds = new Credentials()
      nnsToAccess(hadoopConf, sparkConf).foreach { dst =>
        val dstFs = dst.getFileSystem(hadoopConf)
        dstFs.addDelegationTokens(renewer, creds)
      }
      val t = creds.getAllTokens.asScala
        .filter(_.getKind == DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
        .head
      val newExpiration = t.renew(hadoopConf)
      val identifier = new DelegationTokenIdentifier()
      identifier.readFields(new DataInputStream(new ByteArrayInputStream(t.getIdentifier)))
      val interval = newExpiration - identifier.getIssueDate
      logInfo(s"Renewal Interval is $interval")
      interval
    }
  }

  private def getTokenRenewer(conf: Configuration): String = {
    val delegTokenRenewer = Master.getMasterPrincipal(conf)
    logDebug("delegation token renewer is: " + delegTokenRenewer)
    if (delegTokenRenewer == null || delegTokenRenewer.length() == 0) {
      val errorMessage = "Can't get Master Kerberos principal for use as renewer"
      logError(errorMessage)
      throw new SparkException(errorMessage)
    }

    delegTokenRenewer
  }

  private def nnsToAccess(hadoopConf: Configuration, sparkConf: SparkConf): Set[Path] = {
    sparkConf.get(NAMENODES_TO_ACCESS).map(new Path(_)).toSet +
      sparkConf.get(STAGING_DIR).map(new Path(_))
        .getOrElse(FileSystem.get(hadoopConf).getHomeDirectory)
  }
} 
Example 3
Source File: RecordReaderSuite.scala From spark-snowflake with Apache License 2.0
package net.snowflake.spark.snowflake.io

import java.io.ByteArrayInputStream

import net.snowflake.client.jdbc.internal.fasterxml.jackson.databind.ObjectMapper
import org.scalatest.FunSuite

class RecordReaderSuite extends FunSuite {

  val mapper: ObjectMapper = new ObjectMapper()

  test("Read Json File") {
    val record1 =
      s"""
         |{
         |  "name":"abc",
         |  "age":123,
         |  "car":[
         |    {
         |      "make": "vw",
         |      "mode": "golf",
         |      "year": 2010
         |    },
         |    {
         |      "make": "Audi",
         |      "mode": "R8",
         |      "year": 2011
         |    }
         |  ]
         |}
         """.stripMargin
    val record2 =
      s"""
         |{
         |  "name":"def ghi",
         |  "age":222,
         |  "car":[
         |    {
         |      "make": "Tesla",
         |      "mode": "X",
         |      "year": 2017
         |    }
         |  ]
         |}
       """.stripMargin
    val file = record1 + record2

    val recordReader: SFRecordReader = new SFRecordReader(SupportedFormat.JSON)

    recordReader.addStream(new ByteArrayInputStream(file.getBytes))

    recordReader.addStream(new ByteArrayInputStream(file.getBytes))

    val result1 = mapper.readTree(recordReader.next())
    val json1 = mapper.readTree(record1)

    assert(json1.equals(result1))

    val result2 = mapper.readTree(recordReader.next())
    val json2 = mapper.readTree(record2)

    assert(json2.equals(result2))

    val result3 = mapper.readTree(recordReader.next())

    assert(json1.equals(result3))

    val result4 = mapper.readTree(recordReader.next())

    assert(json2.equals(result4))

    assert(!recordReader.hasNext)

  }

} 
Example 4
Source File: V26_1__Fill_create_argument.scala From daml with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package db.migration.postgres

import java.io.ByteArrayInputStream

import com.daml.platform.store.serialization.ValueSerializer
import db.migration.translation.ContractSerializer
import org.flywaydb.core.api.migration.{BaseJavaMigration, Context}

class V26_1__Fill_create_argument extends BaseJavaMigration {

  // left join on contracts to make sure to include divulged contracts
  private val SELECT_CONTRACT_DATA =
    """select contract_data.id, contract_data.contract
      |from contract_data
      |left join contracts
      |  on contracts.id = contract_data.id
      |  and contracts.archive_offset is null""".stripMargin

  private val UPDATE_PARTICIPANT_CONTRACTS =
    "update participant_contracts set create_argument = ?, template_id = ? where contract_id = ?"

  override def migrate(context: Context): Unit = {
    val conn = context.getConnection
    var loadContracts: java.sql.Statement = null
    var updateParticipantContracts: java.sql.PreparedStatement = null
    var rows: java.sql.ResultSet = null
    try {
      updateParticipantContracts = conn.prepareStatement(UPDATE_PARTICIPANT_CONTRACTS)
      loadContracts = conn.createStatement()
      rows = loadContracts.executeQuery(SELECT_CONTRACT_DATA)

      while (rows.next()) {
        val contractId = rows.getString("id")
        val contractBytes = rows.getBinaryStream("contract")
        val contract =
          ContractSerializer
            .deserializeContractInstance(contractBytes)
            .getOrElse(sys.error(s"failed to deserialize contract $contractId"))
        val createArgument = contract.arg
        val templateId = contract.template
        val createArgumentBytes =
          new ByteArrayInputStream(
            ValueSerializer.serializeValue(
              createArgument,
              s"failed to serialize create argument for contract $contractId"))
        updateParticipantContracts.setBinaryStream(1, createArgumentBytes)
        updateParticipantContracts.setString(2, templateId.toString)
        updateParticipantContracts.setString(3, contractId)
        updateParticipantContracts.execute()
      }
    } finally {
      if (loadContracts != null) {
        loadContracts.close()
      }
      if (updateParticipantContracts != null) {
        updateParticipantContracts.close()
      }
      if (rows != null) {
        rows.close()
      }
    }
  }

} 
Example 5
Source File: FileBasedLedgerDataExportSpec.scala From daml with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.participant.state.kvutils.export

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}
import java.time.Instant

import com.daml.ledger.participant.state.v1
import com.google.protobuf.ByteString
import org.scalatest.mockito.MockitoSugar
import org.scalatest.{Matchers, WordSpec}

class FileBasedLedgerDataExportSpec extends WordSpec with Matchers with MockitoSugar {
  // XXX SC remove in Scala 2.13; see notes in ConfSpec
  import scala.collection.GenTraversable, org.scalatest.enablers.Containing
  private[this] implicit def `fixed sig containingNatureOfGenTraversable`[
      E: org.scalactic.Equality,
      TRAV]: Containing[TRAV with GenTraversable[E]] =
    Containing.containingNatureOfGenTraversable[E, GenTraversable]

  "addParentChild" should {
    "add entry to correlation ID mapping" in {
      val instance = new FileBasedLedgerDataExporter(mock[DataOutputStream])
      instance.addParentChild("parent", "child")

      instance.correlationIdMapping should contain("child" -> "parent")
    }
  }

  "addToWriteSet" should {
    "append to existing data" in {
      val instance = new FileBasedLedgerDataExporter(mock[DataOutputStream])
      instance.addParentChild("parent", "child")
      instance.addToWriteSet("child", Seq(keyValuePairOf("a", "b")))
      instance.addToWriteSet("child", Seq(keyValuePairOf("c", "d")))

      instance.bufferedKeyValueDataPerCorrelationId should contain(
        "parent" ->
          Seq(keyValuePairOf("a", "b"), keyValuePairOf("c", "d")))
    }
  }

  "finishedProcessing" should {
    "remove all data such as submission info, write-set and child correlation IDs" in {
      val dataOutputStream = new DataOutputStream(new ByteArrayOutputStream())
      val instance = new FileBasedLedgerDataExporter(dataOutputStream)
      instance.addSubmission(
        ByteString.copyFromUtf8("an envelope"),
        "parent",
        Instant.now(),
        v1.ParticipantId.assertFromString("id"))
      instance.addParentChild("parent", "parent")
      instance.addToWriteSet("parent", Seq(keyValuePairOf("a", "b")))

      instance.finishedProcessing("parent")

      instance.inProgressSubmissions shouldBe empty
      instance.bufferedKeyValueDataPerCorrelationId shouldBe empty
      instance.correlationIdMapping shouldBe empty
    }
  }

  "serialized submission" should {
    "be readable back" in {
      val baos = new ByteArrayOutputStream()
      val dataOutputStream = new DataOutputStream(baos)
      val instance = new FileBasedLedgerDataExporter(dataOutputStream)
      val expectedRecordTimeInstant = Instant.now()
      val expectedParticipantId = v1.ParticipantId.assertFromString("id")
      instance.addSubmission(
        ByteString.copyFromUtf8("an envelope"),
        "parent",
        expectedRecordTimeInstant,
        v1.ParticipantId.assertFromString("id"))
      instance.addParentChild("parent", "parent")
      instance.addToWriteSet("parent", Seq(keyValuePairOf("a", "b")))

      instance.finishedProcessing("parent")

      val dataInputStream = new DataInputStream(new ByteArrayInputStream(baos.toByteArray))
      val (actualSubmissionInfo, actualWriteSet) = Serialization.readEntry(dataInputStream)
      actualSubmissionInfo.submissionEnvelope should be(ByteString.copyFromUtf8("an envelope"))
      actualSubmissionInfo.correlationId should be("parent")
      actualSubmissionInfo.recordTimeInstant should be(expectedRecordTimeInstant)
      actualSubmissionInfo.participantId should be(expectedParticipantId)
      actualWriteSet should be(Seq(keyValuePairOf("a", "b")))
    }
  }

  private def keyValuePairOf(key: String, value: String): (ByteString, ByteString) =
    ByteString.copyFromUtf8(key) -> ByteString.copyFromUtf8(value)
} 
Example 6
Source File: DarManifestReaderTest.scala From daml with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.lf.archive

import java.io.{ByteArrayInputStream, InputStream}
import java.nio.charset.Charset

import com.daml.lf.archive.DarManifestReader.DarManifestReaderException
import org.scalatest.{Inside, Matchers, WordSpec}

import scala.util.{Failure, Success}

class DarManifestReaderTest extends WordSpec with Matchers with Inside {

  private val unicode = Charset.forName("UTF-8")

  "should read dalf names from manifest, real scenario with Dalfs line split" in {
    val manifest = """Manifest-Version: 1.0
      |Created-By: Digital Asset packager (DAML-GHC)
      |Main-Dalf: com.daml.lf.archive:DarReaderTest:0.1.dalf
      |Dalfs: com.daml.lf.archive:DarReaderTest:0.1.dalf, daml-pri
      | m.dalf
      |Format: daml-lf
      |Encryption: non-encrypted""".stripMargin

    val inputStream: InputStream = new ByteArrayInputStream(manifest.getBytes(unicode))
    val actual = DarManifestReader.dalfNames(inputStream)

    actual shouldBe Success(
      Dar("com.daml.lf.archive:DarReaderTest:0.1.dalf", List("daml-prim.dalf")))

    inputStream.close()
  }

  "should read dalf names from manifest, Main-Dalf returned in the head" in {
    val manifest = """Main-Dalf: A.dalf
                     |Dalfs: B.dalf, C.dalf, A.dalf, E.dalf
                     |Format: daml-lf
                     |Encryption: non-encrypted""".stripMargin

    val inputStream: InputStream = new ByteArrayInputStream(manifest.getBytes(unicode))
    val actual = DarManifestReader.dalfNames(inputStream)

    actual shouldBe Success(Dar("A.dalf", List("B.dalf", "C.dalf", "E.dalf")))

    inputStream.close()
  }

  "should read dalf names from manifest, can handle one Dalf per manifest" in {
    val manifest = """Main-Dalf: A.dalf
                     |Dalfs: A.dalf
                     |Format: daml-lf
                     |Encryption: non-encrypted""".stripMargin

    val inputStream: InputStream = new ByteArrayInputStream(manifest.getBytes(unicode))
    val actual = DarManifestReader.dalfNames(inputStream)

    actual shouldBe Success(Dar("A.dalf", List.empty))

    inputStream.close()
  }

  "should return failure if Format is not daml-lf" in {
    val manifest = """Main-Dalf: A.dalf
                     |Dalfs: B.dalf, C.dalf, A.dalf, E.dalf
                     |Format: anything-different-from-daml-lf
                     |Encryption: non-encrypted""".stripMargin

    val inputStream: InputStream = new ByteArrayInputStream(manifest.getBytes(unicode))
    val actual = DarManifestReader.dalfNames(inputStream)

    inside(actual) {
      case Failure(DarManifestReaderException(msg)) =>
        msg shouldBe "Unsupported format: anything-different-from-daml-lf"
    }

    inputStream.close()
  }
} 
Example 7
Source File: BytecodeUtils.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.graphx.util

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import scala.collection.mutable.HashSet
import scala.language.existentials

import org.apache.xbean.asm5.{ClassReader, ClassVisitor, MethodVisitor}
import org.apache.xbean.asm5.Opcodes._

import org.apache.spark.util.Utils


private[graphx] object BytecodeUtils {

  // Returns true for classes whose methods should not be followed, i.e. Scala and
  // Spark internals (class names arrive in JVM internal form, e.g. "scala/Option").
  private def skipClass(className: String): Boolean = {
    val c = className.replace('/', '.')
    c.startsWith("scala.") || c.startsWith("org.apache.spark.")
  }

  private class MethodInvocationFinder(className: String, methodName: String)
    extends ClassVisitor(ASM5) {

    val methodsInvoked = new HashSet[(Class[_], String)]

    override def visitMethod(access: Int, name: String, desc: String,
                             sig: String, exceptions: Array[String]): MethodVisitor = {
      if (name == methodName) {
        new MethodVisitor(ASM5) {
          override def visitMethodInsn(
              op: Int, owner: String, name: String, desc: String, itf: Boolean) {
            if (op == INVOKEVIRTUAL || op == INVOKESPECIAL || op == INVOKESTATIC) {
              if (!skipClass(owner)) {
                methodsInvoked.add((Utils.classForName(owner.replace("/", ".")), name))
              }
            }
          }
        }
      } else {
        null
      }
    }
  }
} 
Example 8
Source File: PortableDataStream.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.input

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

import scala.collection.JavaConverters._

import com.google.common.io.{ByteStreams, Closeables}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.{InputSplit, JobContext, RecordReader, TaskAttemptContext}
import org.apache.hadoop.mapreduce.lib.input.{CombineFileInputFormat, CombineFileRecordReader, CombineFileSplit}


  def toArray(): Array[Byte] = {
    val stream = open()
    try {
      ByteStreams.toByteArray(stream)
    } finally {
      Closeables.close(stream, true)
    }
  }

  def getPath(): String = path
} 
Example 9
Source File: GenericAvroSerializerSuite.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Input, Output}
import org.apache.avro.{Schema, SchemaBuilder}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SharedSparkContext, SparkFunSuite}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema : Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }

  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
} 
Example 10
Source File: SerializerPropertiesSuite.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import scala.util.Random

import org.scalatest.Assertions

import org.apache.spark.{SparkConf, SparkFunSuite}
import org.apache.spark.serializer.KryoTest.RegistratorWithoutAutoReset


class SerializerPropertiesSuite extends SparkFunSuite {

  import SerializerPropertiesSuite._

  test("JavaSerializer does not support relocation") {
    // Per a comment on the SPARK-4550 JIRA ticket, Java serialization appears to write out the
    // full class name the first time an object is written to an output stream, but subsequent
    // references to the class write a more compact identifier; this prevents relocation.
    val ser = new JavaSerializer(new SparkConf())
    testSupportsRelocationOfSerializedObjects(ser, generateRandomItem)
  }

  test("KryoSerializer supports relocation when auto-reset is enabled") {
    val ser = new KryoSerializer(new SparkConf)
    assert(ser.newInstance().asInstanceOf[KryoSerializerInstance].getAutoReset())
    testSupportsRelocationOfSerializedObjects(ser, generateRandomItem)
  }

  test("KryoSerializer does not support relocation when auto-reset is disabled") {
    val conf = new SparkConf().set("spark.kryo.registrator",
      classOf[RegistratorWithoutAutoReset].getName)
    val ser = new KryoSerializer(conf)
    assert(!ser.newInstance().asInstanceOf[KryoSerializerInstance].getAutoReset())
    testSupportsRelocationOfSerializedObjects(ser, generateRandomItem)
  }

}

object SerializerPropertiesSuite extends Assertions {

  def generateRandomItem(rand: Random): Any = {
    val randomFunctions: Seq[() => Any] = Seq(
      () => rand.nextInt(),
      () => rand.nextString(rand.nextInt(10)),
      () => rand.nextDouble(),
      () => rand.nextBoolean(),
      () => (rand.nextInt(), rand.nextString(rand.nextInt(10))),
      () => MyCaseClass(rand.nextInt(), rand.nextString(rand.nextInt(10))),
      () => {
        val x = MyCaseClass(rand.nextInt(), rand.nextString(rand.nextInt(10)))
        (x, x)
      }
    )
    randomFunctions(rand.nextInt(randomFunctions.size)).apply()
  }

  def testSupportsRelocationOfSerializedObjects(
      serializer: Serializer,
      generateRandomItem: Random => Any): Unit = {
    if (!serializer.supportsRelocationOfSerializedObjects) {
      return
    }
    val NUM_TRIALS = 5
    val rand = new Random(42)
    for (_ <- 1 to NUM_TRIALS) {
      val items = {
        // Make sure that we have duplicate occurrences of the same object in the stream:
        val randomItems = Seq.fill(10)(generateRandomItem(rand))
        randomItems ++ randomItems.take(5)
      }
      val baos = new ByteArrayOutputStream()
      val serStream = serializer.newInstance().serializeStream(baos)
      def serializeItem(item: Any): Array[Byte] = {
        val itemStartOffset = baos.toByteArray.length
        serStream.writeObject(item)
        serStream.flush()
        val itemEndOffset = baos.toByteArray.length
        baos.toByteArray.slice(itemStartOffset, itemEndOffset).clone()
      }
      val itemsAndSerializedItems: Seq[(Any, Array[Byte])] = {
        val serItems = items.map {
          item => (item, serializeItem(item))
        }
        serStream.close()
        rand.shuffle(serItems)
      }
      val reorderedSerializedData: Array[Byte] = itemsAndSerializedItems.flatMap(_._2).toArray
      val deserializedItemsStream = serializer.newInstance().deserializeStream(
        new ByteArrayInputStream(reorderedSerializedData))
      assert(deserializedItemsStream.asIterator.toSeq === itemsAndSerializedItems.map(_._1))
      deserializedItemsStream.close()
    }
  }
}

private case class MyCaseClass(foo: Int, bar: String) 
Example 11
Source File: ECIESCoder.scala From mantis with Apache License 2.0
package io.iohk.ethereum.crypto

import java.io.{ByteArrayInputStream, IOException}
import java.math.BigInteger
import java.security.SecureRandom

import org.spongycastle.crypto.digests.{SHA1Digest, SHA256Digest}
import org.spongycastle.crypto.engines.AESEngine
import org.spongycastle.crypto.generators.ECKeyPairGenerator
import org.spongycastle.crypto.macs.HMac
import org.spongycastle.crypto.modes.SICBlockCipher
import org.spongycastle.crypto.params._
import org.spongycastle.crypto.parsers.ECIESPublicKeyParser
import org.spongycastle.crypto.{BufferedBlockCipher, InvalidCipherTextException}
import org.spongycastle.math.ec.ECPoint

object ECIESCoder {

  val KeySize = 128
  val PublicKeyOverheadSize = 65
  val MacOverheadSize = 32
  val OverheadSize = PublicKeyOverheadSize + KeySize / 8 + MacOverheadSize

  @throws[IOException]
  @throws[InvalidCipherTextException]
  def decrypt(privKey: BigInteger, cipher: Array[Byte], macData: Option[Array[Byte]] = None): Array[Byte] = {
    val is = new ByteArrayInputStream(cipher)
    val ephemBytes = new Array[Byte](2 * ((curve.getCurve.getFieldSize + 7) / 8) + 1)
    is.read(ephemBytes)
    val ephem = curve.getCurve.decodePoint(ephemBytes)
    val IV = new Array[Byte](KeySize / 8)
    is.read(IV)
    val cipherBody = new Array[Byte](is.available)
    is.read(cipherBody)
    decrypt(ephem, privKey, Some(IV), cipherBody, macData)
  }

  @throws[InvalidCipherTextException]
  def decrypt(ephem: ECPoint, prv: BigInteger, IV: Option[Array[Byte]], cipher: Array[Byte], macData: Option[Array[Byte]]): Array[Byte] = {
    val aesEngine = new AESEngine

    val iesEngine = new EthereumIESEngine(
      kdf = Left(new ConcatKDFBytesGenerator(new SHA256Digest)),
      mac = new HMac(new SHA256Digest),
      hash = new SHA256Digest,
      cipher = Some(new BufferedBlockCipher(new SICBlockCipher(aesEngine))),
      IV = IV,
      prvSrc = Left(new ECPrivateKeyParameters(prv, curve)),
      pubSrc = Left(new ECPublicKeyParameters(ephem, curve)))


    iesEngine.processBlock(cipher, 0, cipher.length, forEncryption = false, macData)
  }

  
  @throws[IOException]
  @throws[InvalidCipherTextException]
  def encryptSimple(pub: ECPoint, secureRandom: SecureRandom, plaintext: Array[Byte]): Array[Byte] = {

    val eGen = new ECKeyPairGenerator
    val gParam = new ECKeyGenerationParameters(curve, secureRandom)
    eGen.init(gParam)

    val iesEngine = new EthereumIESEngine(
      kdf = Right(new MGF1BytesGeneratorExt(new SHA1Digest)),
      mac = new HMac(new SHA1Digest),
      hash = new SHA1Digest,
      cipher = None,
      IV = Some(new Array[Byte](0)),
      prvSrc = Right(eGen),
      pubSrc = Left(new ECPublicKeyParameters(pub, curve)),
      hashMacKey = false)

    iesEngine.processBlock(plaintext, 0, plaintext.length, forEncryption = true)
  }

  private def makeIESEngine(pub: ECPoint, prv: BigInteger, IV: Option[Array[Byte]]) = {
    val aesEngine = new AESEngine

    val iesEngine = new EthereumIESEngine(
      kdf = Left(new ConcatKDFBytesGenerator(new SHA256Digest)),
      mac = new HMac(new SHA256Digest),
      hash = new SHA256Digest,
      cipher = Some(new BufferedBlockCipher(new SICBlockCipher(aesEngine))),
      IV = IV,
      prvSrc = Left(new ECPrivateKeyParameters(prv, curve)),
      pubSrc = Left(new ECPublicKeyParameters(pub, curve)))

    iesEngine
  }

} 
Example 12
Source File: HdfsBlockStream.scala From ArchiveSpark with MIT License
package org.archive.archivespark.sparkling.io

import java.io.{ByteArrayInputStream, InputStream}

import org.apache.hadoop.fs.{FileSystem, Path}
import org.archive.archivespark.sparkling.logging.LogContext
import org.archive.archivespark.sparkling.util.Common

import scala.util.Try

class HdfsBlockStream (fs: FileSystem, file: String, offset: Long = 0, length: Long = -1, retries: Int = 60, sleepMillis: Int = 1000 * 60) extends InputStream {
  implicit val logContext: LogContext = LogContext(this)

  val path = new Path(file)
  val (blockSize: Int, fileSize: Long) = {
    val status = fs.getFileStatus(path)
    (status.getBlockSize.min(Int.MaxValue).toInt, status.getLen)
  }

  private var pos: Long = offset.max(0)
  private val max: Long = if (length > 0) fileSize.min(pos + length) else fileSize

  private val buffer = new Array[Byte](blockSize)
  private val emptyBlock = new ByteArrayInputStream(Array.emptyByteArray)
  private var block: ByteArrayInputStream = emptyBlock

  def ensureNextBlock(): InputStream = {
    if (block.available() == 0 && pos < max) {
      val end = pos + blockSize
      val blockLength = ((end - (end % blockSize)).min(max) - pos).toInt
      Common.retry(retries, sleepMillis, (retry, e) => {
        "File access failed (" + retry + "/" + retries + "): " + path + " (Offset: " + pos + ") - " + e.getMessage
      }) { retry =>
        val in = fs.open(path, blockLength)
        if (retry > 0) Try(in.seekToNewSource(pos))
        else if (pos > 0) in.seek(pos)
        var read = 0
        while (read < blockLength) read += in.read(buffer, read, blockLength - read)
        Try(in.close())
      }
      pos += blockLength
      block = new ByteArrayInputStream(buffer, 0, blockLength)
    }
    block
  }

  override def read(): Int = ensureNextBlock().read()

  override def read(b: Array[Byte]): Int = ensureNextBlock().read(b)

  override def read(b: Array[Byte], off: Int, len: Int): Int = ensureNextBlock().read(b, off, len)

  override def skip(n: Long): Long = {
    val available = block.available()
    if (n <= available) block.skip(n)
    else {
      block = emptyBlock
      val currentPos = pos - available
      val skip = n.min(max - currentPos)
      pos += skip - available
      skip
    }
  }

  override def available(): Int = block.available()

  override def close(): Unit = {}
  override def markSupported(): Boolean = false
} 
Example 13
Source File: PLYReadWriteTests.scala From scalismo-faces with Apache License 2.0
package scalismo.faces.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, OutputStreamWriter}
import java.nio.ByteOrder
import java.util.Scanner

import scalismo.faces.FacesTestSuite
import scalismo.faces.io.ply._

class PLYReadWriteTests extends FacesTestSuite {

  describe("Write-read cycles to string, big- and little endian") {

    def testRWEndianCycle[A:StringWriter:StringReader:EndianWriter:EndianReader](toWrite: IndexedSeq[A], bo: ByteOrder): Unit = {
      val N = toWrite.size
      val os = new ByteArrayOutputStream()
      val writer = new SequenceWriter[A]
      writer.write(toWrite, os, bo)

      val ba = os.toByteArray

      val is = new ByteArrayInputStream(ba)
      val reader = new FixedLengthSequenceReader[A]
      val read = reader.read(N, is, bo)

      read.zip(toWrite).foreach { p =>
        p._1 shouldBe p._2
      }
    }

    def testRWStringCycle[A:StringWriter:StringReader:EndianWriter:EndianReader](toWrite: IndexedSeq[A]): Unit = {
      val N = toWrite.size
      val os = new ByteArrayOutputStream()
      val osw = new OutputStreamWriter(os)
      val writer = new SequenceWriter[A]
      writer.write(toWrite, osw)
      osw.flush()

      val is = new ByteArrayInputStream(os.toByteArray)
      val isr = new Scanner(is)
      val reader = new FixedLengthSequenceReader[A]
      val read = reader.read(N, isr)

      read.zip(toWrite).foreach { p =>
        p._1 shouldBe p._2
      }
    }

    def testAllThreeCycles[A:StringWriter:StringReader:EndianWriter:EndianReader](toWrite: IndexedSeq[A]): Unit = {
      testRWStringCycle(toWrite)
      testRWEndianCycle(toWrite, ByteOrder.BIG_ENDIAN)
      testRWEndianCycle(toWrite, ByteOrder.LITTLE_ENDIAN)
    }

    it("should result in the same sequence of bytes") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toByte
      testAllThreeCycles(toWrite)
    }
    it("should result in the same sequence of char") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toChar
      testAllThreeCycles(toWrite)
    }
    it("should result in the same sequence of short") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toShort
      testAllThreeCycles(toWrite)
    }
    it("should result in the same sequence of int") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toInt
      testAllThreeCycles(toWrite)
    }
    it("should result in the same sequence of long") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toLong
      testAllThreeCycles(toWrite)
    }
    it("should result in the same sequence of float") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toFloat
      testAllThreeCycles(toWrite)
    }
    it("should result in the same sequence of double") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255)
      testAllThreeCycles(toWrite)
    }

  }

} 
Example 14
Source File: package.scala From spark-alchemy with Apache License 2.0
package com.swoop.alchemy.spark.expressions

import java.io.{ByteArrayInputStream, DataInputStream}

import com.clearspring.analytics.stream
import com.clearspring.analytics.stream.cardinality.RegisterSet
import com.clearspring.analytics.util.{Bits, Varint}
import net.agkn.hll.HLL
import net.agkn.hll.serialization.{HLLMetadata, SchemaVersionOne}
import net.agkn.hll.util.BitVector

package object hll {
  val IMPLEMENTATION_CONFIG_KEY = "com.swoop.alchemy.hll.implementation"

  def strmToAgkn(from: stream.cardinality.HyperLogLogPlus): net.agkn.hll.HLL = {
    HLL.fromBytes(strmToAgkn(from.getBytes))
  }

  def strmToAgkn(from: Array[Byte]): Array[Byte] = {
    var bais = new ByteArrayInputStream(from)
    var oi = new DataInputStream(bais)
    val version = oi.readInt
    // the new encoding scheme includes a version field
    // that is always negative.
    if (version >= 0) {
      throw new UnsupportedOperationException("conversion is only supported for the new style encoding scheme")
    }

    val p = Varint.readUnsignedVarInt(oi)
    val sp = Varint.readUnsignedVarInt(oi)
    val formatType = Varint.readUnsignedVarInt(oi)
    if (formatType != 0) {
      throw new UnsupportedOperationException("conversion is only supported for non-sparse representation")
    }

    val size = Varint.readUnsignedVarInt(oi)
    val longArrayBytes = new Array[Byte](size)
    oi.readFully(longArrayBytes)
    val registerSet = new RegisterSet(Math.pow(2, p).toInt, Bits.getBits(longArrayBytes))
    val bitVector = new BitVector(RegisterSet.REGISTER_SIZE, registerSet.count)

    for (i <- 0 until registerSet.count) bitVector.setRegister(i, registerSet.get(i))
    val schemaVersion = new SchemaVersionOne
    val serializer =
      schemaVersion.getSerializer(net.agkn.hll.HLLType.FULL, RegisterSet.REGISTER_SIZE, registerSet.count)
    bitVector.getRegisterContents(serializer)
    var outBytes = serializer.getBytes

    val metadata = new HLLMetadata(
      schemaVersion.schemaVersionNumber(),
      net.agkn.hll.HLLType.FULL,
      p,
      RegisterSet.REGISTER_SIZE,
      0,
      true,
      false,
      false
    )
    schemaVersion.writeMetadata(outBytes, metadata)
    outBytes
  }
} 
Example 15
Source File: DatasetFunctionsSpec.scala From daf with BSD 3-Clause "New" or "Revised" License
package daf.dataset

import java.io.ByteArrayInputStream

import akka.stream.ActorMaterializer
import akka.stream.scaladsl.StreamConverters
import controllers.modules.TestAbstractModule
import daf.filesystem.MergeStrategy
import daf.instances.{ AkkaInstance, ConfigurationInstance }
import org.scalatest.{ BeforeAndAfterAll, MustMatchers, WordSpecLike }

import scala.concurrent.Await
import scala.concurrent.duration._
import scala.util.Random

class DatasetFunctionsSpec extends TestAbstractModule
  with WordSpecLike
  with MustMatchers
  with BeforeAndAfterAll
  with ConfigurationInstance
  with AkkaInstance {

  implicit lazy val executionContext = actorSystem.dispatchers.lookup("akka.actor.test-dispatcher")

  protected implicit lazy val materializer = ActorMaterializer.create { actorSystem }

  override def beforeAll() = {
    startAkka()
  }

  def data = (1 to 5) .map { i =>
    Random.alphanumeric.grouped(20).take(5).map { s => s"$i - ${s.mkString}" }.toStream :+ defaultSeparator
  }

  def stream = MergeStrategy.coalesced {
    data.map { iter =>
      new ByteArrayInputStream(
        iter.mkString(defaultSeparator).getBytes("UTF-8")
      )
    }
  }

  def source = StreamConverters.fromInputStream(() => stream, 5)

  "Source manipulation" must {

    "convert to a string source" in {
      Await.result(
        wrapDefault { asStringSource(source) }.runFold("") { _ + _ },
        5.seconds
      ).split(defaultSeparator).length must be { 25 }
    }

    "convert to a json source" in {
      Await.result(
        wrapJson { asStringSource(source) }.runFold("") { _ + _ },
        5.seconds
      ).split(jsonSeparator).length must be { 25 }
    }

  }

} 
Example 16
Source File: JavaSerializationConverter.scala From scala-serialization with MIT License
package com.komanov.serialization.converters

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import com.komanov.serialization.converters.IoUtils.using
import com.komanov.serialization.domain.{Site, SiteEvent, SiteEventData}

object JavaSerializationConverter extends MyConverter {

  override def toByteArray(site: Site): Array[Byte] = {
    using(new ByteArrayOutputStream()) { baos =>
      using(new ObjectOutputStream(baos)) { os =>
        os.writeObject(site)
        os.flush()
        baos.toByteArray
      }
    }
  }

  override def fromByteArray(bytes: Array[Byte]): Site = {
    using(new ByteArrayInputStream(bytes)) { bais =>
      using(new ObjectInputStream(bais)) { os =>
        os.readObject().asInstanceOf[Site]
      }
    }
  }

  override def toByteArray(event: SiteEvent): Array[Byte] = {
    using(new ByteArrayOutputStream()) { baos =>
      using(new ObjectOutputStream(baos)) { os =>
        os.writeObject(event)
        os.flush()
        baos.toByteArray
      }
    }
  }

  override def siteEventFromByteArray(clazz: Class[_], bytes: Array[Byte]): SiteEvent = {
    using(new ByteArrayInputStream(bytes)) { bais =>
      using(new ObjectInputStream(bais)) { os =>
        os.readObject().asInstanceOf[SiteEvent]
      }
    }
  }

} 
Example 17
Source File: BMLHelper.scala From Linkis with Apache License 2.0
package com.webank.wedatasphere.linkis.filesystem.bml

import java.io.{ByteArrayInputStream, InputStream}
import java.util
import java.util.UUID

import com.webank.wedatasphere.linkis.bml.client.{BmlClient, BmlClientFactory}
import com.webank.wedatasphere.linkis.bml.protocol.{BmlDownloadResponse, BmlUpdateResponse, BmlUploadResponse}
import com.webank.wedatasphere.linkis.filesystem.exception.WorkspaceExceptionManager
import org.springframework.stereotype.Component

import scala.collection.JavaConversions._


@Component
class BMLHelper {

  def upload(userName: String, content: String, fileName: String): util.Map[String, Object] = {
    val inputStream = new ByteArrayInputStream(content.getBytes("utf-8"))
    val client: BmlClient = createBMLClient(userName)
    val resource: BmlUploadResponse = client.uploadResource(userName, fileName, inputStream)
    if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80021)
    val map = new util.HashMap[String, Object]
    map += "resourceId" -> resource.resourceId
    map += "version" -> resource.version
  }

  def upload(userName: String, inputStream: InputStream, fileName: String, projectName: String): util.Map[String, Object] = {
    val client: BmlClient = createBMLClient(userName)
    val resource: BmlUploadResponse = client.uploadResource(userName, fileName, inputStream)
    if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80021)
    val map = new util.HashMap[String, Object]
    map += "resourceId" -> resource.resourceId
    map += "version" -> resource.version
  }


  def upload(userName: String, inputStream: InputStream, fileName: String): util.Map[String, Object] = {
    val client: BmlClient = createBMLClient(userName)
    val resource: BmlUploadResponse = client.uploadResource(userName, fileName, inputStream)
    if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80021)
    val map = new util.HashMap[String, Object]
    map += "resourceId" -> resource.resourceId
    map += "version" -> resource.version
  }

  def update(userName: String, resourceId: String, inputStream: InputStream): util.Map[String, Object] = {
    val client: BmlClient = createBMLClient(userName)
    val resource: BmlUpdateResponse = client.updateResource(userName, resourceId, "", inputStream)
    if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80022)
    val map = new util.HashMap[String, Object]
    map += "resourceId" -> resource.resourceId
    map += "version" -> resource.version
  }

  def update(userName: String, resourceId: String, content: String): util.Map[String, Object] = {
    val inputStream = new ByteArrayInputStream(content.getBytes("utf-8"))
    val client: BmlClient = createBMLClient(userName)
    val resource: BmlUpdateResponse = client.updateResource(userName, resourceId, UUID.randomUUID().toString + ".json", inputStream)
    if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80022)
    val map = new util.HashMap[String, Object]
    map += "resourceId" -> resource.resourceId
    map += "version" -> resource.version
  }

  def query(userName: String, resourceId: String, version: String): util.Map[String, Object] = {
    val client: BmlClient = createBMLClient(userName)
    var resource: BmlDownloadResponse = null
    if (version == null) resource = client.downloadResource(userName, resourceId, null)
    else resource = client.downloadResource(userName, resourceId, version)
    if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80023)
    val map = new util.HashMap[String, Object]
    map += "path" -> resource.fullFilePath
    map += "stream" -> resource.inputStream
  }

  private def inputstremToString(inputStream: InputStream): String = scala.io.Source.fromInputStream(inputStream).mkString

  private def createBMLClient(userName: String): BmlClient = if (userName == null)
    BmlClientFactory.createBmlClient()
  else
    BmlClientFactory.createBmlClient(userName)
} 
Example 18
Source File: StorageScriptFsWriter.scala From Linkis with Apache License 2.0
package com.webank.wedatasphere.linkis.storage.script.writer

import java.io.{ByteArrayInputStream, IOException, InputStream, OutputStream}
import java.util

import com.webank.wedatasphere.linkis.common.io.{FsPath, MetaData, Record}
import com.webank.wedatasphere.linkis.storage.LineRecord
import com.webank.wedatasphere.linkis.storage.script.{Compaction, ScriptFsWriter, ScriptMetaData}
import com.webank.wedatasphere.linkis.storage.utils.{StorageConfiguration, StorageUtils}
import org.apache.commons.io.IOUtils


class StorageScriptFsWriter(val path: FsPath, val charset: String, outputStream: OutputStream = null) extends ScriptFsWriter {

  private val stringBuilder = new StringBuilder

  @scala.throws[IOException]
  override def addMetaData(metaData: MetaData): Unit = {
    val compactions = Compaction.listCompactions().filter(p => p.belongTo(StorageUtils.pathToSuffix(path.getPath)))
    val metadataLine = new util.ArrayList[String]()
    if (compactions.length > 0) {
      metaData.asInstanceOf[ScriptMetaData].getMetaData.map(compactions(0).compact).foreach(metadataLine.add)
      if (outputStream != null) {
        IOUtils.writeLines(metadataLine, "\n", outputStream, charset)
      } else {
        import scala.collection.JavaConversions._
        metadataLine.foreach(m => stringBuilder.append(s"$m\n"))
      }
    }
  }

  @scala.throws[IOException]
  override def addRecord(record: Record): Unit = {
    // Convert to LineRecord rather than TableRecord so that non-table result sets can also be written through this class
    val scriptRecord = record.asInstanceOf[LineRecord]
    if (outputStream != null) {
      IOUtils.write(scriptRecord.getLine, outputStream, charset)
    } else {
      stringBuilder.append(scriptRecord.getLine)
    }
  }

  override def close(): Unit = {
    IOUtils.closeQuietly(outputStream)
  }

  override def flush(): Unit = if (outputStream != null) outputStream.flush()

  def getInputStream(): InputStream = {
    new ByteArrayInputStream(stringBuilder.toString().getBytes(StorageConfiguration.STORAGE_RS_FILE_TYPE.getValue))
  }

} 
Example 19
Source File: StorageResultSetReader.scala From Linkis with Apache License 2.0
package com.webank.wedatasphere.linkis.storage.resultset

import java.io.{ByteArrayInputStream, IOException, InputStream}

import com.webank.wedatasphere.linkis.common.io.resultset.{ResultSet, ResultSetReader}
import com.webank.wedatasphere.linkis.common.io.{MetaData, Record}
import com.webank.wedatasphere.linkis.common.utils.Logging
import com.webank.wedatasphere.linkis.storage.domain.Dolphin
import com.webank.wedatasphere.linkis.storage.exception.StorageWarnException
import com.webank.wedatasphere.linkis.storage.utils.StorageUtils

import scala.collection.mutable.ArrayBuffer



  def readLine(): Array[Byte] = {

    var rowLen = 0
    try rowLen = Dolphin.readInt(inputStream)
    catch {
      case t:StorageWarnException => info(s"Read finished(读取完毕)") ; return null
      case t: Throwable => throw t
    }

    val rowBuffer = ArrayBuffer[Byte]()
    var len = 0

    //Read the entire line, except for the data of the line length(读取整行,除了行长的数据)
    while (rowLen > 0 && len >= 0) {
      if (rowLen > READ_CACHE)
        len = StorageUtils.readBytes(inputStream,bytes, READ_CACHE)
      else
        len = StorageUtils.readBytes(inputStream,bytes, rowLen)

      if (len > 0) {
        rowLen -= len
        rowBuffer ++= bytes.slice(0, len)
      }
    }
    rowCount = rowCount + 1
    rowBuffer.toArray
  }

  @scala.throws[IOException]
  override def getRecord: Record = {
    if (metaData == null) throw new IOException("Must read metadata first(必须先读取metadata)")
    if (row ==  null) throw new IOException("Can't get the value of the field, maybe the IO stream has been read or has been closed!(拿不到字段的值,也许IO流已读取完毕或已被关闭!)")
    row
  }

  @scala.throws[IOException]
  override def getMetaData: MetaData = {
    if(metaData == null) init()
    metaData = deserializer.createMetaData(readLine())
    metaData
  }

  @scala.throws[IOException]
  override def skip(recordNum: Int): Int = {
    if(recordNum < 0 ) return -1

    if(metaData == null) getMetaData
    for(i <- recordNum until (0, -1)){
      try inputStream.skip(Dolphin.readInt(inputStream)) catch { case t: Throwable => return -1}
    }
    recordNum
  }

  @scala.throws[IOException]
  override def getPosition: Long = rowCount

  @scala.throws[IOException]
  override def hasNext: Boolean = {
    if(metaData == null) getMetaData
    val line = readLine()
    if(line == null) return  false
    row = deserializer.createRecord(line)
    if(row == null) return  false
    true
  }

  @scala.throws[IOException]
  override def available: Long = inputStream.available()

  override def close(): Unit = inputStream.close()
} 
Example 20
Source File: StreamingSpec.scala From seals with Apache License 2.0
package com.example.streaming

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream }

import shapeless.record._

import cats.effect.IO

import org.scalatest.matchers.should.Matchers
import org.scalatest.flatspec.AnyFlatSpec

import fs2.Stream

import scodec.Codec
import scodec.bits.BitVector
import scodec.stream.CodecError

import dev.tauri.seals._
import dev.tauri.seals.scodec.Codecs._
import dev.tauri.seals.scodec.StreamCodecs._

class StreamingSpec extends AnyFlatSpec with Matchers {

  import Main.{ Animal, Elephant, Quokka, Quagga, Grey }

  val animals = Vector[Animal](
    Elephant("Dumbo", tuskLength = 35.0f),
    Quokka("Nellie"),
    Quagga("Ford", speed = 120.0)
  )

  val transformedAnimals = Vector[Animal](
    Elephant("Dumbo", tuskLength = 35.0f + 17.0f),
    Quokka("Nellie", Grey)
  )

  val animalStream = Stream.emits[IO, Animal](animals)

  val encoder = streamEncoderFromReified[Animal]
  val decoder = streamDecoderFromReified[Animal]

  "Encoding/decoding" should "work correctly" in {
    val tsk: IO[Unit] = for {
      bv <- encoder.encode[IO](animalStream).compile.fold(BitVector.empty)(_ ++ _)
      as <- decoder.decode[IO](Stream(bv)).compile.toVector
    } yield {
      as should === (animals)
    }
    tsk.unsafeRunSync()
  }

  it should "fail with incompatible models" in {
    val mod = Reified[Record.`'Elephant -> Elephant, 'Quokka -> Quokka`.T].model
    val bv: BitVector = Codec[Model].encode(mod).getOrElse(fail)
    val tsk: IO[Unit] = for {
      as <- decoder.decode[IO](Stream(bv)).compile.toVector
    } yield {
      as should === (Vector.empty)
    }

    val ex = intercept[CodecError] {
      tsk.unsafeRunSync()
    }
    ex.err.message should include ("incompatible models")
  }

  "Transformation" should "work correctly" in {
    val tsk: IO[Unit] = for {
      ibv <- encoder.encode[IO](animalStream).compile.fold(BitVector.empty)(_ ++ _)
      is = new ByteArrayInputStream(ibv.toByteArray)
      os = new ByteArrayOutputStream
      _ <- Main.transform(is, os)(Main.transformer)
      obv = BitVector(os.toByteArray())
      transformed <- decoder.decode[IO](Stream(obv)).compile.fold(Vector.empty[Animal])(_ :+ _)
    } yield {
      transformed should === (transformedAnimals)
    }
    tsk.unsafeRunSync()
  }
} 
Example 21
Source File: CsvSourceTypeConversionTest.scala From eel-sdk with Apache License 2.0
package io.eels.component.csv

import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets

import io.eels.schema._
import org.scalatest.{Ignore, Matchers, WordSpec}

@Ignore
class CsvSourceTypeConversionTest extends WordSpec with Matchers {
  "CsvSource" should {
    "read schema" in {
      val exampleCsvString =
        """A,B,C,D
          |1,2.2,3,foo
          |4,5.5,6,bar
        """.stripMargin

      val stream = new ByteArrayInputStream(exampleCsvString.getBytes(StandardCharsets.UTF_8))
      val schema = new StructType(Vector(
        Field("A", IntType.Signed),
        Field("B", DoubleType),
        Field("C", IntType.Signed),
        Field("D", StringType)
      ))
      val source = new CsvSource(() => stream)
        .withSchema(schema)
      
      source.schema.fields.foreach(println)
      val ds = source.toDataStream()
      val firstRow = ds.iterator.toIterable.head
      val firstRowA = firstRow.get("A")
      println(firstRowA) // prints 1 as expected
      println(firstRowA.getClass.getTypeName) // prints java.lang.String
      assert(firstRowA == 1) // this assertion will fail because firstRowA is not an Int
    }
  }
} 
Example 22
Source File: get_features_from_peinfo.scala From gsoc_relationship with Apache License 2.0
import com.datastax.spark.connector._
import play.api.libs.json.Json
import play.api.libs.json._
import java.io.{ByteArrayOutputStream, ByteArrayInputStream}
import java.util.zip.{GZIPOutputStream, GZIPInputStream}
import Array.concat
import org.apache.spark.sql.types._
import org.apache.spark.ml.linalg.SQLDataTypes.VectorType 
import org.apache.spark.ml.linalg._
import org.apache.spark.sql.Row
import org.apache.spark.ml.feature.MinMaxScaler
import org.apache.spark.ml.linalg.DenseVector
import PreProcessingConfig._

case class peinfo_results_by_service_name_class(service_name: String, sha256: String)
case class peinfo_results_by_sha256_class(sha256: String, service_name: String, results: Array[Byte])
case class peinfo_join_results_class(sha256: String, service_name: String, results: String)
case class peinfo_int_final_array_rdd_class(sha256: String, array_results: Array[Double])
case class peinfo_binaray_final_array_rdd_class(sha256:String, array_results :Array[Double])
case class peinfo_final_array_rdd_class(sha256:String, array_results: Array[Double])

def unzip(x: Array[Byte]) : String = {      
    val inputStream = new GZIPInputStream(new ByteArrayInputStream(x))
    val output = scala.io.Source.fromInputStream(inputStream).mkString
    return output
}
def findAllIntinpeinfo(peinfo_json_results: JsLookupResult, time: Double): Array[Double] = {
    val entropy = peinfo_json_results \\ "entropy"
    val virt_address = peinfo_json_results \\ "virt_address"
    val virt_size = peinfo_json_results \\ "virt_size"
    val size = peinfo_json_results \\ "size"
    // 17 features: 4 values for each of the four well-known PE sections, plus the timestamp.
    val features = Array.fill(17)(0.0)
    var i = 0
    for (k <- (peinfo_json_results \\ "section_name")) {
        k.as[String] match {
            case ".text\u0000\u0000\u0000" => { features(0) = entropy(i).as[Double]; features(1) = Integer.parseInt(virt_address(i).as[String].substring(2), 16).toDouble; features(2) = virt_size(i).as[Double]; features(3) = size(i).as[Double] }
            case ".data\u0000\u0000\u0000" => { features(4) = entropy(i).as[Double]; features(5) = Integer.parseInt(virt_address(i).as[String].substring(2), 16).toDouble; features(6) = virt_size(i).as[Double]; features(7) = size(i).as[Double] }
            case ".rsrc\u0000\u0000\u0000" => { features(8) = entropy(i).as[Double]; features(9) = Integer.parseInt(virt_address(i).as[String].substring(2), 16).toDouble; features(10) = virt_size(i).as[Double]; features(11) = size(i).as[Double] }
            case ".rdata\u0000\u0000" => { features(12) = entropy(i).as[Double]; features(13) = Integer.parseInt(virt_address(i).as[String].substring(2), 16).toDouble; features(14) = virt_size(i).as[Double]; features(15) = size(i).as[Double] }
            case _ => // other sections are ignored
        }
        i = i + 1
    }
    features(16) = time
    features
}

val peinfo_results_by_service_name_meta = sc.cassandraTable[peinfo_results_by_service_name_class](keyspace,service_name_table).where("service_name=?","peinfo")
val peinfo_results_by_service_name_rdd = peinfo_results_by_service_name_meta.keyBy(x=> (x.sha256,x.service_name))
val peinfo_results_by_sha256_meta = sc.cassandraTable[peinfo_results_by_sha256_class](keyspace,sha256_table)
val peinfo_results_by_sha256_rdd = peinfo_results_by_sha256_meta.keyBy(x => (x.sha256,x.service_name))
val peinfo_join_results = peinfo_results_by_service_name_rdd.join(peinfo_results_by_sha256_rdd).map(x=> (new peinfo_join_results_class(x._1._1,x._1._2, unzip(x._2._2.results)))).distinct().cache()

val peinfo_int_final_array_rdd = peinfo_join_results.map(x=>(x.sha256,(Json.parse(x.results) \ "pe_sections"),{if ((Json.parse(x.results) \ "timestamp").isInstanceOf[JsUndefined]) 0.0 else (Json.parse(x.results) \ "timestamp" \\ "timestamp")(0).as[Double]})).filter(x=> !x._2.isInstanceOf[JsUndefined]).map(x=>new  peinfo_int_final_array_rdd_class(x._1,findAllIntinpeinfo(x._2,x._3)))

val peinfo_dllfunction_list= peinfo_join_results.map(x=>Json.parse(x.results) \ "imports").filter(x=> !x.isInstanceOf[JsUndefined]).flatMap(x=>x.as[List[Map[String, String]]].map(x=>(x("dll")+"."+x("function")))).toDF("func_name").groupBy("func_name").count.sort(desc("count")).filter("count > 10000").rdd.map(r => r.getString(0)).collect().toList
implicit def bool2int(b:Boolean) = if (b) 1 else 0
def findAllBininpeinfo_dllfunction(peinfo_dllfunction : Seq[String]) : Array[Double] ={
    val forlist = for (family <- peinfo_dllfunction_list) yield {
        (peinfo_dllfunction.contains(family):Int).toDouble
    }
    return (forlist).toArray
}
val List502 = Array.iterate(0.0,502)(a=>0.0)
val peinfo_binaray_final_array_rdd = peinfo_join_results.map(x=>(x.sha256,(Json.parse(x.results) \ "imports"))).map(x=>new  peinfo_binaray_final_array_rdd_class(x._1,{if (x._2.isInstanceOf[JsUndefined]) List502 else findAllBininpeinfo_dllfunction(x._2.as[Seq[Map[String, String]]].map(x=>(x("dll")+"."+x("function"))))}))

val peinfo_int_final_array_rdd_before_join = peinfo_int_final_array_rdd.map(x=>(x.sha256,x.array_results))
val peinfo_binaray_final_array_rdd_before_join = peinfo_binaray_final_array_rdd.map(x=>(x.sha256,x.array_results))
val peinfo_array_rdd_by_join = peinfo_int_final_array_rdd_before_join.join(peinfo_binaray_final_array_rdd_before_join).map(x=> (x._1,concat(x._2._1,x._2._2)))
val peinfo_final_array_rdd = peinfo_array_rdd_by_join.map(x=>new peinfo_final_array_rdd_class(x._1,x._2))

val peinfo_schema = new StructType().add("sha256", StringType).add("peinfo",VectorType)
val peinfo_vector_rdd = peinfo_final_array_rdd.map(x=>(x.sha256,Vectors.dense(x.array_results)))
val peinfo_vector_rowrdd = peinfo_vector_rdd.map(p => Row(p._1,p._2))
val peinfo_vector_dataframe = spark.createDataFrame(peinfo_vector_rowrdd, peinfo_schema)
val peinfo_scaler = new MinMaxScaler()
  .setInputCol("peinfo")
  .setOutputCol("scaled_peinfo")
val peinfo_scalerModel = peinfo_scaler.fit(peinfo_vector_dataframe)
val peinfo_scaledData_df = peinfo_scalerModel.transform(peinfo_vector_dataframe)
val peinfo_scaledData_rdd = peinfo_scaledData_df.select("sha256","scaled_peinfo").rdd.map(row=>(row.getAs[String]("sha256"),row.getAs[DenseVector]("scaled_peinfo"))).map(x=>new peinfo_final_array_rdd_class(x._1,x._2.toArray))
peinfo_scaledData_rdd.toDF().write.format("parquet").save(peinfo_final_array_file) 
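The unzip helper above is one half of a gzip round trip over byte arrays. A minimal sketch of both halves, assuming nothing beyond the JDK and scala.io (the Cassandra/Spark context is not needed; names are illustrative):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.zip.{GZIPInputStream, GZIPOutputStream}

object GzipRoundTripSketch extends App {
  // Compress a string the same way the stored `results` blobs would be produced.
  def zip(s: String): Array[Byte] = {
    val bos = new ByteArrayOutputStream()
    val gzip = new GZIPOutputStream(bos)
    gzip.write(s.getBytes("UTF-8"))
    gzip.close() // finishes the gzip stream and flushes into bos
    bos.toByteArray
  }

  // Mirror of the unzip helper above.
  def unzip(x: Array[Byte]): String =
    scala.io.Source.fromInputStream(new GZIPInputStream(new ByteArrayInputStream(x))).mkString

  println(unzip(zip("""{"pe_sections":[]}"""))) // prints the original JSON text
}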
Example 23
Source File: get_features_from_objdump.scala    From gsoc_relationship   with Apache License 2.0 5 votes vote down vote up
import com.datastax.spark.connector._
import play.api.libs.json.Json
import play.api.libs.json._
import java.io.{ByteArrayOutputStream, ByteArrayInputStream}
import java.util.zip.{GZIPOutputStream, GZIPInputStream}
import PreProcessingConfig._

case class objdump_results_by_service_name_class(service_name: String, sha256: String)
case class objdump_results_by_sha256_class(sha256: String, service_name: String, results: Array[Byte])
case class objdump_join_results_class(sha256: String, service_name: String, results: String)
case class objdump_binaray_final_array_rdd_class(sha256: String, array_results: Array[Double])
 
val objdump_main_list = sc.textFile(objdump_x86Opcodes_file).collect.toList
def unzip(x: Array[Byte]) : String = {		
    val inputStream = new GZIPInputStream(new ByteArrayInputStream(x))
    val output = scala.io.Source.fromInputStream(inputStream).mkString
    return output
}
def combineAllObjdumpInOne( malwarelist :Seq[play.api.libs.json.JsValue]) : List[String] ={
    if (malwarelist(0).toString() == "null") return List("null")
    var begin = malwarelist(0).as[List[String]]
    for (i <- 1 to (malwarelist.size-1)){
        if (malwarelist(i).toString() == "null") begin = begin
        else begin = begin ::: malwarelist(i).as[List[String]]
    }
    return  begin
}
def convertToList( malwarelist :Seq[play.api.libs.json.JsValue]) : List[String] = {
    if (malwarelist(0).toString() == "null") return List("null")
    else {
        return malwarelist(0).as[List[String]]
    } 
    
}
def findAllBininobjdump_main_list(malware :List[String]) : Array[Double] ={
    if (malware == List("null")) return (List.fill(10000)(0.0)).toArray
    else {
        val forlist = for ( one  <- malware ) yield {
            objdump_main_list.indexOf(one) + 1.0
        }
        if (forlist.size < 10000){
            return  (List.concat(forlist,List.fill(10000-forlist.size)(0.0))).toArray
        }
        else return forlist.toArray
    }
}

val objdump_results_by_service_name_meta = sc.cassandraTable[objdump_results_by_service_name_class](keyspace,service_name_table).where("service_name=?","objdump")
val objdump_results_by_service_name_rdd = objdump_results_by_service_name_meta.keyBy(x=> (x.sha256,x.service_name))
val objdump_results_by_sha256_meta = sc.cassandraTable[objdump_results_by_sha256_class](keyspace,sha256_table)
val objdump_results_by_sha256_rdd = objdump_results_by_sha256_meta.keyBy(x => (x.sha256,x.service_name))
val objdump_join_results = objdump_results_by_service_name_rdd.join(objdump_results_by_sha256_rdd).map(x=> (new objdump_join_results_class(x._1._1,x._1._2, unzip(x._2._2.results)))).distinct()
val objdump_binaray_final_array_rdd = objdump_join_results.map(x=>(x.sha256,(Json.parse(x.results) \\ "opcodes"))).filter(x=> (x._2.size > 0)).map(x=>(x._1,if ( x._2.size == 1 ) convertToList(x._2) else combineAllObjdumpInOne(x._2))).map(x=>(x._1,findAllBininobjdump_main_list(x._2)))
objdump_binaray_final_array_rdd.toDF().write.format("parquet").save(objdump_binaray_final_array_file) 
Example 24
Source File: get_VT_signatures.scala    From gsoc_relationship   with Apache License 2.0 5 votes vote down vote up
import com.datastax.spark.connector._
import play.api.libs.json.Json
import java.io.{ByteArrayOutputStream, ByteArrayInputStream}
import java.util.zip.{GZIPOutputStream, GZIPInputStream}
import PreProcessingConfig._

case class VT_results_by_service_name_class(service_name: String, sha256: String)
case class VT_results_by_sha256_class(sha256: String, service_name: String, results: Array[Byte] )
case class VT_join_results_class(sha256: String, service_name: String, results: String)
case class VT_sample_signatures_initial_seq_rdd_class(sha256: String, seq_results: Seq[String])
case class VT_sample_signatures_final_array_rdd_class(sha256:String, array_results:Array[Double])

def unzip(x: Array[Byte]) : String = {		
    val inputStream = new GZIPInputStream(new ByteArrayInputStream(x))
    val output = scala.io.Source.fromInputStream(inputStream).mkString
    return output
}
def deleteNumberInSampleSignatures(x: String): Boolean = {
    val regex = "[0-9]".r
    return regex.findFirstIn(x).isEmpty
}

val VT_results_by_service_name_meta = sc.cassandraTable[VT_results_by_service_name_class](keyspace,service_name_table).where("service_name=?","virustotal")
val VT_results_by_service_name_rdd = VT_results_by_service_name_meta.keyBy(x=> (x.sha256,x.service_name))
val VT_results_by_sha256_meta = sc.cassandraTable[VT_results_by_sha256_class](keyspace,sha256_table)
val VT_results_by_sha256_rdd = VT_results_by_sha256_meta.keyBy(x => (x.sha256,x.service_name))
val VT_join_results = VT_results_by_service_name_rdd.join(VT_results_by_sha256_rdd).map(x => (new VT_join_results_class(x._1._1,x._1._2, unzip(x._2._2.results)))).distinct().cache()
val sample_signatures_rdd = VT_join_results.flatMap(x=>Json.parse(x.results) \ "scans" \\ "result").map(x=>Json.stringify(x)).filter( x=> !(x == "null"))
val sample_signatures_split_rdd = sample_signatures_rdd.flatMap(x=>x.replaceAll("""["]""","").replaceAll("""\![a-zA-Z0-9\s\+]+""","").replaceAll("""@[a-zA-Z0-9\s\+]+""","").replaceAll("""~[a-zA-Z0-9\s\+]+""","").replaceAll("""[\(|\[|{][a-zA-Z0-9\s\+]*[\)|\]|}]""","").replaceAll("""(\.|\!|\:|\_|\-|\\|/|\[|\])"""," ").split(" ")).filter(x=>(x.size>3)).filter(x=>deleteNumberInSampleSignatures(x)).map(x=>x.toLowerCase())
val signatures_prefix_rdd = sc.textFile(VT_signatures_prefix_suffix_file).map(x=>x.toLowerCase())
val family_signatures_subtract_rdd = sample_signatures_split_rdd.subtract(signatures_prefix_rdd)
val family_signatures_sorted_rdd = sc.parallelize(family_signatures_subtract_rdd.countByValue().toSeq).filter(x=>(x._2>50)).sortBy(x=>x._2,false)
val family_signatures_list = family_signatures_sorted_rdd.keys.collect().toList
val VT_sample_signatures_rdd = VT_join_results.map(x=>(x.sha256,(Json.parse(x.results) \ "scans" \\ "result").map(_.toString).filter( s => !(s== "null")).flatMap(x=>x.replaceAll("""["]""","").replaceAll("""\![a-zA-Z0-9\s\+]+""","").replaceAll("""@[a-zA-Z0-9\s\+]+""","").replaceAll("""~[a-zA-Z0-9\s\+]+""","").replaceAll("""[\(|\[|{][a-zA-Z0-9\s\+]*[\)|\]|}]""","").replaceAll("""(\.|\!|\:|\_|\-|\\|/|\[|\])"""," ").split(" ")).filter(x=>(x.size>3)).filter(x=>deleteNumberInSampleSignatures(x)).map(x=>x.toLowerCase())))
val  VT_sample_signatures_initial_seq_rdd = VT_sample_signatures_rdd.map(x=>new VT_sample_signatures_initial_seq_rdd_class(x._1, x._2))

implicit def bool2int(b:Boolean) = if (b) 1 else 0
def findAllInFamilySignatures(sample_signatures_seq : Seq[String]) : Array[Double] ={
    val forlist = for (family <- family_signatures_list) yield {
        (sample_signatures_seq.contains(family):Int).toDouble
    }
    return forlist.toArray
}

val VT_sample_signatures_final_array_rdd = VT_sample_signatures_initial_seq_rdd.map(x=>new VT_sample_signatures_final_array_rdd_class(x.sha256,findAllInFamilySignatures(x.seq_results)))
VT_sample_signatures_final_array_rdd.toDF().write.format("parquet").save(VT_sample_signatures_final_array_file) 
Example 25
Source File: HelperMethods.scala    From gsoc_relationship   with Apache License 2.0 5 votes vote down vote up
package com.holmesprocessing.analytics.relationship.knowledgeBase

import play.api.libs.json.Json
import java.util.zip.{GZIPOutputStream, GZIPInputStream}
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream

// The enclosing object declaration was elided in this listing; it is restored here so the
// score helper below compiles (the final closing brace belongs to this object).
object HelperMethods {

  def score(ruleset_1: String, ruleset_2:String) : Double = {

    val split_1 = ruleset_1.split(",").toSeq
    val split_2 = ruleset_2.split(",").toSeq
    if (split_1.length > 0 && split_2.length > 0) {
      return split_1.intersect(split_2).length.toDouble/split_1.union(split_2).distinct.length.toDouble
    } else {
      return 0
    }
  }

} 
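Usage of score is straightforward: it is the Jaccard overlap of two comma-separated rule lists. A self-contained sketch (the function body is repeated here so the snippet runs on its own):

object ScoreSketch extends App {
  // Same Jaccard-style overlap as HelperMethods.score above.
  def score(ruleset1: String, ruleset2: String): Double = {
    val a = ruleset1.split(",").toSeq
    val b = ruleset2.split(",").toSeq
    if (a.nonEmpty && b.nonEmpty) a.intersect(b).length.toDouble / a.union(b).distinct.length.toDouble
    else 0.0
  }

  println(score("r1,r2,r3", "r2,r3,r4")) // 0.5: two shared rules out of four distinct ones
}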
Example 26
Source File: JsonSerializer.scala    From hydra   with Apache License 2.0 5 votes vote down vote up
package hydra.kafka.serializers

import java.io.ByteArrayInputStream
import java.util

import com.fasterxml.jackson.databind.{
  JsonNode,
  ObjectMapper,
  SerializationFeature
}
import hydra.common.config.ConfigSupport
import org.apache.kafka.common.serialization._


class JsonSerializer extends Serializer[JsonNode] with ConfigSupport {

  import JsonSerializer._

  override def serialize(topic: String, data: JsonNode): Array[Byte] = {
    mapper.writeValueAsBytes(data)
  }

  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {
    val indent = Option(configs.get("kafka.encoders.json.indent.output"))
      .map(_.toString.toBoolean)
      .getOrElse(false)
    mapper.configure(SerializationFeature.INDENT_OUTPUT, indent)
  }

  override def close(): Unit = {
    //nothing to do
  }
}

object JsonSerializer {
  val mapper = new ObjectMapper
}

class JsonDeserializer extends Deserializer[JsonNode] {

  import JsonDeserializer._

  override def deserialize(topic: String, bytes: Array[Byte]): JsonNode = {
    mapper.readTree(new ByteArrayInputStream(bytes))
  }

  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {
    val indent = Option(configs.get("kafka.encoders.json.indent.output"))
      .map(_.toString.toBoolean)
      .getOrElse(false)
    mapper.configure(SerializationFeature.INDENT_OUTPUT, indent)
  }

  override def close(): Unit = {
    //nothing
  }
}

object JsonDeserializer {
  val mapper = new ObjectMapper
} 
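A minimal sketch of the round trip these two classes implement, using Jackson directly (no Kafka involved; the object name is illustrative):

import java.io.ByteArrayInputStream
import com.fasterxml.jackson.databind.ObjectMapper

object JsonRoundTripSketch extends App {
  val mapper = new ObjectMapper()
  val node = mapper.readTree("""{"name":"hydra","partitions":3}""")
  // Serialize to bytes, then deserialize from an in-memory stream, as the Kafka deserializer does.
  val bytes = mapper.writeValueAsBytes(node)
  val parsed = mapper.readTree(new ByteArrayInputStream(bytes))
  assert(parsed == node)
  println(parsed.get("name").asText()) // hydra
}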
Example 27
Source File: KryoInitSpec.scala    From hydra   with Apache License 2.0 5 votes vote down vote up
package hydra.core.akka

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import com.romix.scala.serialization.kryo.{
  EnumerationSerializer,
  ScalaImmutableAbstractMapSerializer,
  ScalaMutableMapSerializer
}
import org.scalatest.matchers.should.Matchers
import org.scalatest.flatspec.AnyFlatSpecLike

class KryoInitSpec extends Matchers with AnyFlatSpecLike {

  "The custom KryoInit" should "register serializers" in {
    val kryo = new Kryo()
    new KryoInit().customize(kryo)
    kryo.getDefaultSerializer(classOf[scala.Enumeration#Value]) shouldBe an[
      EnumerationSerializer
    ]
    kryo.getDefaultSerializer(classOf[collection.immutable.Map[_, _]]) shouldBe a[
      ScalaImmutableAbstractMapSerializer
    ]
    kryo.getDefaultSerializer(classOf[collection.immutable.Map[_, _]]) shouldBe a[
      ScalaImmutableAbstractMapSerializer
    ]
    kryo.getDefaultSerializer(classOf[collection.mutable.HashMap[_, _]]) shouldBe a[
      ScalaMutableMapSerializer
    ]
  }

  it should "serialize immutable maps" in {
    val kryo = new Kryo()
    new KryoInit().customize(kryo)
    val map1 = Map(
      "Rome" -> "Italy",
      "London" -> "England",
      "Paris" -> "France",
      "New York" -> "USA",
      "Tokyo" -> "Japan",
      "Peking" -> "China",
      "Brussels" -> "Belgium"
    )
    val map2 = map1 + ("Moscow" -> "Russia")
    val map3 = map2 + ("Berlin" -> "Germany")
    val map4 = map3 + ("Germany" -> "Berlin", "Russia" -> "Moscow")
    roundTrip(map1, kryo)
    roundTrip(map2, kryo)
    roundTrip(map3, kryo)
    roundTrip(map4, kryo)
  }

  def roundTrip[T](obj: T, kryo: Kryo): T = {
    val outStream = new ByteArrayOutputStream()
    val output = new Output(outStream, 4096)
    kryo.writeClassAndObject(output, obj)
    output.flush()

    val input = new Input(new ByteArrayInputStream(outStream.toByteArray), 4096)
    val obj1 = kryo.readClassAndObject(input)

    assert(obj == obj1)

    obj1.asInstanceOf[T]
  }

} 
Example 28
Source File: BytecodeUtils.scala    From graphx-algorithm   with GNU General Public License v2.0 5 votes vote down vote up
package org.apache.spark.graphx.util

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import scala.collection.mutable.HashSet
import scala.language.existentials

import org.apache.spark.util.Utils

import com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.{ClassReader, ClassVisitor, MethodVisitor}
import com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.Opcodes._

// The enclosing object and most of its members were elided in this listing. A minimal wrapper
// and a stand-in skipClass are restored so the visitor below compiles; the original filter
// logic may differ.
private[graphx] object BytecodeUtils {

  // Stand-in: ignore JDK and Scala library classes (owner is in internal "a/b/C" form).
  private def skipClass(owner: String): Boolean =
    owner.startsWith("java/") || owner.startsWith("scala/") || owner.startsWith("sun/")

  private class MethodInvocationFinder(className: String, methodName: String)
    extends ClassVisitor(ASM4) {

    val methodsInvoked = new HashSet[(Class[_], String)]

    override def visitMethod(access: Int, name: String, desc: String,
                             sig: String, exceptions: Array[String]): MethodVisitor = {
      if (name == methodName) {
        new MethodVisitor(ASM4) {
          override def visitMethodInsn(op: Int, owner: String, name: String, desc: String) {
            if (op == INVOKEVIRTUAL || op == INVOKESPECIAL || op == INVOKESTATIC) {
              if (!skipClass(owner)) {
                methodsInvoked.add((Class.forName(owner.replace("/", ".")), name))
              }
            }
          }
        }
      } else {
        null
      }
    }
  }
} 
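The ByteArrayInputStream and ByteArrayOutputStream imports above suggest the elided portion of this file reads class bytes into memory and hands them to ASM's ClassReader. A rough, self-contained sketch of that idea (readerFor is an illustrative helper, not part of the original file):

import java.io.ByteArrayInputStream
import com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.ClassReader

object ClassReaderSketch {
  // Load a class file as bytes via its resource path, then expose it to ASM through an
  // in-memory stream, which is the kind of input the visitor above is driven with.
  def readerFor(cls: Class[_]): ClassReader = {
    val resource = "/" + cls.getName.replace('.', '/') + ".class"
    val in = cls.getResourceAsStream(resource)
    val bytes = Stream.continually(in.read()).takeWhile(_ != -1).map(_.toByte).toArray
    in.close()
    new ClassReader(new ByteArrayInputStream(bytes))
  }
}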
Example 29
Source File: FakeMavenRepository.scala    From exodus   with MIT License 5 votes vote down vote up
package com.wixpress.build.maven

import java.io.ByteArrayInputStream

import org.apache.commons.codec.digest.DigestUtils
import org.codehaus.mojo.mrm.api.maven.Artifact
import org.codehaus.mojo.mrm.impl.maven.{ArtifactStoreFileSystem, MemoryArtifactStore}
import org.codehaus.mojo.mrm.plugin.FileSystemServer

class FakeMavenRepository(port: Int = 0) {

  implicit class ExtendedArtifactDescriptor(artifact: ArtifactDescriptor) {
    def asArtifact(ofType: String): Artifact = {
      val parent = artifact.parentCoordinates
      val groupId = artifact.groupId
        .getOrElse(parent.map(_.groupId).getOrElse(throw new RuntimeException("missing groupId or parent.groupId")))
      val version = artifact.version
        .getOrElse(parent.map(_.version).getOrElse(throw new RuntimeException("missing version or parent.version")))
      new Artifact(groupId, artifact.artifactId, version, ofType)
    }
  }

  private val inMemoryArtifactStore = new MemoryArtifactStore
  private val mavenRepoManager = new FileSystemServer("foo", port,
    new ArtifactStoreFileSystem(inMemoryArtifactStore), "")

  def url: String = mavenRepoManager.getUrl

  def start(): Unit = mavenRepoManager.ensureStarted()

  def stop(): Unit = {
    mavenRepoManager.finish()
    mavenRepoManager.waitForFinished()
  }

  def addArtifacts(artifact: ArtifactDescriptor*): Unit = addArtifacts(artifact.toSet)

  def addCoordinates(coordinatesSet: Coordinates*): Unit = addCoordinates(coordinatesSet.toSet)

  def addArtifacts(artifacts: Set[ArtifactDescriptor]): Unit = artifacts.foreach(addSingleArtifact)

  def addCoordinates(coordinatesSet: Set[Coordinates]): Unit = coordinatesSet.foreach(addSingleCoordinates)

  def addSingleCoordinates(coordinates: Coordinates): Unit = addSingleArtifact(ArtifactDescriptor.anArtifact(coordinates))

  def addSingleArtifact(artifact: ArtifactDescriptor): Unit = {
    val xml = artifact.pomXml
    val md5 = DigestUtils.md5Hex(xml)
    val sha1 = DigestUtils.sha1Hex(xml)
    inMemoryArtifactStore.set(artifact.asArtifact(ofType = "pom"), streamFrom(xml))
    inMemoryArtifactStore.set(artifact.asArtifact(ofType = "pom.md5"), streamFrom(md5))
    inMemoryArtifactStore.set(artifact.asArtifact(ofType = "pom.sha1"), streamFrom(sha1))
  }


  private def streamFrom(input: String) = {
    new ByteArrayInputStream(input.getBytes("UTF-8"))
  }

  def addJarArtifact(artifact: Coordinates, jar: Array[Byte]) =
    inMemoryArtifactStore.set(
      new Artifact(artifact.groupId, artifact.artifactId, artifact.version, artifact.classifier.orNull, "jar"), new ByteArrayInputStream(jar))

  def addJarSha256(artifact: Coordinates, sha256: String) =
    inMemoryArtifactStore.set(
      new Artifact(
        artifact.groupId,
        artifact.artifactId,
        artifact.version,
        artifact.classifier.orNull,
        artifact.packaging.value + ".sha256"), streamFrom(sha256))

} 
Example 30
Source File: RulesTxtDeploymentServiceSpec.scala    From smui   with Apache License 2.0 5 votes vote down vote up
package models

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.zip.ZipInputStream

import org.apache.commons.io.IOUtils
import org.scalatest.{FlatSpec, Matchers}

class RulesTxtDeploymentServiceSpec extends FlatSpec with Matchers with ApplicationTestBase {

  private lazy val service = injector.instanceOf[RulesTxtDeploymentService]
  private var inputIds: Seq[SearchInputId] = Seq.empty

  override protected def beforeAll(): Unit = {
    super.beforeAll()

    createTestCores()
    inputIds = createTestRule()
  }

  private def rulesFileContent(ruleIds: Seq[SearchInputId]): String = s"""aerosmith =>
                           |	SYNONYM: mercury
                           |	DOWN(10): battery
                           |	UP(10): notebook
                           |	FILTER: zz top
                           |	@{
                           |	  "_log" : "${ruleIds.head}"
                           |	}@
                           |
                           |mercury =>
                           |	SYNONYM: aerosmith
                           |	DOWN(10): battery
                           |	UP(10): notebook
                           |	FILTER: zz top
                           |	@{
                           |	  "_log" : "${ruleIds.head}"
                           |	}@
                           |
                           |shipping =>
                           |	DECORATE: REDIRECT http://xyz.com/shipping
                           |	@{
                           |	  "_log" : "${ruleIds.last}"
                           |	}@""".stripMargin

  "RulesTxtDeploymentService" should "generate rules files with correct file names" in {
    val rulesTxt = service.generateRulesTxtContentWithFilenames(core1Id, "LIVE", logDebug = false)
    rulesTxt.solrIndexId shouldBe core1Id
    rulesTxt.decompoundRules shouldBe empty
    rulesTxt.regularRules.content.trim shouldBe rulesFileContent(inputIds)

    rulesTxt.regularRules.sourceFileName shouldBe "/tmp/search-management-ui_rules-txt.tmp"
    rulesTxt.regularRules.destinationFileName shouldBe "/usr/bin/solr/liveCore/conf/rules.txt"
  }

  it should "validate the rules files correctly" in {
    val rulesTxt = service.generateRulesTxtContentWithFilenames(core1Id, "LIVE", logDebug = false)
    service.validateCompleteRulesTxts(rulesTxt, logDebug = false) shouldBe empty

    val badRulesTxt = rulesTxt.copy(regularRules = rulesTxt.regularRules.copy(content = "a very bad rules file"))
    service.validateCompleteRulesTxts(badRulesTxt, logDebug = false) shouldBe List("Line 1: Missing input for instruction")
  }

  it should "provide a zip file with all rules files" in {
    val out = new ByteArrayOutputStream()
    service.writeAllRulesTxtFilesAsZipFileToStream(out)

    val bytes = out.toByteArray
    val zipStream = new ZipInputStream(new ByteArrayInputStream(bytes))
    val firstEntry = zipStream.getNextEntry
    firstEntry.getName shouldBe "rules_core1.txt"
    IOUtils.toString(zipStream, "UTF-8").trim shouldBe rulesFileContent(inputIds)
    val secondEntry = zipStream.getNextEntry
    secondEntry.getName shouldBe "rules_core2.txt"
    IOUtils.toString(zipStream, "UTF-8").trim shouldBe ""
  }

} 
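The last test reads a zip archive straight out of memory. A minimal sketch of that pattern on its own, with only java.util.zip (the entry name and content are illustrative):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.zip.{ZipEntry, ZipInputStream, ZipOutputStream}

object InMemoryZipSketch extends App {
  // Build a zip in memory...
  val bos = new ByteArrayOutputStream()
  val zipOut = new ZipOutputStream(bos)
  zipOut.putNextEntry(new ZipEntry("rules_core1.txt"))
  zipOut.write("aerosmith =>\n\tSYNONYM: mercury".getBytes("UTF-8"))
  zipOut.closeEntry()
  zipOut.close()

  // ...then read it back through a ByteArrayInputStream, as the spec above does.
  val zipIn = new ZipInputStream(new ByteArrayInputStream(bos.toByteArray))
  var entry = zipIn.getNextEntry
  while (entry != null) {
    println(entry.getName) // rules_core1.txt
    entry = zipIn.getNextEntry
  }
}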
Example 31
Source File: ImageLoaderUtils.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.loaders

import java.awt.image.BufferedImage
import java.io.{InputStream, ByteArrayInputStream}
import java.net.URI
import java.util.zip.GZIPInputStream
import javax.imageio.ImageIO

import keystoneml.loaders.VOCLoader._
import org.apache.commons.compress.archivers.ArchiveStreamFactory
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import keystoneml.pipelines.Logging
import keystoneml.utils._

import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag

object ImageLoaderUtils extends Logging {
  
  def loadFiles[L, I <: AbstractLabeledImage[L] : ClassTag](
      filePathsRDD: RDD[URI],
      labelsMap: String => L,
      imageBuilder: (Image, L, Option[String]) => I, // TODO(etrain): We can probably do this with implicits.
      namePrefix: Option[String] = None): RDD[I] = {
    filePathsRDD.flatMap(fileUri => loadFile(fileUri, labelsMap, imageBuilder, namePrefix))
  }

  private def loadFile[L, I <: AbstractLabeledImage[L]](
      fileUri: URI,
      labelsMap: String => L,
      imageBuilder: (Image, L, Option[String]) => I,
      namePrefix: Option[String]): Iterator[I] = {
    val filePath = new Path(fileUri)
    val conf = new Configuration(true)
    val fs = FileSystem.get(filePath.toUri(), conf)
    val fStream = fs.open(filePath)

    val tarStream = new ArchiveStreamFactory().createArchiveInputStream(
      "tar", fStream).asInstanceOf[TarArchiveInputStream]

    var entry = tarStream.getNextTarEntry()
    val imgs = new ArrayBuffer[I]
    while (entry != null) {
      if (!entry.isDirectory && (namePrefix.isEmpty || entry.getName.startsWith(namePrefix.get))) {
        var offset = 0
        var ret = 0
        val content = new Array[Byte](entry.getSize().toInt)
        while (ret >= 0 && offset != entry.getSize()) {
          ret = tarStream.read(content, offset, content.length - offset)
          if (ret >= 0) {
            offset += ret
          }
        }

        val bais = new ByteArrayInputStream(content)

        val image = ImageUtils.loadImage(bais).map { img =>
          imageBuilder(img, labelsMap(entry.getName), Some(entry.getName))
        }

        imgs ++= image
      }
      entry = tarStream.getNextTarEntry()
    }

    imgs.iterator
  }
} 
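The entry-reading loop above fills a fixed-size array because the tar entry size is known up front. A more general sketch for draining any InputStream into a resettable ByteArrayInputStream, which is what image decoders like ImageIO typically want (names are illustrative):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream}

object ReadFullySketch {
  // Drain an InputStream into memory, then re-expose it as an in-memory stream that can be
  // reset and re-read, unlike the original source.
  def toByteArrayInputStream(in: InputStream): ByteArrayInputStream = {
    val buf = new Array[Byte](8192)
    val bos = new ByteArrayOutputStream()
    var n = in.read(buf)
    while (n != -1) {
      bos.write(buf, 0, n)
      n = in.read(buf)
    }
    new ByteArrayInputStream(bos.toByteArray)
  }
}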
Example 32
Source File: TestUtils.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.utils

import java.io.{FileReader, ByteArrayInputStream}
import breeze.linalg.DenseMatrix
import breeze.stats.distributions.{Gaussian, RandBasis, ThreadLocalRandomGenerator, Rand}
import edu.berkeley.cs.amplab.mlmatrix.RowPartitionedMatrix
import org.apache.commons.io.IOUtils
import org.apache.commons.math3.random.MersenneTwister
import org.apache.spark.SparkContext

import scala.io.Source
import scala.util.Random


// The enclosing object and the genData helper were elided in this listing; a minimal
// stand-in (uniform random pixel values) is restored so the generators below compile.
object TestUtils {

  def genData(x: Int, y: Int, z: Int): Array[Double] =
    Array.fill(x * y * z)(Random.nextDouble)

  def genChannelMajorArrayVectorizedImage(x: Int, y: Int, z: Int): ChannelMajorArrayVectorizedImage = {
    ChannelMajorArrayVectorizedImage(genData(x, y, z), ImageMetadata(x,y,z))
  }

  def genRowColumnMajorByteArrayVectorizedImage(x: Int, y: Int, z: Int): RowColumnMajorByteArrayVectorizedImage = {
    RowColumnMajorByteArrayVectorizedImage(genData(x,y,z).map(_.toByte), ImageMetadata(x,y,z))
  }

  def createRandomMatrix(
      sc: SparkContext,
      numRows: Int,
      numCols: Int,
      numParts: Int,
      seed: Int = 42): RowPartitionedMatrix = {

    val rowsPerPart = numRows / numParts
    val matrixParts = sc.parallelize(1 to numParts, numParts).mapPartitionsWithIndex { (index, part) =>
      val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed+index)))
      Iterator(DenseMatrix.rand(rowsPerPart, numCols, Gaussian(0.0, 1.0)(randBasis)))
    }
    RowPartitionedMatrix.fromMatrix(matrixParts.cache())
  }

  def createLocalRandomMatrix(numRows: Int, numCols: Int, seed: Int = 42): DenseMatrix[Double] = {
    val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed)))
    DenseMatrix.rand(numRows, numCols, Gaussian(0.0, 1.0)(randBasis))
  }
} 
Example 33
Source File: AggregatorTest.scala    From noether   with Apache License 2.0 5 votes vote down vote up
package com.spotify.noether

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import com.twitter.algebird.Aggregator
import org.scalatest._
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

trait AggregatorTest extends AnyFlatSpec with Matchers {
  def run[A, B, C](aggregator: Aggregator[A, B, C])(as: Seq[A]): C = {
    val bs = as.map(aggregator.prepare _ compose ensureSerializable)
    val b = ensureSerializable(aggregator.reduce(bs))
    ensureSerializable(aggregator.present(b))
  }

  private def serializeToByteArray(value: Any): Array[Byte] = {
    val buffer = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(buffer)
    oos.writeObject(value)
    buffer.toByteArray
  }

  private def deserializeFromByteArray(encodedValue: Array[Byte]): AnyRef = {
    val ois = new ObjectInputStream(new ByteArrayInputStream(encodedValue))
    ois.readObject()
  }

  private def ensureSerializable[T](value: T): T =
    deserializeFromByteArray(serializeToByteArray(value)).asInstanceOf[T]
} 
Example 34
Source File: TestingTypedCount.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive.execution

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate.{ImperativeAggregate, TypedImperativeAggregate}
import org.apache.spark.sql.hive.execution.TestingTypedCount.State
import org.apache.spark.sql.types._

@ExpressionDescription(
  usage = "_FUNC_(expr) - A testing aggregate function resembles COUNT " +
          "but implements ObjectAggregateFunction.")
case class TestingTypedCount(
    child: Expression,
    mutableAggBufferOffset: Int = 0,
    inputAggBufferOffset: Int = 0)
  extends TypedImperativeAggregate[TestingTypedCount.State] {

  def this(child: Expression) = this(child, 0, 0)

  override def children: Seq[Expression] = child :: Nil

  override def dataType: DataType = LongType

  override def nullable: Boolean = false

  override def createAggregationBuffer(): State = TestingTypedCount.State(0L)

  override def update(buffer: State, input: InternalRow): State = {
    if (child.eval(input) != null) {
      buffer.count += 1
    }
    buffer
  }

  override def merge(buffer: State, input: State): State = {
    buffer.count += input.count
    buffer
  }

  override def eval(buffer: State): Any = buffer.count

  override def serialize(buffer: State): Array[Byte] = {
    val byteStream = new ByteArrayOutputStream()
    val dataStream = new DataOutputStream(byteStream)
    dataStream.writeLong(buffer.count)
    byteStream.toByteArray
  }

  override def deserialize(storageFormat: Array[Byte]): State = {
    val byteStream = new ByteArrayInputStream(storageFormat)
    val dataStream = new DataInputStream(byteStream)
    TestingTypedCount.State(dataStream.readLong())
  }

  override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): ImperativeAggregate =
    copy(mutableAggBufferOffset = newMutableAggBufferOffset)

  override def withNewInputAggBufferOffset(newInputAggBufferOffset: Int): ImperativeAggregate =
    copy(inputAggBufferOffset = newInputAggBufferOffset)

  override val prettyName: String = "typed_count"
}

object TestingTypedCount {
  case class State(var count: Long)
} 
Example 35
Source File: CreateJacksonParser.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.json

import java.io.{ByteArrayInputStream, InputStream, InputStreamReader}
import java.nio.channels.Channels
import java.nio.charset.Charset

import com.fasterxml.jackson.core.{JsonFactory, JsonParser}
import org.apache.hadoop.io.Text
import sun.nio.cs.StreamDecoder

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.unsafe.types.UTF8String

private[sql] object CreateJacksonParser extends Serializable {
  def string(jsonFactory: JsonFactory, record: String): JsonParser = {
    jsonFactory.createParser(record)
  }

  def utf8String(jsonFactory: JsonFactory, record: UTF8String): JsonParser = {
    val bb = record.getByteBuffer
    assert(bb.hasArray)

    val bain = new ByteArrayInputStream(
      bb.array(), bb.arrayOffset() + bb.position(), bb.remaining())

    jsonFactory.createParser(new InputStreamReader(bain, "UTF-8"))
  }

  def text(jsonFactory: JsonFactory, record: Text): JsonParser = {
    jsonFactory.createParser(record.getBytes, 0, record.getLength)
  }

  // Jackson parsers can be ranked according to their performance:
  // 1. Array based with actual encoding UTF-8 in the array. This is the fastest parser
  //    but it doesn't allow to set encoding explicitly. Actual encoding is detected automatically
  //    by checking leading bytes of the array.
  // 2. InputStream based with actual encoding UTF-8 in the stream. Encoding is detected
  //    automatically by analyzing first bytes of the input stream.
  // 3. Reader based parser. This is the slowest parser used here but it allows to create
  //    a reader with specific encoding.
  // The method creates a reader for an array with given encoding and sets size of internal
  // decoding buffer according to size of input array.
  private def getStreamDecoder(enc: String, in: Array[Byte], length: Int): StreamDecoder = {
    val bais = new ByteArrayInputStream(in, 0, length)
    val byteChannel = Channels.newChannel(bais)
    val decodingBufferSize = Math.min(length, 8192)
    val decoder = Charset.forName(enc).newDecoder()

    StreamDecoder.forDecoder(byteChannel, decoder, decodingBufferSize)
  }

  def text(enc: String, jsonFactory: JsonFactory, record: Text): JsonParser = {
    val sd = getStreamDecoder(enc, record.getBytes, record.getLength)
    jsonFactory.createParser(sd)
  }

  def inputStream(jsonFactory: JsonFactory, is: InputStream): JsonParser = {
    jsonFactory.createParser(is)
  }

  def inputStream(enc: String, jsonFactory: JsonFactory, is: InputStream): JsonParser = {
    jsonFactory.createParser(new InputStreamReader(is, enc))
  }

  def internalRow(jsonFactory: JsonFactory, row: InternalRow): JsonParser = {
    val ba = row.getBinary(0)

    jsonFactory.createParser(ba, 0, ba.length)
  }

  def internalRow(enc: String, jsonFactory: JsonFactory, row: InternalRow): JsonParser = {
    val binary = row.getBinary(0)
    val sd = getStreamDecoder(enc, binary, binary.length)

    jsonFactory.createParser(sd)
  }
} 
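A small sketch contrasting the byte-array path and the reader-based path described in the comments above, using Jackson on its own (Spark types are not involved; the object name is illustrative):

import java.io.{ByteArrayInputStream, InputStreamReader}
import com.fasterxml.jackson.core.JsonFactory

object EncodingAwareParserSketch extends App {
  val factory = new JsonFactory()
  val json = """{"id":42}"""

  // Fast path: byte-array parser, encoding detected automatically from the leading bytes.
  val p1 = factory.createParser(json.getBytes("UTF-8"))

  // Explicit-encoding path: wrap the bytes in a reader, as getStreamDecoder does above.
  val bytes = json.getBytes("UTF-16LE")
  val p2 = factory.createParser(new InputStreamReader(new ByteArrayInputStream(bytes), "UTF-16LE"))

  p1.close(); p2.close()
}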
Example 36
Source File: SQLRunnerSuite.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package com.sap.spark.cli

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream}

import org.apache.spark.SparkContext
import org.apache.spark.sql.{GlobalSapSQLContext, SQLContext}
import org.scalatest.{BeforeAndAfterEach, FunSuite, ShouldMatchers}


// The class declaration and the opening of the option-parsing test were elided in this
// listing; a minimal reconstruction is shown so the checks below compile.
class SQLRunnerSuite extends FunSuite with ShouldMatchers with GlobalSapSQLContext {

  test("parseOpts handles good, bad, and ugly argument lists") {
    // good call
    val goodOpts =
      SQLRunner.parseOpts(List("a.sql", "b.sql", "-o", "output.csv"))

    goodOpts.sqlFiles should be(List("a.sql", "b.sql"))
    goodOpts.output should be(Some("output.csv"))

    // bad call
    val badOpts = SQLRunner.parseOpts(List())

    badOpts.sqlFiles should be(List())
    badOpts.output should be(None)

    // ugly call
    val uglyOpts =
      SQLRunner.parseOpts(List("a.sql", "-o", "output.csv", "b.sql"))

    uglyOpts.sqlFiles should be(List("a.sql", "b.sql"))
    uglyOpts.output should be(Some("output.csv"))
  }

  def runSQLTest(input: String, expectedOutput: String): Unit = {
    val inputStream: InputStream = new ByteArrayInputStream(input.getBytes())
    val outputStream = new ByteArrayOutputStream()

    SQLRunner.sql(inputStream, outputStream)

    val output = outputStream.toString
    output should be(expectedOutput)
  }

  test("can run dummy query") {
    val input = "SELECT 1;"
    val output = "1\n"

    runSQLTest(input, output)
  }

  test("can run multiple dummy queries") {
    val input = """
        |SELECT 1;SELECT 2;
        |SELECT 3;
      """.stripMargin

    val output = "1\n2\n3\n"

    runSQLTest(input, output)
  }

  test("can run a basic example with tables") {
    val input = """
                  |SELECT * FROM DEMO_TABLE;
                  |SELECT * FROM DEMO_TABLE LIMIT 1;
                  |DROP TABLE DEMO_TABLE;
                """.stripMargin

    val output = "1,a\n2,b\n3,c\n1,a\n"

    runSQLTest(input, output)
  }

  test("can run an example with comments") {
    val input = """
                  |SELECT * FROM DEMO_TABLE; -- this is the first query
                  |SELECT * FROM DEMO_TABLE LIMIT 1;
                  |-- now let's drop a table
                  |DROP TABLE DEMO_TABLE;
                """.stripMargin

    val output = "1,a\n2,b\n3,c\n1,a\n"

    runSQLTest(input, output)
  }
} 
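The runSQLTest helper is an instance of a general in-memory I/O harness: stdin-like input comes from a ByteArrayInputStream and output is captured in a ByteArrayOutputStream. A minimal sketch with a stand-in function (echo is illustrative, not SQLRunner's API):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream, OutputStream}
import scala.io.Source

object StreamHarnessSketch extends App {
  // Stand-in for the function under test: copies each input line to the output.
  def echo(in: InputStream, out: OutputStream): Unit =
    Source.fromInputStream(in).getLines().foreach(l => out.write((l + "\n").getBytes("UTF-8")))

  val in = new ByteArrayInputStream("SELECT 1;\nSELECT 2;".getBytes("UTF-8"))
  val out = new ByteArrayOutputStream()
  echo(in, out)
  assert(out.toString == "SELECT 1;\nSELECT 2;\n")
}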
Example 37
Source File: S3SnapshotStore.scala    From akka-persistence-s3   with MIT License 5 votes vote down vote up
package akka.persistence.s3
package snapshot

import java.io.ByteArrayInputStream
import akka.actor.ActorLogging
import akka.persistence.serialization.Snapshot
import akka.persistence.{ SelectedSnapshot, SnapshotMetadata, SnapshotSelectionCriteria }
import akka.persistence.snapshot.SnapshotStore
import akka.serialization.SerializationExtension
import com.amazonaws.services.s3.model.{ ObjectMetadata, S3ObjectInputStream, ListObjectsRequest }
import com.typesafe.config.Config
import scala.collection.JavaConversions._
import scala.collection.immutable
import scala.concurrent.Future
import scala.util.control.NonFatal

case class SerializationResult(stream: ByteArrayInputStream, size: Int)

class S3SnapshotStore(config: Config) extends SnapshotStore with ActorLogging with SnapshotKeySupport {
  import context.dispatcher

  val settings = new S3SnapshotConfig(config)

  val s3Client: S3Client = new S3Client {
    val s3ClientConfig = new S3ClientConfig(context.system.settings.config.getConfig("s3-client"))
  }

  private val serializationExtension = SerializationExtension(context.system)

  private val s3Dispatcher = context.system.dispatchers.lookup("s3-snapshot-store.s3-client-dispatcher")

  val extensionName = settings.extension

  override def loadAsync(persistenceId: String, criteria: SnapshotSelectionCriteria): Future[Option[SelectedSnapshot]] = {
    snapshotMetadatas(persistenceId, criteria)
      .map(_.sorted.takeRight(settings.maxLoadAttempts))
      .flatMap(load)
  }

  private def load(metadata: immutable.Seq[SnapshotMetadata]): Future[Option[SelectedSnapshot]] = metadata.lastOption match {
    case None => Future.successful(None)
    case Some(md) =>
      s3Client.getObject(settings.bucketName, snapshotKey(md))(s3Dispatcher)
        .map { obj =>
          val snapshot = deserialize(obj.getObjectContent)
          Some(SelectedSnapshot(md, snapshot.data))
        } recoverWith {
          case NonFatal(e) =>
            log.error(e, s"Error loading snapshot [${md}]")
            load(metadata.init) // try older snapshot
        }
  }

  override def saveAsync(metadata: SnapshotMetadata, snapshot: Any): Future[Unit] = {
    val serialized = serialize(Snapshot(snapshot))
    val objectMetadata = new ObjectMetadata()
    objectMetadata.setContentLength(serialized.size)
    s3Client.putObject(
      settings.bucketName,
      snapshotKey(metadata),
      serialized.stream,
      objectMetadata
    )(s3Dispatcher).map(_ => ())
  }

  override def deleteAsync(metadata: SnapshotMetadata): Future[Unit] = {
    if (metadata.timestamp == 0L)
      deleteAsync(metadata.persistenceId, SnapshotSelectionCriteria(metadata.sequenceNr, Long.MaxValue, metadata.sequenceNr, Long.MinValue))
    else
      s3Client.deleteObject(settings.bucketName, snapshotKey(metadata))(s3Dispatcher)
  }

  override def deleteAsync(persistenceId: String, criteria: SnapshotSelectionCriteria): Future[Unit] = {
    val metadatas = snapshotMetadatas(persistenceId, criteria)
    metadatas.map(list => Future.sequence(list.map(deleteAsync)))
  }

  private def snapshotMetadatas(persistenceId: String, criteria: SnapshotSelectionCriteria): Future[List[SnapshotMetadata]] = {
    s3Client.listObjects(
      new ListObjectsRequest()
        .withBucketName(settings.bucketName)
        .withPrefix(prefixFromPersistenceId(persistenceId))
        .withDelimiter("/")
    )(s3Dispatcher)
      .map(_.getObjectSummaries.toList.map(s => parseKeyToMetadata(s.getKey))
        .filter(m => m.sequenceNr >= criteria.minSequenceNr && m.sequenceNr <= criteria.maxSequenceNr && m.timestamp >= criteria.minTimestamp && m.timestamp <= criteria.maxTimestamp))

  }

  protected def deserialize(inputStream: S3ObjectInputStream): Snapshot =
    serializationExtension.deserialize(akka.persistence.serialization.streamToBytes(inputStream), classOf[Snapshot]).get

  protected def serialize(snapshot: Snapshot): SerializationResult = {
    // Serialize once and reuse the bytes for both the stream and its reported size.
    val serialized = serializationExtension.findSerializerFor(snapshot).toBinary(snapshot)
    SerializationResult(new ByteArrayInputStream(serialized), serialized.size)
  }
} 
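SerializationResult pairs the stream with its byte count because S3 stream uploads expect a known Content-Length up front. A minimal sketch of the same pairing without the AWS or Akka dependencies (Payload and toPayload are illustrative names):

import java.io.ByteArrayInputStream

object PayloadSketch extends App {
  // Keep the size next to the stream so the uploader can set Content-Length without
  // having to buffer or re-read the stream.
  final case class Payload(stream: ByteArrayInputStream, size: Int)

  def toPayload(bytes: Array[Byte]): Payload = Payload(new ByteArrayInputStream(bytes), bytes.length)

  val p = toPayload("snapshot-bytes".getBytes("UTF-8"))
  println(p.size) // 14
}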
Example 38
Source File: SerializedWithSchemaToObject.scala    From trucking-iot   with Apache License 2.0 5 votes vote down vote up
package com.orendainx.trucking.storm.bolts

import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets
import java.util

import com.hortonworks.registries.schemaregistry.SchemaMetadata
import com.hortonworks.registries.schemaregistry.avro.AvroSchemaProvider
import com.hortonworks.registries.schemaregistry.client.SchemaRegistryClient
import com.hortonworks.registries.schemaregistry.serdes.avro.AvroSnapshotDeserializer
import com.orendainx.trucking.commons.models.{EnrichedTruckData, TrafficData}
import com.typesafe.scalalogging.Logger
import org.apache.avro.generic.{GenericData, GenericRecord}
import org.apache.storm.task.{OutputCollector, TopologyContext}
import org.apache.storm.topology.OutputFieldsDeclarer
import org.apache.storm.topology.base.BaseRichBolt
import org.apache.storm.tuple.{Fields, Tuple, Values}

import scala.collection.JavaConversions._


class SerializedWithSchemaToObject extends BaseRichBolt {

  private lazy val log = Logger(this.getClass)
  private var outputCollector: OutputCollector = _

  // Declare schema-related fields to be initialized when this component's prepare() method is called
  private var schemaRegistryClient: SchemaRegistryClient = _
  private var deserializer: AvroSnapshotDeserializer = _
  private var truckDataSchemaMetadata: SchemaMetadata = _
  private var trafficDataSchemaMetadata: SchemaMetadata = _

  override def prepare(stormConf: util.Map[_, _], context: TopologyContext, collector: OutputCollector): Unit = {

    outputCollector = collector

    val schemaRegistryUrl = stormConf.get(SchemaRegistryClient.Configuration.SCHEMA_REGISTRY_URL.name()).toString
    val clientConfig = Map(SchemaRegistryClient.Configuration.SCHEMA_REGISTRY_URL.name() -> schemaRegistryUrl)

    schemaRegistryClient = new SchemaRegistryClient(clientConfig)
    truckDataSchemaMetadata = schemaRegistryClient.getSchemaMetadataInfo("EnrichedTruckData").getSchemaMetadata
    trafficDataSchemaMetadata = schemaRegistryClient.getSchemaMetadataInfo("TrafficData").getSchemaMetadata
    deserializer = schemaRegistryClient.getDefaultDeserializer(AvroSchemaProvider.TYPE).asInstanceOf[AvroSnapshotDeserializer]
    deserializer.init(clientConfig)
  }

  override def execute(tuple: Tuple): Unit = {

    // Deserialize each tuple and convert it into its proper case class (e.g. EnrichedTruckData or TrafficData)
    val str = tuple.getStringByField("data").getBytes(StandardCharsets.UTF_8)
    log.info(s"str2: ${tuple.getStringByField("data")}")
    val bytes = new ByteArrayInputStream(str)
    log.info(s"bytes: $bytes")
    val (dataType, data) = tuple.getStringByField("dataType") match {
      case typ @ "EnrichedTruckData" =>
        log.info(s"des: ${deserializer.deserialize(bytes, null)}")
        (typ, recordToEnrichedTruckData(deserializer.deserialize(bytes, null).asInstanceOf[GenericData.Record]))
      case typ @ "TrafficData" =>
        log.info(s"des: ${deserializer.deserialize(bytes, null)}")
        (typ, recordToTrafficData(deserializer.deserialize(bytes, null).asInstanceOf[GenericData.Record]))
    }

    outputCollector.emit(new Values(data, dataType))
    outputCollector.ack(tuple)
  }

  override def declareOutputFields(declarer: OutputFieldsDeclarer): Unit = declarer.declare(new Fields("data", "dataType"))

  // Helper function to convert GenericRecord (result of deserializing via Schema Registry) into JVM object
  private def recordToEnrichedTruckData(r: GenericRecord): EnrichedTruckData =
    EnrichedTruckData(
      r.get("eventTime").toString.toLong,
      r.get("truckId").toString.toInt,
      r.get("driverId").toString.toInt,
      r.get("driverName").toString,
      r.get("routeId").toString.toInt,
      r.get("routeName").toString,
      r.get("latitude").toString.toDouble,
      r.get("longitude").toString.toDouble,
      r.get("speed").toString.toInt,
      r.get("eventType").toString,
      r.get("foggy").toString.toInt,
      r.get("rainy").toString.toInt,
      r.get("windy").toString.toInt)

  // Helper function to convert GenericRecord (result of deserializing via Schema Registry) into JVM object
  private def recordToTrafficData(r: GenericRecord): TrafficData =
    TrafficData(r.get("eventTime").toString.toLong, r.get("routeId").toString.toInt, r.get("congestionLevel").toString.toInt)
} 
Example 39
Source File: NiFiPacketWithSchemaToObject.scala    From trucking-iot   with Apache License 2.0 5 votes vote down vote up
package com.orendainx.trucking.storm.bolts

import java.io.ByteArrayInputStream
import java.util

import com.hortonworks.registries.schemaregistry.SchemaMetadata
import com.hortonworks.registries.schemaregistry.avro.AvroSchemaProvider
import com.hortonworks.registries.schemaregistry.client.SchemaRegistryClient
import com.hortonworks.registries.schemaregistry.serdes.avro.AvroSnapshotDeserializer
import com.orendainx.trucking.commons.models.{EnrichedTruckData, TrafficData}
import com.typesafe.scalalogging.Logger
import org.apache.avro.generic.{GenericData, GenericRecord}
import org.apache.nifi.storm.NiFiDataPacket
import org.apache.storm.task.{OutputCollector, TopologyContext}
import org.apache.storm.topology.OutputFieldsDeclarer
import org.apache.storm.topology.base.BaseRichBolt
import org.apache.storm.tuple.{Fields, Tuple, Values}

import scala.collection.JavaConversions._


class NiFiPacketWithSchemaToObject extends BaseRichBolt {

  private lazy val log = Logger(this.getClass)
  private var outputCollector: OutputCollector = _

  // Declare schema-related fields to be initialized when this component's prepare() method is called
  private var schemaRegistryClient: SchemaRegistryClient = _
  private var deserializer: AvroSnapshotDeserializer = _
  private var truckDataSchemaMetadata: SchemaMetadata = _
  private var trafficDataSchemaMetadata: SchemaMetadata = _

  override def prepare(stormConf: util.Map[_, _], context: TopologyContext, collector: OutputCollector): Unit = {

    outputCollector = collector

    val schemaRegistryUrl = stormConf.get(SchemaRegistryClient.Configuration.SCHEMA_REGISTRY_URL.name()).toString
    val clientConfig = Map(SchemaRegistryClient.Configuration.SCHEMA_REGISTRY_URL.name() -> schemaRegistryUrl)

    schemaRegistryClient = new SchemaRegistryClient(clientConfig)
    truckDataSchemaMetadata = schemaRegistryClient.getSchemaMetadataInfo("EnrichedTruckData").getSchemaMetadata
    trafficDataSchemaMetadata = schemaRegistryClient.getSchemaMetadataInfo("TrafficData").getSchemaMetadata
    deserializer = schemaRegistryClient.getDefaultDeserializer(AvroSchemaProvider.TYPE).asInstanceOf[AvroSnapshotDeserializer]
    deserializer.init(clientConfig)
  }

  override def execute(tuple: Tuple): Unit = {
    val dp = tuple.getValueByField("nifiDataPacket").asInstanceOf[NiFiDataPacket]

    // Deserialize each tuple and convert it into its proper case class (e.g. EnrichedTruckData or TrafficData)
    val (dataType, data) = dp.getAttributes.get("dataType") match {
      case typ @ "EnrichedTruckData" => (typ, recordToEnrichedTruckData(deserializer.deserialize(new ByteArrayInputStream(dp.getContent), null).asInstanceOf[GenericData.Record]))
      case typ @ "TrafficData" => (typ, recordToTrafficData(deserializer.deserialize(new ByteArrayInputStream(dp.getContent), null).asInstanceOf[GenericData.Record]))
    }

    outputCollector.emit(new Values(data, dataType))
    outputCollector.ack(tuple)
  }

  override def declareOutputFields(declarer: OutputFieldsDeclarer): Unit = declarer.declare(new Fields("data", "dataType"))

  // Helper function to convert GenericRecord (result of deserializing via Schema Registry) into JVM object
  private def recordToEnrichedTruckData(r: GenericRecord): EnrichedTruckData =
    EnrichedTruckData(
      r.get("eventTime").toString.toLong,
      r.get("truckId").toString.toInt,
      r.get("driverId").toString.toInt,
      r.get("driverName").toString,
      r.get("routeId").toString.toInt,
      r.get("routeName").toString,
      r.get("latitude").toString.toDouble,
      r.get("longitude").toString.toDouble,
      r.get("speed").toString.toInt,
      r.get("eventType").toString,
      r.get("foggy").toString.toInt,
      r.get("rainy").toString.toInt,
      r.get("windy").toString.toInt)

  // Helper function to convert GenericRecord (result of deserializing via Schema Registry) into JVM object
  private def recordToTrafficData(r: GenericRecord): TrafficData =
    TrafficData(r.get("eventTime").toString.toLong, r.get("routeId").toString.toInt, r.get("congestionLevel").toString.toInt)
} 
Example 40
Source File: BytesWithSchemaToObject.scala    From trucking-iot   with Apache License 2.0 5 votes vote down vote up
package com.orendainx.trucking.storm.bolts

import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets
import java.util

import com.hortonworks.registries.schemaregistry.SchemaMetadata
import com.hortonworks.registries.schemaregistry.avro.AvroSchemaProvider
import com.hortonworks.registries.schemaregistry.client.SchemaRegistryClient
import com.hortonworks.registries.schemaregistry.serdes.avro.AvroSnapshotDeserializer
import com.orendainx.trucking.commons.models.{EnrichedTruckData, TrafficData}
import com.typesafe.scalalogging.Logger
import org.apache.avro.generic.{GenericData, GenericRecord}
import org.apache.storm.task.{OutputCollector, TopologyContext}
import org.apache.storm.topology.OutputFieldsDeclarer
import org.apache.storm.topology.base.BaseRichBolt
import org.apache.storm.tuple.{Fields, Tuple, Values}

import scala.collection.JavaConversions._


// The class declaration, prepare(), execute(), and declareOutputFields() were elided in this
// listing; they follow the same Schema Registry deserialization flow as
// SerializedWithSchemaToObject above. Minimal stubs keep the fragment compilable.
class BytesWithSchemaToObject extends BaseRichBolt {

  override def prepare(stormConf: util.Map[_, _], context: TopologyContext, collector: OutputCollector): Unit = ()

  override def execute(tuple: Tuple): Unit = ()

  override def declareOutputFields(declarer: OutputFieldsDeclarer): Unit = declarer.declare(new Fields("data", "dataType"))

  // Helper function to convert GenericRecord (result of deserializing via Schema Registry) into JVM object
  private def recordToEnrichedTruckData(r: GenericRecord): EnrichedTruckData =
    EnrichedTruckData(
      r.get("eventTime").toString.toLong,
      r.get("truckId").toString.toInt,
      r.get("driverId").toString.toInt,
      r.get("driverName").toString,
      r.get("routeId").toString.toInt,
      r.get("routeName").toString,
      r.get("latitude").toString.toDouble,
      r.get("longitude").toString.toDouble,
      r.get("speed").toString.toInt,
      r.get("eventType").toString,
      r.get("foggy").toString.toInt,
      r.get("rainy").toString.toInt,
      r.get("windy").toString.toInt)

  // Helper function to convert GenericRecord (result of deserializing via Schema Registry) into JVM object
  private def recordToTrafficData(r: GenericRecord): TrafficData =
    TrafficData(r.get("eventTime").toString.toLong, r.get("routeId").toString.toInt, r.get("congestionLevel").toString.toInt)
} 
Example 41
Source File: RegisterNodeSerializerTest.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.kryo

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import justin.db.actors.protocol.RegisterNode
import justin.db.consistenthashing.NodeId
import org.scalatest.{FlatSpec, Matchers}

class RegisterNodeSerializerTest extends FlatSpec with Matchers {

  behavior of "RegisterNode Serializer"

  it should "serialize/deserialize correctly" in {
    // kryo init
    val kryo = new Kryo()
    kryo.register(classOf[RegisterNode], RegisterNodeSerializer)

    // object
    val serializedData = RegisterNode(NodeId(1))

    // serialization
    val bos    = new ByteArrayOutputStream()
    val output = new Output(bos)
    val _      = kryo.writeObject(output, serializedData)
    output.flush()

    // deserialization
    val bis              = new ByteArrayInputStream(bos.toByteArray)
    val input            = new Input(bis)
    val deserializedData = kryo.readObject(input, classOf[RegisterNode])

    serializedData shouldBe deserializedData
  }
} 
Example 42
Source File: DataSerializerTest.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.kryo

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.UUID

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import justin.db.Data
import justin.db.consistenthashing.NodeId
import justin.db.vectorclocks.{Counter, VectorClock}
import org.scalatest.{FlatSpec, Matchers}

class DataSerializerTest extends FlatSpec with Matchers {

  behavior of "Data Serializer"

  it should "serialize/deserialize correctly" in {
    // kryo init
    val kryo = new Kryo()
    kryo.register(classOf[justin.db.Data], DataSerializer)

    // object
    val vClock         = VectorClock[NodeId](Map(NodeId(1) -> Counter(3)))
    val timestamp      = System.currentTimeMillis()
    val serializedData = Data(id = UUID.randomUUID(), value = "some value", vClock, timestamp)

    // serialization
    val bos    = new ByteArrayOutputStream()
    val output = new Output(bos)
    val _      = kryo.writeObject(output, serializedData)
    output.flush()

    // deserialization
    val bis              = new ByteArrayInputStream(bos.toByteArray)
    val input            = new Input(bis)
    val deserializedData = kryo.readObject(input, classOf[Data])

    serializedData shouldBe deserializedData
  }
} 
Example 43
Source File: StorageNodeWriteDataLocalSerializerTest.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.kryo

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.UUID

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import justin.db.Data
import justin.db.actors.protocol.StorageNodeWriteDataLocal
import justin.db.consistenthashing.NodeId
import justin.db.vectorclocks.{Counter, VectorClock}
import org.scalatest.{FlatSpec, Matchers}

class StorageNodeWriteDataLocalSerializerTest extends FlatSpec with Matchers {

  behavior of "StorageNodeWriteDataLocal Serializer"

  it should "serialize/deserialize StorageNodeWriteDataLocal" in {
    // kryo init
    val kryo = new Kryo()
    kryo.register(classOf[StorageNodeWriteDataLocal], StorageNodeWriteDataLocalSerializer)

    // object
    val data = Data(
      id        = UUID.randomUUID(),
      value     = "some value",
      vclock    = VectorClock[NodeId](Map(NodeId(1) -> Counter(3))),
      timestamp = System.currentTimeMillis()
    )
    val serializedData = StorageNodeWriteDataLocal(data)

    // serialization
    val bos    = new ByteArrayOutputStream()
    val output = new Output(bos)
    val _      = kryo.writeObject(output, serializedData)
    output.flush()

    // deserialization
    val bis              = new ByteArrayInputStream(bos.toByteArray)
    val input            = new Input(bis)
    val deserializedData = kryo.readObject(input, classOf[StorageNodeWriteDataLocal])

    serializedData shouldBe deserializedData
  }
} 
Example 44
Source File: StorageNodeLocalReadSerializerTest.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.kryo

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.UUID

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import justin.db.actors.protocol.StorageNodeLocalRead
import org.scalatest.{FlatSpec, Matchers}

class StorageNodeLocalReadSerializerTest extends FlatSpec with Matchers {

  behavior of "StorageNodeLocalReader Serializer"

  it should "serialize/deserialize correctly" in {
    // kryo init
    val kryo = new Kryo()
    kryo.register(classOf[StorageNodeLocalRead], StorageNodeLocalReadSerializer)

    // object
    val serializedData = StorageNodeLocalRead(UUID.randomUUID())

    // serialization
    val bos    = new ByteArrayOutputStream()
    val output = new Output(bos)
    val _      = kryo.writeObject(output, serializedData)
    output.flush()

    // deserialization
    val bis              = new ByteArrayInputStream(bos.toByteArray)
    val input            = new Input(bis)
    val deserializedData = kryo.readObject(input, classOf[StorageNodeLocalRead])

    serializedData shouldBe deserializedData
  }
} 
Example 45
Source File: RocksDBStorage.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.storage

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File}
import java.util.UUID

import com.esotericsoftware.kryo.io.{Input, Output}
import com.esotericsoftware.kryo.{Kryo, Serializer}
import justin.db.storage.PluggableStorageProtocol.{Ack, StorageGetData}
import org.rocksdb.{FlushOptions, Options, RocksDB}

import scala.concurrent.Future

// TODO:
// The current version stores every piece of data in a single file (it does not take data originality into account).
// Data should eventually be split by ring partitionId.
// This might become an issue during data movements between nodes.
final class RocksDBStorage(dir: File) extends PluggableStorageProtocol {
  import RocksDBStorage._

  {
    RocksDB.loadLibrary()
  }

  private[this] val kryo = new Kryo()

  private[this] val db: RocksDB = {
    val options: Options = new Options().setCreateIfMissing(true)
    RocksDB.open(options, dir.getPath)
  }

  override def get(id: UUID)(resolveOriginality: (UUID) => PluggableStorageProtocol.DataOriginality): Future[PluggableStorageProtocol.StorageGetData] = {
    val key: Array[Byte] = uuid2bytes(kryo, id)
    val dataBytes: Array[Byte] = db.get(key)

    val justinDataOpt = Option(dataBytes).map { dataBytes =>
      val input = new Input(new ByteArrayInputStream(dataBytes))
      JustinDataSerializer.read(kryo, input, classOf[JustinData])
    }

    Future.successful(justinDataOpt.map(StorageGetData.Single).getOrElse(StorageGetData.None))
  }

  override def put(data: JustinData)(resolveOriginality: (UUID) => PluggableStorageProtocol.DataOriginality): Future[PluggableStorageProtocol.Ack] = {
    val key: Array[Byte] = uuid2bytes(kryo, data.id)
    val dataBytes: Array[Byte] = {
      val output = new Output(new ByteArrayOutputStream())
      JustinDataSerializer.write(kryo, output, data)
      output.getBuffer
    }

    db.put(key, dataBytes)
    db.flush(new FlushOptions().setWaitForFlush(true))

    Ack.future
  }
}

object RocksDBStorage {

  def uuid2bytes(kryo: Kryo, id: UUID): Array[Byte] = {
    val output = new Output(new ByteArrayOutputStream(), 16)
    UUIDSerializer.write(kryo, output, id)
    output.getBuffer
  }

  object UUIDSerializer extends Serializer[UUID] {
    override def read(kryo: Kryo, input: Input, `type`: Class[UUID]): UUID = {
      new UUID(input.readLong, input.readLong)
    }

    override def write(kryo: Kryo, output: Output, uuid: UUID): Unit = {
      output.writeLong(uuid.getMostSignificantBits)
      output.writeLong(uuid.getLeastSignificantBits)
    }
  }

  object JustinDataSerializer extends Serializer[JustinData] {
    override def read(kryo: Kryo, input: Input, `type`: Class[JustinData]): JustinData = {
      JustinData(
        id        = UUIDSerializer.read(kryo, input, classOf[UUID]),
        value     = input.readString(),
        vclock    = input.readString(),
        timestamp = input.readLong()
      )
    }

    override def write(kryo: Kryo, output: Output, data: JustinData): Unit = {
      UUIDSerializer.write(kryo, output, data.id)
      output.writeString(data.value)
      output.writeString(data.vclock)
      output.writeLong(data.timestamp)
    }
  }
} 
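
A rough usage sketch for the storage above (not taken from the JustinDB sources): it assumes the RocksDB native library can be loaded and that RocksDBStorage and JustinData are on the classpath. Because the implementation shown never invokes its resolveOriginality argument, a stub function is enough here.

import java.nio.file.Files
import java.util.UUID

import justin.db.storage._

import scala.concurrent.ExecutionContext.Implicits.global

val storage = new RocksDBStorage(Files.createTempDirectory("rocksdb-sketch").toFile)
val data    = JustinData(UUID.randomUUID(), "some value", "vclock-value", System.currentTimeMillis())

// put a record and read it back; resolveOriginality is ignored by this implementation, so a stub suffices
val readBack = for {
  _   <- storage.put(data)(_ => ???)
  got <- storage.get(data.id)(_ => ???)
} yield got // expected: StorageGetData.Single(data)
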
Example 46
Source File: JustinDataSerializerTest.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.storage

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.UUID

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import justin.db.storage.RocksDBStorage.JustinDataSerializer
import org.scalatest.{FlatSpec, Matchers}

class JustinDataSerializerTest extends FlatSpec with Matchers {

  behavior of "JustinDataSerializer"

  it should "serialize/deserialize JustinData with Kryo" in {
    val kryo = new Kryo()
    val data = JustinData(
      id        = UUID.randomUUID,
      value     = "to jest przykladowa wartość",
      vclock    = "vclock-value",
      timestamp = 1234124L
    )

    // serialize
    val output = new Output(new ByteArrayOutputStream())
    JustinDataSerializer.write(kryo, output, data)
    val dataBytes = output.getBuffer

    // deserialize
    val input = new Input(new ByteArrayInputStream(dataBytes))
    JustinDataSerializer.read(kryo, input, classOf[JustinData]) shouldBe data
  }
} 
Example 47
Source File: UUIDSerializerTest.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.storage

import java.io.ByteArrayInputStream
import java.util.UUID

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.Input
import justin.db.storage.RocksDBStorage.UUIDSerializer
import org.scalatest.{FlatSpec, Matchers}

class UUIDSerializerTest extends FlatSpec with Matchers {

  behavior of "UUIDSerializer"

  it should "serialize/deserialize UUID with Kryo" in {
    val uuid = UUID.randomUUID()
    val kryo = new Kryo()

    // serialize
    val bytes = RocksDBStorage.uuid2bytes(kryo, uuid)

    // deserialize
    val input = new Input(new ByteArrayInputStream(bytes))
    val id = UUIDSerializer.read(kryo, input, classOf[UUID])

    uuid shouldBe id
  }
} 
Example 48
Source File: AmqpXPathCheckMaterializer.scala    From gatling-amqp-plugin   with Apache License 2.0 5 votes vote down vote up
package ru.tinkoff.gatling.amqp.checks

import java.io.{ByteArrayInputStream, InputStreamReader}

import io.gatling.commons.validation.{safely, _}
import io.gatling.core.check.xpath.{Dom, XPathCheckType, XmlParsers}
import io.gatling.core.check.{CheckMaterializer, Preparer}
import org.xml.sax.InputSource
import ru.tinkoff.gatling.amqp.AmqpCheck
import ru.tinkoff.gatling.amqp.request.AmqpProtocolMessage

class AmqpXPathCheckMaterializer(xmlParsers: XmlParsers)
    extends CheckMaterializer[XPathCheckType, AmqpCheck, AmqpProtocolMessage, Option[Dom]](identity) {
  private val ErrorMapper = "Could not parse response into a DOM Document: " + _

  override protected def preparer: Preparer[AmqpProtocolMessage, Option[Dom]] =
    message =>
      safely(ErrorMapper) {
        message match {
          case AmqpProtocolMessage(_, payload, _) =>
            val in = new ByteArrayInputStream(payload)
            Some(xmlParsers.parse(new InputSource(new InputStreamReader(in)))).success
          case _ => "Unsupported message type".failure
        }
      }
} 
Example 49
Source File: AmqpJsonPathCheckMaterializer.scala    From gatling-amqp-plugin   with Apache License 2.0 5 votes vote down vote up
package ru.tinkoff.gatling.amqp.checks

import java.io.ByteArrayInputStream
import java.nio.charset.Charset

import io.gatling.core.check.jsonpath.JsonPathCheckType
import io.gatling.core.check.{CheckMaterializer, Preparer}
import io.gatling.core.json.JsonParsers
import ru.tinkoff.gatling.amqp.AmqpCheck
import ru.tinkoff.gatling.amqp.request.AmqpProtocolMessage

import scala.util.Try

class AmqpJsonPathCheckMaterializer(jsonParsers: JsonParsers)
  extends CheckMaterializer[JsonPathCheckType, AmqpCheck, AmqpProtocolMessage, Any](identity) {
  override protected def preparer: Preparer[AmqpProtocolMessage, Any] =
    AmqpJsonPathCheckMaterializer.jsonPathPreparer(jsonParsers)
}

object AmqpJsonPathCheckMaterializer {
  private val CharsParsingThreshold = 200 * 1000

  private def jsonPathPreparer(jsonParsers: JsonParsers): Preparer[AmqpProtocolMessage, Any] =
    replyMessage => {
      val bodyCharset = Try(Charset.forName(replyMessage.amqpProperties.getContentEncoding))
        .getOrElse(Charset.defaultCharset())

      if (replyMessage.payload.length > CharsParsingThreshold)
        jsonParsers.safeParse(new ByteArrayInputStream(replyMessage.payload), bodyCharset)
      else
        jsonParsers.safeParse(new String(replyMessage.payload, bodyCharset))
    }
} 
Example 50
Source File: JQHttpClient.scala    From ledger-manager-chrome   with MIT License 5 votes vote down vote up
package co.ledger.manager.web.core.net

import java.io.ByteArrayInputStream

import co.ledger.wallet.core.net.{BasicHttpRequestLogger, HttpClient, HttpRequestExecutor, HttpRequestLogger}
import co.ledger.manager.web.core.utils.JQueryHelper
import org.scalajs.jquery.JQueryXHR

import scala.concurrent.ExecutionContext
import scala.scalajs.js


class JQHttpClient(override val baseUrl: String) extends HttpClient {
  override implicit val ec: ExecutionContext = scala.concurrent.ExecutionContext.Implicits.global

  override protected val executor: HttpRequestExecutor = new HttpRequestExecutor {
    override def execute(responseBuilder: co.ledger.wallet.core.net.HttpClient#ResponseBuilder): Unit = {
      val request = responseBuilder.request
      val headers = js.Dictionary[js.Any]()
      request.headers foreach {
        case (key, value) =>
          headers(key) = value.toString
      }
      try {
        JQueryHelper.$.ajax(js.Dictionary[js.Any](
          "method" -> request.method,
          "url" -> request.url,
          "headers" -> headers,
          "timeout" -> (request.readTimeout.toMillis + request.connectionTimeout.toMillis),
          "data" -> request.bodyAsString,
          "complete" -> { (xhr: JQueryXHR, status: String) =>
            responseBuilder.statusCode = xhr.status
            responseBuilder.statusMessage = xhr.statusText
            responseBuilder.bodyEncoding = "utf-8"
            if (xhr.status != 0)
              responseBuilder.body = new ByteArrayInputStream(xhr.responseText.getBytes)
            responseBuilder.build()
          }
        ))
      } catch {
        case er: Throwable =>
          responseBuilder.failure(er)
      }
      request.body.close()
    }
  }
  override var defaultLogger: HttpRequestLogger = new BasicHttpRequestLogger
}

object JQHttpClient {
  val etcInstance = new JQHttpClient("https://api.ledgerwallet.com/blockchain/v2/ethc")
  val ethInstance = new JQHttpClient("https://api.ledgerwallet.com/blockchain/v2/eth")
} 
Example 51
Source File: SerializableSerializerTest.scala    From spark-util   with Apache License 2.0 5 votes vote down vote up
package org.hammerlab.hadoop.kryo

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream }

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{ Input, Output }
import org.hammerlab.test.Suite

class SerializableSerializerTest
  extends Suite {
  test("serde") {
    val kryo = new Kryo()
    kryo.setRegistrationRequired(true)
    val baos = new ByteArrayOutputStream()
    val output = new Output(baos)

    val foo = new Foo
    foo.n = 123
    foo.s = "abc"

    intercept[IllegalArgumentException] {
      kryo.writeClassAndObject(output, foo)
    }
    .getMessage should startWith("Class is not registered: org.hammerlab.hadoop.kryo.Foo")

    kryo.register(classOf[Foo], SerializableSerializer[Foo]())

    kryo.writeClassAndObject(output, foo)

    output.close()

    val bytes = baos.toByteArray
    bytes.length should be(93)

    val bais = new ByteArrayInputStream(bytes)

    val input = new Input(bais)
    val after = kryo.readClassAndObject(input).asInstanceOf[Foo]

    after.n should be(foo.n)
    after.s should be(foo.s)
  }
}

class Foo
  extends Serializable {

  var n = 0
  var s = ""

  private def writeObject(out: ObjectOutputStream): Unit = {
    out.writeInt(n)
    out.writeUTF(s)
  }

  private def readObject(in: ObjectInputStream): Unit = {
    n = in.readInt()
    s = in.readUTF()
  }
} 
Example 52
Source File: CloudFrontSigner.scala    From openwhisk   with Apache License 2.0 5 votes vote down vote up
package org.apache.openwhisk.core.database.s3
import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets.UTF_8
import java.security.PrivateKey
import java.time.Instant
import java.util.Date

import akka.http.scaladsl.model.Uri
import com.amazonaws.auth.PEM
import com.amazonaws.services.cloudfront.CloudFrontUrlSigner
import com.amazonaws.services.cloudfront.util.SignerUtils
import com.amazonaws.services.cloudfront.util.SignerUtils.Protocol

import scala.concurrent.duration._

case class CloudFrontConfig(domainName: String,
                            keyPairId: String,
                            privateKey: String,
                            timeout: FiniteDuration = 10.minutes)

case class CloudFrontSigner(config: CloudFrontConfig) extends UrlSigner {
  private val privateKey = createPrivateKey(config.privateKey)

  override def getSignedURL(s3ObjectKey: String): Uri = {
    val resourcePath = SignerUtils.generateResourcePath(Protocol.https, config.domainName, s3ObjectKey)
    val date = Date.from(Instant.now().plusSeconds(config.timeout.toSeconds))
    val url = CloudFrontUrlSigner.getSignedURLWithCannedPolicy(resourcePath, config.keyPairId, privateKey, date)
    Uri(url)
  }

  override def toString: String = s"CloudFront Signer - ${config.domainName}"

  private def createPrivateKey(keyContent: String): PrivateKey = {
    val is = new ByteArrayInputStream(keyContent.getBytes(UTF_8))
    PEM.readPrivateKey(is)
  }
} 
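
A configuration sketch for the signer above; the domain name, key pair id and private key are placeholders (a real PEM-encoded key matching the CloudFront key pair is required).

import org.apache.openwhisk.core.database.s3.{CloudFrontConfig, CloudFrontSigner}

// placeholder values, for illustration only
val config = CloudFrontConfig(
  domainName = "d1234example.cloudfront.net",
  keyPairId  = "APKAEXAMPLEKEYID",
  privateKey = "-----BEGIN RSA PRIVATE KEY-----\n...\n-----END RSA PRIVATE KEY-----")

val signer    = CloudFrontSigner(config)
val signedUri = signer.getSignedURL("images/attachment.zip") // expires after config.timeout (10 minutes)
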
Example 53
Source File: DefaultRowReader.scala    From mleap   with Apache License 2.0 5 votes vote down vote up
package ml.combust.mleap.binary

import java.io.{ByteArrayInputStream, DataInputStream}
import java.nio.charset.Charset

import ml.combust.mleap.runtime.serialization.{BuiltinFormats, RowReader}
import ml.combust.mleap.core.types.StructType
import ml.combust.mleap.runtime.frame.{ArrayRow, Row}
import resource._

import scala.util.Try


class DefaultRowReader(override val schema: StructType) extends RowReader {
  private val serializers = schema.fields.map(_.dataType).map(ValueSerializer.serializerForDataType)

  override def fromBytes(bytes: Array[Byte], charset: Charset = BuiltinFormats.charset): Try[Row] = {
    (for(in <- managed(new ByteArrayInputStream(bytes))) yield {
      val din = new DataInputStream(in)
      val row = ArrayRow(new Array[Any](schema.fields.length))
      var i = 0
      for(s <- serializers) {
        row.set(i, s.read(din))
        i = i + 1
      }
      row
    }).tried
  }
} 
Example 54
Source File: DefaultFrameReader.scala    From mleap   with Apache License 2.0 5 votes vote down vote up
package ml.combust.mleap.binary

import java.io.{ByteArrayInputStream, DataInputStream}
import java.nio.charset.Charset

import ml.combust.mleap.runtime.serialization.{BuiltinFormats, FrameReader}
import ml.combust.mleap.core.types.StructType
import ml.combust.mleap.json.JsonSupport._
import ml.combust.mleap.runtime.frame.{ArrayRow, DefaultLeapFrame, Row}
import spray.json._
import resource._

import scala.collection.mutable
import scala.util.Try


class DefaultFrameReader extends FrameReader {
  override def fromBytes(bytes: Array[Byte],
                         charset: Charset = BuiltinFormats.charset): Try[DefaultLeapFrame] = {
    (for(in <- managed(new ByteArrayInputStream(bytes))) yield {
      val din = new DataInputStream(in)
      val length = din.readInt()
      val schemaBytes = new Array[Byte](length)
      din.readFully(schemaBytes)
      val schema = new String(schemaBytes, BuiltinFormats.charset).parseJson.convertTo[StructType]
      val serializers = schema.fields.map(_.dataType).map(ValueSerializer.serializerForDataType)
      val rowCount = din.readInt()
      val rows = mutable.WrappedArray.make[Row](new Array[Row](rowCount))

      for(i <- 0 until rowCount) {
        val row = new ArrayRow(new Array[Any](schema.fields.length))

        var j = 0
        for(s <- serializers) {
          row.set(j, s.read(din))
          j = j + 1
        }

        rows(i) = row
      }

      DefaultLeapFrame(schema, rows)
    }).tried
  }
} 
Example 55
Source File: XGBoostRegressionOp.scala    From mleap   with Apache License 2.0 5 votes vote down vote up
package ml.combust.mleap.xgboost.runtime.bundle.ops

import java.io.ByteArrayInputStream
import java.nio.file.Files

import ml.combust.bundle.BundleContext
import ml.combust.bundle.dsl.{Model, Value}
import ml.combust.bundle.op.OpModel
import ml.combust.mleap.bundle.ops.MleapOp
import ml.combust.mleap.runtime.MleapContext
import ml.combust.mleap.xgboost.runtime.{XGBoostRegression, XGBoostRegressionModel}
import ml.dmlc.xgboost4j.scala.XGBoost
import resource._


class XGBoostRegressionOp extends MleapOp[XGBoostRegression, XGBoostRegressionModel] {
  override val Model: OpModel[MleapContext, XGBoostRegressionModel] = new OpModel[MleapContext, XGBoostRegressionModel] {
    override val klazz: Class[XGBoostRegressionModel] = classOf[XGBoostRegressionModel]

    override def opName: String = "xgboost.regression"

    override def store(model: Model, obj: XGBoostRegressionModel)
                      (implicit context: BundleContext[MleapContext]): Model = {
      val out = Files.newOutputStream(context.file("xgboost.model"))
      obj.booster.saveModel(out)

      model
        .withValue("num_features", Value.int(obj.numFeatures))
        .withValue("tree_limit", Value.int(obj.treeLimit))
    }

    override def load(model: Model)
                     (implicit context: BundleContext[MleapContext]): XGBoostRegressionModel = {
      val bytes = Files.readAllBytes(context.file("xgboost.model"))
      val booster = XGBoost.loadModel(new ByteArrayInputStream(bytes))
      val treeLimit = model.value("tree_limit").getInt

      XGBoostRegressionModel(booster,
        numFeatures = model.value("num_features").getInt,
        treeLimit = treeLimit)
    }
  }

  override def model(node: XGBoostRegression): XGBoostRegressionModel = node.model
} 
Example 56
Source File: TestSpec.scala    From spark-distcp   with Apache License 2.0 5 votes vote down vote up
package com.coxautodata

import java.io.ByteArrayInputStream
import java.nio.file.Files

import com.coxautodata.objects.SerializableFileStatus
import com.coxautodata.utils.FileListing
import org.apache.commons.io.{FileUtils, IOUtils}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, LocalFileSystem, Path}
import org.scalatest.{BeforeAndAfterEach, FunSpec, Matchers}

trait TestSpec extends FunSpec with Matchers with BeforeAndAfterEach {

  var testingBaseDir: java.nio.file.Path = _
  var testingBaseDirName: String = _
  var testingBaseDirPath: Path = _
  var localFileSystem: LocalFileSystem = _

  override def beforeEach(): Unit = {
    super.beforeEach()
    testingBaseDir = Files.createTempDirectory("test_output")
    testingBaseDirName = testingBaseDir.toString
    localFileSystem = FileSystem.getLocal(new Configuration())
    testingBaseDirPath = localFileSystem.makeQualified(new Path(testingBaseDirName))
  }

  override def afterEach(): Unit = {
    super.afterEach()
    FileUtils.deleteDirectory(testingBaseDir.toFile)
  }

  def createFile(relativePath: Path, content: Array[Byte]): SerializableFileStatus = {
    val path = new Path(testingBaseDirPath, relativePath)
    localFileSystem.mkdirs(path.getParent)
    val in = new ByteArrayInputStream(content)
    val out = localFileSystem.create(path)
    IOUtils.copy(in, out)
    in.close()
    out.close()
    SerializableFileStatus(localFileSystem.getFileStatus(path))
  }

  def fileStatusToResult(f: SerializableFileStatus): FileListing = {
    FileListing(f.getPath.toString, if (f.isFile) Some(f.getLen) else None)
  }

} 
Example 57
Source File: DesignSerializationTest.scala    From airframe   with Apache License 2.0 5 votes vote down vote up
package wvlet.airframe

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import DesignTest._
import wvlet.airspec.AirSpec

object DesignSerializationTest {
  def serialize(d: Design): Array[Byte] = {
    val b  = new ByteArrayOutputStream()
    val oo = new ObjectOutputStream(b)
    oo.writeObject(d)
    oo.close()
    b.toByteArray
  }

  def deserialize(b: Array[Byte]): Design = {
    val in  = new ByteArrayInputStream(b)
    val oi  = new ObjectInputStream(in)
    val obj = oi.readObject().asInstanceOf[Design]
    obj.asInstanceOf[Design]
  }
}


class DesignSerializationTest extends AirSpec {
  import DesignSerializationTest._

  def `be serializable`: Unit = {
    val b   = serialize(d1)
    val d1s = deserialize(b)
    d1s shouldBe (d1)
  }

  def `serialize instance binding`: Unit = {
    val d  = Design.blanc.bind[Message].toInstance(Hello("world"))
    val b  = serialize(d)
    val ds = deserialize(b)
    ds shouldBe (d)
  }
} 
Example 58
Source File: SerializationTest.scala    From airframe   with Apache License 2.0 5 votes vote down vote up
package wvlet.log

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import wvlet.log.io.IOUtil

object SerializationTest {
  trait A extends LogSupport {
    debug("new A")
    def hello = debug("hello")
  }
}


class SerializationTest extends Spec {
  import SerializationTest._

  def `logger should be serializable`: Unit = {
    val a = new A {}
    val b = new ByteArrayOutputStream()
    IOUtil.withResource(new ObjectOutputStream(b)) { out => out.writeObject(a) }
    val ser = b.toByteArray
    IOUtil.withResource(new ObjectInputStream(new ByteArrayInputStream(ser))) { in =>
      debug("deserialization")
      val a = in.readObject().asInstanceOf[A]
      a.hello
    }
  }
} 
Example 59
Source File: TypeInformationDataInputFormat.scala    From milan   with Apache License 2.0 5 votes vote down vote up
package com.amazon.milan.compiler.flink.dataformats

import java.io.{ByteArrayInputStream, EOFException, InputStream}

import com.amazon.milan.dataformats.DataInputFormat
import com.amazon.milan.typeutil.TypeDescriptor
import org.apache.flink.api.common.ExecutionConfig
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.common.typeutils.TypeSerializer
import org.apache.flink.core.memory.DataInputViewStreamWrapper



class TypeInformationDataInputFormat[T](typeInfo: TypeInformation[T]) extends DataInputFormat[T] {
  @transient private lazy val serializer = this.createSerializer()

  override def getGenericArguments: List[TypeDescriptor[_]] = {
    // This class is not intended to be serialized by GenericTypedJsonSerializer, so this should not be called.
    throw new UnsupportedOperationException()
  }

  override def setGenericArguments(genericArgs: List[TypeDescriptor[_]]): Unit = {
    // This class is not intended to be deserialized by GenericTypedJsonDeserializer, so this should not be called.
    throw new UnsupportedOperationException()
  }

  override def readValue(bytes: Array[Byte], offset: Int, length: Int): Option[T] = {
    val input = new DataInputViewStreamWrapper(new ByteArrayInputStream(bytes, offset, length))
    Some(this.serializer.deserialize(input))
  }

  override def readValues(stream: InputStream): TraversableOnce[T] = {
    val input = new DataInputViewStreamWrapper(stream)
    Stream.continually(0)
      .map(_ =>
        try {
          Some(this.serializer.deserialize(input))
        }
        catch {
          case _: EOFException => None
        })
      .takeWhile(_.isDefined)
      .map(_.get)
  }

  private def createSerializer(): TypeSerializer[T] = {
    val config = new ExecutionConfig()
    this.typeInfo.createSerializer(config)
  }
} 
Example 60
Source File: ObjectStreamUtil.scala    From milan   with Apache License 2.0 5 votes vote down vote up
package com.amazon.milan.compiler.flink.testutil

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

object ObjectStreamUtil {
  def serializeAndDeserialize[T](value: T): T = {
    val outputStream = new ByteArrayOutputStream()
    val objectOutputStream = new ObjectOutputStream(outputStream)
    objectOutputStream.writeObject(value)

    val bytes = outputStream.toByteArray
    val objectInputStream = new ObjectInputStream(new ByteArrayInputStream(bytes))
    objectInputStream.readObject().asInstanceOf[T]
  }
} 
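
A small usage sketch (not part of the original test utilities): any value whose runtime class is Serializable can be round-tripped this way.

import com.amazon.milan.compiler.flink.testutil.ObjectStreamUtil

case class Sample(name: String, count: Int) // case classes are Serializable by default

val original = Sample("abc", 42)
val copy     = ObjectStreamUtil.serializeAndDeserialize(original)
assert(copy == original)
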
Example 61
Source File: package.scala    From milan   with Apache License 2.0 5 votes vote down vote up
package com.amazon.milan.compiler.flink

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import com.amazon.milan.compiler.flink.runtime.{UnwrapRecordsMapFunction, WrapRecordsMapFunction}
import com.amazon.milan.compiler.flink.testing.IntKeyValueRecord
import com.amazon.milan.compiler.flink.types.{RecordWrapper, RecordWrapperTypeInformation}
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.common.typeutils.TypeSerializer
import org.apache.flink.api.java.typeutils.ResultTypeQueryable
import org.apache.flink.core.memory.{DataInputView, DataInputViewStreamWrapper, DataOutputView, DataOutputViewStreamWrapper}
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.datastream.DataStream
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment

import scala.language.implicitConversions
import scala.util.Random


package object testutil {
  def getTestExecutionEnvironment: StreamExecutionEnvironment = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    env.setBufferTimeout(0)
    env
  }

  def copyWithSerializer[T](value: T, serializer: TypeSerializer[T]): T = {
    val outputStream = new ByteArrayOutputStream()
    val outputView = new DataOutputViewStreamWrapper(outputStream)
    serializer.serialize(value, outputView)

    val bytes = outputStream.toByteArray
    val inputStream = new ByteArrayInputStream(bytes)
    val inputView = new DataInputViewStreamWrapper(inputStream)
    serializer.deserialize(inputView)
  }

  def copyData[T](writeValue: DataOutputView => Unit, readValue: DataInputView => T): T = {
    val outputStream = new ByteArrayOutputStream()
    val outputView = new DataOutputViewStreamWrapper(outputStream)
    writeValue(outputView)

    val bytes = outputStream.toByteArray
    val inputStream = new ByteArrayInputStream(bytes)
    val inputView = new DataInputViewStreamWrapper(inputStream)
    readValue(inputView)
  }

  def generateIntKeyValueRecords(recordCount: Int, keyCount: Int, maxValue: Int): List[IntKeyValueRecord] = {
    val rand = new Random(0)
    List.tabulate(recordCount)(_ => IntKeyValueRecord(rand.nextInt(keyCount), rand.nextInt(maxValue + 1)))
  }

  implicit class WrappedDataStreamExtensions[T >: Null, TKey >: Null <: Product](dataStream: DataStream[RecordWrapper[T, TKey]]) {
    def unwrap(recordTypeInformation: TypeInformation[T]): DataStream[T] = {
      val mapper = new UnwrapRecordsMapFunction[T, TKey](recordTypeInformation)
      this.dataStream.map(mapper)
    }

    def unwrap(): DataStream[T] = {
      val recordType = this.dataStream.getType.asInstanceOf[RecordWrapperTypeInformation[T, TKey]].valueTypeInformation
      this.unwrap(recordType)
    }
  }

  implicit class DataStreamExtensions[T >: Null](dataStream: DataStream[T]) {
    def wrap(recordTypeInformation: TypeInformation[T]): DataStream[RecordWrapper[T, Product]] = {
      val mapper = new WrapRecordsMapFunction[T](recordTypeInformation)
      this.dataStream.map(mapper)
    }

    def wrap(): DataStream[RecordWrapper[T, Product]] = {
      val recordType = this.dataStream.asInstanceOf[ResultTypeQueryable[T]].getProducedType
      this.wrap(recordType)
    }
  }

} 
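
As an illustration (assumed, not from the project), copyWithSerializer can round-trip a single value through Flink's serialization stack with any TypeInformation, for example the built-in integer type information:

import com.amazon.milan.compiler.flink.testutil._
import org.apache.flink.api.common.ExecutionConfig
import org.apache.flink.api.common.typeinfo.BasicTypeInfo

// serialize an Integer to bytes and deserialize it again via byte-array streams
val serializer = BasicTypeInfo.INT_TYPE_INFO.createSerializer(new ExecutionConfig())
val copied     = copyWithSerializer(Integer.valueOf(42), serializer)
assert(copied == 42)
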
Example 62
Source File: HiveQlParserImplTest.scala    From schedoscope   with Apache License 2.0 5 votes vote down vote up
package org.schedoscope.lineage.parser

import java.io.ByteArrayInputStream

import org.apache.calcite.avatica.util.Casing
import org.apache.calcite.sql.{SqlCall, SqlKind, SqlSelect}
import org.scalatest.{FlatSpec, Matchers}

class HiveQlParserImplTest extends FlatSpec with Matchers {
  "The HiveQlParserImpl" should "parse the <=> operator correctly" in {
    val sql = "SELECT * FROM a WHERE x <=> y"
    val stream = new ByteArrayInputStream(sql.getBytes)
    val parser = new HiveQlParserImpl(stream)
    parser.setIdentifierMaxLength(255)
    parser.setUnquotedCasing(Casing.UNCHANGED)

    val sqlNode = parser.parseSqlStmtEof
    val select = sqlNode.asInstanceOf[SqlSelect]
    val where = select.getWhere.asInstanceOf[SqlCall]
    where.getOperator.getKind should be(SqlKind.EQUALS)
  }
} 
Example 63
Source File: CodecFactory.scala    From OAP   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.datasources.oap.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream}

import scala.collection.mutable

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.compress.{CodecPool, CompressionCodec}
import org.apache.hadoop.util.ReflectionUtils
import org.apache.parquet.format.{CompressionCodec => ParquetCodec}
import org.apache.parquet.hadoop.metadata.CompressionCodecName

// This is a simplified version of Parquet's CodecFactory.
// TODO: [linhong] Change this into Scala code style
private[oap] class CodecFactory(conf: Configuration) {

  private val compressors = new mutable.HashMap[ParquetCodec, BytesCompressor]
  private val decompressors = new mutable.HashMap[ParquetCodec, BytesDecompressor]
  private val codecByName = new mutable.HashMap[String, CompressionCodec]

  private def getCodec(codecString: String): Option[CompressionCodec] = {
    codecByName.get(codecString) match {
      case Some(codec) => Some(codec)
      case None =>
        val codecName = CompressionCodecName.valueOf(codecString)
        val codecClass = codecName.getHadoopCompressionCodecClass
        if (codecClass == null) {
          None
        } else {
          val codec = ReflectionUtils.newInstance(codecClass, conf).asInstanceOf[CompressionCodec]
          codecByName.put(codecString, codec)
          Some(codec)
        }
    }
  }

  def getCompressor(codec: ParquetCodec): BytesCompressor = {
    compressors.getOrElseUpdate(codec, new BytesCompressor(getCodec(codec.name)))
  }

  def getDecompressor(codec: ParquetCodec): BytesDecompressor = {
    decompressors.getOrElseUpdate(codec, new BytesDecompressor(getCodec(codec.name)))
  }

  def release(): Unit = {
    compressors.values.foreach(_.release())
    compressors.clear()
    decompressors.values.foreach(_.release())
    decompressors.clear()
  }
}

private[oap] class BytesCompressor(compressionCodec: Option[CompressionCodec]) {

  private lazy val compressedOutBuffer = new ByteArrayOutputStream()
  private lazy val compressor = compressionCodec match {
    case Some(codec) => CodecPool.getCompressor(codec)
    case None => null
  }

  def compress(bytes: Array[Byte]): Array[Byte] = {
    compressionCodec match {
      case Some(codec) =>
        compressedOutBuffer.reset()
        // null compressor for non-native gzip
        if (compressor != null) {
          compressor.reset()
        }
        val cos = codec.createOutputStream(compressedOutBuffer, compressor)
        cos.write(bytes)
        cos.finish()
        cos.close()
        compressedOutBuffer.toByteArray
      case None => bytes
    }
  }

  def release(): Unit = CodecPool.returnCompressor(compressor)
}

private[oap] class BytesDecompressor(compressionCodec: Option[CompressionCodec]) {

  private lazy val decompressor = compressionCodec match {
    case Some(codec) => CodecPool.getDecompressor(codec)
    case None => null
  }

  def decompress(bytes: Array[Byte], uncompressedSize: Int): Array[Byte] = {
    compressionCodec match {
      case Some(codec) =>
        decompressor.reset()
        val cis = codec.createInputStream(new ByteArrayInputStream(bytes), decompressor)
        val decompressed = new Array[Byte](uncompressedSize)
        new DataInputStream(cis).readFully(decompressed)
        decompressed
      case None => bytes
    }
  }

  def release(): Unit = CodecPool.returnDecompressor(decompressor)
} 
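
A compress/decompress round trip with this factory might look like the sketch below. It is only an illustration: the classes are private[oap], so the code would have to live inside that package, and the chosen Hadoop codec (GZIP here) must be available.

import org.apache.hadoop.conf.Configuration
import org.apache.parquet.format.{CompressionCodec => ParquetCodec}

val factory = new CodecFactory(new Configuration())
val bytes   = "some column data".getBytes("UTF-8")

val compressed = factory.getCompressor(ParquetCodec.GZIP).compress(bytes)
val restored   = factory.getDecompressor(ParquetCodec.GZIP).decompress(compressed, bytes.length)
assert(restored.sameElements(bytes))

factory.release()
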
Example 64
Source File: Command.scala    From scala-ssh   with Apache License 2.0 5 votes vote down vote up
package com.decodified.scalassh

import net.schmizz.sshj.connection.channel.direct.Session
import java.io.{ FileInputStream, File, ByteArrayInputStream, InputStream }

case class Command(command: String, input: CommandInput = CommandInput.NoInput, timeout: Option[Int] = None)

object Command {
  implicit def string2Command(cmd: String) = Command(cmd)
}

case class CommandInput(inputStream: Option[InputStream])

object CommandInput {
  lazy val NoInput = CommandInput(None)
  implicit def apply(input: String, charsetName: String = "UTF8"): CommandInput = apply(input.getBytes(charsetName))
  implicit def apply(input: Array[Byte]): CommandInput = apply(Some(new ByteArrayInputStream(input)))
  implicit def apply(input: InputStream): CommandInput = apply(Some(input))
  def fromFile(file: String): CommandInput = fromFile(new File(file))
  def fromFile(file: File): CommandInput = new FileInputStream(file)
  def fromResource(resource: String): CommandInput = getClass.getClassLoader.getResourceAsStream(resource)
}

class CommandResult(val channel: Session.Command) {
  def stdErrStream: InputStream = channel.getErrorStream
  def stdOutStream: InputStream = channel.getInputStream
  lazy val stdErrBytes = new StreamCopier().emptyToByteArray(stdErrStream)
  lazy val stdOutBytes = new StreamCopier().emptyToByteArray(stdOutStream)
  def stdErrAsString(charsetname: String = "utf8") = new String(stdErrBytes, charsetname)
  def stdOutAsString(charsetname: String = "utf8") = new String(stdOutBytes, charsetname)
  lazy val exitSignal: Option[String] = Option(channel.getExitSignal).map(_.toString)
  lazy val exitCode: Option[Int] = Option(channel.getExitStatus)
  lazy val exitErrorMessage: Option[String] = Option(channel.getExitErrorMessage)
} 
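
The implicit conversions above keep command construction terse. A small usage sketch with illustrative values (the resource path is hypothetical):

import com.decodified.scalassh.{Command, CommandInput}

// a bare string becomes a Command with no input
val simple: Command = "ls -la"

// stdin can be fed from a string (wrapped in a ByteArrayInputStream), a file or a classpath resource
val withInput    = Command("cat", CommandInput("hello world\n"))
val fromResource = Command("wc -l", CommandInput.fromResource("data/sample.txt"))
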
Example 65
Source File: CCGBankToCabochaFormat.scala    From jigg   with Apache License 2.0 5 votes vote down vote up
package jigg.nlp.ccg


object CCGBankToCabochaFormat {

  case class Opts(
    @Help(text="Path to CCGBank file") ccgbank: File = new File(""),
    @Help(text="Path to output") output: File = new File(""),
    @Help(text="Cabocha command (path to cabocha)") cabocha: String = "cabocha"
  )

  type Tree = ParseTree[NodeLabel]

  def main(args:Array[String]) = {
    val opts = CommandLineParser.readIn[Opts](args)

    val dict = new JapaneseDictionary()
    val extractors = TreeExtractor(
      new JapaneseParseTreeConverter(dict),
      new CCGBankReader)

    val trees = extractors.readTrees(opts.ccgbank, -1, true)
    val rawString = trees map (extractors.treeConv.toSentenceFromLabelTree) map (_.wordSeq.mkString("")) mkString ("\n")
    val is = new java.io.ByteArrayInputStream(rawString.getBytes("UTF-8"))
    val out = (Process(s"${opts.cabocha} -f1") #< is).lineStream_!

    val os = jigg.util.IOUtil.openOut(opts.output.getPath)
    out foreach { line =>
      os.write(line + "\n")
    }
    os.flush
    os.close
  }
} 
Example 66
Source File: HadoopConfig.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.streaming.examples.fsio

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}
import scala.language.implicitConversions

import org.apache.hadoop.conf.Configuration

import org.apache.gearpump.cluster.UserConfig
import org.apache.gearpump.util.Constants._

class HadoopConfig(config: UserConfig) {

  def withHadoopConf(conf: Configuration): UserConfig = {
    config.withBytes(HADOOP_CONF, serializeHadoopConf(conf))
  }

  def hadoopConf: Configuration = deserializeHadoopConf(config.getBytes(HADOOP_CONF).get)

  private def serializeHadoopConf(conf: Configuration): Array[Byte] = {
    val out = new ByteArrayOutputStream()
    val dataOut = new DataOutputStream(out)
    conf.write(dataOut)
    dataOut.close()
    out.toByteArray
  }

  private def deserializeHadoopConf(bytes: Array[Byte]): Configuration = {
    val in = new ByteArrayInputStream(bytes)
    val dataIn = new DataInputStream(in)
    val result = new Configuration()
    result.readFields(dataIn)
    dataIn.close()
    result
  }
}

object HadoopConfig {
  def empty: HadoopConfig = new HadoopConfig(UserConfig.empty)
  def apply(config: UserConfig): HadoopConfig = new HadoopConfig(config)

  implicit def userConfigToHadoopConfig(userConf: UserConfig): HadoopConfig = {
    HadoopConfig(userConf)
  }
} 
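
A sketch of the intended round trip (values are illustrative only): serialize a Hadoop Configuration into a Gearpump UserConfig and read it back through the implicit conversion.

import org.apache.gearpump.cluster.UserConfig
import org.apache.gearpump.streaming.examples.fsio.HadoopConfig._
import org.apache.hadoop.conf.Configuration

val hadoopConf = new Configuration()
hadoopConf.set("fs.defaultFS", "hdfs://localhost:9000") // illustrative setting

val userConf: UserConfig = HadoopConfig.empty.withHadoopConf(hadoopConf)

// userConfigToHadoopConfig makes hadoopConf available directly on the UserConfig
val restored: Configuration = userConf.hadoopConf
assert(restored.get("fs.defaultFS") == "hdfs://localhost:9000")
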
Example 67
Source File: WindowsPluginFrontendSpec.scala    From protoc-bridge   with Apache License 2.0 5 votes vote down vote up
package protocbridge.frontend

import java.io.ByteArrayInputStream

import protocbridge.ProtocCodeGenerator

import scala.sys.process.ProcessLogger
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.must.Matchers

class WindowsPluginFrontendSpec extends AnyFlatSpec with Matchers {
  if (PluginFrontend.isWindows) {
    it must "execute a program that forwards input and output to given stream" in {
      val toSend = "ping"
      val toReceive = "pong"

      val fakeGenerator = new ProtocCodeGenerator {
        override def run(request: Array[Byte]): Array[Byte] = {
          request mustBe toSend.getBytes
          toReceive.getBytes
        }
      }
      val (path, state) = WindowsPluginFrontend.prepare(fakeGenerator)
      val actualOutput = scala.collection.mutable.Buffer.empty[String]
      val process = sys.process
        .Process(path.toAbsolutePath.toString)
        .#<(new ByteArrayInputStream(toSend.getBytes))
        .run(ProcessLogger(o => actualOutput.append(o)))
      process.exitValue()
      actualOutput.mkString mustBe toReceive
      WindowsPluginFrontend.cleanup(state)
    }
  }
} 
Example 68
Source File: PluginFrontendSpec.scala    From protoc-bridge   with Apache License 2.0 5 votes vote down vote up
package protocbridge.frontend

import java.io.ByteArrayInputStream

import com.google.protobuf.compiler.PluginProtos.CodeGeneratorResponse
import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.must.Matchers

class PluginFrontendSpec
    extends AnyFlatSpec
    with Matchers
    with ScalaCheckDrivenPropertyChecks {
  def expected(error: String) =
    CodeGeneratorResponse.newBuilder().setError(error).build()

  def actual(error: String) =
    CodeGeneratorResponse.parseFrom(
      PluginFrontend.createCodeGeneratorResponseWithError(error)
    )

  "createCodeGeneratorResponseWithError" should "create valid objects" in {
    actual("") must be(expected(""))
    actual("foo") must be(expected("foo"))
    actual("\u2035") must be(expected("\u2035"))
    actual("a" * 128) must be(expected("a" * 128))
    actual("a" * 256) must be(expected("a" * 256))
    actual("\u3714\u3715" * 256) must be(expected("\u3714\u3715" * 256))
    actual("abc" * 1000) must be(expected("abc" * 1000))
    forAll(MinSuccessful(1000)) { s: String =>
      actual(s) must be(expected(s))
    }

  }

  "readInputStreamToByteArray" should "read the input stream to a byte array" in {
    def readInput(bs: Array[Byte]) =
      PluginFrontend.readInputStreamToByteArray(new ByteArrayInputStream(bs))

    readInput(Array.empty) must be(Array())
    readInput(Array[Byte](1, 2, 3, 4)) must be(Array(1, 2, 3, 4))
    val special = Array.tabulate[Byte](10000) { n =>
      (n % 37).toByte
    }
    readInput(special) must be(special)
  }
} 
Example 69
Source File: StreamingParserSpec.scala    From cormorant   with MIT License 5 votes vote down vote up
package io.chrisdavenport.cormorant
package fs2

import cats.data.NonEmptyList
import cats.effect._
import cats.effect.testing.specs2.CatsIO
import _root_.fs2.Stream
import io.chrisdavenport.cormorant._
// import io.chrisdavenport.cormorant.implicits._
// import scala.concurrent.duration._
import java.io.ByteArrayInputStream
import java.io.InputStream

class StreamingParserSpec extends CormorantSpec with CatsIO {

  def ruinDelims(str: String) = augmentString(str).flatMap {
    case '\n' => "\r\n"
    case c => c.toString
  }

  "Streaming Parser" should {
    // https://github.com/ChristopherDavenport/cormorant/pull/84
    "parse a known value that did not work with streaming" in {
      val x = """First Name,Last Name,Email
Larry,Bordowitz,[email protected]
Anonymous,Hippopotamus,[email protected]"""
      val source = IO.pure(new ByteArrayInputStream(ruinDelims(x).getBytes): InputStream)
      Stream.resource(Blocker[IO]).flatMap{blocker => 
        _root_.fs2.io.readInputStream(
          source,
          chunkSize = 4,
          blocker
        )
      }
        .through(_root_.fs2.text.utf8Decode)
        .through(parseComplete[IO])
        .compile
        .toVector
        .map{ v => 
          val header = CSV.Headers(NonEmptyList.of(CSV.Header("First Name"), CSV.Header("Last Name"), CSV.Header("Email")))
          val row1 = CSV.Row(NonEmptyList.of(CSV.Field("Larry"), CSV.Field("Bordowitz"), CSV.Field("[email protected]")))
          val row2 = CSV.Row(NonEmptyList.of(CSV.Field("Anonymous"), CSV.Field("Hippopotamus"), CSV.Field("[email protected]")))
          Vector(
            (header, row1),
            (header, row2)
          ) must_=== v
        }
    }
  }

  

} 
Example 70
Source File: TestHelper.scala    From spark-summit-2018   with GNU General Public License v3.0 5 votes vote down vote up
package com.twilio.open.streaming.trend.discovery

import java.io.{ByteArrayInputStream, InputStream}

import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.google.protobuf.Message
import com.googlecode.protobuf.format.JsonFormat
import com.holdenkarau.spark.testing.{LocalSparkContext, SparkContextProvider}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers, Suite}
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.Seq
import scala.io.Source
import scala.reflect.ClassTag
import scala.reflect.classTag

object TestHelper {
  val log: Logger = LoggerFactory.getLogger("com.twilio.open.streaming.trend.discovery.TestHelper")
  val mapper: ObjectMapper = {
    val m = new ObjectMapper()
    m.registerModule(DefaultScalaModule)
  }

  val jsonFormat: JsonFormat = new JsonFormat

  def loadScenario[T<: Message : ClassTag](file: String): Seq[T] = {
    val fileString = Source.fromFile(file).mkString
    val parsed = mapper.readValue(fileString, classOf[Sceanario])
    parsed.input.map { data =>
      val json = mapper.writeValueAsString(data)
      convert[T](json)
    }
  }

  def convert[T<: Message : ClassTag](json: String): T = {
    val clazz = classTag[T].runtimeClass
    val builder = clazz.getMethod("newBuilder").invoke(clazz).asInstanceOf[Message.Builder]
    try {
      val input: InputStream = new ByteArrayInputStream(json.getBytes())
      jsonFormat.merge(input, builder)
      builder.build().asInstanceOf[T]
    } catch {
      case e: Exception =>
        throw e
    }
  }

}

@SerialVersionUID(1L)
case class KafkaDataFrame(key: Array[Byte], topic: Array[Byte], value: Array[Byte]) extends Serializable

case class Sceanario(input: Seq[Any], expected: Option[Any] = None)

trait SparkSqlTest extends BeforeAndAfterAll with SparkContextProvider {
  self: Suite =>

  @transient var _sparkSql: SparkSession = _
  @transient private var _sc: SparkContext = _

  override def sc: SparkContext = _sc

  def conf: SparkConf

  def sparkSql: SparkSession = _sparkSql

  override def beforeAll() {
    _sparkSql = SparkSession.builder().config(conf).getOrCreate()

    _sc = _sparkSql.sparkContext
    setup(_sc)
    super.beforeAll()
  }

  override def afterAll() {
    try {
      _sparkSql.close()
      _sparkSql = null
      LocalSparkContext.stop(_sc)
      _sc = null
    } finally {
      super.afterAll()
    }
  }

} 
Example 71
Source File: HDFSCredentialProvider.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.deploy.yarn.security

import java.io.{ByteArrayInputStream, DataInputStream}

import scala.collection.JavaConverters._

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier
import org.apache.hadoop.mapred.Master
import org.apache.hadoop.security.Credentials

import org.apache.spark.{SparkConf, SparkException}
import org.apache.spark.deploy.yarn.config._
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config._

private[security] class HDFSCredentialProvider extends ServiceCredentialProvider with Logging {
  // Token renewal interval. This value is set on the first call; None means no token renewer
  // was specified, so the token renewal interval cannot be obtained.
  private var tokenRenewalInterval: Option[Long] = null

  override val serviceName: String = "hdfs"

  override def obtainCredentials(
      hadoopConf: Configuration,
      sparkConf: SparkConf,
      creds: Credentials): Option[Long] = {
    // NameNode to access, used to get tokens from different FileSystems
    nnsToAccess(hadoopConf, sparkConf).foreach { dst =>
      val dstFs = dst.getFileSystem(hadoopConf)
      logInfo("getting token for namenode: " + dst)
      dstFs.addDelegationTokens(getTokenRenewer(hadoopConf), creds)
    }

    // Get the token renewal interval if it is not set. It will only be called once.
    if (tokenRenewalInterval == null) {
      tokenRenewalInterval = getTokenRenewalInterval(hadoopConf, sparkConf)
    }

    // Get the time of next renewal.
    tokenRenewalInterval.map { interval =>
      creds.getAllTokens.asScala
        .filter(_.getKind == DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
        .map { t =>
          val identifier = new DelegationTokenIdentifier()
          identifier.readFields(new DataInputStream(new ByteArrayInputStream(t.getIdentifier)))
          identifier.getIssueDate + interval
      }.foldLeft(0L)(math.max)
    }
  }

  private def getTokenRenewalInterval(
      hadoopConf: Configuration, sparkConf: SparkConf): Option[Long] = {
    // We cannot use the tokens generated with renewer yarn. Trying to renew
    // those will fail with an access control issue. So create new tokens with the logged in
    // user as renewer.
    sparkConf.get(PRINCIPAL).flatMap { renewer =>
      val creds = new Credentials()
      nnsToAccess(hadoopConf, sparkConf).foreach { dst =>
        val dstFs = dst.getFileSystem(hadoopConf)
        dstFs.addDelegationTokens(renewer, creds)
      }
      val hdfsToken = creds.getAllTokens.asScala
        .find(_.getKind == DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
      hdfsToken.map { t =>
        val newExpiration = t.renew(hadoopConf)
        val identifier = new DelegationTokenIdentifier()
        identifier.readFields(new DataInputStream(new ByteArrayInputStream(t.getIdentifier)))
        val interval = newExpiration - identifier.getIssueDate
        logInfo(s"Renewal Interval is $interval")
        interval
      }
    }
  }

  private def getTokenRenewer(conf: Configuration): String = {
    val delegTokenRenewer = Master.getMasterPrincipal(conf)
    logDebug("delegation token renewer is: " + delegTokenRenewer)
    if (delegTokenRenewer == null || delegTokenRenewer.length() == 0) {
      val errorMessage = "Can't get Master Kerberos principal for use as renewer"
      logError(errorMessage)
      throw new SparkException(errorMessage)
    }

    delegTokenRenewer
  }

  private def nnsToAccess(hadoopConf: Configuration, sparkConf: SparkConf): Set[Path] = {
    sparkConf.get(NAMENODES_TO_ACCESS).map(new Path(_)).toSet +
      sparkConf.get(STAGING_DIR).map(new Path(_))
        .getOrElse(FileSystem.get(hadoopConf).getHomeDirectory)
  }
} 
Example 72
Source File: GenericAvroSerializerSuite.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Input, Output}
import org.apache.avro.{Schema, SchemaBuilder}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SharedSparkContext, SparkFunSuite}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema : Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }

  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
} 
Example 73
Source File: CompressionCodecSuite.scala    From SparkCore   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import org.scalatest.FunSuite

import org.apache.spark.SparkConf

class CompressionCodecSuite extends FunSuite {
  val conf = new SparkConf(false)

  def testCodec(codec: CompressionCodec) {
    // Write 1000 integers to the output stream, compressed.
    val outputStream = new ByteArrayOutputStream()
    val out = codec.compressedOutputStream(outputStream)
    for (i <- 1 until 1000) {
      out.write(i % 256)
    }
    out.close()

    // Read the 1000 integers back.
    val inputStream = new ByteArrayInputStream(outputStream.toByteArray)
    val in = codec.compressedInputStream(inputStream)
    for (i <- 1 until 1000) {
      assert(in.read() === i % 256)
    }
    in.close()
  }

  test("default compression codec") {
    val codec = CompressionCodec.createCodec(conf)
    assert(codec.getClass === classOf[SnappyCompressionCodec])
    testCodec(codec)
  }

  test("lz4 compression codec") {
    val codec = CompressionCodec.createCodec(conf, classOf[LZ4CompressionCodec].getName)
    assert(codec.getClass === classOf[LZ4CompressionCodec])
    testCodec(codec)
  }

  test("lz4 compression codec short form") {
    val codec = CompressionCodec.createCodec(conf, "lz4")
    assert(codec.getClass === classOf[LZ4CompressionCodec])
    testCodec(codec)
  }

  test("lzf compression codec") {
    val codec = CompressionCodec.createCodec(conf, classOf[LZFCompressionCodec].getName)
    assert(codec.getClass === classOf[LZFCompressionCodec])
    testCodec(codec)
  }

  test("lzf compression codec short form") {
    val codec = CompressionCodec.createCodec(conf, "lzf")
    assert(codec.getClass === classOf[LZFCompressionCodec])
    testCodec(codec)
  }

  test("snappy compression codec") {
    val codec = CompressionCodec.createCodec(conf, classOf[SnappyCompressionCodec].getName)
    assert(codec.getClass === classOf[SnappyCompressionCodec])
    testCodec(codec)
  }

  test("snappy compression codec short form") {
    val codec = CompressionCodec.createCodec(conf, "snappy")
    assert(codec.getClass === classOf[SnappyCompressionCodec])
    testCodec(codec)
  }

  test("bad compression codec") {
    intercept[IllegalArgumentException] {
      CompressionCodec.createCodec(conf, "foobar")
    }
  }
} 
Example 74
Source File: package.scala    From chronicler   with Apache License 2.0 5 votes vote down vote up
package com.github.fsanaulla.chronicler.core

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.zip.{GZIPInputStream, GZIPOutputStream}

package object gzip {

  
  def compress(data: Array[Byte]): (Int, Array[Byte]) = {
    val bArrOut    = new ByteArrayOutputStream()
    val gzippedOut = new GZIPOutputStream(bArrOut)

    gzippedOut.write(data)
    gzippedOut.close()

    val gzippedData   = bArrOut.toByteArray
    val contentLength = gzippedData.length
    contentLength -> gzippedData
  }

  def decompress(data: Array[Byte]): Array[Byte] = {
    val gis = new GZIPInputStream(new ByteArrayInputStream(data))
    val out = new ByteArrayOutputStream()
    val buf = new Array[Byte](1024)

    var res = 0
    while (res >= 0) {
      res = gis.read(buf, 0, buf.length)
      if (res > 0) out.write(buf, 0, res)
    }

    out.toByteArray
  }
} 
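
For instance (a minimal sketch, not taken from the library's tests), a payload can be gzipped and restored like this:

import com.github.fsanaulla.chronicler.core.gzip

val payload = "cpu,host=serverA usage=0.64".getBytes("UTF-8") // illustrative line-protocol point

val (contentLength, compressed) = gzip.compress(payload)
val restored                    = gzip.decompress(compressed)

assert(contentLength == compressed.length)
assert(restored.sameElements(payload))
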
Example 75
Source File: S3Util.scala    From redshift-fake-driver   with Apache License 2.0 5 votes vote down vote up
package jp.ne.opt.redshiftfake

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.charset.StandardCharsets
import java.util.zip.GZIPOutputStream

import com.amazonaws.services.s3.AmazonS3
import com.amazonaws.services.s3.model.{ObjectMetadata, PutObjectRequest}
import jp.ne.opt.redshiftfake.util.Loan.using
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream

object S3Util {

   def loadGzippedDataToS3(s3Client: AmazonS3, data: String, bucket: String, key: String): Unit = {
    val arrayOutputStream = new ByteArrayOutputStream()
    using(new GZIPOutputStream(arrayOutputStream)) (gzipOutStream => {
      gzipOutStream.write(data.getBytes(StandardCharsets.UTF_8))
    })
    val buf = arrayOutputStream.toByteArray
    val metadata = new ObjectMetadata
    metadata.setContentLength(buf.length)
    val request = new PutObjectRequest(bucket, key, new ByteArrayInputStream(buf), metadata)

    s3Client.putObject(request)
  }

   def loadBzipped2DataToS3(s3Client: AmazonS3, data: String, bucket: String, key: String): Unit = {
    val arrayOutputStream = new ByteArrayOutputStream()
    using(new BZip2CompressorOutputStream(arrayOutputStream)) (bzip2OutStream => {
      bzip2OutStream.write(data.getBytes(StandardCharsets.UTF_8))
    })
    val buf = arrayOutputStream.toByteArray
    val metadata = new ObjectMetadata
    metadata.setContentLength(buf.length)
    val request = new PutObjectRequest(bucket, key, new ByteArrayInputStream(buf), metadata)

    s3Client.putObject(request)
  }

   def loadDataToS3(s3Client: AmazonS3, data: String, bucket: String, key: String): Unit = {
    val buf = data.getBytes
    val metadata = new ObjectMetadata
    metadata.setContentLength(buf.length)
    val request = new PutObjectRequest(bucket, key, new ByteArrayInputStream(buf), metadata)

    s3Client.putObject(request)
  }
} 
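
Each helper needs an AmazonS3 client and a target bucket. A hedged sketch (client, bucket and keys are placeholders; the AWS SDK v1 client builder is assumed, and a local or mock S3 endpoint could be substituted):

import com.amazonaws.services.s3.AmazonS3ClientBuilder
import jp.ne.opt.redshiftfake.S3Util

val s3Client = AmazonS3ClientBuilder.defaultClient() // placeholder client configuration

S3Util.loadDataToS3(s3Client, "a|1\nb|2\n", "my-test-bucket", "data/rows.txt")
S3Util.loadGzippedDataToS3(s3Client, "a|1\nb|2\n", "my-test-bucket", "data/rows.gz")
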
Example 76
Source File: AvroTypeSpec.scala    From shapeless-datatype   with Apache License 2.0 5 votes vote down vote up
package shapeless.datatype.avro

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.net.URI
import java.nio.ByteBuffer

import com.google.protobuf.ByteString
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DecoderFactory, EncoderFactory}
import org.joda.time.Instant
import org.scalacheck.Prop.forAll
import org.scalacheck.ScalacheckShapeless._
import org.scalacheck._
import shapeless._
import shapeless.datatype.record._

import scala.reflect.runtime.universe._

object AvroTypeSpec extends Properties("AvroType") {
  import shapeless.datatype.test.Records._
  import shapeless.datatype.test.SerializableUtils._

  implicit def compareByteArrays(x: Array[Byte], y: Array[Byte]) = java.util.Arrays.equals(x, y)
  implicit def compareIntArrays(x: Array[Int], y: Array[Int]) = java.util.Arrays.equals(x, y)

  def roundTrip[A: TypeTag, L <: HList](m: A)(implicit
    gen: LabelledGeneric.Aux[A, L],
    fromL: FromAvroRecord[L],
    toL: ToAvroRecord[L],
    mr: MatchRecord[L]
  ): Boolean = {
    val t = ensureSerializable(AvroType[A])
    val f1: SerializableFunction[A, GenericRecord] =
      new SerializableFunction[A, GenericRecord] {
        override def apply(m: A): GenericRecord = t.toGenericRecord(m)
      }
    val f2: SerializableFunction[GenericRecord, Option[A]] =
      new SerializableFunction[GenericRecord, Option[A]] {
        override def apply(m: GenericRecord): Option[A] = t.fromGenericRecord(m)
      }
    val toFn = ensureSerializable(f1)
    val fromFn = ensureSerializable(f2)
    val copy = fromFn(roundTripRecord(toFn(m)))
    val rm = RecordMatcher[A]
    copy.exists(rm(_, m))
  }

  def roundTripRecord(r: GenericRecord): GenericRecord = {
    val writer = new GenericDatumWriter[GenericRecord](r.getSchema)
    val baos = new ByteArrayOutputStream()
    val encoder = EncoderFactory.get().binaryEncoder(baos, null)
    writer.write(r, encoder)
    encoder.flush()
    baos.close()
    val bytes = baos.toByteArray

    val reader = new GenericDatumReader[GenericRecord](r.getSchema)
    val bais = new ByteArrayInputStream(bytes)
    val decoder = DecoderFactory.get().binaryDecoder(bais, null)
    reader.read(null, decoder)
  }

  implicit val byteStringAvroType = AvroType.at[ByteString](Schema.Type.BYTES)(
    v => ByteString.copyFrom(v.asInstanceOf[ByteBuffer]),
    v => ByteBuffer.wrap(v.toByteArray)
  )
  implicit val instantAvroType =
    AvroType.at[Instant](Schema.Type.LONG)(v => new Instant(v.asInstanceOf[Long]), _.getMillis)
  property("required") = forAll { m: Required => roundTrip(m) }
  property("optional") = forAll { m: Optional => roundTrip(m) }
  property("repeated") = forAll { m: Repeated => roundTrip(m) }
  property("mixed") = forAll { m: Mixed => roundTrip(m) }
  property("nested") = forAll { m: Nested => roundTrip(m) }
  property("seqs") = forAll { m: Seqs => roundTrip(m) }

  implicit val uriAvroType =
    AvroType.at[URI](Schema.Type.STRING)(v => URI.create(v.toString), _.toString)
  property("custom") = forAll { m: Custom => roundTrip(m) }
} 
Example 77
Source File: SerializableUtils.scala    From protobuf-generic   with Apache License 2.0 5 votes vote down vote up
package me.lyh.protobuf.generic.test

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

object SerializableUtils {
  private def serializeToByteArray(value: Serializable): Array[Byte] = {
    val buffer = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(buffer)
    oos.writeObject(value)
    buffer.toByteArray
  }

  private def deserializeFromByteArray(encodedValue: Array[Byte]): AnyRef = {
    val ois = new ObjectInputStream(new ByteArrayInputStream(encodedValue))
    ois.readObject()
  }

  def ensureSerializable[T <: Serializable](value: T): T =
    deserializeFromByteArray(serializeToByteArray(value)).asInstanceOf[T]
} 
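A small usage sketch for ensureSerializable; the Record case class is a hypothetical stand-in for any Serializable value.

// Hypothetical payload; top-level case classes are Serializable by default.
final case class Record(id: Int, name: String)

object SerializableUtilsUsageSketch {
  def main(args: Array[String]): Unit = {
    import me.lyh.protobuf.generic.test.SerializableUtils.ensureSerializable
    val copy = ensureSerializable(Record(1, "foo"))
    assert(copy == Record(1, "foo")) // round-tripped through Java serialization
  }
}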
Example 79
Source File: ProtobufGenericSpec.scala    From protobuf-generic   with Apache License 2.0 5 votes vote down vote up
package me.lyh.protobuf.generic.test

import java.io.ByteArrayInputStream
import java.nio.ByteBuffer

import com.google.protobuf.{ByteString, Message}
import me.lyh.protobuf.generic._
import me.lyh.protobuf.generic.proto2.Schemas._

import scala.reflect.ClassTag
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class ProtobufGenericSpec extends AnyFlatSpec with Matchers {
  def roundTrip[T <: Message: ClassTag](record: T): Unit = {
    val schema = SerializableUtils.ensureSerializable(Schema.of[T])
    val schemaCopy = Schema.fromJson(schema.toJson)
    schemaCopy shouldBe schema

    val reader = SerializableUtils.ensureSerializable(GenericReader.of(schema))
    val writer = SerializableUtils.ensureSerializable(GenericWriter.of(schema))
    val jsonRecord = reader.read(record.toByteArray).toJson
    jsonRecord shouldBe reader.read(ByteBuffer.wrap(record.toByteArray)).toJson
    jsonRecord shouldBe reader.read(new ByteArrayInputStream(record.toByteArray)).toJson
    val bytes = writer.write(GenericRecord.fromJson(jsonRecord))

    val recordCopy = ProtobufType[T].parseFrom(bytes)
    recordCopy shouldBe record
  }

  "ProtobufGeneric" should "round trip required" in {
    roundTrip[Required](Records.required)
  }

  it should "round trip optional" in {
    roundTrip[Optional](Records.optional)
    roundTrip[Optional](Records.optionalEmpty)
  }

  it should "round trip repeated" in {
    roundTrip[Repeated](Records.repeated)
    roundTrip[Repeated](Records.repeatedEmpty)
    roundTrip[RepeatedPacked](Records.repeatedPacked)
    roundTrip[RepeatedUnpacked](Records.repeatedUnpacked)
  }

  it should "round trip oneofs" in {
    Records.oneOfs.foreach(roundTrip[OneOf])
  }

  it should "round trip mixed" in {
    roundTrip[Mixed](Records.mixed)
    roundTrip[Mixed](Records.mixedEmpty)
  }

  it should "round trip nested" in {
    roundTrip[Nested](Records.nested)
    roundTrip[Nested](Records.nestedEmpty)
  }

  it should "round trip with custom options" in {
    roundTrip[CustomOptionMessage](Records.customOptionMessage)
    roundTrip[CustomOptionMessage](Records.customOptionMessageEmpty)
  }

  it should "round trip with custom defaults" in {
    roundTrip[CustomDefaults](CustomDefaults.getDefaultInstance)
  }

  it should "populate default values" in {
    val schema = Schema.of[CustomDefaults]
    val record = GenericReader.of(schema).read(CustomDefaults.getDefaultInstance.toByteArray)
    record.get("double_field") shouldBe 101.0
    record.get("float_field") shouldBe 102.0f
    record.get("int32_field") shouldBe 103
    record.get("int64_field") shouldBe 104L
    record.get("uint32_field") shouldBe 105
    record.get("uint64_field") shouldBe 106L
    record.get("sint32_field") shouldBe 107
    record.get("sint64_field") shouldBe 108L
    record.get("fixed32_field") shouldBe 109
    record.get("fixed64_field") shouldBe 110L
    record.get("sfixed32_field") shouldBe 111
    record.get("sfixed64_field") shouldBe 112L
    record.get("bool_field") shouldBe true
    record.get("string_field") shouldBe "hello"
    record.get("bytes_field") shouldBe
      Base64.encode(ByteString.copyFromUtf8("world").toByteArray)
    record.get("color_field") shouldBe "GREEN"
  }
} 
Example 80
Source File: ProtobufTypeSpec.scala    From protobuf-generic   with Apache License 2.0 5 votes vote down vote up
package me.lyh.protobuf.generic.test

import java.io.ByteArrayInputStream

import com.google.protobuf.CodedInputStream
import me.lyh.protobuf.generic._
import me.lyh.protobuf.generic.proto2.Schemas._
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class ProtobufTypeSpec extends AnyFlatSpec with Matchers {
  private val pt = ProtobufType[Optional]
  private val record = Records.optional

  "ProtobufType.descriptor" should "work" in {
    pt.descriptor shouldBe Optional.getDescriptor
  }

  "ProtobufType.newBuilder" should "work" in {
    pt.newBuilder().build() shouldBe Optional.newBuilder().build()
  }

  "ProtobufType.parseFrom" should "support byte array" in {
    pt.parseFrom(record.toByteArray) shouldBe record
  }

  it should "support ByteString" in {
    pt.parseFrom(record.toByteString) shouldBe record
  }

  it should "support InputStream" in {
    pt.parseFrom(new ByteArrayInputStream(record.toByteArray)) shouldBe record
  }

  it should "support CodedInputStream" in {
    pt.parseFrom(CodedInputStream.newInstance(record.toByteArray)) shouldBe record
  }
} 
Example 81
Source File: OdfExtract.scala    From docspell   with GNU General Public License v3.0 5 votes vote down vote up
package docspell.extract.odf

import java.io.{ByteArrayInputStream, InputStream}

import scala.util.Try

import cats.effect._
import cats.implicits._
import fs2.Stream

import docspell.extract.internal.Text

import org.apache.tika.metadata.Metadata
import org.apache.tika.parser.ParseContext
import org.apache.tika.parser.odf.OpenDocumentParser
import org.apache.tika.sax.BodyContentHandler

object OdfExtract {

  def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] =
    data.compile.to(Array).map(new ByteArrayInputStream(_)).map(get)

  def get(is: InputStream) =
    Try {
      val handler  = new BodyContentHandler()
      val pctx     = new ParseContext()
      val meta     = new Metadata()
      val ooparser = new OpenDocumentParser()
      ooparser.parse(is, handler, meta, pctx)
      Text(Option(handler.toString))
    }.toEither

} 
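A minimal sketch of calling the InputStream overload of OdfExtract.get directly; the file path is a placeholder and Tika's OpenDocument parser is assumed to be on the classpath.

import java.io.ByteArrayInputStream
import java.nio.file.{Files, Paths}

import docspell.extract.odf.OdfExtract

object OdfExtractUsageSketch {
  def main(args: Array[String]): Unit = {
    // Placeholder path to an ODF document.
    val bytes = Files.readAllBytes(Paths.get("/tmp/sample.odt"))
    OdfExtract.get(new ByteArrayInputStream(bytes)) match {
      case Right(text) => println(text) // docspell's Text wrapper, printed via toString here
      case Left(err)   => err.printStackTrace()
    }
  }
}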
Example 82
Source File: RtfExtract.scala    From docspell   with GNU General Public License v3.0 5 votes vote down vote up
package docspell.extract.rtf

import java.io.{ByteArrayInputStream, InputStream}
import javax.swing.text.rtf.RTFEditorKit

import scala.util.Try

import cats.effect.Sync
import cats.implicits._
import fs2.Stream

import docspell.common.MimeType
import docspell.extract.internal.Text

object RtfExtract {

  val rtfType = MimeType.application("rtf")

  def get(is: InputStream): Either[Throwable, Text] =
    Try {
      val kit = new RTFEditorKit()
      val doc = kit.createDefaultDocument()
      kit.read(is, doc, 0)
      Text(doc.getText(0, doc.getLength))
    }.toEither

  def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] =
    data.compile.to(Array).map(new ByteArrayInputStream(_)).map(get)
} 
Example 83
Source File: PoiExtract.scala    From docspell   with GNU General Public License v3.0 5 votes vote down vote up
package docspell.extract.poi

import java.io.{ByteArrayInputStream, InputStream}

import scala.util.Try

import cats.data.EitherT
import cats.effect.Sync
import cats.implicits._
import fs2.Stream

import docspell.common._
import docspell.extract.internal.Text
import docspell.files.TikaMimetype

import org.apache.poi.hssf.extractor.ExcelExtractor
import org.apache.poi.hssf.usermodel.HSSFWorkbook
import org.apache.poi.hwpf.extractor.WordExtractor
import org.apache.poi.xssf.extractor.XSSFExcelExtractor
import org.apache.poi.xssf.usermodel.XSSFWorkbook
import org.apache.poi.xwpf.extractor.XWPFWordExtractor
import org.apache.poi.xwpf.usermodel.XWPFDocument

object PoiExtract {

  def get[F[_]: Sync](
      data: Stream[F, Byte],
      hint: MimeTypeHint
  ): F[Either[Throwable, Text]] =
    TikaMimetype.detect(data, hint).flatMap(mt => get(data, mt))

  def get[F[_]: Sync](
      data: Stream[F, Byte],
      mime: MimeType
  ): F[Either[Throwable, Text]] =
    mime match {
      case PoiType.doc =>
        getDoc(data)
      case PoiType.xls =>
        getXls(data)
      case PoiType.xlsx =>
        getXlsx(data)
      case PoiType.docx =>
        getDocx(data)
      case PoiType.msoffice =>
        EitherT(getDoc[F](data))
          .recoverWith({
            case _ => EitherT(getXls[F](data))
          })
          .value
      case PoiType.ooxml =>
        EitherT(getDocx[F](data))
          .recoverWith({
            case _ => EitherT(getXlsx[F](data))
          })
          .value
      case mt =>
        Sync[F].pure(Left(new Exception(s"Unsupported content: ${mt.asString}")))
    }

  def getDocx(is: InputStream): Either[Throwable, Text] =
    Try {
      val xt = new XWPFWordExtractor(new XWPFDocument(is))
      Text(Option(xt.getText))
    }.toEither

  def getDoc(is: InputStream): Either[Throwable, Text] =
    Try {
      val xt = new WordExtractor(is)
      Text(Option(xt.getText))
    }.toEither

  def getXlsx(is: InputStream): Either[Throwable, Text] =
    Try {
      val xt = new XSSFExcelExtractor(new XSSFWorkbook(is))
      Text(Option(xt.getText))
    }.toEither

  def getXls(is: InputStream): Either[Throwable, Text] =
    Try {
      val xt = new ExcelExtractor(new HSSFWorkbook(is))
      Text(Option(xt.getText))
    }.toEither

  def getDocx[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] =
    data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getDocx)

  def getDoc[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] =
    data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getDoc)

  def getXlsx[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] =
    data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getXlsx)

  def getXls[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] =
    data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getXls)

} 
Example 84
Source File: ImageSize.scala    From docspell   with GNU General Public License v3.0 5 votes vote down vote up
package docspell.files

import java.io.{ByteArrayInputStream, InputStream}
import java.nio.file.Path
import javax.imageio.stream.{FileImageInputStream, ImageInputStream}
import javax.imageio.{ImageIO, ImageReader}

import scala.jdk.CollectionConverters._
import scala.util.{Try, Using}

import cats.effect._
import cats.implicits._
import fs2.Stream

object ImageSize {

  
  def get[F[_]: Sync](data: Stream[F, Byte]): F[Option[Dimension]] =
    data.take(768).compile.to(Array).map { ar =>
      val iis = ImageIO.createImageInputStream(new ByteArrayInputStream(ar))
      if (iis == null) sys.error("no reader given for the array")
      else getDimension(iis)
    }

  private def getDimension(in: ImageInputStream): Option[Dimension] =
    ImageIO
      .getImageReaders(in)
      .asScala
      .to(LazyList)
      .collectFirst(Function.unlift { reader =>
        val dim = getDimension(in, reader).toOption
        reader.dispose()
        dim
      })

  private def getDimension(
      in: ImageInputStream,
      reader: ImageReader
  ): Either[Throwable, Dimension] =
    Try {
      reader.setInput(in)
      val width  = reader.getWidth(reader.getMinIndex)
      val height = reader.getHeight(reader.getMinIndex)
      Dimension(width, height)
    }.toEither
} 
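The same header-probing idea as a plain-JDK sketch, without the fs2/cats-effect wrapper, for callers that already hold the image bytes in memory; the object and method names are illustrative.

import java.io.ByteArrayInputStream
import javax.imageio.ImageIO

import scala.jdk.CollectionConverters._
import scala.util.Try

object ImageSizeSketch {

  // Probes (width, height) from in-memory image bytes without decoding the whole image.
  def dimensionsOf(bytes: Array[Byte]): Option[(Int, Int)] = {
    val iis = ImageIO.createImageInputStream(new ByteArrayInputStream(bytes))
    if (iis == null) None
    else
      ImageIO.getImageReaders(iis).asScala.to(LazyList).collectFirst(Function.unlift { reader =>
        val dim = Try {
          reader.setInput(iis)
          (reader.getWidth(reader.getMinIndex), reader.getHeight(reader.getMinIndex))
        }.toOption
        reader.dispose()
        dim
      })
  }
}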
Example 85
Source File: DataFinder.scala    From Scala-Design-Patterns-Second-Edition   with MIT License 5 votes vote down vote up
package com.ivan.nikolov.behavioral.template

import java.io.{InputStreamReader, ByteArrayInputStream}

import com.github.tototoshi.csv.CSVReader
import com.ivan.nikolov.behavioral.template.model.Person
import org.json4s.{StringInput, DefaultFormats}
import org.json4s.jackson.JsonMethods

abstract class DataFinder[T, Y] {

  def find(f: T => Option[Y]): Option[Y] =
    try {
      val data = readData()
      val parsed = parse(data)
      f(parsed)
    } finally {
      cleanup()
    }

  def readData(): Array[Byte]

  def parse(data: Array[Byte]): T

  def cleanup()
}

class JsonDataFinder extends DataFinder[List[Person], Person] {
  implicit val formats = DefaultFormats

  override def readData(): Array[Byte] = {
    val stream = this.getClass.getResourceAsStream("people.json")
    Stream.continually(stream.read).takeWhile(_ != -1).map(_.toByte).toArray
  }

  override def cleanup(): Unit = {
    System.out.println("Reading json: nothing to do.")
  }

  override def parse(data: Array[Byte]): List[Person] =
    JsonMethods.parse(StringInput(new String(data, "UTF-8"))).extract[List[Person]]
}

class CSVDataFinder extends DataFinder[List[Person], Person] {
  override def readData(): Array[Byte] = {
    val stream = this.getClass.getResourceAsStream("people.csv")
    Stream.continually(stream.read).takeWhile(_ != -1).map(_.toByte).toArray
  }

  override def cleanup(): Unit = {
    System.out.println("Reading csv: nothing to do.")
  }

  override def parse(data: Array[Byte]): List[Person] =
    CSVReader.open(new InputStreamReader(new ByteArrayInputStream(data))).all().map {
      case List(name, age, address) =>
        Person(name, age.toInt, address)
    }
}


object DataFinderExample {
  def main(args: Array[String]): Unit = {
    val jsonDataFinder: DataFinder[List[Person], Person] = new JsonDataFinder
    val csvDataFinder: DataFinder[List[Person], Person] = new CSVDataFinder

    System.out.println(s"Find a person with name Ivan in the json: ${jsonDataFinder.find(_.find(_.name == "Ivan"))}")
    System.out.println(s"Find a person with name James in the json: ${jsonDataFinder.find(_.find(_.name == "James"))}")

    System.out.println(s"Find a person with name Maria in the csv: ${csvDataFinder.find(_.find(_.name == "Maria"))}")
    System.out.println(s"Find a person with name Alice in the csv: ${csvDataFinder.find(_.find(_.name == "Alice"))}")
  }
} 
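A stripped-down, in-memory variant of the CSV branch above, sketched under the assumption that the same tototoshi scala-csv dependency is available; it feeds a hard-coded string through a ByteArrayInputStream instead of a classpath resource, and PersonRow is a hypothetical local model distinct from the example's Person class.

import java.io.{ByteArrayInputStream, InputStreamReader}
import java.nio.charset.StandardCharsets

import com.github.tototoshi.csv.CSVReader

// Hypothetical local model for this sketch.
final case class PersonRow(name: String, age: Int, address: String)

object InMemoryCsvSketch {
  def main(args: Array[String]): Unit = {
    val csv = "Ivan,26,Sofia\nMaria,23,Vienna"
    val people = CSVReader
      .open(new InputStreamReader(new ByteArrayInputStream(csv.getBytes(StandardCharsets.UTF_8))))
      .all()
      .collect { case List(name, age, address) => PersonRow(name, age.toInt, address) }
    println(people)
  }
}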
Example 89
Source File: FileInputImpl.scala    From chatoverflow   with Eclipse Public License 2.0 5 votes vote down vote up
package org.codeoverflow.chatoverflow.requirement.service.file.impl

import java.awt.image.BufferedImage
import java.io.ByteArrayInputStream
import java.util.Optional

import javax.imageio.ImageIO
import org.codeoverflow.chatoverflow.WithLogger
import org.codeoverflow.chatoverflow.api.io.input.FileInput
import org.codeoverflow.chatoverflow.registry.Impl
import org.codeoverflow.chatoverflow.requirement.impl.InputImpl
import org.codeoverflow.chatoverflow.requirement.service.file.FileConnector

@Impl(impl = classOf[FileInput], connector = classOf[FileConnector])
class FileInputImpl extends InputImpl[FileConnector] with FileInput with WithLogger {

  override def getFile(pathInResources: String): Optional[String] = Optional.ofNullable(sourceConnector.get.getFile(pathInResources).orNull)

  override def getBinaryFile(pathInResources: String): Optional[Array[Byte]] = Optional.ofNullable(sourceConnector.get.getBinaryFile(pathInResources).orNull)

  override def getImage(pathInResources: String): Optional[BufferedImage] = {
    val data = sourceConnector.get.getBinaryFile(pathInResources)
    if (data.isEmpty) {
      // No binary data available: return an empty Optional instead of falling through to data.get
      Optional.empty()
    } else {
      val bis = new ByteArrayInputStream(data.get)
      Optional.of(ImageIO.read(bis))
    }
  }

  override def start(): Boolean = true

  
  override def stop(): Boolean = true
} 
Example 90
Source File: VerifyingSpec.scala    From jsoniter-scala   with MIT License 5 votes vote down vote up
package com.github.plokhotnyuk.jsoniter_scala.macros

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer
import java.nio.charset.StandardCharsets.UTF_8

import com.github.plokhotnyuk.jsoniter_scala.core._
import org.scalatest.wordspec.AnyWordSpec
import org.scalatest.matchers.should.Matchers

class VerifyingSpec extends AnyWordSpec with Matchers {
  def verifySerDeser[T](codec: JsonValueCodec[T], obj: T, json: String, cfg: WriterConfig = WriterConfig): Unit = {
    verifySer(codec, obj, json, cfg)
    verifyDeser(codec, obj, json)
  }

  def verifySer[T](codec: JsonValueCodec[T], obj: T, json: String, cfg: WriterConfig = WriterConfig): Unit = {
    val len = json.getBytes(UTF_8).length
    verifyDirectByteBufferSer(codec, obj, len, cfg, json)
    verifyHeapByteBufferSer(codec, obj, len, cfg, json)
    verifyOutputStreamSer(codec, obj, cfg, json)
    verifyArraySer(codec, obj, cfg, json)
  }

  def verifyDeser[T](codec: JsonValueCodec[T], obj: T, json: String): Unit =
    verifyDeserByCheck[T](codec, json, check = (_: T) shouldBe obj)

  def verifyDeserByCheck[T](codec: JsonValueCodec[T], json: String, check: T => Unit): Unit = {
    val jsonBytes = json.getBytes(UTF_8)
    verifyDirectByteBufferDeser(codec, jsonBytes, check)
    verifyHeapByteBufferDeser(codec, jsonBytes, check)
    verifyInputStreamDeser(codec, jsonBytes, check)
    verifyByteArrayDeser(codec, jsonBytes, check)
  }

  def verifyDeserError[T](codec: JsonValueCodec[T], json: String, msg: String): Unit =
    verifyDeserError(codec, json.getBytes(UTF_8), msg)

  def verifyDeserError[T](codec: JsonValueCodec[T], jsonBytes: Array[Byte], msg: String): Unit = {
    assert(intercept[JsonReaderException](verifyDirectByteBufferDeser(codec, jsonBytes, (_: T) => ()))
      .getMessage.contains(msg))
    assert(intercept[JsonReaderException](verifyHeapByteBufferDeser(codec, jsonBytes, (_: T) => ()))
      .getMessage.contains(msg))
    assert(intercept[JsonReaderException](verifyInputStreamDeser(codec, jsonBytes, (_: T) => ()))
      .getMessage.contains(msg))
    assert(intercept[JsonReaderException](verifyByteArrayDeser(codec, jsonBytes, (_: T) => ()))
      .getMessage.contains(msg))
  }

  def verifyDirectByteBufferSer[T](codec: JsonValueCodec[T], obj: T, len: Int, cfg: WriterConfig, expected: String): Unit = {
    val directBuf = ByteBuffer.allocateDirect(len + 100)
    directBuf.position(0)
    writeToByteBuffer(obj, directBuf, cfg)(codec)
    directBuf.position(0)
    val buf = new Array[Byte](len)
    directBuf.get(buf)
    toString(buf) shouldBe expected
  }

  def verifyHeapByteBufferSer[T](codec: JsonValueCodec[T], obj: T, len: Int, cfg: WriterConfig, expected: String): Unit = {
    val heapBuf = ByteBuffer.wrap(new Array[Byte](len + 100))
    heapBuf.position(0)
    writeToByteBuffer(obj, heapBuf, cfg)(codec)
    heapBuf.position(0)
    val buf = new Array[Byte](len)
    heapBuf.get(buf)
    toString(buf) shouldBe expected
  }

  def verifyOutputStreamSer[T](codec: JsonValueCodec[T], obj: T, cfg: WriterConfig, expected: String): Unit = {
    val baos = new ByteArrayOutputStream
    writeToStream(obj, baos, cfg)(codec)
    toString(baos.toByteArray) shouldBe expected
  }

  def verifyArraySer[T](codec: JsonValueCodec[T], obj: T, cfg: WriterConfig, expected: String): Unit =
    toString(writeToArray(obj, cfg)(codec)) shouldBe expected

  def verifyDirectByteBufferDeser[T](codec: JsonValueCodec[T], json: Array[Byte], check: T => Unit): Unit = {
    val directBuf = ByteBuffer.allocateDirect(json.length)
    directBuf.put(json)
    directBuf.position(0)
    check(readFromByteBuffer(directBuf)(codec))
  }

  def verifyHeapByteBufferDeser[T](codec: JsonValueCodec[T], json: Array[Byte], check: T => Unit): Unit =
    check(readFromByteBuffer(ByteBuffer.wrap(json))(codec))

  def verifyInputStreamDeser[T](codec: JsonValueCodec[T], json: Array[Byte], check: T => Unit): Unit =
    check(readFromStream(new ByteArrayInputStream(json))(codec))

  def verifyByteArrayDeser[T](codec: JsonValueCodec[T], json: Array[Byte], check: T => Unit): Unit =
    check(readFromArray(json)(codec))

  def toString(json: Array[Byte]): String = new String(json, 0, json.length, UTF_8)
} 
Example 91
Source File: Release.scala    From ionroller   with MIT License 5 votes vote down vote up
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File}
import java.nio.file.{Files, Paths}

import com.amazonaws.services.s3.model._
import com.amazonaws.services.s3.transfer.Transfer.TransferState
import com.amazonaws.services.s3.transfer.TransferManager
import com.amazonaws.util.IOUtils
import sbt._

import scalaz.concurrent.Task

object Release {

  lazy val releaseCli = taskKey[Unit]("Releases ION-Roller CLI")

  def release(ver: String, zip: File, install: File) = {
    val files = Seq(
      (install.getName, replaceVersionAndReadBytes(ver, install), "text/plain"),
      (zip.getName, readBytes(zip), "application/zip"))
    val tx = new TransferManager
    val tasks = for {
      f <- files
    } yield uploadFile(tx, f._1, f._2, f._3)
    val t = for {
      results <- Task.gatherUnordered(tasks)
      finalResult = if (results.forall(_ == TransferState.Completed)) TransferState.Completed else TransferState.Failed
      printTask <- Task.delay(println(finalResult))
    } yield printTask
    t.run
  }

  def uploadFile(tx: TransferManager, name: String, getBytes: Task[Array[Byte]], contentType: String): Task[TransferState] = {
    for {
      bytes <- getBytes
      meta <- metadata(bytes, contentType)
      transferState <- upload(tx, bytes, name, meta)
    } yield transferState
  }

  def metadata(bytes: Array[Byte], contentType: String): Task[ObjectMetadata] = {
    Task.delay({
      val out = new ByteArrayOutputStream
      out.write(bytes)
      val metadata = new ObjectMetadata
      metadata.setContentType(contentType)
      val contentBytes = IOUtils.toByteArray(new ByteArrayInputStream(out.toByteArray)).length.toLong
      // we need to call new ByteArrayInputStream again, as checking the length reads the stream
      metadata.setContentLength(contentBytes)
      metadata
    })
  }

  def upload(tx: TransferManager, in: Array[Byte], name: String, meta: ObjectMetadata): Task[TransferState] = {
    Task.delay({
      println(s"Uploading $name...")
      val upload = tx.upload(
        new PutObjectRequest("ionroller-cli", name, new ByteArrayInputStream(in), meta)
          .withCannedAcl(CannedAccessControlList.PublicRead)
      )
      while (!upload.isDone) {
        Thread.sleep(2000)
        println(upload.getProgress.getPercentTransferred.toInt + "%")
      }
      upload.getState
    })
  }

  def replaceVersionAndReadBytes(ver: String, file: File): Task[Array[Byte]] = {
    Task.delay({
      scala.io.Source.fromFile(file).getLines()
        .map(in => if (in startsWith "VERSION=") s"VERSION=$ver" else in)
        .mkString("\n")
        .getBytes
        .toSeq
        .toArray
    })
  }

  def readBytes(file: File): Task[Array[Byte]] = Task.delay({
    Files.readAllBytes(Paths.get(file.getAbsolutePath))
  })

} 
Example 92
Source File: TemplateSpec.scala    From cluster-broccoli   with Apache License 2.0 5 votes vote down vote up
package de.frosner.broccoli.models

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import org.specs2.mutable.Specification
import play.api.libs.json.Json

import Template.{templateApiWrites, templatePersistenceReads}

class TemplateSpec extends Specification {

  "A template" should {

    "extract only parameters specified in the parameters" in {
      Template("test",
               "Hallo {{id}}. I like {{person_name}}.",
               "desc",
               Map("id" -> ParameterInfo("id", None, None, None, ParameterType.Raw, None))).parameters === Set("id")
    }

    "not automatically extract parameters from a template" in {
      Template("test", "Hallo {{id}}, how is {{object}}", "desc", Map.empty).parameters === Set.empty
    }

    "create the template version correctly in" in {
      Template("test", "template JSON", "desc", Map.empty).version === "889df4c8118c30a28ed4f51674a0f19d"
    }

    "result in different template versions if the template JSON differs" in {
      Template("test", "template JSON", "desc", Map.empty).version !== Template("test",
                                                                                "template JSONs",
                                                                                "desc",
                                                                                Map.empty).version
    }

    "result in different template versions if the template parameter info differs" in {
      Template(
        id = "test",
        template = "template JSON {{id}}",
        description = "desc",
        parameterInfos = Map.empty
      ).version !== Template(
        id = "test",
        template = "template JSON {{id}}",
        description = "desc",
        parameterInfos = Map(
          "id" -> ParameterInfo("id",
                                None,
                                None,
                                secret = Some(false),
                                `type` = ParameterType.String,
                                orderIndex = None)
        )
      ).version
    }

  }

  "Template serialization" should {

    "work correctly" in {
      val originalTemplate = Template("test", "Hallo {{name}}", "desc", Map.empty)
      val bos = new ByteArrayOutputStream()
      val oos = new ObjectOutputStream(bos)
      oos.writeObject(originalTemplate)
      oos.close()

      val ois = new ObjectInputStream(new ByteArrayInputStream(bos.toByteArray))
      val deserializedTemplate = ois.readObject()
      ois.close()

      originalTemplate === deserializedTemplate
    }

  }

  "Template back-end JSON serialization" should {

    "work" in {
      val template = Template(
        id = "t",
        template = "{{id}}",
        description = "d",
        parameterInfos = Map.empty
      )
      Json
        .fromJson(Json.toJson(template)(Template.templatePersistenceWrites))(Template.templatePersistenceReads)
        .get === template
    }

  }

} 
Example 93
Source File: JavaSerializationBenchmark.scala    From scala-commons   with MIT License 5 votes vote down vote up
package com.avsystem.commons
package rpc.akka.serialization

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import org.openjdk.jmh.annotations.{Benchmark, BenchmarkMode, Fork, Measurement, Mode, Scope, State, Warmup}
import org.openjdk.jmh.infra.Blackhole


@Warmup(iterations = 5)
@Measurement(iterations = 20)
@Fork(1)
@BenchmarkMode(Array(Mode.Throughput))
@State(Scope.Thread)
class JavaSerializationBenchmark {

  val something = Something(42, Nested(4 :: 8 :: 15 :: 16 :: 23 :: 42 :: Nil, 0), "lol")
  val array = {
    val baos = new ByteArrayOutputStream()
    val o = new ObjectOutputStream(baos)

    o.writeObject(something)
    o.close()

    baos.toByteArray
  }

  @Benchmark
  def byteStringOutput(): Something = {
    val baos = new ByteArrayOutputStream()
    val o = new ObjectOutputStream(baos)

    o.writeObject(something)
    o.close()

    val array = baos.toByteArray

    new ObjectInputStream(new ByteArrayInputStream(array)).readObject().asInstanceOf[Something]
  }

  @Benchmark
  def writeTest(): Array[Byte] = {
    val baos = new ByteArrayOutputStream()
    val o = new ObjectOutputStream(baos)

    o.writeObject(something)
    o.close()

    baos.toByteArray
  }

  @Benchmark
  def readTest(): Something = {
    new ObjectInputStream(new ByteArrayInputStream(array)).readObject().asInstanceOf[Something]
  }
} 
Example 94
Source File: StreamInputOutputBenchmark.scala    From scala-commons   with MIT License 5 votes vote down vote up
package com.avsystem.commons
package ser

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

import com.avsystem.commons.serialization.{GenCodec, StreamInput, StreamOutput}
import org.openjdk.jmh.annotations.{Benchmark, BenchmarkMode, Fork, Measurement, Mode, Scope, State, Warmup}
import org.openjdk.jmh.infra.Blackhole


case class Toplevel(int: Int, nested: Nested, str: String)
case class Nested(list: List[Int], int: Int)

object Toplevel {
  implicit val nestedCodec: GenCodec[Nested] = GenCodec.materialize[Nested]
  implicit val codec: GenCodec[Toplevel] = GenCodec.materialize[Toplevel]
}

@Warmup(iterations = 10)
@Measurement(iterations = 20)
@Fork(1)
@BenchmarkMode(Array(Mode.Throughput))
@State(Scope.Thread)
class StreamInputOutputBenchmark {

  val something = Toplevel(35, Nested(List(121, 122, 123, 124, 125, 126), 53), "lol")

  val inputArray: Array[Byte] = {
    val os = new ByteArrayOutputStream()

    GenCodec.write(new StreamOutput(new DataOutputStream(os)), something)
    os.toByteArray
  }

  @Benchmark
  def testEncode(bh: Blackhole): Unit = {
    val os = new ByteArrayOutputStream(inputArray.length)
    val output = new StreamOutput(new DataOutputStream(os))
    GenCodec.write(output, something)
    bh.consume(os.toByteArray)
  }

  @Benchmark
  def testDecode(bh: Blackhole): Unit = {
    val is = new DataInputStream(new ByteArrayInputStream(inputArray))
    val input = new StreamInput(is)
    bh.consume(GenCodec.read[Toplevel](input))
  }

  @Benchmark
  def testEncodeRaw(bh: Blackhole): Unit = {
    val os = new ByteArrayOutputStream(inputArray.length)
    val output = new StreamOutput(new DataOutputStream(os))
    val toplevelOutput = output.writeObject()
    toplevelOutput.writeField("int").writeSimple().writeInt(35)
    val nestedOutput = toplevelOutput.writeField("nested").writeObject()
    val listOutput = nestedOutput.writeField("list").writeList()
    listOutput.writeElement().writeSimple().writeInt(121)
    listOutput.writeElement().writeSimple().writeInt(122)
    listOutput.writeElement().writeSimple().writeInt(123)
    listOutput.writeElement().writeSimple().writeInt(124)
    listOutput.writeElement().writeSimple().writeInt(125)
    listOutput.writeElement().writeSimple().writeInt(126)
    listOutput.finish()
    nestedOutput.writeField("int").writeSimple().writeInt(53)
    nestedOutput.finish()
    toplevelOutput.writeField("str").writeSimple().writeString("lol")
    toplevelOutput.finish()
    bh.consume(os.toByteArray)
  }

  @Benchmark
  def testDecodeRaw(bh: Blackhole): Unit = {
    val is = new DataInputStream(new ByteArrayInputStream(inputArray))
    val input = new StreamInput(is)
    val objInput = input.readObject()
    val intField = objInput.nextField().readSimple().readInt()
    val nestedInput = objInput.nextField().readObject()
    val listInput = nestedInput.nextField().readList()
    val listNested = List(
      listInput.nextElement().readSimple().readInt(),
      listInput.nextElement().readSimple().readInt(),
      listInput.nextElement().readSimple().readInt(),
      listInput.nextElement().readSimple().readInt(),
      listInput.nextElement().readSimple().readInt(),
      listInput.nextElement().readSimple().readInt()
    )
    listInput.hasNext
    val intNested = nestedInput.nextField().readSimple().readInt()
    nestedInput.hasNext
    val strField = objInput.nextField().readSimple().readString()
    objInput.hasNext
    bh.consume(Toplevel(intField, Nested(listNested, intNested), strField))
  }
} 
Example 95
Source File: StreamGenCodecTest.scala    From scala-commons   with MIT License 5 votes vote down vote up
package com.avsystem.commons
package serialization

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

class StreamGenCodecTest extends GenCodecRoundtripTest {
  type Raw = Array[Byte]

  def writeToOutput(write: Output => Unit): Array[Byte] = {
    val baos = new ByteArrayOutputStream
    write(new StreamOutput(new DataOutputStream(baos)))
    baos.toByteArray
  }

  def createInput(raw: Array[Byte]): Input =
    new StreamInput(new DataInputStream(new ByteArrayInputStream(raw)))
} 
Example 96
Source File: MessageSerializationSuite.scala    From bahir   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.streaming.pubnub

import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
import java.io.ObjectInputStream
import java.io.ObjectOutputStream

import com.google.gson.JsonParser
import com.pubnub.api.models.consumer.pubsub.PNMessageResult

import org.apache.spark.SparkFunSuite

class MessageSerializationSuite extends SparkFunSuite {
  test("Full example") {
    checkMessageSerialization(
      "{\"message\":\"Hello, World!\"}", "channel1",
      "publisher1", "subscription1", System.currentTimeMillis * 10000
    )
  }

  test("Message from channel") {
    checkMessageSerialization("{\"message\":\"Hello, World!\"}", "c", "p", null, 13534398158620385L)
  }

  test("Message from subscription") {
    checkMessageSerialization("{\"message\":\"Hello, World!\"}", null, "p", "s", 13534397812467596L)
  }

  def checkMessageSerialization(payload: String, channel: String,
      publisher: String, subscription: String, timestamp: Long): Unit = {
    val builder = PNMessageResult.builder
      .message(if (payload != null) new JsonParser().parse(payload) else null)
      .channel(channel)
      .publisher(publisher)
      .subscription(subscription)
      .timetoken(timestamp)
    val pubNubMessage = builder.build()
    val sparkMessage = new SparkPubNubMessage
    sparkMessage.message = pubNubMessage

    // serializer
    val byteOutStream = new ByteArrayOutputStream
    val outputStream = new ObjectOutputStream(byteOutStream)
    outputStream.writeObject(sparkMessage)
    outputStream.flush()
    outputStream.close()
    byteOutStream.close()
    val serializedBytes = byteOutStream.toByteArray

    // deserialize
    val byteInStream = new ByteArrayInputStream(serializedBytes)
    val inputStream = new ObjectInputStream(byteInStream)
    val deserializedMessage = inputStream.readObject().asInstanceOf[SparkPubNubMessage]
    inputStream.close()
    byteInStream.close()

    assert(payload.equals(deserializedMessage.getPayload))
    if (channel != null) {
      assert(channel.equals(deserializedMessage.getChannel))
    } else {
      assert(deserializedMessage.getChannel == null)
    }
    if (subscription != null) {
      assert(subscription.equals(deserializedMessage.getSubscription))
    } else {
      assert(deserializedMessage.getSubscription == null)
    }
    assert(publisher.equals(deserializedMessage.getPublisher))
    val unixTimestamp = Math.ceil(timestamp / 10000).longValue()
    assert(unixTimestamp.equals(deserializedMessage.getTimestamp))
  }
} 
Example 97
Source File: TDMLInfosetOutputter.scala    From incubator-daffodil   with Apache License 2.0 5 votes vote down vote up
package org.apache.daffodil.tdml

import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream

import org.apache.daffodil.infoset.DIArray
import org.apache.daffodil.infoset.DIComplex
import org.apache.daffodil.infoset.DISimple
import org.apache.daffodil.infoset.InfosetOutputter
import org.apache.daffodil.infoset.JDOMInfosetInputter
import org.apache.daffodil.infoset.JDOMInfosetOutputter
import org.apache.daffodil.infoset.JsonInfosetInputter
import org.apache.daffodil.infoset.JsonInfosetOutputter
import org.apache.daffodil.infoset.ScalaXMLInfosetInputter
import org.apache.daffodil.infoset.ScalaXMLInfosetOutputter
import org.apache.daffodil.infoset.W3CDOMInfosetInputter
import org.apache.daffodil.infoset.W3CDOMInfosetOutputter
import org.apache.daffodil.infoset.XMLTextInfosetInputter
import org.apache.daffodil.infoset.XMLTextInfosetOutputter

class TDMLInfosetOutputter() extends InfosetOutputter {

  private def implString: String = "daffodil"

  private val jsonStream = new ByteArrayOutputStream()
  private val xmlStream = new ByteArrayOutputStream()

  private val scalaOut = new ScalaXMLInfosetOutputter()
  private val jdomOut = new JDOMInfosetOutputter()
  private val w3cdomOut = new W3CDOMInfosetOutputter()
  private val jsonOut = new JsonInfosetOutputter(jsonStream, false)
  private val xmlOut = new XMLTextInfosetOutputter(xmlStream, false)

  private val outputters = Seq(xmlOut, scalaOut, jdomOut, w3cdomOut, jsonOut)

  override def reset(): Unit = {
    outputters.foreach(_.reset())
  }

  override def startSimple(simple: DISimple): Boolean = {
    if (!outputters.forall(_.startSimple(simple)))
      throw TDMLException("startSimple failed", Some(implString))
    true
  }

  override def endSimple(simple: DISimple): Boolean = {
    if (!outputters.forall(_.endSimple(simple)))
      throw TDMLException("endSimple failed", Some(implString))
    true
  }

  override def startComplex(complex: DIComplex): Boolean = {
    if (!outputters.forall(_.startComplex(complex)))
      throw TDMLException("startComplex failed", Some(implString))
    true
  }

  override def endComplex(complex: DIComplex): Boolean = {
    if (!outputters.forall(_.endComplex(complex)))
      throw TDMLException("endComplex failed", Some(implString))
    true
  }

  override def startArray(array: DIArray): Boolean = {
    if (!outputters.forall(_.startArray(array)))
      throw TDMLException("startArray failed", Some(implString))
    true
  }

  override def endArray(array: DIArray): Boolean = {
    if (!outputters.forall(_.endArray(array)))
      throw TDMLException("endArray failed", Some(implString))
    true
  }

  override def startDocument(): Boolean = {
    if (!outputters.forall(_.startDocument()))
      throw TDMLException("startDocument failed", Some(implString))
    true
  }

  override def endDocument(): Boolean = {
    if (!outputters.forall(_.endDocument()))
      throw TDMLException("endDocument failed", Some(implString))
    true
  }

  def getResult() = scalaOut.getResult

  def toInfosetInputter() = {
    val scalaIn = new ScalaXMLInfosetInputter(scalaOut.getResult)
    val jdomIn = new JDOMInfosetInputter(jdomOut.getResult)
    val w3cdomIn = new W3CDOMInfosetInputter(w3cdomOut.getResult)
    val jsonIn = new JsonInfosetInputter(new ByteArrayInputStream(jsonStream.toByteArray))
    val xmlIn = new XMLTextInfosetInputter(new ByteArrayInputStream(xmlStream.toByteArray))
    new TDMLInfosetInputter(scalaIn, Seq(jdomIn, w3cdomIn, jsonIn, xmlIn))
  }
} 
Example 98
Source File: Implicits.scala    From incubator-daffodil   with Apache License 2.0 5 votes vote down vote up
package org.apache.daffodil

import java.io.{ ByteArrayInputStream, BufferedInputStream }

import org.apache.daffodil.xml.NS
import org.apache.daffodil.exceptions.Assert
import scala.language.{ implicitConversions, reflectiveCalls } // silences scala 2.10 warnings

object Implicits {

  object ImplicitsSuppressUnusedImportWarning {
    def apply() = if (scala.math.random.isNaN()) Assert.impossible()
  }

  
  def intercept[T <: AnyRef](body: => Any)(implicit tag: scala.reflect.ClassTag[T]): T = {
    val clazz = tag.runtimeClass.asInstanceOf[Class[T]]
    val caught = try {
      body
      None
    } catch {
      case npe: NullPointerException => throw npe
      case s: scala.util.control.ControlThrowable => throw s
      case u: Throwable => {
        if (!clazz.isAssignableFrom(u.getClass)) {
          throw new InterceptFailedException(
            "Failed to intercept expected exception. Expected '%s' but got '%s'.".format(clazz.getName, u.getClass.getName))
        } else {
          Some(u)
        }
      }
    }
    caught match {
      case None => throw new InterceptFailedException("Failed to intercept any exceptions.")
      case Some(e) => e.asInstanceOf[T]
    }
  }

  class InterceptFailedException(msg: String) extends RuntimeException(msg)

} 
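A small usage sketch for intercept; the thrown exception and the assertion are arbitrary examples.

import org.apache.daffodil.Implicits.intercept

object InterceptUsageSketch {
  def main(args: Array[String]): Unit = {
    // require(false, ...) throws IllegalArgumentException, which intercept captures and returns.
    val e = intercept[IllegalArgumentException] {
      require(false, "boom")
    }
    assert(e.getMessage.contains("boom"))
  }
}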
Example 99
Source File: TestSerializationAndLazy.scala    From incubator-daffodil   with Apache License 2.0 5 votes vote down vote up
package org.apache.daffodil.util

import org.junit.Assert._
import java.io.ByteArrayOutputStream
import java.io.ObjectOutputStream
import java.io.ByteArrayInputStream
import java.io.ObjectInputStream
import org.junit.Test

class ToSerialize extends Serializable {

  val v = 5
  var lazyValWasEvaluated = false
  lazy val x = {
    // println("v is " + v)
    lazyValWasEvaluated = true
    2 * v
  }

}


class TestSerializationAndLazy {

  @Test
  def testSerializeBeforeLazyEval(): Unit = {
    val instance = new ToSerialize
    val baos = new ByteArrayOutputStream
    val stream = new ObjectOutputStream(baos)
    stream.writeObject(instance)
    stream.flush()
    stream.close()
    assertFalse(instance.lazyValWasEvaluated)
    val ba = baos.toByteArray()
    val bais = new ByteArrayInputStream(ba)
    val istream = new ObjectInputStream(bais)
    val restoredInstance = istream.readObject()
    istream.close()
    assertTrue(restoredInstance.isInstanceOf[ToSerialize])
    val ts = restoredInstance.asInstanceOf[ToSerialize]
    assertFalse(ts.lazyValWasEvaluated)
    ts.x
    assertTrue(ts.lazyValWasEvaluated)
  }

} 
Example 100
Source File: TarFlowSpec.scala    From nexus   with Apache License 2.0 5 votes vote down vote up
package ch.epfl.bluebrain.nexus.storage

import java.io.ByteArrayInputStream
import java.nio.file.{Files, Path, Paths}

import akka.actor.ActorSystem
import akka.stream.alpakka.file.scaladsl.Directory
import akka.stream.scaladsl.{FileIO, Source}
import akka.testkit.TestKit
import akka.util.ByteString
import ch.epfl.bluebrain.nexus.storage.utils.{EitherValues, IOEitherValues, Randomness}
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.commons.io.FileUtils
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpecLike
import org.scalatest.{BeforeAndAfterAll, Inspectors, OptionValues}

import scala.annotation.tailrec

class TarFlowSpec
    extends TestKit(ActorSystem("TarFlowSpec"))
    with AnyWordSpecLike
    with Matchers
    with IOEitherValues
    with Randomness
    with EitherValues
    with OptionValues
    with Inspectors
    with BeforeAndAfterAll {

  val basePath = Files.createTempDirectory("tarflow")
  val dir1     = basePath.resolve("one")
  val dir2     = basePath.resolve("two")

  override def afterAll(): Unit = {
    super.afterAll()
    FileUtils.cleanDirectory(basePath.toFile)
    ()
  }

  type PathAndContent = (Path, String)

  "A TarFlow" should {

    Files.createDirectories(dir1)
    Files.createDirectories(dir2)

    def relativize(path: Path): String = basePath.getParent().relativize(path).toString

    "generate the byteString for a tar file correctly" in {
      val file1        = dir1.resolve("file1.txt")
      val file1Content = genString()
      val file2        = dir1.resolve("file3.txt")
      val file2Content = genString()
      val file3        = dir2.resolve("file3.txt")
      val file3Content = genString()
      val files        = List(file1 -> file1Content, file2 -> file2Content, file3 -> file3Content)
      forAll(files) {
        case (file, content) => Source.single(ByteString(content)).runWith(FileIO.toPath(file)).futureValue
      }
      val byteString   = Directory.walk(basePath).via(TarFlow.writer(basePath)).runReduce(_ ++ _).futureValue
      val bytes        = new ByteArrayInputStream(byteString.toArray)
      val tar          = new TarArchiveInputStream(bytes)

      @tailrec def readEntries(
          tar: TarArchiveInputStream,
          entries: List[PathAndContent] = Nil
      ): List[PathAndContent] = {
        val entry = tar.getNextTarEntry
        if (entry == null) entries
        else {
          val data = Array.ofDim[Byte](entry.getSize.toInt)
          tar.read(data)
          readEntries(tar, (Paths.get(entry.getName) -> ByteString(data).utf8String) :: entries)
        }
      }
      val directories = List(relativize(basePath) -> "", relativize(dir1) -> "", relativize(dir2) -> "")
      val untarred    = readEntries(tar).map { case (path, content) => path.toString -> content }
      val expected    = files.map { case (path, content) => relativize(path) -> content } ++ directories
      untarred should contain theSameElementsAs expected
    }
  }

} 
Example 101
Source File: ProcessBuilderUtils.scala    From scalastringcourseday7   with Apache License 2.0 5 votes vote down vote up
package util

import java.io.ByteArrayInputStream
import java.nio.charset.{Charset, CodingErrorAction}

import text.StringOption

import scala.collection.mutable.ListBuffer
import scala.io.{Codec, Source}
import scala.sys.process.ProcessBuilder


object ProcessBuilderUtils {
  implicit def processToProcessUtils(repr: ProcessBuilder): ProcessBuilderUtils = {
    new ProcessBuilderUtils(repr)
  }
}

class ProcessBuilderUtils(repr: ProcessBuilder) {
  def lineStream(encoding: Charset,
                 onMalformedInput: CodingErrorAction,
                 onUnmappableCharacter: CodingErrorAction,
                 replacementOpt: StringOption): Iterator[String] = {
    val lines: Iterator[String] = repr.lineStream_!.iterator
    val byteBuffer = ListBuffer.empty[Byte]
    while (lines.hasNext) {
      val line: String = lines.next.trim concat "\n"
      byteBuffer ++= line.getBytes
    }
    implicit val codec = Codec(encoding).
      onMalformedInput(onMalformedInput).
      onUnmappableCharacter(onUnmappableCharacter)
    if (replacementOpt.nonEmpty) {
      codec.decodingReplaceWith(replacementOpt.get)
    }
    Source.fromInputStream(new ByteArrayInputStream(byteBuffer.toArray)).getLines
  }
} 
Example 102
Source File: H2OLoader.scala    From ForestFlow   with Apache License 2.0 5 votes vote down vote up
package ai.forestflow.serving.impl

import java.io.{ByteArrayInputStream, FileReader}
import java.nio.file.Paths

import ai.forestflow.serving.MLFlow.H2OMLFlowSpec
import ai.forestflow.serving.interfaces.Loader
import cats.syntax.either._
import ai.forestflow.domain.{FQRV, FlavorShim, ServableSettings}
import hex.genmodel.MojoReaderBackendFactory
import hex.genmodel.MojoReaderBackendFactory.CachingStrategy
import io.circe.{Error, yaml}

trait H2OLoader extends Loader {
  def version: Option[String]
  override def createServable(servableBinary: Array[Byte], fqrv: FQRV, settings: ServableSettings)(implicit eCTX: EnvironmentContext): H2OServable = {
    import hex.genmodel.MojoModel

    val mojoReader = MojoReaderBackendFactory.createReaderBackend(
      new ByteArrayInputStream(servableBinary),
      CachingStrategy.MEMORY)

    H2OServable(MojoModel.load(mojoReader), fqrv, settings)
  }
}

case class MLFlowH2OLoader(dataPath: String, version: Option[String]) extends H2OLoader {

  override def getRelativeServablePath(implicit eCTX: EnvironmentContext): String = {
    val json = yaml.parser.parse(new FileReader(Paths.get(eCTX.localDir.getAbsolutePath, dataPath, "h2o.yaml").toFile)) // TODO move "h2o.yaml" constant to configuration

    val h2oSpec = json
      .leftMap(err => err: Error)
      .flatMap(_.as[H2OMLFlowSpec])
      .valueOr(throw _)

    Paths.get(dataPath, h2oSpec.modelFile).toString
  }
}


trait BasicH2OMojoLoader extends H2OLoader  {
  this : FlavorShim with Loader =>
  val mojoPath: String
  val version: Option[String]

  override def getRelativeServablePath(implicit eCTX: EnvironmentContext): String = mojoPath
} 
Example 103
Source File: ParseTests.scala    From coursier   with Apache License 2.0 5 votes vote down vote up
package coursier.benchmark

import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets
import java.util.concurrent.TimeUnit

import coursier.maven.MavenRepository
import coursier.moduleString
import org.apache.maven.model.io.xpp3.MavenXpp3Reader
import org.openjdk.jmh.annotations._

import scala.concurrent.Await
import scala.concurrent.duration.Duration

@BenchmarkMode(Array(Mode.AverageTime))
@OutputTimeUnit(TimeUnit.MILLISECONDS)
class ParseTests {

  @Benchmark
  def parseSparkParent(state: TestState): Unit = {
    val t = state.repositories.head.find(
      mod"org.apache.spark:spark-parent_2.12",
      "2.4.0",
      state.fetcher
    ).run
    val e = Await.result(t.future()(state.ec), Duration.Inf)
    assert(e.isRight)
  }

  @Benchmark
  def parseSparkParentXmlDom(state: TestState): Unit = {
    val content = state.inMemoryCache.fromCache("https://repo1.maven.org/maven2/org/apache/spark/spark-parent_2.12/2.4.0/spark-parent_2.12-2.4.0.pom")
    val res = MavenRepository.parseRawPomDom(content)
    assert(res.isRight)
  }

  @Benchmark
  def parseSparkParentXmlSax(state: TestState): Unit = {
    val content = state.inMemoryCache.fromCache("https://repo1.maven.org/maven2/org/apache/spark/spark-parent_2.12/2.4.0/spark-parent_2.12-2.4.0.pom")
    val res = MavenRepository.parseRawPomSax(content)
    assert(res.isRight)
  }

  @Benchmark
  def parseApacheParent(state: TestState): Unit = {
    val t = state.repositories.head.find(
      mod"org.apache:apache",
      "18",
      state.fetcher
    ).run
    val e = Await.result(t.future()(state.ec), Duration.Inf)
    assert(e.isRight)
  }

  @Benchmark
  def parseSparkParentMavenModel(state: TestState): Unit = {
    val b = state
      .inMemoryCache
      .fromCache("https://repo1.maven.org/maven2/org/apache/spark/spark-parent_2.12/2.4.0/spark-parent_2.12-2.4.0.pom")
      .getBytes(StandardCharsets.UTF_8)
    val reader = new MavenXpp3Reader
    val model = reader.read(new ByteArrayInputStream(b))
  }

} 
Example 104
Source File: ZipTests.scala    From coursier   with Apache License 2.0 5 votes vote down vote up
package coursier.cli.util

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.Random
import java.util.zip.{Deflater, ZipEntry, ZipInputStream, ZipOutputStream}

import coursier.launcher.internal.Zip
import org.junit.runner.RunWith
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatestplus.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class ZipTests extends AnyFlatSpec {

  "zipEntries" should "be fine with custom deflaters" in {

    // Inspired by https://github.com/spring-projects/spring-boot/commit/a50646b7cc3ad941e748dfb450077e3a73706205#diff-2297c301250b25e3b80301c58daf3ea0R621

    val baos = new ByteArrayOutputStream
    val output = new ZipOutputStream(baos) {
      `def` = new Deflater(Deflater.NO_COMPRESSION, true)
    }
    val data = Array.ofDim[Byte](1024 * 1024)
    new Random().nextBytes(data)
    val entry = new ZipEntry("entry.dat")
    output.putNextEntry(entry)
    output.write(data)
    output.closeEntry()
    output.close()

    val result = baos.toByteArray

    val zos = new ZipOutputStream(new ByteArrayOutputStream)
    val entryNames = Zip.zipEntries(new ZipInputStream(new ByteArrayInputStream(result)))
      .map {
        case (ent, content) =>
          println(ent.getCompressedSize)
          val name = ent.getName
          zos.putNextEntry(ent)
          zos.write(content)
          zos.closeEntry()
          name
      }
      .toVector
    zos.close()
    assert(entryNames == Vector("entry.dat"))
  }

} 
Example 105
Source File: VectorGrid.scala    From osmesa   with Apache License 2.0 5 votes vote down vote up
package osmesa.analytics

import java.io.ByteArrayInputStream
import java.net.URI
import java.util.zip.GZIPInputStream

import geotrellis.proj4.WebMercator
import geotrellis.spark.tiling.ZoomedLayoutScheme
import geotrellis.vector.{Extent, PointFeature}
import geotrellis.vectortile.{Layer, VInt64, VectorTile}
import org.apache.commons.io.IOUtils
import org.apache.spark.internal.Logging
import osmesa.analytics.updater.Implicits._
import osmesa.analytics.updater._

import scala.collection.GenMap
import scala.collection.parallel.TaskSupport

trait VectorGrid extends Logging {
  // Default base zoom (highest resolution tiles produced)
  val DefaultBaseZoom: Int = 10

  // Number of cells per side in a gridded tile
  implicit val Cells: Int = 128

  // Number of cells in a gridded tile at the base of the pyramid (may be used for over-zooming)
  val BaseCells: Int = Cells

  // Default upload concurrency
  val DefaultUploadConcurrency: Int = 8

  implicit val LayoutScheme: ZoomedLayoutScheme = ZoomedLayoutScheme(WebMercator)
  val SequenceLayerName: String = "__sequences__"

  def getCommittedSequences(tile: VectorTile): Set[Int] =
    // NOTE when working with hashtags, this should be the changeset sequence, since changes from a
    // single sequence may appear in different batches depending on when changeset metadata arrives
    tile.layers
      .get(SequenceLayerName)
      .map(_.features.flatMap(f => f.data.values.map(valueToLong).map(_.intValue)))
      .map(_.toSet)
      .getOrElse(Set.empty)

  def makeSequenceLayer(sequences: Set[Int], extent: Extent, tileWidth: Int = 4096): (String, Layer) = {
    // create a second layer w/ a feature corresponding to committed sequences (in the absence of
    // available tile / layer metadata)
    val updatedSequences =
      sequences.toSeq.sorted
        .takeRight(1000)
        .zipWithIndex
        .map {
          case (seq, idx) =>
            idx.toString -> VInt64(seq)
        }
        .toMap

    val sequenceFeature = PointFeature(extent.center, updatedSequences)

    makeLayer(SequenceLayerName, extent, Seq(sequenceFeature), tileWidth)
  }

  def loadMVTs(urls: Map[URI, Extent])(
      implicit taskSupport: TaskSupport): GenMap[URI, VectorTile] = {
    // convert to a parallel collection to load more tiles concurrently
    val parUrls = urls.par
    parUrls.tasksupport = taskSupport

    parUrls.map {
      case (uri, extent) =>
        (uri,
         read(uri).map(
           bytes =>
             VectorTile.fromBytes(
               IOUtils.toByteArray(new GZIPInputStream(new ByteArrayInputStream(bytes))),
               extent)))
    } filter {
      case (_, mvt) => mvt.isDefined
    } map {
      case (uri, mvt) => uri -> mvt.get
    }
  }
} 
Example 106
Source File: CodecSpec.scala    From hail   with MIT License 5 votes vote down vote up
package is.hail.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream, OutputStream}

import is.hail.annotations.{Region, RegionValue}
import is.hail.asm4s.{Code, TypeInfo, Value}
import is.hail.expr.ir.{EmitClassBuilder, EmitFunctionBuilder, ExecuteContext, typeToTypeInfo}
import is.hail.types.encoded.EType
import is.hail.types.physical.PType
import is.hail.types.virtual.Type
import is.hail.rvd.RVDContext
import is.hail.sparkextras.ContextRDD
import is.hail.utils.using
import org.apache.spark.rdd.RDD

trait AbstractTypedCodecSpec extends Spec {
  def encodedType: EType
  def encodedVirtualType: Type

  type StagedEncoderF[T] = (Value[Region], Value[T], Value[OutputBuffer]) => Code[Unit]
  type StagedDecoderF[T] = (Value[Region], Value[InputBuffer]) => Code[T]

  def buildEncoder(ctx: ExecuteContext, t: PType): (OutputStream) => Encoder

  def decodedPType(requestedType: Type): PType

  def buildDecoder(ctx: ExecuteContext, requestedType: Type): (PType, (InputStream) => Decoder)

  def encode(ctx: ExecuteContext, t: PType, offset: Long): Array[Byte] = {
    val baos = new ByteArrayOutputStream()
    using(buildEncoder(ctx, t)(baos))(_.writeRegionValue(offset))
    baos.toByteArray
  }

  def decode(ctx: ExecuteContext, requestedType: Type, bytes: Array[Byte], region: Region): (PType, Long) = {
    val bais = new ByteArrayInputStream(bytes)
    val (pt, dec) = buildDecoder(ctx, requestedType)
    (pt, dec(bais).readRegionValue(region))
  }

  def buildCodeInputBuffer(is: Code[InputStream]): Code[InputBuffer]

  def buildCodeOutputBuffer(os: Code[OutputStream]): Code[OutputBuffer]

  def buildEmitDecoderF[T](requestedType: Type, cb: EmitClassBuilder[_]): (PType, StagedDecoderF[T])

  def buildEmitEncoderF[T](t: PType, cb: EmitClassBuilder[_]): StagedEncoderF[T]

  def buildEmitDecoderF[T](requestedType: Type, cb: EmitClassBuilder[_], ti: TypeInfo[T]): (PType, StagedDecoderF[T]) = {
    val (ptype, dec) = buildEmitDecoderF[T](requestedType, cb)
    assert(ti == typeToTypeInfo(requestedType))
    ptype -> dec
  }

  def buildEmitEncoderF[T](t: PType, cb: EmitClassBuilder[_], ti: TypeInfo[T]): StagedEncoderF[T] = {
    assert(ti == typeToTypeInfo(t))
    buildEmitEncoderF[T](t, cb)
  }

  // FIXME: is there a better place for this to live?
  def decodeRDD(ctx: ExecuteContext, requestedType: Type, bytes: RDD[Array[Byte]]): (PType, ContextRDD[Long]) = {
    val (pt, dec) = buildDecoder(ctx, requestedType)
    (pt, ContextRDD.weaken(bytes).cmapPartitions { (ctx, it) =>
      RegionValue.fromBytes(dec, ctx.region, it)
    })
  }

  override def toString: String = super[Spec].toString
} 
Example 107
Source File: StringTests.scala    From CM-Well   with Apache License 2.0 5 votes vote down vote up
package cmwell.util.string.test

import java.io.{ByteArrayInputStream, InputStream}

import org.scalatest.{FunSpec, Matchers}
import cmwell.util.string._

class StringTests extends FunSpec with Matchers {
  private def mkString(is: InputStream) = {
    val buffSrc = scala.io.Source.fromInputStream(is)
    val res = buffSrc.mkString
    buffSrc.close()
    res
  }

  describe("mapInputStreamLines should") {
    it("return empty for empty input") {
      val input = new ByteArrayInputStream(Array.emptyByteArray)
      val result = mapInputStreamLines(input)(identity)
      result.read() should be(-1)
      input.close()
      result.close()
    }
    it("provide the delimiter as well") {
      val delim = '\n'
      val s = "provide the\ndelimiter as well"
      val expectedAmount = s.count(delim.==)

      val input = stringToInputStream(s)
      val result = mapInputStreamLines(input)(_.toUpperCase)
      mkString(result).count(delim.==) should be(expectedAmount)
      input.close()
      result.close()
    }
    it("not end with the delimiter") {
      val input = stringToInputStream("not end with\nthe delimiter")
      val result = mapInputStreamLines(input)(_.toUpperCase)
      mkString(result).last should be('R')
      input.close()
      result.close()
    }
    it("handle a concat mapper") {
      val input = stringToInputStream("handle\na\nconcat\nmapper")
      val result = mapInputStreamLines(input)(_ + " not")
      mkString(result) should be("handle not\na not\nconcat not\nmapper not")
      input.close()
      result.close()
    }
  }

} 
Example 108
Source File: S3KVPersisted.scala    From fotm-info   with MIT License 5 votes vote down vote up
package info.fotm.util

import java.io.ByteArrayInputStream

import com.amazonaws.services.s3.AmazonS3Client
import com.amazonaws.services.s3.model.{GetObjectRequest, ObjectListing, ObjectMetadata, S3ObjectInputStream}
import com.amazonaws.util.IOUtils
import com.twitter.bijection.Bijection

import scala.collection.JavaConverters._
import scala.collection.breakOut
import scala.util.Try

class S3KVPersisted[K, V](bucket: String, keyPathBijection: Bijection[K, String])
                         (implicit valueSerializer: Bijection[V, Array[Byte]])
  extends Persisted[Map[K, V]] {

  val s3client = new AmazonS3Client()

  override def save(state: Map[K, V]): Try[Unit] = Try {
    for ((k, v) <- state) {
      val path: String = keyPathBijection(k)
      val bytes = valueSerializer(v)
      val stream = new ByteArrayInputStream(bytes)
      val meta = new ObjectMetadata()
      meta.setContentLength(bytes.length)
      s3client.putObject(bucket, path, stream, meta)
    }
  }

  override def fetch(): Try[Map[K, V]] = Try {
    val listing: ObjectListing = s3client.listObjects(bucket)
    val bucketEntries = listing.getObjectSummaries.asScala.toList
    val s3keys = bucketEntries.map(_.getKey)

    val result: Map[K, V] = (
      for (s3key <- s3keys) yield {
        println(s"Loading $s3key...")
        val request = new GetObjectRequest(bucket, s3key)
        val s3object = s3client.getObject(request)
        val objectData: S3ObjectInputStream = s3object.getObjectContent
        val bytes = IOUtils.toByteArray(objectData)
        objectData.close()
        println(s"Loaded $s3key! Deserializing...")
        val k = keyPathBijection.inverse(s3key)
        val v = valueSerializer.inverse(bytes)
        println(s"Done with $s3key.")
        (k, v)
      })(breakOut)

    result
  }
} 
Example 109
Source File: SubEntryTest.scala    From lila-openingexplorer   with GNU Affero General Public License v3.0 5 votes vote down vote up
package lila.openingexplorer

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream }

import org.specs2.mutable._

import chess.{ Color, Pos }
import chess.format.Uci

class SubEntryTest extends Specification {

  private def pipe(entry: SubEntry): SubEntry = {
    val out = new ByteArrayOutputStream()
    entry.write(out)

    val in = new ByteArrayInputStream(out.toByteArray)
    SubEntry.read(in)
  }

  "master database packer" should {

    "pack a single game" in {
      val ref   = GameRef("ref00000", Some(Color.White), SpeedGroup.Blitz, 1230)
      val entry = SubEntry.fromGameRef(ref, Left(Uci.Move(Pos.E2, Pos.E4)))

      pipe(entry).gameRefs mustEqual List(ref)
    }

    "pack two games" in {
      val move  = Left(Uci.Move(Pos.D2, Pos.D4))
      val g1    = GameRef("g0000001", Some(Color.Black), SpeedGroup.Classical, 2300)
      val g2    = GameRef("g0000002", None, SpeedGroup.Classical, 2455)
      val entry = SubEntry.fromGameRef(g1, move).withGameRef(g2, move)

      pipe(entry).gameRefs mustEqual List(g2, g1)
    }
  }
} 
Example 110
Source File: PackHelperTest.scala    From lila-openingexplorer   with GNU Affero General Public License v3.0 5 votes vote down vote up
package lila.openingexplorer

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream }
import org.specs2.mutable._
import chess.format.Uci
import chess.Pos
import chess.{ King, Rook }

class PackHelperTest extends Specification with PackHelper {

  def pipeMove(move: Either[Uci.Move, Uci.Drop]): Either[Uci.Move, Uci.Drop] = {
    val out = new ByteArrayOutputStream()
    writeUci(out, move)

    val in = new ByteArrayInputStream(out.toByteArray)
    readUci(in)
  }

  "the pack helper" should {
    "correctly pack moves" in {
      val move = Uci.Move(Pos.E2, Pos.E3)
      pipeMove(Left(move)) mustEqual Left(move)
    }

    "correctly pack promotions" in {
      val move = Uci.Move(Pos.A7, Pos.A8, Some(Rook))
      pipeMove(Left(move)) mustEqual Left(move)
    }

    "correctly pack drops" in {
      val drop = Uci.Drop(King, Pos.H3)
      pipeMove(Right(drop)) mustEqual Right(drop)
    }
  }

  List(7, 127, 128, 129, 254, 255, 256, 257, 1234, 864197252500L).foreach { x =>
    "correctly pack uint: " + x in {
      val out = new ByteArrayOutputStream()
      writeUint(out, x)

      val in = new ByteArrayInputStream(out.toByteArray)
      readUint(in) mustEqual x
    }
  }
} 
Example 111
Source File: Json4sSerialization.scala    From kafka-serialization   with Apache License 2.0 5 votes vote down vote up
package com.ovoenergy.kafka.serialization.json4s

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStreamReader, OutputStreamWriter}
import java.nio.charset.StandardCharsets

import com.ovoenergy.kafka.serialization.core._
import org.apache.kafka.common.serialization.{Deserializer => KafkaDeserializer, Serializer => KafkaSerializer}
import org.json4s.Formats
import org.json4s.native.Serialization.{read, write}

import scala.reflect.ClassTag
import scala.reflect.runtime.universe._

trait Json4sSerialization {

  def json4sSerializer[T <: AnyRef](implicit jsonFormats: Formats): KafkaSerializer[T] = serializer { (_, data) =>
    val bout = new ByteArrayOutputStream()
    val writer = new OutputStreamWriter(bout, StandardCharsets.UTF_8)

    // TODO Use scala-arm
    try {
      write(data, writer)
      writer.flush()
    } finally {
      writer.close()
    }
    bout.toByteArray
  }

  def json4sDeserializer[T: TypeTag](implicit jsonFormats: Formats): KafkaDeserializer[T] = deserializer { (_, data) =>
    val tt = implicitly[TypeTag[T]]
    implicit val cl = ClassTag[T](tt.mirror.runtimeClass(tt.tpe))
    read[T](new InputStreamReader(new ByteArrayInputStream(data), StandardCharsets.UTF_8))
  }

} 
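A minimal round-trip sketch, not part of the kafka-serialization sources, showing how the serializer and deserializer above fit together. The Json4sExample object and the Event case class are assumptions introduced only for illustration, with json4s DefaultFormats supplying the implicit Formats.

import com.ovoenergy.kafka.serialization.json4s.Json4sSerialization
import org.json4s.{DefaultFormats, Formats}

// Hypothetical payload type, introduced only for this sketch.
case class Event(id: Int, name: String)

object Json4sExample extends Json4sSerialization {
  implicit val formats: Formats = DefaultFormats

  def roundTrip(): Event = {
    // Kafka (de)serializers take the topic name alongside the payload.
    val bytes = json4sSerializer[Event].serialize("events", Event(1, "created"))
    json4sDeserializer[Event].deserialize("events", bytes)
  }
}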
Example 112
Source File: GenericAvroSerializerSuite.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Input, Output}
import org.apache.avro.{Schema, SchemaBuilder}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SharedSparkContext, SparkFunSuite}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema : Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }

  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
} 
Example 113
Source File: MqttConfig.scala    From akka-iot-mqtt-v2   with GNU Lesser General Public License v3.0 5 votes vote down vote up
package akkaiot

import scala.concurrent.duration._

import java.io.Serializable
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
import java.io.ObjectInputStream
import java.io.ObjectOutputStream

import com.sandinh.paho.akka._
import com.sandinh.paho.akka.MqttPubSub._

object MqttConfig {
  val topic = "akka-iot-mqtt-topic"

  // Pub-Sub config
  val psConfig = PSConfig(
    brokerUrl = "tcp://test.mosquitto.org:1883",
    userName = null,
    password = null,
    stashTimeToLive = 1.minute,
    stashCapacity = 8000,
    reconnectDelayMin = 10.millis,
    reconnectDelayMax = 30.seconds,
    cleanSession = false
  )

  // Serialize object to byte array
  def writeToByteArray(obj: Any): Array[Byte] = {
    val baos = new ByteArrayOutputStream
    val oos = new ObjectOutputStream(baos)
    try {
      oos.writeObject(obj)
      baos.toByteArray
    } finally {
      try {
        oos.close
      } catch {
        case _: Throwable => // Do nothing
      }
    }
  }

  // Deserialize object from byte array
  def readFromByteArray[A](bytes: Array[Byte]): A = {
    val bais = new ByteArrayInputStream(bytes)
    val ois = new ObjectInputStream(bais)
    try {
      val obj = ois.readObject
      obj.asInstanceOf[A]
    } finally {
      try {
        ois.close
      } catch {
        case _: Throwable => // Do nothing
      }
    }
  }
} 
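A tiny round trip with the helpers above, again only as an illustration: the SensorReading case class is hypothetical and merely shows that any Serializable value can pass through writeToByteArray and readFromByteArray unchanged.

import akkaiot.MqttConfig

// Hypothetical message type used only for this sketch; case classes are Serializable.
case class SensorReading(deviceId: String, value: Double)

object MqttConfigExample {
  def main(args: Array[String]): Unit = {
    val bytes    = MqttConfig.writeToByteArray(SensorReading("device-1", 21.5))
    val restored = MqttConfig.readFromByteArray[SensorReading](bytes)
    assert(restored == SensorReading("device-1", 21.5))
  }
}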
Example 114
Source File: Sedes.scala    From shc   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.datasources.hbase

import java.io.ByteArrayInputStream

import org.apache.avro.Schema
import org.apache.avro.Schema.Type._
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io._
import org.apache.commons.io.output.ByteArrayOutputStream
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.types._

trait Sedes {
  def serialize(value: Any): Array[Byte]
  def deserialize(bytes: Array[Byte], start: Int, end: Int): Any
}

class DoubleSedes extends Sedes {
  override def serialize(value: Any): Array[Byte] = Bytes.toBytes(value.asInstanceOf[Double])
  override def deserialize(bytes: Array[Byte], start: Int, end: Int): Any = {
    Bytes.toDouble(bytes, start)
  }
} 
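A short round-trip check, illustrative only and not taken from shc: a Double written by serialize above should come back unchanged from deserialize.

import org.apache.spark.sql.execution.datasources.hbase.DoubleSedes

object SedesExample {
  def main(args: Array[String]): Unit = {
    val sedes = new DoubleSedes()
    val bytes = sedes.serialize(math.Pi)          // 8 bytes from Bytes.toBytes
    val back  = sedes.deserialize(bytes, 0, bytes.length)
    assert(back == math.Pi)
  }
}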
Example 115
Source File: package.scala    From pulsar4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.pulsar4s

import java.io.ByteArrayOutputStream
import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets

import com.sksamuel.avro4s.AvroSchema
import com.sksamuel.avro4s.AvroInputStream
import com.sksamuel.avro4s.AvroOutputStream
import com.sksamuel.avro4s.Decoder
import com.sksamuel.avro4s.Encoder
import com.sksamuel.avro4s.SchemaFor
import org.apache.pulsar.client.api.Schema
import org.apache.pulsar.common.schema.{SchemaInfo, SchemaType}

import scala.annotation.implicitNotFound


package object avro {

  @implicitNotFound("No Avro Schema for type ${T} found.")
  implicit def avroSchema[T: Manifest: SchemaFor: Encoder: Decoder]: Schema[T] = new Schema[T] {

    val schema: org.apache.avro.Schema = AvroSchema[T]

    override def clone(): Schema[T] = this

    override def encode(t: T): Array[Byte] = {
      val baos = new ByteArrayOutputStream
      val aos = AvroOutputStream.binary[T].to(baos).build(schema)
      aos.write(t)
      aos.flush()
      aos.close()
      baos.toByteArray()
    }

    override def decode(bytes: Array[Byte]): T = {
      val bais = new ByteArrayInputStream(bytes)
      val ais = AvroInputStream.binary[T].from(bais).build(schema)
      val first = ais.iterator.next()
      ais.close()
      first
    }

    override def getSchemaInfo: SchemaInfo =
      new SchemaInfo()
        .setName(manifest[T].runtimeClass.getCanonicalName)
        .setType(SchemaType.AVRO)
        .setSchema(schema.toString.getBytes(StandardCharsets.UTF_8))
  }
} 
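A hedged usage sketch: assuming avro4s can derive SchemaFor, Encoder and Decoder for a flat case class, the implicit avroSchema above yields a Pulsar Schema[T] whose encode and decode round-trip. The Sensor type below is illustrative and not part of pulsar4s.

import com.sksamuel.pulsar4s.avro._
import org.apache.pulsar.client.api.Schema

// Illustrative type only; avro4s derives its Avro schema at compile time.
case class Sensor(id: String, reading: Double)

object AvroSchemaExample {
  val schema: Schema[Sensor] = avroSchema[Sensor]

  def roundTrip(): Sensor = {
    val bytes = schema.encode(Sensor("s-1", 21.5))
    schema.decode(bytes) // expected to equal Sensor("s-1", 21.5)
  }
}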
Example 116
Source File: Logger.scala    From c4proto   with Apache License 2.0 5 votes vote down vote up
package ee.cone.c4actor_logback_impl

import java.io.ByteArrayInputStream
import java.nio.file.{Files, Path, Paths}
import java.nio.charset.StandardCharsets.UTF_8

import ch.qos.logback.classic.LoggerContext
import ch.qos.logback.classic.joran.JoranConfigurator
import com.typesafe.scalalogging.LazyLogging
import ee.cone.c4actor._
import ee.cone.c4di.c4
import org.slf4j.LoggerFactory

import scala.annotation.tailrec

@c4("BasicLoggingApp") final class LoggerTest extends Executable with Early with LazyLogging {
  def run(): Unit = if(Option(System.getenv("C4LOGBACK_TEST")).nonEmpty) iteration(0L)
  @tailrec private def iteration(v: Long): Unit = {
    Thread.sleep(1000)
    logger.warn(s"logger test $v")
    logger.debug(s"logger test $v")
    iteration(v+1L)
  }
}

@c4("BasicLoggingApp") final class DefLoggerConfigurator(
  config: ListConfig,
  catchNonFatal: CatchNonFatal
) extends LoggerConfigurator(
  config.get("C4LOGBACK_XML").map(Paths.get(_)) ::: Paths.get("/tmp/logback.xml") :: Nil,
  catchNonFatal,
  5000
) with Executable with Early

class LoggerConfigurator(paths: List[Path], catchNonFatal: CatchNonFatal, scanPeriod: Long) extends Executable {
  def run(): Unit = iteration("")
  @tailrec private def iteration(wasContent: String): Unit = {
    val content =
      s"""
      <configuration>
        <statusListener class="ch.qos.logback.core.status.NopStatusListener" />
        ${paths.map(path=>if(Files.exists (path)) new String(Files.readAllBytes(path), UTF_8) else "").mkString}
        <appender name="CON" class="ch.qos.logback.core.ConsoleAppender">
          <encoder><pattern>%d{HH:mm:ss.SSS} %-5level %logger{36} - %msg%n</pattern></encoder>
        </appender>
        <appender name="ASYNCCON" class="ch.qos.logback.classic.AsyncAppender">
          <discardingThreshold>0</discardingThreshold>
          <queueSize>1000000</queueSize>
          <appender-ref ref="CON" />
        </appender>
        <root level="INFO">
          <appender-ref ref="ASYNCCON" />
        </root>
        <shutdownHook/>
      </configuration>
      """
    if(wasContent != content) reconfigure(content)
    Thread.sleep(scanPeriod)
    iteration(content)
  }
  def reconfigure(content: String): Unit = catchNonFatal{
    println("logback reconfigure 2 started")
    val context = LoggerFactory.getILoggerFactory.asInstanceOf[LoggerContext]
    val configurator = new JoranConfigurator()
    configurator.setContext(context)
    context.reset()
    configurator.doConfigure(new ByteArrayInputStream(content.getBytes(UTF_8)))
    println("logback reconfigure 2 ok")
  }("reconfigure"){ e => () }
} 
Example 117
Source File: GenericAvroSerializerSuite.scala    From BigDatalog   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Output, Input}
import org.apache.avro.{SchemaBuilder, Schema}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SparkFunSuite, SharedSparkContext}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema : Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }

  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
} 
Example 118
Source File: SerializationTestHelper.scala    From xmlconfect   with Apache License 2.0 5 votes vote down vote up
package com.mthaler.xmlconfect

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream }

object SerializationTestHelper {

  
  def serializeDeserialize[T](obj: T): T = {
    val bout = new ByteArrayOutputStream()
    val out = new ObjectOutputStream(bout)
    out.writeObject(obj)
    val bin = new ByteArrayInputStream(bout.toByteArray)
    val in = new ObjectInputStream(bin)
    in.readObject().asInstanceOf[T]
  }
} 
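In a spec, the helper above can assert that a value survives Java serialization unchanged. The Config case class here is a stand-in used only for this sketch.

import com.mthaler.xmlconfect.SerializationTestHelper

// Hypothetical value type; case classes are Serializable by default.
case class Config(name: String, retries: Int)

object SerializationTestHelperExample {
  def main(args: Array[String]): Unit = {
    val original = Config("xmlconfect", 3)
    val copy     = SerializationTestHelper.serializeDeserialize(original)
    assert(copy == original) // structural equality survives the round trip
  }
}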
Example 119
Source File: ToCurlConverterTest.scala    From sttp   with Apache License 2.0 5 votes vote down vote up
package sttp.client

import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets

import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class ToCurlConverterTest extends AnyFlatSpec with Matchers with ToCurlConverterTestExtension {
  private val localhost = uri"http://localhost"

  it should "convert base request" in {
    basicRequest
      .get(uri"$localhost")
      .toCurl shouldBe """curl -L --max-redirs 32 -X GET 'http://localhost'"""
  }

  it should "convert request with method to curl" in {
    basicRequest.get(localhost).toCurl should include("-X GET")
    basicRequest.post(localhost).toCurl should include("-X POST")
    basicRequest.put(localhost).toCurl should include("-X PUT")
    basicRequest.delete(localhost).toCurl should include("-X DELETE")
    basicRequest.patch(localhost).toCurl should include("-X PATCH")
    basicRequest.head(localhost).toCurl should include("-X HEAD")
    basicRequest.options(localhost).toCurl should include("-X OPTIONS")
  }

  it should "convert request with header" in {
    basicRequest.header("User-Agent", "myapp").get(localhost).toCurl should include(
      """-H 'User-Agent: myapp'"""
    )
  }

  it should "convert request with body" in {
    basicRequest.body(Map("name" -> "john", "org" -> "sml")).post(localhost).toCurl should include(
      """-H 'Content-Type: application/x-www-form-urlencoded' -H 'Content-Length: 17' -F 'name=john&org=sml'"""
    )
    basicRequest.body("name=john").post(localhost).toCurl should include(
      """-H 'Content-Type: text/plain; charset=utf-8' -H 'Content-Length: 9' --data 'name=john'"""
    )
    basicRequest.body("name=john", StandardCharsets.ISO_8859_1.name()).post(localhost).toCurl should include(
      """-H 'Content-Type: text/plain; charset=ISO-8859-1' -H 'Content-Length: 9' --data 'name=john'"""
    )
    basicRequest.body("name='john'").post(localhost).toCurl should include(
      """-H 'Content-Type: text/plain; charset=utf-8' -H 'Content-Length: 11' --data 'name=\'john\''"""
    )
    basicRequest.body("name=\"john\"").post(localhost).toCurl should include(
      """-H 'Content-Type: text/plain; charset=utf-8' -H 'Content-Length: 11' --data 'name="john"'"""
    )
  }

  it should "convert request with options" in {
    basicRequest.followRedirects(false).get(localhost).toCurl should not include "-L"
    basicRequest.maxRedirects(11).get(localhost).toCurl should include("--max-redirs 11")
  }

  it should "put placeholder when sending binary data" in {
    val testBodyBytes = "this is the body".getBytes("UTF-8")

    val curl = basicRequest
      .post(localhost)
      .body(new ByteArrayInputStream(testBodyBytes))
      .toCurl
    curl should include("--data-binary <PLACEHOLDER>")
  }

  it should "render multipart form data if content is a plain string" in {
    basicRequest.multipartBody(multipart("k1", "v1"), multipart("k2", "v2")).post(localhost).toCurl should include(
      """--form 'k1=v1' --form 'k2=v2'"""
    )
  }
} 
Example 120
Source File: RetryWhenDefaultTest.scala    From sttp   with Apache License 2.0 5 votes vote down vote up
package sttp.client

import java.io.ByteArrayInputStream

import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers
import sttp.client
import sttp.model.StatusCode

class RetryWhenDefaultTest extends AnyFlatSpec with Matchers {
  private val simpleRequest = basicRequest.get(uri"http://localhost")

  it should "not retry 200 response" in {
    RetryWhen.Default(simpleRequest, Right(Response.ok(""))) shouldBe false
  }

  it should "retry 500 response" in {
    RetryWhen.Default(simpleRequest, Right(Response("", StatusCode.InternalServerError))) shouldBe true
  }

  it should "retry connection exceptions" in {
    RetryWhen.Default(simpleRequest, Left(new client.SttpClientException.ConnectException(null))) shouldBe true
  }

  it should "not retry read exceptions" in {
    RetryWhen.Default(simpleRequest, Left(new client.SttpClientException.ReadException(null))) shouldBe false
  }

  it should "not retry input stream bodies" in {
    RetryWhen.Default(
      simpleRequest.body(new ByteArrayInputStream(new Array[Byte](8))),
      Right(Response("", StatusCode.InternalServerError))
    ) shouldBe false
  }
} 
Example 121
Source File: BackupWriter.scala    From recogito2   with Apache License 2.0 5 votes vote down vote up
package controllers.document

import controllers.HasConfig
import java.io.{File, FileInputStream, FileOutputStream, BufferedInputStream, ByteArrayInputStream, InputStream, PrintWriter}
import java.nio.file.Paths
import java.math.BigInteger
import java.security.{MessageDigest, DigestInputStream}
import java.util.UUID
import java.util.zip.{ZipEntry, ZipOutputStream}
import services.HasDate
import services.annotation.{Annotation, AnnotationService}
import services.document.{ExtendedDocumentMetadata, DocumentToJSON}
import services.generated.tables.records.{DocumentRecord, DocumentFilepartRecord}
import play.api.libs.json.Json
import play.api.libs.Files.TemporaryFileCreator
import scala.concurrent.{ExecutionContext, Future}
import storage.TempDir
import storage.uploads.Uploads

trait BackupWriter extends HasBackupValidation { self: HasConfig =>
  
  // Frontend annotation format
  import services.annotation.FrontendAnnotation._
  
  private val BUFFER_SIZE = 2048
  
  private def writeToZip(inputStream: InputStream, filename: String, zip: ZipOutputStream) = {
    zip.putNextEntry(new ZipEntry(filename))
     
    val md = MessageDigest.getInstance(ALGORITHM)    
    val in = new DigestInputStream(new BufferedInputStream(inputStream), md)

    val data = new Array[Byte](BUFFER_SIZE)
    var count: Int = 0

    while ({ count = in.read(data, 0, BUFFER_SIZE); count } > -1) {
      zip.write(data, 0, count)
    }

    in.close()
    zip.closeEntry()
    
    new BigInteger(1, md.digest()).toString(16)
  }
  
  def createBackup(doc: ExtendedDocumentMetadata)(implicit ctx: ExecutionContext, uploads: Uploads, 
      annotations: AnnotationService, tmpFile: TemporaryFileCreator): Future[File] = {
    
    def getFileAsStream(owner: String, documentId: String, filename: String) = {
      val dir = uploads.getDocumentDir(owner, documentId).get // Fail hard if the dir doesn't exist
      new FileInputStream(new File(dir, filename))
    }
    
    def getManifestAsStream() = {
      val manifest = "Recogito-Version: 2.0.1-alpha"
      new ByteArrayInputStream(manifest.getBytes)
    }
    
    def getMetadataAsStream(doc: ExtendedDocumentMetadata) = {
      
      // DocumentRecord JSON serialization
      import services.document.DocumentToJSON._
      
      val json = Json.prettyPrint(Json.toJson((doc.document, doc.fileparts)))
      new ByteArrayInputStream(json.getBytes)
    }
    
    def getAnnotationsAsStream(docId: String, annotations: Seq[Annotation], parts: Seq[DocumentFilepartRecord]): InputStream = {
      val path = Paths.get(TempDir.get()(self.config), s"${docId}_annotations.json")
      val tmp = tmpFile.create(path)
      val writer = new PrintWriter(path.toFile)
      annotations.foreach(a => writer.println(Json.stringify(Json.toJson(a))))
      writer.close()
      new FileInputStream(path.toFile)
    }
    
    Future {
      tmpFile.create(Paths.get(TempDir.get()(self.config), s"${doc.id}.zip"))
    } flatMap { zipFile =>
      val zipStream = new ZipOutputStream(new FileOutputStream(zipFile.path.toFile))

      writeToZip(getManifestAsStream(), "manifest", zipStream)
      val metadataHash = writeToZip(getMetadataAsStream(doc), "metadata.json", zipStream)

      val fileHashes = doc.fileparts.map { part =>
        writeToZip(getFileAsStream(doc.ownerName, doc.id, part.getFile), "parts" + File.separator + part.getFile, zipStream)
      }

      annotations.findByDocId(doc.id).map { annotations =>
        val annotationsHash = writeToZip(getAnnotationsAsStream(doc.id, annotations.map(_._1), doc.fileparts), "annotations.jsonl", zipStream)
        
        val signature = computeSignature(metadataHash, fileHashes, annotationsHash)
        writeToZip(new ByteArrayInputStream(signature.getBytes), "signature", zipStream)
        
        zipStream.close()
        zipFile.path.toFile
      }
    }
  }
  
} 
Example 122
Source File: DefineMacroCmd.scala    From piglet   with Apache License 2.0 5 votes vote down vote up
package dbis.piglet.op.cmd

import java.io.{ObjectInputStream, ByteArrayInputStream, ObjectOutputStream, ByteArrayOutputStream}
import dbis.piglet.plan.DataflowPlan
import scala.collection.mutable.ListBuffer
import dbis.piglet.op.{Pipe,PigOperator}


case class DefineMacroCmd(
    out: Pipe, 
    macroName: String, 
    params: Option[List[String]], 
    stmts: List[PigOperator]
  ) extends PigOperator(out) {

  var subPlan: Option[DataflowPlan] = None
  var inPipes = List[Pipe]()

  def deepClone(): DefineMacroCmd = {
      val baos = new ByteArrayOutputStream()
      val oos = new ObjectOutputStream(baos)
      oos.writeObject(this)
      val bais = new ByteArrayInputStream(baos.toByteArray())
      val ois = new ObjectInputStream(bais)
      ois.readObject().asInstanceOf[DefineMacroCmd]
  }

  override def preparePlan: Unit = {
    // (implementation omitted in this example)
  }

  def pipeParamPositions(): List[Int] = {
    val l = ListBuffer[Int]()
    inPipes.foreach(i => {
      val pos = params.get.indexOf(i.name.substring(1))
      if (pos >= 0) l += pos
    })
    l.toList
  }
}
Example 123
Source File: ManifestUploader.scala    From teamcity-s3-plugin   with Apache License 2.0 5 votes vote down vote up
package com.gu.teamcity

import java.io.ByteArrayInputStream
import java.util.Date

import jetbrains.buildServer.messages.{BuildMessage1, DefaultMessagesInfo, Status}
import jetbrains.buildServer.serverSide.{BuildServerAdapter, SRunningBuild}
import org.joda.time.{DateTime, DateTimeZone}
import org.json4s.JsonAST.JObject
import org.json4s.JsonDSL._
import org.json4s.native.JsonMethods._

import scala.util.{Failure, Success}

class ManifestUploader(config: S3ConfigManager, s3: S3) extends BuildServerAdapter {

  override def beforeBuildFinish(runningBuild: SRunningBuild) {
    import scala.collection.convert.wrapAsScala._

    if (!runningBuild.isHasInternalArtifactsOnly) {
      val properties = Seq(
        "projectName" -> S3Plugin.cleanFullName(runningBuild),
        "buildNumber" -> runningBuild.getBuildNumber,
        "startTime" -> new DateTime(runningBuild.getStartDate).withZone(DateTimeZone.UTC).toString //Joda default is ISO8601
      ) ++ runningBuild.getRevisions.flatMap(revision => Seq(
        "revision" -> revision.getRevision,
        "vcsURL" -> revision.getRoot.getProperties.get("url")
      )) ++ Option(runningBuild.getBranch).map(b =>
        "branch" -> b.getDisplayName
      ).orElse(runningBuild.getVcsRootEntries.headOption.map(r =>
        "branch" -> r.getProperties.get("branch")
      ))

      val propertiesJSON = pretty(render(properties.foldLeft(JObject())(_ ~ _)))
      val jsBytes = propertiesJSON.getBytes("UTF-8")

      config.buildManifestBucket.map { bucket =>
        s3.upload(bucket, runningBuild, "build.json", new ByteArrayInputStream(jsBytes), jsBytes.length) match {			
          case Failure(e) => runningBuild.getBuildLog().message(s"Error uploading manifest: ${e.getMessage}",
              Status.ERROR,new Date,DefaultMessagesInfo.MSG_BUILD_FAILURE,DefaultMessagesInfo.SOURCE_ID,null)
          case Success(_) => runningBuild.getBuildLog().message("Manifest S3 upload complete",
              Status.NORMAL,new Date,DefaultMessagesInfo.MSG_TEXT,DefaultMessagesInfo.SOURCE_ID,null) 
        }
      }
    }
  }

  private def normalMessage(text: String) =
    new BuildMessage1(DefaultMessagesInfo.SOURCE_ID, DefaultMessagesInfo.MSG_TEXT, Status.NORMAL, new Date, text)
} 
Example 124
Source File: ModelStateSerde.scala    From kafka-with-akka-streams-kafka-streams-tutorial   with Apache License 2.0 5 votes vote down vote up
package com.lightbend.scala.kafkastreams.store.store

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}
import java.util

import com.lightbend.model.modeldescriptor.ModelDescriptor
import com.lightbend.scala.modelServer.model.PMML.PMMLModel
import com.lightbend.scala.modelServer.model.tensorflow.TensorFlowModel
import com.lightbend.scala.modelServer.model.{ModelToServeStats, ModelWithDescriptor}
import com.lightbend.scala.kafkastreams.store.StoreState
import org.apache.kafka.common.serialization.{Deserializer, Serde, Serializer}


class ModelStateSerde extends Serde[StoreState] {

  private val mserializer = new ModelStateSerializer()
  private val mdeserializer = new ModelStateDeserializer()

  override def deserializer() = mdeserializer

  override def serializer() = mserializer

  override def configure(configs: util.Map[String, _], isKey: Boolean) = {}

  override def close() = {}
}

object ModelStateDeserializer {
  val factories = Map(
    ModelDescriptor.ModelType.PMML.index -> PMMLModel,
    ModelDescriptor.ModelType.TENSORFLOW.index -> TensorFlowModel
  )
}

class ModelStateDeserializer extends Deserializer[StoreState] {

  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}

  override def deserialize(topic: String, data: Array[Byte]): StoreState = {
    if(data != null) {
      val input = new DataInputStream(new ByteArrayInputStream(data))
      new StoreState(ModelWithDescriptor.readModel(input), ModelWithDescriptor.readModel(input),
        ModelToServeStats.readServingInfo(input), ModelToServeStats.readServingInfo(input))
    }
    else new StoreState()
  }

  override def close(): Unit = {}

}

class ModelStateSerializer extends Serializer[StoreState] {

  private val bos = new ByteArrayOutputStream()

  override def serialize(topic: String, state: StoreState): Array[Byte] = {
    bos.reset()
    val output = new DataOutputStream(bos)
    ModelWithDescriptor.writeModel(output, state.currentModel.orNull)
    ModelWithDescriptor.writeModel(output, state.newModel.orNull)
    ModelToServeStats.writeServingInfo(output, state.currentState.orNull)
    ModelToServeStats.writeServingInfo(output, state.newState.orNull)
    try {
      output.flush()
      output.close()
    } catch {
      case t: Throwable =>
    }
    bos.toByteArray
  }

  override def close(): Unit = {}

  override def configure(configs: util.Map[String, _], isKey: Boolean) = {}
} 
Example 125
Source File: StringParserTest.scala    From MoVE   with Mozilla Public License 2.0 5 votes vote down vote up
package de.thm.move.loader.parser

import java.io.{ByteArrayInputStream, InputStreamReader}
import java.nio.charset.StandardCharsets

import scala.util._
import de.thm.move.MoveSpec
import de.thm.move.loader.parser.PropertyParser._
import de.thm.move.loader.parser.ast._

class StringParserTest extends MoveSpec {
  val parser = new ModelicaParser()
  def parseString(str:String): String = {
    parser.stringLiteral(str)
  }

  "The parser for Modelica strings" should "parse simple strings" in {
    val s = "this is a super awesome test"
    true shouldBe true
  }

  "PropertyParser#transformEscapeChars" should
    "transform literal escape characters to ansi escape characters" in {
      val s = "this\\t\\tis a\\n test\\rmöb\\b"
      parser.transformEscapeChars(s) shouldBe "this\t\tis a\n test\rmöb\b"

      val s2 = "\\n\\n\\t"
      parser.transformEscapeChars(s2) shouldBe "\n\n\t"
  }

  it should "return the same string for strings without escape characters" in {
    val s = "this is awesome"
    parser.transformEscapeChars(s) shouldBe s
  }
} 
Example 126
Source File: package.scala    From MoVE   with Mozilla Public License 2.0 5 votes vote down vote up
package de.thm.move.loader

import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets

import scala.util._
import org.scalatest.Matchers._
import de.thm.move.loader.parser.ast._

package object parser {
  private val parser: ModelicaParserLike = new ModelicaParser
  def parse(str:String): Try[List[Model]] =
    parser.parse(new ByteArrayInputStream(str.getBytes(StandardCharsets.UTF_8)))

  val withParseSuccess: String => Model = parse(_) match {
    case Success(elem) => elem.head
    case Failure(exc) => throw exc
  }

  val withException: String => Unit = parse(_) match {
    case Success(_) => throw new IllegalStateException("Expected failure")
    case Failure(_) => //yeay
  }

  def iconEqual(icon1:Model, icon2:Model): Unit = {
    icon2.name shouldBe icon1.name
    (icon1.annot, icon2.annot) match {
      case (Icon(system1, shapes1, _,_),Icon(system2,shapes2,_,_)) =>
        system2 shouldBe system1
        shapes2 shouldBe shapes1
      case _ => throw new AssertionError(s"Given icon1 and icon2 aren't both Icons!")
    }
  }

  def annotationModel(modelname:String, content:String): String =
    s"""
       |model $modelname
       | annotation(
       |  $content
       | );
       |end $modelname;
     """.stripMargin

  def graphicModel(modelname:String, content:String):String = {
    annotationModel(modelname,
    s"""
       |Icon( graphics = {
       |$content
       |})
     """.stripMargin
    )
  }
} 
Example 127
Source File: ModelSerializationTestHelper.scala    From aloha   with MIT License 5 votes vote down vote up
package com.eharmony.aloha

import java.io.{ObjectInputStream, ByteArrayInputStream, ByteArrayOutputStream, ObjectOutputStream}


trait ModelSerializationTestHelper {
  def serializeDeserializeRoundTrip[A <: java.io.Serializable](a: A): A = {
    val baos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(baos)
    oos.writeObject(a)
    val bais = new ByteArrayInputStream(baos.toByteArray)
    val ois = new ObjectInputStream(bais)
    val out = ois.readObject()
    out.asInstanceOf[A]
  }
} 
Example 128
Source File: CypherParser.scala    From ingraph   with Eclipse Public License 1.0 5 votes vote down vote up
package ingraph.compiler.cypher2gplan

import java.io.ByteArrayInputStream

import ingraph.compiler.exceptions.CompilerException
import org.apache.log4j.{Level, Logger}
import org.eclipse.emf.common.util.URI
import org.eclipse.emf.ecore.resource.Resource
import org.eclipse.xtext.diagnostics.Severity
import org.eclipse.xtext.resource.{XtextResource, XtextResourceSet}
import org.eclipse.xtext.util.CancelIndicator
import org.eclipse.xtext.validation.CheckMode
import org.slizaa.neo4j.opencypher.OpenCypherStandaloneSetup
import org.slizaa.neo4j.opencypher.openCypher.Cypher

import scala.collection.JavaConverters._


object CypherParser {
  def parseFile(fileName: String): Cypher = {
    Logger.getLogger("org.eclipse.xtext").setLevel(Level.ERROR)

    // https://typefox.io/how-and-why-use-xtext-without-the-ide
    val injector = new OpenCypherStandaloneSetup().createInjectorAndDoEMFRegistration()
    val resourceSet = injector.getInstance(classOf[XtextResourceSet])
    val filePath = "../queries/" + fileName + ".cypher"
    val resource = resourceSet.getResource(URI.createFileURI(filePath), true)
    validateAndThrowError(resource)

    resource.getContents.get(0).asInstanceOf[Cypher]
  }

  def parseString(queryString: String): Cypher = {
    Logger.getLogger("org.eclipse.xtext").setLevel(Level.ERROR)

    // https://wiki.eclipse.org/Xtext/FAQ
    val injector = new OpenCypherStandaloneSetup().createInjectorAndDoEMFRegistration()
    val resourceSet = injector.getInstance(classOf[XtextResourceSet])
    val resource = resourceSet.createResource(URI.createURI("http:/example.cypher"))
    val in = new ByteArrayInputStream(queryString.getBytes())
    resource.load(in, resourceSet.getLoadOptions())
    validateAndThrowError(resource)

    resource.getContents.get(0).asInstanceOf[Cypher]
  }

  def validateAndThrowError(resource: Resource) {
    var seenError = false
    var firstError: String = null
    val validator = resource.asInstanceOf[XtextResource].getResourceServiceProvider.getResourceValidator
    val issues = validator.validate(resource, CheckMode.ALL, CancelIndicator.NullImpl).asScala
    for (issue <- issues) {
      if (issue.getSeverity == Severity.ERROR && !seenError) {
        seenError = true
        firstError = issue.getMessage
      }
      println(issue.getMessage)
    }
    if (seenError) {
      throw new CompilerException(s"Error during cypher parse, the first error was: ${firstError}")
    }
  }
} 
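A brief usage sketch, not part of the ingraph sources: parseString returns the EMF AST root for a query string, and validation errors surface as a CompilerException. The query text below is arbitrary.

import ingraph.compiler.cypher2gplan.CypherParser
import org.slizaa.neo4j.opencypher.openCypher.Cypher

object CypherParserExample {
  def main(args: Array[String]): Unit = {
    val ast: Cypher = CypherParser.parseString("MATCH (n) RETURN n")
    println(ast.eClass().getName) // EMF metadata of the parsed root node
  }
}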
Example 129
Source File: JacksonParserSuite.scala    From circe-jackson   with Apache License 2.0 5 votes vote down vote up
package io.circe.jackson

import cats.data.Validated
import com.fasterxml.jackson.core.JsonToken
import com.fasterxml.jackson.databind.{ ObjectMapper, ObjectReader }
import io.circe.Json
import io.circe.testing.ParserTests
import java.io.{ ByteArrayInputStream, File }

import scala.io.Source

class JacksonParserSuite extends CirceSuite with JacksonInstances {
  checkAll("Parser", ParserTests(`package`).fromString(arbitraryCleanedJson, shrinkJson))
  checkAll(
    "Parser",
    ParserTests(`package`).fromFunction[Array[Byte]]("fromByteArray")(
      s => s.getBytes("UTF-8"),
      p => p.parseByteArray _,
      p => p.decodeByteArray[Json] _,
      p => p.decodeByteArrayAccumulating[Json] _
    )(arbitraryCleanedJson, shrinkJson)
  )

  "parse and decode(Accumulating)" should "fail on invalid input" in forAll { (s: String) =>
    assert(parse(s"Not JSON $s").isLeft)
    assert(decode[Json](s"Not JSON $s").isLeft)
    assert(decodeAccumulating[Json](s"Not JSON $s").isInvalid)
  }

  "parseFile and decodeFile(Accumulating)" should "parse a JSON file" in {
    val url = getClass.getResource("/io/circe/jackson/examples/glossary.json")
    val file = new File(url.toURI)

    assert(decodeFile[Json](file) === Right(glossary))
    assert(decodeFileAccumulating[Json](file) == Validated.valid(glossary))
    assert(parseFile(file) === Right(glossary))
  }

  "parseByteArray and decodeByteArray(Accumulating)" should "parse an array of elementAsBytes" in {
    val bytes = glossaryAsBytes

    assert(decodeByteArray[Json](bytes) === Right(glossary))
    assert(decodeByteArrayAccumulating[Json](bytes) === Validated.valid(glossary))
    assert(parseByteArray(bytes) === Right(glossary))
  }

  for (elementCount <- 1 to 4) {
    "CirceJsonDeserializer" should s"be useable with Jackson's MappingIterator " +
      s"with ${elementCount} elements in array" in {
      val input = new ByteArrayInputStream(createJsonArrayAsBytes(glossaryAsBytes, elementCount))
      val objectMapper = new ObjectMapper()
      objectMapper.registerModule(CirceJsonModule)
      val jsonParser = objectMapper.getFactory.createParser(input)

      assert(jsonParser.nextToken() == JsonToken.START_ARRAY)
      assert(jsonParser.nextToken() == JsonToken.START_OBJECT)

      val reader = createReader(objectMapper).forType(classOf[Json])
      val iterator = reader.readValues[Json](jsonParser)
      var counter = 0
      while (iterator.hasNext) {
        val glossaryFromIterator = iterator.next()
        assert(glossary == glossaryFromIterator)
        counter = counter + 1
      }
      assert(counter == elementCount)
    }
  }

  // workaround warnings from compiler with Jackson 2.5
  @unchecked
  private def createReader(objectMapper: ObjectMapper): ObjectReader =
    objectMapper.reader()

  private def createJsonArrayAsBytes(elementAsBytes: Array[Byte], elementCount: Int): Array[Byte] = {
    val byteArrayOutput = new java.io.ByteArrayOutputStream()
    byteArrayOutput.write('[')
    for (i <- 1 to elementCount) {
      if (i != 1) {
        byteArrayOutput.write(',')
      }
      byteArrayOutput.write(elementAsBytes)
    }
    byteArrayOutput.write(']')
    byteArrayOutput.toByteArray
  }

  private def glossaryAsBytes = {
    val stream = getClass.getResourceAsStream("/io/circe/jackson/examples/glossary.json")
    val source = Source.fromInputStream(stream)
    val bytes = source.map(_.toByte).toArray
    source.close()
    bytes
  }
} 
Example 130
Source File: Zip.scala    From scala-clippy   with Apache License 2.0 5 votes vote down vote up
package util

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.zip.{GZIPInputStream, GZIPOutputStream}

object Zip {
  private val BufferSize = 512

  def compress(string: String): Array[Byte] = {
    val os  = new ByteArrayOutputStream(string.length() / 5)
    val gos = new GZIPOutputStream(os)
    gos.write(string.getBytes("UTF-8"))
    gos.close()
    os.close()
    os.toByteArray
  }

  def decompress(compressed: Array[Byte]): String = {
    val is        = new ByteArrayInputStream(compressed)
    val gis       = new GZIPInputStream(is, BufferSize)
    val string    = new StringBuilder()
    val data      = new Array[Byte](BufferSize)
    var bytesRead = gis.read(data)
    while (bytesRead != -1) {
      string.append(new String(data, 0, bytesRead, "UTF-8"))
      bytesRead = gis.read(data)
    }
    gis.close()
    is.close()
    string.toString()
  }
} 
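A quick round trip with the helpers above; the size printed at the end is indicative only and depends on the input.

import util.Zip

object ZipExample {
  def main(args: Array[String]): Unit = {
    val text       = "clippy " * 1000 // highly repetitive, so it compresses well
    val compressed = Zip.compress(text)
    val restored   = Zip.decompress(compressed)
    assert(restored == text)
    println(s"${text.length} chars -> ${compressed.length} bytes compressed")
  }
}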
Example 131
Source File: PlayRequestToRawBody.scala    From tapir   with Apache License 2.0 5 votes vote down vote up
package sttp.tapir.server.play

import java.io.ByteArrayInputStream
import java.nio.charset.Charset

import akka.stream.Materializer
import akka.util.ByteString
import play.api.mvc.{RawBuffer, Request}
import play.core.parsers.Multipart
import sttp.model.Part
import sttp.tapir.{RawBodyType, RawPart}
import sttp.tapir.internal._

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future

class PlayRequestToRawBody(serverOptions: PlayServerOptions) {
  def apply[R](bodyType: RawBodyType[R], charset: Option[Charset], request: Request[RawBuffer], body: ByteString)(implicit
      mat: Materializer
  ): Future[R] = {
    bodyType match {
      case RawBodyType.StringBody(defaultCharset) => Future(new String(body.toArray, charset.getOrElse(defaultCharset)))
      case RawBodyType.ByteArrayBody              => Future(body.toArray)
      case RawBodyType.ByteBufferBody             => Future(body.toByteBuffer)
      case RawBodyType.InputStreamBody            => Future(body.toArray).map(new ByteArrayInputStream(_))
      case RawBodyType.FileBody =>
        Future(java.nio.file.Files.write(serverOptions.temporaryFileCreator.create().path, body.toArray))
          .map(p => p.toFile)
      case m: RawBodyType.MultipartBody => multiPartRequestToRawBody(request, m, body)
    }
  }

  private def multiPartRequestToRawBody[R](request: Request[RawBuffer], m: RawBodyType.MultipartBody, body: ByteString)(implicit
      mat: Materializer
  ): Future[Seq[RawPart]] = {
    val bodyParser = serverOptions.playBodyParsers.multipartFormData(
      Multipart.handleFilePartAsTemporaryFile(serverOptions.temporaryFileCreator)
    )
    bodyParser.apply(request).run(body).flatMap {
      case Left(_) =>
        Future.failed(new IllegalArgumentException("Unable to parse multipart form data.")) // TODO
      case Right(value) =>
        val dataParts = value.dataParts.map {
          case (key, value) =>
            apply(
              m.partType(key).get,
              charset(m.partType(key).get),
              request,
              ByteString(value.flatMap(_.getBytes).toArray)
            ).map(body => Part(key, body).asInstanceOf[RawPart])
        }.toSeq

        val fileParts = value.files.map(f => {
          apply(
            m.partType(f.key).get,
            charset(m.partType(f.key).get),
            request,
            ByteString.apply(java.nio.file.Files.readAllBytes(f.ref.path))
          ).map(body =>
            Part(f.key, body, Map(f.key -> f.dispositionType, Part.FileNameDispositionParam -> f.filename), Nil)
              .asInstanceOf[RawPart]
          )
        })
        Future.sequence(dataParts ++ fileParts)
    }
  }
} 
Example 132
Source File: package.scala    From tapir   with Apache License 2.0 5 votes vote down vote up
package sttp.tapir.server.vertx

import java.io.{ByteArrayInputStream, InputStream}

import io.vertx.core.buffer.Buffer
import io.vertx.scala.core.Vertx

import scala.concurrent.Future

package object encoders {

  private val bufferSize = 1024

  
  private[vertx] def inputStreamToBuffer(is: InputStream, vertx: Vertx): Future[Buffer] = {
    is match {
      case _: ByteArrayInputStream => Future.successful(inputStreamToBufferUnsafe(is))
      case _                       => vertx.executeBlocking(() => inputStreamToBufferUnsafe(is))
    }
  }

  private def inputStreamToBufferUnsafe(is: InputStream): Buffer = {
    val buffer = Buffer.buffer()
    val buf = new Array[Byte](bufferSize)
    // Loops on available(), which is exact for in-memory streams such as ByteArrayInputStream
    // but may report zero before EOF for other stream types.
    while (is.available() > 0) {
      val read = is.read(buf)
      buffer.appendBytes(buf, 0, read)
    }
    buffer
  }

} 
Example 133
Source File: FinatraRequestToRawBody.scala    From tapir   with Apache License 2.0 5 votes vote down vote up
package sttp.tapir.server.finatra

import java.io.ByteArrayInputStream
import java.nio.ByteBuffer
import java.nio.charset.Charset

import com.twitter.finagle.http.Request
import com.twitter.finatra.http.request.RequestUtils
import com.twitter.io.Buf
import com.twitter.util.Future
import org.apache.commons.fileupload.FileItemHeaders
import sttp.model.{Part, Header}
import sttp.tapir.{RawPart, RawBodyType}

import scala.collection.immutable.Seq
import scala.collection.JavaConverters._

class FinatraRequestToRawBody(serverOptions: FinatraServerOptions) {
  def apply[R](bodyType: RawBodyType[R], body: Buf, charset: Option[Charset], request: Request): Future[R] = {
    def asByteArray: Array[Byte] = {
      val array = new Array[Byte](body.length)
      body.write(array, 0)
      array
    }

    def asByteBuffer: ByteBuffer = {
      val buffer = ByteBuffer.allocate(body.length)
      body.write(buffer)
      buffer.flip()
      buffer
    }

    bodyType match {
      case RawBodyType.StringBody(defaultCharset) => Future.value[R](new String(asByteArray, charset.getOrElse(defaultCharset)))
      case RawBodyType.ByteArrayBody              => Future.value[R](asByteArray)
      case RawBodyType.ByteBufferBody             => Future.value[R](asByteBuffer)
      case RawBodyType.InputStreamBody            => Future.value[R](new ByteArrayInputStream(asByteArray))
      case RawBodyType.FileBody                   => serverOptions.createFile(asByteArray)
      case m: RawBodyType.MultipartBody           => multiPartRequestToRawBody(request, m)
    }
  }

  private def parseDispositionParams(headerValue: Option[String]): Map[String, String] =
    headerValue
      .map(
        _.split(";")
          .map(_.trim)
          .tail
          .map(_.split("="))
          .map(array => array(0) -> array(1))
          .toMap
      )
      .getOrElse(Map.empty)

  private def getCharset(contentType: Option[String]): Option[Charset] =
    contentType.flatMap(
      _.split(";")
        .map(_.trim)
        .tail
        .map(_.split("="))
        .map(array => array(0) -> array(1))
        .toMap
        .get("charset")
        .map(Charset.forName)
    )

  private def multiPartRequestToRawBody(request: Request, m: RawBodyType.MultipartBody): Future[Seq[RawPart]] = {
    def fileItemHeaders(headers: FileItemHeaders): Seq[Header] = {
      headers.getHeaderNames.asScala
        .flatMap { name => headers.getHeaders(name).asScala.map(name -> _) }
        .toSeq
        .filter(_._1.toLowerCase != "content-disposition")
        .map { case (k, v) => Header(k, v) }
        .toList
    }

    Future
      .collect(
        RequestUtils
          .multiParams(request)
          .flatMap {
            case (name, multiPartItem) =>
              val dispositionParams: Map[String, String] =
                parseDispositionParams(Option(multiPartItem.headers.getHeader("content-disposition")))
              val charset = getCharset(multiPartItem.contentType)

              for {
                partType <- m.partType(name)
                futureBody = apply(partType, Buf.ByteArray.Owned(multiPartItem.data), charset, request)
              } yield futureBody
                .map(body =>
                  Part(name, body, otherDispositionParams = dispositionParams - "name", headers = fileItemHeaders(multiPartItem.headers))
                    .asInstanceOf[RawPart]
                )
          }
          .toSeq
      )
      .map(_.toList)
  }
} 
Example 134
Source File: Http4sRequestToRawBody.scala    From tapir   with Apache License 2.0 5 votes vote down vote up
package sttp.tapir.server.http4s

import java.io.ByteArrayInputStream

import cats.effect.{Blocker, ContextShift, Sync}
import cats.implicits._
import fs2.Chunk
import org.http4s.headers.{`Content-Disposition`, `Content-Type`}
import org.http4s.{Charset, EntityDecoder, Request, multipart}
import sttp.model.{Header, Part}
import sttp.tapir.{RawPart, RawBodyType}

class Http4sRequestToRawBody[F[_]: Sync: ContextShift](serverOptions: Http4sServerOptions[F]) {
  def apply[R](body: fs2.Stream[F, Byte], bodyType: RawBodyType[R], charset: Option[Charset], req: Request[F]): F[R] = {
    def asChunk: F[Chunk[Byte]] = body.compile.to(Chunk)
    def asByteArray: F[Array[Byte]] = body.compile.to(Chunk).map(_.toByteBuffer.array())

    bodyType match {
      case RawBodyType.StringBody(defaultCharset) => asByteArray.map(new String(_, charset.map(_.nioCharset).getOrElse(defaultCharset)))
      case RawBodyType.ByteArrayBody              => asByteArray
      case RawBodyType.ByteBufferBody             => asChunk.map(_.toByteBuffer)
      case RawBodyType.InputStreamBody            => asByteArray.map(new ByteArrayInputStream(_))
      case RawBodyType.FileBody =>
        serverOptions.createFile(serverOptions.blockingExecutionContext, req).flatMap { file =>
          val fileSink = fs2.io.file.writeAll(file.toPath, Blocker.liftExecutionContext(serverOptions.blockingExecutionContext))
          body.through(fileSink).compile.drain.map(_ => file)
        }
      case m: RawBodyType.MultipartBody =>
        // TODO: use MultipartDecoder.mixedMultipart once available?
        implicitly[EntityDecoder[F, multipart.Multipart[F]]].decode(req, strict = false).value.flatMap {
          case Left(failure) =>
            throw new IllegalArgumentException("Cannot decode multipart body: " + failure) // TODO
          case Right(mp) =>
            val rawPartsF: Vector[F[RawPart]] = mp.parts
              .flatMap(part => part.name.flatMap(name => m.partType(name)).map((part, _)).toList)
              .map { case (part, codecMeta) => toRawPart(part, codecMeta, req).asInstanceOf[F[RawPart]] }

            val rawParts: F[Vector[RawPart]] = rawPartsF.sequence

            rawParts.asInstanceOf[F[R]] // R is Seq[RawPart]
        }
    }
  }

  private def toRawPart[R](part: multipart.Part[F], partType: RawBodyType[R], req: Request[F]): F[Part[R]] = {
    val dispositionParams = part.headers.get(`Content-Disposition`).map(_.parameters).getOrElse(Map.empty)
    val charset = part.headers.get(`Content-Type`).flatMap(_.charset)
    apply(part.body, partType, charset, req)
      .map(r =>
        Part(
          part.name.getOrElse(""),
          r,
          otherDispositionParams = dispositionParams - Part.NameDispositionParam,
          headers = part.headers.toList.map(h => Header(h.name.value, h.value))
        )
      )
  }
} 
Example 135
Source File: ConsoleModuleTest.scala    From scala-server-toolkit   with MIT License 5 votes vote down vote up
package com.avast.sst.jvm.system.console

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import cats.effect.SyncIO
import org.scalatest.funsuite.AnyFunSuite

import scala.{Console => SConsole}

class ConsoleModuleTest extends AnyFunSuite {

  test("Console input") {
    SConsole.withIn(new ByteArrayInputStream("test input\n".getBytes("UTF-8"))) {
      val test = for {
        line <- ConsoleModule.make[SyncIO].readLine
      } yield assert(line === "test input")

      test.unsafeRunSync()
    }
  }

  test("Console output") {
    val out = new ByteArrayOutputStream()
    SConsole.withOut(out) {
      val test = for {
        _ <- ConsoleModule.make[SyncIO].printLine("test output")
      } yield ()

      test.unsafeRunSync()
    }

    assert(out.toString("UTF-8") === "test output\n")
  }

  test("Console error") {
    val out = new ByteArrayOutputStream()
    SConsole.withErr(out) {
      val test = for {
        _ <- ConsoleModule.make[SyncIO].printLineToError("test output")
      } yield ()

      test.unsafeRunSync()
    }

    assert(out.toString("UTF-8") === "test output\n")
  }

} 
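The same stdin/stdout redirection works without the ConsoleModule wrapper. A minimal sketch using only scala.Console, scala.io.StdIn and the JDK streams (object name is made up; trim is used so the check does not depend on the platform line separator):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, PrintStream}

import scala.{Console => SConsole}

object ConsoleRedirectSketch extends App {
  val in  = new ByteArrayInputStream("test input\n".getBytes("UTF-8"))
  val out = new ByteArrayOutputStream()

  SConsole.withIn(in) {
    SConsole.withOut(new PrintStream(out, true, "UTF-8")) {
      val line = scala.io.StdIn.readLine() // reads from the byte array, not the real stdin
      println(s"echo: $line")              // captured by the ByteArrayOutputStream
    }
  }

  assert(out.toString("UTF-8").trim == "echo: test input")
} 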
Example 136
Source File: DataWeaveCLITest.scala    From data-weave-native   with Apache License 2.0 5 votes vote down vote up
package org.mule.weave.dwnative.cli

import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
import java.io.PrintStream

import org.scalatest.FreeSpec
import org.scalatest.Matchers

import scala.io.Source

class DataWeaveCLITest extends FreeSpec with Matchers {

  "should work with output application/json" in {
    val out = System.out
    try {
      val stream = new ByteArrayOutputStream()
      System.setOut(new PrintStream(stream, true))
      new DataWeaveCLIRunner().run(Array("output application/json --- (1 to 3)[0]"))
      val source = Source.fromBytes(stream.toByteArray, "UTF-8")
      val result = source.mkString
      result.trim shouldBe "1"
    } finally {
      System.setOut(out)
      println("Finish OK 3")
    }
  }

  "should work with simple script and not output" in {
    val defaultOut = System.out
    try {
      val stream = new ByteArrayOutputStream()
      System.setOut(new PrintStream(stream, true))
      new DataWeaveCLIRunner().run(Array("(1 to 3)[0]"))
      val source = Source.fromBytes(stream.toByteArray, "UTF-8")
      val result = source.mkString
      result.trim shouldBe "1"
    } finally {
      System.setOut(defaultOut)
    }
  }

  "should work ok when sending payload from stdin" in {
    val out = System.out
    val in = System.in
    try {
      val input =
        """[
          |  1,
          |  2,
          |  3
          |]
        """.stripMargin.trim
      val stream = new ByteArrayOutputStream()
      System.setOut(new PrintStream(stream, true))
      System.setIn(new ByteArrayInputStream(input.getBytes("UTF-8")))
      new DataWeaveCLIRunner().run(Array("payload[0]"))
      val source = Source.fromBytes(stream.toByteArray, "UTF-8")
      val result = source.mkString.trim
      source.close()
      result.trim shouldBe "1"
    } finally {
      System.setOut(out)
      System.setIn(in)
      println("Finish OK 2")
    }
  }

  "should work with light formats" in {
    val out = System.out
    val in = System.in
    try {
      val input =
        """[{
          |  "a" : 1,
          |  "b" : 2,
          |  "c" : 3
          |}]
        """.stripMargin.trim
      val stream = new ByteArrayOutputStream()
      System.setOut(new PrintStream(stream, true))
      System.setIn(new ByteArrayInputStream(input.getBytes("UTF-8")))
      new DataWeaveCLIRunner().run(Array("input payload json output csv header=false ---payload"))
      val source = Source.fromBytes(stream.toByteArray, "UTF-8")
      val result = source.mkString.trim
      source.close()
      result.trim shouldBe "1,2,3"
    } finally {
      System.setOut(out)
      System.setIn(in)
      println("Finish OK 2")
    }
  }



} 
Example 137
Source File: ProxyRequestCodec.scala    From aws-lambda-scala   with MIT License 5 votes vote down vote up
package io.github.mkotsur.aws.codecs

import java.io.ByteArrayInputStream
import cats.syntax.either.catsSyntaxEither
import io.circe.generic.auto._
import io.github.mkotsur.aws.handler.CanDecode
import io.github.mkotsur.aws.proxy.ProxyRequest
import shapeless.Generic

import scala.language.{higherKinds, postfixOps}

private[aws] trait ProxyRequestCodec extends AllCodec with FutureCodec {

  
  def GenericProxyRequestOf[T] = shapeless.Generic[ProxyRequest[T]]

  implicit def canDecodeProxyRequest[T](implicit canDecode: CanDecode[T]) = CanDecode.instance[ProxyRequest[T]] { is =>
    {
      def extractBody(s: ProxyRequest[String]) = s.body match {
        case Some(bodyString) => canDecode.readStream(new ByteArrayInputStream(bodyString.getBytes)).map(Option.apply)
        case None             => Right(None)
      }

      def produceProxyResponse(decodedRequestString: ProxyRequest[String], bodyOption: Option[T]) = {
        val reqList = Generic[ProxyRequest[String]].to(decodedRequestString)
        Generic[ProxyRequest[T]].from((bodyOption :: reqList.reverse.tail).reverse)
      }

      for (decodedRequest$String <- CanDecode[ProxyRequest[String]].readStream(is);
           decodedBodyOption     <- extractBody(decodedRequest$String))
        yield produceProxyResponse(decodedRequest$String, decodedBodyOption)
    }
  }

} 
Example 138
Source File: Serialization.scala    From seahorse-workflow-executor   with Apache License 2.0 5 votes vote down vote up
package io.deepsense.commons.serialization

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

trait Serialization {

  def deserialize[T](bytes: Array[Byte]): T = {
    val bufferIn = new ByteArrayInputStream(bytes)
    val streamIn = new ObjectInputStream(bufferIn)
    try {
      streamIn.readObject().asInstanceOf[T]
    } finally {
      streamIn.close()
    }
  }

  def serialize[T](objectToSerialize: T): Array[Byte] = {
    val byteArrayOutputStream: ByteArrayOutputStream = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(byteArrayOutputStream)
    try {
      oos.writeObject(objectToSerialize)
      oos.flush()
      byteArrayOutputStream.toByteArray
    } finally {
      oos.close()
    }
  }

  def serializeDeserialize[T](obj: T): T = deserialize[T](serialize[T](obj))
}

object Serialization extends Serialization 
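A hedged usage sketch of the Serialization trait above, assuming the io.deepsense.commons.serialization.Serialization object is on the classpath; any value that is Serializable at runtime (here a plain immutable Map) can be round-tripped:

import io.deepsense.commons.serialization.Serialization

object SerializationRoundTrip extends App {
  val original = Map("a" -> 1, "b" -> 2)

  val bytes    = Serialization.serialize(original)               // Java serialization to Array[Byte]
  val restored = Serialization.deserialize[Map[String, Int]](bytes) // back through ByteArrayInputStream

  assert(restored == original)
  println(s"round-tripped ${bytes.length} bytes")
} 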
Example 139
Source File: GoogleDriveClient.scala    From seahorse-workflow-executor   with Apache License 2.0 5 votes vote down vote up
package io.deepsense.deeplang.doperations.readwritedataframe.googlestorage

import java.io.{ByteArrayInputStream, FileOutputStream}
import java.util

import com.google.api.client.googleapis.auth.oauth2.GoogleCredential
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport
import com.google.api.client.http.FileContent
import com.google.api.client.json.gson.GsonFactory
import com.google.api.services.drive.model.File
import com.google.api.services.drive.{Drive, DriveScopes}

import io.deepsense.commons.resources.ManagedResource
import io.deepsense.commons.utils.LoggerForCallerClass
import io.deepsense.deeplang.doperations.inout.CsvParameters.ColumnSeparatorChoice

private[googlestorage] object GoogleDriveClient {

  val logger = LoggerForCallerClass()

  val googleSheetCsvSeparator = ColumnSeparatorChoice.Comma()

  private val ApplicationName = "Seahorse"

  private val Scopes = util.Arrays.asList(DriveScopes.DRIVE)

  def uploadCsvFileAsGoogleSheet(
      credentials: GoogleCretendialsJson,
      sheetId: GoogleSheetId,
      filePath: String
    ): Unit = {
    val fileMetadata = new File().setMimeType("application/vnd.google-apps.spreadsheet")
    val mediaContent = new FileContent("text/csv", new java.io.File(filePath))

    driveService(credentials).files.update(sheetId, fileMetadata, mediaContent).execute
  }

  def downloadGoogleSheetAsCsvFile(
      credentials: GoogleCretendialsJson,
      sheetId: GoogleSheetId,
      filePath: String
    ): Unit = {
    val file = new java.io.File(filePath)
    file.getParentFile.mkdirs()

    ManagedResource(new FileOutputStream(file)) { fos =>
      driveService(credentials).files().export(sheetId, "text/csv").executeMediaAndDownloadTo(fos)
      logger.info(s"Downloaded google sheet id=$sheetId to the file $filePath")
    }
  }

  private def driveService(serviceAccountCredentials: String): Drive = {
    val credential = {
      val in = new ByteArrayInputStream(serviceAccountCredentials.getBytes)
      GoogleCredential.fromStream(in).createScoped(Scopes)
    }
    new Drive.Builder(
      GoogleNetHttpTransport.newTrustedTransport(),
      jsonFactory,
      credential
    ).setApplicationName(ApplicationName).build
  }

  // Default choice is JacksonFactory. However spark depends on Jackson as well
  // and google/spark jackson versions are binary incompatible with each other.
  private val jsonFactory = GsonFactory.getDefaultInstance

} 
Example 140
Source File: PythonNotebook.scala    From seahorse-workflow-executor   with Apache License 2.0 5 votes vote down vote up
package io.deepsense.deeplang.doperations

import java.io.ByteArrayInputStream

import io.deepsense.commons.utils.Version
import io.deepsense.deeplang.DOperation.Id
import io.deepsense.deeplang.ExecutionContext
import io.deepsense.deeplang.doperables.dataframe.DataFrame
import scala.concurrent.Await
import scala.concurrent.duration.Duration
import scala.reflect.runtime.{universe => ru}
import scala.util.Failure

import io.deepsense.commons.rest.client.NotebookRestClient

case class PythonNotebook()
  extends Notebook {

  override val id: Id = "e76ca616-0322-47a5-b390-70c9668265dd"
  override val name: String = "Python Notebook"
  override val description: String = "Creates a Python notebook with access to the DataFrame"

  override val since: Version = Version(1, 0, 0)
  override val notebookType: String = "python"

  override protected def execute(dataFrame: DataFrame)(context: ExecutionContext): Unit = {
    context.dataFrameStorage.setInputDataFrame(0, dataFrame.sparkDataFrame)
    headlessExecution(context)
  }

} 
Example 141
Source File: S3Brain.scala    From sumobot   with Apache License 2.0 5 votes vote down vote up
package com.sumologic.sumobot.brain

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.Properties

import akka.actor.{Actor, Props}
import com.amazonaws.auth.{AWSCredentials, AWSStaticCredentialsProvider}
import com.amazonaws.services.s3.{AmazonS3Client, AmazonS3ClientBuilder}
import com.amazonaws.services.s3.model.ObjectMetadata
import com.sumologic.sumobot.brain.Brain._

import scala.collection.JavaConverters._
import scala.collection.immutable

object S3Brain {
  def props(credentials: AWSCredentials,
            bucket: String,
            s3Key: String): Props = Props(classOf[S3Brain], credentials, bucket, s3Key)
}

class S3Brain(credentials: AWSCredentials,
              bucket: String,
              s3Key: String) extends Actor {

  private val s3Client = AmazonS3ClientBuilder.standard()
    .withCredentials(new AWSStaticCredentialsProvider(credentials)).build

  private var brainContents: Map[String, String] = loadFromS3()

  override def receive: Receive = {
    case Store(key, value) =>
      brainContents += (key -> value)
      saveToS3(brainContents)

    case Remove(key) =>
      brainContents -= key
      saveToS3(brainContents)

    case Retrieve(key) =>
      brainContents.get(key) match {
        case Some(value) => sender() ! ValueRetrieved(key, value)
        case None => sender() ! ValueMissing(key)
      }

    case ListValues(prefix) =>
      sender() ! ValueMap(brainContents.filter(_._1.startsWith(prefix)))
  }

  private def loadFromS3(): Map[String, String] = {
    if (s3Client.doesBucketExistV2(bucket)) {
      val props = new Properties()
      props.load(s3Client.getObject(bucket, s3Key).getObjectContent)
      immutable.Map(props.asScala.toSeq: _*)
    } else {
      Map.empty
    }
  }

  private def saveToS3(contents: Map[String, String]): Unit = {
    if (!s3Client.doesBucketExistV2(bucket)) {
      s3Client.createBucket(bucket)
    }

    val props = new Properties()
    props.putAll(contents.asJava)
    val out = new ByteArrayOutputStream()
    props.store(out, "")
    out.flush()
    out.close()
    val in = new ByteArrayInputStream(out.toByteArray)
    s3Client.putObject(bucket, s3Key, in, new ObjectMetadata())
  }
} 
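S3Brain persists its key/value state as a java.util.Properties blob before uploading it. A self-contained sketch of that Properties-to-bytes round trip with no S3 involved (names are made up for the demo):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.Properties

import scala.collection.JavaConverters._
import scala.collection.immutable

object PropertiesRoundTrip extends App {
  val contents = Map("channel" -> "#ops", "greeting" -> "hello")

  // Serialize the map the same way saveToS3 does before uploading.
  val props = new Properties()
  props.putAll(contents.asJava)
  val out = new ByteArrayOutputStream()
  props.store(out, "")

  // Load it back the way loadFromS3 reads the S3 object content.
  val loaded = new Properties()
  loaded.load(new ByteArrayInputStream(out.toByteArray))
  val restored = immutable.Map(loaded.asScala.toSeq: _*)

  assert(restored == contents)
} 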
Example 142
Source File: IncrementalCache.scala    From sbt-idea-plugin   with Apache License 2.0 5 votes vote down vote up
package org.jetbrains.sbtidea.packaging.artifact

import java.io.{BufferedOutputStream, ByteArrayInputStream, ObjectInputStream, ObjectOutputStream}
import java.nio.file.{Files, Path}

import sbt.Keys.TaskStreams

import scala.collection.mutable

trait IncrementalCache extends AutoCloseable {
  def fileChanged(in: Path): Boolean
}

class DumbIncrementalCache extends IncrementalCache {
  override def fileChanged(in: Path): Boolean = true
  override def close(): Unit = ()
}

class PersistentIncrementalCache(private val root: Path)(implicit private val streams: TaskStreams) extends IncrementalCache {

  private val FILENAME = "sbtidea.cache"
  private val myFile   = root.resolve(FILENAME)
  private val myData   = loadOrCreate()

  type Data = mutable.HashMap[String, Long]

  private def loadFromDisk(): Either[String, Data] = {
    if (!Files.exists(myFile) || Files.size(myFile) <= 0)
      return Left("Cache file is empty or doesn't exist")
    val data = Files.readAllBytes(myFile)
    using(new ObjectInputStream(new ByteArrayInputStream(data))) { stream =>
      Right(stream.readObject().asInstanceOf[Data])
    }
  }

  private def loadOrCreate(): Data = loadFromDisk() match {
    case Left(message) =>
      streams.log.info(message)
      new Data()
    case Right(value) => value
  }

  private def saveToDisk(): Unit = {
    import java.nio.file.StandardOpenOption._
    if (!Files.exists(myFile.getParent)) {
      Files.createDirectories(myFile.getParent)
      Files.createFile(myFile)
    }
    using(new ObjectOutputStream(
          new BufferedOutputStream(
            Files.newOutputStream(myFile, CREATE, WRITE, TRUNCATE_EXISTING)))) { stream =>
      stream.writeObject(myData)
    }
  }

  override def close(): Unit = saveToDisk()

  override def fileChanged(in: Path): Boolean = {
    val newTimestamp = Files.getLastModifiedTime(in).toMillis
    val inStr = in.toString
    val lastTimestamp = myData.getOrElseUpdate(inStr, newTimestamp)
    val result = newTimestamp > lastTimestamp
    myData.put(inStr, newTimestamp)
    result
  }
} 
Example 143
Source File: TableRowJsonIOTest.scala    From ratatool   with Apache License 2.0 5 votes vote down vote up
package com.spotify.ratatool.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File}

import com.spotify.ratatool.Schemas
import com.spotify.ratatool.scalacheck._
import org.scalacheck.Gen
import scala.jdk.CollectionConverters._
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class TableRowJsonIOTest extends AnyFlatSpec with Matchers {

  
  private def floatGen = Gen.choose[Float](0.0F, 1.0F)

  private val schema = Schemas.tableSchema
  private val data = Gen.listOfN(100,
    tableRowOf(schema)
      .amend(Gen.oneOf(
        Gen.const(null),
        floatGen
      ))(_.getRecord("nullable_fields").set("float_field"))
      .amend(floatGen)(_.getRecord("required_fields").set("float_field"))
      .amend(Gen.nonEmptyListOf(floatGen)
        .map(_.asJava)
      )(_.getRecord("repeated_fields").set("float_field"))
  ).sample.get

  "TableRowJsonIO" should "work with stream" in {
    val out = new ByteArrayOutputStream()
    TableRowJsonIO.writeToOutputStream(data, out)
    val in = new ByteArrayInputStream(out.toByteArray)
    val result = TableRowJsonIO.readFromInputStream(in).toList.map(_.toString)
    result should equal (data.map(_.toString))
  }

  it should "work with file" in {
    val file = File.createTempFile("ratatool-", ".json")
    file.deleteOnExit()
    TableRowJsonIO.writeToFile(data, file)
    val result = TableRowJsonIO.readFromFile(file).toList.map(_.toString)
    result should equal (data.map(_.toString))
  }

} 
Example 144
Source File: TestHelper.scala    From odsc-west-streaming-trends   with GNU General Public License v3.0 5 votes vote down vote up
package com.twilio.open.streaming.trend.discovery

import java.io.{ByteArrayInputStream, InputStream}
import java.nio.charset.StandardCharsets

import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.google.protobuf.Message
import com.googlecode.protobuf.format.JsonFormat
import com.holdenkarau.spark.testing.{LocalSparkContext, SparkContextProvider}
import com.twilio.open.protocol.Calls.CallEvent
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers, Suite}
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.Seq
import scala.io.Source
import scala.reflect.ClassTag
import scala.reflect.classTag

object TestHelper {
  val log: Logger = LoggerFactory.getLogger("com.twilio.open.streaming.trend.discovery.TestHelper")
  val mapper: ObjectMapper = {
    val m = new ObjectMapper()
    m.registerModule(DefaultScalaModule)
  }

  val jsonFormat: JsonFormat = new JsonFormat

  def loadScenario[T<: Message : ClassTag](file: String): Seq[T] = {
    val fileString = Source.fromFile(file).mkString
    val parsed = mapper.readValue(fileString, classOf[Sceanario])
    parsed.input.map { data =>
      val json = mapper.writeValueAsString(data)
      convert[T](json)
    }
  }

  def convert[T<: Message : ClassTag](json: String): T = {
    val clazz = classTag[T].runtimeClass
    val builder = clazz.getMethod("newBuilder").invoke(clazz).asInstanceOf[Message.Builder]
    try {
      val input: InputStream = new ByteArrayInputStream(json.getBytes())
      jsonFormat.merge(input, builder)
      builder.build().asInstanceOf[T]
    } catch {
      case e: Exception =>
        throw e
    }
  }

  def asMockKafkaDataFrame(event: CallEvent): MockKafkaDataFrame = {
    val key = event.getEventId.getBytes(StandardCharsets.UTF_8)
    val value = event.toByteArray
    MockKafkaDataFrame(key, value)
  }

}

case class MockKafkaDataFrame(key: Array[Byte], value: Array[Byte])


@SerialVersionUID(1L)
case class KafkaDataFrame(key: Array[Byte], topic: Array[Byte], value: Array[Byte]) extends Serializable

case class Sceanario(input: Seq[Any], expected: Option[Any] = None)

trait SparkSqlTest extends BeforeAndAfterAll with SparkContextProvider {
  self: Suite =>

  @transient var _sparkSql: SparkSession = _
  @transient private var _sc: SparkContext = _

  override def sc: SparkContext = _sc

  def conf: SparkConf

  def sparkSql: SparkSession = _sparkSql

  override def beforeAll() {
    _sparkSql = SparkSession.builder().config(conf).getOrCreate()

    _sc = _sparkSql.sparkContext
    setup(_sc)
    super.beforeAll()
  }

  override def afterAll() {
    try {
      _sparkSql.close()
      _sparkSql = null
      LocalSparkContext.stop(_sc)
      _sc = null
    } finally {
      super.afterAll()
    }
  }

} 
Example 145
Source File: Serialization.scala    From seahorse   with Apache License 2.0 5 votes vote down vote up
package ai.deepsense.commons.serialization

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

trait Serialization {

  def deserialize[T](bytes: Array[Byte]): T = {
    val bufferIn = new ByteArrayInputStream(bytes)
    val streamIn = new ObjectInputStream(bufferIn)
    try {
      streamIn.readObject().asInstanceOf[T]
    } finally {
      streamIn.close()
    }
  }

  def serialize[T](objectToSerialize: T): Array[Byte] = {
    val byteArrayOutputStream: ByteArrayOutputStream = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(byteArrayOutputStream)
    try {
      oos.writeObject(objectToSerialize)
      oos.flush()
      byteArrayOutputStream.toByteArray
    } finally {
      oos.close()
    }
  }

  def serializeDeserialize[T](obj: T): T = deserialize[T](serialize[T](obj))
}

object Serialization extends Serialization 
Example 146
Source File: GoogleDriveClient.scala    From seahorse   with Apache License 2.0 5 votes vote down vote up
package ai.deepsense.deeplang.doperations.readwritedataframe.googlestorage

import java.io.{ByteArrayInputStream, FileOutputStream}
import java.util

import com.google.api.client.googleapis.auth.oauth2.GoogleCredential
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport
import com.google.api.client.http.FileContent
import com.google.api.client.json.gson.GsonFactory
import com.google.api.services.drive.model.File
import com.google.api.services.drive.{Drive, DriveScopes}

import ai.deepsense.commons.resources.ManagedResource
import ai.deepsense.commons.utils.LoggerForCallerClass
import ai.deepsense.deeplang.doperations.inout.CsvParameters.ColumnSeparatorChoice

private[googlestorage] object GoogleDriveClient {

  val logger = LoggerForCallerClass()

  val googleSheetCsvSeparator = ColumnSeparatorChoice.Comma()

  private val ApplicationName = "Seahorse"

  private val Scopes = util.Arrays.asList(DriveScopes.DRIVE)

  def uploadCsvFileAsGoogleSheet(
      credentials: GoogleCretendialsJson,
      sheetId: GoogleSheetId,
      filePath: String
    ): Unit = {
    val fileMetadata = new File().setMimeType("application/vnd.google-apps.spreadsheet")
    val mediaContent = new FileContent("text/csv", new java.io.File(filePath))

    driveService(credentials).files.update(sheetId, fileMetadata, mediaContent).execute
  }

  def downloadGoogleSheetAsCsvFile(
      credentials: GoogleCretendialsJson,
      sheetId: GoogleSheetId,
      filePath: String
    ): Unit = {
    val file = new java.io.File(filePath)
    file.getParentFile.mkdirs()

    ManagedResource(new FileOutputStream(file)) { fos =>
      driveService(credentials).files().export(sheetId, "text/csv").executeMediaAndDownloadTo(fos)
      logger.info(s"Downloaded google sheet id=$sheetId to the file $filePath")
    }
  }

  private def driveService(serviceAccountCredentials: String): Drive = {
    val credential = {
      val in = new ByteArrayInputStream(serviceAccountCredentials.getBytes)
      GoogleCredential.fromStream(in).createScoped(Scopes)
    }
    new Drive.Builder(
      GoogleNetHttpTransport.newTrustedTransport(),
      jsonFactory,
      credential
    ).setApplicationName(ApplicationName).build
  }

  // Default choice is JacksonFactory. However spark depends on Jackson as well
  // and google/spark jackson versions are binary incompatible with each other.
  private val jsonFactory = GsonFactory.getDefaultInstance

} 
Example 147
Source File: PythonNotebook.scala    From seahorse   with Apache License 2.0 5 votes vote down vote up
package ai.deepsense.deeplang.doperations

import java.io.ByteArrayInputStream

import ai.deepsense.commons.utils.Version
import ai.deepsense.deeplang.DOperation.Id
import ai.deepsense.deeplang.ExecutionContext
import ai.deepsense.deeplang.doperables.dataframe.DataFrame
import scala.concurrent.Await
import scala.concurrent.duration.Duration
import scala.reflect.runtime.{universe => ru}
import scala.util.Failure

import ai.deepsense.commons.rest.client.NotebookRestClient

case class PythonNotebook()
  extends Notebook {

  override val id: Id = "e76ca616-0322-47a5-b390-70c9668265dd"
  override val name: String = "Python Notebook"
  override val description: String = "Creates a Python notebook with access to the DataFrame"

  override val since: Version = Version(1, 0, 0)
  override val notebookType: String = "python"

  override protected def execute(dataFrame: DataFrame)(context: ExecutionContext): Unit = {
    context.dataFrameStorage.setInputDataFrame(0, dataFrame.sparkDataFrame)
    headlessExecution(context)
  }

} 
Example 148
Source File: JavaSerde.scala    From affinity   with Apache License 2.0 5 votes vote down vote up
package io.amient.affinity.core.serde

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectOutputStream}

import akka.actor.ExtendedActorSystem
import akka.serialization.JavaSerializer
import akka.util.ClassLoaderObjectInputStream

class JavaSerde(system: ExtendedActorSystem) extends Serde[AnyRef] {

  override def identifier: Int = 101

  override def close(): Unit = ()

  override def fromBytes(bytes: Array[Byte]): AnyRef = {
    val in = new ClassLoaderObjectInputStream(system.dynamicAccess.classLoader, new ByteArrayInputStream(bytes))
    val obj = JavaSerializer.currentSystem.withValue(system) { in.readObject }
    in.close()
    obj
  }

  override def toBytes(o: AnyRef): Array[Byte] = {
    val bos = new ByteArrayOutputStream
    val out = new ObjectOutputStream(bos)
    JavaSerializer.currentSystem.withValue(system) { out.writeObject(o) }
    out.close()
    bos.toByteArray
  }

} 
Example 149
Source File: SeqSerde.scala    From affinity   with Apache License 2.0 5 votes vote down vote up
package io.amient.affinity.core.serde.collection

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

import akka.actor.ExtendedActorSystem
import com.typesafe.config.Config
import io.amient.affinity.core.serde.{AbstractWrapSerde, Serde, Serdes}

class SeqSerde(serdes: Serdes) extends AbstractWrapSerde(serdes) with Serde[Seq[Any]] {

  def this(system: ExtendedActorSystem) = this(Serde.tools(system))
  def this(config: Config) = this(Serde.tools(config))

  override def identifier: Int = 141

  override def close(): Unit = ()

  override protected def fromBytes(bytes: Array[Byte]): Seq[Any] = {
    val di = new DataInputStream(new ByteArrayInputStream(bytes))
    val numItems = di.readInt()
    val result = ((1 to numItems) map { _ =>
      val len = di.readInt()
      val item = new Array[Byte](len)
      di.read(item)
      fromBinaryWrapped(item)
    }).toList
    di.close()
    result
  }

  override def toBytes(seq: Seq[Any]): Array[Byte] = {
    val os = new ByteArrayOutputStream()
    val d = new DataOutputStream(os)
    d.writeInt(seq.size)
    for (a: Any <- seq) a match {
      case ref: AnyRef =>
        val item = toBinaryWrapped(ref)
        d.writeInt(item.length)
        d.write(item)
    }
    os.close
    os.toByteArray
  }
} 
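SeqSerde frames its payload as an element count followed by a 4-byte length prefix and the raw bytes of each element. A self-contained sketch of that length-prefixed framing using only DataOutputStream/DataInputStream, without Akka or the wrapping serde (object and method names are made up):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

object LengthPrefixedFraming extends App {
  // Encode: element count, then (length, bytes) for each element.
  def encode(items: Seq[Array[Byte]]): Array[Byte] = {
    val os = new ByteArrayOutputStream()
    val d  = new DataOutputStream(os)
    d.writeInt(items.size)
    items.foreach { item =>
      d.writeInt(item.length)
      d.write(item)
    }
    d.flush()
    os.toByteArray
  }

  // Decode: mirror of encode, reading each frame fully before moving on.
  def decode(bytes: Array[Byte]): Seq[Array[Byte]] = {
    val di = new DataInputStream(new ByteArrayInputStream(bytes))
    val n  = di.readInt()
    (1 to n).map { _ =>
      val item = new Array[Byte](di.readInt())
      di.readFully(item) // readFully avoids the short-read risk of a bare read()
      item
    }
  }

  val original = Seq("one", "two", "three").map(_.getBytes("UTF-8"))
  val decoded  = decode(encode(original))
  assert(decoded.map(new String(_, "UTF-8")) == Seq("one", "two", "three"))
} 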
Example 150
Source File: SetSerde.scala    From affinity   with Apache License 2.0 5 votes vote down vote up
package io.amient.affinity.core.serde.collection

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

import akka.actor.ExtendedActorSystem
import com.typesafe.config.Config
import io.amient.affinity.core.serde.{AbstractWrapSerde, Serde, Serdes}

class SetSerde(serdes: Serdes) extends AbstractWrapSerde(serdes) with Serde[Set[Any]] {

  def this(system: ExtendedActorSystem) = this(Serde.tools(system))
  def this(config: Config) = this(Serde.tools(config))

  override def identifier: Int = 142

  override protected def fromBytes(bytes: Array[Byte]): Set[Any] = {
    val di = new DataInputStream(new ByteArrayInputStream(bytes))
    val numItems = di.readInt()
    val result = ((1 to numItems) map { _ =>
      val len = di.readInt()
      val item = new Array[Byte](len)
      di.read(item)
      fromBinaryWrapped(item)
    }).toSet
    di.close()
    result
  }

  override def toBytes(set: Set[Any]): Array[Byte] = {
    val os = new ByteArrayOutputStream()
    val d = new DataOutputStream(os)
    d.writeInt(set.size)
    for (a: Any <- set) a match {
      case ref: AnyRef =>
        val item = toBinaryWrapped(ref)
        d.writeInt(item.length)
        d.write(item)
    }
    os.close
    os.toByteArray
  }

  override def close() = ()
} 
Example 151
Source File: S3DiffUploader.scala    From shield   with MIT License 5 votes vote down vote up
package shield.aws

import java.io.{ByteArrayInputStream, InputStream}
import java.nio.charset.StandardCharsets

import akka.actor.{Actor, ActorLogging, Props}
import com.amazonaws.auth.profile.ProfileCredentialsProvider
import com.amazonaws.services.s3.AmazonS3Client
import com.amazonaws.services.s3.model.ObjectMetadata
import shield.actors.listeners.ComparisonDiffFile

object S3DiffUploader{
  def props(bucket: String, folder: String) : Props = Props(new S3DiffUploader(bucket, folder))
}

class S3DiffUploader(bucket: String, folder: String)  extends Actor with ActorLogging {
  val s3Client = new AmazonS3Client()
  val charset = StandardCharsets.UTF_8
  val stripped = folder.stripPrefix("/").stripSuffix("/")
  val prefix = if (stripped.isEmpty) {
    stripped
  } else {
    stripped + "/"
  }

  def receive = {
    case file: ComparisonDiffFile =>
      val metadata = new ObjectMetadata()
      metadata.setContentLength(file.contents.length)
      s3Client.putObject(bucket, s"$prefix${file.fileName}", new ByteArrayInputStream(file.contents), metadata)
  }
} 
Example 152
Source File: VinylRequest.scala    From vinyldns   with Apache License 2.0 5 votes vote down vote up
package models

import java.io.{ByteArrayInputStream, InputStream}
import java.util

import com.amazonaws.{ReadLimitInfo, SignableRequest}
import com.amazonaws.http.HttpMethodName

object VinylDNSRequest {
  val APPLICATION_JSON = "application/json"
}

case class VinylDNSRequest(
    method: String,
    url: String,
    path: String = "",
    payload: Option[String] = None,
    parameters: util.HashMap[String, java.util.List[String]] =
      new util.HashMap[String, java.util.List[String]]()
)

class SignableVinylDNSRequest(origReq: VinylDNSRequest) extends SignableRequest[VinylDNSRequest] {

  import VinylDNSRequest._

  val contentType: String = APPLICATION_JSON

  private val headers = new util.HashMap[String, String]()
  private val parameters = origReq.parameters
  private val uri = new java.net.URI(origReq.url)
  // I hate to do this, but need to be able to set the content after creation to
  // implement the interface properly
  private var contentStream: InputStream = new ByteArrayInputStream(
    origReq.payload.getOrElse("").getBytes("UTF-8")
  )

  override def addHeader(name: String, value: String): Unit = headers.put(name, value)
  override def getHeaders: java.util.Map[String, String] = headers
  override def getResourcePath: String = origReq.path
  override def addParameter(name: String, value: String): Unit = {
    if (!parameters.containsKey(name)) parameters.put(name, new util.ArrayList[String]())
    parameters.get(name).add(value)
  }
  override def getParameters: java.util.Map[String, java.util.List[String]] = parameters
  override def getEndpoint: java.net.URI = uri
  override def getHttpMethod: HttpMethodName = HttpMethodName.valueOf(origReq.method)
  override def getTimeOffset: Int = 0
  override def getContent: InputStream = contentStream
  override def getContentUnwrapped: InputStream = getContent
  override def getReadLimitInfo: ReadLimitInfo = new ReadLimitInfo {
    override def getReadLimit: Int = -1
  }
  override def getOriginalRequestObject: Object = origReq
  override def setContent(content: InputStream): Unit = contentStream = content
} 
Example 153
Source File: GenericAvroSerializerSuite.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Output, Input}
import org.apache.avro.{SchemaBuilder, Schema}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SparkFunSuite, SharedSparkContext}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema : Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {//模式压缩与解压缩
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {//记录序列化和反序列化
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }
  // uses a schema fingerprint to reduce the message size
  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {//缓存之前模式
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
} 
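The test above goes through Spark's Kryo-backed GenericAvroSerializer. As a point of comparison, a minimal sketch of the plain Avro binary round trip for the same record shape, writing a GenericRecord to bytes and reading it back through a ByteArrayInputStream using only the avro library (object name is made up):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import org.apache.avro.SchemaBuilder
import org.apache.avro.generic.{GenericData, GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DecoderFactory, EncoderFactory}

object AvroRecordRoundTrip extends App {
  val schema = SchemaBuilder.record("testRecord").fields().requiredString("data").endRecord()
  val record = new GenericData.Record(schema)
  record.put("data", "test data")

  // Encode the record with the Avro binary encoding.
  val out     = new ByteArrayOutputStream()
  val encoder = EncoderFactory.get().binaryEncoder(out, null)
  new GenericDatumWriter[GenericRecord](schema).write(record, encoder)
  encoder.flush()

  // Decode it back from a ByteArrayInputStream.
  val decoder = DecoderFactory.get().binaryDecoder(new ByteArrayInputStream(out.toByteArray), null)
  val copy    = new GenericDatumReader[GenericRecord](schema).read(null, decoder)

  assert(copy == record)
} 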
Example 154
Source File: Unpacker.scala    From haystack-traces   with Apache License 2.0 5 votes vote down vote up
package com.expedia.www.haystack.trace.commons.packer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream}
import java.nio.ByteBuffer
import java.util.zip.GZIPInputStream

import com.expedia.open.tracing.buffer.SpanBuffer
import com.github.luben.zstd.ZstdInputStream
import org.apache.commons.io.IOUtils
import org.json4s.jackson.Serialization
import org.xerial.snappy.SnappyInputStream

object Unpacker {
  import PackedMessage._

  private def readMetadata(packedDataBytes: Array[Byte]): Array[Byte] = {
    val byteBuffer = ByteBuffer.wrap(packedDataBytes)
    val magicBytesExist = MAGIC_BYTES.indices forall { idx => byteBuffer.get() == MAGIC_BYTES.apply(idx) }
    if (magicBytesExist) {
      val headerLength = byteBuffer.getInt
      val metadataBytes = new Array[Byte](headerLength)
      byteBuffer.get(metadataBytes, 0, headerLength)
      metadataBytes
    } else {
      null
    }
  }

  private def unpack(compressedStream: InputStream) = {
    val outputStream = new ByteArrayOutputStream()
    IOUtils.copy(compressedStream, outputStream)
    outputStream.toByteArray
  }

  def readSpanBuffer(packedDataBytes: Array[Byte]): SpanBuffer = {
    var parsedDataBytes: Array[Byte] = null
    val metadataBytes = readMetadata(packedDataBytes)
    if (metadataBytes != null) {
      val packedMetadata = Serialization.read[PackedMetadata](new String(metadataBytes))
      val compressedDataOffset = MAGIC_BYTES.length + 4 + metadataBytes.length
      packedMetadata.t match {
        case PackerType.SNAPPY =>
          parsedDataBytes = unpack(
            new SnappyInputStream(
              new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset)))
        case PackerType.GZIP =>
          parsedDataBytes = unpack(
            new GZIPInputStream(
              new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset)))
        case PackerType.ZSTD =>
          parsedDataBytes = unpack(
            new ZstdInputStream(
              new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset)))
        case _ =>
          return SpanBuffer.parseFrom(
            new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset))
      }
    } else {
      parsedDataBytes = packedDataBytes
    }
    SpanBuffer.parseFrom(parsedDataBytes)
  }
} 
Example 155
Source File: Packer.scala    From haystack-traces   with Apache License 2.0 5 votes vote down vote up
package com.expedia.www.haystack.trace.commons.packer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, OutputStream}
import java.util.zip.GZIPOutputStream

import com.expedia.www.haystack.trace.commons.packer.PackerType.PackerType
import com.github.luben.zstd.ZstdOutputStream
import com.google.protobuf.GeneratedMessageV3
import org.apache.commons.io.IOUtils
import org.xerial.snappy.SnappyOutputStream

object PackerType extends Enumeration {
  type PackerType = Value
  val GZIP, SNAPPY, NONE, ZSTD = Value
}

case class PackedMetadata(t: PackerType)

abstract class Packer[T <: GeneratedMessageV3] {
  val packerType: PackerType

  protected def compressStream(stream: OutputStream): OutputStream

  private def pack(protoObj: T): Array[Byte] = {
    val outStream = new ByteArrayOutputStream
    val compressedStream = compressStream(outStream)
    if (compressedStream != null) {
      IOUtils.copy(new ByteArrayInputStream(protoObj.toByteArray), compressedStream)
      compressedStream.close() // this flushes the data to final outStream
      outStream.toByteArray
    } else {
      protoObj.toByteArray
    }
  }

  def apply(protoObj: T): PackedMessage[T] = {
    PackedMessage(protoObj, pack, PackedMetadata(packerType))
  }
}

class NoopPacker[T <: GeneratedMessageV3] extends Packer[T] {
  override val packerType = PackerType.NONE
  override protected def compressStream(stream: OutputStream): OutputStream = null
}

class SnappyPacker[T <: GeneratedMessageV3] extends Packer[T] {
  override val packerType = PackerType.SNAPPY
  override protected def compressStream(stream: OutputStream): OutputStream = new SnappyOutputStream(stream)
}


class ZstdPacker[T <: GeneratedMessageV3] extends Packer[T] {
  override val packerType = PackerType.ZSTD
  override protected def compressStream(stream: OutputStream): OutputStream = new ZstdOutputStream(stream)
}

class GzipPacker[T <: GeneratedMessageV3] extends Packer[T] {
  override val packerType = PackerType.GZIP
  override protected def compressStream(stream: OutputStream): OutputStream = new GZIPOutputStream(stream)
} 
Example 156
Source File: ParquetIOTest.scala    From ratatool   with Apache License 2.0 5 votes vote down vote up
package com.spotify.ratatool.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File}
import java.nio.file.Files

import com.spotify.ratatool.Schemas
import com.spotify.ratatool.avro.specific.TestRecord
import com.spotify.ratatool.scalacheck._
import org.apache.commons.io.FileUtils
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class ParquetIOTest extends AnyFlatSpec with Matchers {

  private val genericSchema = Schemas.avroSchema
  private val genericGen = genericRecordOf(genericSchema)
  private val genericData = (1 to 100).flatMap(_ => genericGen.sample)

  private val specificSchema = TestRecord.getClassSchema
  private val specificGen = specificRecordOf[TestRecord]
  private val specificData = (1 to 100).flatMap(_ => specificGen.sample)

  "ParquetIO" should "work with generic record and stream" in {
    val out = new ByteArrayOutputStream()
    ParquetIO.writeToOutputStream(genericData, genericSchema, out)
    val in = new ByteArrayInputStream(out.toByteArray)
    val result = ParquetIO.readFromInputStream(in).toList
    result should equal (genericData)
  }

  it should "work with generic record and file" in {
    val dir = Files.createTempDirectory("ratatool-")
    val file = new File(dir.toString, "temp.parquet")
    ParquetIO.writeToFile(genericData, genericSchema, file)
    val result = ParquetIO.readFromFile(file).toList
    result should equal (genericData)
    FileUtils.deleteDirectory(dir.toFile)
  }

  it should "work with specific record and stream" in {
    val out = new ByteArrayOutputStream()
    ParquetIO.writeToOutputStream(specificData, specificSchema, out)
    val in = new ByteArrayInputStream(out.toByteArray)
    val result = ParquetIO.readFromInputStream[TestRecord](in).toList
    result.map(FixRandomData(_)) should equal (specificData.map(FixRandomData(_)))
  }

  it should "work with specific record and file" in {
    val dir = Files.createTempDirectory("ratatool-")
    val file = new File(dir.toString, "temp.parquet")
    ParquetIO.writeToFile(specificData, specificSchema, file)
    val result = ParquetIO.readFromFile[TestRecord](file).toList
    result.map(FixRandomData(_)) should equal (specificData.map(FixRandomData(_)))
    FileUtils.deleteDirectory(dir.toFile)
  }

} 
Example 157
Source File: AvroIOTest.scala    From ratatool   with Apache License 2.0 5 votes vote down vote up
package com.spotify.ratatool.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File}

import com.spotify.ratatool.Schemas
import com.spotify.ratatool.avro.specific.TestRecord
import org.apache.avro.generic.GenericRecord
import com.spotify.ratatool.scalacheck._
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class AvroIOTest extends AnyFlatSpec with Matchers {

  private val genericSchema = Schemas.avroSchema
  private val genericGen = genericRecordOf(genericSchema)
  private val genericData = (1 to 100).flatMap(_ => genericGen.sample)

  private val specificSchema = TestRecord.getClassSchema
  private val specificGen = specificRecordOf[TestRecord]
  private val specificData = (1 to 100).flatMap(_ => specificGen.sample)

  "AvroIO" should "work with generic record and stream" in {
    val out = new ByteArrayOutputStream()
    AvroIO.writeToOutputStream(genericData, genericSchema, out)
    val in = new ByteArrayInputStream(out.toByteArray)
    val result = AvroIO.readFromInputStream[GenericRecord](in).toList
    result should equal (genericData)
  }

  it should "work with generic record and file" in {
    val file = File.createTempFile("ratatool-", ".avro")
    file.deleteOnExit()
    AvroIO.writeToFile(genericData, genericSchema, file)
    val result = AvroIO.readFromFile[GenericRecord](file).toList
    result should equal (genericData)
  }

  it should "work with specific record and stream" in {
    val out = new ByteArrayOutputStream()
    AvroIO.writeToOutputStream(specificData, specificSchema, out)
    val in = new ByteArrayInputStream(out.toByteArray)
    val result = AvroIO.readFromInputStream[TestRecord](in).toList
    result.map(FixRandomData(_)) should equal (specificData.map(FixRandomData(_)))
  }

  it should "work with specific record and file" in {
    val file = File.createTempFile("ratatool-", ".avro")
    file.deleteOnExit()
    AvroIO.writeToFile(specificData, specificSchema, file)
    val result = AvroIO.readFromFile[TestRecord](file).toList
    result.map(FixRandomData(_)) should equal (specificData.map(FixRandomData(_)))
  }
} 
Example 158
Source File: MockHelpers.scala    From guardrail   with MIT License 5 votes vote down vote up
package helpers

import com.fasterxml.jackson.databind.ObjectMapper
import io.netty.handler.codec.http.EmptyHttpHeaders
import java.io.ByteArrayInputStream
import java.nio.ByteBuffer
import java.nio.charset.StandardCharsets
import java.util.Collections
import java.util.concurrent.CompletableFuture
import javax.ws.rs.container.AsyncResponse
import org.asynchttpclient.Response
import org.asynchttpclient.uri.Uri
import org.mockito.{ ArgumentMatchersSugar, MockitoSugar }
import org.scalatest.Assertions
import scala.reflect.ClassTag

object MockHelpers extends Assertions with MockitoSugar with ArgumentMatchersSugar {
  def mockAsyncResponse[T](future: CompletableFuture[T])(implicit cls: ClassTag[T]): AsyncResponse = {
    val asyncResponse = mock[AsyncResponse]

    when(asyncResponse.resume(any[T])) thenAnswer [AnyRef] { response =>
      response match {
        case t: Throwable => future.completeExceptionally(t)
        case other: T     => future.complete(other)
        case other        => fail(s"AsyncResponse.resume expected an object of type ${cls.runtimeClass.getName}, but got ${other.getClass.getName} instead")
      }
    }

    asyncResponse
  }

  def mockAHCResponse[T](uri: String, status: Int, maybeBody: Option[T] = None)(implicit mapper: ObjectMapper): Response = {
    val response = mock[Response]
    when(response.getUri) thenReturn Uri.create(uri)
    when(response.hasResponseStatus) thenReturn true
    when(response.getStatusCode) thenReturn status
    when(response.getStatusText) thenReturn "Some Status"
    when(response.hasResponseHeaders) thenReturn true
    when(response.getHeaders) thenReturn EmptyHttpHeaders.INSTANCE
    when(response.getHeader(any)) thenReturn null
    when(response.getHeaders(any)) thenReturn Collections.emptyList()
    maybeBody match {
      case None =>
        when(response.hasResponseBody) thenReturn true
      case Some(body) =>
        val responseBytes = mapper.writeValueAsBytes(body)
        val responseStr   = new String(responseBytes, StandardCharsets.UTF_8)
        when(response.hasResponseBody) thenReturn true
        when(response.getResponseBody(any)) thenReturn responseStr
        when(response.getResponseBody) thenReturn responseStr
        when(response.getResponseBodyAsStream) thenReturn new ByteArrayInputStream(responseBytes)
        when(response.getResponseBodyAsByteBuffer) thenReturn ByteBuffer.wrap(responseBytes)
        when(response.getResponseBodyAsBytes) thenReturn responseBytes
    }
    response
  }

} 
Example 159
Source File: MeetupReceiver.scala    From meetup-stream   with Apache License 2.0 5 votes vote down vote up
package receiver

import org.apache.spark.streaming.receiver.Receiver
import org.apache.spark.storage.StorageLevel
import org.apache.spark.Logging
import com.ning.http.client.AsyncHttpClientConfig
import com.ning.http.client._
import scala.collection.mutable.ArrayBuffer
import java.io.OutputStream
import java.io.ByteArrayInputStream
import java.io.InputStreamReader
import java.io.BufferedReader
import java.io.InputStream
import java.io.PipedInputStream
import java.io.PipedOutputStream

class MeetupReceiver(url: String) extends Receiver[String](StorageLevel.MEMORY_AND_DISK_2) with Logging {
  
  @transient var client: AsyncHttpClient = _
  
  @transient var inputPipe: PipedInputStream = _
  @transient var outputPipe: PipedOutputStream = _  
       
  def onStart() {    
    val cf = new AsyncHttpClientConfig.Builder()
    cf.setRequestTimeout(Integer.MAX_VALUE)
    cf.setReadTimeout(Integer.MAX_VALUE)
    cf.setPooledConnectionIdleTimeout(Integer.MAX_VALUE)      
    client = new AsyncHttpClient(cf.build())
    
    inputPipe = new PipedInputStream(1024 * 1024)
    outputPipe = new PipedOutputStream(inputPipe)
    val producerThread = new Thread(new DataConsumer(inputPipe))
    producerThread.start()
    
    client.prepareGet(url).execute(new AsyncHandler[Unit]{
        
      def onBodyPartReceived(bodyPart: HttpResponseBodyPart) = {
        bodyPart.writeTo(outputPipe)
        AsyncHandler.STATE.CONTINUE        
      }
      
      def onStatusReceived(status: HttpResponseStatus) = {
        AsyncHandler.STATE.CONTINUE
      }
      
      def onHeadersReceived(headers: HttpResponseHeaders) = {
        AsyncHandler.STATE.CONTINUE
      }
            
      def onCompleted = {
        println("completed")
      }
      
      
      def onThrowable(t: Throwable)={
        t.printStackTrace()
      }
        
    })    
    
    
  }

  def onStop() {
    if (Option(client).isDefined) client.close()
    if (Option(outputPipe).isDefined) {
      outputPipe.flush()
      outputPipe.close()
    }
    if (Option(inputPipe).isDefined) {
      inputPipe.close()
    }
  }
  
  class DataConsumer(inputStream: InputStream) extends Runnable {

    override def run(): Unit = {
      val bufferedReader = new BufferedReader(new InputStreamReader(inputStream))
      var input = bufferedReader.readLine()
      while (input != null) {
        store(input)
        input = bufferedReader.readLine()
      }
    }

  }

} 
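
A hedged wiring sketch for the receiver above, assuming a local Spark Streaming job; the application name, batch interval, and RSVP endpoint URL are illustrative assumptions:

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object MeetupStreamApp {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("meetup-stream").setMaster("local[2]")
    val ssc  = new StreamingContext(conf, Seconds(5))

    // receiverStream plugs the custom Receiver into the DStream graph
    val rsvps = ssc.receiverStream(new MeetupReceiver("http://stream.meetup.com/2/rsvps"))
    rsvps.count().print()

    ssc.start()
    ssc.awaitTermination()
  }
}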
Example 160
Source File: GithubIssue485.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.avro4s.github

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import com.sksamuel.avro4s.record.decoder.CPWrapper
import com.sksamuel.avro4s.{AvroSchema, Decoder, DefaultFieldMapper}
import org.apache.avro.generic.GenericData
import org.apache.avro.util.Utf8
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers
import shapeless.Coproduct

class GithubIssue485 extends AnyFunSuite with Matchers {

  test("Serializable Coproduct Decoder #485") {
    val baos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(baos)
    oos.writeObject(Decoder[CPWrapper])
    oos.close()

    val decoder =
      new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray)).readObject().asInstanceOf[Decoder[CPWrapper]]

    val schema = AvroSchema[CPWrapper]
    val record = new GenericData.Record(schema)
    record.put("u", new Utf8("wibble"))
    decoder.decode(record) shouldBe CPWrapper(Coproduct[CPWrapper.ISBG]("wibble"))
  }
} 
Example 161
Source File: GithubIssue484.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.avro4s.github

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import com.sksamuel.avro4s.record.decoder.ScalaEnumClass
import com.sksamuel.avro4s.schema.Colours
import com.sksamuel.avro4s.{AvroSchema, Decoder, DefaultFieldMapper}
import org.apache.avro.generic.GenericData
import org.apache.avro.generic.GenericData.EnumSymbol
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

class GithubIssue484 extends AnyFunSuite with Matchers {

  test("Serializable Scala Enum Decoder #484") {
    val baos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(baos)
    oos.writeObject(Decoder[ScalaEnumClass])
    oos.close()

    val decoder = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray))
      .readObject()
      .asInstanceOf[Decoder[ScalaEnumClass]]

    val schema = AvroSchema[ScalaEnumClass]
    val record = new GenericData.Record(schema)
    record.put("colour", new EnumSymbol(schema.getField("colour").schema(), "Green"))
    decoder.decode(record) shouldBe ScalaEnumClass(Colours.Green)
  }
} 
Example 162
Source File: GithubIssue432.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.avro4s.github

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import com.sksamuel.avro4s.Encoder
import org.scalatest.{FunSuite, Matchers}

class GithubIssue432 extends FunSuite with Matchers {

  test("Serializable Encoder[BigDecimal] #432") {
    val oos = new ObjectOutputStream(new ByteArrayOutputStream())
    oos.writeObject(Encoder.bigDecimalEncoder)
    oos.close()
  }

  test("Deserialized Encoder[BigDecimal] works") {
    val baos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(baos)
    oos.writeObject(Encoder.bigDecimalEncoder)
    oos.close()

    val ois = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray))
    val encoder = ois.readObject().asInstanceOf[Encoder[BigDecimal]]

    encoder.encode(12.34)
  }
} 
Example 163
Source File: SchemaEvolutionTest.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.avro4s.record.decoder

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import com.sksamuel.avro4s._
import org.apache.avro.SchemaBuilder
import org.apache.avro.generic.GenericData
import org.apache.avro.util.Utf8
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

class SchemaEvolutionTest extends AnyFunSuite with Matchers {

  case class Version1(original: String)
  case class Version2(@AvroAlias("original") renamed: String)

  case class P1(name: String, age: Int = 18)
  case class P2(name: String)

  case class OptionalStringTest(a: String, b: Option[String])
  case class DefaultStringTest(a: String, b: String = "foo")

  ignore("@AvroAlias should be used when a reader schema has a field missing from the write schema") {

    val v1schema = AvroSchema[Version1]
    val v1 = Version1("hello")
    val baos = new ByteArrayOutputStream()
    val output = AvroOutputStream.data[Version1].to(baos).build()
    output.write(v1)
    output.close()

    // we load using a v2 schema
    val is = new AvroDataInputStream[Version2](new ByteArrayInputStream(baos.toByteArray), Some(v1schema))
    val v2 = is.iterator.toList.head

    v2.renamed shouldBe v1.original
  }

  test("when decoding, if the record and schema are missing a field and the target has a scala default, use that") {

    val f1 = RecordFormat[P1]
    val f2 = RecordFormat[P2]

    f1.from(f2.to(P2("foo"))) shouldBe P1("foo")
  }

  test("when decoding, if the record is missing a field that is present in the schema with a default, use the default from the schema") {
    val schema = SchemaBuilder.record("foo").fields().requiredString("a").endRecord()
    val record = new GenericData.Record(schema)
    record.put("a", new Utf8("hello"))
    Decoder[DefaultStringTest].decode(record) shouldBe DefaultStringTest("hello")
  }

  test("when decoding, if the record is missing a field that is present in the schema and the type is option, then set to None") {
    val schema1 = SchemaBuilder.record("foo").fields().requiredString("a").endRecord()
    val schema2 = SchemaBuilder.record("foo").fields().requiredString("a").optionalString("b").endRecord()
    val record = new GenericData.Record(schema1)
    record.put("a", new Utf8("hello"))
    Decoder[OptionalStringTest].decode(record) shouldBe OptionalStringTest("hello", None)
  }
} 
Example 164
Source File: ProtoMarshaller.scala    From akka-grpc   with Apache License 2.0 5 votes vote down vote up
package akka.grpc.internal

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream, InputStream }
import io.grpc.KnownLength
import akka.annotation.InternalApi
import akka.grpc.ProtobufSerializer


@InternalApi
class ProtoMarshaller[T <: com.google.protobuf.Message](u: ProtobufSerializer[T])
    extends io.grpc.MethodDescriptor.Marshaller[T] {
  override def parse(stream: InputStream): T = {
    val baos = new ByteArrayOutputStream(math.max(64, stream.available()))
    val buffer = new Array[Byte](32 * 1024)

    // Blocking calls underneath...
    // we can't avoid it for the moment because we are relying on the Netty's Channel API
    var bytesRead = stream.read(buffer)
    while (bytesRead >= 0) {
      baos.write(buffer, 0, bytesRead)
      bytesRead = stream.read(buffer)
    }
    u.deserialize(akka.util.ByteString(baos.toByteArray))
  }

  override def stream(value: T): InputStream =
    new ByteArrayInputStream(value.toByteArray) with KnownLength
} 
Example 165
Source File: Gzip.scala    From akka-grpc   with Apache License 2.0 5 votes vote down vote up
package akka.grpc.internal

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream }
import java.util.zip.{ GZIPInputStream, GZIPOutputStream }

import akka.util.ByteString

object Gzip extends Codec {
  override val name: String = "gzip"

  override def compress(uncompressed: ByteString): ByteString = {
    val baos = new ByteArrayOutputStream(uncompressed.size)
    val gzos = new GZIPOutputStream(baos)
    gzos.write(uncompressed.toArray)
    gzos.flush()
    gzos.close()
    ByteString(baos.toByteArray)
  }

  override def uncompress(compressed: ByteString): ByteString = {
    val gzis = new GZIPInputStream(new ByteArrayInputStream(compressed.toArray))

    val baos = new ByteArrayOutputStream(compressed.size)
    val buffer = new Array[Byte](32 * 1024)
    var read = gzis.read(buffer)
    while (read != -1) {
      baos.write(buffer, 0, read)
      read = gzis.read(buffer)
    }
    ByteString(baos.toByteArray)
  }
} 
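
A small round-trip sketch for the codec above, with no assumptions beyond akka.util.ByteString being on the classpath as in the listing:

import akka.util.ByteString

val original     = ByteString("hello gzip " * 100)
val compressed   = Gzip.compress(original)
val uncompressed = Gzip.uncompress(compressed)

assert(uncompressed == original)
assert(compressed.size < original.size) // highly repetitive input compresses well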
Example 166
Source File: Marshaller.scala    From akka-grpc   with Apache License 2.0 5 votes vote down vote up
package akka.grpc.internal

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream, InputStream }
import io.grpc.KnownLength
import akka.annotation.InternalApi
import akka.grpc.ProtobufSerializer


@InternalApi
final class Marshaller[T <: scalapb.GeneratedMessage](u: ProtobufSerializer[T])
    extends io.grpc.MethodDescriptor.Marshaller[T] {
  override def parse(stream: InputStream): T = {
    val baos = new ByteArrayOutputStream(math.max(64, stream.available()))
    val buffer = new Array[Byte](32 * 1024)

    // Blocking calls underneath...
    // we can't avoid it for the moment because we are relying on the Netty's Channel API
    var bytesRead = stream.read(buffer)
    while (bytesRead >= 0) {
      baos.write(buffer, 0, bytesRead)
      bytesRead = stream.read(buffer)
    }
    u.deserialize(akka.util.ByteString(baos.toByteArray))
  }

  override def stream(value: T): InputStream =
    new ByteArrayInputStream(value.toByteArray) with KnownLength
} 
Example 167
Source File: SslContexts.scala    From kubernetes-client   with Apache License 2.0 5 votes vote down vote up
package com.goyeau.kubernetes.client.util
import java.io.{ByteArrayInputStream, File, FileInputStream, InputStreamReader}
import java.security.cert.{CertificateFactory, X509Certificate}
import java.security.{KeyStore, SecureRandom, Security}
import java.util.Base64

import com.goyeau.kubernetes.client.KubeConfig
import javax.net.ssl.{KeyManagerFactory, SSLContext, TrustManagerFactory}
import org.bouncycastle.jce.provider.BouncyCastleProvider
import org.bouncycastle.openssl.jcajce.JcaPEMKeyConverter
import org.bouncycastle.openssl.{PEMKeyPair, PEMParser}

object SslContexts {
  private val TrustStoreSystemProperty         = "javax.net.ssl.trustStore"
  private val TrustStorePasswordSystemProperty = "javax.net.ssl.trustStorePassword"
  private val KeyStoreSystemProperty           = "javax.net.ssl.keyStore"
  private val KeyStorePasswordSystemProperty   = "javax.net.ssl.keyStorePassword"

  def fromConfig(config: KubeConfig): SSLContext = {
    val sslContext = SSLContext.getInstance("TLS")
    sslContext.init(keyManagers(config), trustManagers(config), new SecureRandom)
    sslContext
  }

  private def keyManagers(config: KubeConfig) = {
    // Client certificate
    val certDataStream = config.clientCertData.map(data => new ByteArrayInputStream(Base64.getDecoder.decode(data)))
    val certFileStream = config.clientCertFile.map(new FileInputStream(_))

    // Client key
    val keyDataStream = config.clientKeyData.map(data => new ByteArrayInputStream(Base64.getDecoder.decode(data)))
    val keyFileStream = config.clientKeyFile.map(new FileInputStream(_))

    for {
      keyStream  <- keyDataStream.orElse(keyFileStream)
      certStream <- certDataStream.orElse(certFileStream)
    } yield {
      Security.addProvider(new BouncyCastleProvider())
      val pemKeyPair =
        new PEMParser(new InputStreamReader(keyStream)).readObject().asInstanceOf[PEMKeyPair] // scalafix:ok
      val privateKey = new JcaPEMKeyConverter().setProvider("BC").getPrivateKey(pemKeyPair.getPrivateKeyInfo)

      val certificateFactory = CertificateFactory.getInstance("X509")
      val certificate        = certificateFactory.generateCertificate(certStream).asInstanceOf[X509Certificate] // scalafix:ok

      defaultKeyStore.setKeyEntry(
        certificate.getSubjectX500Principal.getName,
        privateKey,
        config.clientKeyPass.fold(Array.empty[Char])(_.toCharArray),
        Array(certificate)
      )
    }

    val keyManagerFactory = KeyManagerFactory.getInstance(KeyManagerFactory.getDefaultAlgorithm)
    keyManagerFactory.init(defaultKeyStore, Array.empty)
    keyManagerFactory.getKeyManagers
  }

  private lazy val defaultKeyStore = {
    val propertyKeyStoreFile =
      Option(System.getProperty(KeyStoreSystemProperty, "")).filter(_.nonEmpty).map(new File(_))

    val keyStore = KeyStore.getInstance(KeyStore.getDefaultType)
    keyStore.load(
      propertyKeyStoreFile.map(new FileInputStream(_)).orNull,
      System.getProperty(KeyStorePasswordSystemProperty, "").toCharArray
    )
    keyStore
  }

  private def trustManagers(config: KubeConfig) = {
    val certDataStream = config.caCertData.map(data => new ByteArrayInputStream(Base64.getDecoder.decode(data)))
    val certFileStream = config.caCertFile.map(new FileInputStream(_))

    certDataStream.orElse(certFileStream).foreach { certStream =>
      val certificateFactory = CertificateFactory.getInstance("X509")
      val certificate        = certificateFactory.generateCertificate(certStream).asInstanceOf[X509Certificate] // scalafix:ok
      defaultTrustStore.setCertificateEntry(certificate.getSubjectX500Principal.getName, certificate)
    }

    val trustManagerFactory = TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm)
    trustManagerFactory.init(defaultTrustStore)
    trustManagerFactory.getTrustManagers
  }

  private lazy val defaultTrustStore = {
    val securityDirectory = s"${System.getProperty("java.home")}/lib/security"

    val propertyTrustStoreFile =
      Option(System.getProperty(TrustStoreSystemProperty, "")).filter(_.nonEmpty).map(new File(_))
    val jssecacertsFile = Option(new File(s"$securityDirectory/jssecacerts")).filter(f => f.exists && f.isFile)
    val cacertsFile     = new File(s"$securityDirectory/cacerts")

    val keyStore = KeyStore.getInstance(KeyStore.getDefaultType)
    keyStore.load(
      new FileInputStream(propertyTrustStoreFile.orElse(jssecacertsFile).getOrElse(cacertsFile)),
      System.getProperty(TrustStorePasswordSystemProperty, "changeit").toCharArray
    )
    keyStore
  }
} 
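
A hedged usage sketch for SslContexts: how the KubeConfig value is built depends on the kubernetes-client version, so it is assumed to be supplied by the caller, and the HttpsURLConnection wiring is only illustrative:

import java.net.URL
import javax.net.ssl.HttpsURLConnection
import com.goyeau.kubernetes.client.KubeConfig

def openSecureConnection(kubeConfig: KubeConfig, url: URL): HttpsURLConnection = {
  val sslContext = SslContexts.fromConfig(kubeConfig)
  val connection = url.openConnection().asInstanceOf[HttpsURLConnection]
  connection.setSSLSocketFactory(sslContext.getSocketFactory)
  connection
}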
Example 168
Source File: ScaleAndConvert.scala    From SparkNet   with MIT License 5 votes vote down vote up
package preprocessing

import java.awt.image.DataBufferByte
import java.io.ByteArrayInputStream
import javax.imageio.ImageIO

import scala.collection.mutable.ArrayBuffer
import scala.collection.JavaConversions._
import net.coobird.thumbnailator._

import org.apache.spark.rdd.RDD

import libs._

object ScaleAndConvert {
  def BufferedImageToByteArray(image: java.awt.image.BufferedImage) : Array[Byte] = {
    val height = image.getHeight()
    val width = image.getWidth()
    val pixels = image.getRGB(0, 0, width, height, null, 0, width)
    val result = new Array[Byte](3 * height * width)
    var row = 0
    while (row < height) {
      var col = 0
      while (col < width) {
        val rgb = pixels(row * width + col)
        result(0 * height * width + row * width + col) = ((rgb >> 16) & 0xFF).toByte
        result(1 * height * width + row * width + col) = ((rgb >> 8) & 0xFF).toByte
        result(2 * height * width + row * width + col) = (rgb & 0xFF).toByte
        col += 1
      }
      row += 1
    }
    result
  }

  def decompressImageAndResize(compressedImage: Array[Byte], height: Int, width: Int) : Option[Array[Byte]] = {
    // this method takes a JPEG, decompresses it, and resizes it
    try {
      val im = ImageIO.read(new ByteArrayInputStream(compressedImage))
      val resizedImage = Thumbnails.of(im).forceSize(width, height).asBufferedImage()
      Some(BufferedImageToByteArray(resizedImage))
    } catch {
      // If images can't be processed properly, just ignore them
      case e: java.lang.IllegalArgumentException => None
      case e: javax.imageio.IIOException => None
      case e: java.lang.NullPointerException => None
    }
  }
} 
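
A minimal sketch of the resize helper applied to a single JPEG read from disk; the file path and the 227x227 target size are hypothetical:

import java.nio.file.{Files, Paths}

val jpegBytes: Array[Byte] = Files.readAllBytes(Paths.get("/tmp/example.jpg"))

ScaleAndConvert.decompressImageAndResize(jpegBytes, height = 227, width = 227) match {
  case Some(pixels) => println(s"resized to ${pixels.length} bytes (expected ${3 * 227 * 227})")
  case None         => println("image could not be decoded")
}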
Example 169
Source File: TestingTypedCount.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive.execution

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate.{ImperativeAggregate, TypedImperativeAggregate}
import org.apache.spark.sql.hive.execution.TestingTypedCount.State
import org.apache.spark.sql.types._

@ExpressionDescription(
  usage = "_FUNC_(expr) - A testing aggregate function resembles COUNT " +
          "but implements ObjectAggregateFunction.")
case class TestingTypedCount(
    child: Expression,
    mutableAggBufferOffset: Int = 0,
    inputAggBufferOffset: Int = 0)
  extends TypedImperativeAggregate[TestingTypedCount.State] {

  def this(child: Expression) = this(child, 0, 0)

  override def children: Seq[Expression] = child :: Nil

  override def dataType: DataType = LongType

  override def nullable: Boolean = false

  override def createAggregationBuffer(): State = TestingTypedCount.State(0L)

  override def update(buffer: State, input: InternalRow): State = {
    if (child.eval(input) != null) {
      buffer.count += 1
    }
    buffer
  }

  override def merge(buffer: State, input: State): State = {
    buffer.count += input.count
    buffer
  }

  override def eval(buffer: State): Any = buffer.count

  override def serialize(buffer: State): Array[Byte] = {
    val byteStream = new ByteArrayOutputStream()
    val dataStream = new DataOutputStream(byteStream)
    dataStream.writeLong(buffer.count)
    byteStream.toByteArray
  }

  override def deserialize(storageFormat: Array[Byte]): State = {
    val byteStream = new ByteArrayInputStream(storageFormat)
    val dataStream = new DataInputStream(byteStream)
    TestingTypedCount.State(dataStream.readLong())
  }

  override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): ImperativeAggregate =
    copy(mutableAggBufferOffset = newMutableAggBufferOffset)

  override def withNewInputAggBufferOffset(newInputAggBufferOffset: Int): ImperativeAggregate =
    copy(inputAggBufferOffset = newInputAggBufferOffset)

  override val prettyName: String = "typed_count"
}

object TestingTypedCount {
  case class State(var count: Long)
} 
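
A small sketch of the aggregation buffer round trip above, runnable without a Spark job; the Literal child is only a placeholder expression:

import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.hive.execution.TestingTypedCount.State

val agg   = TestingTypedCount(Literal(1))
val bytes = agg.serialize(State(42L))

assert(agg.deserialize(bytes) == State(42L))
assert(agg.eval(State(42L)) == 42L)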
Example 170
Source File: CreateJacksonParser.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.json

import java.io.{ByteArrayInputStream, InputStream, InputStreamReader}

import com.fasterxml.jackson.core.{JsonFactory, JsonParser}
import org.apache.hadoop.io.Text

import org.apache.spark.unsafe.types.UTF8String

private[sql] object CreateJacksonParser extends Serializable {
  def string(jsonFactory: JsonFactory, record: String): JsonParser = {
    jsonFactory.createParser(record)
  }

  def utf8String(jsonFactory: JsonFactory, record: UTF8String): JsonParser = {
    val bb = record.getByteBuffer
    assert(bb.hasArray)

    val bain = new ByteArrayInputStream(
      bb.array(), bb.arrayOffset() + bb.position(), bb.remaining())

    jsonFactory.createParser(new InputStreamReader(bain, "UTF-8"))
  }

  def text(jsonFactory: JsonFactory, record: Text): JsonParser = {
    jsonFactory.createParser(record.getBytes, 0, record.getLength)
  }

  def inputStream(jsonFactory: JsonFactory, record: InputStream): JsonParser = {
    jsonFactory.createParser(record)
  }
} 
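
A brief sketch of the factory methods above; because the object is private[sql], code like this would have to live under the org.apache.spark.sql package (for example in a test), and the JSON payload is made up:

import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets

import com.fasterxml.jackson.core.{JsonFactory, JsonToken}

val factory = new JsonFactory()
val json    = """{"name":"spark"}"""

val fromString = CreateJacksonParser.string(factory, json)
val fromStream = CreateJacksonParser.inputStream(
  factory, new ByteArrayInputStream(json.getBytes(StandardCharsets.UTF_8)))

assert(fromString.nextToken() == JsonToken.START_OBJECT)
assert(fromStream.nextToken() == JsonToken.START_OBJECT)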
Example 171
Source File: GenericAvroSerializerSuite.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Input, Output}
import org.apache.avro.{Schema, SchemaBuilder}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SharedSparkContext, SparkFunSuite}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema : Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }

  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
}