java.io.ByteArrayInputStream Scala Examples

The following examples show how to use java.io.ByteArrayInputStream. Each example is taken from an open-source project; the source file, project, and license are noted above the code.
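As a quick orientation before the project examples, here is a minimal, self-contained sketch of the pattern they all share: wrapping an in-memory byte array in a ByteArrayInputStream so it can be consumed by any API that expects an InputStream. The object name and the sample string are purely illustrative and do not come from any of the projects below.

import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets

object ByteArrayInputStreamExample extends App {
  // Wrap an in-memory byte array so it can be read as an InputStream.
  val bytes = "hello, stream".getBytes(StandardCharsets.UTF_8)
  val in = new ByteArrayInputStream(bytes)

  // Drain the stream back into a String; Source handles buffering and decoding.
  val roundTripped = scala.io.Source.fromInputStream(in, "UTF-8").mkString
  println(roundTripped) // prints: hello, stream

  // close() is a no-op for ByteArrayInputStream, but keeps the usual stream contract.
  in.close()
}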
Example 1
Source File: AvroDataToCatalyst.scala From spark-schema-registry with Apache License 2.0
package com.hortonworks.spark.registry.avro

import java.io.ByteArrayInputStream

import com.hortonworks.registries.schemaregistry.{SchemaVersionInfo, SchemaVersionKey}
import com.hortonworks.registries.schemaregistry.client.SchemaRegistryClient
import com.hortonworks.registries.schemaregistry.serdes.avro.AvroSnapshotDeserializer
import org.apache.avro.Schema
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, UnaryExpression}
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.types.{BinaryType, DataType}

import scala.collection.JavaConverters._


case class AvroDataToCatalyst(child: Expression, schemaName: String, version: Option[Int], config: Map[String, Object])
  extends UnaryExpression with ExpectsInputTypes {

  override def inputTypes = Seq(BinaryType)

  @transient private lazy val srDeser: AvroSnapshotDeserializer = {
    val obj = new AvroSnapshotDeserializer()
    obj.init(config.asJava)
    obj
  }

  @transient private lazy val srSchema = fetchSchemaVersionInfo(schemaName, version)

  @transient private lazy val avroSchema = new Schema.Parser().parse(srSchema.getSchemaText)

  override lazy val dataType: DataType = SchemaConverters.toSqlType(avroSchema).dataType

  @transient private lazy val avroDeser = new AvroDeserializer(avroSchema, dataType)

  override def nullable: Boolean = true

  override def nullSafeEval(input: Any): Any = {
    val binary = input.asInstanceOf[Array[Byte]]
    val row = avroDeser.deserialize(srDeser.deserialize(new ByteArrayInputStream(binary), srSchema.getVersion))
    val result = row match {
      case r: InternalRow => r.copy()
      case _ => row
    }
    result
  }

  override def simpleString: String = {
    s"from_sr(${child.sql}, ${dataType.simpleString})"
  }

  override def sql: String = {
    s"from_sr(${child.sql}, ${dataType.catalogString})"
  }

  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val expr = ctx.addReferenceObj("this", this)
    defineCodeGen(ctx, ev, input =>
      s"(${ctx.boxedType(dataType)})$expr.nullSafeEval($input)")
  }

  private def fetchSchemaVersionInfo(schemaName: String, version: Option[Int]): SchemaVersionInfo = {
    val srClient = new SchemaRegistryClient(config.asJava)
    version.map(v => srClient.getSchemaVersionInfo(new SchemaVersionKey(schemaName, v)))
      .getOrElse(srClient.getLatestSchemaVersionInfo(schemaName))
  }

} 
Example 2
Source File: HDFSCredentialProvider.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.deploy.yarn.security

import java.io.{ByteArrayInputStream, DataInputStream}

import scala.collection.JavaConverters._

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier
import org.apache.hadoop.mapred.Master
import org.apache.hadoop.security.Credentials

import org.apache.spark.{SparkConf, SparkException}
import org.apache.spark.deploy.yarn.config._
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config._

private[security] class HDFSCredentialProvider extends ServiceCredentialProvider with Logging {
  // Token renewal interval. This value is set on the first call; None means no token
  // renewer was specified, so the renewal interval cannot be determined.
  private var tokenRenewalInterval: Option[Long] = null

  override val serviceName: String = "hdfs"

  override def obtainCredentials(
      hadoopConf: Configuration,
      sparkConf: SparkConf,
      creds: Credentials): Option[Long] = {
    // NameNode to access, used to get tokens from different FileSystems
    nnsToAccess(hadoopConf, sparkConf).foreach { dst =>
      val dstFs = dst.getFileSystem(hadoopConf)
      logInfo("getting token for namenode: " + dst)
      dstFs.addDelegationTokens(getTokenRenewer(hadoopConf), creds)
    }

    // Get the token renewal interval if it is not set. It will only be called once.
    if (tokenRenewalInterval == null) {
      tokenRenewalInterval = getTokenRenewalInterval(hadoopConf, sparkConf)
    }

    // Get the time of next renewal.
    tokenRenewalInterval.map { interval =>
      creds.getAllTokens.asScala
        .filter(_.getKind == DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
        .map { t =>
          val identifier = new DelegationTokenIdentifier()
          identifier.readFields(new DataInputStream(new ByteArrayInputStream(t.getIdentifier)))
          identifier.getIssueDate + interval
      }.foldLeft(0L)(math.max)
    }
  }

  private def getTokenRenewalInterval(
      hadoopConf: Configuration, sparkConf: SparkConf): Option[Long] = {
    // We cannot use the tokens generated with renewer yarn. Trying to renew
    // those will fail with an access control issue. So create new tokens with the logged in
    // user as renewer.
    sparkConf.get(PRINCIPAL).map { renewer =>
      val creds = new Credentials()
      nnsToAccess(hadoopConf, sparkConf).foreach { dst =>
        val dstFs = dst.getFileSystem(hadoopConf)
        dstFs.addDelegationTokens(renewer, creds)
      }
      val t = creds.getAllTokens.asScala
        .filter(_.getKind == DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
        .head
      val newExpiration = t.renew(hadoopConf)
      val identifier = new DelegationTokenIdentifier()
      identifier.readFields(new DataInputStream(new ByteArrayInputStream(t.getIdentifier)))
      val interval = newExpiration - identifier.getIssueDate
      logInfo(s"Renewal Interval is $interval")
      interval
    }
  }

  private def getTokenRenewer(conf: Configuration): String = {
    val delegTokenRenewer = Master.getMasterPrincipal(conf)
    logDebug("delegation token renewer is: " + delegTokenRenewer)
    if (delegTokenRenewer == null || delegTokenRenewer.length() == 0) {
      val errorMessage = "Can't get Master Kerberos principal for use as renewer"
      logError(errorMessage)
      throw new SparkException(errorMessage)
    }

    delegTokenRenewer
  }

  private def nnsToAccess(hadoopConf: Configuration, sparkConf: SparkConf): Set[Path] = {
    sparkConf.get(NAMENODES_TO_ACCESS).map(new Path(_)).toSet +
      sparkConf.get(STAGING_DIR).map(new Path(_))
        .getOrElse(FileSystem.get(hadoopConf).getHomeDirectory)
  }
} 
Example 3
Source File: RecordReaderSuite.scala From spark-snowflake with Apache License 2.0
package net.snowflake.spark.snowflake.io

import java.io.ByteArrayInputStream

import net.snowflake.client.jdbc.internal.fasterxml.jackson.databind.ObjectMapper
import org.scalatest.FunSuite

class RecordReaderSuite extends FunSuite {

  val mapper: ObjectMapper = new ObjectMapper()

  test("Read Json File") {
    val record1 =
      s"""
         |{
         |  "name":"abc",
         |  "age":123,
         |  "car":[
         |    {
         |      "make": "vw",
         |      "mode": "golf",
         |      "year": 2010
         |    },
         |    {
         |      "make": "Audi",
         |      "mode": "R8",
         |      "year": 2011
         |    }
         |  ]
         |}
         """.stripMargin
    val record2 =
      s"""
         |{
         |  "name":"def ghi",
         |  "age":222,
         |  "car":[
         |    {
         |      "make": "Tesla",
         |      "mode": "X",
         |      "year": 2017
         |    }
         |  ]
         |}
       """.stripMargin
    val file = record1 + record2

    val recordReader: SFRecordReader = new SFRecordReader(SupportedFormat.JSON)

    recordReader.addStream(new ByteArrayInputStream(file.getBytes))

    recordReader.addStream(new ByteArrayInputStream(file.getBytes))

    val result1 = mapper.readTree(recordReader.next())
    val json1 = mapper.readTree(record1)

    assert(json1.equals(result1))

    val result2 = mapper.readTree(recordReader.next())
    val json2 = mapper.readTree(record2)

    assert(json2.equals(result2))

    val result3 = mapper.readTree(recordReader.next())

    assert(json1.equals(result3))

    val result4 = mapper.readTree(recordReader.next())

    assert(json2.equals(result4))

    assert(!recordReader.hasNext)

  }

} 
Example 4
Source File: V26_1__Fill_create_argument.scala From daml with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package db.migration.postgres

import java.io.ByteArrayInputStream

import com.daml.platform.store.serialization.ValueSerializer
import db.migration.translation.ContractSerializer
import org.flywaydb.core.api.migration.{BaseJavaMigration, Context}

class V26_1__Fill_create_argument extends BaseJavaMigration {

  // left join on contracts to make sure to include divulged contracts
  private val SELECT_CONTRACT_DATA =
    """select contract_data.id, contract_data.contract
      |from contract_data
      |left join contracts
      |  on contracts.id = contract_data.id
      |  and contracts.archive_offset is null""".stripMargin

  private val UPDATE_PARTICIPANT_CONTRACTS =
    "update participant_contracts set create_argument = ?, template_id = ? where contract_id = ?"

  override def migrate(context: Context): Unit = {
    val conn = context.getConnection
    var loadContracts: java.sql.Statement = null
    var updateParticipantContracts: java.sql.PreparedStatement = null
    var rows: java.sql.ResultSet = null
    try {
      updateParticipantContracts = conn.prepareStatement(UPDATE_PARTICIPANT_CONTRACTS)
      loadContracts = conn.createStatement()
      rows = loadContracts.executeQuery(SELECT_CONTRACT_DATA)

      while (rows.next()) {
        val contractId = rows.getString("id")
        val contractBytes = rows.getBinaryStream("contract")
        val contract =
          ContractSerializer
            .deserializeContractInstance(contractBytes)
            .getOrElse(sys.error(s"failed to deserialize contract $contractId"))
        val createArgument = contract.arg
        val templateId = contract.template
        val createArgumentBytes =
          new ByteArrayInputStream(
            ValueSerializer.serializeValue(
              createArgument,
              s"failed to serialize create argument for contract $contractId"))
        updateParticipantContracts.setBinaryStream(1, createArgumentBytes)
        updateParticipantContracts.setString(2, templateId.toString)
        updateParticipantContracts.setString(3, contractId)
        updateParticipantContracts.execute()
      }
    } finally {
      if (loadContracts != null) {
        loadContracts.close()
      }
      if (updateParticipantContracts != null) {
        updateParticipantContracts.close()
      }
      if (rows != null) {
        rows.close()
      }
    }
  }

} 
Example 5
Source File: FileBasedLedgerDataExportSpec.scala From daml with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.participant.state.kvutils.export

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}
import java.time.Instant

import com.daml.ledger.participant.state.v1
import com.google.protobuf.ByteString
import org.scalatest.mockito.MockitoSugar
import org.scalatest.{Matchers, WordSpec}

class FileBasedLedgerDataExportSpec extends WordSpec with Matchers with MockitoSugar {
  // XXX SC remove in Scala 2.13; see notes in ConfSpec
  import scala.collection.GenTraversable, org.scalatest.enablers.Containing
  private[this] implicit def `fixed sig containingNatureOfGenTraversable`[
      E: org.scalactic.Equality,
      TRAV]: Containing[TRAV with GenTraversable[E]] =
    Containing.containingNatureOfGenTraversable[E, GenTraversable]

  "addParentChild" should {
    "add entry to correlation ID mapping" in {
      val instance = new FileBasedLedgerDataExporter(mock[DataOutputStream])
      instance.addParentChild("parent", "child")

      instance.correlationIdMapping should contain("child" -> "parent")
    }
  }

  "addToWriteSet" should {
    "append to existing data" in {
      val instance = new FileBasedLedgerDataExporter(mock[DataOutputStream])
      instance.addParentChild("parent", "child")
      instance.addToWriteSet("child", Seq(keyValuePairOf("a", "b")))
      instance.addToWriteSet("child", Seq(keyValuePairOf("c", "d")))

      instance.bufferedKeyValueDataPerCorrelationId should contain(
        "parent" ->
          Seq(keyValuePairOf("a", "b"), keyValuePairOf("c", "d")))
    }
  }

  "finishedProcessing" should {
    "remove all data such as submission info, write-set and child correlation IDs" in {
      val dataOutputStream = new DataOutputStream(new ByteArrayOutputStream())
      val instance = new FileBasedLedgerDataExporter(dataOutputStream)
      instance.addSubmission(
        ByteString.copyFromUtf8("an envelope"),
        "parent",
        Instant.now(),
        v1.ParticipantId.assertFromString("id"))
      instance.addParentChild("parent", "parent")
      instance.addToWriteSet("parent", Seq(keyValuePairOf("a", "b")))

      instance.finishedProcessing("parent")

      instance.inProgressSubmissions shouldBe empty
      instance.bufferedKeyValueDataPerCorrelationId shouldBe empty
      instance.correlationIdMapping shouldBe empty
    }
  }

  "serialized submission" should {
    "be readable back" in {
      val baos = new ByteArrayOutputStream()
      val dataOutputStream = new DataOutputStream(baos)
      val instance = new FileBasedLedgerDataExporter(dataOutputStream)
      val expectedRecordTimeInstant = Instant.now()
      val expectedParticipantId = v1.ParticipantId.assertFromString("id")
      instance.addSubmission(
        ByteString.copyFromUtf8("an envelope"),
        "parent",
        expectedRecordTimeInstant,
        v1.ParticipantId.assertFromString("id"))
      instance.addParentChild("parent", "parent")
      instance.addToWriteSet("parent", Seq(keyValuePairOf("a", "b")))

      instance.finishedProcessing("parent")

      val dataInputStream = new DataInputStream(new ByteArrayInputStream(baos.toByteArray))
      val (actualSubmissionInfo, actualWriteSet) = Serialization.readEntry(dataInputStream)
      actualSubmissionInfo.submissionEnvelope should be(ByteString.copyFromUtf8("an envelope"))
      actualSubmissionInfo.correlationId should be("parent")
      actualSubmissionInfo.recordTimeInstant should be(expectedRecordTimeInstant)
      actualSubmissionInfo.participantId should be(expectedParticipantId)
      actualWriteSet should be(Seq(keyValuePairOf("a", "b")))
    }
  }

  private def keyValuePairOf(key: String, value: String): (ByteString, ByteString) =
    ByteString.copyFromUtf8(key) -> ByteString.copyFromUtf8(value)
} 
Example 6
Source File: DarManifestReaderTest.scala From daml with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.lf.archive

import java.io.{ByteArrayInputStream, InputStream}
import java.nio.charset.Charset

import com.daml.lf.archive.DarManifestReader.DarManifestReaderException
import org.scalatest.{Inside, Matchers, WordSpec}

import scala.util.{Failure, Success}

class DarManifestReaderTest extends WordSpec with Matchers with Inside {

  private val unicode = Charset.forName("UTF-8")

  "should read dalf names from manifest, real scenario with Dalfs line split" in {
    val manifest = """Manifest-Version: 1.0
      |Created-By: Digital Asset packager (DAML-GHC)
      |Main-Dalf: com.daml.lf.archive:DarReaderTest:0.1.dalf
      |Dalfs: com.daml.lf.archive:DarReaderTest:0.1.dalf, daml-pri
      | m.dalf
      |Format: daml-lf
      |Encryption: non-encrypted""".stripMargin

    val inputStream: InputStream = new ByteArrayInputStream(manifest.getBytes(unicode))
    val actual = DarManifestReader.dalfNames(inputStream)

    actual shouldBe Success(
      Dar("com.daml.lf.archive:DarReaderTest:0.1.dalf", List("daml-prim.dalf")))

    inputStream.close()
  }

  "should read dalf names from manifest, Main-Dalf returned in the head" in {
    val manifest = """Main-Dalf: A.dalf
                     |Dalfs: B.dalf, C.dalf, A.dalf, E.dalf
                     |Format: daml-lf
                     |Encryption: non-encrypted""".stripMargin

    val inputStream: InputStream = new ByteArrayInputStream(manifest.getBytes(unicode))
    val actual = DarManifestReader.dalfNames(inputStream)

    actual shouldBe Success(Dar("A.dalf", List("B.dalf", "C.dalf", "E.dalf")))

    inputStream.close()
  }

  "should read dalf names from manifest, can handle one Dalf per manifest" in {
    val manifest = """Main-Dalf: A.dalf
                     |Dalfs: A.dalf
                     |Format: daml-lf
                     |Encryption: non-encrypted""".stripMargin

    val inputStream: InputStream = new ByteArrayInputStream(manifest.getBytes(unicode))
    val actual = DarManifestReader.dalfNames(inputStream)

    actual shouldBe Success(Dar("A.dalf", List.empty))

    inputStream.close()
  }

  "should return failure if Format is not daml-lf" in {
    val manifest = """Main-Dalf: A.dalf
                     |Dalfs: B.dalf, C.dalf, A.dalf, E.dalf
                     |Format: anything-different-from-daml-lf
                     |Encryption: non-encrypted""".stripMargin

    val inputStream: InputStream = new ByteArrayInputStream(manifest.getBytes(unicode))
    val actual = DarManifestReader.dalfNames(inputStream)

    inside(actual) {
      case Failure(DarManifestReaderException(msg)) =>
        msg shouldBe "Unsupported format: anything-different-from-daml-lf"
    }

    inputStream.close()
  }
} 
Example 7
Source File: BytecodeUtils.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.graphx.util

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import scala.collection.mutable.HashSet
import scala.language.existentials

import org.apache.xbean.asm5.{ClassReader, ClassVisitor, MethodVisitor}
import org.apache.xbean.asm5.Opcodes._

import org.apache.spark.util.Utils


private[graphx] object BytecodeUtils {

  // Returns true for classes whose methods should not be followed, i.e. Scala and
  // Spark internals (class names arrive in JVM internal form, e.g. "scala/Option").
  private def skipClass(className: String): Boolean = {
    val c = className.replace('/', '.')
    c.startsWith("scala.") || c.startsWith("org.apache.spark.")
  }

  private class MethodInvocationFinder(className: String, methodName: String)
    extends ClassVisitor(ASM5) {

    val methodsInvoked = new HashSet[(Class[_], String)]

    override def visitMethod(access: Int, name: String, desc: String,
                             sig: String, exceptions: Array[String]): MethodVisitor = {
      if (name == methodName) {
        new MethodVisitor(ASM5) {
          override def visitMethodInsn(
              op: Int, owner: String, name: String, desc: String, itf: Boolean) {
            if (op == INVOKEVIRTUAL || op == INVOKESPECIAL || op == INVOKESTATIC) {
              if (!skipClass(owner)) {
                methodsInvoked.add((Utils.classForName(owner.replace("/", ".")), name))
              }
            }
          }
        }
      } else {
        null
      }
    }
  }
} 
Example 8
Source File: PortableDataStream.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.input

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

import scala.collection.JavaConverters._

import com.google.common.io.{ByteStreams, Closeables}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.{InputSplit, JobContext, RecordReader, TaskAttemptContext}
import org.apache.hadoop.mapreduce.lib.input.{CombineFileInputFormat, CombineFileRecordReader, CombineFileSplit}


  def toArray(): Array[Byte] = {
    val stream = open()
    try {
      ByteStreams.toByteArray(stream)
    } finally {
      Closeables.close(stream, true)
    }
  }

  def getPath(): String = path
} 
Example 9
Source File: GenericAvroSerializerSuite.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Input, Output}
import org.apache.avro.{Schema, SchemaBuilder}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SharedSparkContext, SparkFunSuite}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema : Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }

  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
} 
Example 10
Source File: SerializerPropertiesSuite.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import scala.util.Random

import org.scalatest.Assertions

import org.apache.spark.{SparkConf, SparkFunSuite}
import org.apache.spark.serializer.KryoTest.RegistratorWithoutAutoReset


class SerializerPropertiesSuite extends SparkFunSuite {

  import SerializerPropertiesSuite._

  test("JavaSerializer does not support relocation") {
    // Per a comment on the SPARK-4550 JIRA ticket, Java serialization appears to write out the
    // full class name the first time an object is written to an output stream, but subsequent
    // references to the class write a more compact identifier; this prevents relocation.
    val ser = new JavaSerializer(new SparkConf())
    testSupportsRelocationOfSerializedObjects(ser, generateRandomItem)
  }

  test("KryoSerializer supports relocation when auto-reset is enabled") {
    val ser = new KryoSerializer(new SparkConf)
    assert(ser.newInstance().asInstanceOf[KryoSerializerInstance].getAutoReset())
    testSupportsRelocationOfSerializedObjects(ser, generateRandomItem)
  }

  test("KryoSerializer does not support relocation when auto-reset is disabled") {
    val conf = new SparkConf().set("spark.kryo.registrator",
      classOf[RegistratorWithoutAutoReset].getName)
    val ser = new KryoSerializer(conf)
    assert(!ser.newInstance().asInstanceOf[KryoSerializerInstance].getAutoReset())
    testSupportsRelocationOfSerializedObjects(ser, generateRandomItem)
  }

}

object SerializerPropertiesSuite extends Assertions {

  def generateRandomItem(rand: Random): Any = {
    val randomFunctions: Seq[() => Any] = Seq(
      () => rand.nextInt(),
      () => rand.nextString(rand.nextInt(10)),
      () => rand.nextDouble(),
      () => rand.nextBoolean(),
      () => (rand.nextInt(), rand.nextString(rand.nextInt(10))),
      () => MyCaseClass(rand.nextInt(), rand.nextString(rand.nextInt(10))),
      () => {
        val x = MyCaseClass(rand.nextInt(), rand.nextString(rand.nextInt(10)))
        (x, x)
      }
    )
    randomFunctions(rand.nextInt(randomFunctions.size)).apply()
  }

  def testSupportsRelocationOfSerializedObjects(
      serializer: Serializer,
      generateRandomItem: Random => Any): Unit = {
    if (!serializer.supportsRelocationOfSerializedObjects) {
      return
    }
    val NUM_TRIALS = 5
    val rand = new Random(42)
    for (_ <- 1 to NUM_TRIALS) {
      val items = {
        // Make sure that we have duplicate occurrences of the same object in the stream:
        val randomItems = Seq.fill(10)(generateRandomItem(rand))
        randomItems ++ randomItems.take(5)
      }
      val baos = new ByteArrayOutputStream()
      val serStream = serializer.newInstance().serializeStream(baos)
      def serializeItem(item: Any): Array[Byte] = {
        val itemStartOffset = baos.toByteArray.length
        serStream.writeObject(item)
        serStream.flush()
        val itemEndOffset = baos.toByteArray.length
        baos.toByteArray.slice(itemStartOffset, itemEndOffset).clone()
      }
      val itemsAndSerializedItems: Seq[(Any, Array[Byte])] = {
        val serItems = items.map {
          item => (item, serializeItem(item))
        }
        serStream.close()
        rand.shuffle(serItems)
      }
      val reorderedSerializedData: Array[Byte] = itemsAndSerializedItems.flatMap(_._2).toArray
      val deserializedItemsStream = serializer.newInstance().deserializeStream(
        new ByteArrayInputStream(reorderedSerializedData))
      assert(deserializedItemsStream.asIterator.toSeq === itemsAndSerializedItems.map(_._1))
      deserializedItemsStream.close()
    }
  }
}

private case class MyCaseClass(foo: Int, bar: String) 
Example 11
Source File: ECIESCoder.scala From mantis with Apache License 2.0
package io.iohk.ethereum.crypto

import java.io.{ByteArrayInputStream, IOException}
import java.math.BigInteger
import java.security.SecureRandom

import org.spongycastle.crypto.digests.{SHA1Digest, SHA256Digest}
import org.spongycastle.crypto.engines.AESEngine
import org.spongycastle.crypto.generators.ECKeyPairGenerator
import org.spongycastle.crypto.macs.HMac
import org.spongycastle.crypto.modes.SICBlockCipher
import org.spongycastle.crypto.params._
import org.spongycastle.crypto.parsers.ECIESPublicKeyParser
import org.spongycastle.crypto.{BufferedBlockCipher, InvalidCipherTextException}
import org.spongycastle.math.ec.ECPoint

object ECIESCoder {

  val KeySize = 128
  val PublicKeyOverheadSize = 65
  val MacOverheadSize = 32
  val OverheadSize = PublicKeyOverheadSize + KeySize / 8 + MacOverheadSize

  @throws[IOException]
  @throws[InvalidCipherTextException]
  def decrypt(privKey: BigInteger, cipher: Array[Byte], macData: Option[Array[Byte]] = None): Array[Byte] = {
    val is = new ByteArrayInputStream(cipher)
    val ephemBytes = new Array[Byte](2 * ((curve.getCurve.getFieldSize + 7) / 8) + 1)
    is.read(ephemBytes)
    val ephem = curve.getCurve.decodePoint(ephemBytes)
    val IV = new Array[Byte](KeySize / 8)
    is.read(IV)
    val cipherBody = new Array[Byte](is.available)
    is.read(cipherBody)
    decrypt(ephem, privKey, Some(IV), cipherBody, macData)
  }

  @throws[InvalidCipherTextException]
  def decrypt(ephem: ECPoint, prv: BigInteger, IV: Option[Array[Byte]], cipher: Array[Byte], macData: Option[Array[Byte]]): Array[Byte] = {
    val aesEngine = new AESEngine

    val iesEngine = new EthereumIESEngine(
      kdf = Left(new ConcatKDFBytesGenerator(new SHA256Digest)),
      mac = new HMac(new SHA256Digest),
      hash = new SHA256Digest,
      cipher = Some(new BufferedBlockCipher(new SICBlockCipher(aesEngine))),
      IV = IV,
      prvSrc = Left(new ECPrivateKeyParameters(prv, curve)),
      pubSrc = Left(new ECPublicKeyParameters(ephem, curve)))


    iesEngine.processBlock(cipher, 0, cipher.length, forEncryption = false, macData)
  }

  
  @throws[IOException]
  @throws[InvalidCipherTextException]
  def encryptSimple(pub: ECPoint, secureRandom: SecureRandom, plaintext: Array[Byte]): Array[Byte] = {

    val eGen = new ECKeyPairGenerator
    val gParam = new ECKeyGenerationParameters(curve, secureRandom)
    eGen.init(gParam)

    val iesEngine = new EthereumIESEngine(
      kdf = Right(new MGF1BytesGeneratorExt(new SHA1Digest)),
      mac = new HMac(new SHA1Digest),
      hash = new SHA1Digest,
      cipher = None,
      IV = Some(new Array[Byte](0)),
      prvSrc = Right(eGen),
      pubSrc = Left(new ECPublicKeyParameters(pub, curve)),
      hashMacKey = false)

    iesEngine.processBlock(plaintext, 0, plaintext.length, forEncryption = true)
  }

  private def makeIESEngine(pub: ECPoint, prv: BigInteger, IV: Option[Array[Byte]]) = {
    val aesEngine = new AESEngine

    val iesEngine = new EthereumIESEngine(
      kdf = Left(new ConcatKDFBytesGenerator(new SHA256Digest)),
      mac = new HMac(new SHA256Digest),
      hash = new SHA256Digest,
      cipher = Some(new BufferedBlockCipher(new SICBlockCipher(aesEngine))),
      IV = IV,
      prvSrc = Left(new ECPrivateKeyParameters(prv, curve)),
      pubSrc = Left(new ECPublicKeyParameters(pub, curve)))

    iesEngine
  }

} 
Example 12
Source File: HdfsBlockStream.scala From ArchiveSpark with MIT License
package org.archive.archivespark.sparkling.io

import java.io.{ByteArrayInputStream, InputStream}

import org.apache.hadoop.fs.{FileSystem, Path}
import org.archive.archivespark.sparkling.logging.LogContext
import org.archive.archivespark.sparkling.util.Common

import scala.util.Try

class HdfsBlockStream (fs: FileSystem, file: String, offset: Long = 0, length: Long = -1, retries: Int = 60, sleepMillis: Int = 1000 * 60) extends InputStream {
  implicit val logContext: LogContext = LogContext(this)

  val path = new Path(file)
  val (blockSize: Int, fileSize: Long) = {
    val status = fs.getFileStatus(path)
    (status.getBlockSize.min(Int.MaxValue).toInt, status.getLen)
  }

  private var pos: Long = offset.max(0)
  private val max: Long = if (length > 0) fileSize.min(pos + length) else fileSize

  private val buffer = new Array[Byte](blockSize)
  private val emptyBlock = new ByteArrayInputStream(Array.emptyByteArray)
  private var block: ByteArrayInputStream = emptyBlock

  def ensureNextBlock(): InputStream = {
    if (block.available() == 0 && pos < max) {
      val end = pos + blockSize
      val blockLength = ((end - (end % blockSize)).min(max) - pos).toInt
      Common.retry(retries, sleepMillis, (retry, e) => {
        "File access failed (" + retry + "/" + retries + "): " + path + " (Offset: " + pos + ") - " + e.getMessage
      }) { retry =>
        val in = fs.open(path, blockLength)
        if (retry > 0) Try(in.seekToNewSource(pos))
        else if (pos > 0) in.seek(pos)
        var read = 0
        while (read < blockLength) read += in.read(buffer, read, blockLength - read)
        Try(in.close())
      }
      pos += blockLength
      block = new ByteArrayInputStream(buffer, 0, blockLength)
    }
    block
  }

  override def read(): Int = ensureNextBlock().read()

  override def read(b: Array[Byte]): Int = ensureNextBlock().read(b)

  override def read(b: Array[Byte], off: Int, len: Int): Int = ensureNextBlock().read(b, off, len)

  override def skip(n: Long): Long = {
    val available = block.available()
    if (n <= available) block.skip(n)
    else {
      block = emptyBlock
      val currentPos = pos - available
      val skip = n.min(max - currentPos)
      pos += skip - available
      skip
    }
  }

  override def available(): Int = block.available()

  override def close(): Unit = {}
  override def markSupported(): Boolean = false
} 
Example 13
Source File: PLYReadWriteTests.scala From scalismo-faces with Apache License 2.0
package scalismo.faces.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, OutputStreamWriter}
import java.nio.ByteOrder
import java.util.Scanner

import scalismo.faces.FacesTestSuite
import scalismo.faces.io.ply._

class PLYReadWriteTests extends FacesTestSuite {

  describe("Write-read cycles to string, big- and little endian") {

    def testRWEndianCycle[A:StringWriter:StringReader:EndianWriter:EndianReader](toWrite: IndexedSeq[A], bo: ByteOrder): Unit = {
      val N = toWrite.size
      val os = new ByteArrayOutputStream()
      val writer = new SequenceWriter[A]
      writer.write(toWrite, os, bo)

      val ba = os.toByteArray

      val is = new ByteArrayInputStream(ba)
      val reader = new FixedLengthSequenceReader[A]
      val read = reader.read(N, is, bo)

      read.zip(toWrite).foreach { p =>
        p._1 shouldBe p._2
      }
    }

    def testRWStringCycle[A:StringWriter:StringReader:EndianWriter:EndianReader](toWrite: IndexedSeq[A]): Unit = {
      val N = toWrite.size
      val os = new ByteArrayOutputStream()
      val osw = new OutputStreamWriter(os)
      val writer = new SequenceWriter[A]
      writer.write(toWrite, osw)
      osw.flush()

      val is = new ByteArrayInputStream(os.toByteArray)
      val isr = new Scanner(is)
      val reader = new FixedLengthSequenceReader[A]
      val read = reader.read(N, isr)

      read.zip(toWrite).foreach { p =>
        p._1 shouldBe p._2
      }
    }

    def testAllThreeCycles[A:StringWriter:StringReader:EndianWriter:EndianReader](toWrite: IndexedSeq[A]): Unit = {
      testRWStringCycle(toWrite)
      testRWEndianCycle(toWrite, ByteOrder.BIG_ENDIAN)
      testRWEndianCycle(toWrite, ByteOrder.LITTLE_ENDIAN)
    }

    it("should result in the same sequence of bytes") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toByte
      testAllThreeCycles(toWrite)
    }
    it("should result in the same sequence of char") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toChar
      testAllThreeCycles(toWrite)
    }
    it("should result in the same sequence of short") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toShort
      testAllThreeCycles(toWrite)
    }
    it("should result in the same sequence of int") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toInt
      testAllThreeCycles(toWrite)
    }
    it("should result in the same sequence of long") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toLong
      testAllThreeCycles(toWrite)
    }
    it("should result in the same sequence of float") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toFloat
      testAllThreeCycles(toWrite)
    }
    it("should result in the same sequence of double") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255)
      testAllThreeCycles(toWrite)
    }

  }

} 
Example 14
Source File: package.scala From spark-alchemy with Apache License 2.0
package com.swoop.alchemy.spark.expressions

import java.io.{ByteArrayInputStream, DataInputStream}

import com.clearspring.analytics.stream
import com.clearspring.analytics.stream.cardinality.RegisterSet
import com.clearspring.analytics.util.{Bits, Varint}
import net.agkn.hll.HLL
import net.agkn.hll.serialization.{HLLMetadata, SchemaVersionOne}
import net.agkn.hll.util.BitVector

package object hll {
  val IMPLEMENTATION_CONFIG_KEY = "com.swoop.alchemy.hll.implementation"

  def strmToAgkn(from: stream.cardinality.HyperLogLogPlus): net.agkn.hll.HLL = {
    HLL.fromBytes(strmToAgkn(from.getBytes))
  }

  def strmToAgkn(from: Array[Byte]): Array[Byte] = {
    var bais = new ByteArrayInputStream(from)
    var oi = new DataInputStream(bais)
    val version = oi.readInt
    // the new encoding scheme includes a version field
    // that is always negative.
    if (version >= 0) {
      throw new UnsupportedOperationException("conversion is only supported for the new style encoding scheme")
    }

    val p = Varint.readUnsignedVarInt(oi)
    val sp = Varint.readUnsignedVarInt(oi)
    val formatType = Varint.readUnsignedVarInt(oi)
    if (formatType != 0) {
      throw new UnsupportedOperationException("conversion is only supported for non-sparse representation")
    }

    val size = Varint.readUnsignedVarInt(oi)
    val longArrayBytes = new Array[Byte](size)
    oi.readFully(longArrayBytes)
    val registerSet = new RegisterSet(Math.pow(2, p).toInt, Bits.getBits(longArrayBytes))
    val bitVector = new BitVector(RegisterSet.REGISTER_SIZE, registerSet.count)

    for (i <- 0 until registerSet.count) bitVector.setRegister(i, registerSet.get(i))
    val schemaVersion = new SchemaVersionOne
    val serializer =
      schemaVersion.getSerializer(net.agkn.hll.HLLType.FULL, RegisterSet.REGISTER_SIZE, registerSet.count)
    bitVector.getRegisterContents(serializer)
    var outBytes = serializer.getBytes

    val metadata = new HLLMetadata(
      schemaVersion.schemaVersionNumber(),
      net.agkn.hll.HLLType.FULL,
      p,
      RegisterSet.REGISTER_SIZE,
      0,
      true,
      false,
      false
    )
    schemaVersion.writeMetadata(outBytes, metadata)
    outBytes
  }
} 
Example 15
Source File: DatasetFunctionsSpec.scala From daf with BSD 3-Clause "New" or "Revised" License
package daf.dataset

import java.io.ByteArrayInputStream

import akka.stream.ActorMaterializer
import akka.stream.scaladsl.StreamConverters
import controllers.modules.TestAbstractModule
import daf.filesystem.MergeStrategy
import daf.instances.{ AkkaInstance, ConfigurationInstance }
import org.scalatest.{ BeforeAndAfterAll, MustMatchers, WordSpecLike }

import scala.concurrent.Await
import scala.concurrent.duration._
import scala.util.Random

class DatasetFunctionsSpec extends TestAbstractModule
  with WordSpecLike
  with MustMatchers
  with BeforeAndAfterAll
  with ConfigurationInstance
  with AkkaInstance {

  implicit lazy val executionContext = actorSystem.dispatchers.lookup("akka.actor.test-dispatcher")

  protected implicit lazy val materializer = ActorMaterializer.create { actorSystem }

  override def beforeAll() = {
    startAkka()
  }

  def data = (1 to 5) .map { i =>
    Random.alphanumeric.grouped(20).take(5).map { s => s"$i - ${s.mkString}" }.toStream :+ defaultSeparator
  }

  def stream = MergeStrategy.coalesced {
    data.map { iter =>
      new ByteArrayInputStream(
        iter.mkString(defaultSeparator).getBytes("UTF-8")
      )
    }
  }

  def source = StreamConverters.fromInputStream(() => stream, 5)

  "Source manipulation" must {

    "convert to a string source" in {
      Await.result(
        wrapDefault { asStringSource(source) }.runFold("") { _ + _ },
        5.seconds
      ).split(defaultSeparator).length must be { 25 }
    }

    "convert to a json source" in {
      Await.result(
        wrapJson { asStringSource(source) }.runFold("") { _ + _ },
        5.seconds
      ).split(jsonSeparator).length must be { 25 }
    }

  }

} 
Example 16
Source File: JavaSerializationConverter.scala From scala-serialization with MIT License
package com.komanov.serialization.converters

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import com.komanov.serialization.converters.IoUtils.using
import com.komanov.serialization.domain.{Site, SiteEvent, SiteEventData}

object JavaSerializationConverter extends MyConverter {

  override def toByteArray(site: Site): Array[Byte] = {
    using(new ByteArrayOutputStream()) { baos =>
      using(new ObjectOutputStream(baos)) { os =>
        os.writeObject(site)
        os.flush()
        baos.toByteArray
      }
    }
  }

  override def fromByteArray(bytes: Array[Byte]): Site = {
    using(new ByteArrayInputStream(bytes)) { bais =>
      using(new ObjectInputStream(bais)) { os =>
        os.readObject().asInstanceOf[Site]
      }
    }
  }

  override def toByteArray(event: SiteEvent): Array[Byte] = {
    using(new ByteArrayOutputStream()) { baos =>
      using(new ObjectOutputStream(baos)) { os =>
        os.writeObject(event)
        os.flush()
        baos.toByteArray
      }
    }
  }

  override def siteEventFromByteArray(clazz: Class[_], bytes: Array[Byte]): SiteEvent = {
    using(new ByteArrayInputStream(bytes)) { bais =>
      using(new ObjectInputStream(bais)) { os =>
        os.readObject().asInstanceOf[SiteEvent]
      }
    }
  }

} 
Example 17
Source File: BMLHelper.scala From Linkis with Apache License 2.0
package com.webank.wedatasphere.linkis.filesystem.bml

import java.io.{ByteArrayInputStream, InputStream}
import java.util
import java.util.UUID

import com.webank.wedatasphere.linkis.bml.client.{BmlClient, BmlClientFactory}
import com.webank.wedatasphere.linkis.bml.protocol.{BmlDownloadResponse, BmlUpdateResponse, BmlUploadResponse}
import com.webank.wedatasphere.linkis.filesystem.exception.WorkspaceExceptionManager
import org.springframework.stereotype.Component

import scala.collection.JavaConversions._


@Component
class BMLHelper {

  def upload(userName: String, content: String, fileName: String): util.Map[String, Object] = {
    val inputStream = new ByteArrayInputStream(content.getBytes("utf-8"))
    val client: BmlClient = createBMLClient(userName)
    val resource: BmlUploadResponse = client.uploadResource(userName, fileName, inputStream)
    if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80021)
    val map = new util.HashMap[String, Object]
    map += "resourceId" -> resource.resourceId
    map += "version" -> resource.version
  }

  def upload(userName: String, inputStream: InputStream, fileName: String, projectName: String): util.Map[String, Object] = {
    val client: BmlClient = createBMLClient(userName)
    val resource: BmlUploadResponse = client.uploadResource(userName, fileName, inputStream)
    if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80021)
    val map = new util.HashMap[String, Object]
    map += "resourceId" -> resource.resourceId
    map += "version" -> resource.version
  }


  def upload(userName: String, inputStream: InputStream, fileName: String): util.Map[String, Object] = {
    val client: BmlClient = createBMLClient(userName)
    val resource: BmlUploadResponse = client.uploadResource(userName, fileName, inputStream)
    if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80021)
    val map = new util.HashMap[String, Object]
    map += "resourceId" -> resource.resourceId
    map += "version" -> resource.version
  }

  def update(userName: String, resourceId: String, inputStream: InputStream): util.Map[String, Object] = {
    val client: BmlClient = createBMLClient(userName)
    val resource: BmlUpdateResponse = client.updateResource(userName, resourceId, "", inputStream)
    if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80022)
    val map = new util.HashMap[String, Object]
    map += "resourceId" -> resource.resourceId
    map += "version" -> resource.version
  }

  def update(userName: String, resourceId: String, content: String): util.Map[String, Object] = {
    val inputStream = new ByteArrayInputStream(content.getBytes("utf-8"))
    val client: BmlClient = createBMLClient(userName)
    val resource: BmlUpdateResponse = client.updateResource(userName, resourceId, UUID.randomUUID().toString + ".json", inputStream)
    if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80022)
    val map = new util.HashMap[String, Object]
    map += "resourceId" -> resource.resourceId
    map += "version" -> resource.version
  }

  def query(userName: String, resourceId: String, version: String): util.Map[String, Object] = {
    val client: BmlClient = createBMLClient(userName)
    var resource: BmlDownloadResponse = null
    if (version == null) resource = client.downloadResource(userName, resourceId, null)
    else resource = client.downloadResource(userName, resourceId, version)
    if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80023)
    val map = new util.HashMap[String, Object]
    map += "path" -> resource.fullFilePath
    map += "stream" -> resource.inputStream
  }

  private def inputstremToString(inputStream: InputStream): String = scala.io.Source.fromInputStream(inputStream).mkString

  private def createBMLClient(userName: String): BmlClient = if (userName == null)
    BmlClientFactory.createBmlClient()
  else
    BmlClientFactory.createBmlClient(userName)
} 
Example 18
Source File: StorageScriptFsWriter.scala From Linkis with Apache License 2.0
package com.webank.wedatasphere.linkis.storage.script.writer

import java.io.{ByteArrayInputStream, IOException, InputStream, OutputStream}
import java.util

import com.webank.wedatasphere.linkis.common.io.{FsPath, MetaData, Record}
import com.webank.wedatasphere.linkis.storage.LineRecord
import com.webank.wedatasphere.linkis.storage.script.{Compaction, ScriptFsWriter, ScriptMetaData}
import com.webank.wedatasphere.linkis.storage.utils.{StorageConfiguration, StorageUtils}
import org.apache.commons.io.IOUtils


class StorageScriptFsWriter(val path: FsPath, val charset: String, outputStream: OutputStream = null) extends ScriptFsWriter {

  private val stringBuilder = new StringBuilder

  @scala.throws[IOException]
  override def addMetaData(metaData: MetaData): Unit = {
    val compactions = Compaction.listCompactions().filter(p => p.belongTo(StorageUtils.pathToSuffix(path.getPath)))
    val metadataLine = new util.ArrayList[String]()
    if (compactions.length > 0) {
      metaData.asInstanceOf[ScriptMetaData].getMetaData.map(compactions(0).compact).foreach(metadataLine.add)
      if (outputStream != null) {
        IOUtils.writeLines(metadataLine, "\n", outputStream, charset)
      } else {
        import scala.collection.JavaConversions._
        metadataLine.foreach(m => stringBuilder.append(s"$m\n"))
      }
    }
  }

  @scala.throws[IOException]
  override def addRecord(record: Record): Unit = {
    // Convert to LineRecord rather than TableRecord so that non-table result sets can also be written through this class
    val scriptRecord = record.asInstanceOf[LineRecord]
    if (outputStream != null) {
      IOUtils.write(scriptRecord.getLine, outputStream, charset)
    } else {
      stringBuilder.append(scriptRecord.getLine)
    }
  }

  override def close(): Unit = {
    IOUtils.closeQuietly(outputStream)
  }

  override def flush(): Unit = if (outputStream != null) outputStream.flush()

  def getInputStream(): InputStream = {
    new ByteArrayInputStream(stringBuilder.toString().getBytes(StorageConfiguration.STORAGE_RS_FILE_TYPE.getValue))
  }

} 
Example 19
Source File: StorageResultSetReader.scala From Linkis with Apache License 2.0
package com.webank.wedatasphere.linkis.storage.resultset

import java.io.{ByteArrayInputStream, IOException, InputStream}

import com.webank.wedatasphere.linkis.common.io.resultset.{ResultSet, ResultSetReader}
import com.webank.wedatasphere.linkis.common.io.{MetaData, Record}
import com.webank.wedatasphere.linkis.common.utils.Logging
import com.webank.wedatasphere.linkis.storage.domain.Dolphin
import com.webank.wedatasphere.linkis.storage.exception.StorageWarnException
import com.webank.wedatasphere.linkis.storage.utils.StorageUtils

import scala.collection.mutable.ArrayBuffer



  def readLine(): Array[Byte] = {

    var rowLen = 0
    try rowLen = Dolphin.readInt(inputStream)
    catch {
      case t:StorageWarnException => info(s"Read finished(读取完毕)") ; return null
      case t: Throwable => throw t
    }

    val rowBuffer = ArrayBuffer[Byte]()
    var len = 0

    //Read the entire line, except for the data of the line length(读取整行,除了行长的数据)
    while (rowLen > 0 && len >= 0) {
      if (rowLen > READ_CACHE)
        len = StorageUtils.readBytes(inputStream,bytes, READ_CACHE)
      else
        len = StorageUtils.readBytes(inputStream,bytes, rowLen)

      if (len > 0) {
        rowLen -= len
        rowBuffer ++= bytes.slice(0, len)
      }
    }
    rowCount = rowCount + 1
    rowBuffer.toArray
  }

  @scala.throws[IOException]
  override def getRecord: Record = {
    if (metaData == null) throw new IOException("Must read metadata first(必须先读取metadata)")
    if (row ==  null) throw new IOException("Can't get the value of the field, maybe the IO stream has been read or has been closed!(拿不到字段的值,也许IO流已读取完毕或已被关闭!)")
    row
  }

  @scala.throws[IOException]
  override def getMetaData: MetaData = {
    if(metaData == null) init()
    metaData = deserializer.createMetaData(readLine())
    metaData
  }

  @scala.throws[IOException]
  override def skip(recordNum: Int): Int = {
    if(recordNum < 0 ) return -1

    if(metaData == null) getMetaData
    for(i <- recordNum until (0, -1)){
      try inputStream.skip(Dolphin.readInt(inputStream)) catch { case t: Throwable => return -1}
    }
    recordNum
  }

  @scala.throws[IOException]
  override def getPosition: Long = rowCount

  @scala.throws[IOException]
  override def hasNext: Boolean = {
    if(metaData == null) getMetaData
    val line = readLine()
    if(line == null) return  false
    row = deserializer.createRecord(line)
    if(row == null) return  false
    true
  }

  @scala.throws[IOException]
  override def available: Long = inputStream.available()

  override def close(): Unit = inputStream.close()
} 
Example 20
Source File: StreamingSpec.scala From seals with Apache License 2.0
package com.example.streaming

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream }

import shapeless.record._

import cats.effect.IO

import org.scalatest.matchers.should.Matchers
import org.scalatest.flatspec.AnyFlatSpec

import fs2.Stream

import scodec.Codec
import scodec.bits.BitVector
import scodec.stream.CodecError

import dev.tauri.seals._
import dev.tauri.seals.scodec.Codecs._
import dev.tauri.seals.scodec.StreamCodecs._

class StreamingSpec extends AnyFlatSpec with Matchers {

  import Main.{ Animal, Elephant, Quokka, Quagga, Grey }

  val animals = Vector[Animal](
    Elephant("Dumbo", tuskLength = 35.0f),
    Quokka("Nellie"),
    Quagga("Ford", speed = 120.0)
  )

  val transformedAnimals = Vector[Animal](
    Elephant("Dumbo", tuskLength = 35.0f + 17.0f),
    Quokka("Nellie", Grey)
  )

  val animalStream = Stream.emits[IO, Animal](animals)

  val encoder = streamEncoderFromReified[Animal]
  val decoder = streamDecoderFromReified[Animal]

  "Encoding/decoding" should "work correctly" in {
    val tsk: IO[Unit] = for {
      bv <- encoder.encode[IO](animalStream).compile.fold(BitVector.empty)(_ ++ _)
      as <- decoder.decode[IO](Stream(bv)).compile.toVector
    } yield {
      as should === (animals)
    }
    tsk.unsafeRunSync()
  }

  it should "fail with incompatible models" in {
    val mod = Reified[Record.`'Elephant -> Elephant, 'Quokka -> Quokka`.T].model
    val bv: BitVector = Codec[Model].encode(mod).getOrElse(fail)
    val tsk: IO[Unit] = for {
      as <- decoder.decode[IO](Stream(bv)).compile.toVector
    } yield {
      as should === (Vector.empty)
    }

    val ex = intercept[CodecError] {
      tsk.unsafeRunSync()
    }
    ex.err.message should include ("incompatible models")
  }

  "Transformation" should "work correctly" in {
    val tsk: IO[Unit] = for {
      ibv <- encoder.encode[IO](animalStream).compile.fold(BitVector.empty)(_ ++ _)
      is = new ByteArrayInputStream(ibv.toByteArray)
      os = new ByteArrayOutputStream
      _ <- Main.transform(is, os)(Main.transformer)
      obv = BitVector(os.toByteArray())
      transformed <- decoder.decode[IO](Stream(obv)).compile.fold(Vector.empty[Animal])(_ :+ _)
    } yield {
      transformed should === (transformedAnimals)
    }
    tsk.unsafeRunSync()
  }
} 
Example 21
Source File: CsvSourceTypeConversionTest.scala From eel-sdk with Apache License 2.0
package io.eels.component.csv

import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets

import io.eels.schema._
import org.scalatest.{Ignore, Matchers, WordSpec}

@Ignore
class CsvSourceTypeConversionTest extends WordSpec with Matchers {
  "CsvSource" should {
    "read schema" in {
      val exampleCsvString =
        """A,B,C,D
          |1,2.2,3,foo
          |4,5.5,6,bar
        """.stripMargin

      val stream = new ByteArrayInputStream(exampleCsvString.getBytes(StandardCharsets.UTF_8))
      val schema = new StructType(Vector(
        Field("A", IntType.Signed),
        Field("B", DoubleType),
        Field("C", IntType.Signed),
        Field("D", StringType)
      ))
      val source = new CsvSource(() => stream)
        .withSchema(schema)
      
      source.schema.fields.foreach(println)
      val ds = source.toDataStream()
      val firstRow = ds.iterator.toIterable.head
      val firstRowA = firstRow.get("A")
      println(firstRowA) // prints 1 as expected
      println(firstRowA.getClass.getTypeName) // prints java.lang.String
      assert(firstRowA == 1) // this assertion will fail because firstRowA is not an Int
    }
  }
} 
Example 22
Source File: get_features_from_peinfo.scala From gsoc_relationship with Apache License 2.0
import com.datastax.spark.connector._
import play.api.libs.json.Json
import play.api.libs.json._
import java.io.{ByteArrayOutputStream, ByteArrayInputStream}
import java.util.zip.{GZIPOutputStream, GZIPInputStream}
import Array.concat
import org.apache.spark.sql.types._
import org.apache.spark.ml.linalg.SQLDataTypes.VectorType 
import org.apache.spark.ml.linalg._
import org.apache.spark.sql.Row
import org.apache.spark.ml.feature.MinMaxScaler
import org.apache.spark.ml.linalg.DenseVector
import PreProcessingConfig._

case class peinfo_results_by_service_name_class(service_name: String, sha256: String)
case class peinfo_results_by_sha256_class(sha256: String, service_name: String, results: Array[Byte])
case class peinfo_join_results_class(sha256: String, service_name: String, results: String)
case class peinfo_int_final_array_rdd_class(sha256: String, array_results: Array[Double])
case class peinfo_binaray_final_array_rdd_class(sha256:String, array_results :Array[Double])
case class peinfo_final_array_rdd_class(sha256:String, array_results: Array[Double])

def unzip(x: Array[Byte]) : String = {      
    val inputStream = new GZIPInputStream(new ByteArrayInputStream(x))
    val output = scala.io.Source.fromInputStream(inputStream).mkString
    return output
}
def findAllIntinpeinfo(peinfo_json_results: JsLookupResult, time: Double): Array[Double] = {
    val entropy = peinfo_json_results \\ "entropy"
    val virt_address = peinfo_json_results \\ "virt_address"
    val virt_size = peinfo_json_results \\ "virt_size"
    val size = peinfo_json_results \\ "size"
    // 17 features: 4 values for each of the four well-known PE sections, plus the timestamp.
    val features = Array.fill(17)(0.0)
    var i = 0
    for (k <- (peinfo_json_results \\ "section_name")) {
        k.as[String] match {
            case ".text\u0000\u0000\u0000" => { features(0) = entropy(i).as[Double]; features(1) = Integer.parseInt(virt_address(i).as[String].substring(2), 16).toDouble; features(2) = virt_size(i).as[Double]; features(3) = size(i).as[Double] }
            case ".data\u0000\u0000\u0000" => { features(4) = entropy(i).as[Double]; features(5) = Integer.parseInt(virt_address(i).as[String].substring(2), 16).toDouble; features(6) = virt_size(i).as[Double]; features(7) = size(i).as[Double] }
            case ".rsrc\u0000\u0000\u0000" => { features(8) = entropy(i).as[Double]; features(9) = Integer.parseInt(virt_address(i).as[String].substring(2), 16).toDouble; features(10) = virt_size(i).as[Double]; features(11) = size(i).as[Double] }
            case ".rdata\u0000\u0000" => { features(12) = entropy(i).as[Double]; features(13) = Integer.parseInt(virt_address(i).as[String].substring(2), 16).toDouble; features(14) = virt_size(i).as[Double]; features(15) = size(i).as[Double] }
            case _ => // other sections are ignored
        }
        i = i + 1
    }
    features(16) = time
    features
}

val peinfo_results_by_service_name_meta = sc.cassandraTable[peinfo_results_by_service_name_class](keyspace,service_name_table).where("service_name=?","peinfo")
val peinfo_results_by_service_name_rdd = peinfo_results_by_service_name_meta.keyBy(x=> (x.sha256,x.service_name))
val peinfo_results_by_sha256_meta = sc.cassandraTable[peinfo_results_by_sha256_class](keyspace,sha256_table)
val peinfo_results_by_sha256_rdd = peinfo_results_by_sha256_meta.keyBy(x => (x.sha256,x.service_name))
val peinfo_join_results = peinfo_results_by_service_name_rdd.join(peinfo_results_by_sha256_rdd).map(x=> (new peinfo_join_results_class(x._1._1,x._1._2, unzip(x._2._2.results)))).distinct().cache()

val peinfo_int_final_array_rdd = peinfo_join_results.map(x=>(x.sha256,(Json.parse(x.results) \ "pe_sections"),{if ((Json.parse(x.results) \ "timestamp").isInstanceOf[JsUndefined]) 0.0 else (Json.parse(x.results) \ "timestamp" \\ "timestamp")(0).as[Double]})).filter(x=> !x._2.isInstanceOf[JsUndefined]).map(x=>new  peinfo_int_final_array_rdd_class(x._1,findAllIntinpeinfo(x._2,x._3)))

val peinfo_dllfunction_list= peinfo_join_results.map(x=>Json.parse(x.results) \ "imports").filter(x=> !x.isInstanceOf[JsUndefined]).flatMap(x=>x.as[List[Map[String, String]]].map(x=>(x("dll")+"."+x("function")))).toDF("func_name").groupBy("func_name").count.sort(desc("count")).filter("count > 10000").rdd.map(r => r.getString(0)).collect().toList
implicit def bool2int(b:Boolean) = if (b) 1 else 0
def findAllBininpeinfo_dllfunction(peinfo_dllfunction : Seq[String]) : Array[Double] ={
    val forlist = for (family <- peinfo_dllfunction_list) yield {
        (peinfo_dllfunction.contains(family):Int).toDouble
    }
    return (forlist).toArray
}
val List502 = Array.iterate(0.0,502)(a=>0.0)
val peinfo_binaray_final_array_rdd = peinfo_join_results.map(x=>(x.sha256,(Json.parse(x.results) \ "imports"))).map(x=>new  peinfo_binaray_final_array_rdd_class(x._1,{if (x._2.isInstanceOf[JsUndefined]) List502 else findAllBininpeinfo_dllfunction(x._2.as[Seq[Map[String, String]]].map(x=>(x("dll")+"."+x("function"))))}))

val peinfo_int_final_array_rdd_before_join = peinfo_int_final_array_rdd.map(x=>(x.sha256,x.array_results))
val peinfo_binaray_final_array_rdd_before_join = peinfo_binaray_final_array_rdd.map(x=>(x.sha256,x.array_results))
val peinfo_array_rdd_by_join = peinfo_int_final_array_rdd_before_join.join(peinfo_binaray_final_array_rdd_before_join).map(x=> (x._1,concat(x._2._1,x._2._2)))
val peinfo_final_array_rdd = peinfo_array_rdd_by_join.map(x=>new peinfo_final_array_rdd_class(x._1,x._2))

val peinfo_schema = new StructType().add("sha256", StringType).add("peinfo",VectorType)
val peinfo_vector_rdd = peinfo_final_array_rdd.map(x=>(x.sha256,Vectors.dense(x.array_results)))
val peinfo_vector_rowrdd = peinfo_vector_rdd.map(p => Row(p._1,p._2))
val peinfo_vector_dataframe = spark.createDataFrame(peinfo_vector_rowrdd, peinfo_schema)
val peinfo_scaler = new MinMaxScaler()
  .setInputCol("peinfo")
  .setOutputCol("scaled_peinfo")
val peinfo_scalerModel = peinfo_scaler.fit(peinfo_vector_dataframe)
val peinfo_scaledData_df = peinfo_scalerModel.transform(peinfo_vector_dataframe)
val peinfo_scaledData_rdd = peinfo_scaledData_df.select("sha256","scaled_peinfo").rdd.map(row=>(row.getAs[String]("sha256"),row.getAs[DenseVector]("scaled_peinfo"))).map(x=>new peinfo_final_array_rdd_class(x._1,x._2.toArray))
peinfo_scaledData_rdd.toDF().write.format("parquet").save(peinfo_final_array_file) 
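The unzip helper above is one half of a gzip round trip over byte arrays. A minimal sketch of both halves, assuming nothing beyond the JDK and scala.io (the Cassandra/Spark context is not needed; names are illustrative):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.zip.{GZIPInputStream, GZIPOutputStream}

object GzipRoundTripSketch extends App {
  // Compress a string the same way the stored `results` blobs would be produced.
  def zip(s: String): Array[Byte] = {
    val bos = new ByteArrayOutputStream()
    val gzip = new GZIPOutputStream(bos)
    gzip.write(s.getBytes("UTF-8"))
    gzip.close() // finishes the gzip stream and flushes into bos
    bos.toByteArray
  }

  // Mirror of the unzip helper above.
  def unzip(x: Array[Byte]): String =
    scala.io.Source.fromInputStream(new GZIPInputStream(new ByteArrayInputStream(x))).mkString

  println(unzip(zip("""{"pe_sections":[]}"""))) // prints the original JSON text
}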
Example 23
Source File: get_features_from_objdump.scala    From gsoc_relationship   with Apache License 2.0 5 votes vote down vote up
import com.datastax.spark.connector._
import play.api.libs.json.Json
import play.api.libs.json._
import java.io.{ByteArrayOutputStream, ByteArrayInputStream}
import java.util.zip.{GZIPOutputStream, GZIPInputStream}
import PreProcessingConfig._

case class objdump_results_by_service_name_class(service_name: String, sha256: String)
case class objdump_results_by_sha256_class(sha256: String, service_name: String, results: Array[Byte])
case class objdump_join_results_class(sha256: String, service_name: String, results: String)
case class objdump_binaray_final_array_rdd_class(sha256: String, array_results: Array[Double])
 
val objdump_main_list = sc.textFile(objdump_x86Opcodes_file).collect.toList
def unzip(x: Array[Byte]) : String = {		
    val inputStream = new GZIPInputStream(new ByteArrayInputStream(x))
    val output = scala.io.Source.fromInputStream(inputStream).mkString
    return output
}
def combineAllObjdumpInOne( malwarelist :Seq[play.api.libs.json.JsValue]) : List[String] ={
    if (malwarelist(0).toString() == "null") return List("null")
    var begin = malwarelist(0).as[List[String]]
    for (i <- 1 to (malwarelist.size-1)){
        if (malwarelist(i).toString() == "null") begin = begin
        else begin = begin ::: malwarelist(i).as[List[String]]
    }
    return  begin
}
def convertToList( malwarelist :Seq[play.api.libs.json.JsValue]) : List[String] = {
    if (malwarelist(0).toString() == "null") return List("null")
    else {
        return malwarelist(0).as[List[String]]
    } 
    
}
def findAllBininobjdump_main_list(malware :List[String]) : Array[Double] ={
    if (malware == List("null")) return (List.fill(10000)(0.0)).toArray
    else {
        val forlist = for ( one  <- malware ) yield {
            objdump_main_list.indexOf(one) + 1.0
        }
        if (forlist.size < 10000){
            return  (List.concat(forlist,List.fill(10000-forlist.size)(0.0))).toArray
        }
        else return forlist.toArray
    }
}

val objdump_results_by_service_name_meta = sc.cassandraTable[objdump_results_by_service_name_class](keyspace,service_name_table).where("service_name=?","objdump")
val objdump_results_by_service_name_rdd = objdump_results_by_service_name_meta.keyBy(x=> (x.sha256,x.service_name))
val objdump_results_by_sha256_meta = sc.cassandraTable[objdump_results_by_sha256_class](keyspace,sha256_table)
val objdump_results_by_sha256_rdd = objdump_results_by_sha256_meta.keyBy(x => (x.sha256,x.service_name))
val objdump_join_results = objdump_results_by_service_name_rdd.join(objdump_results_by_sha256_rdd).map(x=> (new objdump_join_results_class(x._1._1,x._1._2, unzip(x._2._2.results)))).distinct()
val objdump_binaray_final_array_rdd = objdump_join_results.map(x=>(x.sha256,(Json.parse(x.results) \\ "opcodes"))).filter(x=> (x._2.size > 0)).map(x=>(x._1,if ( x._2.size == 1 ) convertToList(x._2) else combineAllObjdumpInOne(x._2))).map(x=>(x._1,findAllBininobjdump_main_list(x._2)))
objdump_binaray_final_array_rdd.toDF().write.format("parquet").save(objdump_binaray_final_array_file) 
Example 24
Source File: get_VT_signatures.scala    From gsoc_relationship   with Apache License 2.0 5 votes vote down vote up
import com.datastax.spark.connector._
import play.api.libs.json.Json
import java.io.{ByteArrayOutputStream, ByteArrayInputStream}
import java.util.zip.{GZIPOutputStream, GZIPInputStream}
import PreProcessingConfig._

case class VT_results_by_service_name_class(service_name: String, sha256: String)
case class VT_results_by_sha256_class(sha256: String, service_name: String, results: Array[Byte] )
case class VT_join_results_class(sha256: String, service_name: String, results: String)
case class VT_sample_signatures_initial_seq_rdd_class(sha256: String, seq_results: Seq[String])
case class VT_sample_signatures_final_array_rdd_class(sha256:String, array_results:Array[Double])

def unzip(x: Array[Byte]) : String = {		
    val inputStream = new GZIPInputStream(new ByteArrayInputStream(x))
    val output = scala.io.Source.fromInputStream(inputStream).mkString
    return output
}
def deleteNumberInSampleSignatures(x: String): Boolean = {
    val regex = "[0-9]".r
    return regex.findFirstIn(x).isEmpty
}

val VT_results_by_service_name_meta = sc.cassandraTable[VT_results_by_service_name_class](keyspace,service_name_table).where("service_name=?","virustotal")
val VT_results_by_service_name_rdd = VT_results_by_service_name_meta.keyBy(x=> (x.sha256,x.service_name))
val VT_results_by_sha256_meta = sc.cassandraTable[VT_results_by_sha256_class](keyspace,sha256_table)
val VT_results_by_sha256_rdd = VT_results_by_sha256_meta.keyBy(x => (x.sha256,x.service_name))
val VT_join_results = VT_results_by_service_name_rdd.join(VT_results_by_sha256_rdd).map(x => (new VT_join_results_class(x._1._1,x._1._2, unzip(x._2._2.results)))).distinct().cache()
val sample_signatures_rdd = VT_join_results.flatMap(x=>Json.parse(x.results) \ "scans" \\ "result").map(x=>Json.stringify(x)).filter( x=> !(x == "null"))
val sample_signatures_split_rdd = sample_signatures_rdd.flatMap(x=>x.replaceAll("""["]""","").replaceAll("""\![a-zA-Z0-9\s\+]+""","").replaceAll("""@[a-zA-Z0-9\s\+]+""","").replaceAll("""~[a-zA-Z0-9\s\+]+""","").replaceAll("""[\(|\[|{][a-zA-Z0-9\s\+]*[\)|\]|}]""","").replaceAll("""(\.|\!|\:|\_|\-|\\|/|\[|\])"""," ").split(" ")).filter(x=>(x.size>3)).filter(x=>deleteNumberInSampleSignatures(x)).map(x=>x.toLowerCase())
val signatures_prefix_rdd = sc.textFile(VT_signatures_prefix_suffix_file).map(x=>x.toLowerCase())
val family_signatures_subtract_rdd = sample_signatures_split_rdd.subtract(signatures_prefix_rdd)
val family_signatures_sorted_rdd = sc.parallelize(family_signatures_subtract_rdd.countByValue().toSeq).filter(x=>(x._2>50)).sortBy(x=>x._2,false)
val family_signatures_list = family_signatures_sorted_rdd.keys.collect().toList
val VT_sample_signatures_rdd = VT_join_results.map(x=>(x.sha256,(Json.parse(x.results) \ "scans" \\ "result").map(_.toString).filter( s => !(s== "null")).flatMap(x=>x.replaceAll("""["]""","").replaceAll("""\![a-zA-Z0-9\s\+]+""","").replaceAll("""@[a-zA-Z0-9\s\+]+""","").replaceAll("""~[a-zA-Z0-9\s\+]+""","").replaceAll("""[\(|\[|{][a-zA-Z0-9\s\+]*[\)|\]|}]""","").replaceAll("""(\.|\!|\:|\_|\-|\\|/|\[|\])"""," ").split(" ")).filter(x=>(x.size>3)).filter(x=>deleteNumberInSampleSignatures(x)).map(x=>x.toLowerCase())))
val  VT_sample_signatures_initial_seq_rdd = VT_sample_signatures_rdd.map(x=>new VT_sample_signatures_initial_seq_rdd_class(x._1, x._2))

implicit def bool2int(b:Boolean) = if (b) 1 else 0
def findAllInFamilySignatures(sample_signatures_seq : Seq[String]) : Array[Double] ={
    val forlist = for (family <- family_signatures_list) yield {
        (sample_signatures_seq.contains(family):Int).toDouble
    }
    return forlist.toArray
}

val VT_sample_signatures_final_array_rdd = VT_sample_signatures_initial_seq_rdd.map(x=>new VT_sample_signatures_final_array_rdd_class(x.sha256,findAllInFamilySignatures(x.seq_results)))
VT_sample_signatures_final_array_rdd.toDF().write.format("parquet").save(VT_sample_signatures_final_array_file) 
Example 25
Source File: HelperMethods.scala    From gsoc_relationship   with Apache License 2.0 5 votes vote down vote up
package com.holmesprocessing.analytics.relationship.knowledgeBase

import play.api.libs.json.Json
import java.util.zip.{GZIPOutputStream, GZIPInputStream}
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream

// The enclosing object declaration was elided in this listing; it is restored here so the
// score helper below compiles (the final closing brace belongs to this object).
object HelperMethods {

  def score(ruleset_1: String, ruleset_2:String) : Double = {

    val split_1 = ruleset_1.split(",").toSeq
    val split_2 = ruleset_2.split(",").toSeq
    if (split_1.length > 0 && split_2.length > 0) {
      return split_1.intersect(split_2).length.toDouble/split_1.union(split_2).distinct.length.toDouble
    } else {
      return 0
    }
  }

} 
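Usage of score is straightforward: it is the Jaccard overlap of two comma-separated rule lists. A self-contained sketch (the function body is repeated here so the snippet runs on its own):

object ScoreSketch extends App {
  // Same Jaccard-style overlap as HelperMethods.score above.
  def score(ruleset1: String, ruleset2: String): Double = {
    val a = ruleset1.split(",").toSeq
    val b = ruleset2.split(",").toSeq
    if (a.nonEmpty && b.nonEmpty) a.intersect(b).length.toDouble / a.union(b).distinct.length.toDouble
    else 0.0
  }

  println(score("r1,r2,r3", "r2,r3,r4")) // 0.5: two shared rules out of four distinct ones
}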
Example 26
Source File: JsonSerializer.scala    From hydra   with Apache License 2.0 5 votes vote down vote up
package hydra.kafka.serializers

import java.io.ByteArrayInputStream
import java.util

import com.fasterxml.jackson.databind.{
  JsonNode,
  ObjectMapper,
  SerializationFeature
}
import hydra.common.config.ConfigSupport
import org.apache.kafka.common.serialization._


class JsonSerializer extends Serializer[JsonNode] with ConfigSupport {

  import JsonSerializer._

  override def serialize(topic: String, data: JsonNode): Array[Byte] = {
    mapper.writeValueAsBytes(data)
  }

  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {
    val indent = Option(configs.get("kafka.encoders.json.indent.output"))
      .map(_.toString.toBoolean)
      .getOrElse(false)
    mapper.configure(SerializationFeature.INDENT_OUTPUT, indent)
  }

  override def close(): Unit = {
    //nothing to do
  }
}

object JsonSerializer {
  val mapper = new ObjectMapper
}

class JsonDeserializer extends Deserializer[JsonNode] {

  import JsonDeserializer._

  override def deserialize(topic: String, bytes: Array[Byte]): JsonNode = {
    mapper.readTree(new ByteArrayInputStream(bytes))
  }

  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {
    val indent = Option(configs.get("kafka.encoders.json.indent.output"))
      .map(_.toString.toBoolean)
      .getOrElse(false)
    mapper.configure(SerializationFeature.INDENT_OUTPUT, indent)
  }

  override def close(): Unit = {
    //nothing
  }
}

object JsonDeserializer {
  val mapper = new ObjectMapper
} 
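A minimal sketch of the round trip these two classes implement, using Jackson directly (no Kafka involved; the object name is illustrative):

import java.io.ByteArrayInputStream
import com.fasterxml.jackson.databind.ObjectMapper

object JsonRoundTripSketch extends App {
  val mapper = new ObjectMapper()
  val node = mapper.readTree("""{"name":"hydra","partitions":3}""")
  // Serialize to bytes, then deserialize from an in-memory stream, as the Kafka deserializer does.
  val bytes = mapper.writeValueAsBytes(node)
  val parsed = mapper.readTree(new ByteArrayInputStream(bytes))
  assert(parsed == node)
  println(parsed.get("name").asText()) // hydra
}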
Example 27
Source File: KryoInitSpec.scala    From hydra   with Apache License 2.0 5 votes vote down vote up
package hydra.core.akka

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import com.romix.scala.serialization.kryo.{
  EnumerationSerializer,
  ScalaImmutableAbstractMapSerializer,
  ScalaMutableMapSerializer
}
import org.scalatest.matchers.should.Matchers
import org.scalatest.flatspec.AnyFlatSpecLike

class KryoInitSpec extends Matchers with AnyFlatSpecLike {

  "The custom KryoInit" should "register serializers" in {
    val kryo = new Kryo()
    new KryoInit().customize(kryo)
    kryo.getDefaultSerializer(classOf[scala.Enumeration#Value]) shouldBe an[
      EnumerationSerializer
    ]
    kryo.getDefaultSerializer(classOf[collection.immutable.Map[_, _]]) shouldBe a[
      ScalaImmutableAbstractMapSerializer
    ]
    kryo.getDefaultSerializer(classOf[collection.immutable.Map[_, _]]) shouldBe a[
      ScalaImmutableAbstractMapSerializer
    ]
    kryo.getDefaultSerializer(classOf[collection.mutable.HashMap[_, _]]) shouldBe a[
      ScalaMutableMapSerializer
    ]
  }

  it should "serialize immutable maps" in {
    val kryo = new Kryo()
    new KryoInit().customize(kryo)
    val map1 = Map(
      "Rome" -> "Italy",
      "London" -> "England",
      "Paris" -> "France",
      "New York" -> "USA",
      "Tokyo" -> "Japan",
      "Peking" -> "China",
      "Brussels" -> "Belgium"
    )
    val map2 = map1 + ("Moscow" -> "Russia")
    val map3 = map2 + ("Berlin" -> "Germany")
    val map4 = map3 + ("Germany" -> "Berlin", "Russia" -> "Moscow")
    roundTrip(map1, kryo)
    roundTrip(map2, kryo)
    roundTrip(map3, kryo)
    roundTrip(map4, kryo)
  }

  def roundTrip[T](obj: T, kryo: Kryo): T = {
    val outStream = new ByteArrayOutputStream()
    val output = new Output(outStream, 4096)
    kryo.writeClassAndObject(output, obj)
    output.flush()

    val input = new Input(new ByteArrayInputStream(outStream.toByteArray), 4096)
    val obj1 = kryo.readClassAndObject(input)

    assert(obj == obj1)

    obj1.asInstanceOf[T]
  }

} 
Example 28
Source File: BytecodeUtils.scala    From graphx-algorithm   with GNU General Public License v2.0 5 votes vote down vote up
package org.apache.spark.graphx.util

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import scala.collection.mutable.HashSet
import scala.language.existentials

import org.apache.spark.util.Utils

import com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.{ClassReader, ClassVisitor, MethodVisitor}
import com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.Opcodes._

// The enclosing object and most of its members were elided in this listing. A minimal wrapper
// and a stand-in skipClass are restored so the visitor below compiles; the original filter
// logic may differ.
private[graphx] object BytecodeUtils {

  // Stand-in: ignore JDK and Scala library classes (owner is in internal "a/b/C" form).
  private def skipClass(owner: String): Boolean =
    owner.startsWith("java/") || owner.startsWith("scala/") || owner.startsWith("sun/")

  private class MethodInvocationFinder(className: String, methodName: String)
    extends ClassVisitor(ASM4) {

    val methodsInvoked = new HashSet[(Class[_], String)]

    override def visitMethod(access: Int, name: String, desc: String,
                             sig: String, exceptions: Array[String]): MethodVisitor = {
      if (name == methodName) {
        new MethodVisitor(ASM4) {
          override def visitMethodInsn(op: Int, owner: String, name: String, desc: String) {
            if (op == INVOKEVIRTUAL || op == INVOKESPECIAL || op == INVOKESTATIC) {
              if (!skipClass(owner)) {
                methodsInvoked.add((Class.forName(owner.replace("/", ".")), name))
              }
            }
          }
        }
      } else {
        null
      }
    }
  }
} 
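The ByteArrayInputStream and ByteArrayOutputStream imports above suggest the elided portion of this file reads class bytes into memory and hands them to ASM's ClassReader. A rough, self-contained sketch of that idea (readerFor is an illustrative helper, not part of the original file):

import java.io.ByteArrayInputStream
import com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.ClassReader

object ClassReaderSketch {
  // Load a class file as bytes via its resource path, then expose it to ASM through an
  // in-memory stream, which is the kind of input the visitor above is driven with.
  def readerFor(cls: Class[_]): ClassReader = {
    val resource = "/" + cls.getName.replace('.', '/') + ".class"
    val in = cls.getResourceAsStream(resource)
    val bytes = Stream.continually(in.read()).takeWhile(_ != -1).map(_.toByte).toArray
    in.close()
    new ClassReader(new ByteArrayInputStream(bytes))
  }
}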
Example 29
Source File: FakeMavenRepository.scala    From exodus   with MIT License 5 votes vote down vote up
package com.wixpress.build.maven

import java.io.ByteArrayInputStream

import org.apache.commons.codec.digest.DigestUtils
import org.codehaus.mojo.mrm.api.maven.Artifact
import org.codehaus.mojo.mrm.impl.maven.{ArtifactStoreFileSystem, MemoryArtifactStore}
import org.codehaus.mojo.mrm.plugin.FileSystemServer

class FakeMavenRepository(port: Int = 0) {

  implicit class ExtendedArtifactDescriptor(artifact: ArtifactDescriptor) {
    def asArtifact(ofType: String): Artifact = {
      val parent = artifact.parentCoordinates
      val groupId = artifact.groupId
        .getOrElse(parent.map(_.groupId).getOrElse(throw new RuntimeException("missing groupId or parent.groupId")))
      val version = artifact.version
        .getOrElse(parent.map(_.version).getOrElse(throw new RuntimeException("missing version or parent.version")))
      new Artifact(groupId, artifact.artifactId, version, ofType)
    }
  }

  private val inMemoryArtifactStore = new MemoryArtifactStore
  private val mavenRepoManager = new FileSystemServer("foo", port,
    new ArtifactStoreFileSystem(inMemoryArtifactStore), "")

  def url: String = mavenRepoManager.getUrl

  def start(): Unit = mavenRepoManager.ensureStarted()

  def stop(): Unit = {
    mavenRepoManager.finish()
    mavenRepoManager.waitForFinished()
  }

  def addArtifacts(artifact: ArtifactDescriptor*): Unit = addArtifacts(artifact.toSet)

  def addCoordinates(coordinatesSet: Coordinates*): Unit = addCoordinates(coordinatesSet.toSet)

  def addArtifacts(artifacts: Set[ArtifactDescriptor]): Unit = artifacts.foreach(addSingleArtifact)

  def addCoordinates(coordinatesSet: Set[Coordinates]): Unit = coordinatesSet.foreach(addSingleCoordinates)

  def addSingleCoordinates(coordinates: Coordinates): Unit = addSingleArtifact(ArtifactDescriptor.anArtifact(coordinates))

  def addSingleArtifact(artifact: ArtifactDescriptor): Unit = {
    val xml = artifact.pomXml
    val md5 = DigestUtils.md5Hex(xml)
    val sha1 = DigestUtils.sha1Hex(xml)
    inMemoryArtifactStore.set(artifact.asArtifact(ofType = "pom"), streamFrom(xml))
    inMemoryArtifactStore.set(artifact.asArtifact(ofType = "pom.md5"), streamFrom(md5))
    inMemoryArtifactStore.set(artifact.asArtifact(ofType = "pom.sha1"), streamFrom(sha1))
  }


  private def streamFrom(input: String) = {
    new ByteArrayInputStream(input.getBytes("UTF-8"))
  }

  def addJarArtifact(artifact: Coordinates, jar: Array[Byte]) =
    inMemoryArtifactStore.set(
      new Artifact(artifact.groupId, artifact.artifactId, artifact.version, artifact.classifier.orNull, "jar"), new ByteArrayInputStream(jar))

  def addJarSha256(artifact: Coordinates, sha256: String) =
    inMemoryArtifactStore.set(
      new Artifact(
        artifact.groupId,
        artifact.artifactId,
        artifact.version,
        artifact.classifier.orNull,
        artifact.packaging.value + ".sha256"), streamFrom(sha256))

} 
Example 30
Source File: RulesTxtDeploymentServiceSpec.scala    From smui   with Apache License 2.0 5 votes vote down vote up
package models

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.zip.ZipInputStream

import org.apache.commons.io.IOUtils
import org.scalatest.{FlatSpec, Matchers}

class RulesTxtDeploymentServiceSpec extends FlatSpec with Matchers with ApplicationTestBase {

  private lazy val service = injector.instanceOf[RulesTxtDeploymentService]
  private var inputIds: Seq[SearchInputId] = Seq.empty

  override protected def beforeAll(): Unit = {
    super.beforeAll()

    createTestCores()
    inputIds = createTestRule()
  }

  private def rulesFileContent(ruleIds: Seq[SearchInputId]): String = s"""aerosmith =>
                           |	SYNONYM: mercury
                           |	DOWN(10): battery
                           |	UP(10): notebook
                           |	FILTER: zz top
                           |	@{
                           |	  "_log" : "${ruleIds.head}"
                           |	}@
                           |
                           |mercury =>
                           |	SYNONYM: aerosmith
                           |	DOWN(10): battery
                           |	UP(10): notebook
                           |	FILTER: zz top
                           |	@{
                           |	  "_log" : "${ruleIds.head}"
                           |	}@
                           |
                           |shipping =>
                           |	DECORATE: REDIRECT http://xyz.com/shipping
                           |	@{
                           |	  "_log" : "${ruleIds.last}"
                           |	}@""".stripMargin

  "RulesTxtDeploymentService" should "generate rules files with correct file names" in {
    val rulesTxt = service.generateRulesTxtContentWithFilenames(core1Id, "LIVE", logDebug = false)
    rulesTxt.solrIndexId shouldBe core1Id
    rulesTxt.decompoundRules shouldBe empty
    rulesTxt.regularRules.content.trim shouldBe rulesFileContent(inputIds)

    rulesTxt.regularRules.sourceFileName shouldBe "/tmp/search-management-ui_rules-txt.tmp"
    rulesTxt.regularRules.destinationFileName shouldBe "/usr/bin/solr/liveCore/conf/rules.txt"
  }

  it should "validate the rules files correctly" in {
    val rulesTxt = service.generateRulesTxtContentWithFilenames(core1Id, "LIVE", logDebug = false)
    service.validateCompleteRulesTxts(rulesTxt, logDebug = false) shouldBe empty

    val badRulesTxt = rulesTxt.copy(regularRules = rulesTxt.regularRules.copy(content = "a very bad rules file"))
    service.validateCompleteRulesTxts(badRulesTxt, logDebug = false) shouldBe List("Line 1: Missing input for instruction")
  }

  it should "provide a zip file with all rules files" in {
    val out = new ByteArrayOutputStream()
    service.writeAllRulesTxtFilesAsZipFileToStream(out)

    val bytes = out.toByteArray
    val zipStream = new ZipInputStream(new ByteArrayInputStream(bytes))
    val firstEntry = zipStream.getNextEntry
    firstEntry.getName shouldBe "rules_core1.txt"
    IOUtils.toString(zipStream, "UTF-8").trim shouldBe rulesFileContent(inputIds)
    val secondEntry = zipStream.getNextEntry
    secondEntry.getName shouldBe "rules_core2.txt"
    IOUtils.toString(zipStream, "UTF-8").trim shouldBe ""
  }

} 
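The last test reads a zip archive straight out of memory. A minimal sketch of that pattern on its own, with only java.util.zip (the entry name and content are illustrative):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.zip.{ZipEntry, ZipInputStream, ZipOutputStream}

object InMemoryZipSketch extends App {
  // Build a zip in memory...
  val bos = new ByteArrayOutputStream()
  val zipOut = new ZipOutputStream(bos)
  zipOut.putNextEntry(new ZipEntry("rules_core1.txt"))
  zipOut.write("aerosmith =>\n\tSYNONYM: mercury".getBytes("UTF-8"))
  zipOut.closeEntry()
  zipOut.close()

  // ...then read it back through a ByteArrayInputStream, as the spec above does.
  val zipIn = new ZipInputStream(new ByteArrayInputStream(bos.toByteArray))
  var entry = zipIn.getNextEntry
  while (entry != null) {
    println(entry.getName) // rules_core1.txt
    entry = zipIn.getNextEntry
  }
}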
Example 31
Source File: ImageLoaderUtils.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.loaders

import java.awt.image.BufferedImage
import java.io.{InputStream, ByteArrayInputStream}
import java.net.URI
import java.util.zip.GZIPInputStream
import javax.imageio.ImageIO

import keystoneml.loaders.VOCLoader._
import org.apache.commons.compress.archivers.ArchiveStreamFactory
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import keystoneml.pipelines.Logging
import keystoneml.utils._

import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag

object ImageLoaderUtils extends Logging {
  
  def loadFiles[L, I <: AbstractLabeledImage[L] : ClassTag](
      filePathsRDD: RDD[URI],
      labelsMap: String => L,
      imageBuilder: (Image, L, Option[String]) => I, // TODO(etrain): We can probably do this with implicits.
      namePrefix: Option[String] = None): RDD[I] = {
    filePathsRDD.flatMap(fileUri => loadFile(fileUri, labelsMap, imageBuilder, namePrefix))
  }

  private def loadFile[L, I <: AbstractLabeledImage[L]](
      fileUri: URI,
      labelsMap: String => L,
      imageBuilder: (Image, L, Option[String]) => I,
      namePrefix: Option[String]): Iterator[I] = {
    val filePath = new Path(fileUri)
    val conf = new Configuration(true)
    val fs = FileSystem.get(filePath.toUri(), conf)
    val fStream = fs.open(filePath)

    val tarStream = new ArchiveStreamFactory().createArchiveInputStream(
      "tar", fStream).asInstanceOf[TarArchiveInputStream]

    var entry = tarStream.getNextTarEntry()
    val imgs = new ArrayBuffer[I]
    while (entry != null) {
      if (!entry.isDirectory && (namePrefix.isEmpty || entry.getName.startsWith(namePrefix.get))) {
        var offset = 0
        var ret = 0
        val content = new Array[Byte](entry.getSize().toInt)
        while (ret >= 0 && offset != entry.getSize()) {
          ret = tarStream.read(content, offset, content.length - offset)
          if (ret >= 0) {
            offset += ret
          }
        }

        val bais = new ByteArrayInputStream(content)

        val image = ImageUtils.loadImage(bais).map { img =>
          imageBuilder(img, labelsMap(entry.getName), Some(entry.getName))
        }

        imgs ++= image
      }
      entry = tarStream.getNextTarEntry()
    }

    imgs.iterator
  }
} 
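The entry-reading loop above fills a fixed-size array because the tar entry size is known up front. A more general sketch for draining any InputStream into a resettable ByteArrayInputStream, which is what image decoders like ImageIO typically want (names are illustrative):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream}

object ReadFullySketch {
  // Drain an InputStream into memory, then re-expose it as an in-memory stream that can be
  // reset and re-read, unlike the original source.
  def toByteArrayInputStream(in: InputStream): ByteArrayInputStream = {
    val buf = new Array[Byte](8192)
    val bos = new ByteArrayOutputStream()
    var n = in.read(buf)
    while (n != -1) {
      bos.write(buf, 0, n)
      n = in.read(buf)
    }
    new ByteArrayInputStream(bos.toByteArray)
  }
}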
Example 32
Source File: TestUtils.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.utils

import java.io.{FileReader, ByteArrayInputStream}
import breeze.linalg.DenseMatrix
import breeze.stats.distributions.{Gaussian, RandBasis, ThreadLocalRandomGenerator, Rand}
import edu.berkeley.cs.amplab.mlmatrix.RowPartitionedMatrix
import org.apache.commons.io.IOUtils
import org.apache.commons.math3.random.MersenneTwister
import org.apache.spark.SparkContext

import scala.io.Source
import scala.util.Random


// The enclosing object and the genData helper were elided in this listing; a minimal
// stand-in (uniform random pixel values) is restored so the generators below compile.
object TestUtils {

  def genData(x: Int, y: Int, z: Int): Array[Double] =
    Array.fill(x * y * z)(Random.nextDouble)

  def genChannelMajorArrayVectorizedImage(x: Int, y: Int, z: Int): ChannelMajorArrayVectorizedImage = {
    ChannelMajorArrayVectorizedImage(genData(x, y, z), ImageMetadata(x,y,z))
  }

  def genRowColumnMajorByteArrayVectorizedImage(x: Int, y: Int, z: Int): RowColumnMajorByteArrayVectorizedImage = {
    RowColumnMajorByteArrayVectorizedImage(genData(x,y,z).map(_.toByte), ImageMetadata(x,y,z))
  }

  def createRandomMatrix(
      sc: SparkContext,
      numRows: Int,
      numCols: Int,
      numParts: Int,
      seed: Int = 42): RowPartitionedMatrix = {

    val rowsPerPart = numRows / numParts
    val matrixParts = sc.parallelize(1 to numParts, numParts).mapPartitionsWithIndex { (index, part) =>
      val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed+index)))
      Iterator(DenseMatrix.rand(rowsPerPart, numCols, Gaussian(0.0, 1.0)(randBasis)))
    }
    RowPartitionedMatrix.fromMatrix(matrixParts.cache())
  }

  def createLocalRandomMatrix(numRows: Int, numCols: Int, seed: Int = 42): DenseMatrix[Double] = {
    val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed)))
    DenseMatrix.rand(numRows, numCols, Gaussian(0.0, 1.0)(randBasis))
  }
} 
Example 33
Source File: AggregatorTest.scala    From noether   with Apache License 2.0 5 votes vote down vote up
package com.spotify.noether

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import com.twitter.algebird.Aggregator
import org.scalatest._
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

trait AggregatorTest extends AnyFlatSpec with Matchers {
  def run[A, B, C](aggregator: Aggregator[A, B, C])(as: Seq[A]): C = {
    val bs = as.map(aggregator.prepare _ compose ensureSerializable)
    val b = ensureSerializable(aggregator.reduce(bs))
    ensureSerializable(aggregator.present(b))
  }

  private def serializeToByteArray(value: Any): Array[Byte] = {
    val buffer = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(buffer)
    oos.writeObject(value)
    buffer.toByteArray
  }

  private def deserializeFromByteArray(encodedValue: Array[Byte]): AnyRef = {
    val ois = new ObjectInputStream(new ByteArrayInputStream(encodedValue))
    ois.readObject()
  }

  private def ensureSerializable[T](value: T): T =
    deserializeFromByteArray(serializeToByteArray(value)).asInstanceOf[T]
} 
Example 34
Source File: TestingTypedCount.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive.execution

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate.{ImperativeAggregate, TypedImperativeAggregate}
import org.apache.spark.sql.hive.execution.TestingTypedCount.State
import org.apache.spark.sql.types._

@ExpressionDescription(
  usage = "_FUNC_(expr) - A testing aggregate function resembles COUNT " +
          "but implements ObjectAggregateFunction.")
case class TestingTypedCount(
    child: Expression,
    mutableAggBufferOffset: Int = 0,
    inputAggBufferOffset: Int = 0)
  extends TypedImperativeAggregate[TestingTypedCount.State] {

  def this(child: Expression) = this(child, 0, 0)

  override def children: Seq[Expression] = child :: Nil

  override def dataType: DataType = LongType

  override def nullable: Boolean = false

  override def createAggregationBuffer(): State = TestingTypedCount.State(0L)

  override def update(buffer: State, input: InternalRow): State = {
    if (child.eval(input) != null) {
      buffer.count += 1
    }
    buffer
  }

  override def merge(buffer: State, input: State): State = {
    buffer.count += input.count
    buffer
  }

  override def eval(buffer: State): Any = buffer.count

  override def serialize(buffer: State): Array[Byte] = {
    val byteStream = new ByteArrayOutputStream()
    val dataStream = new DataOutputStream(byteStream)
    dataStream.writeLong(buffer.count)
    byteStream.toByteArray
  }

  override def deserialize(storageFormat: Array[Byte]): State = {
    val byteStream = new ByteArrayInputStream(storageFormat)
    val dataStream = new DataInputStream(byteStream)
    TestingTypedCount.State(dataStream.readLong())
  }

  override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): ImperativeAggregate =
    copy(mutableAggBufferOffset = newMutableAggBufferOffset)

  override def withNewInputAggBufferOffset(newInputAggBufferOffset: Int): ImperativeAggregate =
    copy(inputAggBufferOffset = newInputAggBufferOffset)

  override val prettyName: String = "typed_count"
}

object TestingTypedCount {
  case class State(var count: Long)
} 
Example 35
Source File: CreateJacksonParser.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.json

import java.io.{ByteArrayInputStream, InputStream, InputStreamReader}
import java.nio.channels.Channels
import java.nio.charset.Charset

import com.fasterxml.jackson.core.{JsonFactory, JsonParser}
import org.apache.hadoop.io.Text
import sun.nio.cs.StreamDecoder

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.unsafe.types.UTF8String

private[sql] object CreateJacksonParser extends Serializable {
  def string(jsonFactory: JsonFactory, record: String): JsonParser = {
    jsonFactory.createParser(record)
  }

  def utf8String(jsonFactory: JsonFactory, record: UTF8String): JsonParser = {
    val bb = record.getByteBuffer
    assert(bb.hasArray)

    val bain = new ByteArrayInputStream(
      bb.array(), bb.arrayOffset() + bb.position(), bb.remaining())

    jsonFactory.createParser(new InputStreamReader(bain, "UTF-8"))
  }

  def text(jsonFactory: JsonFactory, record: Text): JsonParser = {
    jsonFactory.createParser(record.getBytes, 0, record.getLength)
  }

  // Jackson parsers can be ranked according to their performance:
  // 1. Array based with actual encoding UTF-8 in the array. This is the fastest parser
  //    but it doesn't allow to set encoding explicitly. Actual encoding is detected automatically
  //    by checking leading bytes of the array.
  // 2. InputStream based with actual encoding UTF-8 in the stream. Encoding is detected
  //    automatically by analyzing first bytes of the input stream.
  // 3. Reader based parser. This is the slowest parser used here but it allows to create
  //    a reader with specific encoding.
  // The method creates a reader for an array with given encoding and sets size of internal
  // decoding buffer according to size of input array.
  private def getStreamDecoder(enc: String, in: Array[Byte], length: Int): StreamDecoder = {
    val bais = new ByteArrayInputStream(in, 0, length)
    val byteChannel = Channels.newChannel(bais)
    val decodingBufferSize = Math.min(length, 8192)
    val decoder = Charset.forName(enc).newDecoder()

    StreamDecoder.forDecoder(byteChannel, decoder, decodingBufferSize)
  }

  def text(enc: String, jsonFactory: JsonFactory, record: Text): JsonParser = {
    val sd = getStreamDecoder(enc, record.getBytes, record.getLength)
    jsonFactory.createParser(sd)
  }

  def inputStream(jsonFactory: JsonFactory, is: InputStream): JsonParser = {
    jsonFactory.createParser(is)
  }

  def inputStream(enc: String, jsonFactory: JsonFactory, is: InputStream): JsonParser = {
    jsonFactory.createParser(new InputStreamReader(is, enc))
  }

  def internalRow(jsonFactory: JsonFactory, row: InternalRow): JsonParser = {
    val ba = row.getBinary(0)

    jsonFactory.createParser(ba, 0, ba.length)
  }

  def internalRow(enc: String, jsonFactory: JsonFactory, row: InternalRow): JsonParser = {
    val binary = row.getBinary(0)
    val sd = getStreamDecoder(enc, binary, binary.length)

    jsonFactory.createParser(sd)
  }
} 
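A small sketch contrasting the byte-array path and the reader-based path described in the comments above, using Jackson on its own (Spark types are not involved; the object name is illustrative):

import java.io.{ByteArrayInputStream, InputStreamReader}
import com.fasterxml.jackson.core.JsonFactory

object EncodingAwareParserSketch extends App {
  val factory = new JsonFactory()
  val json = """{"id":42}"""

  // Fast path: byte-array parser, encoding detected automatically from the leading bytes.
  val p1 = factory.createParser(json.getBytes("UTF-8"))

  // Explicit-encoding path: wrap the bytes in a reader, as getStreamDecoder does above.
  val bytes = json.getBytes("UTF-16LE")
  val p2 = factory.createParser(new InputStreamReader(new ByteArrayInputStream(bytes), "UTF-16LE"))

  p1.close(); p2.close()
}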
Example 36
Source File: SQLRunnerSuite.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package com.sap.spark.cli

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream}

import org.apache.spark.SparkContext
import org.apache.spark.sql.{GlobalSapSQLContext, SQLContext}
import org.scalatest.{BeforeAndAfterEach, FunSuite, ShouldMatchers}


// The class declaration and the opening of the option-parsing test were elided in this
// listing; a minimal reconstruction is shown so the checks below compile.
class SQLRunnerSuite extends FunSuite with ShouldMatchers with GlobalSapSQLContext {

  test("parseOpts handles good, bad, and ugly argument lists") {
    // good call
    val goodOpts =
      SQLRunner.parseOpts(List("a.sql", "b.sql", "-o", "output.csv"))

    goodOpts.sqlFiles should be(List("a.sql", "b.sql"))
    goodOpts.output should be(Some("output.csv"))

    // bad call
    val badOpts = SQLRunner.parseOpts(List())

    badOpts.sqlFiles should be(List())
    badOpts.output should be(None)

    // ugly call
    val uglyOpts =
      SQLRunner.parseOpts(List("a.sql", "-o", "output.csv", "b.sql"))

    uglyOpts.sqlFiles should be(List("a.sql", "b.sql"))
    uglyOpts.output should be(Some("output.csv"))
  }

  def runSQLTest(input: String, expectedOutput: String): Unit = {
    val inputStream: InputStream = new ByteArrayInputStream(input.getBytes())
    val outputStream = new ByteArrayOutputStream()

    SQLRunner.sql(inputStream, outputStream)

    val output = outputStream.toString
    output should be(expectedOutput)
  }

  test("can run dummy query") {
    val input = "SELECT 1;"
    val output = "1\n"

    runSQLTest(input, output)
  }

  test("can run multiple dummy queries") {
    val input = """
        |SELECT 1;SELECT 2;
        |SELECT 3;
      """.stripMargin

    val output = "1\n2\n3\n"

    runSQLTest(input, output)
  }

  test("can run a basic example with tables") {
    val input = """
                  |SELECT * FROM DEMO_TABLE;
                  |SELECT * FROM DEMO_TABLE LIMIT 1;
                  |DROP TABLE DEMO_TABLE;
                """.stripMargin

    val output = "1,a\n2,b\n3,c\n1,a\n"

    runSQLTest(input, output)
  }

  test("can run an example with comments") {
    val input = """
                  |SELECT * FROM DEMO_TABLE; -- this is the first query
                  |SELECT * FROM DEMO_TABLE LIMIT 1;
                  |-- now let's drop a table
                  |DROP TABLE DEMO_TABLE;
                """.stripMargin

    val output = "1,a\n2,b\n3,c\n1,a\n"

    runSQLTest(input, output)
  }
} 
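The runSQLTest helper is an instance of a general in-memory I/O harness: stdin-like input comes from a ByteArrayInputStream and output is captured in a ByteArrayOutputStream. A minimal sketch with a stand-in function (echo is illustrative, not SQLRunner's API):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream, OutputStream}
import scala.io.Source

object StreamHarnessSketch extends App {
  // Stand-in for the function under test: copies each input line to the output.
  def echo(in: InputStream, out: OutputStream): Unit =
    Source.fromInputStream(in).getLines().foreach(l => out.write((l + "\n").getBytes("UTF-8")))

  val in = new ByteArrayInputStream("SELECT 1;\nSELECT 2;".getBytes("UTF-8"))
  val out = new ByteArrayOutputStream()
  echo(in, out)
  assert(out.toString == "SELECT 1;\nSELECT 2;\n")
}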
Example 37
Source File: S3SnapshotStore.scala    From akka-persistence-s3   with MIT License 5 votes vote down vote up
package akka.persistence.s3
package snapshot

import java.io.ByteArrayInputStream
import akka.actor.ActorLogging
import akka.persistence.serialization.Snapshot
import akka.persistence.{ SelectedSnapshot, SnapshotMetadata, SnapshotSelectionCriteria }
import akka.persistence.snapshot.SnapshotStore
import akka.serialization.SerializationExtension
import com.amazonaws.services.s3.model.{ ObjectMetadata, S3ObjectInputStream, ListObjectsRequest }
import com.typesafe.config.Config
import scala.collection.JavaConversions._
import scala.collection.immutable
import scala.concurrent.Future
import scala.util.control.NonFatal

case class SerializationResult(stream: ByteArrayInputStream, size: Int)

class S3SnapshotStore(config: Config) extends SnapshotStore with ActorLogging with SnapshotKeySupport {
  import context.dispatcher

  val settings = new S3SnapshotConfig(config)

  val s3Client: S3Client = new S3Client {
    val s3ClientConfig = new S3ClientConfig(context.system.settings.config.getConfig("s3-client"))
  }

  private val serializationExtension = SerializationExtension(context.system)

  private val s3Dispatcher = context.system.dispatchers.lookup("s3-snapshot-store.s3-client-dispatcher")

  val extensionName = settings.extension

  override def loadAsync(persistenceId: String, criteria: SnapshotSelectionCriteria): Future[Option[SelectedSnapshot]] = {
    snapshotMetadatas(persistenceId, criteria)
      .map(_.sorted.takeRight(settings.maxLoadAttempts))
      .flatMap(load)
  }

  private def load(metadata: immutable.Seq[SnapshotMetadata]): Future[Option[SelectedSnapshot]] = metadata.lastOption match {
    case None => Future.successful(None)
    case Some(md) =>
      s3Client.getObject(settings.bucketName, snapshotKey(md))(s3Dispatcher)
        .map { obj =>
          val snapshot = deserialize(obj.getObjectContent)
          Some(SelectedSnapshot(md, snapshot.data))
        } recoverWith {
          case NonFatal(e) =>
            log.error(e, s"Error loading snapshot [${md}]")
            load(metadata.init) // try older snapshot
        }
  }

  override def saveAsync(metadata: SnapshotMetadata, snapshot: Any): Future[Unit] = {
    val serialized = serialize(Snapshot(snapshot))
    val objectMetadata = new ObjectMetadata()
    objectMetadata.setContentLength(serialized.size)
    s3Client.putObject(
      settings.bucketName,
      snapshotKey(metadata),
      serialized.stream,
      objectMetadata
    )(s3Dispatcher).map(_ => ())
  }

  override def deleteAsync(metadata: SnapshotMetadata): Future[Unit] = {
    if (metadata.timestamp == 0L)
      deleteAsync(metadata.persistenceId, SnapshotSelectionCriteria(metadata.sequenceNr, Long.MaxValue, metadata.sequenceNr, Long.MinValue))
    else
      s3Client.deleteObject(settings.bucketName, snapshotKey(metadata))(s3Dispatcher)
  }

  override def deleteAsync(persistenceId: String, criteria: SnapshotSelectionCriteria): Future[Unit] = {
    val metadatas = snapshotMetadatas(persistenceId, criteria)
    metadatas.map(list => Future.sequence(list.map(deleteAsync)))
  }

  private def snapshotMetadatas(persistenceId: String, criteria: SnapshotSelectionCriteria): Future[List[SnapshotMetadata]] = {
    s3Client.listObjects(
      new ListObjectsRequest()
        .withBucketName(settings.bucketName)
        .withPrefix(prefixFromPersistenceId(persistenceId))
        .withDelimiter("/")
    )(s3Dispatcher)
      .map(_.getObjectSummaries.toList.map(s => parseKeyToMetadata(s.getKey))
        .filter(m => m.sequenceNr >= criteria.minSequenceNr && m.sequenceNr <= criteria.maxSequenceNr && m.timestamp >= criteria.minTimestamp && m.timestamp <= criteria.maxTimestamp))

  }

  protected def deserialize(inputStream: S3ObjectInputStream): Snapshot =
    serializationExtension.deserialize(akka.persistence.serialization.streamToBytes(inputStream), classOf[Snapshot]).get

  protected def serialize(snapshot: Snapshot): SerializationResult = {
    // Serialize once and reuse the bytes for both the stream and its reported size.
    val serialized = serializationExtension.findSerializerFor(snapshot).toBinary(snapshot)
    SerializationResult(new ByteArrayInputStream(serialized), serialized.size)
  }
} 
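SerializationResult pairs the stream with its byte count because S3 stream uploads expect a known Content-Length up front. A minimal sketch of the same pairing without the AWS or Akka dependencies (Payload and toPayload are illustrative names):

import java.io.ByteArrayInputStream

object PayloadSketch extends App {
  // Keep the size next to the stream so the uploader can set Content-Length without
  // having to buffer or re-read the stream.
  final case class Payload(stream: ByteArrayInputStream, size: Int)

  def toPayload(bytes: Array[Byte]): Payload = Payload(new ByteArrayInputStream(bytes), bytes.length)

  val p = toPayload("snapshot-bytes".getBytes("UTF-8"))
  println(p.size) // 14
}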
Example 38
Source File: SerializedWithSchemaToObject.scala    From trucking-iot   with Apache License 2.0 5 votes vote down vote up
package com.orendainx.trucking.storm.bolts

import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets
import java.util

import com.hortonworks.registries.schemaregistry.SchemaMetadata
import com.hortonworks.registries.schemaregistry.avro.AvroSchemaProvider
import com.hortonworks.registries.schemaregistry.client.SchemaRegistryClient
import com.hortonworks.registries.schemaregistry.serdes.avro.AvroSnapshotDeserializer
import com.orendainx.trucking.commons.models.{EnrichedTruckData, TrafficData}
import com.typesafe.scalalogging.Logger
import org.apache.avro.generic.{GenericData, GenericRecord}
import org.apache.storm.task.{OutputCollector, TopologyContext}
import org.apache.storm.topology.OutputFieldsDeclarer
import org.apache.storm.topology.base.BaseRichBolt
import org.apache.storm.tuple.{Fields, Tuple, Values}

import scala.collection.JavaConversions._


class SerializedWithSchemaToObject extends BaseRichBolt {

  private lazy val log = Logger(this.getClass)
  private var outputCollector: OutputCollector = _

  // Declare schema-related fields to be initialized when this component's prepare() method is called
  private var schemaRegistryClient: SchemaRegistryClient = _
  private var deserializer: AvroSnapshotDeserializer = _
  private var truckDataSchemaMetadata: SchemaMetadata = _
  private var trafficDataSchemaMetadata: SchemaMetadata = _

  override def prepare(stormConf: util.Map[_, _], context: TopologyContext, collector: OutputCollector): Unit = {

    outputCollector = collector

    val schemaRegistryUrl = stormConf.get(SchemaRegistryClient.Configuration.SCHEMA_REGISTRY_URL.name()).toString
    val clientConfig = Map(SchemaRegistryClient.Configuration.SCHEMA_REGISTRY_URL.name() -> schemaRegistryUrl)

    schemaRegistryClient = new SchemaRegistryClient(clientConfig)
    truckDataSchemaMetadata = schemaRegistryClient.getSchemaMetadataInfo("EnrichedTruckData").getSchemaMetadata
    trafficDataSchemaMetadata = schemaRegistryClient.getSchemaMetadataInfo("TrafficData").getSchemaMetadata
    deserializer = schemaRegistryClient.getDefaultDeserializer(AvroSchemaProvider.TYPE).asInstanceOf[AvroSnapshotDeserializer]
    deserializer.init(clientConfig)
  }

  override def execute(tuple: Tuple): Unit = {

    // Deserialize each tuple and convert it into its proper case class (e.g. EnrichedTruckData or TrafficData)
    val str = tuple.getStringByField("data").getBytes(StandardCharsets.UTF_8)
    log.info(s"str2: ${tuple.getStringByField("data")}")
    val bytes = new ByteArrayInputStream(str)
    log.info(s"bytes: $bytes")
    val (dataType, data) = tuple.getStringByField("dataType") match {
      case typ @ "EnrichedTruckData" =>
        log.info(s"des: ${deserializer.deserialize(bytes, null)}")
        (typ, recordToEnrichedTruckData(deserializer.deserialize(bytes, null).asInstanceOf[GenericData.Record]))
      case typ @ "TrafficData" =>
        log.info(s"des: ${deserializer.deserialize(bytes, null)}")
        (typ, recordToTrafficData(deserializer.deserialize(bytes, null).asInstanceOf[GenericData.Record]))
    }

    outputCollector.emit(new Values(data, dataType))
    outputCollector.ack(tuple)
  }

  override def declareOutputFields(declarer: OutputFieldsDeclarer): Unit = declarer.declare(new Fields("data", "dataType"))

  // Helper function to convert GenericRecord (result of deserializing via Schema Registry) into JVM object
  private def recordToEnrichedTruckData(r: GenericRecord): EnrichedTruckData =
    EnrichedTruckData(
      r.get("eventTime").toString.toLong,
      r.get("truckId").toString.toInt,
      r.get("driverId").toString.toInt,
      r.get("driverName").toString,
      r.get("routeId").toString.toInt,
      r.get("routeName").toString,
      r.get("latitude").toString.toDouble,
      r.get("longitude").toString.toDouble,
      r.get("speed").toString.toInt,
      r.get("eventType").toString,
      r.get("foggy").toString.toInt,
      r.get("rainy").toString.toInt,
      r.get("windy").toString.toInt)

  // Helper function to convert GenericRecord (result of deserializing via Schema Registry) into JVM object
  private def recordToTrafficData(r: GenericRecord): TrafficData =
    TrafficData(r.get("eventTime").toString.toLong, r.get("routeId").toString.toInt, r.get("congestionLevel").toString.toInt)
} 
Example 39
Source File: NiFiPacketWithSchemaToObject.scala    From trucking-iot   with Apache License 2.0 5 votes vote down vote up
package com.orendainx.trucking.storm.bolts

import java.io.ByteArrayInputStream
import java.util

import com.hortonworks.registries.schemaregistry.SchemaMetadata
import com.hortonworks.registries.schemaregistry.avro.AvroSchemaProvider
import com.hortonworks.registries.schemaregistry.client.SchemaRegistryClient
import com.hortonworks.registries.schemaregistry.serdes.avro.AvroSnapshotDeserializer
import com.orendainx.trucking.commons.models.{EnrichedTruckData, TrafficData}
import com.typesafe.scalalogging.Logger
import org.apache.avro.generic.{GenericData, GenericRecord}
import org.apache.nifi.storm.NiFiDataPacket
import org.apache.storm.task.{OutputCollector, TopologyContext}
import org.apache.storm.topology.OutputFieldsDeclarer
import org.apache.storm.topology.base.BaseRichBolt
import org.apache.storm.tuple.{Fields, Tuple, Values}

import scala.collection.JavaConversions._


class NiFiPacketWithSchemaToObject extends BaseRichBolt {

  private lazy val log = Logger(this.getClass)
  private var outputCollector: OutputCollector = _

  // Declare schema-related fields to be initialized when this component's prepare() method is called
  private var schemaRegistryClient: SchemaRegistryClient = _
  private var deserializer: AvroSnapshotDeserializer = _
  private var truckDataSchemaMetadata: SchemaMetadata = _
  private var trafficDataSchemaMetadata: SchemaMetadata = _

  override def prepare(stormConf: util.Map[_, _], context: TopologyContext, collector: OutputCollector): Unit = {

    outputCollector = collector

    val schemaRegistryUrl = stormConf.get(SchemaRegistryClient.Configuration.SCHEMA_REGISTRY_URL.name()).toString
    val clientConfig = Map(SchemaRegistryClient.Configuration.SCHEMA_REGISTRY_URL.name() -> schemaRegistryUrl)

    schemaRegistryClient = new SchemaRegistryClient(clientConfig)
    truckDataSchemaMetadata = schemaRegistryClient.getSchemaMetadataInfo("EnrichedTruckData").getSchemaMetadata
    trafficDataSchemaMetadata = schemaRegistryClient.getSchemaMetadataInfo("TrafficData").getSchemaMetadata
    deserializer = schemaRegistryClient.getDefaultDeserializer(AvroSchemaProvider.TYPE).asInstanceOf[AvroSnapshotDeserializer]
    deserializer.init(clientConfig)
  }

  override def execute(tuple: Tuple): Unit = {
    val dp = tuple.getValueByField("nifiDataPacket").asInstanceOf[NiFiDataPacket]

    // Deserialize each tuple and convert it into its proper case class (e.g. EnrichedTruckData or TrafficData)
    val (dataType, data) = dp.getAttributes.get("dataType") match {
      case typ @ "EnrichedTruckData" => (typ, recordToEnrichedTruckData(deserializer.deserialize(new ByteArrayInputStream(dp.getContent), null).asInstanceOf[GenericData.Record]))
      case typ @ "TrafficData" => (typ, recordToTrafficData(deserializer.deserialize(new ByteArrayInputStream(dp.getContent), null).asInstanceOf[GenericData.Record]))
    }

    outputCollector.emit(new Values(data, dataType))
    outputCollector.ack(tuple)
  }

  override def declareOutputFields(declarer: OutputFieldsDeclarer): Unit = declarer.declare(new Fields("data", "dataType"))

  // Helper function to convert GenericRecord (result of deserializing via Schema Registry) into JVM object
  private def recordToEnrichedTruckData(r: GenericRecord): EnrichedTruckData =
    EnrichedTruckData(
      r.get("eventTime").toString.toLong,
      r.get("truckId").toString.toInt,
      r.get("driverId").toString.toInt,
      r.get("driverName").toString,
      r.get("routeId").toString.toInt,
      r.get("routeName").toString,
      r.get("latitude").toString.toDouble,
      r.get("longitude").toString.toDouble,
      r.get("speed").toString.toInt,
      r.get("eventType").toString,
      r.get("foggy").toString.toInt,
      r.get("rainy").toString.toInt,
      r.get("windy").toString.toInt)

  // Helper function to convert GenericRecord (result of deserializing via Schema Registry) into JVM object
  private def recordToTrafficData(r: GenericRecord): TrafficData =
    TrafficData(r.get("eventTime").toString.toLong, r.get("routeId").toString.toInt, r.get("congestionLevel").toString.toInt)
} 
Example 40
Source File: BytesWithSchemaToObject.scala    From trucking-iot   with Apache License 2.0 5 votes vote down vote up
package com.orendainx.trucking.storm.bolts

import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets
import java.util

import com.hortonworks.registries.schemaregistry.SchemaMetadata
import com.hortonworks.registries.schemaregistry.avro.AvroSchemaProvider
import com.hortonworks.registries.schemaregistry.client.SchemaRegistryClient
import com.hortonworks.registries.schemaregistry.serdes.avro.AvroSnapshotDeserializer
import com.orendainx.trucking.commons.models.{EnrichedTruckData, TrafficData}
import com.typesafe.scalalogging.Logger
import org.apache.avro.generic.{GenericData, GenericRecord}
import org.apache.storm.task.{OutputCollector, TopologyContext}
import org.apache.storm.topology.OutputFieldsDeclarer
import org.apache.storm.topology.base.BaseRichBolt
import org.apache.storm.tuple.{Fields, Tuple, Values}

import scala.collection.JavaConversions._


// The class declaration, prepare(), execute(), and declareOutputFields() were elided in this
// listing; they follow the same Schema Registry deserialization flow as
// SerializedWithSchemaToObject above. Minimal stubs keep the fragment compilable.
class BytesWithSchemaToObject extends BaseRichBolt {

  override def prepare(stormConf: util.Map[_, _], context: TopologyContext, collector: OutputCollector): Unit = ()

  override def execute(tuple: Tuple): Unit = ()

  override def declareOutputFields(declarer: OutputFieldsDeclarer): Unit = declarer.declare(new Fields("data", "dataType"))

  // Helper function to convert GenericRecord (result of deserializing via Schema Registry) into JVM object
  private def recordToEnrichedTruckData(r: GenericRecord): EnrichedTruckData =
    EnrichedTruckData(
      r.get("eventTime").toString.toLong,
      r.get("truckId").toString.toInt,
      r.get("driverId").toString.toInt,
      r.get("driverName").toString,
      r.get("routeId").toString.toInt,
      r.get("routeName").toString,
      r.get("latitude").toString.toDouble,
      r.get("longitude").toString.toDouble,
      r.get("speed").toString.toInt,
      r.get("eventType").toString,
      r.get("foggy").toString.toInt,
      r.get("rainy").toString.toInt,
      r.get("windy").toString.toInt)

  // Helper function to convert GenericRecord (result of deserializing via Schema Registry) into JVM object
  private def recordToTrafficData(r: GenericRecord): TrafficData =
    TrafficData(r.get("eventTime").toString.toLong, r.get("routeId").toString.toInt, r.get("congestionLevel").toString.toInt)
} 
Example 41
Source File: RegisterNodeSerializerTest.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.kryo

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import justin.db.actors.protocol.RegisterNode
import justin.db.consistenthashing.NodeId
import org.scalatest.{FlatSpec, Matchers}

class RegisterNodeSerializerTest extends FlatSpec with Matchers {

  behavior of "RegisterNode Serializer"

  it should "serialize/deserialize correctly" in {
    // kryo init
    val kryo = new Kryo()
    kryo.register(classOf[RegisterNode], RegisterNodeSerializer)

    // object
    val serializedData = RegisterNode(NodeId(1))

    // serialization
    val bos    = new ByteArrayOutputStream()
    val output = new Output(bos)
    val _      = kryo.writeObject(output, serializedData)
    output.flush()

    // deserialization
    val bis              = new ByteArrayInputStream(bos.toByteArray)
    val input            = new Input(bis)
    val deserializedData = kryo.readObject(input, classOf[RegisterNode])

    serializedData shouldBe deserializedData
  }
} 
Example 42
Source File: DataSerializerTest.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.kryo

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.UUID

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import justin.db.Data
import justin.db.consistenthashing.NodeId
import justin.db.vectorclocks.{Counter, VectorClock}
import org.scalatest.{FlatSpec, Matchers}

class DataSerializerTest extends FlatSpec with Matchers {

  behavior of "Data Serializer"

  it should "serialize/deserialize correctly" in {
    // kryo init
    val kryo = new Kryo()
    kryo.register(classOf[justin.db.Data], DataSerializer)

    // object
    val vClock         = VectorClock[NodeId](Map(NodeId(1) -> Counter(3)))
    val timestamp      = System.currentTimeMillis()
    val serializedData = Data(id = UUID.randomUUID(), value = "some value", vClock, timestamp)

    // serialization
    val bos    = new ByteArrayOutputStream()
    val output = new Output(bos)
    val _      = kryo.writeObject(output, serializedData)
    output.flush()

    // deserialization
    val bis              = new ByteArrayInputStream(bos.toByteArray)
    val input            = new Input(bis)
    val deserializedData = kryo.readObject(input, classOf[Data])

    serializedData shouldBe deserializedData
  }
} 
Example 43
Source File: StorageNodeWriteDataLocalSerializerTest.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.kryo

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.UUID

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import justin.db.Data
import justin.db.actors.protocol.StorageNodeWriteDataLocal
import justin.db.consistenthashing.NodeId
import justin.db.vectorclocks.{Counter, VectorClock}
import org.scalatest.{FlatSpec, Matchers}

class StorageNodeWriteDataLocalSerializerTest extends FlatSpec with Matchers {

  behavior of "StorageNodeWriteDataLocal Serializer"

  it should "serialize/deserialize StorageNodeWriteDataLocal" in {
    // kryo init
    val kryo = new Kryo()
    kryo.register(classOf[StorageNodeWriteDataLocal], StorageNodeWriteDataLocalSerializer)

    // object
    val data = Data(
      id        = UUID.randomUUID(),
      value     = "some value",
      vclock    = VectorClock[NodeId](Map(NodeId(1) -> Counter(3))),
      timestamp = System.currentTimeMillis()
    )
    val serializedData = StorageNodeWriteDataLocal(data)

    // serialization
    val bos    = new ByteArrayOutputStream()
    val output = new Output(bos)
    val _      = kryo.writeObject(output, serializedData)
    output.flush()

    // deserialization
    val bis              = new ByteArrayInputStream(bos.toByteArray)
    val input            = new Input(bis)
    val deserializedData = kryo.readObject(input, classOf[StorageNodeWriteDataLocal])

    serializedData shouldBe deserializedData
  }
} 
Example 44
Source File: StorageNodeLocalReadSerializerTest.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.kryo

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.UUID

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import justin.db.actors.protocol.StorageNodeLocalRead
import org.scalatest.{FlatSpec, Matchers}

class StorageNodeLocalReadSerializerTest extends FlatSpec with Matchers {

  behavior of "StorageNodeLocalReader Serializer"

  it should "serialize/deserialize correctly" in {
    // kryo init
    val kryo = new Kryo()
    kryo.register(classOf[StorageNodeLocalRead], StorageNodeLocalReadSerializer)

    // object
    val serializedData = StorageNodeLocalRead(UUID.randomUUID())

    // serialization
    val bos    = new ByteArrayOutputStream()
    val output = new Output(bos)
    val _      = kryo.writeObject(output, serializedData)
    output.flush()

    // deserialization
    val bis              = new ByteArrayInputStream(bos.toByteArray)
    val input            = new Input(bis)
    val deserializedData = kryo.readObject(input, classOf[StorageNodeLocalRead])

    serializedData shouldBe deserializedData
  }
} 
Example 45
Source File: RocksDBStorage.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.storage

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File}
import java.util.UUID

import com.esotericsoftware.kryo.io.{Input, Output}
import com.esotericsoftware.kryo.{Kryo, Serializer}
import justin.db.storage.PluggableStorageProtocol.{Ack, StorageGetData}
import org.rocksdb.{FlushOptions, Options, RocksDB}

import scala.concurrent.Future

// TODO:
// The current version stores every piece of data in a single file (it does not take data originality into account).
// Data should eventually be split by ring partitionId.
// This might become an issue during data movements between nodes.
final class RocksDBStorage(dir: File) extends PluggableStorageProtocol {
  import RocksDBStorage._

  {
    RocksDB.loadLibrary()
  }

  private[this] val kryo = new Kryo()

  private[this] val db: RocksDB = {
    val options: Options = new Options().setCreateIfMissing(true)
    RocksDB.open(options, dir.getPath)
  }

  override def get(id: UUID)(resolveOriginality: (UUID) => PluggableStorageProtocol.DataOriginality): Future[PluggableStorageProtocol.StorageGetData] = {
    val key: Array[Byte] = uuid2bytes(kryo, id)
    val dataBytes: Array[Byte] = db.get(key)

    val justinDataOpt = Option(dataBytes).map { dataBytes =>
      val input = new Input(new ByteArrayInputStream(dataBytes))
      JustinDataSerializer.read(kryo, input, classOf[JustinData])
    }

    Future.successful(justinDataOpt.map(StorageGetData.Single).getOrElse(StorageGetData.None))
  }

  override def put(data: JustinData)(resolveOriginality: (UUID) => PluggableStorageProtocol.DataOriginality): Future[PluggableStorageProtocol.Ack] = {
    val key: Array[Byte] = uuid2bytes(kryo, data.id)
    val dataBytes: Array[Byte] = {
      val output = new Output(new ByteArrayOutputStream())
      JustinDataSerializer.write(kryo, output, data)
      output.getBuffer
    }

    db.put(key, dataBytes)
    db.flush(new FlushOptions().setWaitForFlush(true))

    Ack.future
  }
}

object RocksDBStorage {

  def uuid2bytes(kryo: Kryo, id: UUID): Array[Byte] = {
    val output = new Output(new ByteArrayOutputStream(), 16)
    UUIDSerializer.write(kryo, output, id)
    output.getBuffer
  }

  object UUIDSerializer extends Serializer[UUID] {
    override def read(kryo: Kryo, input: Input, `type`: Class[UUID]): UUID = {
      new UUID(input.readLong, input.readLong)
    }

    override def write(kryo: Kryo, output: Output, uuid: UUID): Unit = {
      output.writeLong(uuid.getMostSignificantBits)
      output.writeLong(uuid.getLeastSignificantBits)
    }
  }

  object JustinDataSerializer extends Serializer[JustinData] {
    override def read(kryo: Kryo, input: Input, `type`: Class[JustinData]): JustinData = {
      JustinData(
        id        = UUIDSerializer.read(kryo, input, classOf[UUID]),
        value     = input.readString(),
        vclock    = input.readString(),
        timestamp = input.readLong()
      )
    }

    override def write(kryo: Kryo, output: Output, data: JustinData): Unit = {
      UUIDSerializer.write(kryo, output, data.id)
      output.writeString(data.value)
      output.writeString(data.vclock)
      output.writeLong(data.timestamp)
    }
  }
} 
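
A rough usage sketch for the storage above (not taken from the JustinDB sources): it assumes the RocksDB native library can be loaded and that RocksDBStorage and JustinData are on the classpath. Because the implementation shown never invokes its resolveOriginality argument, a stub function is enough here.

import java.nio.file.Files
import java.util.UUID

import justin.db.storage._

import scala.concurrent.ExecutionContext.Implicits.global

val storage = new RocksDBStorage(Files.createTempDirectory("rocksdb-sketch").toFile)
val data    = JustinData(UUID.randomUUID(), "some value", "vclock-value", System.currentTimeMillis())

// put a record and read it back; resolveOriginality is ignored by this implementation, so a stub suffices
val readBack = for {
  _   <- storage.put(data)(_ => ???)
  got <- storage.get(data.id)(_ => ???)
} yield got // expected: StorageGetData.Single(data)
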
Example 46
Source File: JustinDataSerializerTest.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.storage

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.UUID

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import justin.db.storage.RocksDBStorage.JustinDataSerializer
import org.scalatest.{FlatSpec, Matchers}

class JustinDataSerializerTest extends FlatSpec with Matchers {

  behavior of "JustinDataSerializer"

  it should "serialize/deserialize JustinData with Kryo" in {
    val kryo = new Kryo()
    val data = JustinData(
      id        = UUID.randomUUID,
      value     = "to jest przykladowa wartość",
      vclock    = "vclock-value",
      timestamp = 1234124L
    )

    // serialize
    val output = new Output(new ByteArrayOutputStream())
    JustinDataSerializer.write(kryo, output, data)
    val dataBytes = output.getBuffer

    // deserialize
    val input = new Input(new ByteArrayInputStream(dataBytes))
    JustinDataSerializer.read(kryo, input, classOf[JustinData]) shouldBe data
  }
} 
Example 47
Source File: UUIDSerializerTest.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.storage

import java.io.ByteArrayInputStream
import java.util.UUID

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.Input
import justin.db.storage.RocksDBStorage.UUIDSerializer
import org.scalatest.{FlatSpec, Matchers}

class UUIDSerializerTest extends FlatSpec with Matchers {

  behavior of "UUIDSerializer"

  it should "serialize/deserialize UUID with Kryo" in {
    val uuid = UUID.randomUUID()
    val kryo = new Kryo()

    // serialize
    val bytes = RocksDBStorage.uuid2bytes(kryo, uuid)

    // deserialize
    val input = new Input(new ByteArrayInputStream(bytes))
    val id = UUIDSerializer.read(kryo, input, classOf[UUID])

    uuid shouldBe id
  }
} 
Example 48
Source File: AmqpXPathCheckMaterializer.scala    From gatling-amqp-plugin   with Apache License 2.0 5 votes vote down vote up
package ru.tinkoff.gatling.amqp.checks

import java.io.{ByteArrayInputStream, InputStreamReader}

import io.gatling.commons.validation.{safely, _}
import io.gatling.core.check.xpath.{Dom, XPathCheckType, XmlParsers}
import io.gatling.core.check.{CheckMaterializer, Preparer}
import org.xml.sax.InputSource
import ru.tinkoff.gatling.amqp.AmqpCheck
import ru.tinkoff.gatling.amqp.request.AmqpProtocolMessage

class AmqpXPathCheckMaterializer(xmlParsers: XmlParsers)
    extends CheckMaterializer[XPathCheckType, AmqpCheck, AmqpProtocolMessage, Option[Dom]](identity) {
  private val ErrorMapper = "Could not parse response into a DOM Document: " + _

  override protected def preparer: Preparer[AmqpProtocolMessage, Option[Dom]] =
    message =>
      safely(ErrorMapper) {
        message match {
          case AmqpProtocolMessage(_, payload, _) =>
            val in = new ByteArrayInputStream(payload)
            Some(xmlParsers.parse(new InputSource(new InputStreamReader(in)))).success
          case _ => "Unsupported message type".failure
        }
      }
} 
Example 49
Source File: AmqpJsonPathCheckMaterializer.scala    From gatling-amqp-plugin   with Apache License 2.0 5 votes vote down vote up
package ru.tinkoff.gatling.amqp.checks

import java.io.ByteArrayInputStream
import java.nio.charset.Charset

import io.gatling.core.check.jsonpath.JsonPathCheckType
import io.gatling.core.check.{CheckMaterializer, Preparer}
import io.gatling.core.json.JsonParsers
import ru.tinkoff.gatling.amqp.AmqpCheck
import ru.tinkoff.gatling.amqp.request.AmqpProtocolMessage

import scala.util.Try

class AmqpJsonPathCheckMaterializer(jsonParsers: JsonParsers)
  extends CheckMaterializer[JsonPathCheckType, AmqpCheck, AmqpProtocolMessage, Any](identity) {
  override protected def preparer: Preparer[AmqpProtocolMessage, Any] =
    AmqpJsonPathCheckMaterializer.jsonPathPreparer(jsonParsers)
}

object AmqpJsonPathCheckMaterializer {
  private val CharsParsingThreshold = 200 * 1000

  private def jsonPathPreparer(jsonParsers: JsonParsers): Preparer[AmqpProtocolMessage, Any] =
    replyMessage => {
      val bodyCharset = Try(Charset.forName(replyMessage.amqpProperties.getContentEncoding))
        .getOrElse(Charset.defaultCharset())

      if (replyMessage.payload.length > CharsParsingThreshold)
        jsonParsers.safeParse(new ByteArrayInputStream(replyMessage.payload), bodyCharset)
      else
        jsonParsers.safeParse(new String(replyMessage.payload, bodyCharset))
    }
} 
Example 50
Source File: JQHttpClient.scala    From ledger-manager-chrome   with MIT License 5 votes vote down vote up
package co.ledger.manager.web.core.net

import java.io.ByteArrayInputStream

import co.ledger.wallet.core.net.{BasicHttpRequestLogger, HttpClient, HttpRequestExecutor, HttpRequestLogger}
import co.ledger.manager.web.core.utils.JQueryHelper
import org.scalajs.jquery.JQueryXHR

import scala.concurrent.ExecutionContext
import scala.scalajs.js


class JQHttpClient(override val baseUrl: String) extends HttpClient {
  override implicit val ec: ExecutionContext = scala.concurrent.ExecutionContext.Implicits.global

  override protected val executor: HttpRequestExecutor = new HttpRequestExecutor {
    override def execute(responseBuilder: co.ledger.wallet.core.net.HttpClient#ResponseBuilder): Unit = {
      val request = responseBuilder.request
      val headers = js.Dictionary[js.Any]()
      request.headers foreach {
        case (key, value) =>
          headers(key) = value.toString
      }
      try {
        JQueryHelper.$.ajax(js.Dictionary[js.Any](
          "method" -> request.method,
          "url" -> request.url,
          "headers" -> headers,
          "timeout" -> (request.readTimeout.toMillis + request.connectionTimeout.toMillis),
          "data" -> request.bodyAsString,
          "complete" -> { (xhr: JQueryXHR, status: String) =>
            responseBuilder.statusCode = xhr.status
            responseBuilder.statusMessage = xhr.statusText
            responseBuilder.bodyEncoding = "utf-8"
            if (xhr.status != 0)
              responseBuilder.body = new ByteArrayInputStream(xhr.responseText.getBytes)
            responseBuilder.build()
          }
        ))
      } catch {
        case er: Throwable =>
          responseBuilder.failure(er)
      }
      request.body.close()
    }
  }
  override var defaultLogger: HttpRequestLogger = new BasicHttpRequestLogger
}

object JQHttpClient {
  val etcInstance = new JQHttpClient("https://api.ledgerwallet.com/blockchain/v2/ethc")
  val ethInstance = new JQHttpClient("https://api.ledgerwallet.com/blockchain/v2/eth")
} 
Example 51
Source File: SerializableSerializerTest.scala    From spark-util   with Apache License 2.0 5 votes vote down vote up
package org.hammerlab.hadoop.kryo

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream }

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{ Input, Output }
import org.hammerlab.test.Suite

class SerializableSerializerTest
  extends Suite {
  test("serde") {
    val kryo = new Kryo()
    kryo.setRegistrationRequired(true)
    val baos = new ByteArrayOutputStream()
    val output = new Output(baos)

    val foo = new Foo
    foo.n = 123
    foo.s = "abc"

    intercept[IllegalArgumentException] {
      kryo.writeClassAndObject(output, foo)
    }
    .getMessage should startWith("Class is not registered: org.hammerlab.hadoop.kryo.Foo")

    kryo.register(classOf[Foo], SerializableSerializer[Foo]())

    kryo.writeClassAndObject(output, foo)

    output.close()

    val bytes = baos.toByteArray
    bytes.length should be(93)

    val bais = new ByteArrayInputStream(bytes)

    val input = new Input(bais)
    val after = kryo.readClassAndObject(input).asInstanceOf[Foo]

    after.n should be(foo.n)
    after.s should be(foo.s)
  }
}

class Foo
  extends Serializable {

  var n = 0
  var s = ""

  private def writeObject(out: ObjectOutputStream): Unit = {
    out.writeInt(n)
    out.writeUTF(s)
  }

  private def readObject(in: ObjectInputStream): Unit = {
    n = in.readInt()
    s = in.readUTF()
  }
} 
Example 52
Source File: CloudFrontSigner.scala    From openwhisk   with Apache License 2.0 5 votes vote down vote up
package org.apache.openwhisk.core.database.s3
import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets.UTF_8
import java.security.PrivateKey
import java.time.Instant
import java.util.Date

import akka.http.scaladsl.model.Uri
import com.amazonaws.auth.PEM
import com.amazonaws.services.cloudfront.CloudFrontUrlSigner
import com.amazonaws.services.cloudfront.util.SignerUtils
import com.amazonaws.services.cloudfront.util.SignerUtils.Protocol

import scala.concurrent.duration._

case class CloudFrontConfig(domainName: String,
                            keyPairId: String,
                            privateKey: String,
                            timeout: FiniteDuration = 10.minutes)

case class CloudFrontSigner(config: CloudFrontConfig) extends UrlSigner {
  private val privateKey = createPrivateKey(config.privateKey)

  override def getSignedURL(s3ObjectKey: String): Uri = {
    val resourcePath = SignerUtils.generateResourcePath(Protocol.https, config.domainName, s3ObjectKey)
    val date = Date.from(Instant.now().plusSeconds(config.timeout.toSeconds))
    val url = CloudFrontUrlSigner.getSignedURLWithCannedPolicy(resourcePath, config.keyPairId, privateKey, date)
    Uri(url)
  }

  override def toString: String = s"CloudFront Signer - ${config.domainName}"

  private def createPrivateKey(keyContent: String): PrivateKey = {
    val is = new ByteArrayInputStream(keyContent.getBytes(UTF_8))
    PEM.readPrivateKey(is)
  }
} 
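
A configuration sketch for the signer above; the domain name, key pair id and private key are placeholders (a real PEM-encoded key matching the CloudFront key pair is required).

import org.apache.openwhisk.core.database.s3.{CloudFrontConfig, CloudFrontSigner}

// placeholder values, for illustration only
val config = CloudFrontConfig(
  domainName = "d1234example.cloudfront.net",
  keyPairId  = "APKAEXAMPLEKEYID",
  privateKey = "-----BEGIN RSA PRIVATE KEY-----\n...\n-----END RSA PRIVATE KEY-----")

val signer    = CloudFrontSigner(config)
val signedUri = signer.getSignedURL("images/attachment.zip") // expires after config.timeout (10 minutes)
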
Example 53
Source File: DefaultRowReader.scala    From mleap   with Apache License 2.0 5 votes vote down vote up
package ml.combust.mleap.binary

import java.io.{ByteArrayInputStream, DataInputStream}
import java.nio.charset.Charset

import ml.combust.mleap.runtime.serialization.{BuiltinFormats, RowReader}
import ml.combust.mleap.core.types.StructType
import ml.combust.mleap.runtime.frame.{ArrayRow, Row}
import resource._

import scala.util.Try


class DefaultRowReader(override val schema: StructType) extends RowReader {
  private val serializers = schema.fields.map(_.dataType).map(ValueSerializer.serializerForDataType)

  override def fromBytes(bytes: Array[Byte], charset: Charset = BuiltinFormats.charset): Try[Row] = {
    (for(in <- managed(new ByteArrayInputStream(bytes))) yield {
      val din = new DataInputStream(in)
      val row = ArrayRow(new Array[Any](schema.fields.length))
      var i = 0
      for(s <- serializers) {
        row.set(i, s.read(din))
        i = i + 1
      }
      row
    }).tried
  }
} 
Example 54
Source File: DefaultFrameReader.scala    From mleap   with Apache License 2.0 5 votes vote down vote up
package ml.combust.mleap.binary

import java.io.{ByteArrayInputStream, DataInputStream}
import java.nio.charset.Charset

import ml.combust.mleap.runtime.serialization.{BuiltinFormats, FrameReader}
import ml.combust.mleap.core.types.StructType
import ml.combust.mleap.json.JsonSupport._
import ml.combust.mleap.runtime.frame.{ArrayRow, DefaultLeapFrame, Row}
import spray.json._
import resource._

import scala.collection.mutable
import scala.util.Try


class DefaultFrameReader extends FrameReader {
  override def fromBytes(bytes: Array[Byte],
                         charset: Charset = BuiltinFormats.charset): Try[DefaultLeapFrame] = {
    (for(in <- managed(new ByteArrayInputStream(bytes))) yield {
      val din = new DataInputStream(in)
      val length = din.readInt()
      val schemaBytes = new Array[Byte](length)
      din.readFully(schemaBytes)
      val schema = new String(schemaBytes, BuiltinFormats.charset).parseJson.convertTo[StructType]
      val serializers = schema.fields.map(_.dataType).map(ValueSerializer.serializerForDataType)
      val rowCount = din.readInt()
      val rows = mutable.WrappedArray.make[Row](new Array[Row](rowCount))

      for(i <- 0 until rowCount) {
        val row = new ArrayRow(new Array[Any](schema.fields.length))

        var j = 0
        for(s <- serializers) {
          row.set(j, s.read(din))
          j = j + 1
        }

        rows(i) = row
      }

      DefaultLeapFrame(schema, rows)
    }).tried
  }
} 
Example 55
Source File: XGBoostRegressionOp.scala    From mleap   with Apache License 2.0 5 votes vote down vote up
package ml.combust.mleap.xgboost.runtime.bundle.ops

import java.io.ByteArrayInputStream
import java.nio.file.Files

import ml.combust.bundle.BundleContext
import ml.combust.bundle.dsl.{Model, Value}
import ml.combust.bundle.op.OpModel
import ml.combust.mleap.bundle.ops.MleapOp
import ml.combust.mleap.runtime.MleapContext
import ml.combust.mleap.xgboost.runtime.{XGBoostRegression, XGBoostRegressionModel}
import ml.dmlc.xgboost4j.scala.XGBoost
import resource._


class XGBoostRegressionOp extends MleapOp[XGBoostRegression, XGBoostRegressionModel] {
  override val Model: OpModel[MleapContext, XGBoostRegressionModel] = new OpModel[MleapContext, XGBoostRegressionModel] {
    override val klazz: Class[XGBoostRegressionModel] = classOf[XGBoostRegressionModel]

    override def opName: String = "xgboost.regression"

    override def store(model: Model, obj: XGBoostRegressionModel)
                      (implicit context: BundleContext[MleapContext]): Model = {
      val out = Files.newOutputStream(context.file("xgboost.model"))
      obj.booster.saveModel(out)

      model
        .withValue("num_features", Value.int(obj.numFeatures))
        .withValue("tree_limit", Value.int(obj.treeLimit))
    }

    override def load(model: Model)
                     (implicit context: BundleContext[MleapContext]): XGBoostRegressionModel = {
      val bytes = Files.readAllBytes(context.file("xgboost.model"))
      val booster = XGBoost.loadModel(new ByteArrayInputStream(bytes))
      val treeLimit = model.value("tree_limit").getInt

      XGBoostRegressionModel(booster,
        numFeatures = model.value("num_features").getInt,
        treeLimit = treeLimit)
    }
  }

  override def model(node: XGBoostRegression): XGBoostRegressionModel = node.model
} 
Example 56
Source File: TestSpec.scala    From spark-distcp   with Apache License 2.0 5 votes vote down vote up
package com.coxautodata

import java.io.ByteArrayInputStream
import java.nio.file.Files

import com.coxautodata.objects.SerializableFileStatus
import com.coxautodata.utils.FileListing
import org.apache.commons.io.{FileUtils, IOUtils}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, LocalFileSystem, Path}
import org.scalatest.{BeforeAndAfterEach, FunSpec, Matchers}

trait TestSpec extends FunSpec with Matchers with BeforeAndAfterEach {

  var testingBaseDir: java.nio.file.Path = _
  var testingBaseDirName: String = _
  var testingBaseDirPath: Path = _
  var localFileSystem: LocalFileSystem = _

  override def beforeEach(): Unit = {
    super.beforeEach()
    testingBaseDir = Files.createTempDirectory("test_output")
    testingBaseDirName = testingBaseDir.toString
    localFileSystem = FileSystem.getLocal(new Configuration())
    testingBaseDirPath = localFileSystem.makeQualified(new Path(testingBaseDirName))
  }

  override def afterEach(): Unit = {
    super.afterEach()
    FileUtils.deleteDirectory(testingBaseDir.toFile)
  }

  def createFile(relativePath: Path, content: Array[Byte]): SerializableFileStatus = {
    val path = new Path(testingBaseDirPath, relativePath)
    localFileSystem.mkdirs(path.getParent)
    val in = new ByteArrayInputStream(content)
    val out = localFileSystem.create(path)
    IOUtils.copy(in, out)
    in.close()
    out.close()
    SerializableFileStatus(localFileSystem.getFileStatus(path))
  }

  def fileStatusToResult(f: SerializableFileStatus): FileListing = {
    FileListing(f.getPath.toString, if (f.isFile) Some(f.getLen) else None)
  }

} 
Example 57
Source File: DesignSerializationTest.scala    From airframe   with Apache License 2.0 5 votes vote down vote up
package wvlet.airframe

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import DesignTest._
import wvlet.airspec.AirSpec

object DesignSerializationTest {
  def serialize(d: Design): Array[Byte] = {
    val b  = new ByteArrayOutputStream()
    val oo = new ObjectOutputStream(b)
    oo.writeObject(d)
    oo.close()
    b.toByteArray
  }

  def deserialize(b: Array[Byte]): Design = {
    val in  = new ByteArrayInputStream(b)
    val oi  = new ObjectInputStream(in)
    val obj = oi.readObject().asInstanceOf[Design]
    obj.asInstanceOf[Design]
  }
}


class DesignSerializationTest extends AirSpec {
  import DesignSerializationTest._

  def `be serializable`: Unit = {
    val b   = serialize(d1)
    val d1s = deserialize(b)
    d1s shouldBe (d1)
  }

  def `serialize instance binding`: Unit = {
    val d  = Design.blanc.bind[Message].toInstance(Hello("world"))
    val b  = serialize(d)
    val ds = deserialize(b)
    ds shouldBe (d)
  }
} 
Example 58
Source File: SerializationTest.scala    From airframe   with Apache License 2.0 5 votes vote down vote up
package wvlet.log

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import wvlet.log.io.IOUtil

object SerializationTest {
  trait A extends LogSupport {
    debug("new A")
    def hello = debug("hello")
  }
}


class SerializationTest extends Spec {
  import SerializationTest._

  def `logger should be serializable`: Unit = {
    val a = new A {}
    val b = new ByteArrayOutputStream()
    IOUtil.withResource(new ObjectOutputStream(b)) { out => out.writeObject(a) }
    val ser = b.toByteArray
    IOUtil.withResource(new ObjectInputStream(new ByteArrayInputStream(ser))) { in =>
      debug("deserialization")
      val a = in.readObject().asInstanceOf[A]
      a.hello
    }
  }
} 
Example 59
Source File: TypeInformationDataInputFormat.scala    From milan   with Apache License 2.0 5 votes vote down vote up
package com.amazon.milan.compiler.flink.dataformats

import java.io.{ByteArrayInputStream, EOFException, InputStream}

import com.amazon.milan.dataformats.DataInputFormat
import com.amazon.milan.typeutil.TypeDescriptor
import org.apache.flink.api.common.ExecutionConfig
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.common.typeutils.TypeSerializer
import org.apache.flink.core.memory.DataInputViewStreamWrapper



class TypeInformationDataInputFormat[T](typeInfo: TypeInformation[T]) extends DataInputFormat[T] {
  @transient private lazy val serializer = this.createSerializer()

  override def getGenericArguments: List[TypeDescriptor[_]] = {
    // This class is not intended to be serialized by GenericTypedJsonSerializer, so this should not be called.
    throw new UnsupportedOperationException()
  }

  override def setGenericArguments(genericArgs: List[TypeDescriptor[_]]): Unit = {
    // This class is not intended to be deserialized by GenericTypedJsonDeserializer, so this should not be called.
    throw new UnsupportedOperationException()
  }

  override def readValue(bytes: Array[Byte], offset: Int, length: Int): Option[T] = {
    val input = new DataInputViewStreamWrapper(new ByteArrayInputStream(bytes, offset, length))
    Some(this.serializer.deserialize(input))
  }

  override def readValues(stream: InputStream): TraversableOnce[T] = {
    val input = new DataInputViewStreamWrapper(stream)
    Stream.continually(0)
      .map(_ =>
        try {
          Some(this.serializer.deserialize(input))
        }
        catch {
          case _: EOFException => None
        })
      .takeWhile(_.isDefined)
      .map(_.get)
  }

  private def createSerializer(): TypeSerializer[T] = {
    val config = new ExecutionConfig()
    this.typeInfo.createSerializer(config)
  }
} 
Example 60
Source File: ObjectStreamUtil.scala    From milan   with Apache License 2.0 5 votes vote down vote up
package com.amazon.milan.compiler.flink.testutil

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

object ObjectStreamUtil {
  def serializeAndDeserialize[T](value: T): T = {
    val outputStream = new ByteArrayOutputStream()
    val objectOutputStream = new ObjectOutputStream(outputStream)
    objectOutputStream.writeObject(value)

    val bytes = outputStream.toByteArray
    val objectInputStream = new ObjectInputStream(new ByteArrayInputStream(bytes))
    objectInputStream.readObject().asInstanceOf[T]
  }
} 
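
A small usage sketch (not part of the original test utilities): any value whose runtime class is Serializable can be round-tripped this way.

import com.amazon.milan.compiler.flink.testutil.ObjectStreamUtil

case class Sample(name: String, count: Int) // case classes are Serializable by default

val original = Sample("abc", 42)
val copy     = ObjectStreamUtil.serializeAndDeserialize(original)
assert(copy == original)
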
Example 61
Source File: package.scala    From milan   with Apache License 2.0 5 votes vote down vote up
package com.amazon.milan.compiler.flink

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import com.amazon.milan.compiler.flink.runtime.{UnwrapRecordsMapFunction, WrapRecordsMapFunction}
import com.amazon.milan.compiler.flink.testing.IntKeyValueRecord
import com.amazon.milan.compiler.flink.types.{RecordWrapper, RecordWrapperTypeInformation}
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.common.typeutils.TypeSerializer
import org.apache.flink.api.java.typeutils.ResultTypeQueryable
import org.apache.flink.core.memory.{DataInputView, DataInputViewStreamWrapper, DataOutputView, DataOutputViewStreamWrapper}
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.datastream.DataStream
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment

import scala.language.implicitConversions
import scala.util.Random


package object testutil {
  def getTestExecutionEnvironment: StreamExecutionEnvironment = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    env.setBufferTimeout(0)
    env
  }

  def copyWithSerializer[T](value: T, serializer: TypeSerializer[T]): T = {
    val outputStream = new ByteArrayOutputStream()
    val outputView = new DataOutputViewStreamWrapper(outputStream)
    serializer.serialize(value, outputView)

    val bytes = outputStream.toByteArray
    val inputStream = new ByteArrayInputStream(bytes)
    val inputView = new DataInputViewStreamWrapper(inputStream)
    serializer.deserialize(inputView)
  }

  def copyData[T](writeValue: DataOutputView => Unit, readValue: DataInputView => T): T = {
    val outputStream = new ByteArrayOutputStream()
    val outputView = new DataOutputViewStreamWrapper(outputStream)
    writeValue(outputView)

    val bytes = outputStream.toByteArray
    val inputStream = new ByteArrayInputStream(bytes)
    val inputView = new DataInputViewStreamWrapper(inputStream)
    readValue(inputView)
  }

  def generateIntKeyValueRecords(recordCount: Int, keyCount: Int, maxValue: Int): List[IntKeyValueRecord] = {
    val rand = new Random(0)
    List.tabulate(recordCount)(_ => IntKeyValueRecord(rand.nextInt(keyCount), rand.nextInt(maxValue + 1)))
  }

  implicit class WrappedDataStreamExtensions[T >: Null, TKey >: Null <: Product](dataStream: DataStream[RecordWrapper[T, TKey]]) {
    def unwrap(recordTypeInformation: TypeInformation[T]): DataStream[T] = {
      val mapper = new UnwrapRecordsMapFunction[T, TKey](recordTypeInformation)
      this.dataStream.map(mapper)
    }

    def unwrap(): DataStream[T] = {
      val recordType = this.dataStream.getType.asInstanceOf[RecordWrapperTypeInformation[T, TKey]].valueTypeInformation
      this.unwrap(recordType)
    }
  }

  implicit class DataStreamExtensions[T >: Null](dataStream: DataStream[T]) {
    def wrap(recordTypeInformation: TypeInformation[T]): DataStream[RecordWrapper[T, Product]] = {
      val mapper = new WrapRecordsMapFunction[T](recordTypeInformation)
      this.dataStream.map(mapper)
    }

    def wrap(): DataStream[RecordWrapper[T, Product]] = {
      val recordType = this.dataStream.asInstanceOf[ResultTypeQueryable[T]].getProducedType
      this.wrap(recordType)
    }
  }

} 
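
As an illustration (assumed, not from the project), copyWithSerializer can round-trip a single value through Flink's serialization stack with any TypeInformation, for example the built-in integer type information:

import com.amazon.milan.compiler.flink.testutil._
import org.apache.flink.api.common.ExecutionConfig
import org.apache.flink.api.common.typeinfo.BasicTypeInfo

// serialize an Integer to bytes and deserialize it again via byte-array streams
val serializer = BasicTypeInfo.INT_TYPE_INFO.createSerializer(new ExecutionConfig())
val copied     = copyWithSerializer(Integer.valueOf(42), serializer)
assert(copied == 42)
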
Example 62
Source File: HiveQlParserImplTest.scala    From schedoscope   with Apache License 2.0 5 votes vote down vote up
package org.schedoscope.lineage.parser

import java.io.ByteArrayInputStream

import org.apache.calcite.avatica.util.Casing
import org.apache.calcite.sql.{SqlCall, SqlKind, SqlSelect}
import org.scalatest.{FlatSpec, Matchers}

class HiveQlParserImplTest extends FlatSpec with Matchers {
  "The HiveQlParserImpl" should "parse the <=> operator correctly" in {
    val sql = "SELECT * FROM a WHERE x <=> y"
    val stream = new ByteArrayInputStream(sql.getBytes)
    val parser = new HiveQlParserImpl(stream)
    parser.setIdentifierMaxLength(255)
    parser.setUnquotedCasing(Casing.UNCHANGED)

    val sqlNode = parser.parseSqlStmtEof
    val select = sqlNode.asInstanceOf[SqlSelect]
    val where = select.getWhere.asInstanceOf[SqlCall]
    where.getOperator.getKind should be(SqlKind.EQUALS)
  }
} 
Example 63
Source File: CodecFactory.scala    From OAP   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.datasources.oap.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream}

import scala.collection.mutable

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.compress.{CodecPool, CompressionCodec}
import org.apache.hadoop.util.ReflectionUtils
import org.apache.parquet.format.{CompressionCodec => ParquetCodec}
import org.apache.parquet.hadoop.metadata.CompressionCodecName

// This is a simplified version of Parquet's CodecFactory.
// TODO: [linhong] Change this into Scala code style
private[oap] class CodecFactory(conf: Configuration) {

  private val compressors = new mutable.HashMap[ParquetCodec, BytesCompressor]
  private val decompressors = new mutable.HashMap[ParquetCodec, BytesDecompressor]
  private val codecByName = new mutable.HashMap[String, CompressionCodec]

  private def getCodec(codecString: String): Option[CompressionCodec] = {
    codecByName.get(codecString) match {
      case Some(codec) => Some(codec)
      case None =>
        val codecName = CompressionCodecName.valueOf(codecString)
        val codecClass = codecName.getHadoopCompressionCodecClass
        if (codecClass == null) {
          None
        } else {
          val codec = ReflectionUtils.newInstance(codecClass, conf).asInstanceOf[CompressionCodec]
          codecByName.put(codecString, codec)
          Some(codec)
        }
    }
  }

  def getCompressor(codec: ParquetCodec): BytesCompressor = {
    compressors.getOrElseUpdate(codec, new BytesCompressor(getCodec(codec.name)))
  }

  def getDecompressor(codec: ParquetCodec): BytesDecompressor = {
    decompressors.getOrElseUpdate(codec, new BytesDecompressor(getCodec(codec.name)))
  }

  def release(): Unit = {
    compressors.values.foreach(_.release())
    compressors.clear()
    decompressors.values.foreach(_.release())
    decompressors.clear()
  }
}

private[oap] class BytesCompressor(compressionCodec: Option[CompressionCodec]) {

  private lazy val compressedOutBuffer = new ByteArrayOutputStream()
  private lazy val compressor = compressionCodec match {
    case Some(codec) => CodecPool.getCompressor(codec)
    case None => null
  }

  def compress(bytes: Array[Byte]): Array[Byte] = {
    compressionCodec match {
      case Some(codec) =>
        compressedOutBuffer.reset()
        // null compressor for non-native gzip
        if (compressor != null) {
          compressor.reset()
        }
        val cos = codec.createOutputStream(compressedOutBuffer, compressor)
        cos.write(bytes)
        cos.finish()
        cos.close()
        compressedOutBuffer.toByteArray
      case None => bytes
    }
  }

  def release(): Unit = CodecPool.returnCompressor(compressor)
}

private[oap] class BytesDecompressor(compressionCodec: Option[CompressionCodec]) {

  private lazy val decompressor = compressionCodec match {
    case Some(codec) => CodecPool.getDecompressor(codec)
    case None => null
  }

  def decompress(bytes: Array[Byte], uncompressedSize: Int): Array[Byte] = {
    compressionCodec match {
      case Some(codec) =>
        decompressor.reset()
        val cis = codec.createInputStream(new ByteArrayInputStream(bytes), decompressor)
        val decompressed = new Array[Byte](uncompressedSize)
        new DataInputStream(cis).readFully(decompressed)
        decompressed
      case None => bytes
    }
  }

  def release(): Unit = CodecPool.returnDecompressor(decompressor)
} 
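
A compress/decompress round trip with this factory might look like the sketch below. It is only an illustration: the classes are private[oap], so the code would have to live inside that package, and the chosen Hadoop codec (GZIP here) must be available.

import org.apache.hadoop.conf.Configuration
import org.apache.parquet.format.{CompressionCodec => ParquetCodec}

val factory = new CodecFactory(new Configuration())
val bytes   = "some column data".getBytes("UTF-8")

val compressed = factory.getCompressor(ParquetCodec.GZIP).compress(bytes)
val restored   = factory.getDecompressor(ParquetCodec.GZIP).decompress(compressed, bytes.length)
assert(restored.sameElements(bytes))

factory.release()
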
Example 64
Source File: Command.scala    From scala-ssh   with Apache License 2.0 5 votes vote down vote up
package com.decodified.scalassh

import net.schmizz.sshj.connection.channel.direct.Session
import java.io.{ FileInputStream, File, ByteArrayInputStream, InputStream }

case class Command(command: String, input: CommandInput = CommandInput.NoInput, timeout: Option[Int] = None)

object Command {
  implicit def string2Command(cmd: String) = Command(cmd)
}

case class CommandInput(inputStream: Option[InputStream])

object CommandInput {
  lazy val NoInput = CommandInput(None)
  implicit def apply(input: String, charsetName: String = "UTF8"): CommandInput = apply(input.getBytes(charsetName))
  implicit def apply(input: Array[Byte]): CommandInput = apply(Some(new ByteArrayInputStream(input)))
  implicit def apply(input: InputStream): CommandInput = apply(Some(input))
  def fromFile(file: String): CommandInput = fromFile(new File(file))
  def fromFile(file: File): CommandInput = new FileInputStream(file)
  def fromResource(resource: String): CommandInput = getClass.getClassLoader.getResourceAsStream(resource)
}

class CommandResult(val channel: Session.Command) {
  def stdErrStream: InputStream = channel.getErrorStream
  def stdOutStream: InputStream = channel.getInputStream
  lazy val stdErrBytes = new StreamCopier().emptyToByteArray(stdErrStream)
  lazy val stdOutBytes = new StreamCopier().emptyToByteArray(stdOutStream)
  def stdErrAsString(charsetname: String = "utf8") = new String(stdErrBytes, charsetname)
  def stdOutAsString(charsetname: String = "utf8") = new String(stdOutBytes, charsetname)
  lazy val exitSignal: Option[String] = Option(channel.getExitSignal).map(_.toString)
  lazy val exitCode: Option[Int] = Option(channel.getExitStatus)
  lazy val exitErrorMessage: Option[String] = Option(channel.getExitErrorMessage)
} 
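
The implicit conversions above keep command construction terse. A small usage sketch with illustrative values (the resource path is hypothetical):

import com.decodified.scalassh.{Command, CommandInput}

// a bare string becomes a Command with no input
val simple: Command = "ls -la"

// stdin can be fed from a string (wrapped in a ByteArrayInputStream), a file or a classpath resource
val withInput    = Command("cat", CommandInput("hello world\n"))
val fromResource = Command("wc -l", CommandInput.fromResource("data/sample.txt"))
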
Example 65
Source File: CCGBankToCabochaFormat.scala    From jigg   with Apache License 2.0 5 votes vote down vote up
package jigg.nlp.ccg


object CCGBankToCabochaFormat {

  case class Opts(
    @Help(text="Path to CCGBank file") ccgbank: File = new File(""),
    @Help(text="Path to output") output: File = new File(""),
    @Help(text="Cabocha command (path to cabocha)") cabocha: String = "cabocha"
  )

  type Tree = ParseTree[NodeLabel]

  def main(args:Array[String]) = {
    val opts = CommandLineParser.readIn[Opts](args)

    val dict = new JapaneseDictionary()
    val extractors = TreeExtractor(
      new JapaneseParseTreeConverter(dict),
      new CCGBankReader)

    val trees = extractors.readTrees(opts.ccgbank, -1, true)
    val rawString = trees map (extractors.treeConv.toSentenceFromLabelTree) map (_.wordSeq.mkString("")) mkString ("\n")
    val is = new java.io.ByteArrayInputStream(rawString.getBytes("UTF-8"))
    val out = (Process(s"${opts.cabocha} -f1") #< is).lineStream_!

    val os = jigg.util.IOUtil.openOut(opts.output.getPath)
    out foreach { line =>
      os.write(line + "\n")
    }
    os.flush
    os.close
  }
} 
Example 66
Source File: HadoopConfig.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.streaming.examples.fsio

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}
import scala.language.implicitConversions

import org.apache.hadoop.conf.Configuration

import org.apache.gearpump.cluster.UserConfig
import org.apache.gearpump.util.Constants._

class HadoopConfig(config: UserConfig) {

  def withHadoopConf(conf: Configuration): UserConfig = {
    config.withBytes(HADOOP_CONF, serializeHadoopConf(conf))
  }

  def hadoopConf: Configuration = deserializeHadoopConf(config.getBytes(HADOOP_CONF).get)

  private def serializeHadoopConf(conf: Configuration): Array[Byte] = {
    val out = new ByteArrayOutputStream()
    val dataOut = new DataOutputStream(out)
    conf.write(dataOut)
    dataOut.close()
    out.toByteArray
  }

  private def deserializeHadoopConf(bytes: Array[Byte]): Configuration = {
    val in = new ByteArrayInputStream(bytes)
    val dataIn = new DataInputStream(in)
    val result = new Configuration()
    result.readFields(dataIn)
    dataIn.close()
    result
  }
}

object HadoopConfig {
  def empty: HadoopConfig = new HadoopConfig(UserConfig.empty)
  def apply(config: UserConfig): HadoopConfig = new HadoopConfig(config)

  implicit def userConfigToHadoopConfig(userConf: UserConfig): HadoopConfig = {
    HadoopConfig(userConf)
  }
} 
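
A sketch of the intended round trip (values are illustrative only): serialize a Hadoop Configuration into a Gearpump UserConfig and read it back through the implicit conversion.

import org.apache.gearpump.cluster.UserConfig
import org.apache.gearpump.streaming.examples.fsio.HadoopConfig._
import org.apache.hadoop.conf.Configuration

val hadoopConf = new Configuration()
hadoopConf.set("fs.defaultFS", "hdfs://localhost:9000") // illustrative setting

val userConf: UserConfig = HadoopConfig.empty.withHadoopConf(hadoopConf)

// userConfigToHadoopConfig makes hadoopConf available directly on the UserConfig
val restored: Configuration = userConf.hadoopConf
assert(restored.get("fs.defaultFS") == "hdfs://localhost:9000")
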
Example 67
Source File: WindowsPluginFrontendSpec.scala    From protoc-bridge   with Apache License 2.0 5 votes vote down vote up
package protocbridge.frontend

import java.io.ByteArrayInputStream

import protocbridge.ProtocCodeGenerator

import scala.sys.process.ProcessLogger
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.must.Matchers

class WindowsPluginFrontendSpec extends AnyFlatSpec with Matchers {
  if (PluginFrontend.isWindows) {
    it must "execute a program that forwards input and output to given stream" in {
      val toSend = "ping"
      val toReceive = "pong"

      val fakeGenerator = new ProtocCodeGenerator {
        override def run(request: Array[Byte]): Array[Byte] = {
          request mustBe toSend.getBytes
          toReceive.getBytes
        }
      }
      val (path, state) = WindowsPluginFrontend.prepare(fakeGenerator)
      val actualOutput = scala.collection.mutable.Buffer.empty[String]
      val process = sys.process
        .Process(path.toAbsolutePath.toString)
        .#<(new ByteArrayInputStream(toSend.getBytes))
        .run(ProcessLogger(o => actualOutput.append(o)))
      process.exitValue()
      actualOutput.mkString mustBe toReceive
      WindowsPluginFrontend.cleanup(state)
    }
  }
} 
Example 68
Source File: PluginFrontendSpec.scala    From protoc-bridge   with Apache License 2.0 5 votes vote down vote up
package protocbridge.frontend

import java.io.ByteArrayInputStream

import com.google.protobuf.compiler.PluginProtos.CodeGeneratorResponse
import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.must.Matchers

class PluginFrontendSpec
    extends AnyFlatSpec
    with Matchers
    with ScalaCheckDrivenPropertyChecks {
  def expected(error: String) =
    CodeGeneratorResponse.newBuilder().setError(error).build()

  def actual(error: String) =
    CodeGeneratorResponse.parseFrom(
      PluginFrontend.createCodeGeneratorResponseWithError(error)
    )

  "createCodeGeneratorResponseWithError" should "create valid objects" in {
    actual("") must be(expected(""))
    actual("foo") must be(expected("foo"))
    actual("\u2035") must be(expected("\u2035"))
    actual("a" * 128) must be(expected("a" * 128))
    actual("a" * 256) must be(expected("a" * 256))
    actual("\u3714\u3715" * 256) must be(expected("\u3714\u3715" * 256))
    actual("abc" * 1000) must be(expected("abc" * 1000))
    forAll(MinSuccessful(1000)) { s: String =>
      actual(s) must be(expected(s))
    }

  }

  "readInputStreamToByteArray" should "read the input stream to a byte array" in {
    def readInput(bs: Array[Byte]) =
      PluginFrontend.readInputStreamToByteArray(new ByteArrayInputStream(bs))

    readInput(Array.empty) must be(Array())
    readInput(Array[Byte](1, 2, 3, 4)) must be(Array(1, 2, 3, 4))
    val special = Array.tabulate[Byte](10000) { n =>
      (n % 37).toByte
    }
    readInput(special) must be(special)
  }
} 
Example 69
Source File: StreamingParserSpec.scala    From cormorant   with MIT License 5 votes vote down vote up
package io.chrisdavenport.cormorant
package fs2

import cats.data.NonEmptyList
import cats.effect._
import cats.effect.testing.specs2.CatsIO
import _root_.fs2.Stream
import io.chrisdavenport.cormorant._
// import io.chrisdavenport.cormorant.implicits._
// import scala.concurrent.duration._
import java.io.ByteArrayInputStream
import java.io.InputStream

class StreamingParserSpec extends CormorantSpec with CatsIO {

  def ruinDelims(str: String) = augmentString(str).flatMap {
    case '\n' => "\r\n"
    case c => c.toString
  }

  "Streaming Parser" should {
    // https://github.com/ChristopherDavenport/cormorant/pull/84
    "parse a known value that did not work with streaming" in {
      val x = """First Name,Last Name,Email
Larry,Bordowitz,[email protected]
Anonymous,Hippopotamus,[email protected]"""
      val source = IO.pure(new ByteArrayInputStream(ruinDelims(x).getBytes): InputStream)
      Stream.resource(Blocker[IO]).flatMap{blocker => 
        _root_.fs2.io.readInputStream(
          source,
          chunkSize = 4,
          blocker
        )
      }
        .through(_root_.fs2.text.utf8Decode)
        .through(parseComplete[IO])
        .compile
        .toVector
        .map{ v => 
          val header = CSV.Headers(NonEmptyList.of(CSV.Header("First Name"), CSV.Header("Last Name"), CSV.Header("Email")))
          val row1 = CSV.Row(NonEmptyList.of(CSV.Field("Larry"), CSV.Field("Bordowitz"), CSV.Field("[email protected]")))
          val row2 = CSV.Row(NonEmptyList.of(CSV.Field("Anonymous"), CSV.Field("Hippopotamus"), CSV.Field("[email protected]")))
          Vector(
            (header, row1),
            (header, row2)
          ) must_=== v
        }
    }
  }

  

} 
Example 70
Source File: TestHelper.scala    From spark-summit-2018   with GNU General Public License v3.0 5 votes vote down vote up
package com.twilio.open.streaming.trend.discovery

import java.io.{ByteArrayInputStream, InputStream}

import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.google.protobuf.Message
import com.googlecode.protobuf.format.JsonFormat
import com.holdenkarau.spark.testing.{LocalSparkContext, SparkContextProvider}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers, Suite}
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.Seq
import scala.io.Source
import scala.reflect.ClassTag
import scala.reflect.classTag

object TestHelper {
  val log: Logger = LoggerFactory.getLogger("com.twilio.open.streaming.trend.discovery.TestHelper")
  val mapper: ObjectMapper = {
    val m = new ObjectMapper()
    m.registerModule(DefaultScalaModule)
  }

  val jsonFormat: JsonFormat = new JsonFormat

  def loadScenario[T<: Message : ClassTag](file: String): Seq[T] = {
    val fileString = Source.fromFile(file).mkString
    val parsed = mapper.readValue(fileString, classOf[Sceanario])
    parsed.input.map { data =>
      val json = mapper.writeValueAsString(data)
      convert[T](json)
    }
  }

  def convert[T<: Message : ClassTag](json: String): T = {
    val clazz = classTag[T].runtimeClass
    val builder = clazz.getMethod("newBuilder").invoke(clazz).asInstanceOf[Message.Builder]
    try {
      val input: InputStream = new ByteArrayInputStream(json.getBytes())
      jsonFormat.merge(input, builder)
      builder.build().asInstanceOf[T]
    } catch {
      case e: Exception =>
        throw e
    }
  }

}

@SerialVersionUID(1L)
case class KafkaDataFrame(key: Array[Byte], topic: Array[Byte], value: Array[Byte]) extends Serializable

case class Sceanario(input: Seq[Any], expected: Option[Any] = None)

trait SparkSqlTest extends BeforeAndAfterAll with SparkContextProvider {
  self: Suite =>

  @transient var _sparkSql: SparkSession = _
  @transient private var _sc: SparkContext = _

  override def sc: SparkContext = _sc

  def conf: SparkConf

  def sparkSql: SparkSession = _sparkSql

  override def beforeAll() {
    _sparkSql = SparkSession.builder().config(conf).getOrCreate()

    _sc = _sparkSql.sparkContext
    setup(_sc)
    super.beforeAll()
  }

  override def afterAll() {
    try {
      _sparkSql.close()
      _sparkSql = null
      LocalSparkContext.stop(_sc)
      _sc = null
    } finally {
      super.afterAll()
    }
  }

} 
Example 71
Source File: HDFSCredentialProvider.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.deploy.yarn.security

import java.io.{ByteArrayInputStream, DataInputStream}

import scala.collection.JavaConverters._

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier
import org.apache.hadoop.mapred.Master
import org.apache.hadoop.security.Credentials

import org.apache.spark.{SparkConf, SparkException}
import org.apache.spark.deploy.yarn.config._
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config._

private[security] class HDFSCredentialProvider extends ServiceCredentialProvider with Logging {
  // Token renewal interval. This value is set on the first call; None means no token renewer
  // was specified, so the token renewal interval cannot be obtained.
  private var tokenRenewalInterval: Option[Long] = null

  override val serviceName: String = "hdfs"

  override def obtainCredentials(
      hadoopConf: Configuration,
      sparkConf: SparkConf,
      creds: Credentials): Option[Long] = {
    // NameNode to access, used to get tokens from different FileSystems
    nnsToAccess(hadoopConf, sparkConf).foreach { dst =>
      val dstFs = dst.getFileSystem(hadoopConf)
      logInfo("getting token for namenode: " + dst)
      dstFs.addDelegationTokens(getTokenRenewer(hadoopConf), creds)
    }

    // Get the token renewal interval if it is not set. It will only be called once.
    if (tokenRenewalInterval == null) {
      tokenRenewalInterval = getTokenRenewalInterval(hadoopConf, sparkConf)
    }

    // Get the time of next renewal.
    tokenRenewalInterval.map { interval =>
      creds.getAllTokens.asScala
        .filter(_.getKind == DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
        .map { t =>
          val identifier = new DelegationTokenIdentifier()
          identifier.readFields(new DataInputStream(new ByteArrayInputStream(t.getIdentifier)))
          identifier.getIssueDate + interval
      }.foldLeft(0L)(math.max)
    }
  }

  private def getTokenRenewalInterval(
      hadoopConf: Configuration, sparkConf: SparkConf): Option[Long] = {
    // We cannot use the tokens generated with renewer yarn. Trying to renew
    // those will fail with an access control issue. So create new tokens with the logged in
    // user as renewer.
    sparkConf.get(PRINCIPAL).flatMap { renewer =>
      val creds = new Credentials()
      nnsToAccess(hadoopConf, sparkConf).foreach { dst =>
        val dstFs = dst.getFileSystem(hadoopConf)
        dstFs.addDelegationTokens(renewer, creds)
      }
      val hdfsToken = creds.getAllTokens.asScala
        .find(_.getKind == DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
      hdfsToken.map { t =>
        val newExpiration = t.renew(hadoopConf)
        val identifier = new DelegationTokenIdentifier()
        identifier.readFields(new DataInputStream(new ByteArrayInputStream(t.getIdentifier)))
        val interval = newExpiration - identifier.getIssueDate
        logInfo(s"Renewal Interval is $interval")
        interval
      }
    }
  }

  private def getTokenRenewer(conf: Configuration): String = {
    val delegTokenRenewer = Master.getMasterPrincipal(conf)
    logDebug("delegation token renewer is: " + delegTokenRenewer)
    if (delegTokenRenewer == null || delegTokenRenewer.length() == 0) {
      val errorMessage = "Can't get Master Kerberos principal for use as renewer"
      logError(errorMessage)
      throw new SparkException(errorMessage)
    }

    delegTokenRenewer
  }

  private def nnsToAccess(hadoopConf: Configuration, sparkConf: SparkConf): Set[Path] = {
    sparkConf.get(NAMENODES_TO_ACCESS).map(new Path(_)).toSet +
      sparkConf.get(STAGING_DIR).map(new Path(_))
        .getOrElse(FileSystem.get(hadoopConf).getHomeDirectory)
  }
} 
Example 72
Source File: GenericAvroSerializerSuite.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Input, Output}
import org.apache.avro.{Schema, SchemaBuilder}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SharedSparkContext, SparkFunSuite}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema : Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }

  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
} 
Example 73
Source File: CompressionCodecSuite.scala    From SparkCore   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import org.scalatest.FunSuite

import org.apache.spark.SparkConf

class CompressionCodecSuite extends FunSuite {
  val conf = new SparkConf(false)

  def testCodec(codec: CompressionCodec) {
    // Write 1000 integers to the output stream, compressed.
    val outputStream = new ByteArrayOutputStream()
    val out = codec.compressedOutputStream(outputStream)
    for (i <- 1 until 1000) {
      out.write(i % 256)
    }
    out.close()

    // Read the 1000 integers back.
    val inputStream = new ByteArrayInputStream(outputStream.toByteArray)
    val in = codec.compressedInputStream(inputStream)
    for (i <- 1 until 1000) {
      assert(in.read() === i % 256)
    }
    in.close()
  }

  test("default compression codec") {
    val codec = CompressionCodec.createCodec(conf)
    assert(codec.getClass === classOf[SnappyCompressionCodec])
    testCodec(codec)
  }

  test("lz4 compression codec") {
    val codec = CompressionCodec.createCodec(conf, classOf[LZ4CompressionCodec].getName)
    assert(codec.getClass === classOf[LZ4CompressionCodec])
    testCodec(codec)
  }

  test("lz4 compression codec short form") {
    val codec = CompressionCodec.createCodec(conf, "lz4")
    assert(codec.getClass === classOf[LZ4CompressionCodec])
    testCodec(codec)
  }

  test("lzf compression codec") {
    val codec = CompressionCodec.createCodec(conf, classOf[LZFCompressionCodec].getName)
    assert(codec.getClass === classOf[LZFCompressionCodec])
    testCodec(codec)
  }

  test("lzf compression codec short form") {
    val codec = CompressionCodec.createCodec(conf, "lzf")
    assert(codec.getClass === classOf[LZFCompressionCodec])
    testCodec(codec)
  }

  test("snappy compression codec") {
    val codec = CompressionCodec.createCodec(conf, classOf[SnappyCompressionCodec].getName)
    assert(codec.getClass === classOf[SnappyCompressionCodec])
    testCodec(codec)
  }

  test("snappy compression codec short form") {
    val codec = CompressionCodec.createCodec(conf, "snappy")
    assert(codec.getClass === classOf[SnappyCompressionCodec])
    testCodec(codec)
  }

  test("bad compression codec") {
    intercept[IllegalArgumentException] {
      CompressionCodec.createCodec(conf, "foobar")
    }
  }
} 
Example 74
Source File: package.scala    From chronicler   with Apache License 2.0 5 votes vote down vote up
package com.github.fsanaulla.chronicler.core

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.zip.{GZIPInputStream, GZIPOutputStream}

package object gzip {

  
  def compress(data: Array[Byte]): (Int, Array[Byte]) = {
    val bArrOut    = new ByteArrayOutputStream()
    val gzippedOut = new GZIPOutputStream(bArrOut)

    gzippedOut.write(data)
    gzippedOut.close()

    val gzippedData   = bArrOut.toByteArray
    val contentLength = gzippedData.length
    contentLength -> gzippedData
  }

  def decompress(data: Array[Byte]): Array[Byte] = {
    val gis = new GZIPInputStream(new ByteArrayInputStream(data))
    val out = new ByteArrayOutputStream()
    val buf = new Array[Byte](1024)

    var res = 0
    while (res >= 0) {
      res = gis.read(buf, 0, buf.length)
      if (res > 0) out.write(buf, 0, res)
    }

    out.toByteArray
  }
} 
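
For instance (a minimal sketch, not taken from the library's tests), a payload can be gzipped and restored like this:

import com.github.fsanaulla.chronicler.core.gzip

val payload = "cpu,host=serverA usage=0.64".getBytes("UTF-8") // illustrative line-protocol point

val (contentLength, compressed) = gzip.compress(payload)
val restored                    = gzip.decompress(compressed)

assert(contentLength == compressed.length)
assert(restored.sameElements(payload))
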
Example 75
Source File: S3Util.scala    From redshift-fake-driver   with Apache License 2.0 5 votes vote down vote up
package jp.ne.opt.redshiftfake

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.charset.StandardCharsets
import java.util.zip.GZIPOutputStream

import com.amazonaws.services.s3.AmazonS3
import com.amazonaws.services.s3.model.{ObjectMetadata, PutObjectRequest}
import jp.ne.opt.redshiftfake.util.Loan.using
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream

object S3Util {

   def loadGzippedDataToS3(s3Client: AmazonS3, data: String, bucket: String, key: String): Unit = {
    val arrayOutputStream = new ByteArrayOutputStream()
    using(new GZIPOutputStream(arrayOutputStream)) (gzipOutStream => {
      gzipOutStream.write(data.getBytes(StandardCharsets.UTF_8))
    })
    val buf = arrayOutputStream.toByteArray
    val metadata = new ObjectMetadata
    metadata.setContentLength(buf.length)
    val request = new PutObjectRequest(bucket, key, new ByteArrayInputStream(buf), metadata)

    s3Client.putObject(request)
  }

   def loadBzipped2DataToS3(s3Client: AmazonS3, data: String, bucket: String, key: String): Unit = {
    val arrayOutputStream = new ByteArrayOutputStream()
    using(new BZip2CompressorOutputStream(arrayOutputStream)) (bzip2OutStream => {
      bzip2OutStream.write(data.getBytes(StandardCharsets.UTF_8))
    })
    val buf = arrayOutputStream.toByteArray
    val metadata = new ObjectMetadata
    metadata.setContentLength(buf.length)
    val request = new PutObjectRequest(bucket, key, new ByteArrayInputStream(buf), metadata)

    s3Client.putObject(request)
  }

   def loadDataToS3(s3Client: AmazonS3, data: String, bucket: String, key: String): Unit = {
    val buf = data.getBytes
    val metadata = new ObjectMetadata
    metadata.setContentLength(buf.length)
    val request = new PutObjectRequest(bucket, key, new ByteArrayInputStream(buf), metadata)

    s3Client.putObject(request)
  }
} 
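
Each helper needs an AmazonS3 client and a target bucket. A hedged sketch (client, bucket and keys are placeholders; the AWS SDK v1 client builder is assumed, and a local or mock S3 endpoint could be substituted):

import com.amazonaws.services.s3.AmazonS3ClientBuilder
import jp.ne.opt.redshiftfake.S3Util

val s3Client = AmazonS3ClientBuilder.defaultClient() // placeholder client configuration

S3Util.loadDataToS3(s3Client, "a|1\nb|2\n", "my-test-bucket", "data/rows.txt")
S3Util.loadGzippedDataToS3(s3Client, "a|1\nb|2\n", "my-test-bucket", "data/rows.gz")
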
Example 76
Source File: AvroTypeSpec.scala    From shapeless-datatype   with Apache License 2.0 5 votes vote down vote up
package shapeless.datatype.avro

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.net.URI
import java.nio.ByteBuffer

import com.google.protobuf.ByteString
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DecoderFactory, EncoderFactory}
import org.joda.time.Instant
import org.scalacheck.Prop.forAll
import org.scalacheck.ScalacheckShapeless._
import org.scalacheck._
import shapeless._
import shapeless.datatype.record._

import scala.reflect.runtime.universe._

object AvroTypeSpec extends Properties("AvroType") {
  import shapeless.datatype.test.Records._
  import shapeless.datatype.test.SerializableUtils._

  implicit def compareByteArrays(x: Array[Byte], y: Array[Byte]) = java.util.Arrays.equals(x, y)
  implicit def compareIntArrays(x: Array[Int], y: Array[Int]) = java.util.Arrays.equals(x, y)

  def roundTrip[A: TypeTag, L <: HList](m: A)(implicit
    gen: LabelledGeneric.Aux[A, L],
    fromL: FromAvroRecord[L],
    toL: ToAvroRecord[L],
    mr: MatchRecord[L]
  ): Boolean = {
    val t = ensureSerializable(AvroType[A])
    val f1: SerializableFunction[A, GenericRecord] =
      new SerializableFunction[A, GenericRecord] {
        override def apply(m: A): GenericRecord = t.toGenericRecord(m)
      }
    val f2: SerializableFunction[GenericRecord, Option[A]] =
      new SerializableFunction[GenericRecord, Option[A]] {
        override def apply(m: GenericRecord): Option[A] = t.fromGenericRecord(m)
      }
    val toFn = ensureSerializable(f1)
    val fromFn = ensureSerializable(f2)
    val copy = fromFn(roundTripRecord(toFn(m)))
    val rm = RecordMatcher[A]
    copy.exists(rm(_, m))
  }

  def roundTripRecord(r: GenericRecord): GenericRecord = {
    val writer = new GenericDatumWriter[GenericRecord](r.getSchema)
    val baos = new ByteArrayOutputStream()
    val encoder = EncoderFactory.get().binaryEncoder(baos, null)
    writer.write(r, encoder)
    encoder.flush()
    baos.close()
    val bytes = baos.toByteArray

    val reader = new GenericDatumReader[GenericRecord](r.getSchema)
    val bais = new ByteArrayInputStream(bytes)
    val decoder = DecoderFactory.get().binaryDecoder(bais, null)
    reader.read(null, decoder)
  }

  implicit val byteStringAvroType = AvroType.at[ByteString](Schema.Type.BYTES)(
    v => ByteString.copyFrom(v.asInstanceOf[ByteBuffer]),
    v => ByteBuffer.wrap(v.toByteArray)
  )
  implicit val instantAvroType =
    AvroType.at[Instant](Schema.Type.LONG)(v => new Instant(v.asInstanceOf[Long]), _.getMillis)
  property("required") = forAll { m: Required => roundTrip(m) }
  property("optional") = forAll { m: Optional => roundTrip(m) }
  property("repeated") = forAll { m: Repeated => roundTrip(m) }
  property("mixed") = forAll { m: Mixed => roundTrip(m) }
  property("nested") = forAll { m: Nested => roundTrip(m) }
  property("seqs") = forAll { m: Seqs => roundTrip(m) }

  implicit val uriAvroType =
    AvroType.at[URI](Schema.Type.STRING)(v => URI.create(v.toString), _.toString)
  property("custom") = forAll { m: Custom => roundTrip(m) }
} 
Example 77
Source File: SerializableUtils.scala    From protobuf-generic   with Apache License 2.0 5 votes vote down vote up
package me.lyh.protobuf.generic.test

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

object SerializableUtils {
  private def serializeToByteArray(value: Serializable): Array[Byte] = {
    val buffer = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(buffer)
    oos.writeObject(value)
    buffer.toByteArray
  }

  private def deserializeFromByteArray(encodedValue: Array[Byte]): AnyRef = {
    val ois = new ObjectInputStream(new ByteArrayInputStream(encodedValue))
    ois.readObject()
  }

  def ensureSerializable[T <: Serializable](value: T): T =
    deserializeFromByteArray(serializeToByteArray(value)).asInstanceOf[T]
} 
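A small usage sketch for ensureSerializable; the Record case class is a hypothetical stand-in for any Serializable value.

// Hypothetical payload; top-level case classes are Serializable by default.
final case class Record(id: Int, name: String)

object SerializableUtilsUsageSketch {
  def main(args: Array[String]): Unit = {
    import me.lyh.protobuf.generic.test.SerializableUtils.ensureSerializable
    val copy = ensureSerializable(Record(1, "foo"))
    assert(copy == Record(1, "foo")) // round-tripped through Java serialization
  }
}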
Example 79
Source File: ProtobufGenericSpec.scala    From protobuf-generic   with Apache License 2.0 5 votes vote down vote up
package me.lyh.protobuf.generic.test

import java.io.ByteArrayInputStream
import java.nio.ByteBuffer

import com.google.protobuf.{ByteString, Message}
import me.lyh.protobuf.generic._
import me.lyh.protobuf.generic.proto2.Schemas._

import scala.reflect.ClassTag
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class ProtobufGenericSpec extends AnyFlatSpec with Matchers {
  def roundTrip[T <: Message: ClassTag](record: T): Unit = {
    val schema = SerializableUtils.ensureSerializable(Schema.of[T])
    val schemaCopy = Schema.fromJson(schema.toJson)
    schemaCopy shouldBe schema

    val reader = SerializableUtils.ensureSerializable(GenericReader.of(schema))
    val writer = SerializableUtils.ensureSerializable(GenericWriter.of(schema))
    val jsonRecord = reader.read(record.toByteArray).toJson
    jsonRecord shouldBe reader.read(ByteBuffer.wrap(record.toByteArray)).toJson
    jsonRecord shouldBe reader.read(new ByteArrayInputStream(record.toByteArray)).toJson
    val bytes = writer.write(GenericRecord.fromJson(jsonRecord))

    val recordCopy = ProtobufType[T].parseFrom(bytes)
    recordCopy shouldBe record
  }

  "ProtobufGeneric" should "round trip required" in {
    roundTrip[Required](Records.required)
  }

  it should "round trip optional" in {
    roundTrip[Optional](Records.optional)
    roundTrip[Optional](Records.optionalEmpty)
  }

  it should "round trip repeated" in {
    roundTrip[Repeated](Records.repeated)
    roundTrip[Repeated](Records.repeatedEmpty)
    roundTrip[RepeatedPacked](Records.repeatedPacked)
    roundTrip[RepeatedUnpacked](Records.repeatedUnpacked)
  }

  it should "round trip oneofs" in {
    Records.oneOfs.foreach(roundTrip[OneOf])
  }

  it should "round trip mixed" in {
    roundTrip[Mixed](Records.mixed)
    roundTrip[Mixed](Records.mixedEmpty)
  }

  it should "round trip nested" in {
    roundTrip[Nested](Records.nested)
    roundTrip[Nested](Records.nestedEmpty)
  }

  it should "round trip with custom options" in {
    roundTrip[CustomOptionMessage](Records.customOptionMessage)
    roundTrip[CustomOptionMessage](Records.customOptionMessageEmpty)
  }

  it should "round trip with custom defaults" in {
    roundTrip[CustomDefaults](CustomDefaults.getDefaultInstance)
  }

  it should "populate default values" in {
    val schema = Schema.of[CustomDefaults]
    val record = GenericReader.of(schema).read(CustomDefaults.getDefaultInstance.toByteArray)
    record.get("double_field") shouldBe 101.0
    record.get("float_field") shouldBe 102.0f
    record.get("int32_field") shouldBe 103
    record.get("int64_field") shouldBe 104L
    record.get("uint32_field") shouldBe 105
    record.get("uint64_field") shouldBe 106L
    record.get("sint32_field") shouldBe 107
    record.get("sint64_field") shouldBe 108L
    record.get("fixed32_field") shouldBe 109
    record.get("fixed64_field") shouldBe 110L
    record.get("sfixed32_field") shouldBe 111
    record.get("sfixed64_field") shouldBe 112L
    record.get("bool_field") shouldBe true
    record.get("string_field") shouldBe "hello"
    record.get("bytes_field") shouldBe
      Base64.encode(ByteString.copyFromUtf8("world").toByteArray)
    record.get("color_field") shouldBe "GREEN"
  }
} 
Example 80
Source File: ProtobufTypeSpec.scala    From protobuf-generic   with Apache License 2.0 5 votes vote down vote up
package me.lyh.protobuf.generic.test

import java.io.ByteArrayInputStream

import com.google.protobuf.CodedInputStream
import me.lyh.protobuf.generic._
import me.lyh.protobuf.generic.proto2.Schemas._
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class ProtobufTypeSpec extends AnyFlatSpec with Matchers {
  private val pt = ProtobufType[Optional]
  private val record = Records.optional

  "ProtobufType.descriptor" should "work" in {
    pt.descriptor shouldBe Optional.getDescriptor
  }

  "ProtobufType.newBuilder" should "work" in {
    pt.newBuilder().build() shouldBe Optional.newBuilder().build()
  }

  "ProtobufType.parseFrom" should "support byte array" in {
    pt.parseFrom(record.toByteArray) shouldBe record
  }

  it should "support ByteString" in {
    pt.parseFrom(record.toByteString) shouldBe record
  }

  it should "support InputStream" in {
    pt.parseFrom(new ByteArrayInputStream(record.toByteArray)) shouldBe record
  }

  it should "support CodedInputStream" in {
    pt.parseFrom(CodedInputStream.newInstance(record.toByteArray)) shouldBe record
  }
} 
Example 81
Source File: OdfExtract.scala    From docspell   with GNU General Public License v3.0 5 votes vote down vote up
package docspell.extract.odf

import java.io.{ByteArrayInputStream, InputStream}

import scala.util.Try

import cats.effect._
import cats.implicits._
import fs2.Stream

import docspell.extract.internal.Text

import org.apache.tika.metadata.Metadata
import org.apache.tika.parser.ParseContext
import org.apache.tika.parser.odf.OpenDocumentParser
import org.apache.tika.sax.BodyContentHandler

object OdfExtract {

  def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] =
    data.compile.to(Array).map(new ByteArrayInputStream(_)).map(get)

  def get(is: InputStream) =
    Try {
      val handler  = new BodyContentHandler()
      val pctx     = new ParseContext()
      val meta     = new Metadata()
      val ooparser = new OpenDocumentParser()
      ooparser.parse(is, handler, meta, pctx)
      Text(Option(handler.toString))
    }.toEither

} 
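A minimal sketch of calling the InputStream overload of OdfExtract.get directly; the file path is a placeholder and Tika's OpenDocument parser is assumed to be on the classpath.

import java.io.ByteArrayInputStream
import java.nio.file.{Files, Paths}

import docspell.extract.odf.OdfExtract

object OdfExtractUsageSketch {
  def main(args: Array[String]): Unit = {
    // Placeholder path to an ODF document.
    val bytes = Files.readAllBytes(Paths.get("/tmp/sample.odt"))
    OdfExtract.get(new ByteArrayInputStream(bytes)) match {
      case Right(text) => println(text) // docspell's Text wrapper, printed via toString here
      case Left(err)   => err.printStackTrace()
    }
  }
}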
Example 82
Source File: RtfExtract.scala    From docspell   with GNU General Public License v3.0 5 votes vote down vote up
package docspell.extract.rtf

import java.io.{ByteArrayInputStream, InputStream}
import javax.swing.text.rtf.RTFEditorKit

import scala.util.Try

import cats.effect.Sync
import cats.implicits._
import fs2.Stream

import docspell.common.MimeType
import docspell.extract.internal.Text

object RtfExtract {

  val rtfType = MimeType.application("rtf")

  def get(is: InputStream): Either[Throwable, Text] =
    Try {
      val kit = new RTFEditorKit()
      val doc = kit.createDefaultDocument()
      kit.read(is, doc, 0)
      Text(doc.getText(0, doc.getLength))
    }.toEither

  def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] =
    data.compile.to(Array).map(new ByteArrayInputStream(_)).map(get)
} 
Example 83
Source File: PoiExtract.scala    From docspell   with GNU General Public License v3.0 5 votes vote down vote up
package docspell.extract.poi

import java.io.{ByteArrayInputStream, InputStream}

import scala.util.Try

import cats.data.EitherT
import cats.effect.Sync
import cats.implicits._
import fs2.Stream

import docspell.common._
import docspell.extract.internal.Text
import docspell.files.TikaMimetype

import org.apache.poi.hssf.extractor.ExcelExtractor
import org.apache.poi.hssf.usermodel.HSSFWorkbook
import org.apache.poi.hwpf.extractor.WordExtractor
import org.apache.poi.xssf.extractor.XSSFExcelExtractor
import org.apache.poi.xssf.usermodel.XSSFWorkbook
import org.apache.poi.xwpf.extractor.XWPFWordExtractor
import org.apache.poi.xwpf.usermodel.XWPFDocument

object PoiExtract {

  def get[F[_]: Sync](
      data: Stream[F, Byte],
      hint: MimeTypeHint
  ): F[Either[Throwable, Text]] =
    TikaMimetype.detect(data, hint).flatMap(mt => get(data, mt))

  def get[F[_]: Sync](
      data: Stream[F, Byte],
      mime: MimeType
  ): F[Either[Throwable, Text]] =
    mime match {
      case PoiType.doc =>
        getDoc(data)
      case PoiType.xls =>
        getXls(data)
      case PoiType.xlsx =>
        getXlsx(data)
      case PoiType.docx =>
        getDocx(data)
      case PoiType.msoffice =>
        EitherT(getDoc[F](data))
          .recoverWith({
            case _ => EitherT(getXls[F](data))
          })
          .value
      case PoiType.ooxml =>
        EitherT(getDocx[F](data))
          .recoverWith({
            case _ => EitherT(getXlsx[F](data))
          })
          .value
      case mt =>
        Sync[F].pure(Left(new Exception(s"Unsupported content: ${mt.asString}")))
    }

  def getDocx(is: InputStream): Either[Throwable, Text] =
    Try {
      val xt = new XWPFWordExtractor(new XWPFDocument(is))
      Text(Option(xt.getText))
    }.toEither

  def getDoc(is: InputStream): Either[Throwable, Text] =
    Try {
      val xt = new WordExtractor(is)
      Text(Option(xt.getText))
    }.toEither

  def getXlsx(is: InputStream): Either[Throwable, Text] =
    Try {
      val xt = new XSSFExcelExtractor(new XSSFWorkbook(is))
      Text(Option(xt.getText))
    }.toEither

  def getXls(is: InputStream): Either[Throwable, Text] =
    Try {
      val xt = new ExcelExtractor(new HSSFWorkbook(is))
      Text(Option(xt.getText))
    }.toEither

  def getDocx[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] =
    data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getDocx)

  def getDoc[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] =
    data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getDoc)

  def getXlsx[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] =
    data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getXlsx)

  def getXls[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] =
    data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getXls)

} 
Example 84
Source File: ImageSize.scala    From docspell   with GNU General Public License v3.0 5 votes vote down vote up
package docspell.files

import java.io.{ByteArrayInputStream, InputStream}
import java.nio.file.Path
import javax.imageio.stream.{FileImageInputStream, ImageInputStream}
import javax.imageio.{ImageIO, ImageReader}

import scala.jdk.CollectionConverters._
import scala.util.{Try, Using}

import cats.effect._
import cats.implicits._
import fs2.Stream

object ImageSize {

  
  def get[F[_]: Sync](data: Stream[F, Byte]): F[Option[Dimension]] =
    data.take(768).compile.to(Array).map { ar =>
      val iis = ImageIO.createImageInputStream(new ByteArrayInputStream(ar))
      if (iis == null) sys.error("no reader given for the array")
      else getDimension(iis)
    }

  private def getDimension(in: ImageInputStream): Option[Dimension] =
    ImageIO
      .getImageReaders(in)
      .asScala
      .to(LazyList)
      .collectFirst(Function.unlift { reader =>
        val dim = getDimension(in, reader).toOption
        reader.dispose()
        dim
      })

  private def getDimension(
      in: ImageInputStream,
      reader: ImageReader
  ): Either[Throwable, Dimension] =
    Try {
      reader.setInput(in)
      val width  = reader.getWidth(reader.getMinIndex)
      val height = reader.getHeight(reader.getMinIndex)
      Dimension(width, height)
    }.toEither
} 
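The same header-probing idea as a plain-JDK sketch, without the fs2/cats-effect wrapper, for callers that already hold the image bytes in memory; the object and method names are illustrative.

import java.io.ByteArrayInputStream
import javax.imageio.ImageIO

import scala.jdk.CollectionConverters._
import scala.util.Try

object ImageSizeSketch {

  // Probes (width, height) from in-memory image bytes without decoding the whole image.
  def dimensionsOf(bytes: Array[Byte]): Option[(Int, Int)] = {
    val iis = ImageIO.createImageInputStream(new ByteArrayInputStream(bytes))
    if (iis == null) None
    else
      ImageIO.getImageReaders(iis).asScala.to(LazyList).collectFirst(Function.unlift { reader =>
        val dim = Try {
          reader.setInput(iis)
          (reader.getWidth(reader.getMinIndex), reader.getHeight(reader.getMinIndex))
        }.toOption
        reader.dispose()
        dim
      })
  }
}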
Example 85
Source File: DataFinder.scala    From Scala-Design-Patterns-Second-Edition   with MIT License 5 votes vote down vote up
package com.ivan.nikolov.behavioral.template

import java.io.{InputStreamReader, ByteArrayInputStream}

import com.github.tototoshi.csv.CSVReader
import com.ivan.nikolov.behavioral.template.model.Person
import org.json4s.{StringInput, DefaultFormats}
import org.json4s.jackson.JsonMethods

abstract class DataFinder[T, Y] {

  def find(f: T => Option[Y]): Option[Y] =
    try {
      val data = readData()
      val parsed = parse(data)
      f(parsed)
    } finally {
      cleanup()
    }

  def readData(): Array[Byte]

  def parse(data: Array[Byte]): T

  def cleanup()
}

class JsonDataFinder extends DataFinder[List[Person], Person] {
  implicit val formats = DefaultFormats

  override def readData(): Array[Byte] = {
    val stream = this.getClass.getResourceAsStream("people.json")
    Stream.continually(stream.read).takeWhile(_ != -1).map(_.toByte).toArray
  }

  override def cleanup(): Unit = {
    System.out.println("Reading json: nothing to do.")
  }

  override def parse(data: Array[Byte]): List[Person] =
    JsonMethods.parse(StringInput(new String(data, "UTF-8"))).extract[List[Person]]
}

class CSVDataFinder extends DataFinder[List[Person], Person] {
  override def readData(): Array[Byte] = {
    val stream = this.getClass.getResourceAsStream("people.csv")
    Stream.continually(stream.read).takeWhile(_ != -1).map(_.toByte).toArray
  }

  override def cleanup(): Unit = {
    System.out.println("Reading csv: nothing to do.")
  }

  override def parse(data: Array[Byte]): List[Person] =
    CSVReader.open(new InputStreamReader(new ByteArrayInputStream(data))).all().map {
      case List(name, age, address) =>
        Person(name, age.toInt, address)
    }
}


object DataFinderExample {
  def main(args: Array[String]): Unit = {
    val jsonDataFinder: DataFinder[List[Person], Person] = new JsonDataFinder
    val csvDataFinder: DataFinder[List[Person], Person] = new CSVDataFinder

    System.out.println(s"Find a person with name Ivan in the json: ${jsonDataFinder.find(_.find(_.name == "Ivan"))}")
    System.out.println(s"Find a person with name James in the json: ${jsonDataFinder.find(_.find(_.name == "James"))}")

    System.out.println(s"Find a person with name Maria in the csv: ${csvDataFinder.find(_.find(_.name == "Maria"))}")
    System.out.println(s"Find a person with name Alice in the csv: ${csvDataFinder.find(_.find(_.name == "Alice"))}")
  }
} 
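A stripped-down, in-memory variant of the CSV branch above, sketched under the assumption that the same tototoshi scala-csv dependency is available; it feeds a hard-coded string through a ByteArrayInputStream instead of a classpath resource, and PersonRow is a hypothetical local model distinct from the example's Person class.

import java.io.{ByteArrayInputStream, InputStreamReader}
import java.nio.charset.StandardCharsets

import com.github.tototoshi.csv.CSVReader

// Hypothetical local model for this sketch.
final case class PersonRow(name: String, age: Int, address: String)

object InMemoryCsvSketch {
  def main(args: Array[String]): Unit = {
    val csv = "Ivan,26,Sofia\nMaria,23,Vienna"
    val people = CSVReader
      .open(new InputStreamReader(new ByteArrayInputStream(csv.getBytes(StandardCharsets.UTF_8))))
      .all()
      .collect { case List(name, age, address) => PersonRow(name, age.toInt, address) }
    println(people)
  }
}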
Example 89
Source File: FileInputImpl.scala    From chatoverflow   with Eclipse Public License 2.0 5 votes vote down vote up
package org.codeoverflow.chatoverflow.requirement.service.file.impl

import java.awt.image.BufferedImage
import java.io.ByteArrayInputStream
import java.util.Optional

import javax.imageio.ImageIO
import org.codeoverflow.chatoverflow.WithLogger
import org.codeoverflow.chatoverflow.api.io.input.FileInput
import org.codeoverflow.chatoverflow.registry.Impl
import org.codeoverflow.chatoverflow.requirement.impl.InputImpl
import org.codeoverflow.chatoverflow.requirement.service.file.FileConnector

@Impl(impl = classOf[FileInput], connector = classOf[FileConnector])
class FileInputImpl extends InputImpl[FileConnector] with FileInput with WithLogger {

  override def getFile(pathInResources: String): Optional[String] = Optional.ofNullable(sourceConnector.get.getFile(pathInResources).orNull)

  override def getBinaryFile(pathInResources: String): Optional[Array[Byte]] = Optional.ofNullable(sourceConnector.get.getBinaryFile(pathInResources).orNull)

  override def getImage(pathInResources: String): Optional[BufferedImage] = {
    val data = sourceConnector.get.getBinaryFile(pathInResources)
    if (data.isEmpty) {
      // No binary data available: return an empty Optional instead of falling through to data.get
      Optional.empty()
    } else {
      val bis = new ByteArrayInputStream(data.get)
      Optional.of(ImageIO.read(bis))
    }
  }

  override def start(): Boolean = true

  
  override def stop(): Boolean = true
} 
Example 90
Source File: VerifyingSpec.scala    From jsoniter-scala   with MIT License 5 votes vote down vote up
package com.github.plokhotnyuk.jsoniter_scala.macros

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer
import java.nio.charset.StandardCharsets.UTF_8

import com.github.plokhotnyuk.jsoniter_scala.core._
import org.scalatest.wordspec.AnyWordSpec
import org.scalatest.matchers.should.Matchers

class VerifyingSpec extends AnyWordSpec with Matchers {
  def verifySerDeser[T](codec: JsonValueCodec[T], obj: T, json: String, cfg: WriterConfig = WriterConfig): Unit = {
    verifySer(codec, obj, json, cfg)
    verifyDeser(codec, obj, json)
  }

  def verifySer[T](codec: JsonValueCodec[T], obj: T, json: String, cfg: WriterConfig = WriterConfig): Unit = {
    val len = json.getBytes(UTF_8).length
    verifyDirectByteBufferSer(codec, obj, len, cfg, json)
    verifyHeapByteBufferSer(codec, obj, len, cfg, json)
    verifyOutputStreamSer(codec, obj, cfg, json)
    verifyArraySer(codec, obj, cfg, json)
  }

  def verifyDeser[T](codec: JsonValueCodec[T], obj: T, json: String): Unit =
    verifyDeserByCheck[T](codec, json, check = (_: T) shouldBe obj)

  def verifyDeserByCheck[T](codec: JsonValueCodec[T], json: String, check: T => Unit): Unit = {
    val jsonBytes = json.getBytes(UTF_8)
    verifyDirectByteBufferDeser(codec, jsonBytes, check)
    verifyHeapByteBufferDeser(codec, jsonBytes, check)
    verifyInputStreamDeser(codec, jsonBytes, check)
    verifyByteArrayDeser(codec, jsonBytes, check)
  }

  def verifyDeserError[T](codec: JsonValueCodec[T], json: String, msg: String): Unit =
    verifyDeserError(codec, json.getBytes(UTF_8), msg)

  def verifyDeserError[T](codec: JsonValueCodec[T], jsonBytes: Array[Byte], msg: String): Unit = {
    assert(intercept[JsonReaderException](verifyDirectByteBufferDeser(codec, jsonBytes, (_: T) => ()))
      .getMessage.contains(msg))
    assert(intercept[JsonReaderException](verifyHeapByteBufferDeser(codec, jsonBytes, (_: T) => ()))
      .getMessage.contains(msg))
    assert(intercept[JsonReaderException](verifyInputStreamDeser(codec, jsonBytes, (_: T) => ()))
      .getMessage.contains(msg))
    assert(intercept[JsonReaderException](verifyByteArrayDeser(codec, jsonBytes, (_: T) => ()))
      .getMessage.contains(msg))
  }

  def verifyDirectByteBufferSer[T](codec: JsonValueCodec[T], obj: T, len: Int, cfg: WriterConfig, expected: String): Unit = {
    val directBuf = ByteBuffer.allocateDirect(len + 100)
    directBuf.position(0)
    writeToByteBuffer(obj, directBuf, cfg)(codec)
    directBuf.position(0)
    val buf = new Array[Byte](len)
    directBuf.get(buf)
    toString(buf) shouldBe expected
  }

  def verifyHeapByteBufferSer[T](codec: JsonValueCodec[T], obj: T, len: Int, cfg: WriterConfig, expected: String): Unit = {
    val heapBuf = ByteBuffer.wrap(new Array[Byte](len + 100))
    heapBuf.position(0)
    writeToByteBuffer(obj, heapBuf, cfg)(codec)
    heapBuf.position(0)
    val buf = new Array[Byte](len)
    heapBuf.get(buf)
    toString(buf) shouldBe expected
  }

  def verifyOutputStreamSer[T](codec: JsonValueCodec[T], obj: T, cfg: WriterConfig, expected: String): Unit = {
    val baos = new ByteArrayOutputStream
    writeToStream(obj, baos, cfg)(codec)
    toString(baos.toByteArray) shouldBe expected
  }

  def verifyArraySer[T](codec: JsonValueCodec[T], obj: T, cfg: WriterConfig, expected: String): Unit =
    toString(writeToArray(obj, cfg)(codec)) shouldBe expected

  def verifyDirectByteBufferDeser[T](codec: JsonValueCodec[T], json: Array[Byte], check: T => Unit): Unit = {
    val directBuf = ByteBuffer.allocateDirect(json.length)
    directBuf.put(json)
    directBuf.position(0)
    check(readFromByteBuffer(directBuf)(codec))
  }

  def verifyHeapByteBufferDeser[T](codec: JsonValueCodec[T], json: Array[Byte], check: T => Unit): Unit =
    check(readFromByteBuffer(ByteBuffer.wrap(json))(codec))

  def verifyInputStreamDeser[T](codec: JsonValueCodec[T], json: Array[Byte], check: T => Unit): Unit =
    check(readFromStream(new ByteArrayInputStream(json))(codec))

  def verifyByteArrayDeser[T](codec: JsonValueCodec[T], json: Array[Byte], check: T => Unit): Unit =
    check(readFromArray(json)(codec))

  def toString(json: Array[Byte]): String = new String(json, 0, json.length, UTF_8)
} 
Example 91
Source File: Release.scala    From ionroller   with MIT License 5 votes vote down vote up
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File}
import java.nio.file.{Files, Paths}

import com.amazonaws.services.s3.model._
import com.amazonaws.services.s3.transfer.Transfer.TransferState
import com.amazonaws.services.s3.transfer.TransferManager
import com.amazonaws.util.IOUtils
import sbt._

import scalaz.concurrent.Task

object Release {

  lazy val releaseCli = taskKey[Unit]("Releases ION-Roller CLI")

  def release(ver: String, zip: File, install: File) = {
    val files = Seq(
      (install.getName, replaceVersionAndReadBytes(ver, install), "text/plain"),
      (zip.getName, readBytes(zip), "application/zip"))
    val tx = new TransferManager
    val tasks = for {
      f <- files
    } yield uploadFile(tx, f._1, f._2, f._3)
    val t = for {
      results <- Task.gatherUnordered(tasks)
      finalResult = if (results.forall(_ == TransferState.Completed)) TransferState.Completed else TransferState.Failed
      printTask <- Task.delay(println(finalResult))
    } yield printTask
    t.run
  }

  def uploadFile(tx: TransferManager, name: String, getBytes: Task[Array[Byte]], contentType: String): Task[TransferState] = {
    for {
      bytes <- getBytes
      meta <- metadata(bytes, contentType)
      transferState <- upload(tx, bytes, name, meta)
    } yield transferState
  }

  def metadata(bytes: Array[Byte], contentType: String): Task[ObjectMetadata] = {
    Task.delay({
      val out = new ByteArrayOutputStream
      out.write(bytes)
      val metadata = new ObjectMetadata
      metadata.setContentType(contentType)
      val contentBytes = IOUtils.toByteArray(new ByteArrayInputStream(out.toByteArray)).length.toLong
      // we need to call new ByteArrayInputStream again, as checking the length reads the stream
      metadata.setContentLength(contentBytes)
      metadata
    })
  }

  def upload(tx: TransferManager, in: Array[Byte], name: String, meta: ObjectMetadata): Task[TransferState] = {
    Task.delay({
      println(s"Uploading $name...")
      val upload = tx.upload(
        new PutObjectRequest("ionroller-cli", name, new ByteArrayInputStream(in), meta)
          .withCannedAcl(CannedAccessControlList.PublicRead)
      )
      while (!upload.isDone) {
        Thread.sleep(2000)
        println(upload.getProgress.getPercentTransferred.toInt + "%")
      }
      upload.getState
    })
  }

  def replaceVersionAndReadBytes(ver: String, file: File): Task[Array[Byte]] = {
    Task.delay({
      scala.io.Source.fromFile(file).getLines()
        .map(in => if (in startsWith "VERSION=") s"VERSION=$ver" else in)
        .mkString("\n")
        .getBytes
        .toSeq
        .toArray
    })
  }

  def readBytes(file: File): Task[Array[Byte]] = Task.delay({
    Files.readAllBytes(Paths.get(file.getAbsolutePath))
  })

} 
Example 92
Source File: TemplateSpec.scala    From cluster-broccoli   with Apache License 2.0 5 votes vote down vote up
package de.frosner.broccoli.models

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import org.specs2.mutable.Specification
import play.api.libs.json.Json

import Template.{templateApiWrites, templatePersistenceReads}

class TemplateSpec extends Specification {

  "A template" should {

    "extract only parameters specified in the parameters" in {
      Template("test",
               "Hallo {{id}}. I like {{person_name}}.",
               "desc",
               Map("id" -> ParameterInfo("id", None, None, None, ParameterType.Raw, None))).parameters === Set("id")
    }

    "not automatically extract parameters from a template" in {
      Template("test", "Hallo {{id}}, how is {{object}}", "desc", Map.empty).parameters === Set.empty
    }

    "create the template version correctly in" in {
      Template("test", "template JSON", "desc", Map.empty).version === "889df4c8118c30a28ed4f51674a0f19d"
    }

    "result in different template versions if the template JSON differs" in {
      Template("test", "template JSON", "desc", Map.empty).version !== Template("test",
                                                                                "template JSONs",
                                                                                "desc",
                                                                                Map.empty).version
    }

    "result in different template versions if the template parameter info differs" in {
      Template(
        id = "test",
        template = "template JSON {{id}}",
        description = "desc",
        parameterInfos = Map.empty
      ).version !== Template(
        id = "test",
        template = "template JSON {{id}}",
        description = "desc",
        parameterInfos = Map(
          "id" -> ParameterInfo("id",
                                None,
                                None,
                                secret = Some(false),
                                `type` = ParameterType.String,
                                orderIndex = None)
        )
      ).version
    }

  }

  "Template serialization" should {

    "work correctly" in {
      val originalTemplate = Template("test", "Hallo {{name}}", "desc", Map.empty)
      val bos = new ByteArrayOutputStream()
      val oos = new ObjectOutputStream(bos)
      oos.writeObject(originalTemplate)
      oos.close()

      val ois = new ObjectInputStream(new ByteArrayInputStream(bos.toByteArray))
      val deserializedTemplate = ois.readObject()
      ois.close()

      originalTemplate === deserializedTemplate
    }

  }

  "Template back-end JSON serialization" should {

    "work" in {
      val template = Template(
        id = "t",
        template = "{{id}}",
        description = "d",
        parameterInfos = Map.empty
      )
      Json
        .fromJson(Json.toJson(template)(Template.templatePersistenceWrites))(Template.templatePersistenceReads)
        .get === template
    }

  }

} 
Example 93
Source File: JavaSerializationBenchmark.scala    From scala-commons   with MIT License 5 votes vote down vote up
package com.avsystem.commons
package rpc.akka.serialization

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import org.openjdk.jmh.annotations.{Benchmark, BenchmarkMode, Fork, Measurement, Mode, Scope, State, Warmup}
import org.openjdk.jmh.infra.Blackhole


@Warmup(iterations = 5)
@Measurement(iterations = 20)
@Fork(1)
@BenchmarkMode(Array(Mode.Throughput))
@State(Scope.Thread)
class JavaSerializationBenchmark {

  val something = Something(42, Nested(4 :: 8 :: 15 :: 16 :: 23 :: 42 :: Nil, 0), "lol")
  val array = {
    val baos = new ByteArrayOutputStream()
    val o = new ObjectOutputStream(baos)

    o.writeObject(something)
    o.close()

    baos.toByteArray
  }

  @Benchmark
  def byteStringOutput(): Something = {
    val baos = new ByteArrayOutputStream()
    val o = new ObjectOutputStream(baos)

    o.writeObject(something)
    o.close()

    val array = baos.toByteArray

    new ObjectInputStream(new ByteArrayInputStream(array)).readObject().asInstanceOf[Something]
  }

  @Benchmark
  def writeTest(): Array[Byte] = {
    val baos = new ByteArrayOutputStream()
    val o = new ObjectOutputStream(baos)

    o.writeObject(something)
    o.close()

    baos.toByteArray
  }

  @Benchmark
  def readTest(): Something = {
    new ObjectInputStream(new ByteArrayInputStream(array)).readObject().asInstanceOf[Something]
  }
} 
Example 94
Source File: StreamInputOutputBenchmark.scala    From scala-commons   with MIT License 5 votes vote down vote up
package com.avsystem.commons
package ser

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

import com.avsystem.commons.serialization.{GenCodec, StreamInput, StreamOutput}
import org.openjdk.jmh.annotations.{Benchmark, BenchmarkMode, Fork, Measurement, Mode, Scope, State, Warmup}
import org.openjdk.jmh.infra.Blackhole


case class Toplevel(int: Int, nested: Nested, str: String)
case class Nested(list: List[Int], int: Int)

object Toplevel {
  implicit val nestedCodec: GenCodec[Nested] = GenCodec.materialize[Nested]
  implicit val codec: GenCodec[Toplevel] = GenCodec.materialize[Toplevel]
}

@Warmup(iterations = 10)
@Measurement(iterations = 20)
@Fork(1)
@BenchmarkMode(Array(Mode.Throughput))
@State(Scope.Thread)
class StreamInputOutputBenchmark {

  val something = Toplevel(35, Nested(List(121, 122, 123, 124, 125, 126), 53), "lol")

  val inputArray: Array[Byte] = {
    val os = new ByteArrayOutputStream()

    GenCodec.write(new StreamOutput(new DataOutputStream(os)), something)
    os.toByteArray
  }

  @Benchmark
  def testEncode(bh: Blackhole): Unit = {
    val os = new ByteArrayOutputStream(inputArray.length)
    val output = new StreamOutput(new DataOutputStream(os))
    GenCodec.write(output, something)
    bh.consume(os.toByteArray)
  }

  @Benchmark
  def testDecode(bh: Blackhole): Unit = {
    val is = new DataInputStream(new ByteArrayInputStream(inputArray))
    val input = new StreamInput(is)
    bh.consume(GenCodec.read[Toplevel](input))
  }

  @Benchmark
  def testEncodeRaw(bh: Blackhole): Unit = {
    val os = new ByteArrayOutputStream(inputArray.length)
    val output = new StreamOutput(new DataOutputStream(os))
    val toplevelOutput = output.writeObject()
    toplevelOutput.writeField("int").writeSimple().writeInt(35)
    val nestedOutput = toplevelOutput.writeField("nested").writeObject()
    val listOutput = nestedOutput.writeField("list").writeList()
    listOutput.writeElement().writeSimple().writeInt(121)
    listOutput.writeElement().writeSimple().writeInt(122)
    listOutput.writeElement().writeSimple().writeInt(123)
    listOutput.writeElement().writeSimple().writeInt(124)
    listOutput.writeElement().writeSimple().writeInt(125)
    listOutput.writeElement().writeSimple().writeInt(126)
    listOutput.finish()
    nestedOutput.writeField("int").writeSimple().writeInt(53)
    nestedOutput.finish()
    toplevelOutput.writeField("str").writeSimple().writeString("lol")
    toplevelOutput.finish()
    bh.consume(os.toByteArray)
  }

  @Benchmark
  def testDecodeRaw(bh: Blackhole): Unit = {
    val is = new DataInputStream(new ByteArrayInputStream(inputArray))
    val input = new StreamInput(is)
    val objInput = input.readObject()
    val intField = objInput.nextField().readSimple().readInt()
    val nestedInput = objInput.nextField().readObject()
    val listInput = nestedInput.nextField().readList()
    val listNested = List(
      listInput.nextElement().readSimple().readInt(),
      listInput.nextElement().readSimple().readInt(),
      listInput.nextElement().readSimple().readInt(),
      listInput.nextElement().readSimple().readInt(),
      listInput.nextElement().readSimple().readInt(),
      listInput.nextElement().readSimple().readInt()
    )
    listInput.hasNext
    val intNested = nestedInput.nextField().readSimple().readInt()
    nestedInput.hasNext
    val strField = objInput.nextField().readSimple().readString()
    objInput.hasNext
    bh.consume(Toplevel(intField, Nested(listNested, intNested), strField))
  }
} 
Example 95
Source File: StreamGenCodecTest.scala    From scala-commons   with MIT License 5 votes vote down vote up
package com.avsystem.commons
package serialization

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

class StreamGenCodecTest extends GenCodecRoundtripTest {
  type Raw = Array[Byte]

  def writeToOutput(write: Output => Unit): Array[Byte] = {
    val baos = new ByteArrayOutputStream
    write(new StreamOutput(new DataOutputStream(baos)))
    baos.toByteArray
  }

  def createInput(raw: Array[Byte]): Input =
    new StreamInput(new DataInputStream(new ByteArrayInputStream(raw)))
} 
Example 96
Source File: MessageSerializationSuite.scala    From bahir   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.streaming.pubnub

import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
import java.io.ObjectInputStream
import java.io.ObjectOutputStream

import com.google.gson.JsonParser
import com.pubnub.api.models.consumer.pubsub.PNMessageResult

import org.apache.spark.SparkFunSuite

class MessageSerializationSuite extends SparkFunSuite {
  test("Full example") {
    checkMessageSerialization(
      "{\"message\":\"Hello, World!\"}", "channel1",
      "publisher1", "subscription1", System.currentTimeMillis * 10000
    )
  }

  test("Message from channel") {
    checkMessageSerialization("{\"message\":\"Hello, World!\"}", "c", "p", null, 13534398158620385L)
  }

  test("Message from subscription") {
    checkMessageSerialization("{\"message\":\"Hello, World!\"}", null, "p", "s", 13534397812467596L)
  }

  def checkMessageSerialization(payload: String, channel: String,
      publisher: String, subscription: String, timestamp: Long): Unit = {
    val builder = PNMessageResult.builder
      .message(if (payload != null) new JsonParser().parse(payload) else null)
      .channel(channel)
      .publisher(publisher)
      .subscription(subscription)
      .timetoken(timestamp)
    val pubNubMessage = builder.build()
    val sparkMessage = new SparkPubNubMessage
    sparkMessage.message = pubNubMessage

    // serializer
    val byteOutStream = new ByteArrayOutputStream
    val outputStream = new ObjectOutputStream(byteOutStream)
    outputStream.writeObject(sparkMessage)
    outputStream.flush()
    outputStream.close()
    byteOutStream.close()
    val serializedBytes = byteOutStream.toByteArray

    // deserialize
    val byteInStream = new ByteArrayInputStream(serializedBytes)
    val inputStream = new ObjectInputStream(byteInStream)
    val deserializedMessage = inputStream.readObject().asInstanceOf[SparkPubNubMessage]
    inputStream.close()
    byteInStream.close()

    assert(payload.equals(deserializedMessage.getPayload))
    if (channel != null) {
      assert(channel.equals(deserializedMessage.getChannel))
    } else {
      assert(deserializedMessage.getChannel == null)
    }
    if (subscription != null) {
      assert(subscription.equals(deserializedMessage.getSubscription))
    } else {
      assert(deserializedMessage.getSubscription == null)
    }
    assert(publisher.equals(deserializedMessage.getPublisher))
    val unixTimestamp = Math.ceil(timestamp / 10000).longValue()
    assert(unixTimestamp.equals(deserializedMessage.getTimestamp))
  }
} 
Example 97
Source File: TDMLInfosetOutputter.scala    From incubator-daffodil   with Apache License 2.0 5 votes vote down vote up
package org.apache.daffodil.tdml

import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream

import org.apache.daffodil.infoset.DIArray
import org.apache.daffodil.infoset.DIComplex
import org.apache.daffodil.infoset.DISimple
import org.apache.daffodil.infoset.InfosetOutputter
import org.apache.daffodil.infoset.JDOMInfosetInputter
import org.apache.daffodil.infoset.JDOMInfosetOutputter
import org.apache.daffodil.infoset.JsonInfosetInputter
import org.apache.daffodil.infoset.JsonInfosetOutputter
import org.apache.daffodil.infoset.ScalaXMLInfosetInputter
import org.apache.daffodil.infoset.ScalaXMLInfosetOutputter
import org.apache.daffodil.infoset.W3CDOMInfosetInputter
import org.apache.daffodil.infoset.W3CDOMInfosetOutputter
import org.apache.daffodil.infoset.XMLTextInfosetInputter
import org.apache.daffodil.infoset.XMLTextInfosetOutputter

class TDMLInfosetOutputter() extends InfosetOutputter {

  private def implString: String = "daffodil"

  private val jsonStream = new ByteArrayOutputStream()
  private val xmlStream = new ByteArrayOutputStream()

  private val scalaOut = new ScalaXMLInfosetOutputter()
  private val jdomOut = new JDOMInfosetOutputter()
  private val w3cdomOut = new W3CDOMInfosetOutputter()
  private val jsonOut = new JsonInfosetOutputter(jsonStream, false)
  private val xmlOut = new XMLTextInfosetOutputter(xmlStream, false)

  private val outputters = Seq(xmlOut, scalaOut, jdomOut, w3cdomOut, jsonOut)

  override def reset(): Unit = {
    outputters.foreach(_.reset())
  }

  override def startSimple(simple: DISimple): Boolean = {
    if (!outputters.forall(_.startSimple(simple)))
      throw TDMLException("startSimple failed", Some(implString))
    true
  }

  override def endSimple(simple: DISimple): Boolean = {
    if (!outputters.forall(_.endSimple(simple)))
      throw TDMLException("endSimple failed", Some(implString))
    true
  }

  override def startComplex(complex: DIComplex): Boolean = {
    if (!outputters.forall(_.startComplex(complex)))
      throw TDMLException("startComplex failed", Some(implString))
    true
  }

  override def endComplex(complex: DIComplex): Boolean = {
    if (!outputters.forall(_.endComplex(complex)))
      throw TDMLException("endComplex failed", Some(implString))
    true
  }

  override def startArray(array: DIArray): Boolean = {
    if (!outputters.forall(_.startArray(array)))
      throw TDMLException("startArray failed", Some(implString))
    true
  }

  override def endArray(array: DIArray): Boolean = {
    if (!outputters.forall(_.endArray(array)))
      throw TDMLException("endArray failed", Some(implString))
    true
  }

  override def startDocument(): Boolean = {
    if (!outputters.forall(_.startDocument()))
      throw TDMLException("startDocument failed", Some(implString))
    true
  }

  override def endDocument(): Boolean = {
    if (!outputters.forall(_.endDocument()))
      throw TDMLException("endDocument failed", Some(implString))
    true
  }

  def getResult() = scalaOut.getResult

  def toInfosetInputter() = {
    val scalaIn = new ScalaXMLInfosetInputter(scalaOut.getResult)
    val jdomIn = new JDOMInfosetInputter(jdomOut.getResult)
    val w3cdomIn = new W3CDOMInfosetInputter(w3cdomOut.getResult)
    val jsonIn = new JsonInfosetInputter(new ByteArrayInputStream(jsonStream.toByteArray))
    val xmlIn = new XMLTextInfosetInputter(new ByteArrayInputStream(xmlStream.toByteArray))
    new TDMLInfosetInputter(scalaIn, Seq(jdomIn, w3cdomIn, jsonIn, xmlIn))
  }
} 
Example 98
Source File: Implicits.scala    From incubator-daffodil   with Apache License 2.0 5 votes vote down vote up
package org.apache.daffodil

import java.io.{ ByteArrayInputStream, BufferedInputStream }

import org.apache.daffodil.xml.NS
import org.apache.daffodil.exceptions.Assert
import scala.language.{ implicitConversions, reflectiveCalls } // silences scala 2.10 warnings

object Implicits {

  object ImplicitsSuppressUnusedImportWarning {
    def apply() = if (scala.math.random.isNaN()) Assert.impossible()
  }

  
  def intercept[T <: AnyRef](body: => Any)(implicit tag: scala.reflect.ClassTag[T]): T = {
    val clazz = tag.runtimeClass.asInstanceOf[Class[T]]
    val caught = try {
      body
      None
    } catch {
      case npe: NullPointerException => throw npe
      case s: scala.util.control.ControlThrowable => throw s
      case u: Throwable => {
        if (!clazz.isAssignableFrom(u.getClass)) {
          throw new InterceptFailedException(
            "Failed to intercept expected exception. Expected '%s' but got '%s'.".format(clazz.getName, u.getClass.getName))
        } else {
          Some(u)
        }
      }
    }
    caught match {
      case None => throw new InterceptFailedException("Failed to intercept any exceptions.")
      case Some(e) => e.asInstanceOf[T]
    }
  }

  class InterceptFailedException(msg: String) extends RuntimeException(msg)

} 
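A small usage sketch for intercept; the thrown exception and the assertion are arbitrary examples.

import org.apache.daffodil.Implicits.intercept

object InterceptUsageSketch {
  def main(args: Array[String]): Unit = {
    // require(false, ...) throws IllegalArgumentException, which intercept captures and returns.
    val e = intercept[IllegalArgumentException] {
      require(false, "boom")
    }
    assert(e.getMessage.contains("boom"))
  }
}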
Example 99
Source File: TestSerializationAndLazy.scala    From incubator-daffodil   with Apache License 2.0 5 votes vote down vote up
package org.apache.daffodil.util

import org.junit.Assert._
import java.io.ByteArrayOutputStream
import java.io.ObjectOutputStream
import java.io.ByteArrayInputStream
import java.io.ObjectInputStream
import org.junit.Test

class ToSerialize extends Serializable {

  val v = 5
  var lazyValWasEvaluated = false
  lazy val x = {
    // println("v is " + v)
    lazyValWasEvaluated = true
    2 * v
  }

}


class TestSerializationAndLazy {

  @Test
  def testSerializeBeforeLazyEval(): Unit = {
    val instance = new ToSerialize
    val baos = new ByteArrayOutputStream
    val stream = new ObjectOutputStream(baos)
    stream.writeObject(instance)
    stream.flush()
    stream.close()
    assertFalse(instance.lazyValWasEvaluated)
    val ba = baos.toByteArray()
    val bais = new ByteArrayInputStream(ba)
    val istream = new ObjectInputStream(bais)
    val restoredInstance = istream.readObject()
    istream.close()
    assertTrue(restoredInstance.isInstanceOf[ToSerialize])
    val ts = restoredInstance.asInstanceOf[ToSerialize]
    assertFalse(ts.lazyValWasEvaluated)
    ts.x
    assertTrue(ts.lazyValWasEvaluated)
  }

} 
Example 100
Source File: TarFlowSpec.scala    From nexus   with Apache License 2.0 5 votes vote down vote up
package ch.epfl.bluebrain.nexus.storage

import java.io.ByteArrayInputStream
import java.nio.file.{Files, Path, Paths}

import akka.actor.ActorSystem
import akka.stream.alpakka.file.scaladsl.Directory
import akka.stream.scaladsl.{FileIO, Source}
import akka.testkit.TestKit
import akka.util.ByteString
import ch.epfl.bluebrain.nexus.storage.utils.{EitherValues, IOEitherValues, Randomness}
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.commons.io.FileUtils
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpecLike
import org.scalatest.{BeforeAndAfterAll, Inspectors, OptionValues}

import scala.annotation.tailrec

class TarFlowSpec
    extends TestKit(ActorSystem("TarFlowSpec"))
    with AnyWordSpecLike
    with Matchers
    with IOEitherValues
    with Randomness
    with EitherValues
    with OptionValues
    with Inspectors
    with BeforeAndAfterAll {

  val basePath = Files.createTempDirectory("tarflow")
  val dir1     = basePath.resolve("one")
  val dir2     = basePath.resolve("two")

  override def afterAll(): Unit = {
    super.afterAll()
    FileUtils.cleanDirectory(basePath.toFile)
    ()
  }

  type PathAndContent = (Path, String)

  "A TarFlow" should {

    Files.createDirectories(dir1)
    Files.createDirectories(dir2)

    def relativize(path: Path): String = basePath.getParent().relativize(path).toString

    "generate the byteString for a tar file correctly" in {
      val file1        = dir1.resolve("file1.txt")
      val file1Content = genString()
      val file2        = dir1.resolve("file3.txt")
      val file2Content = genString()
      val file3        = dir2.resolve("file3.txt")
      val file3Content = genString()
      val files        = List(file1 -> file1Content, file2 -> file2Content, file3 -> file3Content)
      forAll(files) {
        case (file, content) => Source.single(ByteString(content)).runWith(FileIO.toPath(file)).futureValue
      }
      val byteString   = Directory.walk(basePath).via(TarFlow.writer(basePath)).runReduce(_ ++ _).futureValue
      val bytes        = new ByteArrayInputStream(byteString.toArray)
      val tar          = new TarArchiveInputStream(bytes)

      @tailrec def readEntries(
          tar: TarArchiveInputStream,
          entries: List[PathAndContent] = Nil
      ): List[PathAndContent] = {
        val entry = tar.getNextTarEntry
        if (entry == null) entries
        else {
          val data = Array.ofDim[Byte](entry.getSize.toInt)
          tar.read(data)
          readEntries(tar, (Paths.get(entry.getName) -> ByteString(data).utf8String) :: entries)
        }
      }
      val directories = List(relativize(basePath) -> "", relativize(dir1) -> "", relativize(dir2) -> "")
      val untarred    = readEntries(tar).map { case (path, content) => path.toString -> content }
      val expected    = files.map { case (path, content) => relativize(path) -> content } ++ directories
      untarred should contain theSameElementsAs expected
    }
  }

} 
Example 101
Source File: ProcessBuilderUtils.scala    From scalastringcourseday7   with Apache License 2.0 5 votes vote down vote up
package util

import java.io.ByteArrayInputStream
import java.nio.charset.{Charset, CodingErrorAction}

import text.StringOption

import scala.collection.mutable.ListBuffer
import scala.io.{Codec, Source}
import scala.sys.process.ProcessBuilder


object ProcessBuilderUtils {
  implicit def processToProcessUtils(repr: ProcessBuilder): ProcessBuilderUtils = {
    new ProcessBuilderUtils(repr)
  }
}

class ProcessBuilderUtils(repr: ProcessBuilder) {
  def lineStream(encoding: Charset,
                 onMalformedInput: CodingErrorAction,
                 onUnmappableCharacter: CodingErrorAction,
                 replacementOpt: StringOption): Iterator[String] = {
    val lines: Iterator[String] = repr.lineStream_!.iterator
    val byteBuffer = ListBuffer.empty[Byte]
    while (lines.hasNext) {
      val line: String = lines.next.trim concat "\n"
      byteBuffer ++= line.getBytes
    }
    implicit val codec = Codec(encoding).
      onMalformedInput(onMalformedInput).
      onUnmappableCharacter(onUnmappableCharacter)
    if (replacementOpt.nonEmpty) {
      codec.decodingReplaceWith(replacementOpt.get)
    }
    Source.fromInputStream(new ByteArrayInputStream(byteBuffer.toArray)).getLines
  }
} 
Example 102
Source File: H2OLoader.scala    From ForestFlow   with Apache License 2.0 5 votes vote down vote up
package ai.forestflow.serving.impl

import java.io.{ByteArrayInputStream, FileReader}
import java.nio.file.Paths

import ai.forestflow.serving.MLFlow.H2OMLFlowSpec
import ai.forestflow.serving.interfaces.Loader
import cats.syntax.either._
import ai.forestflow.domain.{FQRV, FlavorShim, ServableSettings}
import hex.genmodel.MojoReaderBackendFactory
import hex.genmodel.MojoReaderBackendFactory.CachingStrategy
import io.circe.{Error, yaml}

trait H2OLoader extends Loader {
  def version: Option[String]
  override def createServable(servableBinary: Array[Byte], fqrv: FQRV, settings: ServableSettings)(implicit eCTX: EnvironmentContext): H2OServable = {
    import hex.genmodel.MojoModel

    val mojoReader = MojoReaderBackendFactory.createReaderBackend(
      new ByteArrayInputStream(servableBinary),
      CachingStrategy.MEMORY)

    H2OServable(MojoModel.load(mojoReader), fqrv, settings)
  }
}

case class MLFlowH2OLoader(dataPath: String, version: Option[String]) extends H2OLoader {

  override def getRelativeServablePath(implicit eCTX: EnvironmentContext): String = {
    val json = yaml.parser.parse(new FileReader(Paths.get(eCTX.localDir.getAbsolutePath, dataPath, "h2o.yaml").toFile)) // TODO move "h2o.yaml" constant to configuration

    val h2oSpec = json
      .leftMap(err => err: Error)
      .flatMap(_.as[H2OMLFlowSpec])
      .valueOr(throw _)

    Paths.get(dataPath, h2oSpec.modelFile).toString
  }
}


trait BasicH2OMojoLoader extends H2OLoader  {
  this : FlavorShim with Loader =>
  val mojoPath: String
  val version: Option[String]

  override def getRelativeServablePath(implicit eCTX: EnvironmentContext): String = mojoPath
} 
Example 103
Source File: ParseTests.scala    From coursier   with Apache License 2.0 5 votes vote down vote up
package coursier.benchmark

import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets
import java.util.concurrent.TimeUnit

import coursier.maven.MavenRepository
import coursier.moduleString
import org.apache.maven.model.io.xpp3.MavenXpp3Reader
import org.openjdk.jmh.annotations._

import scala.concurrent.Await
import scala.concurrent.duration.Duration

@BenchmarkMode(Array(Mode.AverageTime))
@OutputTimeUnit(TimeUnit.MILLISECONDS)
class ParseTests {

  @Benchmark
  def parseSparkParent(state: TestState): Unit = {
    val t = state.repositories.head.find(
      mod"org.apache.spark:spark-parent_2.12",
      "2.4.0",
      state.fetcher
    ).run
    val e = Await.result(t.future()(state.ec), Duration.Inf)
    assert(e.isRight)
  }

  @Benchmark
  def parseSparkParentXmlDom(state: TestState): Unit = {
    val content = state.inMemoryCache.fromCache("https://repo1.maven.org/maven2/org/apache/spark/spark-parent_2.12/2.4.0/spark-parent_2.12-2.4.0.pom")
    val res = MavenRepository.parseRawPomDom(content)
    assert(res.isRight)
  }

  @Benchmark
  def parseSparkParentXmlSax(state: TestState): Unit = {
    val content = state.inMemoryCache.fromCache("https://repo1.maven.org/maven2/org/apache/spark/spark-parent_2.12/2.4.0/spark-parent_2.12-2.4.0.pom")
    val res = MavenRepository.parseRawPomSax(content)
    assert(res.isRight)
  }

  @Benchmark
  def parseApacheParent(state: TestState): Unit = {
    val t = state.repositories.head.find(
      mod"org.apache:apache",
      "18",
      state.fetcher
    ).run
    val e = Await.result(t.future()(state.ec), Duration.Inf)
    assert(e.isRight)
  }

  @Benchmark
  def parseSparkParentMavenModel(state: TestState): Unit = {
    val b = state
      .inMemoryCache
      .fromCache("https://repo1.maven.org/maven2/org/apache/spark/spark-parent_2.12/2.4.0/spark-parent_2.12-2.4.0.pom")
      .getBytes(StandardCharsets.UTF_8)
    val reader = new MavenXpp3Reader
    val model = reader.read(new ByteArrayInputStream(b))
  }

} 
Example 104
Source File: ZipTests.scala    From coursier   with Apache License 2.0 5 votes vote down vote up
package coursier.cli.util

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.Random
import java.util.zip.{Deflater, ZipEntry, ZipInputStream, ZipOutputStream}

import coursier.launcher.internal.Zip
import org.junit.runner.RunWith
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatestplus.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class ZipTests extends AnyFlatSpec {

  "zipEntries" should "be fine with custom deflaters" in {

    // Inspired by https://github.com/spring-projects/spring-boot/commit/a50646b7cc3ad941e748dfb450077e3a73706205#diff-2297c301250b25e3b80301c58daf3ea0R621

    val baos = new ByteArrayOutputStream
    val output = new ZipOutputStream(baos) {
      `def` = new Deflater(Deflater.NO_COMPRESSION, true)
    }
    val data = Array.ofDim[Byte](1024 * 1024)
    new Random().nextBytes(data)
    val entry = new ZipEntry("entry.dat")
    output.putNextEntry(entry)
    output.write(data)
    output.closeEntry()
    output.close()

    val result = baos.toByteArray

    val zos = new ZipOutputStream(new ByteArrayOutputStream)
    val entryNames = Zip.zipEntries(new ZipInputStream(new ByteArrayInputStream(result)))
      .map {
        case (ent, content) =>
          println(ent.getCompressedSize)
          val name = ent.getName
          zos.putNextEntry(ent)
          zos.write(content)
          zos.closeEntry()
          name
      }
      .toVector
    zos.close()
    assert(entryNames == Vector("entry.dat"))
  }

} 
Example 105
Source File: VectorGrid.scala    From osmesa   with Apache License 2.0 5 votes vote down vote up
package osmesa.analytics

import java.io.ByteArrayInputStream
import java.net.URI
import java.util.zip.GZIPInputStream

import geotrellis.proj4.WebMercator
import geotrellis.spark.tiling.ZoomedLayoutScheme
import geotrellis.vector.{Extent, PointFeature}
import geotrellis.vectortile.{Layer, VInt64, VectorTile}
import org.apache.commons.io.IOUtils
import org.apache.spark.internal.Logging
import osmesa.analytics.updater.Implicits._
import osmesa.analytics.updater._

import scala.collection.GenMap
import scala.collection.parallel.TaskSupport

trait VectorGrid extends Logging {
  // Default base zoom (highest resolution tiles produced)
  val DefaultBaseZoom: Int = 10

  // Number of cells per side in a gridded tile
  implicit val Cells: Int = 128

  // Number of cells in a gridded tile at the base of the pyramid (may be used for over-zooming)
  val BaseCells: Int = Cells

  // Default upload concurrency
  val DefaultUploadConcurrency: Int = 8

  implicit val LayoutScheme: ZoomedLayoutScheme = ZoomedLayoutScheme(WebMercator)
  val SequenceLayerName: String = "__sequences__"

  def getCommittedSequences(tile: VectorTile): Set[Int] =
    // NOTE when working with hashtags, this should be the changeset sequence, since changes from a
    // single sequence may appear in different batches depending on when changeset metadata arrives
    tile.layers
      .get(SequenceLayerName)
      .map(_.features.flatMap(f => f.data.values.map(valueToLong).map(_.intValue)))
      .map(_.toSet)
      .getOrElse(Set.empty)

  def makeSequenceLayer(sequences: Set[Int], extent: Extent, tileWidth: Int = 4096): (String, Layer) = {
    // create a second layer w/ a feature corresponding to committed sequences (in the absence of
    // available tile / layer metadata)
    val updatedSequences =
      sequences.toSeq.sorted
        .takeRight(1000)
        .zipWithIndex
        .map {
          case (seq, idx) =>
            idx.toString -> VInt64(seq)
        }
        .toMap

    val sequenceFeature = PointFeature(extent.center, updatedSequences)

    makeLayer(SequenceLayerName, extent, Seq(sequenceFeature), tileWidth)
  }

  def loadMVTs(urls: Map[URI, Extent])(
      implicit taskSupport: TaskSupport): GenMap[URI, VectorTile] = {
    // convert to a parallel collection to load more tiles concurrently
    val parUrls = urls.par
    parUrls.tasksupport = taskSupport

    parUrls.map {
      case (uri, extent) =>
        (uri,
         read(uri).map(
           bytes =>
             VectorTile.fromBytes(
               IOUtils.toByteArray(new GZIPInputStream(new ByteArrayInputStream(bytes))),
               extent)))
    } filter {
      case (_, mvt) => mvt.isDefined
    } map {
      case (uri, mvt) => uri -> mvt.get
    }
  }
} 
Example 106
Source File: CodecSpec.scala    From hail   with MIT License 5 votes vote down vote up
package is.hail.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream, OutputStream}

import is.hail.annotations.{Region, RegionValue}
import is.hail.asm4s.{Code, TypeInfo, Value}
import is.hail.expr.ir.{EmitClassBuilder, EmitFunctionBuilder, ExecuteContext, typeToTypeInfo}
import is.hail.types.encoded.EType
import is.hail.types.physical.PType
import is.hail.types.virtual.Type
import is.hail.rvd.RVDContext
import is.hail.sparkextras.ContextRDD
import is.hail.utils.using
import org.apache.spark.rdd.RDD

trait AbstractTypedCodecSpec extends Spec {
  def encodedType: EType
  def encodedVirtualType: Type

  type StagedEncoderF[T] = (Value[Region], Value[T], Value[OutputBuffer]) => Code[Unit]
  type StagedDecoderF[T] = (Value[Region], Value[InputBuffer]) => Code[T]

  def buildEncoder(ctx: ExecuteContext, t: PType): (OutputStream) => Encoder

  def decodedPType(requestedType: Type): PType

  def buildDecoder(ctx: ExecuteContext, requestedType: Type): (PType, (InputStream) => Decoder)

  def encode(ctx: ExecuteContext, t: PType, offset: Long): Array[Byte] = {
    val baos = new ByteArrayOutputStream()
    using(buildEncoder(ctx, t)(baos))(_.writeRegionValue(offset))
    baos.toByteArray
  }

  def decode(ctx: ExecuteContext, requestedType: Type, bytes: Array[Byte], region: Region): (PType, Long) = {
    val bais = new ByteArrayInputStream(bytes)
    val (pt, dec) = buildDecoder(ctx, requestedType)
    (pt, dec(bais).readRegionValue(region))
  }

  def buildCodeInputBuffer(is: Code[InputStream]): Code[InputBuffer]

  def buildCodeOutputBuffer(os: Code[OutputStream]): Code[OutputBuffer]

  def buildEmitDecoderF[T](requestedType: Type, cb: EmitClassBuilder[_]): (PType, StagedDecoderF[T])

  def buildEmitEncoderF[T](t: PType, cb: EmitClassBuilder[_]): StagedEncoderF[T]

  def buildEmitDecoderF[T](requestedType: Type, cb: EmitClassBuilder[_], ti: TypeInfo[T]): (PType, StagedDecoderF[T]) = {
    val (ptype, dec) = buildEmitDecoderF[T](requestedType, cb)
    assert(ti == typeToTypeInfo(requestedType))
    ptype -> dec
  }

  def buildEmitEncoderF[T](t: PType, cb: EmitClassBuilder[_], ti: TypeInfo[T]): StagedEncoderF[T] = {
    assert(ti == typeToTypeInfo(t))
    buildEmitEncoderF[T](t, cb)
  }

  // FIXME: is there a better place for this to live?
  def decodeRDD(ctx: ExecuteContext, requestedType: Type, bytes: RDD[Array[Byte]]): (PType, ContextRDD[Long]) = {
    val (pt, dec) = buildDecoder(ctx, requestedType)
    (pt, ContextRDD.weaken(bytes).cmapPartitions { (ctx, it) =>
      RegionValue.fromBytes(dec, ctx.region, it)
    })
  }

  override def toString: String = super[Spec].toString
} 
Example 107
Source File: StringTests.scala    From CM-Well   with Apache License 2.0 5 votes vote down vote up
package cmwell.util.string.test

import java.io.{ByteArrayInputStream, InputStream}

import org.scalatest.{FunSpec, Matchers}
import cmwell.util.string._

class StringTests extends FunSpec with Matchers {
  private def mkString(is: InputStream) = {
    val buffSrc = scala.io.Source.fromInputStream(is)
    val res = buffSrc.mkString
    buffSrc.close()
    res
  }

  describe("mapInputStreamLines should") {
    it("return empty for empty input") {
      val input = new ByteArrayInputStream(Array.emptyByteArray)
      val result = mapInputStreamLines(input)(identity)
      result.read() should be(-1)
      input.close()
      result.close()
    }
    it("provide the delimiter as well") {
      val delim = '\n'
      val s = "provide the\ndelimiter as well"
      val expectedAmount = s.count(delim.==)

      val input = stringToInputStream(s)
      val result = mapInputStreamLines(input)(_.toUpperCase)
      mkString(result).count(delim.==) should be(expectedAmount)
      input.close()
      result.close()
    }
    it("not end with the delimiter") {
      val input = stringToInputStream("not end with\nthe delimiter")
      val result = mapInputStreamLines(input)(_.toUpperCase)
      mkString(result).last should be('R')
      input.close()
      result.close()
    }
    it("handle a concat mapper") {
      val input = stringToInputStream("handle\na\nconcat\nmapper")
      val result = mapInputStreamLines(input)(_ + " not")
      mkString(result) should be("handle not\na not\nconcat not\nmapper not")
      input.close()
      result.close()
    }
  }

} 
Example 108
Source File: S3KVPersisted.scala    From fotm-info   with MIT License 5 votes vote down vote up
package info.fotm.util

import java.io.ByteArrayInputStream

import com.amazonaws.services.s3.AmazonS3Client
import com.amazonaws.services.s3.model.{GetObjectRequest, ObjectListing, ObjectMetadata, S3ObjectInputStream}
import com.amazonaws.util.IOUtils
import com.twitter.bijection.Bijection

import scala.collection.JavaConverters._
import scala.collection.breakOut
import scala.util.Try

class S3KVPersisted[K, V](bucket: String, keyPathBijection: Bijection[K, String])
                         (implicit valueSerializer: Bijection[V, Array[Byte]])
  extends Persisted[Map[K, V]] {

  val s3client = new AmazonS3Client()

  override def save(state: Map[K, V]): Try[Unit] = Try {
    for ((k, v) <- state) {
      val path: String = keyPathBijection(k)
      val bytes = valueSerializer(v)
      val stream = new ByteArrayInputStream(bytes)
      val meta = new ObjectMetadata()
      meta.setContentLength(bytes.length)
      s3client.putObject(bucket, path, stream, meta)
    }
  }

  override def fetch(): Try[Map[K, V]] = Try {
    val listing: ObjectListing = s3client.listObjects(bucket)
    val bucketEntries = listing.getObjectSummaries.asScala.toList
    val s3keys = bucketEntries.map(_.getKey)

    val result: Map[K, V] = (
      for (s3key <- s3keys) yield {
        println(s"Loading $s3key...")
        val request = new GetObjectRequest(bucket, s3key)
        val s3object = s3client.getObject(request)
        val objectData: S3ObjectInputStream = s3object.getObjectContent
        val bytes = IOUtils.toByteArray(objectData)
        objectData.close()
        println(s"Loaded $s3key! Deserializing...")
        val k = keyPathBijection.inverse(s3key)
        val v = valueSerializer.inverse(bytes)
        println(s"Done with $s3key.")
        (k, v)
      })(breakOut)

    result
  }
} 
Example 109
Source File: SubEntryTest.scala    From lila-openingexplorer   with GNU Affero General Public License v3.0 5 votes vote down vote up
package lila.openingexplorer

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream }

import org.specs2.mutable._

import chess.{ Color, Pos }
import chess.format.Uci

class SubEntryTest extends Specification {

  private def pipe(entry: SubEntry): SubEntry = {
    val out = new ByteArrayOutputStream()
    entry.write(out)

    val in = new ByteArrayInputStream(out.toByteArray)
    SubEntry.read(in)
  }

  "master database packer" should {

    "pack a single game" in {
      val ref   = GameRef("ref00000", Some(Color.White), SpeedGroup.Blitz, 1230)
      val entry = SubEntry.fromGameRef(ref, Left(Uci.Move(Pos.E2, Pos.E4)))

      pipe(entry).gameRefs mustEqual List(ref)
    }

    "pack two games" in {
      val move  = Left(Uci.Move(Pos.D2, Pos.D4))
      val g1    = GameRef("g0000001", Some(Color.Black), SpeedGroup.Classical, 2300)
      val g2    = GameRef("g0000002", None, SpeedGroup.Classical, 2455)
      val entry = SubEntry.fromGameRef(g1, move).withGameRef(g2, move)

      pipe(entry).gameRefs mustEqual List(g2, g1)
    }
  }
} 
Example 110
Source File: PackHelperTest.scala    From lila-openingexplorer   with GNU Affero General Public License v3.0 5 votes vote down vote up
package lila.openingexplorer

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream }
import org.specs2.mutable._
import chess.format.Uci
import chess.Pos
import chess.{ King, Rook }

class PackHelperTest extends Specification with PackHelper {

  def pipeMove(move: Either[Uci.Move, Uci.Drop]): Either[Uci.Move, Uci.Drop] = {
    val out = new ByteArrayOutputStream()
    writeUci(out, move)

    val in = new ByteArrayInputStream(out.toByteArray)
    readUci(in)
  }

  "the pack helper" should {
    "correctly pack moves" in {
      val move = Uci.Move(Pos.E2, Pos.E3)
      pipeMove(Left(move)) mustEqual Left(move)
    }

    "correctly pack promotions" in {
      val move = Uci.Move(Pos.A7, Pos.A8, Some(Rook))
      pipeMove(Left(move)) mustEqual Left(move)
    }

    "correctly pack drops" in {
      val drop = Uci.Drop(King, Pos.H3)
      pipeMove(Right(drop)) mustEqual Right(drop)
    }
  }

  List(7, 127, 128, 129, 254, 255, 256, 257, 1234, 864197252500L).foreach { x =>
    "correctly pack uint: " + x in {
      val out = new ByteArrayOutputStream()
      writeUint(out, x)

      val in = new ByteArrayInputStream(out.toByteArray)
      readUint(in) mustEqual x
    }
  }
} 
Example 111
Source File: Json4sSerialization.scala    From kafka-serialization   with Apache License 2.0 5 votes vote down vote up
package com.ovoenergy.kafka.serialization.json4s

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStreamReader, OutputStreamWriter}
import java.nio.charset.StandardCharsets

import com.ovoenergy.kafka.serialization.core._
import org.apache.kafka.common.serialization.{Deserializer => KafkaDeserializer, Serializer => KafkaSerializer}
import org.json4s.Formats
import org.json4s.native.Serialization.{read, write}

import scala.reflect.ClassTag
import scala.reflect.runtime.universe._

trait Json4sSerialization {

  def json4sSerializer[T <: AnyRef](implicit jsonFormats: Formats): KafkaSerializer[T] = serializer { (_, data) =>
    val bout = new ByteArrayOutputStream()
    val writer = new OutputStreamWriter(bout, StandardCharsets.UTF_8)

    // TODO Use scala-arm
    try {
      write(data, writer)
      writer.flush()
    } finally {
      writer.close()
    }
    bout.toByteArray
  }

  def json4sDeserializer[T: TypeTag](implicit jsonFormats: Formats): KafkaDeserializer[T] = deserializer { (_, data) =>
    val tt = implicitly[TypeTag[T]]
    implicit val cl = ClassTag[T](tt.mirror.runtimeClass(tt.tpe))
    read[T](new InputStreamReader(new ByteArrayInputStream(data), StandardCharsets.UTF_8))
  }

} 
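A minimal round-trip sketch, not part of the kafka-serialization sources, showing how the serializer and deserializer above fit together. The Json4sExample object and the Event case class are assumptions introduced only for illustration, with json4s DefaultFormats supplying the implicit Formats.

import com.ovoenergy.kafka.serialization.json4s.Json4sSerialization
import org.json4s.{DefaultFormats, Formats}

// Hypothetical payload type, introduced only for this sketch.
case class Event(id: Int, name: String)

object Json4sExample extends Json4sSerialization {
  implicit val formats: Formats = DefaultFormats

  def roundTrip(): Event = {
    // Kafka (de)serializers take the topic name alongside the payload.
    val bytes = json4sSerializer[Event].serialize("events", Event(1, "created"))
    json4sDeserializer[Event].deserialize("events", bytes)
  }
}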
Example 112
Source File: GenericAvroSerializerSuite.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Input, Output}
import org.apache.avro.{Schema, SchemaBuilder}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SharedSparkContext, SparkFunSuite}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema : Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }

  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
} 
Example 113
Source File: MqttConfig.scala    From akka-iot-mqtt-v2   with GNU Lesser General Public License v3.0 5 votes vote down vote up
package akkaiot

import scala.concurrent.duration._

import java.io.Serializable
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
import java.io.ObjectInputStream
import java.io.ObjectOutputStream

import com.sandinh.paho.akka._
import com.sandinh.paho.akka.MqttPubSub._

object MqttConfig {
  val topic = "akka-iot-mqtt-topic"

  // Pub-Sub config
  val psConfig = PSConfig(
    brokerUrl = "tcp://test.mosquitto.org:1883",
    userName = null,
    password = null,
    stashTimeToLive = 1.minute,
    stashCapacity = 8000,
    reconnectDelayMin = 10.millis,
    reconnectDelayMax = 30.seconds,
    cleanSession = false
  )

  // Serialize object to byte array
  def writeToByteArray(obj: Any): Array[Byte] = {
    val baos = new ByteArrayOutputStream
    val oos = new ObjectOutputStream(baos)
    try {
      oos.writeObject(obj)
      baos.toByteArray
    } finally {
      try {
        oos.close
      } catch {
        case _: Throwable => // Do nothing
      }
    }
  }

  // Deserialize object from byte array
  def readFromByteArray[A](bytes: Array[Byte]): A = {
    val bais = new ByteArrayInputStream(bytes)
    val ois = new ObjectInputStream(bais)
    try {
      val obj = ois.readObject
      obj.asInstanceOf[A]
    } finally {
      try {
        ois.close
      } catch {
        case _: Throwable => // Do nothing
      }
    }
  }
} 
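A tiny round trip with the helpers above, again only as an illustration: the SensorReading case class is hypothetical and merely shows that any Serializable value can pass through writeToByteArray and readFromByteArray unchanged.

import akkaiot.MqttConfig

// Hypothetical message type used only for this sketch; case classes are Serializable.
case class SensorReading(deviceId: String, value: Double)

object MqttConfigExample {
  def main(args: Array[String]): Unit = {
    val bytes    = MqttConfig.writeToByteArray(SensorReading("device-1", 21.5))
    val restored = MqttConfig.readFromByteArray[SensorReading](bytes)
    assert(restored == SensorReading("device-1", 21.5))
  }
}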
Example 114
Source File: Sedes.scala    From shc   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.datasources.hbase

import java.io.ByteArrayInputStream

import org.apache.avro.Schema
import org.apache.avro.Schema.Type._
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io._
import org.apache.commons.io.output.ByteArrayOutputStream
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.types._

trait Sedes {
  def serialize(value: Any): Array[Byte]
  def deserialize(bytes: Array[Byte], start: Int, end: Int): Any
}

class DoubleSedes extends Sedes {
  override def serialize(value: Any): Array[Byte] = Bytes.toBytes(value.asInstanceOf[Double])
  override def deserialize(bytes: Array[Byte], start: Int, end: Int): Any = {
    Bytes.toDouble(bytes, start)
  }
} 
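A short round-trip check, illustrative only and not taken from shc: a Double written by serialize above should come back unchanged from deserialize.

import org.apache.spark.sql.execution.datasources.hbase.DoubleSedes

object SedesExample {
  def main(args: Array[String]): Unit = {
    val sedes = new DoubleSedes()
    val bytes = sedes.serialize(math.Pi)          // 8 bytes from Bytes.toBytes
    val back  = sedes.deserialize(bytes, 0, bytes.length)
    assert(back == math.Pi)
  }
}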
Example 115
Source File: package.scala    From pulsar4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.pulsar4s

import java.io.ByteArrayOutputStream
import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets

import com.sksamuel.avro4s.AvroSchema
import com.sksamuel.avro4s.AvroInputStream
import com.sksamuel.avro4s.AvroOutputStream
import com.sksamuel.avro4s.Decoder
import com.sksamuel.avro4s.Encoder
import com.sksamuel.avro4s.SchemaFor
import org.apache.pulsar.client.api.Schema
import org.apache.pulsar.common.schema.{SchemaInfo, SchemaType}

import scala.annotation.implicitNotFound


package object avro {

  @implicitNotFound("No Avro Schema for type ${T} found.")
  implicit def avroSchema[T: Manifest: SchemaFor: Encoder: Decoder]: Schema[T] = new Schema[T] {

    val schema: org.apache.avro.Schema = AvroSchema[T]

    override def clone(): Schema[T] = this

    override def encode(t: T): Array[Byte] = {
      val baos = new ByteArrayOutputStream
      val aos = AvroOutputStream.binary[T].to(baos).build(schema)
      aos.write(t)
      aos.flush()
      aos.close()
      baos.toByteArray()
    }

    override def decode(bytes: Array[Byte]): T = {
      val bais = new ByteArrayInputStream(bytes)
      val ais = AvroInputStream.binary[T].from(bais).build(schema)
      val first = ais.iterator.next()
      ais.close()
      first
    }

    override def getSchemaInfo: SchemaInfo =
      new SchemaInfo()
        .setName(manifest[T].runtimeClass.getCanonicalName)
        .setType(SchemaType.AVRO)
        .setSchema(schema.toString.getBytes(StandardCharsets.UTF_8))
  }
} 
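A hedged usage sketch: assuming avro4s can derive SchemaFor, Encoder and Decoder for a flat case class, the implicit avroSchema above yields a Pulsar Schema[T] whose encode and decode round-trip. The Sensor type below is illustrative and not part of pulsar4s.

import com.sksamuel.pulsar4s.avro._
import org.apache.pulsar.client.api.Schema

// Illustrative type only; avro4s derives its Avro schema at compile time.
case class Sensor(id: String, reading: Double)

object AvroSchemaExample {
  val schema: Schema[Sensor] = avroSchema[Sensor]

  def roundTrip(): Sensor = {
    val bytes = schema.encode(Sensor("s-1", 21.5))
    schema.decode(bytes) // expected to equal Sensor("s-1", 21.5)
  }
}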
Example 116
Source File: Logger.scala    From c4proto   with Apache License 2.0 5 votes vote down vote up
package ee.cone.c4actor_logback_impl

import java.io.ByteArrayInputStream
import java.nio.file.{Files, Path, Paths}
import java.nio.charset.StandardCharsets.UTF_8

import ch.qos.logback.classic.LoggerContext
import ch.qos.logback.classic.joran.JoranConfigurator
import com.typesafe.scalalogging.LazyLogging
import ee.cone.c4actor._
import ee.cone.c4di.c4
import org.slf4j.LoggerFactory

import scala.annotation.tailrec

@c4("BasicLoggingApp") final class LoggerTest extends Executable with Early with LazyLogging {
  def run(): Unit = if(Option(System.getenv("C4LOGBACK_TEST")).nonEmpty) iteration(0L)
  @tailrec private def iteration(v: Long): Unit = {
    Thread.sleep(1000)
    logger.warn(s"logger test $v")
    logger.debug(s"logger test $v")
    iteration(v+1L)
  }
}

@c4("BasicLoggingApp") final class DefLoggerConfigurator(
  config: ListConfig,
  catchNonFatal: CatchNonFatal
) extends LoggerConfigurator(
  config.get("C4LOGBACK_XML").map(Paths.get(_)) ::: Paths.get("/tmp/logback.xml") :: Nil,
  catchNonFatal,
  5000
) with Executable with Early

class LoggerConfigurator(paths: List[Path], catchNonFatal: CatchNonFatal, scanPeriod: Long) extends Executable {
  def run(): Unit = iteration("")
  @tailrec private def iteration(wasContent: String): Unit = {
    val content =
      s"""
      <configuration>
        <statusListener class="ch.qos.logback.core.status.NopStatusListener" />
        ${paths.map(path=>if(Files.exists (path)) new String(Files.readAllBytes(path), UTF_8) else "").mkString}
        <appender name="CON" class="ch.qos.logback.core.ConsoleAppender">
          <encoder><pattern>%d{HH:mm:ss.SSS} %-5level %logger{36} - %msg%n</pattern></encoder>
        </appender>
        <appender name="ASYNCCON" class="ch.qos.logback.classic.AsyncAppender">
          <discardingThreshold>0</discardingThreshold>
          <queueSize>1000000</queueSize>
          <appender-ref ref="CON" />
        </appender>
        <root level="INFO">
          <appender-ref ref="ASYNCCON" />
        </root>
        <shutdownHook/>
      </configuration>
      """
    if(wasContent != content) reconfigure(content)
    Thread.sleep(scanPeriod)
    iteration(content)
  }
  def reconfigure(content: String): Unit = catchNonFatal{
    println("logback reconfigure 2 started")
    val context = LoggerFactory.getILoggerFactory.asInstanceOf[LoggerContext]
    val configurator = new JoranConfigurator()
    configurator.setContext(context)
    context.reset()
    configurator.doConfigure(new ByteArrayInputStream(content.getBytes(UTF_8)))
    println("logback reconfigure 2 ok")
  }("reconfigure"){ e => () }
} 
Example 117
Source File: GenericAvroSerializerSuite.scala    From BigDatalog   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Output, Input}
import org.apache.avro.{SchemaBuilder, Schema}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SparkFunSuite, SharedSparkContext}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema : Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }

  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
} 
Example 118
Source File: SerializationTestHelper.scala    From xmlconfect   with Apache License 2.0 5 votes vote down vote up
package com.mthaler.xmlconfect

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream }

object SerializationTestHelper {

  
  def serializeDeserialize[T](obj: T): T = {
    val bout = new ByteArrayOutputStream()
    val out = new ObjectOutputStream(bout)
    out.writeObject(obj)
    val bin = new ByteArrayInputStream(bout.toByteArray)
    val in = new ObjectInputStream(bin)
    in.readObject().asInstanceOf[T]
  }
} 
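In a spec, the helper above can assert that a value survives Java serialization unchanged. The Config case class here is a stand-in used only for this sketch.

import com.mthaler.xmlconfect.SerializationTestHelper

// Hypothetical value type; case classes are Serializable by default.
case class Config(name: String, retries: Int)

object SerializationTestHelperExample {
  def main(args: Array[String]): Unit = {
    val original = Config("xmlconfect", 3)
    val copy     = SerializationTestHelper.serializeDeserialize(original)
    assert(copy == original) // structural equality survives the round trip
  }
}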
Example 119
Source File: ToCurlConverterTest.scala    From sttp   with Apache License 2.0 5 votes vote down vote up
package sttp.client

import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets

import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class ToCurlConverterTest extends AnyFlatSpec with Matchers with ToCurlConverterTestExtension {
  private val localhost = uri"http://localhost"

  it should "convert base request" in {
    basicRequest
      .get(uri"$localhost")
      .toCurl shouldBe """curl -L --max-redirs 32 -X GET 'http://localhost'"""
  }

  it should "convert request with method to curl" in {
    basicRequest.get(localhost).toCurl should include("-X GET")
    basicRequest.post(localhost).toCurl should include("-X POST")
    basicRequest.put(localhost).toCurl should include("-X PUT")
    basicRequest.delete(localhost).toCurl should include("-X DELETE")
    basicRequest.patch(localhost).toCurl should include("-X PATCH")
    basicRequest.head(localhost).toCurl should include("-X HEAD")
    basicRequest.options(localhost).toCurl should include("-X OPTIONS")
  }

  it should "convert request with header" in {
    basicRequest.header("User-Agent", "myapp").get(localhost).toCurl should include(
      """-H 'User-Agent: myapp'"""
    )
  }

  it should "convert request with body" in {
    basicRequest.body(Map("name" -> "john", "org" -> "sml")).post(localhost).toCurl should include(
      """-H 'Content-Type: application/x-www-form-urlencoded' -H 'Content-Length: 17' -F 'name=john&org=sml'"""
    )
    basicRequest.body("name=john").post(localhost).toCurl should include(
      """-H 'Content-Type: text/plain; charset=utf-8' -H 'Content-Length: 9' --data 'name=john'"""
    )
    basicRequest.body("name=john", StandardCharsets.ISO_8859_1.name()).post(localhost).toCurl should include(
      """-H 'Content-Type: text/plain; charset=ISO-8859-1' -H 'Content-Length: 9' --data 'name=john'"""
    )
    basicRequest.body("name='john'").post(localhost).toCurl should include(
      """-H 'Content-Type: text/plain; charset=utf-8' -H 'Content-Length: 11' --data 'name=\'john\''"""
    )
    basicRequest.body("name=\"john\"").post(localhost).toCurl should include(
      """-H 'Content-Type: text/plain; charset=utf-8' -H 'Content-Length: 11' --data 'name="john"'"""
    )
  }

  it should "convert request with options" in {
    basicRequest.followRedirects(false).get(localhost).toCurl should not include "-L"
    basicRequest.maxRedirects(11).get(localhost).toCurl should include("--max-redirs 11")
  }

  it should "put placeholder when sending binary data" in {
    val testBodyBytes = "this is the body".getBytes("UTF-8")

    val curl = basicRequest
      .post(localhost)
      .body(new ByteArrayInputStream(testBodyBytes))
      .toCurl
    curl should include("--data-binary <PLACEHOLDER>")
  }

  it should "render multipart form data if content is a plain string" in {
    basicRequest.multipartBody(multipart("k1", "v1"), multipart("k2", "v2")).post(localhost).toCurl should include(
      """--form 'k1=v1' --form 'k2=v2'"""
    )
  }
} 
Example 120
Source File: RetryWhenDefaultTest.scala    From sttp   with Apache License 2.0 5 votes vote down vote up
package sttp.client

import java.io.ByteArrayInputStream

import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers
import sttp.client
import sttp.model.StatusCode

class RetryWhenDefaultTest extends AnyFlatSpec with Matchers {
  private val simpleRequest = basicRequest.get(uri"http://localhost")

  it should "not retry 200 response" in {
    RetryWhen.Default(simpleRequest, Right(Response.ok(""))) shouldBe false
  }

  it should "retry 500 response" in {
    RetryWhen.Default(simpleRequest, Right(Response("", StatusCode.InternalServerError))) shouldBe true
  }

  it should "retry connection exceptions" in {
    RetryWhen.Default(simpleRequest, Left(new client.SttpClientException.ConnectException(null))) shouldBe true
  }

  it should "not retry read exceptions" in {
    RetryWhen.Default(simpleRequest, Left(new client.SttpClientException.ReadException(null))) shouldBe false
  }

  it should "not retry input stream bodies" in {
    RetryWhen.Default(
      simpleRequest.body(new ByteArrayInputStream(new Array[Byte](8))),
      Right(Response("", StatusCode.InternalServerError))
    ) shouldBe false
  }
} 
Example 121
Source File: BackupWriter.scala    From recogito2   with Apache License 2.0 5 votes vote down vote up
package controllers.document

import controllers.HasConfig
import java.io.{File, FileInputStream, FileOutputStream, BufferedInputStream, ByteArrayInputStream, InputStream, PrintWriter}
import java.nio.file.Paths
import java.math.BigInteger
import java.security.{MessageDigest, DigestInputStream}
import java.util.UUID
import java.util.zip.{ZipEntry, ZipOutputStream}
import services.HasDate
import services.annotation.{Annotation, AnnotationService}
import services.document.{ExtendedDocumentMetadata, DocumentToJSON}
import services.generated.tables.records.{DocumentRecord, DocumentFilepartRecord}
import play.api.libs.json.Json
import play.api.libs.Files.TemporaryFileCreator
import scala.concurrent.{ExecutionContext, Future}
import storage.TempDir
import storage.uploads.Uploads

trait BackupWriter extends HasBackupValidation { self: HasConfig =>
  
  // Frontend annotation format
  import services.annotation.FrontendAnnotation._
  
  private val BUFFER_SIZE = 2048
  
  private def writeToZip(inputStream: InputStream, filename: String, zip: ZipOutputStream) = {
    zip.putNextEntry(new ZipEntry(filename))
     
    val md = MessageDigest.getInstance(ALGORITHM)    
    val in = new DigestInputStream(new BufferedInputStream(inputStream), md)

    val data = new Array[Byte](BUFFER_SIZE)
    var count: Int = 0

    while ({ count = in.read(data, 0, BUFFER_SIZE); count } > -1) {
      zip.write(data, 0, count)
    }

    in.close()
    zip.closeEntry()
    
    new BigInteger(1, md.digest()).toString(16)
  }
  
  def createBackup(doc: ExtendedDocumentMetadata)(implicit ctx: ExecutionContext, uploads: Uploads, 
      annotations: AnnotationService, tmpFile: TemporaryFileCreator): Future[File] = {
    
    def getFileAsStream(owner: String, documentId: String, filename: String) = {
      val dir = uploads.getDocumentDir(owner, documentId).get // Fail hard if the dir doesn't exist
      new FileInputStream(new File(dir, filename))
    }
    
    def getManifestAsStream() = {
      val manifest = "Recogito-Version: 2.0.1-alpha"
      new ByteArrayInputStream(manifest.getBytes)
    }
    
    def getMetadataAsStream(doc: ExtendedDocumentMetadata) = {
      
      // DocumentRecord JSON serialization
      import services.document.DocumentToJSON._
      
      val json = Json.prettyPrint(Json.toJson((doc.document, doc.fileparts)))
      new ByteArrayInputStream(json.getBytes)
    }
    
    def getAnnotationsAsStream(docId: String, annotations: Seq[Annotation], parts: Seq[DocumentFilepartRecord]): InputStream = {
      val path = Paths.get(TempDir.get()(self.config), s"${docId}_annotations.json")
      val tmp = tmpFile.create(path)
      val writer = new PrintWriter(path.toFile)
      annotations.foreach(a => writer.println(Json.stringify(Json.toJson(a))))
      writer.close()
      new FileInputStream(path.toFile)
    }
    
    Future {
      tmpFile.create(Paths.get(TempDir.get()(self.config), s"${doc.id}.zip"))
    } flatMap { zipFile =>
      val zipStream = new ZipOutputStream(new FileOutputStream(zipFile.path.toFile))

      writeToZip(getManifestAsStream(), "manifest", zipStream)
      val metadataHash = writeToZip(getMetadataAsStream(doc), "metadata.json", zipStream)

      val fileHashes = doc.fileparts.map { part =>
        writeToZip(getFileAsStream(doc.ownerName, doc.id, part.getFile), "parts" + File.separator + part.getFile, zipStream)
      }

      annotations.findByDocId(doc.id).map { annotations =>
        val annotationsHash = writeToZip(getAnnotationsAsStream(doc.id, annotations.map(_._1), doc.fileparts), "annotations.jsonl", zipStream)
        
        val signature = computeSignature(metadataHash, fileHashes, annotationsHash)
        writeToZip(new ByteArrayInputStream(signature.getBytes), "signature", zipStream)
        
        zipStream.close()
        zipFile.path.toFile
      }
    }
  }
  
} 
Example 122
Source File: DefineMacroCmd.scala    From piglet   with Apache License 2.0 5 votes vote down vote up
package dbis.piglet.op.cmd

import java.io.{ObjectInputStream, ByteArrayInputStream, ObjectOutputStream, ByteArrayOutputStream}
import dbis.piglet.plan.DataflowPlan
import scala.collection.mutable.ListBuffer
import dbis.piglet.op.{Pipe,PigOperator}


case class DefineMacroCmd(
    out: Pipe, 
    macroName: String, 
    params: Option[List[String]], 
    stmts: List[PigOperator]
  ) extends PigOperator(out) {

  var subPlan: Option[DataflowPlan] = None
  var inPipes = List[Pipe]()

  def deepClone(): DefineMacroCmd = {
      val baos = new ByteArrayOutputStream()
      val oos = new ObjectOutputStream(baos)
      oos.writeObject(this)
      val bais = new ByteArrayInputStream(baos.toByteArray())
      val ois = new ObjectInputStream(bais)
      ois.readObject().asInstanceOf[DefineMacroCmd]
  }

  override def preparePlan: Unit = {
    // (implementation omitted in this example)
  }

  def pipeParamPositions(): List[Int] = {
    val l = ListBuffer[Int]()
    inPipes.foreach(i => {
      val pos = params.get.indexOf(i.name.substring(1))
      if (pos >= 0) l += pos
    })
    l.toList
  }
}
Example 123
Source File: ManifestUploader.scala    From teamcity-s3-plugin   with Apache License 2.0 5 votes vote down vote up
package com.gu.teamcity

import java.io.ByteArrayInputStream
import java.util.Date

import jetbrains.buildServer.messages.{BuildMessage1, DefaultMessagesInfo, Status}
import jetbrains.buildServer.serverSide.{BuildServerAdapter, SRunningBuild}
import org.joda.time.{DateTime, DateTimeZone}
import org.json4s.JsonAST.JObject
import org.json4s.JsonDSL._
import org.json4s.native.JsonMethods._

import scala.util.{Failure, Success}

class ManifestUploader(config: S3ConfigManager, s3: S3) extends BuildServerAdapter {

  override def beforeBuildFinish(runningBuild: SRunningBuild) {
    import scala.collection.convert.wrapAsScala._

    if (!runningBuild.isHasInternalArtifactsOnly) {
      val properties = Seq(
        "projectName" -> S3Plugin.cleanFullName(runningBuild),
        "buildNumber" -> runningBuild.getBuildNumber,
        "startTime" -> new DateTime(runningBuild.getStartDate).withZone(DateTimeZone.UTC).toString //Joda default is ISO8601
      ) ++ runningBuild.getRevisions.flatMap(revision => Seq(
        "revision" -> revision.getRevision,
        "vcsURL" -> revision.getRoot.getProperties.get("url")
      )) ++ Option(runningBuild.getBranch).map(b =>
        "branch" -> b.getDisplayName
      ).orElse(runningBuild.getVcsRootEntries.headOption.map(r =>
        "branch" -> r.getProperties.get("branch")
      ))

      val propertiesJSON = pretty(render(properties.foldLeft(JObject())(_ ~ _)))
      val jsBytes = propertiesJSON.getBytes("UTF-8")

      config.buildManifestBucket.map { bucket =>
        s3.upload(bucket, runningBuild, "build.json", new ByteArrayInputStream(jsBytes), jsBytes.length) match {			
          case Failure(e) => runningBuild.getBuildLog().message(s"Error uploading manifest: ${e.getMessage}",
              Status.ERROR,new Date,DefaultMessagesInfo.MSG_BUILD_FAILURE,DefaultMessagesInfo.SOURCE_ID,null)
          case Success(_) => runningBuild.getBuildLog().message("Manifest S3 upload complete",
              Status.NORMAL,new Date,DefaultMessagesInfo.MSG_TEXT,DefaultMessagesInfo.SOURCE_ID,null) 
        }
      }
    }
  }

  private def normalMessage(text: String) =
    new BuildMessage1(DefaultMessagesInfo.SOURCE_ID, DefaultMessagesInfo.MSG_TEXT, Status.NORMAL, new Date, text)
} 
Example 124
Source File: ModelStateSerde.scala    From kafka-with-akka-streams-kafka-streams-tutorial   with Apache License 2.0 5 votes vote down vote up
package com.lightbend.scala.kafkastreams.store.store

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}
import java.util

import com.lightbend.model.modeldescriptor.ModelDescriptor
import com.lightbend.scala.modelServer.model.PMML.PMMLModel
import com.lightbend.scala.modelServer.model.tensorflow.TensorFlowModel
import com.lightbend.scala.modelServer.model.{ModelToServeStats, ModelWithDescriptor}
import com.lightbend.scala.kafkastreams.store.StoreState
import org.apache.kafka.common.serialization.{Deserializer, Serde, Serializer}


class ModelStateSerde extends Serde[StoreState] {

  private val mserializer = new ModelStateSerializer()
  private val mdeserializer = new ModelStateDeserializer()

  override def deserializer() = mdeserializer

  override def serializer() = mserializer

  override def configure(configs: util.Map[String, _], isKey: Boolean) = {}

  override def close() = {}
}

object ModelStateDeserializer {
  val factories = Map(
    ModelDescriptor.ModelType.PMML.index -> PMMLModel,
    ModelDescriptor.ModelType.TENSORFLOW.index -> TensorFlowModel
  )
}

class ModelStateDeserializer extends Deserializer[StoreState] {

  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {}

  override def deserialize(topic: String, data: Array[Byte]): StoreState = {
    if(data != null) {
      val input = new DataInputStream(new ByteArrayInputStream(data))
      new StoreState(ModelWithDescriptor.readModel(input), ModelWithDescriptor.readModel(input),
        ModelToServeStats.readServingInfo(input), ModelToServeStats.readServingInfo(input))
    }
    else new StoreState()
  }

  override def close(): Unit = {}

}

class ModelStateSerializer extends Serializer[StoreState] {

  private val bos = new ByteArrayOutputStream()

  override def serialize(topic: String, state: StoreState): Array[Byte] = {
    bos.reset()
    val output = new DataOutputStream(bos)
    ModelWithDescriptor.writeModel(output, state.currentModel.orNull)
    ModelWithDescriptor.writeModel(output, state.newModel.orNull)
    ModelToServeStats.writeServingInfo(output, state.currentState.orNull)
    ModelToServeStats.writeServingInfo(output, state.newState.orNull)
    try {
      output.flush()
      output.close()
    } catch {
      case t: Throwable =>
    }
    bos.toByteArray
  }

  override def close(): Unit = {}

  override def configure(configs: util.Map[String, _], isKey: Boolean) = {}
} 
Example 125
Source File: StringParserTest.scala    From MoVE   with Mozilla Public License 2.0 5 votes vote down vote up
package de.thm.move.loader.parser

import java.io.{ByteArrayInputStream, InputStreamReader}
import java.nio.charset.StandardCharsets

import scala.util._
import de.thm.move.MoveSpec
import de.thm.move.loader.parser.PropertyParser._
import de.thm.move.loader.parser.ast._

class StringParserTest extends MoveSpec {
  val parser = new ModelicaParser()
  def parseString(str:String): String = {
    parser.stringLiteral(str)
  }

  "The parser for Modelica strings" should "parse simple strings" in {
    val s = "this is a super awesome test"
    true shouldBe true
  }

  "PropertyParser#transformEscapeChars" should
    "transform literal escape characters to ansi escape characters" in {
      val s = "this\\t\\tis a\\n test\\rmöb\\b"
      parser.transformEscapeChars(s) shouldBe "this\t\tis a\n test\rmöb\b"

      val s2 = "\\n\\n\\t"
      parser.transformEscapeChars(s2) shouldBe "\n\n\t"
  }

  it should "return the same string for strings without escape characters" in {
    val s = "this is awesome"
    parser.transformEscapeChars(s) shouldBe s
  }
} 
Example 126
Source File: package.scala    From MoVE   with Mozilla Public License 2.0 5 votes vote down vote up
package de.thm.move.loader

import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets

import scala.util._
import org.scalatest.Matchers._
import de.thm.move.loader.parser.ast._

package object parser {
  private val parser: ModelicaParserLike = new ModelicaParser
  def parse(str:String): Try[List[Model]] =
    parser.parse(new ByteArrayInputStream(str.getBytes(StandardCharsets.UTF_8)))

  val withParseSuccess: String => Model = parse(_) match {
    case Success(elem) => elem.head
    case Failure(exc) => throw exc
  }

  val withException: String => Unit = parse(_) match {
    case Success(_) => throw new IllegalStateException("Expected failure")
    case Failure(_) => //yeay
  }

  def iconEqual(icon1:Model, icon2:Model): Unit = {
    icon2.name shouldBe icon1.name
    (icon1.annot, icon2.annot) match {
      case (Icon(system1, shapes1, _,_),Icon(system2,shapes2,_,_)) =>
        system2 shouldBe system1
        shapes2 shouldBe shapes1
      case _ => throw new AssertionError(s"Given icon1 and icon2 aren't both Icons!")
    }
  }

  def annotationModel(modelname:String, content:String): String =
    s"""
       |model $modelname
       | annotation(
       |  $content
       | );
       |end $modelname;
     """.stripMargin

  def graphicModel(modelname:String, content:String):String = {
    annotationModel(modelname,
    s"""
       |Icon( graphics = {
       |$content
       |})
     """.stripMargin
    )
  }
} 
Example 127
Source File: ModelSerializationTestHelper.scala    From aloha   with MIT License 5 votes vote down vote up
package com.eharmony.aloha

import java.io.{ObjectInputStream, ByteArrayInputStream, ByteArrayOutputStream, ObjectOutputStream}


trait ModelSerializationTestHelper {
  def serializeDeserializeRoundTrip[A <: java.io.Serializable](a: A): A = {
    val baos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(baos)
    oos.writeObject(a)
    val bais = new ByteArrayInputStream(baos.toByteArray)
    val ois = new ObjectInputStream(bais)
    val out = ois.readObject()
    out.asInstanceOf[A]
  }
} 
Example 128
Source File: CypherParser.scala    From ingraph   with Eclipse Public License 1.0 5 votes vote down vote up
package ingraph.compiler.cypher2gplan

import java.io.ByteArrayInputStream

import ingraph.compiler.exceptions.CompilerException
import org.apache.log4j.{Level, Logger}
import org.eclipse.emf.common.util.URI
import org.eclipse.emf.ecore.resource.Resource
import org.eclipse.xtext.diagnostics.Severity
import org.eclipse.xtext.resource.{XtextResource, XtextResourceSet}
import org.eclipse.xtext.util.CancelIndicator
import org.eclipse.xtext.validation.CheckMode
import org.slizaa.neo4j.opencypher.OpenCypherStandaloneSetup
import org.slizaa.neo4j.opencypher.openCypher.Cypher

import scala.collection.JavaConverters._


object CypherParser {
  def parseFile(fileName: String): Cypher = {
    Logger.getLogger("org.eclipse.xtext").setLevel(Level.ERROR)

    // https://typefox.io/how-and-why-use-xtext-without-the-ide
    val injector = new OpenCypherStandaloneSetup().createInjectorAndDoEMFRegistration()
    val resourceSet = injector.getInstance(classOf[XtextResourceSet])
    val filePath = "../queries/" + fileName + ".cypher"
    val resource = resourceSet.getResource(URI.createFileURI(filePath), true)
    validateAndThrowError(resource)

    resource.getContents.get(0).asInstanceOf[Cypher]
  }

  def parseString(queryString: String): Cypher = {
    Logger.getLogger("org.eclipse.xtext").setLevel(Level.ERROR)

    // https://wiki.eclipse.org/Xtext/FAQ
    val injector = new OpenCypherStandaloneSetup().createInjectorAndDoEMFRegistration()
    val resourceSet = injector.getInstance(classOf[XtextResourceSet])
    val resource = resourceSet.createResource(URI.createURI("http:/example.cypher"))
    val in = new ByteArrayInputStream(queryString.getBytes())
    resource.load(in, resourceSet.getLoadOptions())
    validateAndThrowError(resource)

    resource.getContents.get(0).asInstanceOf[Cypher]
  }

  def validateAndThrowError(resource: Resource) {
    var seenError = false
    var firstError: String = null
    val validator = resource.asInstanceOf[XtextResource].getResourceServiceProvider.getResourceValidator
    val issues = validator.validate(resource, CheckMode.ALL, CancelIndicator.NullImpl).asScala
    for (issue <- issues) {
      if (issue.getSeverity == Severity.ERROR && !seenError) {
        seenError = true
        firstError = issue.getMessage
      }
      println(issue.getMessage)
    }
    if (seenError) {
      throw new CompilerException(s"Error during cypher parse, the first error was: ${firstError}")
    }
  }
} 
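A brief usage sketch, not part of the ingraph sources: parseString returns the EMF AST root for a query string, and validation errors surface as a CompilerException. The query text below is arbitrary.

import ingraph.compiler.cypher2gplan.CypherParser
import org.slizaa.neo4j.opencypher.openCypher.Cypher

object CypherParserExample {
  def main(args: Array[String]): Unit = {
    val ast: Cypher = CypherParser.parseString("MATCH (n) RETURN n")
    println(ast.eClass().getName) // EMF metadata of the parsed root node
  }
}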
Example 129
Source File: JacksonParserSuite.scala    From circe-jackson   with Apache License 2.0 5 votes vote down vote up
package io.circe.jackson

import cats.data.Validated
import com.fasterxml.jackson.core.JsonToken
import com.fasterxml.jackson.databind.{ ObjectMapper, ObjectReader }
import io.circe.Json
import io.circe.testing.ParserTests
import java.io.{ ByteArrayInputStream, File }

import scala.io.Source

class JacksonParserSuite extends CirceSuite with JacksonInstances {
  checkAll("Parser", ParserTests(`package`).fromString(arbitraryCleanedJson, shrinkJson))
  checkAll(
    "Parser",
    ParserTests(`package`).fromFunction[Array[Byte]]("fromByteArray")(
      s => s.getBytes("UTF-8"),
      p => p.parseByteArray _,
      p => p.decodeByteArray[Json] _,
      p => p.decodeByteArrayAccumulating[Json] _
    )(arbitraryCleanedJson, shrinkJson)
  )

  "parse and decode(Accumulating)" should "fail on invalid input" in forAll { (s: String) =>
    assert(parse(s"Not JSON $s").isLeft)
    assert(decode[Json](s"Not JSON $s").isLeft)
    assert(decodeAccumulating[Json](s"Not JSON $s").isInvalid)
  }

  "parseFile and decodeFile(Accumulating)" should "parse a JSON file" in {
    val url = getClass.getResource("/io/circe/jackson/examples/glossary.json")
    val file = new File(url.toURI)

    assert(decodeFile[Json](file) === Right(glossary))
    assert(decodeFileAccumulating[Json](file) == Validated.valid(glossary))
    assert(parseFile(file) === Right(glossary))
  }

  "parseByteArray and decodeByteArray(Accumulating)" should "parse an array of elementAsBytes" in {
    val bytes = glossaryAsBytes

    assert(decodeByteArray[Json](bytes) === Right(glossary))
    assert(decodeByteArrayAccumulating[Json](bytes) === Validated.valid(glossary))
    assert(parseByteArray(bytes) === Right(glossary))
  }

  for (elementCount <- 1 to 4) {
    "CirceJsonDeserializer" should s"be useable with Jackson's MappingIterator " +
      s"with ${elementCount} elements in array" in {
      val input = new ByteArrayInputStream(createJsonArrayAsBytes(glossaryAsBytes, elementCount))
      val objectMapper = new ObjectMapper()
      objectMapper.registerModule(CirceJsonModule)
      val jsonParser = objectMapper.getFactory.createParser(input)

      assert(jsonParser.nextToken() == JsonToken.START_ARRAY)
      assert(jsonParser.nextToken() == JsonToken.START_OBJECT)

      val reader = createReader(objectMapper).forType(classOf[Json])
      val iterator = reader.readValues[Json](jsonParser)
      var counter = 0
      while (iterator.hasNext) {
        val glossaryFromIterator = iterator.next()
        assert(glossary == glossaryFromIterator)
        counter = counter + 1
      }
      assert(counter == elementCount)
    }
  }

  // workaround warnings from compiler with Jackson 2.5
  @unchecked
  private def createReader(objectMapper: ObjectMapper): ObjectReader =
    objectMapper.reader()

  private def createJsonArrayAsBytes(elementAsBytes: Array[Byte], elementCount: Int): Array[Byte] = {
    val byteArrayOutput = new java.io.ByteArrayOutputStream()
    byteArrayOutput.write('[')
    for (i <- 1 to elementCount) {
      if (i != 1) {
        byteArrayOutput.write(',')
      }
      byteArrayOutput.write(elementAsBytes)
    }
    byteArrayOutput.write(']')
    byteArrayOutput.toByteArray
  }

  private def glossaryAsBytes = {
    val stream = getClass.getResourceAsStream("/io/circe/jackson/examples/glossary.json")
    val source = Source.fromInputStream(stream)
    val bytes = source.map(_.toByte).toArray
    source.close()
    bytes
  }
} 
Example 130
Source File: Zip.scala    From scala-clippy   with Apache License 2.0 5 votes vote down vote up
package util

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.zip.{GZIPInputStream, GZIPOutputStream}

object Zip {
  private val BufferSize = 512

  def compress(string: String): Array[Byte] = {
    val os  = new ByteArrayOutputStream(string.length() / 5)
    val gos = new GZIPOutputStream(os)
    gos.write(string.getBytes("UTF-8"))
    gos.close()
    os.close()
    os.toByteArray
  }

  def decompress(compressed: Array[Byte]): String = {
    val is        = new ByteArrayInputStream(compressed)
    val gis       = new GZIPInputStream(is, BufferSize)
    val string    = new StringBuilder()
    val data      = new Array[Byte](BufferSize)
    var bytesRead = gis.read(data)
    while (bytesRead != -1) {
      string.append(new String(data, 0, bytesRead, "UTF-8"))
      bytesRead = gis.read(data)
    }
    gis.close()
    is.close()
    string.toString()
  }
} 
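A quick round trip with the helpers above; the size printed at the end is indicative only and depends on the input.

import util.Zip

object ZipExample {
  def main(args: Array[String]): Unit = {
    val text       = "clippy " * 1000 // highly repetitive, so it compresses well
    val compressed = Zip.compress(text)
    val restored   = Zip.decompress(compressed)
    assert(restored == text)
    println(s"${text.length} chars -> ${compressed.length} bytes compressed")
  }
}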
Example 131
Source File: PlayRequestToRawBody.scala    From tapir   with Apache License 2.0 5 votes vote down vote up
package sttp.tapir.server.play

import java.io.ByteArrayInputStream
import java.nio.charset.Charset

import akka.stream.Materializer
import akka.util.ByteString
import play.api.mvc.{RawBuffer, Request}
import play.core.parsers.Multipart
import sttp.model.Part
import sttp.tapir.{RawBodyType, RawPart}
import sttp.tapir.internal._

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future

class PlayRequestToRawBody(serverOptions: PlayServerOptions) {
  def apply[R](bodyType: RawBodyType[R], charset: Option[Charset], request: Request[RawBuffer], body: ByteString)(implicit
      mat: Materializer
  ): Future[R] = {
    bodyType match {
      case RawBodyType.StringBody(defaultCharset) => Future(new String(body.toArray, charset.getOrElse(defaultCharset)))
      case RawBodyType.ByteArrayBody              => Future(body.toArray)
      case RawBodyType.ByteBufferBody             => Future(body.toByteBuffer)
      case RawBodyType.InputStreamBody            => Future(body.toArray).map(new ByteArrayInputStream(_))
      case RawBodyType.FileBody =>
        Future(java.nio.file.Files.write(serverOptions.temporaryFileCreator.create().path, body.toArray))
          .map(p => p.toFile)
      case m: RawBodyType.MultipartBody => multiPartRequestToRawBody(request, m, body)
    }
  }

  private def multiPartRequestToRawBody[R](request: Request[RawBuffer], m: RawBodyType.MultipartBody, body: ByteString)(implicit
      mat: Materializer
  ): Future[Seq[RawPart]] = {
    val bodyParser = serverOptions.playBodyParsers.multipartFormData(
      Multipart.handleFilePartAsTemporaryFile(serverOptions.temporaryFileCreator)
    )
    bodyParser.apply(request).run(body).flatMap {
      case Left(_) =>
        Future.failed(new IllegalArgumentException("Unable to parse multipart form data.")) // TODO
      case Right(value) =>
        val dataParts = value.dataParts.map {
          case (key, value) =>
            apply(
              m.partType(key).get,
              charset(m.partType(key).get),
              request,
              ByteString(value.flatMap(_.getBytes).toArray)
            ).map(body => Part(key, body).asInstanceOf[RawPart])
        }.toSeq

        val fileParts = value.files.map(f => {
          apply(
            m.partType(f.key).get,
            charset(m.partType(f.key).get),
            request,
            ByteString.apply(java.nio.file.Files.readAllBytes(f.ref.path))
          ).map(body =>
            Part(f.key, body, Map(f.key -> f.dispositionType, Part.FileNameDispositionParam -> f.filename), Nil)
              .asInstanceOf[RawPart]
          )
        })
        Future.sequence(dataParts ++ fileParts)
    }
  }
} 
Example 132
Source File: package.scala    From tapir   with Apache License 2.0 5 votes vote down vote up
package sttp.tapir.server.vertx

import java.io.{ByteArrayInputStream, InputStream}

import io.vertx.core.buffer.Buffer
import io.vertx.scala.core.Vertx

import scala.concurrent.Future

package object encoders {

  private val bufferSize = 1024

  
  private[vertx] def inputStreamToBuffer(is: InputStream, vertx: Vertx): Future[Buffer] = {
    is match {
      case _: ByteArrayInputStream => Future.successful(inputStreamToBufferUnsafe(is))
      case _                       => vertx.executeBlocking(() => inputStreamToBufferUnsafe(is))
    }
  }

  private def inputStreamToBufferUnsafe(is: InputStream): Buffer = {
    val buffer = Buffer.buffer()
    val buf = new Array[Byte](bufferSize)
    // Loops on available(), which is exact for in-memory streams such as ByteArrayInputStream
    // but may report zero before EOF for other stream types.
    while (is.available() > 0) {
      val read = is.read(buf)
      buffer.appendBytes(buf, 0, read)
    }
    buffer
  }

} 
Example 133
Source File: FinatraRequestToRawBody.scala    From tapir   with Apache License 2.0 5 votes vote down vote up
package sttp.tapir.server.finatra

import java.io.ByteArrayInputStream
import java.nio.ByteBuffer
import java.nio.charset.Charset

import com.twitter.finagle.http.Request
import com.twitter.finatra.http.request.RequestUtils
import com.twitter.io.Buf
import com.twitter.util.Future
import org.apache.commons.fileupload.FileItemHeaders
import sttp.model.{Part, Header}
import sttp.tapir.{RawPart, RawBodyType}

import scala.collection.immutable.Seq
import scala.collection.JavaConverters._

class FinatraRequestToRawBody(serverOptions: FinatraServerOptions) {
  def apply[R](bodyType: RawBodyType[R], body: Buf, charset: Option[Charset], request: Request): Future[R] = {
    def asByteArray: Array[Byte] = {
      val array = new Array[Byte](body.length)
      body.write(array, 0)
      array
    }

    def asByteBuffer: ByteBuffer = {
      val buffer = ByteBuffer.allocate(body.length)
      body.write(buffer)
      buffer.flip()
      buffer
    }

    bodyType match {
      case RawBodyType.StringBody(defaultCharset) => Future.value[R](new String(asByteArray, charset.getOrElse(defaultCharset)))
      case RawBodyType.ByteArrayBody              => Future.value[R](asByteArray)
      case RawBodyType.ByteBufferBody             => Future.value[R](asByteBuffer)
      case RawBodyType.InputStreamBody            => Future.value[R](new ByteArrayInputStream(asByteArray))
      case RawBodyType.FileBody                   => serverOptions.createFile(asByteArray)
      case m: RawBodyType.MultipartBody           => multiPartRequestToRawBody(request, m)
    }
  }

  private def parseDispositionParams(headerValue: Option[String]): Map[String, String] =
    headerValue
      .map(
        _.split(";")
          .map(_.trim)
          .tail
          .map(_.split("="))
          .map(array => array(0) -> array(1))
          .toMap
      )
      .getOrElse(Map.empty)

  private def getCharset(contentType: Option[String]): Option[Charset] =
    contentType.flatMap(
      _.split(";")
        .map(_.trim)
        .tail
        .map(_.split("="))
        .map(array => array(0) -> array(1))
        .toMap
        .get("charset")
        .map(Charset.forName)
    )

  private def multiPartRequestToRawBody(request: Request, m: RawBodyType.MultipartBody): Future[Seq[RawPart]] = {
    def fileItemHeaders(headers: FileItemHeaders): Seq[Header] = {
      headers.getHeaderNames.asScala
        .flatMap { name => headers.getHeaders(name).asScala.map(name -> _) }
        .toSeq
        .filter(_._1.toLowerCase != "content-disposition")
        .map { case (k, v) => Header(k, v) }
        .toList
    }

    Future
      .collect(
        RequestUtils
          .multiParams(request)
          .flatMap {
            case (name, multiPartItem) =>
              val dispositionParams: Map[String, String] =
                parseDispositionParams(Option(multiPartItem.headers.getHeader("content-disposition")))
              val charset = getCharset(multiPartItem.contentType)

              for {
                partType <- m.partType(name)
                futureBody = apply(partType, Buf.ByteArray.Owned(multiPartItem.data), charset, request)
              } yield futureBody
                .map(body =>
                  Part(name, body, otherDispositionParams = dispositionParams - "name", headers = fileItemHeaders(multiPartItem.headers))
                    .asInstanceOf[RawPart]
                )
          }
          .toSeq
      )
      .map(_.toList)
  }
} 
Example 134
Source File: Http4sRequestToRawBody.scala    From tapir   with Apache License 2.0 5 votes vote down vote up
package sttp.tapir.server.http4s

import java.io.ByteArrayInputStream

import cats.effect.{Blocker, ContextShift, Sync}
import cats.implicits._
import fs2.Chunk
import org.http4s.headers.{`Content-Disposition`, `Content-Type`}
import org.http4s.{Charset, EntityDecoder, Request, multipart}
import sttp.model.{Header, Part}
import sttp.tapir.{RawPart, RawBodyType}

class Http4sRequestToRawBody[F[_]: Sync: ContextShift](serverOptions: Http4sServerOptions[F]) {
  def apply[R](body: fs2.Stream[F, Byte], bodyType: RawBodyType[R], charset: Option[Charset], req: Request[F]): F[R] = {
    def asChunk: F[Chunk[Byte]] = body.compile.to(Chunk)
    def asByteArray: F[Array[Byte]] = body.compile.to(Chunk).map(_.toByteBuffer.array())

    bodyType match {
      case RawBodyType.StringBody(defaultCharset) => asByteArray.map(new String(_, charset.map(_.nioCharset).getOrElse(defaultCharset)))
      case RawBodyType.ByteArrayBody              => asByteArray
      case RawBodyType.ByteBufferBody             => asChunk.map(_.toByteBuffer)
      case RawBodyType.InputStreamBody            => asByteArray.map(new ByteArrayInputStream(_))
      case RawBodyType.FileBody =>
        serverOptions.createFile(serverOptions.blockingExecutionContext, req).flatMap { file =>
          val fileSink = fs2.io.file.writeAll(file.toPath, Blocker.liftExecutionContext(serverOptions.blockingExecutionContext))
          body.through(fileSink).compile.drain.map(_ => file)
        }
      case m: RawBodyType.MultipartBody =>
        // TODO: use MultipartDecoder.mixedMultipart once available?
        implicitly[EntityDecoder[F, multipart.Multipart[F]]].decode(req, strict = false).value.flatMap {
          case Left(failure) =>
            throw new IllegalArgumentException("Cannot decode multipart body: " + failure) // TODO
          case Right(mp) =>
            val rawPartsF: Vector[F[RawPart]] = mp.parts
              .flatMap(part => part.name.flatMap(name => m.partType(name)).map((part, _)).toList)
              .map { case (part, codecMeta) => toRawPart(part, codecMeta, req).asInstanceOf[F[RawPart]] }

            val rawParts: F[Vector[RawPart]] = rawPartsF.sequence

            rawParts.asInstanceOf[F[R]] // R is Seq[RawPart]
        }
    }
  }

  private def toRawPart[R](part: multipart.Part[F], partType: RawBodyType[R], req: Request[F]): F[Part[R]] = {
    val dispositionParams = part.headers.get(`Content-Disposition`).map(_.parameters).getOrElse(Map.empty)
    val charset = part.headers.get(`Content-Type`).flatMap(_.charset)
    apply(part.body, partType, charset, req)
      .map(r =>
        Part(
          part.name.getOrElse(""),
          r,
          otherDispositionParams = dispositionParams - Part.NameDispositionParam,
          headers = part.headers.toList.map(h => Header(h.name.value, h.value))
        )
      )
  }
} 
Example 135
Source File: ConsoleModuleTest.scala    From scala-server-toolkit   with MIT License 5 votes vote down vote up
package com.avast.sst.jvm.system.console

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import cats.effect.SyncIO
import org.scalatest.funsuite.AnyFunSuite

import scala.{Console => SConsole}

class ConsoleModuleTest extends AnyFunSuite {

  test("Console input") {
    SConsole.withIn(new ByteArrayInputStream("test input\n".getBytes("UTF-8"))) {
      val test = for {
        line <- ConsoleModule.make[SyncIO].readLine
      } yield assert(line === "test input")

      test.unsafeRunSync()
    }
  }

  test("Console output") {
    val out = new ByteArrayOutputStream()
    SConsole.withOut(out) {
      val test = for {
        _ <- ConsoleModule.make[SyncIO].printLine("test output")
      } yield ()

      test.unsafeRunSync()
    }

    assert(out.toString("UTF-8") === "test output\n")
  }

  test("Console error") {
    val out = new ByteArrayOutputStream()
    SConsole.withErr(out) {
      val test = for {
        _ <- ConsoleModule.make[SyncIO].printLineToError("test output")
      } yield ()

      test.unsafeRunSync()
    }

    assert(out.toString("UTF-8") === "test output\n")
  }

} 
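The same stdin/stdout redirection works without the ConsoleModule wrapper. A minimal sketch using only scala.Console, scala.io.StdIn and the JDK streams (object name is made up; trim is used so the check does not depend on the platform line separator):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, PrintStream}

import scala.{Console => SConsole}

object ConsoleRedirectSketch extends App {
  val in  = new ByteArrayInputStream("test input\n".getBytes("UTF-8"))
  val out = new ByteArrayOutputStream()

  SConsole.withIn(in) {
    SConsole.withOut(new PrintStream(out, true, "UTF-8")) {
      val line = scala.io.StdIn.readLine() // reads from the byte array, not the real stdin
      println(s"echo: $line")              // captured by the ByteArrayOutputStream
    }
  }

  assert(out.toString("UTF-8").trim == "echo: test input")
} 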
Example 136
Source File: DataWeaveCLITest.scala    From data-weave-native   with Apache License 2.0 5 votes vote down vote up
package org.mule.weave.dwnative.cli

import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
import java.io.PrintStream

import org.scalatest.FreeSpec
import org.scalatest.Matchers

import scala.io.Source

class DataWeaveCLITest extends FreeSpec with Matchers {

  "should work with output application/json" in {
    val out = System.out
    try {
      val stream = new ByteArrayOutputStream()
      System.setOut(new PrintStream(stream, true))
      new DataWeaveCLIRunner().run(Array("output application/json --- (1 to 3)[0]"))
      val source = Source.fromBytes(stream.toByteArray, "UTF-8")
      val result = source.mkString
      result.trim shouldBe "1"
    } finally {
      System.setOut(out)
      println("Finish OK 3")
    }
  }

  "should work with simple script and not output" in {
    val defaultOut = System.out
    try {
      val stream = new ByteArrayOutputStream()
      System.setOut(new PrintStream(stream, true))
      new DataWeaveCLIRunner().run(Array("(1 to 3)[0]"))
      val source = Source.fromBytes(stream.toByteArray, "UTF-8")
      val result = source.mkString
      result.trim shouldBe "1"
    } finally {
      System.setOut(defaultOut)
    }
  }

  "should work ok when sending payload from stdin" in {
    val out = System.out
    val in = System.in
    try {
      val input =
        """[
          |  1,
          |  2,
          |  3
          |]
        """.stripMargin.trim
      val stream = new ByteArrayOutputStream()
      System.setOut(new PrintStream(stream, true))
      System.setIn(new ByteArrayInputStream(input.getBytes("UTF-8")))
      new DataWeaveCLIRunner().run(Array("payload[0]"))
      val source = Source.fromBytes(stream.toByteArray, "UTF-8")
      val result = source.mkString.trim
      source.close()
      result.trim shouldBe "1"
    } finally {
      System.setOut(out)
      System.setIn(in)
      println("Finish OK 2")
    }
  }

  "should work with light formats" in {
    val out = System.out
    val in = System.in
    try {
      val input =
        """[{
          |  "a" : 1,
          |  "b" : 2,
          |  "c" : 3
          |}]
        """.stripMargin.trim
      val stream = new ByteArrayOutputStream()
      System.setOut(new PrintStream(stream, true))
      System.setIn(new ByteArrayInputStream(input.getBytes("UTF-8")))
      new DataWeaveCLIRunner().run(Array("input payload json output csv header=false ---payload"))
      val source = Source.fromBytes(stream.toByteArray, "UTF-8")
      val result = source.mkString.trim
      source.close()
      result.trim shouldBe "1,2,3"
    } finally {
      System.setOut(out)
      System.setIn(in)
      println("Finish OK 2")
    }
  }



} 
Example 137
Source File: ProxyRequestCodec.scala    From aws-lambda-scala   with MIT License 5 votes vote down vote up
package io.github.mkotsur.aws.codecs

import java.io.ByteArrayInputStream
import cats.syntax.either.catsSyntaxEither
import io.circe.generic.auto._
import io.github.mkotsur.aws.handler.CanDecode
import io.github.mkotsur.aws.proxy.ProxyRequest
import shapeless.Generic

import scala.language.{higherKinds, postfixOps}

private[aws] trait ProxyRequestCodec extends AllCodec with FutureCodec {

  
  def GenericProxyRequestOf[T] = shapeless.Generic[ProxyRequest[T]]

  implicit def canDecodeProxyRequest[T](implicit canDecode: CanDecode[T]) = CanDecode.instance[ProxyRequest[T]] { is =>
    {
      def extractBody(s: ProxyRequest[String]) = s.body match {
        case Some(bodyString) => canDecode.readStream(new ByteArrayInputStream(bodyString.getBytes)).map(Option.apply)
        case None             => Right(None)
      }

      def produceProxyResponse(decodedRequestString: ProxyRequest[String], bodyOption: Option[T]) = {
        val reqList = Generic[ProxyRequest[String]].to(decodedRequestString)
        Generic[ProxyRequest[T]].from((bodyOption :: reqList.reverse.tail).reverse)
      }

      for (decodedRequest$String <- CanDecode[ProxyRequest[String]].readStream(is);
           decodedBodyOption     <- extractBody(decodedRequest$String))
        yield produceProxyResponse(decodedRequest$String, decodedBodyOption)
    }
  }

} 
Example 138
Source File: Serialization.scala    From seahorse-workflow-executor   with Apache License 2.0 5 votes vote down vote up
package io.deepsense.commons.serialization

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

trait Serialization {

  def deserialize[T](bytes: Array[Byte]): T = {
    val bufferIn = new ByteArrayInputStream(bytes)
    val streamIn = new ObjectInputStream(bufferIn)
    try {
      streamIn.readObject().asInstanceOf[T]
    } finally {
      streamIn.close()
    }
  }

  def serialize[T](objectToSerialize: T): Array[Byte] = {
    val byteArrayOutputStream: ByteArrayOutputStream = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(byteArrayOutputStream)
    try {
      oos.writeObject(objectToSerialize)
      oos.flush()
      byteArrayOutputStream.toByteArray
    } finally {
      oos.close()
    }
  }

  def serializeDeserialize[T](obj: T): T = deserialize[T](serialize[T](obj))
}

object Serialization extends Serialization 
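A hedged usage sketch of the Serialization trait above, assuming the io.deepsense.commons.serialization.Serialization object is on the classpath; any value that is Serializable at runtime (here a plain immutable Map) can be round-tripped:

import io.deepsense.commons.serialization.Serialization

object SerializationRoundTrip extends App {
  val original = Map("a" -> 1, "b" -> 2)

  val bytes    = Serialization.serialize(original)               // Java serialization to Array[Byte]
  val restored = Serialization.deserialize[Map[String, Int]](bytes) // back through ByteArrayInputStream

  assert(restored == original)
  println(s"round-tripped ${bytes.length} bytes")
} 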
Example 139
Source File: GoogleDriveClient.scala    From seahorse-workflow-executor   with Apache License 2.0 5 votes vote down vote up
package io.deepsense.deeplang.doperations.readwritedataframe.googlestorage

import java.io.{ByteArrayInputStream, FileOutputStream}
import java.util

import com.google.api.client.googleapis.auth.oauth2.GoogleCredential
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport
import com.google.api.client.http.FileContent
import com.google.api.client.json.gson.GsonFactory
import com.google.api.services.drive.model.File
import com.google.api.services.drive.{Drive, DriveScopes}

import io.deepsense.commons.resources.ManagedResource
import io.deepsense.commons.utils.LoggerForCallerClass
import io.deepsense.deeplang.doperations.inout.CsvParameters.ColumnSeparatorChoice

private[googlestorage] object GoogleDriveClient {

  val logger = LoggerForCallerClass()

  val googleSheetCsvSeparator = ColumnSeparatorChoice.Comma()

  private val ApplicationName = "Seahorse"

  private val Scopes = util.Arrays.asList(DriveScopes.DRIVE)

  def uploadCsvFileAsGoogleSheet(
      credentials: GoogleCretendialsJson,
      sheetId: GoogleSheetId,
      filePath: String
    ): Unit = {
    val fileMetadata = new File().setMimeType("application/vnd.google-apps.spreadsheet")
    val mediaContent = new FileContent("text/csv", new java.io.File(filePath))

    driveService(credentials).files.update(sheetId, fileMetadata, mediaContent).execute
  }

  def downloadGoogleSheetAsCsvFile(
      credentials: GoogleCretendialsJson,
      sheetId: GoogleSheetId,
      filePath: String
    ): Unit = {
    val file = new java.io.File(filePath)
    file.getParentFile.mkdirs()

    ManagedResource(new FileOutputStream(file)) { fos =>
      driveService(credentials).files().export(sheetId, "text/csv").executeMediaAndDownloadTo(fos)
      logger.info(s"Downloaded google sheet id=$sheetId to the file $filePath")
    }
  }

  private def driveService(serviceAccountCredentials: String): Drive = {
    val credential = {
      val in = new ByteArrayInputStream(serviceAccountCredentials.getBytes)
      GoogleCredential.fromStream(in).createScoped(Scopes)
    }
    new Drive.Builder(
      GoogleNetHttpTransport.newTrustedTransport(),
      jsonFactory,
      credential
    ).setApplicationName(ApplicationName).build
  }

  // Default choice is JacksonFactory. However spark depends on Jackson as well
  // and google/spark jackson versions are binary incompatible with each other.
  private val jsonFactory = GsonFactory.getDefaultInstance

} 
Example 140
Source File: PythonNotebook.scala    From seahorse-workflow-executor   with Apache License 2.0 5 votes vote down vote up
package io.deepsense.deeplang.doperations

import java.io.ByteArrayInputStream

import io.deepsense.commons.utils.Version
import io.deepsense.deeplang.DOperation.Id
import io.deepsense.deeplang.ExecutionContext
import io.deepsense.deeplang.doperables.dataframe.DataFrame
import scala.concurrent.Await
import scala.concurrent.duration.Duration
import scala.reflect.runtime.{universe => ru}
import scala.util.Failure

import io.deepsense.commons.rest.client.NotebookRestClient

case class PythonNotebook()
  extends Notebook {

  override val id: Id = "e76ca616-0322-47a5-b390-70c9668265dd"
  override val name: String = "Python Notebook"
  override val description: String = "Creates a Python notebook with access to the DataFrame"

  override val since: Version = Version(1, 0, 0)
  override val notebookType: String = "python"

  override protected def execute(dataFrame: DataFrame)(context: ExecutionContext): Unit = {
    context.dataFrameStorage.setInputDataFrame(0, dataFrame.sparkDataFrame)
    headlessExecution(context)
  }

} 
Example 141
Source File: S3Brain.scala    From sumobot   with Apache License 2.0 5 votes vote down vote up
package com.sumologic.sumobot.brain

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.Properties

import akka.actor.{Actor, Props}
import com.amazonaws.auth.{AWSCredentials, AWSStaticCredentialsProvider}
import com.amazonaws.services.s3.{AmazonS3Client, AmazonS3ClientBuilder}
import com.amazonaws.services.s3.model.ObjectMetadata
import com.sumologic.sumobot.brain.Brain._

import scala.collection.JavaConverters._
import scala.collection.immutable

object S3Brain {
  def props(credentials: AWSCredentials,
            bucket: String,
            s3Key: String): Props = Props(classOf[S3Brain], credentials, bucket, s3Key)
}

class S3Brain(credentials: AWSCredentials,
              bucket: String,
              s3Key: String) extends Actor {

  private val s3Client = AmazonS3ClientBuilder.standard()
    .withCredentials(new AWSStaticCredentialsProvider(credentials)).build

  private var brainContents: Map[String, String] = loadFromS3()

  override def receive: Receive = {
    case Store(key, value) =>
      brainContents += (key -> value)
      saveToS3(brainContents)

    case Remove(key) =>
      brainContents -= key
      saveToS3(brainContents)

    case Retrieve(key) =>
      brainContents.get(key) match {
        case Some(value) => sender() ! ValueRetrieved(key, value)
        case None => sender() ! ValueMissing(key)
      }

    case ListValues(prefix) =>
      sender() ! ValueMap(brainContents.filter(_._1.startsWith(prefix)))
  }

  private def loadFromS3(): Map[String, String] = {
    if (s3Client.doesBucketExistV2(bucket)) {
      val props = new Properties()
      props.load(s3Client.getObject(bucket, s3Key).getObjectContent)
      immutable.Map(props.asScala.toSeq: _*)
    } else {
      Map.empty
    }
  }

  private def saveToS3(contents: Map[String, String]): Unit = {
    if (!s3Client.doesBucketExistV2(bucket)) {
      s3Client.createBucket(bucket)
    }

    val props = new Properties()
    props.putAll(contents.asJava)
    val out = new ByteArrayOutputStream()
    props.store(out, "")
    out.flush()
    out.close()
    val in = new ByteArrayInputStream(out.toByteArray)
    s3Client.putObject(bucket, s3Key, in, new ObjectMetadata())
  }
} 
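S3Brain persists its key/value state as a java.util.Properties blob before uploading it. A self-contained sketch of that Properties-to-bytes round trip with no S3 involved (names are made up for the demo):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.Properties

import scala.collection.JavaConverters._
import scala.collection.immutable

object PropertiesRoundTrip extends App {
  val contents = Map("channel" -> "#ops", "greeting" -> "hello")

  // Serialize the map the same way saveToS3 does before uploading.
  val props = new Properties()
  props.putAll(contents.asJava)
  val out = new ByteArrayOutputStream()
  props.store(out, "")

  // Load it back the way loadFromS3 reads the S3 object content.
  val loaded = new Properties()
  loaded.load(new ByteArrayInputStream(out.toByteArray))
  val restored = immutable.Map(loaded.asScala.toSeq: _*)

  assert(restored == contents)
} 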
Example 142
Source File: IncrementalCache.scala    From sbt-idea-plugin   with Apache License 2.0 5 votes vote down vote up
package org.jetbrains.sbtidea.packaging.artifact

import java.io.{BufferedOutputStream, ByteArrayInputStream, ObjectInputStream, ObjectOutputStream}
import java.nio.file.{Files, Path}

import sbt.Keys.TaskStreams

import scala.collection.mutable

trait IncrementalCache extends AutoCloseable {
  def fileChanged(in: Path): Boolean
}

class DumbIncrementalCache extends IncrementalCache {
  override def fileChanged(in: Path): Boolean = true
  override def close(): Unit = ()
}

class PersistentIncrementalCache(private val root: Path)(implicit private val streams: TaskStreams) extends IncrementalCache {

  private val FILENAME = "sbtidea.cache"
  private val myFile   = root.resolve(FILENAME)
  private val myData   = loadOrCreate()

  type Data = mutable.HashMap[String, Long]

  private def loadFromDisk(): Either[String, Data] = {
    if (!Files.exists(myFile) || Files.size(myFile) <= 0)
      return Left("Cache file is empty or doesn't exist")
    val data = Files.readAllBytes(myFile)
    using(new ObjectInputStream(new ByteArrayInputStream(data))) { stream =>
      Right(stream.readObject().asInstanceOf[Data])
    }
  }

  private def loadOrCreate(): Data = loadFromDisk() match {
    case Left(message) =>
      streams.log.info(message)
      new Data()
    case Right(value) => value
  }

  private def saveToDisk(): Unit = {
    import java.nio.file.StandardOpenOption._
    if (!Files.exists(myFile.getParent)) {
      Files.createDirectories(myFile.getParent)
      Files.createFile(myFile)
    }
    using(new ObjectOutputStream(
          new BufferedOutputStream(
            Files.newOutputStream(myFile, CREATE, WRITE, TRUNCATE_EXISTING)))) { stream =>
      stream.writeObject(myData)
    }
  }

  override def close(): Unit = saveToDisk()

  override def fileChanged(in: Path): Boolean = {
    val newTimestamp = Files.getLastModifiedTime(in).toMillis
    val inStr = in.toString
    val lastTimestamp = myData.getOrElseUpdate(inStr, newTimestamp)
    val result = newTimestamp > lastTimestamp
    myData.put(inStr, newTimestamp)
    result
  }
} 
Example 143
Source File: TableRowJsonIOTest.scala    From ratatool   with Apache License 2.0 5 votes vote down vote up
package com.spotify.ratatool.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File}

import com.spotify.ratatool.Schemas
import com.spotify.ratatool.scalacheck._
import org.scalacheck.Gen
import scala.jdk.CollectionConverters._
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class TableRowJsonIOTest extends AnyFlatSpec with Matchers {

  
  private def floatGen = Gen.choose[Float](0.0F, 1.0F)

  private val schema = Schemas.tableSchema
  private val data = Gen.listOfN(100,
    tableRowOf(schema)
      .amend(Gen.oneOf(
        Gen.const(null),
        floatGen
      ))(_.getRecord("nullable_fields").set("float_field"))
      .amend(floatGen)(_.getRecord("required_fields").set("float_field"))
      .amend(Gen.nonEmptyListOf(floatGen)
        .map(_.asJava)
      )(_.getRecord("repeated_fields").set("float_field"))
  ).sample.get

  "TableRowJsonIO" should "work with stream" in {
    val out = new ByteArrayOutputStream()
    TableRowJsonIO.writeToOutputStream(data, out)
    val in = new ByteArrayInputStream(out.toByteArray)
    val result = TableRowJsonIO.readFromInputStream(in).toList.map(_.toString)
    result should equal (data.map(_.toString))
  }

  it should "work with file" in {
    val file = File.createTempFile("ratatool-", ".json")
    file.deleteOnExit()
    TableRowJsonIO.writeToFile(data, file)
    val result = TableRowJsonIO.readFromFile(file).toList.map(_.toString)
    result should equal (data.map(_.toString))
  }

} 
Example 144
Source File: TestHelper.scala    From odsc-west-streaming-trends   with GNU General Public License v3.0 5 votes vote down vote up
package com.twilio.open.streaming.trend.discovery

import java.io.{ByteArrayInputStream, InputStream}
import java.nio.charset.StandardCharsets

import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.google.protobuf.Message
import com.googlecode.protobuf.format.JsonFormat
import com.holdenkarau.spark.testing.{LocalSparkContext, SparkContextProvider}
import com.twilio.open.protocol.Calls.CallEvent
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers, Suite}
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.Seq
import scala.io.Source
import scala.reflect.ClassTag
import scala.reflect.classTag

object TestHelper {
  val log: Logger = LoggerFactory.getLogger("com.twilio.open.streaming.trend.discovery.TestHelper")
  val mapper: ObjectMapper = {
    val m = new ObjectMapper()
    m.registerModule(DefaultScalaModule)
  }

  val jsonFormat: JsonFormat = new JsonFormat

  def loadScenario[T<: Message : ClassTag](file: String): Seq[T] = {
    val fileString = Source.fromFile(file).mkString
    val parsed = mapper.readValue(fileString, classOf[Sceanario])
    parsed.input.map { data =>
      val json = mapper.writeValueAsString(data)
      convert[T](json)
    }
  }

  def convert[T<: Message : ClassTag](json: String): T = {
    val clazz = classTag[T].runtimeClass
    val builder = clazz.getMethod("newBuilder").invoke(clazz).asInstanceOf[Message.Builder]
    try {
      val input: InputStream = new ByteArrayInputStream(json.getBytes())
      jsonFormat.merge(input, builder)
      builder.build().asInstanceOf[T]
    } catch {
      case e: Exception =>
        throw e
    }
  }

  def asMockKafkaDataFrame(event: CallEvent): MockKafkaDataFrame = {
    val key = event.getEventId.getBytes(StandardCharsets.UTF_8)
    val value = event.toByteArray
    MockKafkaDataFrame(key, value)
  }

}

case class MockKafkaDataFrame(key: Array[Byte], value: Array[Byte])


@SerialVersionUID(1L)
case class KafkaDataFrame(key: Array[Byte], topic: Array[Byte], value: Array[Byte]) extends Serializable

case class Sceanario(input: Seq[Any], expected: Option[Any] = None)

trait SparkSqlTest extends BeforeAndAfterAll with SparkContextProvider {
  self: Suite =>

  @transient var _sparkSql: SparkSession = _
  @transient private var _sc: SparkContext = _

  override def sc: SparkContext = _sc

  def conf: SparkConf

  def sparkSql: SparkSession = _sparkSql

  override def beforeAll() {
    _sparkSql = SparkSession.builder().config(conf).getOrCreate()

    _sc = _sparkSql.sparkContext
    setup(_sc)
    super.beforeAll()
  }

  override def afterAll() {
    try {
      _sparkSql.close()
      _sparkSql = null
      LocalSparkContext.stop(_sc)
      _sc = null
    } finally {
      super.afterAll()
    }
  }

} 
Example 145
Source File: Serialization.scala    From seahorse   with Apache License 2.0 5 votes vote down vote up
package ai.deepsense.commons.serialization

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

trait Serialization {

  def deserialize[T](bytes: Array[Byte]): T = {
    val bufferIn = new ByteArrayInputStream(bytes)
    val streamIn = new ObjectInputStream(bufferIn)
    try {
      streamIn.readObject().asInstanceOf[T]
    } finally {
      streamIn.close()
    }
  }

  def serialize[T](objectToSerialize: T): Array[Byte] = {
    val byteArrayOutputStream: ByteArrayOutputStream = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(byteArrayOutputStream)
    try {
      oos.writeObject(objectToSerialize)
      oos.flush()
      byteArrayOutputStream.toByteArray
    } finally {
      oos.close()
    }
  }

  def serializeDeserialize[T](obj: T): T = deserialize[T](serialize[T](obj))
}

object Serialization extends Serialization 
Example 146
Source File: GoogleDriveClient.scala    From seahorse   with Apache License 2.0 5 votes vote down vote up
package ai.deepsense.deeplang.doperations.readwritedataframe.googlestorage

import java.io.{ByteArrayInputStream, FileOutputStream}
import java.util

import com.google.api.client.googleapis.auth.oauth2.GoogleCredential
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport
import com.google.api.client.http.FileContent
import com.google.api.client.json.gson.GsonFactory
import com.google.api.services.drive.model.File
import com.google.api.services.drive.{Drive, DriveScopes}

import ai.deepsense.commons.resources.ManagedResource
import ai.deepsense.commons.utils.LoggerForCallerClass
import ai.deepsense.deeplang.doperations.inout.CsvParameters.ColumnSeparatorChoice

private[googlestorage] object GoogleDriveClient {

  val logger = LoggerForCallerClass()

  val googleSheetCsvSeparator = ColumnSeparatorChoice.Comma()

  private val ApplicationName = "Seahorse"

  private val Scopes = util.Arrays.asList(DriveScopes.DRIVE)

  def uploadCsvFileAsGoogleSheet(
      credentials: GoogleCretendialsJson,
      sheetId: GoogleSheetId,
      filePath: String
    ): Unit = {
    val fileMetadata = new File().setMimeType("application/vnd.google-apps.spreadsheet")
    val mediaContent = new FileContent("text/csv", new java.io.File(filePath))

    driveService(credentials).files.update(sheetId, fileMetadata, mediaContent).execute
  }

  def downloadGoogleSheetAsCsvFile(
      credentials: GoogleCretendialsJson,
      sheetId: GoogleSheetId,
      filePath: String
    ): Unit = {
    val file = new java.io.File(filePath)
    file.getParentFile.mkdirs()

    ManagedResource(new FileOutputStream(file)) { fos =>
      driveService(credentials).files().export(sheetId, "text/csv").executeMediaAndDownloadTo(fos)
      logger.info(s"Downloaded google sheet id=$sheetId to the file $filePath")
    }
  }

  private def driveService(serviceAccountCredentials: String): Drive = {
    val credential = {
      val in = new ByteArrayInputStream(serviceAccountCredentials.getBytes)
      GoogleCredential.fromStream(in).createScoped(Scopes)
    }
    new Drive.Builder(
      GoogleNetHttpTransport.newTrustedTransport(),
      jsonFactory,
      credential
    ).setApplicationName(ApplicationName).build
  }

  // Default choice is JacksonFactory. However spark depends on Jackson as well
  // and google/spark jackson versions are binary incompatible with each other.
  private val jsonFactory = GsonFactory.getDefaultInstance

} 
Example 147
Source File: PythonNotebook.scala    From seahorse   with Apache License 2.0 5 votes vote down vote up
package ai.deepsense.deeplang.doperations

import java.io.ByteArrayInputStream

import ai.deepsense.commons.utils.Version
import ai.deepsense.deeplang.DOperation.Id
import ai.deepsense.deeplang.ExecutionContext
import ai.deepsense.deeplang.doperables.dataframe.DataFrame
import scala.concurrent.Await
import scala.concurrent.duration.Duration
import scala.reflect.runtime.{universe => ru}
import scala.util.Failure

import ai.deepsense.commons.rest.client.NotebookRestClient

case class PythonNotebook()
  extends Notebook {

  override val id: Id = "e76ca616-0322-47a5-b390-70c9668265dd"
  override val name: String = "Python Notebook"
  override val description: String = "Creates a Python notebook with access to the DataFrame"

  override val since: Version = Version(1, 0, 0)
  override val notebookType: String = "python"

  override protected def execute(dataFrame: DataFrame)(context: ExecutionContext): Unit = {
    context.dataFrameStorage.setInputDataFrame(0, dataFrame.sparkDataFrame)
    headlessExecution(context)
  }

} 
Example 148
Source File: JavaSerde.scala    From affinity   with Apache License 2.0 5 votes vote down vote up
package io.amient.affinity.core.serde

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectOutputStream}

import akka.actor.ExtendedActorSystem
import akka.serialization.JavaSerializer
import akka.util.ClassLoaderObjectInputStream

class JavaSerde(system: ExtendedActorSystem) extends Serde[AnyRef] {

  override def identifier: Int = 101

  override def close(): Unit = ()

  override def fromBytes(bytes: Array[Byte]): AnyRef = {
    val in = new ClassLoaderObjectInputStream(system.dynamicAccess.classLoader, new ByteArrayInputStream(bytes))
    val obj = JavaSerializer.currentSystem.withValue(system) { in.readObject }
    in.close()
    obj
  }

  override def toBytes(o: AnyRef): Array[Byte] = {
    val bos = new ByteArrayOutputStream
    val out = new ObjectOutputStream(bos)
    JavaSerializer.currentSystem.withValue(system) { out.writeObject(o) }
    out.close()
    bos.toByteArray
  }

} 
Example 149
Source File: SeqSerde.scala    From affinity   with Apache License 2.0 5 votes vote down vote up
package io.amient.affinity.core.serde.collection

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

import akka.actor.ExtendedActorSystem
import com.typesafe.config.Config
import io.amient.affinity.core.serde.{AbstractWrapSerde, Serde, Serdes}

class SeqSerde(serdes: Serdes) extends AbstractWrapSerde(serdes) with Serde[Seq[Any]] {

  def this(system: ExtendedActorSystem) = this(Serde.tools(system))
  def this(config: Config) = this(Serde.tools(config))

  override def identifier: Int = 141

  override def close(): Unit = ()

  override protected def fromBytes(bytes: Array[Byte]): Seq[Any] = {
    val di = new DataInputStream(new ByteArrayInputStream(bytes))
    val numItems = di.readInt()
    val result = ((1 to numItems) map { _ =>
      val len = di.readInt()
      val item = new Array[Byte](len)
      di.read(item)
      fromBinaryWrapped(item)
    }).toList
    di.close()
    result
  }

  override def toBytes(seq: Seq[Any]): Array[Byte] = {
    val os = new ByteArrayOutputStream()
    val d = new DataOutputStream(os)
    d.writeInt(seq.size)
    for (a: Any <- seq) a match {
      case ref: AnyRef =>
        val item = toBinaryWrapped(ref)
        d.writeInt(item.length)
        d.write(item)
    }
    os.close
    os.toByteArray
  }
} 
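SeqSerde frames its payload as an element count followed by a 4-byte length prefix and the raw bytes of each element. A self-contained sketch of that length-prefixed framing using only DataOutputStream/DataInputStream, without Akka or the wrapping serde (object and method names are made up):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

object LengthPrefixedFraming extends App {
  // Encode: element count, then (length, bytes) for each element.
  def encode(items: Seq[Array[Byte]]): Array[Byte] = {
    val os = new ByteArrayOutputStream()
    val d  = new DataOutputStream(os)
    d.writeInt(items.size)
    items.foreach { item =>
      d.writeInt(item.length)
      d.write(item)
    }
    d.flush()
    os.toByteArray
  }

  // Decode: mirror of encode, reading each frame fully before moving on.
  def decode(bytes: Array[Byte]): Seq[Array[Byte]] = {
    val di = new DataInputStream(new ByteArrayInputStream(bytes))
    val n  = di.readInt()
    (1 to n).map { _ =>
      val item = new Array[Byte](di.readInt())
      di.readFully(item) // readFully avoids the short-read risk of a bare read()
      item
    }
  }

  val original = Seq("one", "two", "three").map(_.getBytes("UTF-8"))
  val decoded  = decode(encode(original))
  assert(decoded.map(new String(_, "UTF-8")) == Seq("one", "two", "three"))
} 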
Example 150
Source File: SetSerde.scala    From affinity   with Apache License 2.0 5 votes vote down vote up
package io.amient.affinity.core.serde.collection

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

import akka.actor.ExtendedActorSystem
import com.typesafe.config.Config
import io.amient.affinity.core.serde.{AbstractWrapSerde, Serde, Serdes}

class SetSerde(serdes: Serdes) extends AbstractWrapSerde(serdes) with Serde[Set[Any]] {

  def this(system: ExtendedActorSystem) = this(Serde.tools(system))
  def this(config: Config) = this(Serde.tools(config))

  override def identifier: Int = 142

  override protected def fromBytes(bytes: Array[Byte]): Set[Any] = {
    val di = new DataInputStream(new ByteArrayInputStream(bytes))
    val numItems = di.readInt()
    val result = ((1 to numItems) map { _ =>
      val len = di.readInt()
      val item = new Array[Byte](len)
      di.read(item)
      fromBinaryWrapped(item)
    }).toSet
    di.close()
    result
  }

  override def toBytes(set: Set[Any]): Array[Byte] = {
    val os = new ByteArrayOutputStream()
    val d = new DataOutputStream(os)
    d.writeInt(set.size)
    for (a: Any <- set) a match {
      case ref: AnyRef =>
        val item = toBinaryWrapped(ref)
        d.writeInt(item.length)
        d.write(item)
    }
    os.close
    os.toByteArray
  }

  override def close() = ()
} 
Example 151
Source File: S3DiffUploader.scala    From shield   with MIT License 5 votes vote down vote up
package shield.aws

import java.io.{ByteArrayInputStream, InputStream}
import java.nio.charset.StandardCharsets

import akka.actor.{Actor, ActorLogging, Props}
import com.amazonaws.auth.profile.ProfileCredentialsProvider
import com.amazonaws.services.s3.AmazonS3Client
import com.amazonaws.services.s3.model.ObjectMetadata
import shield.actors.listeners.ComparisonDiffFile

object S3DiffUploader{
  def props(bucket: String, folder: String) : Props = Props(new S3DiffUploader(bucket, folder))
}

class S3DiffUploader(bucket: String, folder: String)  extends Actor with ActorLogging {
  val s3Client = new AmazonS3Client()
  val charset = StandardCharsets.UTF_8
  val stripped = folder.stripPrefix("/").stripSuffix("/")
  val prefix = if (stripped.isEmpty) {
    stripped
  } else {
    stripped + "/"
  }

  def receive = {
    case file: ComparisonDiffFile =>
      val metadata = new ObjectMetadata()
      metadata.setContentLength(file.contents.length)
      s3Client.putObject(bucket, s"$prefix${file.fileName}", new ByteArrayInputStream(file.contents), metadata)
  }
} 
Example 152
Source File: VinylRequest.scala    From vinyldns   with Apache License 2.0 5 votes vote down vote up
package models

import java.io.{ByteArrayInputStream, InputStream}
import java.util

import com.amazonaws.{ReadLimitInfo, SignableRequest}
import com.amazonaws.http.HttpMethodName

object VinylDNSRequest {
  val APPLICATION_JSON = "application/json"
}

case class VinylDNSRequest(
    method: String,
    url: String,
    path: String = "",
    payload: Option[String] = None,
    parameters: util.HashMap[String, java.util.List[String]] =
      new util.HashMap[String, java.util.List[String]]()
)

class SignableVinylDNSRequest(origReq: VinylDNSRequest) extends SignableRequest[VinylDNSRequest] {

  import VinylDNSRequest._

  val contentType: String = APPLICATION_JSON

  private val headers = new util.HashMap[String, String]()
  private val parameters = origReq.parameters
  private val uri = new java.net.URI(origReq.url)
  // I hate to do this, but need to be able to set the content after creation to
  // implement the interface properly
  private var contentStream: InputStream = new ByteArrayInputStream(
    origReq.payload.getOrElse("").getBytes("UTF-8")
  )

  override def addHeader(name: String, value: String): Unit = headers.put(name, value)
  override def getHeaders: java.util.Map[String, String] = headers
  override def getResourcePath: String = origReq.path
  override def addParameter(name: String, value: String): Unit = {
    if (!parameters.containsKey(name)) parameters.put(name, new util.ArrayList[String]())
    parameters.get(name).add(value)
  }
  override def getParameters: java.util.Map[String, java.util.List[String]] = parameters
  override def getEndpoint: java.net.URI = uri
  override def getHttpMethod: HttpMethodName = HttpMethodName.valueOf(origReq.method)
  override def getTimeOffset: Int = 0
  override def getContent: InputStream = contentStream
  override def getContentUnwrapped: InputStream = getContent
  override def getReadLimitInfo: ReadLimitInfo = new ReadLimitInfo {
    override def getReadLimit: Int = -1
  }
  override def getOriginalRequestObject: Object = origReq
  override def setContent(content: InputStream): Unit = contentStream = content
} 
Example 153
Source File: GenericAvroSerializerSuite.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Output, Input}
import org.apache.avro.{SchemaBuilder, Schema}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SparkFunSuite, SharedSparkContext}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema : Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {//模式压缩与解压缩
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {//记录序列化和反序列化
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }
  // uses a schema fingerprint to reduce the message size
  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {//缓存之前模式
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
} 
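The test above goes through Spark's Kryo-backed GenericAvroSerializer. As a point of comparison, a minimal sketch of the plain Avro binary round trip for the same record shape, writing a GenericRecord to bytes and reading it back through a ByteArrayInputStream using only the avro library (object name is made up):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import org.apache.avro.SchemaBuilder
import org.apache.avro.generic.{GenericData, GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DecoderFactory, EncoderFactory}

object AvroRecordRoundTrip extends App {
  val schema = SchemaBuilder.record("testRecord").fields().requiredString("data").endRecord()
  val record = new GenericData.Record(schema)
  record.put("data", "test data")

  // Encode the record with the Avro binary encoding.
  val out     = new ByteArrayOutputStream()
  val encoder = EncoderFactory.get().binaryEncoder(out, null)
  new GenericDatumWriter[GenericRecord](schema).write(record, encoder)
  encoder.flush()

  // Decode it back from a ByteArrayInputStream.
  val decoder = DecoderFactory.get().binaryDecoder(new ByteArrayInputStream(out.toByteArray), null)
  val copy    = new GenericDatumReader[GenericRecord](schema).read(null, decoder)

  assert(copy == record)
} 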
Example 154
Source File: Unpacker.scala    From haystack-traces   with Apache License 2.0 5 votes vote down vote up
package com.expedia.www.haystack.trace.commons.packer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream}
import java.nio.ByteBuffer
import java.util.zip.GZIPInputStream

import com.expedia.open.tracing.buffer.SpanBuffer
import com.github.luben.zstd.ZstdInputStream
import org.apache.commons.io.IOUtils
import org.json4s.jackson.Serialization
import org.xerial.snappy.SnappyInputStream

object Unpacker {
  import PackedMessage._

  private def readMetadata(packedDataBytes: Array[Byte]): Array[Byte] = {
    val byteBuffer = ByteBuffer.wrap(packedDataBytes)
    val magicBytesExist = MAGIC_BYTES.indices forall { idx => byteBuffer.get() == MAGIC_BYTES.apply(idx) }
    if (magicBytesExist) {
      val headerLength = byteBuffer.getInt
      val metadataBytes = new Array[Byte](headerLength)
      byteBuffer.get(metadataBytes, 0, headerLength)
      metadataBytes
    } else {
      null
    }
  }

  private def unpack(compressedStream: InputStream) = {
    val outputStream = new ByteArrayOutputStream()
    IOUtils.copy(compressedStream, outputStream)
    outputStream.toByteArray
  }

  def readSpanBuffer(packedDataBytes: Array[Byte]): SpanBuffer = {
    var parsedDataBytes: Array[Byte] = null
    val metadataBytes = readMetadata(packedDataBytes)
    if (metadataBytes != null) {
      val packedMetadata = Serialization.read[PackedMetadata](new String(metadataBytes))
      val compressedDataOffset = MAGIC_BYTES.length + 4 + metadataBytes.length
      packedMetadata.t match {
        case PackerType.SNAPPY =>
          parsedDataBytes = unpack(
            new SnappyInputStream(
              new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset)))
        case PackerType.GZIP =>
          parsedDataBytes = unpack(
            new GZIPInputStream(
              new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset)))
        case PackerType.ZSTD =>
          parsedDataBytes = unpack(
            new ZstdInputStream(
              new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset)))
        case _ =>
          return SpanBuffer.parseFrom(
            new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset))
      }
    } else {
      parsedDataBytes = packedDataBytes
    }
    SpanBuffer.parseFrom(parsedDataBytes)
  }
} 
Example 155
Source File: Packer.scala    From haystack-traces   with Apache License 2.0 5 votes vote down vote up
package com.expedia.www.haystack.trace.commons.packer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, OutputStream}
import java.util.zip.GZIPOutputStream

import com.expedia.www.haystack.trace.commons.packer.PackerType.PackerType
import com.github.luben.zstd.ZstdOutputStream
import com.google.protobuf.GeneratedMessageV3
import org.apache.commons.io.IOUtils
import org.xerial.snappy.SnappyOutputStream

object PackerType extends Enumeration {
  type PackerType = Value
  val GZIP, SNAPPY, NONE, ZSTD = Value
}

case class PackedMetadata(t: PackerType)

abstract class Packer[T <: GeneratedMessageV3] {
  val packerType: PackerType

  protected def compressStream(stream: OutputStream): OutputStream

  private def pack(protoObj: T): Array[Byte] = {
    val outStream = new ByteArrayOutputStream
    val compressedStream = compressStream(outStream)
    if (compressedStream != null) {
      IOUtils.copy(new ByteArrayInputStream(protoObj.toByteArray), compressedStream)
      compressedStream.close() // this flushes the data to final outStream
      outStream.toByteArray
    } else {
      protoObj.toByteArray
    }
  }

  def apply(protoObj: T): PackedMessage[T] = {
    PackedMessage(protoObj, pack, PackedMetadata(packerType))
  }
}

class NoopPacker[T <: GeneratedMessageV3] extends Packer[T] {
  override val packerType = PackerType.NONE
  override protected def compressStream(stream: OutputStream): OutputStream = null
}

class SnappyPacker[T <: GeneratedMessageV3] extends Packer[T] {
  override val packerType = PackerType.SNAPPY
  override protected def compressStream(stream: OutputStream): OutputStream = new SnappyOutputStream(stream)
}


class ZstdPacker[T <: GeneratedMessageV3] extends Packer[T] {
  override val packerType = PackerType.ZSTD
  override protected def compressStream(stream: OutputStream): OutputStream = new ZstdOutputStream(stream)
}

class GzipPacker[T <: GeneratedMessageV3] extends Packer[T] {
  override val packerType = PackerType.GZIP
  override protected def compressStream(stream: OutputStream): OutputStream = new GZIPOutputStream(stream)
} 
Example 156
Source File: ParquetIOTest.scala    From ratatool   with Apache License 2.0 5 votes vote down vote up
package com.spotify.ratatool.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File}
import java.nio.file.Files

import com.spotify.ratatool.Schemas
import com.spotify.ratatool.avro.specific.TestRecord
import com.spotify.ratatool.scalacheck._
import org.apache.commons.io.FileUtils
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class ParquetIOTest extends AnyFlatSpec with Matchers {

  private val genericSchema = Schemas.avroSchema
  private val genericGen = genericRecordOf(genericSchema)
  private val genericData = (1 to 100).flatMap(_ => genericGen.sample)

  private val specificSchema = TestRecord.getClassSchema
  private val specificGen = specificRecordOf[TestRecord]
  private val specificData = (1 to 100).flatMap(_ => specificGen.sample)

  "ParquetIO" should "work with generic record and stream" in {
    val out = new ByteArrayOutputStream()
    ParquetIO.writeToOutputStream(genericData, genericSchema, out)
    val in = new ByteArrayInputStream(out.toByteArray)
    val result = ParquetIO.readFromInputStream(in).toList
    result should equal (genericData)
  }

  it should "work with generic record and file" in {
    val dir = Files.createTempDirectory("ratatool-")
    val file = new File(dir.toString, "temp.parquet")
    ParquetIO.writeToFile(genericData, genericSchema, file)
    val result = ParquetIO.readFromFile(file).toList
    result should equal (genericData)
    FileUtils.deleteDirectory(dir.toFile)
  }

  it should "work with specific record and stream" in {
    val out = new ByteArrayOutputStream()
    ParquetIO.writeToOutputStream(specificData, specificSchema, out)
    val in = new ByteArrayInputStream(out.toByteArray)
    val result = ParquetIO.readFromInputStream[TestRecord](in).toList
    result.map(FixRandomData(_)) should equal (specificData.map(FixRandomData(_)))
  }

  it should "work with specific record and file" in {
    val dir = Files.createTempDirectory("ratatool-")
    val file = new File(dir.toString, "temp.parquet")
    ParquetIO.writeToFile(specificData, specificSchema, file)
    val result = ParquetIO.readFromFile[TestRecord](file).toList
    result.map(FixRandomData(_)) should equal (specificData.map(FixRandomData(_)))
    FileUtils.deleteDirectory(dir.toFile)
  }

} 
Example 157
Source File: AvroIOTest.scala    From ratatool   with Apache License 2.0 5 votes vote down vote up
package com.spotify.ratatool.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File}

import com.spotify.ratatool.Schemas
import com.spotify.ratatool.avro.specific.TestRecord
import org.apache.avro.generic.GenericRecord
import com.spotify.ratatool.scalacheck._
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class AvroIOTest extends AnyFlatSpec with Matchers {

  private val genericSchema = Schemas.avroSchema
  private val genericGen = genericRecordOf(genericSchema)
  private val genericData = (1 to 100).flatMap(_ => genericGen.sample)

  private val specificSchema = TestRecord.getClassSchema
  private val specificGen = specificRecordOf[TestRecord]
  private val specificData = (1 to 100).flatMap(_ => specificGen.sample)

  "AvroIO" should "work with generic record and stream" in {
    val out = new ByteArrayOutputStream()
    AvroIO.writeToOutputStream(genericData, genericSchema, out)
    val in = new ByteArrayInputStream(out.toByteArray)
    val result = AvroIO.readFromInputStream[GenericRecord](in).toList
    result should equal (genericData)
  }

  it should "work with generic record and file" in {
    val file = File.createTempFile("ratatool-", ".avro")
    file.deleteOnExit()
    AvroIO.writeToFile(genericData, genericSchema, file)
    val result = AvroIO.readFromFile[GenericRecord](file).toList
    result should equal (genericData)
  }

  it should "work with specific record and stream" in {
    val out = new ByteArrayOutputStream()
    AvroIO.writeToOutputStream(specificData, specificSchema, out)
    val in = new ByteArrayInputStream(out.toByteArray)
    val result = AvroIO.readFromInputStream[TestRecord](in).toList
    result.map(FixRandomData(_)) should equal (specificData.map(FixRandomData(_)))
  }

  it should "work with specific record and file" in {
    val file = File.createTempFile("ratatool-", ".avro")
    file.deleteOnExit()
    AvroIO.writeToFile(specificData, specificSchema, file)
    val result = AvroIO.readFromFile[TestRecord](file).toList
    result.map(FixRandomData(_)) should equal (specificData.map(FixRandomData(_)))
  }
} 
Example 158
Source File: MockHelpers.scala    From guardrail   with MIT License 5 votes vote down vote up
package helpers

import com.fasterxml.jackson.databind.ObjectMapper
import io.netty.handler.codec.http.EmptyHttpHeaders
import java.io.ByteArrayInputStream
import java.nio.ByteBuffer
import java.nio.charset.StandardCharsets
import java.util.Collections
import java.util.concurrent.CompletableFuture
import javax.ws.rs.container.AsyncResponse
import org.asynchttpclient.Response
import org.asynchttpclient.uri.Uri
import org.mockito.{ ArgumentMatchersSugar, MockitoSugar }
import org.scalatest.Assertions
import scala.reflect.ClassTag

object MockHelpers extends Assertions with MockitoSugar with ArgumentMatchersSugar {
  def mockAsyncResponse[T](future: CompletableFuture[T])(implicit cls: ClassTag[T]): AsyncResponse = {
    val asyncResponse = mock[AsyncResponse]

    when(asyncResponse.resume(any[T])) thenAnswer [AnyRef] { response =>
      response match {
        case t: Throwable => future.completeExceptionally(t)
        case other: T     => future.complete(other)
        case other        => fail(s"AsyncResponse.resume expected an object of type ${cls.runtimeClass.getName}, but got ${other.getClass.getName} instead")
      }
    }

    asyncResponse
  }

  def mockAHCResponse[T](uri: String, status: Int, maybeBody: Option[T] = None)(implicit mapper: ObjectMapper): Response = {
    val response = mock[Response]
    when(response.getUri) thenReturn Uri.create(uri)
    when(response.hasResponseStatus) thenReturn true
    when(response.getStatusCode) thenReturn status
    when(response.getStatusText) thenReturn "Some Status"
    when(response.hasResponseHeaders) thenReturn true
    when(response.getHeaders) thenReturn EmptyHttpHeaders.INSTANCE
    when(response.getHeader(any)) thenReturn null
    when(response.getHeaders(any)) thenReturn Collections.emptyList()
    maybeBody match {
      case None =>
        when(response.hasResponseBody) thenReturn true
      case Some(body) =>
        val responseBytes = mapper.writeValueAsBytes(body)
        val responseStr   = new String(responseBytes, StandardCharsets.UTF_8)
        when(response.hasResponseBody) thenReturn true
        when(response.getResponseBody(any)) thenReturn responseStr
        when(response.getResponseBody) thenReturn responseStr
        when(response.getResponseBodyAsStream) thenReturn new ByteArrayInputStream(responseBytes)
        when(response.getResponseBodyAsByteBuffer) thenReturn ByteBuffer.wrap(responseBytes)
        when(response.getResponseBodyAsBytes) thenReturn responseBytes
    }
    response
  }

} 
Example 159
Source File: MeetupReceiver.scala    From meetup-stream   with Apache License 2.0 5 votes vote down vote up
package receiver

import org.apache.spark.streaming.receiver.Receiver
import org.apache.spark.storage.StorageLevel
import org.apache.spark.Logging
import com.ning.http.client.AsyncHttpClientConfig
import com.ning.http.client._
import scala.collection.mutable.ArrayBuffer
import java.io.OutputStream
import java.io.ByteArrayInputStream
import java.io.InputStreamReader
import java.io.BufferedReader
import java.io.InputStream
import java.io.PipedInputStream
import java.io.PipedOutputStream

class MeetupReceiver(url: String) extends Receiver[String](StorageLevel.MEMORY_AND_DISK_2) with Logging {
  
  @transient var client: AsyncHttpClient = _
  
  @transient var inputPipe: PipedInputStream = _
  @transient var outputPipe: PipedOutputStream = _  
       
  def onStart() {    
    val cf = new AsyncHttpClientConfig.Builder()
    cf.setRequestTimeout(Integer.MAX_VALUE)
    cf.setReadTimeout(Integer.MAX_VALUE)
    cf.setPooledConnectionIdleTimeout(Integer.MAX_VALUE)      
    client = new AsyncHttpClient(cf.build())
    
    inputPipe = new PipedInputStream(1024 * 1024)
    outputPipe = new PipedOutputStream(inputPipe)
    val producerThread = new Thread(new DataConsumer(inputPipe))
    producerThread.start()
    
    client.prepareGet(url).execute(new AsyncHandler[Unit]{
        
      def onBodyPartReceived(bodyPart: HttpResponseBodyPart) = {
        bodyPart.writeTo(outputPipe)
        AsyncHandler.STATE.CONTINUE        
      }
      
      def onStatusReceived(status: HttpResponseStatus) = {
        AsyncHandler.STATE.CONTINUE
      }
      
      def onHeadersReceived(headers: HttpResponseHeaders) = {
        AsyncHandler.STATE.CONTINUE
      }
            
      def onCompleted = {
        println("completed")
      }
      
      
      def onThrowable(t: Throwable)={
        t.printStackTrace()
      }
        
    })    
    
    
  }

  def onStop() {
    if (Option(client).isDefined) client.close()
    if (Option(outputPipe).isDefined) {
      outputPipe.flush()
      outputPipe.close()
    }
    if (Option(inputPipe).isDefined) {
      inputPipe.close()
    }
  }
  
  class DataConsumer(inputStream: InputStream) extends Runnable {

    override def run(): Unit = {
      val bufferedReader = new BufferedReader(new InputStreamReader(inputStream))
      var input = bufferedReader.readLine()
      while (input != null) {
        store(input)
        input = bufferedReader.readLine()
      }
    }

  }

} 
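
A hedged wiring sketch for the receiver above, assuming a local Spark Streaming job; the application name, batch interval, and RSVP endpoint URL are illustrative assumptions:

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object MeetupStreamApp {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("meetup-stream").setMaster("local[2]")
    val ssc  = new StreamingContext(conf, Seconds(5))

    // receiverStream plugs the custom Receiver into the DStream graph
    val rsvps = ssc.receiverStream(new MeetupReceiver("http://stream.meetup.com/2/rsvps"))
    rsvps.count().print()

    ssc.start()
    ssc.awaitTermination()
  }
}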
Example 160
Source File: GithubIssue485.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.avro4s.github

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import com.sksamuel.avro4s.record.decoder.CPWrapper
import com.sksamuel.avro4s.{AvroSchema, Decoder, DefaultFieldMapper}
import org.apache.avro.generic.GenericData
import org.apache.avro.util.Utf8
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers
import shapeless.Coproduct

class GithubIssue485 extends AnyFunSuite with Matchers {

  test("Serializable Coproduct Decoder #485") {
    val baos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(baos)
    oos.writeObject(Decoder[CPWrapper])
    oos.close()

    val decoder =
      new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray)).readObject().asInstanceOf[Decoder[CPWrapper]]

    val schema = AvroSchema[CPWrapper]
    val record = new GenericData.Record(schema)
    record.put("u", new Utf8("wibble"))
    decoder.decode(record) shouldBe CPWrapper(Coproduct[CPWrapper.ISBG]("wibble"))
  }
} 
Example 161
Source File: GithubIssue484.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.avro4s.github

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import com.sksamuel.avro4s.record.decoder.ScalaEnumClass
import com.sksamuel.avro4s.schema.Colours
import com.sksamuel.avro4s.{AvroSchema, Decoder, DefaultFieldMapper}
import org.apache.avro.generic.GenericData
import org.apache.avro.generic.GenericData.EnumSymbol
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

class GithubIssue484 extends AnyFunSuite with Matchers {

  test("Serializable Scala Enum Decoder #484") {
    val baos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(baos)
    oos.writeObject(Decoder[ScalaEnumClass])
    oos.close()

    val decoder = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray))
      .readObject()
      .asInstanceOf[Decoder[ScalaEnumClass]]

    val schema = AvroSchema[ScalaEnumClass]
    val record = new GenericData.Record(schema)
    record.put("colour", new EnumSymbol(schema.getField("colour").schema(), "Green"))
    decoder.decode(record) shouldBe ScalaEnumClass(Colours.Green)
  }
} 
Example 162
Source File: GithubIssue432.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.avro4s.github

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import com.sksamuel.avro4s.Encoder
import org.scalatest.{FunSuite, Matchers}

class GithubIssue432 extends FunSuite with Matchers {

  test("Serializable Encoder[BigDecimal] #432") {
    val oos = new ObjectOutputStream(new ByteArrayOutputStream())
    oos.writeObject(Encoder.bigDecimalEncoder)
    oos.close()
  }

  test("Deserialized Encoder[BigDecimal] works") {
    val baos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(baos)
    oos.writeObject(Encoder.bigDecimalEncoder)
    oos.close()

    val ois = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray))
    val encoder = ois.readObject().asInstanceOf[Encoder[BigDecimal]]

    encoder.encode(12.34)
  }
} 
Example 163
Source File: SchemaEvolutionTest.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.avro4s.record.decoder

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import com.sksamuel.avro4s._
import org.apache.avro.SchemaBuilder
import org.apache.avro.generic.GenericData
import org.apache.avro.util.Utf8
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

class SchemaEvolutionTest extends AnyFunSuite with Matchers {

  case class Version1(original: String)
  case class Version2(@AvroAlias("original") renamed: String)

  case class P1(name: String, age: Int = 18)
  case class P2(name: String)

  case class OptionalStringTest(a: String, b: Option[String])
  case class DefaultStringTest(a: String, b: String = "foo")

  ignore("@AvroAlias should be used when a reader schema has a field missing from the write schema") {

    val v1schema = AvroSchema[Version1]
    val v1 = Version1("hello")
    val baos = new ByteArrayOutputStream()
    val output = AvroOutputStream.data[Version1].to(baos).build()
    output.write(v1)
    output.close()

    // we load using a v2 schema
    val is = new AvroDataInputStream[Version2](new ByteArrayInputStream(baos.toByteArray), Some(v1schema))
    val v2 = is.iterator.toList.head

    v2.renamed shouldBe v1.original
  }

  test("when decoding, if the record and schema are missing a field and the target has a scala default, use that") {

    val f1 = RecordFormat[P1]
    val f2 = RecordFormat[P2]

    f1.from(f2.to(P2("foo"))) shouldBe P1("foo")
  }

  test("when decoding, if the record is missing a field that is present in the schema with a default, use the default from the schema") {
    val schema = SchemaBuilder.record("foo").fields().requiredString("a").endRecord()
    val record = new GenericData.Record(schema)
    record.put("a", new Utf8("hello"))
    Decoder[DefaultStringTest].decode(record) shouldBe DefaultStringTest("hello")
  }

  test("when decoding, if the record is missing a field that is present in the schema and the type is option, then set to None") {
    val schema1 = SchemaBuilder.record("foo").fields().requiredString("a").endRecord()
    val schema2 = SchemaBuilder.record("foo").fields().requiredString("a").optionalString("b").endRecord()
    val record = new GenericData.Record(schema1)
    record.put("a", new Utf8("hello"))
    Decoder[OptionalStringTest].decode(record) shouldBe OptionalStringTest("hello", None)
  }
} 
Example 164
Source File: ProtoMarshaller.scala    From akka-grpc   with Apache License 2.0 5 votes vote down vote up
package akka.grpc.internal

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream, InputStream }
import io.grpc.KnownLength
import akka.annotation.InternalApi
import akka.grpc.ProtobufSerializer


@InternalApi
class ProtoMarshaller[T <: com.google.protobuf.Message](u: ProtobufSerializer[T])
    extends io.grpc.MethodDescriptor.Marshaller[T] {
  override def parse(stream: InputStream): T = {
    val baos = new ByteArrayOutputStream(math.max(64, stream.available()))
    val buffer = new Array[Byte](32 * 1024)

    // Blocking calls underneath...
    // we can't avoid it for the moment because we are relying on the Netty's Channel API
    var bytesRead = stream.read(buffer)
    while (bytesRead >= 0) {
      baos.write(buffer, 0, bytesRead)
      bytesRead = stream.read(buffer)
    }
    u.deserialize(akka.util.ByteString(baos.toByteArray))
  }

  override def stream(value: T): InputStream =
    new ByteArrayInputStream(value.toByteArray) with KnownLength
} 
Example 165
Source File: Gzip.scala    From akka-grpc   with Apache License 2.0 5 votes vote down vote up
package akka.grpc.internal

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream }
import java.util.zip.{ GZIPInputStream, GZIPOutputStream }

import akka.util.ByteString

object Gzip extends Codec {
  override val name: String = "gzip"

  override def compress(uncompressed: ByteString): ByteString = {
    val baos = new ByteArrayOutputStream(uncompressed.size)
    val gzos = new GZIPOutputStream(baos)
    gzos.write(uncompressed.toArray)
    gzos.flush()
    gzos.close()
    ByteString(baos.toByteArray)
  }

  override def uncompress(compressed: ByteString): ByteString = {
    val gzis = new GZIPInputStream(new ByteArrayInputStream(compressed.toArray))

    val baos = new ByteArrayOutputStream(compressed.size)
    val buffer = new Array[Byte](32 * 1024)
    var read = gzis.read(buffer)
    while (read != -1) {
      baos.write(buffer, 0, read)
      read = gzis.read(buffer)
    }
    ByteString(baos.toByteArray)
  }
} 
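
A small round-trip sketch for the codec above, with no assumptions beyond akka.util.ByteString being on the classpath as in the listing:

import akka.util.ByteString

val original     = ByteString("hello gzip " * 100)
val compressed   = Gzip.compress(original)
val uncompressed = Gzip.uncompress(compressed)

assert(uncompressed == original)
assert(compressed.size < original.size) // highly repetitive input compresses well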
Example 166
Source File: Marshaller.scala    From akka-grpc   with Apache License 2.0 5 votes vote down vote up
package akka.grpc.internal

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream, InputStream }
import io.grpc.KnownLength
import akka.annotation.InternalApi
import akka.grpc.ProtobufSerializer


@InternalApi
final class Marshaller[T <: scalapb.GeneratedMessage](u: ProtobufSerializer[T])
    extends io.grpc.MethodDescriptor.Marshaller[T] {
  override def parse(stream: InputStream): T = {
    val baos = new ByteArrayOutputStream(math.max(64, stream.available()))
    val buffer = new Array[Byte](32 * 1024)

    // Blocking calls underneath...
    // we can't avoid it for the moment because we are relying on the Netty's Channel API
    var bytesRead = stream.read(buffer)
    while (bytesRead >= 0) {
      baos.write(buffer, 0, bytesRead)
      bytesRead = stream.read(buffer)
    }
    u.deserialize(akka.util.ByteString(baos.toByteArray))
  }

  override def stream(value: T): InputStream =
    new ByteArrayInputStream(value.toByteArray) with KnownLength
} 
Example 167
Source File: SslContexts.scala    From kubernetes-client   with Apache License 2.0 5 votes vote down vote up
package com.goyeau.kubernetes.client.util
import java.io.{ByteArrayInputStream, File, FileInputStream, InputStreamReader}
import java.security.cert.{CertificateFactory, X509Certificate}
import java.security.{KeyStore, SecureRandom, Security}
import java.util.Base64

import com.goyeau.kubernetes.client.KubeConfig
import javax.net.ssl.{KeyManagerFactory, SSLContext, TrustManagerFactory}
import org.bouncycastle.jce.provider.BouncyCastleProvider
import org.bouncycastle.openssl.jcajce.JcaPEMKeyConverter
import org.bouncycastle.openssl.{PEMKeyPair, PEMParser}

object SslContexts {
  private val TrustStoreSystemProperty         = "javax.net.ssl.trustStore"
  private val TrustStorePasswordSystemProperty = "javax.net.ssl.trustStorePassword"
  private val KeyStoreSystemProperty           = "javax.net.ssl.keyStore"
  private val KeyStorePasswordSystemProperty   = "javax.net.ssl.keyStorePassword"

  def fromConfig(config: KubeConfig): SSLContext = {
    val sslContext = SSLContext.getInstance("TLS")
    sslContext.init(keyManagers(config), trustManagers(config), new SecureRandom)
    sslContext
  }

  private def keyManagers(config: KubeConfig) = {
    // Client certificate
    val certDataStream = config.clientCertData.map(data => new ByteArrayInputStream(Base64.getDecoder.decode(data)))
    val certFileStream = config.clientCertFile.map(new FileInputStream(_))

    // Client key
    val keyDataStream = config.clientKeyData.map(data => new ByteArrayInputStream(Base64.getDecoder.decode(data)))
    val keyFileStream = config.clientKeyFile.map(new FileInputStream(_))

    for {
      keyStream  <- keyDataStream.orElse(keyFileStream)
      certStream <- certDataStream.orElse(certFileStream)
    } yield {
      Security.addProvider(new BouncyCastleProvider())
      val pemKeyPair =
        new PEMParser(new InputStreamReader(keyStream)).readObject().asInstanceOf[PEMKeyPair] // scalafix:ok
      val privateKey = new JcaPEMKeyConverter().setProvider("BC").getPrivateKey(pemKeyPair.getPrivateKeyInfo)

      val certificateFactory = CertificateFactory.getInstance("X509")
      val certificate        = certificateFactory.generateCertificate(certStream).asInstanceOf[X509Certificate] // scalafix:ok

      defaultKeyStore.setKeyEntry(
        certificate.getSubjectX500Principal.getName,
        privateKey,
        config.clientKeyPass.fold(Array.empty[Char])(_.toCharArray),
        Array(certificate)
      )
    }

    val keyManagerFactory = KeyManagerFactory.getInstance(KeyManagerFactory.getDefaultAlgorithm)
    keyManagerFactory.init(defaultKeyStore, Array.empty)
    keyManagerFactory.getKeyManagers
  }

  private lazy val defaultKeyStore = {
    val propertyKeyStoreFile =
      Option(System.getProperty(KeyStoreSystemProperty, "")).filter(_.nonEmpty).map(new File(_))

    val keyStore = KeyStore.getInstance(KeyStore.getDefaultType)
    keyStore.load(
      propertyKeyStoreFile.map(new FileInputStream(_)).orNull,
      System.getProperty(KeyStorePasswordSystemProperty, "").toCharArray
    )
    keyStore
  }

  private def trustManagers(config: KubeConfig) = {
    val certDataStream = config.caCertData.map(data => new ByteArrayInputStream(Base64.getDecoder.decode(data)))
    val certFileStream = config.caCertFile.map(new FileInputStream(_))

    certDataStream.orElse(certFileStream).foreach { certStream =>
      val certificateFactory = CertificateFactory.getInstance("X509")
      val certificate        = certificateFactory.generateCertificate(certStream).asInstanceOf[X509Certificate] // scalafix:ok
      defaultTrustStore.setCertificateEntry(certificate.getSubjectX500Principal.getName, certificate)
    }

    val trustManagerFactory = TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm)
    trustManagerFactory.init(defaultTrustStore)
    trustManagerFactory.getTrustManagers
  }

  private lazy val defaultTrustStore = {
    val securityDirectory = s"${System.getProperty("java.home")}/lib/security"

    val propertyTrustStoreFile =
      Option(System.getProperty(TrustStoreSystemProperty, "")).filter(_.nonEmpty).map(new File(_))
    val jssecacertsFile = Option(new File(s"$securityDirectory/jssecacerts")).filter(f => f.exists && f.isFile)
    val cacertsFile     = new File(s"$securityDirectory/cacerts")

    val keyStore = KeyStore.getInstance(KeyStore.getDefaultType)
    keyStore.load(
      new FileInputStream(propertyTrustStoreFile.orElse(jssecacertsFile).getOrElse(cacertsFile)),
      System.getProperty(TrustStorePasswordSystemProperty, "changeit").toCharArray
    )
    keyStore
  }
} 
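
A hedged usage sketch for SslContexts: how the KubeConfig value is built depends on the kubernetes-client version, so it is assumed to be supplied by the caller, and the HttpsURLConnection wiring is only illustrative:

import java.net.URL
import javax.net.ssl.HttpsURLConnection
import com.goyeau.kubernetes.client.KubeConfig

def openSecureConnection(kubeConfig: KubeConfig, url: URL): HttpsURLConnection = {
  val sslContext = SslContexts.fromConfig(kubeConfig)
  val connection = url.openConnection().asInstanceOf[HttpsURLConnection]
  connection.setSSLSocketFactory(sslContext.getSocketFactory)
  connection
}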
Example 168
Source File: ScaleAndConvert.scala    From SparkNet   with MIT License 5 votes vote down vote up
package preprocessing

import java.awt.image.DataBufferByte
import java.io.ByteArrayInputStream
import javax.imageio.ImageIO

import scala.collection.mutable.ArrayBuffer
import scala.collection.JavaConversions._
import net.coobird.thumbnailator._

import org.apache.spark.rdd.RDD

import libs._

object ScaleAndConvert {
  def BufferedImageToByteArray(image: java.awt.image.BufferedImage) : Array[Byte] = {
    val height = image.getHeight()
    val width = image.getWidth()
    val pixels = image.getRGB(0, 0, width, height, null, 0, width)
    val result = new Array[Byte](3 * height * width)
    var row = 0
    while (row < height) {
      var col = 0
      while (col < width) {
        val rgb = pixels(row * width + col)
        result(0 * height * width + row * width + col) = ((rgb >> 16) & 0xFF).toByte
        result(1 * height * width + row * width + col) = ((rgb >> 8) & 0xFF).toByte
        result(2 * height * width + row * width + col) = (rgb & 0xFF).toByte
        col += 1
      }
      row += 1
    }
    result
  }

  def decompressImageAndResize(compressedImage: Array[Byte], height: Int, width: Int) : Option[Array[Byte]] = {
    // this method takes a JPEG, decompresses it, and resizes it
    try {
      val im = ImageIO.read(new ByteArrayInputStream(compressedImage))
      val resizedImage = Thumbnails.of(im).forceSize(width, height).asBufferedImage()
      Some(BufferedImageToByteArray(resizedImage))
    } catch {
      // If images can't be processed properly, just ignore them
      case e: java.lang.IllegalArgumentException => None
      case e: javax.imageio.IIOException => None
      case e: java.lang.NullPointerException => None
    }
  }
} 
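
A minimal sketch of the resize helper applied to a single JPEG read from disk; the file path and the 227x227 target size are hypothetical:

import java.nio.file.{Files, Paths}

val jpegBytes: Array[Byte] = Files.readAllBytes(Paths.get("/tmp/example.jpg"))

ScaleAndConvert.decompressImageAndResize(jpegBytes, height = 227, width = 227) match {
  case Some(pixels) => println(s"resized to ${pixels.length} bytes (expected ${3 * 227 * 227})")
  case None         => println("image could not be decoded")
}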
Example 169
Source File: TestingTypedCount.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive.execution

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate.{ImperativeAggregate, TypedImperativeAggregate}
import org.apache.spark.sql.hive.execution.TestingTypedCount.State
import org.apache.spark.sql.types._

@ExpressionDescription(
  usage = "_FUNC_(expr) - A testing aggregate function resembles COUNT " +
          "but implements ObjectAggregateFunction.")
case class TestingTypedCount(
    child: Expression,
    mutableAggBufferOffset: Int = 0,
    inputAggBufferOffset: Int = 0)
  extends TypedImperativeAggregate[TestingTypedCount.State] {

  def this(child: Expression) = this(child, 0, 0)

  override def children: Seq[Expression] = child :: Nil

  override def dataType: DataType = LongType

  override def nullable: Boolean = false

  override def createAggregationBuffer(): State = TestingTypedCount.State(0L)

  override def update(buffer: State, input: InternalRow): State = {
    if (child.eval(input) != null) {
      buffer.count += 1
    }
    buffer
  }

  override def merge(buffer: State, input: State): State = {
    buffer.count += input.count
    buffer
  }

  override def eval(buffer: State): Any = buffer.count

  override def serialize(buffer: State): Array[Byte] = {
    val byteStream = new ByteArrayOutputStream()
    val dataStream = new DataOutputStream(byteStream)
    dataStream.writeLong(buffer.count)
    byteStream.toByteArray
  }

  override def deserialize(storageFormat: Array[Byte]): State = {
    val byteStream = new ByteArrayInputStream(storageFormat)
    val dataStream = new DataInputStream(byteStream)
    TestingTypedCount.State(dataStream.readLong())
  }

  override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): ImperativeAggregate =
    copy(mutableAggBufferOffset = newMutableAggBufferOffset)

  override def withNewInputAggBufferOffset(newInputAggBufferOffset: Int): ImperativeAggregate =
    copy(inputAggBufferOffset = newInputAggBufferOffset)

  override val prettyName: String = "typed_count"
}

object TestingTypedCount {
  case class State(var count: Long)
} 
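
A small sketch of the aggregation buffer round trip above, runnable without a Spark job; the Literal child is only a placeholder expression:

import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.hive.execution.TestingTypedCount.State

val agg   = TestingTypedCount(Literal(1))
val bytes = agg.serialize(State(42L))

assert(agg.deserialize(bytes) == State(42L))
assert(agg.eval(State(42L)) == 42L)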
Example 170
Source File: CreateJacksonParser.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.json

import java.io.{ByteArrayInputStream, InputStream, InputStreamReader}

import com.fasterxml.jackson.core.{JsonFactory, JsonParser}
import org.apache.hadoop.io.Text

import org.apache.spark.unsafe.types.UTF8String

private[sql] object CreateJacksonParser extends Serializable {
  def string(jsonFactory: JsonFactory, record: String): JsonParser = {
    jsonFactory.createParser(record)
  }

  def utf8String(jsonFactory: JsonFactory, record: UTF8String): JsonParser = {
    val bb = record.getByteBuffer
    assert(bb.hasArray)

    val bain = new ByteArrayInputStream(
      bb.array(), bb.arrayOffset() + bb.position(), bb.remaining())

    jsonFactory.createParser(new InputStreamReader(bain, "UTF-8"))
  }

  def text(jsonFactory: JsonFactory, record: Text): JsonParser = {
    jsonFactory.createParser(record.getBytes, 0, record.getLength)
  }

  def inputStream(jsonFactory: JsonFactory, record: InputStream): JsonParser = {
    jsonFactory.createParser(record)
  }
} 
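
A brief sketch of the factory methods above; because the object is private[sql], code like this would have to live under the org.apache.spark.sql package (for example in a test), and the JSON payload is made up:

import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets

import com.fasterxml.jackson.core.{JsonFactory, JsonToken}

val factory = new JsonFactory()
val json    = """{"name":"spark"}"""

val fromString = CreateJacksonParser.string(factory, json)
val fromStream = CreateJacksonParser.inputStream(
  factory, new ByteArrayInputStream(json.getBytes(StandardCharsets.UTF_8)))

assert(fromString.nextToken() == JsonToken.START_OBJECT)
assert(fromStream.nextToken() == JsonToken.START_OBJECT)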
Example 171
Source File: GenericAvroSerializerSuite.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Input, Output}
import org.apache.avro.{Schema, SchemaBuilder}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SharedSparkContext, SparkFunSuite}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema : Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }

  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
}