java.io.ByteArrayOutputStream Scala Examples
The following examples show how to use java.io.ByteArrayOutputStream.
Each example is taken from an open-source project; the source file, project name, and license are listed above each example so you can find the original code in context.
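Before the project examples, here is a minimal, self-contained sketch of the pattern most of them share: bytes are written into an in-memory ByteArrayOutputStream and then read back with toByteArray or toString. This snippet is illustrative only and is not taken from any of the projects below; the object name ByteArrayOutputStreamDemo is made up for the example.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.charset.StandardCharsets

object ByteArrayOutputStreamDemo extends App {
  // Collect bytes in memory instead of writing them to a file or socket.
  val out = new ByteArrayOutputStream()
  out.write("hello, ".getBytes(StandardCharsets.UTF_8))
  out.write("world".getBytes(StandardCharsets.UTF_8))

  // Snapshot the buffer as a byte array, or decode it as a string.
  val bytes: Array[Byte] = out.toByteArray
  println(out.toString(StandardCharsets.UTF_8.name())) // prints: hello, world

  // The captured bytes can be replayed through a ByteArrayInputStream.
  val in = new ByteArrayInputStream(bytes)
  println(in.read().toChar) // prints: h
}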
Example 1
Source File: byte_message.scala From libisabelle with Apache License 2.0 | 5 votes |
package isabelle

import java.io.{ByteArrayOutputStream, OutputStream, InputStream, IOException}

object Byte_Message {
  private def is_length(msg: Bytes): Boolean =
    !msg.is_empty && msg.iterator.forall(b => Symbol.is_ascii_digit(b.toChar))

  private def is_terminated(msg: Bytes): Boolean = {
    val len = msg.length
    len > 0 && Symbol.is_ascii_line_terminator(msg.charAt(len - 1))
  }

  def write_line_message(stream: OutputStream, msg: Bytes) {
    if (is_length(msg) || is_terminated(msg))
      error("Bad content for line message:\n" ++ msg.text.take(100))

    val n = msg.length
    write(stream,
      (if (n > 100 || msg.iterator.contains(10)) make_header(List(n + 1)) else Nil) :::
        List(msg, Bytes.newline))
    flush(stream)
  }

  def read_line_message(stream: InputStream): Option[Bytes] =
    read_line(stream) match {
      case None => None
      case Some(line) =>
        Value.Nat.unapply(line.text) match {
          case None => Some(line)
          case Some(n) => read_block(stream, n)._1.map(_.trim_line)
        }
    }
}
Example 2
Source File: AvroConverter.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.converters.sink

import com.datamountaineer.streamreactor.connect.converters.MsgKey
import io.confluent.connect.avro.AvroData
import java.io.ByteArrayOutputStream
import java.io.File
import org.apache.avro.{Schema => AvroSchema}
import org.apache.avro.generic.GenericRecord
import org.apache.avro.io.EncoderFactory
import org.apache.avro.reflect.ReflectDatumWriter
import org.apache.kafka.connect.sink.SinkRecord
import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException

class AvroConverter extends Converter {
  private val avroData = new AvroData(8)
  private var sinkToSchemaMap: Map[String, AvroSchema] = Map.empty
  private var avroWritersMap: Map[String, ReflectDatumWriter[Object]] = Map.empty

  override def convert(sinkTopic: String, data: SinkRecord): SinkRecord = {
    Option(data) match {
      case None =>
        new SinkRecord(
          sinkTopic,
          0,
          null,
          null,
          avroData.toConnectSchema(sinkToSchemaMap(sinkTopic)),
          null,
          0
        )
      case Some(_) =>
        val kafkaTopic = data.topic()
        val writer = avroWritersMap.getOrElse(kafkaTopic.toLowerCase,
          throw new ConfigException(s"Invalid ${AvroConverter.SCHEMA_CONFIG} is not configured for $kafkaTopic"))

        val output = new ByteArrayOutputStream()
        val decoder = EncoderFactory.get().binaryEncoder(output, null)
        output.reset()

        val avro = avroData.fromConnectData(data.valueSchema(), data.value())
        avro.asInstanceOf[GenericRecord]

        val record = writer.write(avro, decoder)
        decoder.flush()
        val arr = output.toByteArray

        new SinkRecord(
          kafkaTopic,
          data.kafkaPartition(),
          MsgKey.schema,
          MsgKey.getStruct(sinkTopic, data.key().toString()),
          data.valueSchema(),
          arr,
          0
        )
    }
  }

  override def initialize(config: Map[String, String]): Unit = {
    sinkToSchemaMap = AvroConverter.getSchemas(config)
    avroWritersMap = sinkToSchemaMap.map { case (key, schema) =>
      key -> new ReflectDatumWriter[Object](schema)
    }
  }
}

object AvroConverter {
  val SCHEMA_CONFIG = "connect.converter.avro.schemas"

  def getSchemas(config: Map[String, String]): Map[String, AvroSchema] = {
    config.getOrElse(SCHEMA_CONFIG, throw new ConfigException(s"$SCHEMA_CONFIG is not provided"))
      .toString
      .split(';')
      .filter(_.trim.nonEmpty)
      .map(_.split("="))
      .map {
        case Array(sink, path) =>
          val file = new File(path)
          if (!file.exists()) {
            throw new ConfigException(s"Invalid $SCHEMA_CONFIG. The file $path doesn't exist!")
          }
          val s = sink.trim.toLowerCase()
          if (s.isEmpty) {
            throw new ConfigException(s"Invalid $SCHEMA_CONFIG. The topic is not valid for entry containing $path")
          }
          s -> new AvroSchema.Parser().parse(file)
        case other =>
          throw new ConfigException(s"$SCHEMA_CONFIG is not properly set. The format is Mqtt_Sink->AVRO_FILE")
      }.toMap
  }
}
Example 3
Source File: AvroSerializer.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.serialization

import java.io.{ByteArrayOutputStream, InputStream, OutputStream}

import com.sksamuel.avro4s.{RecordFormat, SchemaFor}
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DecoderFactory, EncoderFactory}

object AvroSerializer {
  def write[T <: Product](t: T)(implicit os: OutputStream, formatter: RecordFormat[T], schemaFor: SchemaFor[T]): Unit =
    write(apply(t), schemaFor())

  def write(record: GenericRecord, schema: Schema)(implicit os: OutputStream) = {
    val writer = new GenericDatumWriter[GenericRecord](schema)
    val encoder = EncoderFactory.get().binaryEncoder(os, null)
    writer.write(record, encoder)
    encoder.flush()
    os.flush()
  }

  def getBytes[T <: Product](t: T)(implicit recordFormat: RecordFormat[T], schemaFor: SchemaFor[T]): Array[Byte] =
    getBytes(recordFormat.to(t), schemaFor())

  def getBytes(record: GenericRecord, schema: Schema): Array[Byte] = {
    implicit val output = new ByteArrayOutputStream()
    write(record, schema)
    output.toByteArray
  }

  def read(is: InputStream, schema: Schema): GenericRecord = {
    val reader = new GenericDatumReader[GenericRecord](schema)
    val decoder = DecoderFactory.get().binaryDecoder(is, null)
    reader.read(null, decoder)
  }

  def read[T <: Product](is: InputStream)(implicit schemaFor: SchemaFor[T], recordFormat: RecordFormat[T]): T =
    recordFormat.from(read(is, schemaFor()))

  def apply[T <: Product](t: T)(implicit formatter: RecordFormat[T]): GenericRecord = formatter.to(t)
}
Example 4
Source File: FeaturePolygonTest.scala From spark-pip with Apache License 2.0 | 5 votes |
package com.esri

import java.io.ByteArrayOutputStream

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import org.geotools.geometry.jts.WKTReader2
import org.scalatest._

import scala.io.Source

class FeaturePolygonTest extends FlatSpec with Matchers {

  it should "read zero area geometry" in {
    val kryo = new Kryo()
    kryo.register(classOf[FeaturePolygon])

    val reader = new WKTReader2()
    Source
      .fromFile("/tmp/world.tsv")
      .getLines()
      .foreach(line => {
        val tokens = line.split("\t")
        val geom = reader.read(tokens(14))
        FeaturePolygon(geom, Array.empty[String])
          .toRowCols(4.0)
          .foreach {
            case (rowcol, feature) => {
              feature.geom.getGeometryType should endWith("Polygon")
              val baos = new ByteArrayOutputStream(4096)
              val output = new Output(baos)
              kryo.writeObject(output, feature)
              output.flush()
              val obj = kryo.readObject[FeaturePolygon](new Input(baos.toByteArray), classOf[FeaturePolygon])
              obj.geom.equalsExact(feature.geom, 0.000001)
            }
          }
      })
  }
}
Example 5
Source File: S3Brain.scala From sumobot with Apache License 2.0 | 5 votes |
package com.sumologic.sumobot.brain

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.Properties

import akka.actor.{Actor, Props}
import com.amazonaws.auth.{AWSCredentials, AWSStaticCredentialsProvider}
import com.amazonaws.services.s3.{AmazonS3Client, AmazonS3ClientBuilder}
import com.amazonaws.services.s3.model.ObjectMetadata
import com.sumologic.sumobot.brain.Brain._

import scala.collection.JavaConverters._
import scala.collection.immutable

object S3Brain {
  def props(credentials: AWSCredentials, bucket: String, s3Key: String): Props =
    Props(classOf[S3Brain], credentials, bucket, s3Key)
}

class S3Brain(credentials: AWSCredentials, bucket: String, s3Key: String) extends Actor {

  private val s3Client = AmazonS3ClientBuilder.standard()
    .withCredentials(new AWSStaticCredentialsProvider(credentials)).build

  private var brainContents: Map[String, String] = loadFromS3()

  override def receive: Receive = {
    case Store(key, value) =>
      brainContents += (key -> value)
      saveToS3(brainContents)

    case Remove(key) =>
      brainContents -= key
      saveToS3(brainContents)

    case Retrieve(key) => brainContents.get(key) match {
      case Some(value) => sender() ! ValueRetrieved(key, value)
      case None => sender() ! ValueMissing(key)
    }

    case ListValues(prefix) =>
      sender() ! ValueMap(brainContents.filter(_._1.startsWith(prefix)))
  }

  private def loadFromS3(): Map[String, String] = {
    if (s3Client.doesBucketExistV2(bucket)) {
      val props = new Properties()
      props.load(s3Client.getObject(bucket, s3Key).getObjectContent)
      immutable.Map(props.asScala.toSeq: _*)
    } else {
      Map.empty
    }
  }

  private def saveToS3(contents: Map[String, String]): Unit = {
    if (!s3Client.doesBucketExistV2(bucket)) {
      s3Client.createBucket(bucket)
    }

    val props = new Properties()
    props.putAll(contents.asJava)
    val out = new ByteArrayOutputStream()
    props.store(out, "")
    out.flush()
    out.close()
    val in = new ByteArrayInputStream(out.toByteArray)
    s3Client.putObject(bucket, s3Key, in, new ObjectMetadata())
  }
}
Example 6
Source File: Serialization.scala From seahorse-workflow-executor with Apache License 2.0 | 5 votes |
package io.deepsense.commons.serialization

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

trait Serialization {

  def deserialize[T](bytes: Array[Byte]): T = {
    val bufferIn = new ByteArrayInputStream(bytes)
    val streamIn = new ObjectInputStream(bufferIn)
    try {
      streamIn.readObject().asInstanceOf[T]
    } finally {
      streamIn.close()
    }
  }

  def serialize[T](objectToSerialize: T): Array[Byte] = {
    val byteArrayOutputStream: ByteArrayOutputStream = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(byteArrayOutputStream)
    try {
      oos.writeObject(objectToSerialize)
      oos.flush()
      byteArrayOutputStream.toByteArray
    } finally {
      oos.close()
    }
  }

  def serializeDeserialize[T](obj: T): T = deserialize[T](serialize[T](obj))
}

object Serialization extends Serialization
Example 7
package fr.hmil.roshttp.tools.io

import java.io.{ByteArrayOutputStream, OutputStream, Writer, _}

import scala.annotation.tailrec
import scala.reflect.ClassTag

// The enclosing object declaration was truncated in the original listing; the name "IO" is assumed here.
object IO {

  def pipe(in: Reader, out: Writer): Unit = {
    val buffer = newBuffer[Char]

    @tailrec
    def loop(): Unit = {
      val size = in.read(buffer)
      if (size > 0) {
        out.write(buffer, 0, size)
        loop()
      }
    }
    loop()
  }

  @inline
  private def newBuffer[T: ClassTag] = new Array[T](4096)
}
Example 8
Source File: XmlEncoder.scala From phobos with Apache License 2.0 | 5 votes |
package ru.tinkoff.phobos.encoding

import java.io.ByteArrayOutputStream

import cats.syntax.option._
import com.fasterxml.aalto.stax.OutputFactoryImpl
import org.codehaus.stax2.XMLStreamWriter2
import ru.tinkoff.phobos.Namespace
import ru.tinkoff.phobos.encoding.XmlEncoder.XmlEncoderConfig

trait XmlEncoder[A] {
  val localname: String
  val namespaceuri: Option[String]
  val elementencoder: ElementEncoder[A]

  def encode(a: A, charset: String = "UTF-8"): String =
    new String(encodeToBytes(a, charset), charset)

  def encodeToBytes(a: A, charset: String = "UTF-8"): Array[Byte] = {
    val os = new ByteArrayOutputStream
    val factory = new OutputFactoryImpl
    factory.setProperty("javax.xml.stream.isRepairingNamespaces", true)
    val sw = new PhobosStreamWriter(factory.createXMLStreamWriter(os, charset).asInstanceOf[XMLStreamWriter2])
    sw.writeStartDocument()
    elementencoder.encodeAsElement(a, sw, localname, namespaceuri)
    sw.writeEndDocument()
    sw.flush()
    sw.close()
    os.toByteArray
  }

  def encodeWithConfig(a: A, config: XmlEncoderConfig): String =
    new String(encodeToBytesWithConfig(a, config), config.encoding)

  def encodeToBytesWithConfig(a: A, config: XmlEncoderConfig): Array[Byte] = {
    val os = new ByteArrayOutputStream
    val factory = new OutputFactoryImpl
    factory.setProperty("javax.xml.stream.isRepairingNamespaces", true)
    val sw = new PhobosStreamWriter(factory.createXMLStreamWriter(os, config.encoding).asInstanceOf[XMLStreamWriter2])
    if (config.writeProlog) {
      sw.writeStartDocument(config.encoding, config.version)
    }
    elementencoder.encodeAsElement(a, sw, localname, namespaceuri)
    if (config.writeProlog) {
      sw.writeEndDocument()
    }
    sw.flush()
    sw.close()
    os.toByteArray
  }
}

object XmlEncoder {
  def apply[A](implicit instance: XmlEncoder[A]): XmlEncoder[A] = instance

  def fromElementEncoder[A](localName: String, namespaceUri: Option[String])(
      implicit elementEncoder: ElementEncoder[A]): XmlEncoder[A] =
    new XmlEncoder[A] {
      val localname: String = localName
      val namespaceuri: Option[String] = namespaceUri
      val elementencoder: ElementEncoder[A] = elementEncoder
    }

  def fromElementEncoder[A](localName: String)(implicit elementEncoder: ElementEncoder[A]): XmlEncoder[A] =
    fromElementEncoder(localName, None)

  def fromElementEncoderNs[A, NS](localName: String, namespaceInstance: NS)(
      implicit elementEncoder: ElementEncoder[A], namespace: Namespace[NS]): XmlEncoder[A] =
    fromElementEncoder(localName, namespace.getNamespace.some)

  def fromElementEncoderNs[A, NS](localName: String)(
      implicit elementEncoder: ElementEncoder[A], namespace: Namespace[NS]): XmlEncoder[A] =
    fromElementEncoder(localName, namespace.getNamespace.some)

  final case class XmlEncoderConfig(
      encoding: String,
      version: String,
      writeProlog: Boolean
  ) {
    def withoutProlog: XmlEncoderConfig = copy(writeProlog = false)
  }

  val defaultConfig: XmlEncoderConfig =
    XmlEncoderConfig(
      encoding = "UTF-8",
      version = "1.0",
      writeProlog = true
    )
}
Example 9
Source File: AllCodecTest.scala From aws-lambda-scala with MIT License | 5 votes |
package io.github.mkotsur.aws.codecs

import java.io.ByteArrayOutputStream

import com.amazonaws.services.lambda.runtime.Context
import io.circe.generic.auto._
import io.github.mkotsur.StringInputStream
import org.scalatest.EitherValues._
import org.scalatest.concurrent.Eventually
import org.mockito.MockitoSugar
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should
import org.scalatest.{FunSuite, Matchers}

class AllCodecTest extends AnyFunSuite with should.Matchers with MockitoSugar with Eventually {

  test("should decode null") {
    new AllCodec {
      val is = new StringInputStream("""null""")
      val value = canDecodeAll[None.type].readStream(is)
      value.right.value shouldBe Option.empty[None.type]
    }
  }

  test("should decode empty string") {
    new AllCodec {
      val is = new StringInputStream("")
      val value = canDecodeAll[None.type].readStream(is)
      value.right.value shouldBe Option.empty[None.type]
    }
  }

  test("should encode null") {
    new AllCodec {
      val os = new ByteArrayOutputStream()
      val context: Context = mock[Context]
      canEncodeAll[None.type].writeStream(os, Right(None), context)
      os.toString shouldBe "null"
    }
  }
}
Example 10
Source File: FutureCodec.scala From aws-lambda-scala with MIT License | 5 votes |
package io.github.mkotsur.aws.codecs

import java.io.ByteArrayOutputStream
import java.nio.charset.Charset

import io.circe.Encoder
import io.github.mkotsur.aws.handler.CanEncode
import io.github.mkotsur.aws.proxy.ProxyResponse
import io.circe.generic.auto._
import io.circe.syntax._
import cats.syntax.either.catsSyntaxEither

import scala.concurrent.{Await, Future}
import scala.concurrent.duration._
import scala.language.postfixOps
import scala.util.{Failure, Success, Try}

private[aws] trait FutureCodec {

  implicit def canEncodeFuture[I: Encoder](implicit canEncode: Encoder[I]) =
    CanEncode.instance[Future[I]]((os, responseEither, ctx) => {
      (for {
        response <- responseEither.toTry
        futureResult <- Try(Await.result(response, ctx.getRemainingTimeInMillis millis))
        json <- Try(canEncode(futureResult).noSpaces.getBytes)
        _ <- Try(os.write(json))
      } yield {
        ()
      }) match {
        case Success(v) => Right(v)
        case Failure(e) => Left(e)
      }
    })

  implicit def canEncodeProxyResponse[T](implicit canEncode: CanEncode[T]) = CanEncode.instance[ProxyResponse[T]](
    (output, proxyResponseEither, ctx) => {

      def writeBody(bodyOption: Option[T]): Either[Throwable, Option[String]] =
        bodyOption match {
          case None => Right(None)
          case Some(body) =>
            val os = new ByteArrayOutputStream()
            val result = canEncode.writeStream(os, Right(body), ctx)
            os.close()
            result.map(_ => Some(os.toString()))
        }

      val proxyResposeOrError = for {
        proxyResponse <- proxyResponseEither
        bodyOption <- writeBody(proxyResponse.body)
      } yield
        ProxyResponse[String](
          proxyResponse.statusCode,
          proxyResponse.headers,
          bodyOption
        )

      val response = proxyResposeOrError match {
        case Right(proxyRespose) => proxyRespose
        case Left(e) =>
          ProxyResponse[String](
            500,
            Some(Map("Content-Type" -> s"text/plain; charset=${Charset.defaultCharset().name()}")),
            Some(e.getMessage)
          )
      }

      output.write(response.asJson.noSpaces.getBytes)

      Right(())
    }
  )
}
Example 11
Source File: DataWeaveCLITest.scala From data-weave-native with Apache License 2.0 | 5 votes |
package org.mule.weave.dwnative.cli

import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
import java.io.PrintStream

import org.scalatest.FreeSpec
import org.scalatest.Matchers

import scala.io.Source

class DataWeaveCLITest extends FreeSpec with Matchers {

  "should work with output application/json" in {
    val out = System.out
    try {
      val stream = new ByteArrayOutputStream()
      System.setOut(new PrintStream(stream, true))
      new DataWeaveCLIRunner().run(Array("output application/json --- (1 to 3)[0]"))
      val source = Source.fromBytes(stream.toByteArray, "UTF-8")
      val result = source.mkString
      result.trim shouldBe "1"
    } finally {
      System.setOut(out)
      println("Finish OK 3")
    }
  }

  "should work with simple script and not output" in {
    val defaultOut = System.out
    try {
      val stream = new ByteArrayOutputStream()
      System.setOut(new PrintStream(stream, true))
      new DataWeaveCLIRunner().run(Array("(1 to 3)[0]"))
      val source = Source.fromBytes(stream.toByteArray, "UTF-8")
      val result = source.mkString
      result.trim shouldBe "1"
    } finally {
      System.setOut(defaultOut)
    }
  }

  "should work ok when sending payload from stdin" in {
    val out = System.out
    val in = System.in
    try {
      val input =
        """[
          | 1,
          | 2,
          | 3
          |]
        """.stripMargin.trim
      val stream = new ByteArrayOutputStream()
      System.setOut(new PrintStream(stream, true))
      System.setIn(new ByteArrayInputStream(input.getBytes("UTF-8")))
      new DataWeaveCLIRunner().run(Array("payload[0]"))
      val source = Source.fromBytes(stream.toByteArray, "UTF-8")
      val result = source.mkString.trim
      source.close()
      result.trim shouldBe "1"
    } finally {
      System.setOut(out)
      System.setIn(in)
      println("Finish OK 2")
    }
  }

  "should work with light formats" in {
    val out = System.out
    val in = System.in
    try {
      val input =
        """[{
          | "a" : 1,
          | "b" : 2,
          | "c" : 3
          |}]
        """.stripMargin.trim
      val stream = new ByteArrayOutputStream()
      System.setOut(new PrintStream(stream, true))
      System.setIn(new ByteArrayInputStream(input.getBytes("UTF-8")))
      new DataWeaveCLIRunner().run(Array("input payload json output csv header=false ---payload"))
      val source = Source.fromBytes(stream.toByteArray, "UTF-8")
      val result = source.mkString.trim
      source.close()
      result.trim shouldBe "1,2,3"
    } finally {
      System.setOut(out)
      System.setIn(in)
      println("Finish OK 2")
    }
  }
}
Example 12
Source File: ModelSerializationTestHelper.scala From aloha with MIT License | 5 votes |
package com.eharmony.aloha

import java.io.{ObjectInputStream, ByteArrayInputStream, ByteArrayOutputStream, ObjectOutputStream}

trait ModelSerializationTestHelper {

  def serializeDeserializeRoundTrip[A <: java.io.Serializable](a: A): A = {
    val baos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(baos)
    oos.writeObject(a)

    val bais = new ByteArrayInputStream(baos.toByteArray)
    val ois = new ObjectInputStream(bais)
    val out = ois.readObject()
    out.asInstanceOf[A]
  }
}
Example 13
Source File: ConsoleModuleTest.scala From scala-server-toolkit with MIT License | 5 votes |
package com.avast.sst.jvm.system.console

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import cats.effect.SyncIO
import org.scalatest.funsuite.AnyFunSuite

import scala.{Console => SConsole}

class ConsoleModuleTest extends AnyFunSuite {

  test("Console input") {
    SConsole.withIn(new ByteArrayInputStream("test input\n".getBytes("UTF-8"))) {
      val test = for {
        line <- ConsoleModule.make[SyncIO].readLine
      } yield assert(line === "test input")
      test.unsafeRunSync()
    }
  }

  test("Console output") {
    val out = new ByteArrayOutputStream()
    SConsole.withOut(out) {
      val test = for {
        _ <- ConsoleModule.make[SyncIO].printLine("test output")
      } yield ()
      test.unsafeRunSync()
    }
    assert(out.toString("UTF-8") === "test output\n")
  }

  test("Console error") {
    val out = new ByteArrayOutputStream()
    SConsole.withErr(out) {
      val test = for {
        _ <- ConsoleModule.make[SyncIO].printLineToError("test output")
      } yield ()
      test.unsafeRunSync()
    }
    assert(out.toString("UTF-8") === "test output\n")
  }
}
Example 14
Source File: SparkTestsSuite.scala From amaterasu with Apache License 2.0 | 5 votes |
package org.apache.amaterasu.spark

import java.io.{ByteArrayOutputStream, File}

import io.shinto.amaterasu.spark.PySparkRunnerTests
import org.apache.amaterasu.RunnersTests.RunnersLoadingTests
import org.apache.amaterasu.common.dataobjects.ExecData
import org.apache.amaterasu.common.execution.dependencies._
import org.apache.amaterasu.common.runtime.Environment
import org.apache.amaterasu.utilities.TestNotifier
import org.apache.amaterasu.executor.mesos.executors.ProvidersFactory
import org.apache.spark.repl.amaterasu.runners.spark.SparkScalaRunner
import org.apache.spark.sql.SparkSession
import org.scalatest._

import scala.collection.mutable.ListBuffer

class SparkTestsSuite extends Suites(
  new PySparkRunnerTests(),
  new RunnersLoadingTests()) with BeforeAndAfterAll {

  var env: Environment = _
  var factory: ProvidersFactory = _
  var spark: SparkSession = _

  override def beforeAll(): Unit = {
    env = Environment()
    env.workingDir = "file:///tmp/"
    env.master = "local[*]"

    // I can't apologise enough for this
    val resources = new File(getClass.getResource("/spark_intp.py").getPath).getParent

    val conf = Map[String, Any](
      "spark.cassandra.connection.host" -> "127.0.0.1",
      "sourceTable" -> "documents",
      "spark.local.ip" -> "127.0.0.1"
    )
    env.master = "local[1]"
    if (env.configuration != null) env.configuration ++ "pysparkPath" -> "/usr/bin/python"
    else env.configuration = Map(
      "pysparkPath" -> "/usr/bin/python",
      "cwd" -> resources
    )

    val excEnv = Map[String, Any](
      "PYTHONPATH" -> resources
    )
    env.configuration ++ "spark_exec_env" -> excEnv
    factory = ProvidersFactory(ExecData(env,
      Dependencies(ListBuffer.empty[Repo], List.empty[Artifact]),
      PythonDependencies(List.empty[PythonPackage]),
      Map("spark" -> Map.empty[String, Any], "spark_exec_env" -> Map("PYTHONPATH" -> resources))),
      "test",
      new ByteArrayOutputStream(),
      new TestNotifier(),
      "test")
    spark = factory.getRunner("spark", "scala").get.asInstanceOf[SparkScalaRunner].spark

    this.nestedSuites.filter(s => s.isInstanceOf[RunnersLoadingTests]).foreach(s => s.asInstanceOf[RunnersLoadingTests].factory = factory)
    this.nestedSuites.filter(s => s.isInstanceOf[PySparkRunnerTests]).foreach(s => s.asInstanceOf[PySparkRunnerTests].factory = factory)

    super.beforeAll()
  }

  override def afterAll(): Unit = {
    spark.stop()
    super.afterAll()
  }
}
Example 15
Source File: SparkRRunner.scala From amaterasu with Apache License 2.0 | 5 votes |
package org.apache.amaterasu.executor.execution.actions.runners.spark

import java.io.ByteArrayOutputStream
import java.util

import org.apache.amaterasu.common.execution.actions.Notifier
import org.apache.amaterasu.common.logging.Logging
import org.apache.amaterasu.common.runtime.Environment
import org.apache.amaterasu.sdk.AmaterasuRunner
import org.apache.spark.SparkContext

class SparkRRunner extends Logging with AmaterasuRunner {

  override def getIdentifier = "spark-r"

  override def executeSource(actionSource: String, actionName: String, exports: util.Map[String, String]): Unit = {
  }
}

object SparkRRunner {
  def apply(
    env: Environment,
    jobId: String,
    sparkContext: SparkContext,
    outStream: ByteArrayOutputStream,
    notifier: Notifier,
    jars: Seq[String]
  ): SparkRRunner = {
    new SparkRRunner()
  }
}
Example 16
Source File: ProvidersFactory.scala From amaterasu with Apache License 2.0 | 5 votes |
package org.apache.amaterasu.executor.mesos.executors

import java.io.ByteArrayOutputStream

import org.apache.amaterasu.common.dataobjects.ExecData
import org.apache.amaterasu.common.execution.actions.Notifier
import org.apache.amaterasu.sdk.{AmaterasuRunner, RunnersProvider}
import org.reflections.Reflections

import scala.collection.JavaConversions._

//TODO: Check if we can use this in the YARN impl
class ProvidersFactory {
  var providers: Map[String, RunnersProvider] = _

  def getRunner(groupId: String, id: String): Option[AmaterasuRunner] = {
    val provider = providers.get(groupId)
    provider match {
      case Some(provider) => Some(provider.getRunner(id))
      case None => None
    }
  }
}

object ProvidersFactory {

  def apply(data: ExecData,
            jobId: String,
            outStream: ByteArrayOutputStream,
            notifier: Notifier,
            executorId: String): ProvidersFactory = {

    val result = new ProvidersFactory()
    val reflections = new Reflections(getClass.getClassLoader)
    val runnerTypes = reflections.getSubTypesOf(classOf[RunnersProvider]).toSet

    result.providers = runnerTypes.map(r => {
      val provider = Manifest.classType(r).runtimeClass.newInstance.asInstanceOf[RunnersProvider]

      notifier.info(s"a provider for group ${provider.getGroupIdentifier} was created")
      provider.init(data, jobId, outStream, notifier, executorId)
      (provider.getGroupIdentifier, provider)
    }).toMap

    result
  }
}
Example 17
Source File: AvroSerde.scala From event-sourcing-kafka-streams with MIT License | 5 votes |
package org.amitayh.invoices.common.serde

import java.io.ByteArrayOutputStream
import java.nio.ByteBuffer
import java.time.Instant
import java.util
import java.util.UUID

import com.sksamuel.avro4s._
import org.amitayh.invoices.common.domain._
import org.amitayh.invoices.common.serde.UuidConverters.{fromByteBuffer, toByteBuffer}
import org.apache.avro.Schema
import org.apache.avro.Schema.Field
import org.apache.kafka.common.serialization.{Deserializer, Serde, Serializer}

object AvroSerde {
  implicit val instantToSchema: ToSchema[Instant] = new ToSchema[Instant] {
    override val schema: Schema = Schema.create(Schema.Type.STRING)
  }

  implicit val instantToValue: ToValue[Instant] = new ToValue[Instant] {
    override def apply(value: Instant): String = value.toString
  }

  implicit val instantFromValue: FromValue[Instant] = new FromValue[Instant] {
    override def apply(value: Any, field: Field): Instant = Instant.parse(value.toString)
  }

  implicit val uuidToSchema: ToSchema[UUID] = new ToSchema[UUID] {
    override val schema: Schema = Schema.create(Schema.Type.BYTES)
  }

  implicit val uuidToValue: ToValue[UUID] = new ToValue[UUID] {
    override def apply(value: UUID): ByteBuffer = toByteBuffer(value)
  }

  implicit val uuidFromValue: FromValue[UUID] = new FromValue[UUID] {
    override def apply(value: Any, field: Field): UUID = fromByteBuffer(value.asInstanceOf[ByteBuffer])
  }

  val CommandSerde: Serde[Command] = serdeFor[Command]

  val CommandResultSerde: Serde[CommandResult] = serdeFor[CommandResult]

  val SnapshotSerde: Serde[InvoiceSnapshot] = serdeFor[InvoiceSnapshot]

  val EventSerde: Serde[Event] = serdeFor[Event]

  def toBytes[T: SchemaFor: ToRecord](data: T): Array[Byte] = {
    val baos = new ByteArrayOutputStream
    val output = AvroOutputStream.binary[T](baos)
    output.write(data)
    output.close()
    baos.toByteArray
  }

  def fromBytes[T: SchemaFor: FromRecord](data: Array[Byte]): T = {
    val input = AvroInputStream.binary[T](data)
    input.iterator.next()
  }

  private def serdeFor[T: SchemaFor: ToRecord: FromRecord]: Serde[T] = new Serde[T] {
    override val serializer: Serializer[T] = new Serializer[T] {
      override def serialize(topic: String, data: T): Array[Byte] = toBytes(data)
      override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = ()
      override def close(): Unit = ()
    }
    override val deserializer: Deserializer[T] = new Deserializer[T] {
      override def deserialize(topic: String, data: Array[Byte]): T = fromBytes(data)
      override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = ()
      override def close(): Unit = ()
    }
    override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = ()
    override def close(): Unit = ()
  }
}
Example 18
Source File: PackageSpec.scala From sparkpipe-core with Apache License 2.0 | 5 votes |
package software.uncharted.sparkpipe.ops.core.rdd.debug

import java.io.ByteArrayOutputStream

import org.scalatest._
import software.uncharted.sparkpipe.Spark

class PackageSpec extends FunSpec {
  describe("ops.core.rdd.debug") {
    val rdd = Spark.sc.parallelize(Seq((1, "alpha"), (2, "bravo"), (3, "charlie")))

    describe("#countRDDRows()") {
      it("should output a formatted count message using the supplied output function") {
        var output = ""
        countRDDRows("test", (s: String) => output += s)(rdd)
        assertResult("[test] Number of rows: 3")(output)
      }

      it("should output a formatted count message to std out when no output function is supplied") {
        val bos = new ByteArrayOutputStream()
        Console.withOut(bos) {
          countRDDRows("test")(rdd)
        }
        assertResult("[test] Number of rows: 3\n")(bos.toString)
      }
    }

    describe("#takeRDDRows()") {
      it("should output a list of the first N rows of the rdd") {
        var output = ""
        takeRDDRows(2, "test", (s: String) => output += s)(rdd)
        assertResult("[test] First 2 rows0: (1,alpha)1: (2,bravo)")(output)
      }
    }

    describe("#debugRDDRows()") {
      it("should apply a function to the first N rows of the rdd") {
        var output = Seq[(Int, String)]()
        debugRDDRows(2, (s: Seq[(Int, String)]) => output = s)(rdd)
        assertResult(2)(output.length)
        assertResult(output)(rdd.collect().slice(0, 2).toSeq)
      }
    }
  }
}
Example 19
Source File: PackageSpec.scala From sparkpipe-core with Apache License 2.0 | 5 votes |
package software.uncharted.sparkpipe.ops.core.dataframe.debug

import java.io.ByteArrayOutputStream

import org.apache.spark.sql.Row
import org.scalatest._
import software.uncharted.sparkpipe.Spark
import software.uncharted.sparkpipe.ops.core.rdd.toDF

class PackageSpec extends FunSpec {
  describe("ops.core.dataframe.debug") {
    val rdd = Spark.sc.parallelize(Seq((1, "alpha"), (2, "bravo"), (3, "charlie")))
    val df = toDF(Spark.sparkSession)(rdd)

    describe("#countDFRows()") {
      it("should output a formatted count message using the supplied output function") {
        var output = ""
        countDFRows("test", (s: String) => output += s)(df)
        assertResult("[test] Number of rows: 3")(output)
      }

      it("should output a formatted count message to std out when no output function is supplied") {
        val bos = new ByteArrayOutputStream()
        Console.withOut(bos) {
          countDFRows("test")(df)
        }
        assertResult("[test] Number of rows: 3\n")(bos.toString)
      }
    }

    describe("#takeDFRows()") {
      it("should output a list of the first N rows of the dataframe") {
        var output = ""
        takeDFRows(2, "test", (s: String) => output += s)(df)
        assertResult("[test] First 2 rows0: [1,alpha]1: [2,bravo]")(output)
      }
    }

    describe("#debugDFRows()") {
      it("should apply a function to the first N rows of the dataframe") {
        var output = Seq[Row]()
        debugDFRows(2, (s: Seq[Row]) => output = s)(df)
        assertResult(2)(output.length)
        assertResult(output)(df.collect().slice(0, 2).toSeq)
      }
    }
  }
}
Example 20
Source File: Zip.scala From scala-clippy with Apache License 2.0 | 5 votes |
package util

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.zip.{GZIPInputStream, GZIPOutputStream}

object Zip {
  private val BufferSize = 512

  def compress(string: String): Array[Byte] = {
    val os = new ByteArrayOutputStream(string.length() / 5)
    val gos = new GZIPOutputStream(os)
    gos.write(string.getBytes("UTF-8"))
    gos.close()
    os.close()
    os.toByteArray
  }

  def decompress(compressed: Array[Byte]): String = {
    val is = new ByteArrayInputStream(compressed)
    val gis = new GZIPInputStream(is, BufferSize)
    val string = new StringBuilder()
    val data = new Array[Byte](BufferSize)
    var bytesRead = gis.read(data)
    while (bytesRead != -1) {
      string.append(new String(data, 0, bytesRead, "UTF-8"))
      bytesRead = gis.read(data)
    }
    gis.close()
    is.close()
    string.toString()
  }
}
Example 21
Source File: Utils.scala From scala-clippy with Apache License 2.0 | 5 votes |
package com.softwaremill.clippy

import java.io.{ByteArrayOutputStream, InputStream}
import java.io.Closeable

import scala.util.control.NonFatal
import scala.util.{Failure, Try}

object Utils {
  def runNonDaemon(t: => Unit) = {
    val shutdownHook = new Thread() {
      private val lock = new Object
      @volatile private var didRun = false

      override def run() = lock.synchronized {
        if (!didRun) {
          t
          didRun = true
        }
      }
    }

    Runtime.getRuntime.addShutdownHook(shutdownHook)
    try shutdownHook.run()
    finally Runtime.getRuntime.removeShutdownHook(shutdownHook)
  }

  def inputStreamToBytes(is: InputStream): Array[Byte] =
    try {
      val baos = new ByteArrayOutputStream()
      val buf = new Array[Byte](512)
      var read = 0
      while ({ read = is.read(buf, 0, buf.length); read } != -1) {
        baos.write(buf, 0, read)
      }
      baos.toByteArray
    } finally is.close()

  object TryWith {
    def apply[C <: Closeable, R](resource: => C)(f: C => R): Try[R] =
      Try(resource).flatMap(resourceInstance => {
        try {
          val returnValue = f(resourceInstance)
          Try(resourceInstance.close()).map(_ => returnValue)
        } catch {
          case NonFatal(exceptionInFunction) =>
            try {
              resourceInstance.close()
              Failure(exceptionInFunction)
            } catch {
              case NonFatal(exceptionInClose) =>
                exceptionInFunction.addSuppressed(exceptionInClose)
                Failure(exceptionInFunction)
            }
        }
      })
  }
}
Example 22
Source File: FileUtil.scala From coursier with Apache License 2.0 | 5 votes |
package coursier.cache.internal

import java.io.{ByteArrayOutputStream, InputStream}

object FileUtil {

  // Won't be necessary anymore with Java 9
  // (https://docs.oracle.com/javase/9/docs/api/java/io/InputStream.html#readAllBytes--,
  // via https://stackoverflow.com/questions/1264709/convert-inputstream-to-byte-array-in-java/37681322#37681322)
  def readFullyUnsafe(is: InputStream): Array[Byte] = {
    val buffer = new ByteArrayOutputStream
    val data = Array.ofDim[Byte](16384)

    var nRead = 0
    while ({
      nRead = is.read(data, 0, data.length)
      nRead != -1
    })
      buffer.write(data, 0, nRead)

    buffer.flush()
    buffer.toByteArray
  }

  def readFully(is: => InputStream): Array[Byte] = {
    var is0: InputStream = null
    try {
      is0 = is
      readFullyUnsafe(is0)
    } finally {
      if (is0 != null)
        is0.close()
    }
  }

  def withContent(is: InputStream, f: WithContent, bufferSize: Int = 16384): Unit = {
    val data = Array.ofDim[Byte](bufferSize)

    var nRead = is.read(data, 0, data.length)
    while (nRead != -1) {
      f(data, nRead)
      nRead = is.read(data, 0, data.length)
    }
  }

  trait WithContent {
    def apply(arr: Array[Byte], z: Int): Unit
  }

  class UpdateDigest(md: java.security.MessageDigest) extends FileUtil.WithContent {
    def apply(arr: Array[Byte], z: Int): Unit = md.update(arr, 0, z)
  }
}
Example 23
Source File: ZipTests.scala From coursier with Apache License 2.0 | 5 votes |
package coursier.cli.util

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.Random
import java.util.zip.{Deflater, ZipEntry, ZipInputStream, ZipOutputStream}

import coursier.launcher.internal.Zip
import org.junit.runner.RunWith
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatestplus.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class ZipTests extends AnyFlatSpec {

  "zipEntries" should "be fine with custom deflaters" in {

    // Inspired by https://github.com/spring-projects/spring-boot/commit/a50646b7cc3ad941e748dfb450077e3a73706205#diff-2297c301250b25e3b80301c58daf3ea0R621

    val baos = new ByteArrayOutputStream
    val output = new ZipOutputStream(baos) {
      `def` = new Deflater(Deflater.NO_COMPRESSION, true)
    }
    val data = Array.ofDim[Byte](1024 * 1024)
    new Random().nextBytes(data)
    val entry = new ZipEntry("entry.dat")
    output.putNextEntry(entry)
    output.write(data)
    output.closeEntry()
    output.close()
    val result = baos.toByteArray

    val zos = new ZipOutputStream(new ByteArrayOutputStream)
    val entryNames = Zip.zipEntries(new ZipInputStream(new ByteArrayInputStream(result)))
      .map {
        case (ent, content) =>
          println(ent.getCompressedSize)
          val name = ent.getName
          zos.putNextEntry(ent)
          zos.write(content)
          zos.closeEntry()
          name
      }
      .toVector
    zos.close()
    assert(entryNames == Vector("entry.dat"))
  }
}
Example 24
Source File: WordEmbeddingsLoader.scala From spark-nlp with Apache License 2.0 | 5 votes |
package com.johnsnowlabs.nlp.embeddings

import java.io.{BufferedInputStream, ByteArrayOutputStream, DataInputStream, FileInputStream}

import com.johnsnowlabs.storage.RocksDBConnection
import org.slf4j.LoggerFactory

import scala.io.Source

object WordEmbeddingsTextIndexer {

  def index(
             source: Iterator[String],
             writer: WordEmbeddingsWriter
           ): Unit = {
    try {
      for (line <- source) {
        val items = line.split(" ")
        val word = items(0)
        val embeddings = items.drop(1).map(i => i.toFloat)
        writer.add(word, embeddings)
      }
    } finally {
      writer.close()
    }
  }

  def index(
             source: String,
             writer: WordEmbeddingsWriter
           ): Unit = {
    val sourceFile = Source.fromFile(source)("UTF-8")
    val lines = sourceFile.getLines()
    index(lines, writer)
    sourceFile.close()
  }
}

object WordEmbeddingsBinaryIndexer {

  private val logger = LoggerFactory.getLogger("WordEmbeddings")

  def index(
             source: DataInputStream,
             writer: WordEmbeddingsWriter): Unit = {
    try {
      // File Header
      val numWords = Integer.parseInt(readString(source))
      val vecSize = Integer.parseInt(readString(source))

      // File Body
      for (i <- 0 until numWords) {
        val word = readString(source)

        // Unit Vector
        val vector = readFloatVector(source, vecSize, writer)
        writer.add(word, vector)
      }

      logger.info(s"Loaded $numWords words, vector size $vecSize")
    } finally {
      writer.close()
    }
  }

  def index(
             source: String,
             writer: WordEmbeddingsWriter): Unit = {

    val ds = new DataInputStream(new BufferedInputStream(new FileInputStream(source), 1 << 15))

    try {
      index(ds, writer)
    } finally {
      ds.close()
    }
  }

  private def readFloatVector(ds: DataInputStream, vectorSize: Int, indexer: WordEmbeddingsWriter): Array[Float] = {
    // Read Bytes
    val vectorBuffer = Array.fill[Byte](4 * vectorSize)(0)
    ds.read(vectorBuffer)

    // Convert Bytes to Floats
    indexer.fromBytes(vectorBuffer)
  }
}
Example 25
Source File: TMNodesWriter.scala From spark-nlp with Apache License 2.0 | 5 votes |
package com.johnsnowlabs.nlp.annotators.btm

import java.io.{ByteArrayOutputStream, ObjectOutputStream}

import com.johnsnowlabs.storage.{RocksDBConnection, StorageBatchWriter}

class TMNodesWriter(
                     override protected val connection: RocksDBConnection
                   ) extends StorageBatchWriter[TrieNode] {

  def toBytes(content: TrieNode): Array[Byte] = {
    val stream: ByteArrayOutputStream = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(stream)
    oos.writeObject(content)
    oos.close()
    stream.toByteArray
  }

  def add(word: Int, value: TrieNode): Unit = {
    super.add(word.toString, value)
  }

  override protected def writeBufferSize: Int = 10000
}
Example 26
Source File: MemoryAppender.scala From ncdbg with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.programmaticallyspeaking.ncd.testing

import ch.qos.logback.classic.spi.ILoggingEvent
import ch.qos.logback.core.UnsynchronizedAppenderBase
import ch.qos.logback.core.encoder.Encoder
import ch.qos.logback.core.status.ErrorStatus
import java.io.{ByteArrayOutputStream, IOException, OutputStream}
import java.nio.charset.StandardCharsets

import com.programmaticallyspeaking.ncd.messaging.{Observable, SerializedSubject}

object MemoryAppender {
  private[MemoryAppender] val logEventSubject = new SerializedSubject[String]

  def logEvents: Observable[String] = logEventSubject
}

class MemoryAppender extends UnsynchronizedAppenderBase[ILoggingEvent] {
  import MemoryAppender._

  private var encoder: Encoder[ILoggingEvent] = _

  private var outputStream = new OutputStream {
    override def write(b: Int): Unit = ???

    override def write(b: Array[Byte]): Unit = {
      val str = new String(b, StandardCharsets.UTF_8)
      logEventSubject.onNext(str)
    }
  }

  override def start(): Unit = {
    try {
      Option(encoder).foreach(_.init(outputStream))
      super.start()
    } catch {
      case e: IOException =>
        started = false
        addStatus(new ErrorStatus("Failed to initialize encoder for appender named [" + name + "].", this, e))
    }
  }

  override protected def append(event: ILoggingEvent): Unit = {
    if (!isStarted) return
    try {
      event.prepareForDeferredProcessing()
      Option(encoder).foreach(_.doEncode(event))
    } catch {
      case ioe: IOException =>
        started = false
        addStatus(new ErrorStatus("IO failure in appender", this, ioe))
    }
  }

  def setEncoder(e: Encoder[ILoggingEvent]): Unit = {
    encoder = e
  }
}
Example 27
Source File: StreamUtilities.scala From mmlspark with MIT License | 5 votes |
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.core.env

import java.io.{ByteArrayOutputStream, InputStream}
import java.util.zip.ZipInputStream

import org.apache.commons.io.IOUtils

import scala.io.Source
import scala.util.Random

object StreamUtilities {

  import scala.util.{Failure, Success, Try}

  def usingMany[T <: AutoCloseable, U](disposable: Seq[T])(task: Seq[T] => U): Try[U] = {
    try {
      Success(task(disposable))
    } catch {
      case e: Exception => Failure(e)
    } finally {
      disposable.foreach(d => d.close())
    }
  }

  def using[T <: AutoCloseable, U](disposable: T)(task: T => U): Try[U] = {
    try {
      Success(task(disposable))
    } catch {
      case e: Exception => Failure(e)
    } finally {
      disposable.close()
    }
  }

  def usingSource[T <: Source, U](disposable: T)(task: T => U): Try[U] = {
    try {
      Success(task(disposable))
    } catch {
      case e: Exception => Failure(e)
    } finally {
      disposable.close()
    }
  }

  class ZipIterator(stream: InputStream, zipfile: String, random: Random, sampleRatio: Double = 1)
    extends Iterator[(String, Array[Byte])] {

    private val zipStream = new ZipInputStream(stream)

    private def getNext: Option[(String, Array[Byte])] = {
      var entry = zipStream.getNextEntry
      while (entry != null) {
        if (!entry.isDirectory && random.nextDouble < sampleRatio) {

          val filename = zipfile + java.io.File.separator + entry.getName

          //extracting all bytes of a given entry
          val byteStream = new ByteArrayOutputStream
          IOUtils.copy(zipStream, byteStream)
          val bytes = byteStream.toByteArray

          assert(bytes.length == entry.getSize,
            "incorrect number of bytes is read from zipstream: " + bytes.length
              + " instead of " + entry.getSize)

          return Some((filename, bytes))
        }
        entry = zipStream.getNextEntry
      }

      stream.close()
      None
    }

    private var nextValue = getNext

    def hasNext: Boolean = nextValue.isDefined

    def next: (String, Array[Byte]) = {
      val result = nextValue.get
      nextValue = getNext
      result
    }
  }
}
Example 28
Source File: ConsoleProgressBarTest.scala From Argus-SAF with Apache License 2.0 | 5 votes |
package hu.ssh.progressbar.console

import java.io.{ByteArrayOutputStream, PrintStream}

import com.google.common.base.{Splitter, Strings}
import com.google.common.collect.Iterables
import hu.ssh.progressbar.ConsoleProgressBar
import org.scalatest.{FlatSpec, Matchers}

class ConsoleProgressBarTest extends FlatSpec with Matchers {
  "ProgressBar" should "output as expected" in {
    val outputstream = new ByteArrayOutputStream
    try {
      val progressBar = ConsoleProgressBar.on(new PrintStream(outputstream)).withFormat(":percent")
      progressBar.tick(0)
      assert(getLastOutput(outputstream.toString) == " 0.00")
      progressBar.tick(25)
      assert(getLastOutput(outputstream.toString) == " 25.00")
      progressBar.tick(30)
      assert(getLastOutput(outputstream.toString) == " 55.00")
      progressBar.tick(44)
      assert(getLastOutput(outputstream.toString) == " 99.00")
      progressBar.tickOne()
      assert(getLastOutput(outputstream.toString) == "100.00")
    } finally outputstream.close()
  }

  private def getLastOutput(string: String): String = {
    if (Strings.isNullOrEmpty(string)) return string
    val outputs = Splitter.on(ConsoleProgressBar.CARRIAGE_RETURN).omitEmptyStrings.split(string)
    Iterables.getLast(outputs)
  }
}
Example 29
Source File: ConsoleLoggerSpec.scala From odin with Apache License 2.0 | 5 votes |
package io.odin.loggers

import java.io.{ByteArrayOutputStream, PrintStream}

import cats.effect.{IO, Timer}
import cats.syntax.all._
import io.odin.Level._
import io.odin.formatter.Formatter
import io.odin.{Level, LoggerMessage, OdinSpec}

class ConsoleLoggerSpec extends OdinSpec {
  implicit val timer: Timer[IO] = IO.timer(scala.concurrent.ExecutionContext.global)

  it should "route all messages with level <= INFO to stdout" in {
    forAll { (loggerMessage: LoggerMessage, formatter: Formatter) =>
      whenever(loggerMessage.level <= Info) {
        val outBaos = new ByteArrayOutputStream()
        val stdOut = new PrintStream(outBaos)
        val errBaos = new ByteArrayOutputStream()
        val stdErr = new PrintStream(errBaos)

        val consoleLogger = ConsoleLogger[IO](formatter, stdOut, stdErr, Level.Trace)
        consoleLogger.log(loggerMessage).unsafeRunSync()
        outBaos.toString() shouldBe (formatter.format(loggerMessage) + System.lineSeparator())
      }
    }
  }

  it should "route all messages with level >= WARN to stderr" in {
    forAll { (loggerMessage: LoggerMessage, formatter: Formatter) =>
      whenever(loggerMessage.level > Info) {
        val outBaos = new ByteArrayOutputStream()
        val stdOut = new PrintStream(outBaos)
        val errBaos = new ByteArrayOutputStream()
        val stdErr = new PrintStream(errBaos)

        val consoleLogger = ConsoleLogger[IO](formatter, stdOut, stdErr, Level.Trace)
        consoleLogger.log(loggerMessage).unsafeRunSync()
        errBaos.toString() shouldBe (formatter.format(loggerMessage) + System.lineSeparator())
      }
    }
  }
}
Example 30
Source File: LoggerOutputStream.scala From hail with MIT License | 5 votes |
package is.hail.utils

import java.io.{ByteArrayOutputStream, OutputStream}
import java.nio.charset.StandardCharsets

import org.apache.log4j.{Level, Logger}

class LoggerOutputStream(logger: Logger, level: Level) extends OutputStream {
  private val buffer = new ByteArrayOutputStream()

  override def write(b: Int) {
    buffer.write(b)
    if (b == '\n') {
      val line = buffer.toString(StandardCharsets.UTF_8.name())
      level match {
        case Level.TRACE => logger.trace(line)
        case Level.DEBUG => logger.debug(line)
        case Level.INFO => logger.info(line)
        case Level.WARN => logger.warn(line)
        case Level.ERROR => logger.error(line)
      }
      buffer.reset()
    }
  }
}
Example 31
Source File: CodecSpec.scala From hail with MIT License | 5 votes |
package is.hail.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream, OutputStream}

import is.hail.annotations.{Region, RegionValue}
import is.hail.asm4s.{Code, TypeInfo, Value}
import is.hail.expr.ir.{EmitClassBuilder, EmitFunctionBuilder, ExecuteContext, typeToTypeInfo}
import is.hail.types.encoded.EType
import is.hail.types.physical.PType
import is.hail.types.virtual.Type
import is.hail.rvd.RVDContext
import is.hail.sparkextras.ContextRDD
import is.hail.utils.using
import org.apache.spark.rdd.RDD

trait AbstractTypedCodecSpec extends Spec {
  def encodedType: EType
  def encodedVirtualType: Type

  type StagedEncoderF[T] = (Value[Region], Value[T], Value[OutputBuffer]) => Code[Unit]
  type StagedDecoderF[T] = (Value[Region], Value[InputBuffer]) => Code[T]

  def buildEncoder(ctx: ExecuteContext, t: PType): (OutputStream) => Encoder

  def decodedPType(requestedType: Type): PType

  def buildDecoder(ctx: ExecuteContext, requestedType: Type): (PType, (InputStream) => Decoder)

  def encode(ctx: ExecuteContext, t: PType, offset: Long): Array[Byte] = {
    val baos = new ByteArrayOutputStream()
    using(buildEncoder(ctx, t)(baos))(_.writeRegionValue(offset))
    baos.toByteArray
  }

  def decode(ctx: ExecuteContext, requestedType: Type, bytes: Array[Byte], region: Region): (PType, Long) = {
    val bais = new ByteArrayInputStream(bytes)
    val (pt, dec) = buildDecoder(ctx, requestedType)
    (pt, dec(bais).readRegionValue(region))
  }

  def buildCodeInputBuffer(is: Code[InputStream]): Code[InputBuffer]

  def buildCodeOutputBuffer(os: Code[OutputStream]): Code[OutputBuffer]

  def buildEmitDecoderF[T](requestedType: Type, cb: EmitClassBuilder[_]): (PType, StagedDecoderF[T])

  def buildEmitEncoderF[T](t: PType, cb: EmitClassBuilder[_]): StagedEncoderF[T]

  def buildEmitDecoderF[T](requestedType: Type, cb: EmitClassBuilder[_], ti: TypeInfo[T]): (PType, StagedDecoderF[T]) = {
    val (ptype, dec) = buildEmitDecoderF[T](requestedType, cb)
    assert(ti == typeToTypeInfo(requestedType))
    ptype -> dec
  }

  def buildEmitEncoderF[T](t: PType, cb: EmitClassBuilder[_], ti: TypeInfo[T]): StagedEncoderF[T] = {
    assert(ti == typeToTypeInfo(t))
    buildEmitEncoderF[T](t, cb)
  }

  // FIXME: is there a better place for this to live?
  def decodeRDD(ctx: ExecuteContext, requestedType: Type, bytes: RDD[Array[Byte]]): (PType, ContextRDD[Long]) = {
    val (pt, dec) = buildDecoder(ctx, requestedType)
    (pt, ContextRDD.weaken(bytes).cmapPartitions { (ctx, it) =>
      RegionValue.fromBytes(dec, ctx.region, it)
    })
  }

  override def toString: String = super[Spec].toString
}
Example 32
Source File: SparqlUtil.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.tools.neptune.export

import java.io.ByteArrayOutputStream
import java.net.URLEncoder

import org.apache.jena.graph.Graph
import org.apache.jena.riot.{Lang, RDFDataMgr}

object SparqlUtil {

  def extractSubjectFromTriple(triple: String): String = {
    triple.split(" ")(0)
  }

  def getTriplesOfSubGraph(subGraph: Graph): String = {
    val tempOs = new ByteArrayOutputStream
    RDFDataMgr.write(tempOs, subGraph, Lang.NTRIPLES)
    new String(tempOs.toByteArray, "UTF-8")
  }

  def generateSparqlCmdForDefaultGraph(triplesPerGraph: Iterable[SubjectGraphTriple]): String = {
    triplesPerGraph.map(subGraphTriple => encode(subGraphTriple.triple)).mkString
  }

  def generateSparqlCmdForNamedGraph(graph: String, triplesPerGraph: Iterable[SubjectGraphTriple]): String = {
    " GRAPH <" + encode(graph) + "> { " + triplesPerGraph.map(trio => encode(trio.triple)).mkString + "}"
  }

  def buildGroupedSparqlCmd(subjects: Iterable[String], allSubjGraphTriples: Iterable[List[SubjectGraphTriple]], updateMode: Boolean): String = {
    var sparqlCmd = "update="
    val deleteSubj = if (updateMode) Some("DELETE { ?s ?p ?o . } WHERE { VALUES ?s { " + subjects.map(subject => encode(subject) + " ").mkString + "} ?s ?p ?o };") else None
    val insertDefaultGraphSparqlCmd = "INSERT DATA {" + allSubjGraphTriples.flatten.filterNot(trio => predicateContainsMeta(trio)).groupBy(trio => trio.graph).map(graphWithTriples => graphWithTriples._1.fold(generateSparqlCmdForDefaultGraph(graphWithTriples._2))(graph => "")).mkString + "}"
    val insertNamedGraphSparqlCmd = "INSERT DATA {" + allSubjGraphTriples.flatten.filterNot(trio => predicateContainsMeta(trio)).groupBy(trio => trio.graph).map(graphWithTriples => graphWithTriples._1.fold("")(graphName => generateSparqlCmdForNamedGraph(graphName, graphWithTriples._2))).mkString + "}"
    sparqlCmd + deleteSubj.getOrElse("") + insertDefaultGraphSparqlCmd + ";" + insertNamedGraphSparqlCmd
  }

  def encode(str: String): String = {
    URLEncoder.encode(str, "UTF-8")
  }

  def predicateContainsMeta(trio: SubjectGraphTriple): Boolean = {
    trio.triple.contains("meta/sys")
  }
}
Example 33
Source File: SubEntryTest.scala From lila-openingexplorer with GNU Affero General Public License v3.0 | 5 votes |
package lila.openingexplorer

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream }

import org.specs2.mutable._

import chess.{ Color, Pos }
import chess.format.Uci

class SubEntryTest extends Specification {

  private def pipe(entry: SubEntry): SubEntry = {
    val out = new ByteArrayOutputStream()
    entry.write(out)
    val in = new ByteArrayInputStream(out.toByteArray)
    SubEntry.read(in)
  }

  "master database packer" should {
    "pack a single game" in {
      val ref = GameRef("ref00000", Some(Color.White), SpeedGroup.Blitz, 1230)
      val entry = SubEntry.fromGameRef(ref, Left(Uci.Move(Pos.E2, Pos.E4)))
      pipe(entry).gameRefs mustEqual List(ref)
    }
    "pack two games" in {
      val move = Left(Uci.Move(Pos.D2, Pos.D4))
      val g1 = GameRef("g0000001", Some(Color.Black), SpeedGroup.Classical, 2300)
      val g2 = GameRef("g0000002", None, SpeedGroup.Classical, 2455)
      val entry = SubEntry.fromGameRef(g1, move).withGameRef(g2, move)
      pipe(entry).gameRefs mustEqual List(g2, g1)
    }
  }
}
Example 34
Source File: PackHelperTest.scala From lila-openingexplorer with GNU Affero General Public License v3.0 | 5 votes |
package lila.openingexplorer

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream }

import org.specs2.mutable._

import chess.format.Uci
import chess.Pos
import chess.{ King, Rook }

class PackHelperTest extends Specification with PackHelper {

  def pipeMove(move: Either[Uci.Move, Uci.Drop]): Either[Uci.Move, Uci.Drop] = {
    val out = new ByteArrayOutputStream()
    writeUci(out, move)
    val in = new ByteArrayInputStream(out.toByteArray)
    readUci(in)
  }

  "the pack helper" should {
    "correctly pack moves" in {
      val move = Uci.Move(Pos.E2, Pos.E3)
      pipeMove(Left(move)) mustEqual Left(move)
    }
    "correctly pack promotions" in {
      val move = Uci.Move(Pos.A7, Pos.A8, Some(Rook))
      pipeMove(Left(move)) mustEqual Left(move)
    }
    "correctly pack drops" in {
      val drop = Uci.Drop(King, Pos.H3)
      pipeMove(Right(drop)) mustEqual Right(drop)
    }
  }

  List(7, 127, 128, 129, 254, 255, 256, 257, 1234, 864197252500L).foreach { x =>
    "correctly pack uint: " + x in {
      val out = new ByteArrayOutputStream()
      writeUint(out, x)
      val in = new ByteArrayInputStream(out.toByteArray)
      readUint(in) mustEqual x
    }
  }
}
Example 35
Source File: Json4sSerialization.scala From kafka-serialization with Apache License 2.0 | 5 votes |
package com.ovoenergy.kafka.serialization.json4s

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStreamReader, OutputStreamWriter}
import java.nio.charset.StandardCharsets

import com.ovoenergy.kafka.serialization.core._
import org.apache.kafka.common.serialization.{Deserializer => KafkaDeserializer, Serializer => KafkaSerializer}
import org.json4s.Formats
import org.json4s.native.Serialization.{read, write}

import scala.reflect.ClassTag
import scala.reflect.runtime.universe._

trait Json4sSerialization {

  def json4sSerializer[T <: AnyRef](implicit jsonFormats: Formats): KafkaSerializer[T] = serializer { (_, data) =>
    val bout = new ByteArrayOutputStream()
    val writer = new OutputStreamWriter(bout, StandardCharsets.UTF_8)

    // TODO Use scala-arm
    try {
      write(data, writer)
      writer.flush()
    } finally {
      writer.close()
    }

    bout.toByteArray
  }

  def json4sDeserializer[T: TypeTag](implicit jsonFormats: Formats): KafkaDeserializer[T] = deserializer { (_, data) =>
    val tt = implicitly[TypeTag[T]]
    implicit val cl = ClassTag[T](tt.mirror.runtimeClass(tt.tpe))
    read[T](new InputStreamReader(new ByteArrayInputStream(data), StandardCharsets.UTF_8))
  }
}
Example 36
Source File: SpraySerialization.scala From kafka-serialization with Apache License 2.0 | 5 votes |
package com.ovoenergy.kafka.serialization.spray

import java.io.{ByteArrayOutputStream, OutputStreamWriter}
import java.nio.charset.StandardCharsets

import org.apache.kafka.common.serialization.{Deserializer => KafkaDeserializer, Serializer => KafkaSerializer}
import spray.json._

import com.ovoenergy.kafka.serialization.core._

trait SpraySerialization {

  def spraySerializer[T](implicit format: JsonWriter[T]): KafkaSerializer[T] = serializer { (_, data) =>
    val bout = new ByteArrayOutputStream()
    val osw = new OutputStreamWriter(bout, StandardCharsets.UTF_8)

    // TODO use scala-arm
    try {
      osw.write(data.toJson.compactPrint)
      osw.flush()
    } finally {
      osw.close()
    }

    bout.toByteArray
  }

  def sprayDeserializer[T](implicit format: JsonReader[T]): KafkaDeserializer[T] = deserializer { (_, data) =>
    JsonParser(ParserInput(data)).convertTo[T]
  }
}
Example 37
Source File: Serialize.scala From morpheus with Apache License 2.0 | 5 votes |
package org.opencypher.morpheus.impl.expressions

import java.io.ByteArrayOutputStream

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode, _}
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
import org.opencypher.morpheus.impl.expressions.EncodeLong.encodeLong
import org.opencypher.morpheus.impl.expressions.Serialize._
import org.opencypher.okapi.impl.exception

case class Serialize(children: Seq[Expression]) extends Expression {

  override def dataType: DataType = BinaryType

  override def nullable: Boolean = false

  // TODO: Only write length if more than one column is serialized
  override def eval(input: InternalRow): Any = {
    // TODO: Reuse from a pool instead of allocating a new one for each serialization
    val out = new ByteArrayOutputStream()
    children.foreach { child =>
      child.dataType match {
        case BinaryType => write(child.eval(input).asInstanceOf[Array[Byte]], out)
        case StringType => write(child.eval(input).asInstanceOf[UTF8String], out)
        case IntegerType => write(child.eval(input).asInstanceOf[Int], out)
        case LongType => write(child.eval(input).asInstanceOf[Long], out)
        case other => throw exception.UnsupportedOperationException(s"Cannot serialize Spark data type $other.")
      }
    }
    out.toByteArray
  }

  override protected def doGenCode(
    ctx: CodegenContext,
    ev: ExprCode
  ): ExprCode = {
    ev.isNull = FalseLiteral
    val out = ctx.freshName("out")
    val serializeChildren = children.map { child =>
      val childEval = child.genCode(ctx)
      s"""|${childEval.code}
          |if (!${childEval.isNull}) {
          |  ${Serialize.getClass.getName.dropRight(1)}.write(${childEval.value}, $out);
          |}""".stripMargin
    }.mkString("\n")
    val baos = classOf[ByteArrayOutputStream].getName
    ev.copy(
      code = code"""|$baos $out = new $baos();
                    |$serializeChildren
                    |byte[] ${ev.value} = $out.toByteArray();""".stripMargin)
  }
}

object Serialize {

  val supportedTypes: Set[DataType] = Set(BinaryType, StringType, IntegerType, LongType)

  @inline final def write(value: Array[Byte], out: ByteArrayOutputStream): Unit = {
    out.write(encodeLong(value.length))
    out.write(value)
  }

  @inline final def write(
    value: Boolean,
    out: ByteArrayOutputStream
  ): Unit = write(if (value) 1.toLong else 0.toLong, out)

  @inline final def write(value: Byte, out: ByteArrayOutputStream): Unit = write(value.toLong, out)

  @inline final def write(value: Int, out: ByteArrayOutputStream): Unit = write(value.toLong, out)

  @inline final def write(value: Long, out: ByteArrayOutputStream): Unit = write(encodeLong(value), out)

  @inline final def write(value: UTF8String, out: ByteArrayOutputStream): Unit = write(value.getBytes, out)

  @inline final def write(value: String, out: ByteArrayOutputStream): Unit = write(value.getBytes, out)
}
Example 38
Source File: ExampleTest.scala From morpheus with Apache License 2.0 | 5 votes |
package org.opencypher.morpheus.examples import java.io.{ByteArrayOutputStream, PrintStream} import java.net.URI import org.junit.runner.RunWith import org.opencypher.okapi.testing.Bag._ import org.scalatest.{BeforeAndAfterAll, FunSpec, Matchers} import org.scalatestplus.junit.JUnitRunner import scala.io.Source @RunWith(classOf[JUnitRunner]) abstract class ExampleTest extends FunSpec with Matchers with BeforeAndAfterAll { private val oldStdOut = System.out protected val emptyOutput: String = "" protected def validate(app: => Unit, expectedOut: URI): Unit = { validate(app, Source.fromFile(expectedOut).mkString) } protected def validateBag(app: => Unit, expectedOut: URI): Unit = { val source = Source.fromFile(expectedOut) val expectedLines = source.getLines().toList val appLines = capture(app).split(System.lineSeparator()) withClue(s"${appLines.mkString("\n")} not equal to ${expectedLines.mkString("\n")}") { appLines.toBag shouldEqual expectedLines.toBag } } protected def validate(app: => Unit, expectedOut: String): Unit = { capture(app) shouldEqual expectedOut } private def capture(app: => Unit): String = { val charset = "UTF-8" val outCapture = new ByteArrayOutputStream() val printer = new PrintStream(outCapture, true, charset) Console.withOut(printer)(app) outCapture.toString(charset) } override protected def afterAll(): Unit = { System.setOut(oldStdOut) super.afterAll() } }
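The capture helper above is the standard Console.withOut redirection into a ByteArrayOutputStream; a stripped-down, dependency-free sketch of the same idea:

import java.io.{ByteArrayOutputStream, PrintStream}

object CaptureStdOutExample {

  def capture(thunk: => Unit): String = {
    val buffer = new ByteArrayOutputStream()
    val printer = new PrintStream(buffer, true, "UTF-8")
    // Console.withOut redirects println/print inside the thunk to the given stream.
    Console.withOut(printer)(thunk)
    buffer.toString("UTF-8")
  }

  def main(args: Array[String]): Unit = {
    val out = capture { println("hello from the captured block") }
    assert(out.trim == "hello from the captured block")
    println("captured: " + out.trim)
  }
}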
Example 39
Source File: RawTextSender.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util

import java.io.{ByteArrayOutputStream, IOException}
import java.net.ServerSocket
import java.nio.ByteBuffer

import scala.io.Source

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.util.IntParam

private[streaming] object RawTextSender extends Logging {
  def main(args: Array[String]) {
    if (args.length != 4) {
      // scalastyle:off println
      System.err.println("Usage: RawTextSender <port> <file> <blockSize> <bytesPerSec>")
      // scalastyle:on println
      System.exit(1)
    }
    // Parse the arguments using a pattern match
    val Array(IntParam(port), file, IntParam(blockSize), IntParam(bytesPerSec)) = args

    // Repeat the input data multiple times to fill in a buffer
    val lines = Source.fromFile(file).getLines().toArray
    val bufferStream = new ByteArrayOutputStream(blockSize + 1000)
    val ser = new KryoSerializer(new SparkConf()).newInstance()
    val serStream = ser.serializeStream(bufferStream)
    var i = 0
    while (bufferStream.size < blockSize) {
      serStream.writeObject(lines(i))
      i = (i + 1) % lines.length
    }
    val array = bufferStream.toByteArray

    val countBuf = ByteBuffer.wrap(new Array[Byte](4))
    countBuf.putInt(array.length)
    countBuf.flip()

    val serverSocket = new ServerSocket(port)
    logInfo("Listening on port " + port)

    while (true) {
      val socket = serverSocket.accept()
      logInfo("Got a new connection")
      val out = new RateLimitedOutputStream(socket.getOutputStream, bytesPerSec)
      try {
        while (true) {
          out.write(countBuf.array)
          out.write(array)
        }
      } catch {
        case e: IOException =>
          logError("Client disconnected")
      } finally {
        socket.close()
      }
    }
  }
}
Example 40
Source File: RateLimitedOutputStreamSuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util import java.io.ByteArrayOutputStream import java.nio.charset.StandardCharsets import java.util.concurrent.TimeUnit._ import org.apache.spark.SparkFunSuite class RateLimitedOutputStreamSuite extends SparkFunSuite { private def benchmark[U](f: => U): Long = { val start = System.nanoTime f System.nanoTime - start } test("write") { val underlying = new ByteArrayOutputStream val data = "X" * 41000 val stream = new RateLimitedOutputStream(underlying, desiredBytesPerSec = 10000) val elapsedNs = benchmark { stream.write(data.getBytes(StandardCharsets.UTF_8)) } val seconds = SECONDS.convert(elapsedNs, NANOSECONDS) assert(seconds >= 4, s"Seconds value ($seconds) is less than 4.") assert(seconds <= 30, s"Took more than 30 seconds ($seconds) to write data.") assert(underlying.toString("UTF-8") === data) } }
Example 41
Source File: ByteBufferOutputStream.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.ByteArrayOutputStream import java.nio.ByteBuffer private[spark] class ByteBufferOutputStream(capacity: Int) extends ByteArrayOutputStream(capacity) { def this() = this(32) def getCount(): Int = count private[this] var closed: Boolean = false override def write(b: Int): Unit = { require(!closed, "cannot write to a closed ByteBufferOutputStream") super.write(b) } override def write(b: Array[Byte], off: Int, len: Int): Unit = { require(!closed, "cannot write to a closed ByteBufferOutputStream") super.write(b, off, len) } override def reset(): Unit = { require(!closed, "cannot reset a closed ByteBufferOutputStream") super.reset() } override def close(): Unit = { if (!closed) { super.close() closed = true } } def toByteBuffer: ByteBuffer = { require(closed, "can only call toByteBuffer() after ByteBufferOutputStream has been closed") ByteBuffer.wrap(buf, 0, count) } }
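A short usage sketch for the class above. Note the real class is declared private[spark], so the sketch assumes it is compiled inside an org.apache.spark package; the payload string is made up for illustration:

package org.apache.spark.util // required because ByteBufferOutputStream is private[spark]

import java.nio.charset.StandardCharsets

object ByteBufferOutputStreamExample {
  def main(args: Array[String]): Unit = {
    val out = new ByteBufferOutputStream()
    out.write("payload".getBytes(StandardCharsets.UTF_8))
    out.close()                  // toByteBuffer requires the stream to be closed first
    val buf = out.toByteBuffer   // wraps the internal buffer without copying it

    val round = new Array[Byte](buf.remaining())
    buf.get(round)
    println(new String(round, StandardCharsets.UTF_8)) // payload
  }
}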
Example 42
Source File: PythonRDDSuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.api.python

import java.io.{ByteArrayOutputStream, DataOutputStream}
import java.nio.charset.StandardCharsets

import org.apache.spark.SparkFunSuite

class PythonRDDSuite extends SparkFunSuite {

  test("Writing large strings to the worker") {
    val input: List[String] = List("a" * 100000)
    val buffer = new DataOutputStream(new ByteArrayOutputStream)
    PythonRDD.writeIteratorToStream(input.iterator, buffer)
  }

  test("Handle nulls gracefully") {
    val buffer = new DataOutputStream(new ByteArrayOutputStream)
    // Should not have NPE when write an Iterator with null in it
    // The correctness will be tested in Python
    PythonRDD.writeIteratorToStream(Iterator("a", null), buffer)
    PythonRDD.writeIteratorToStream(Iterator(null, "a"), buffer)
    PythonRDD.writeIteratorToStream(Iterator("a".getBytes(StandardCharsets.UTF_8), null), buffer)
    PythonRDD.writeIteratorToStream(Iterator(null, "a".getBytes(StandardCharsets.UTF_8)), buffer)
    PythonRDD.writeIteratorToStream(Iterator((null, null), ("a", null), (null, "b")), buffer)
    PythonRDD.writeIteratorToStream(Iterator(
      (null, null),
      ("a".getBytes(StandardCharsets.UTF_8), null),
      (null, "b".getBytes(StandardCharsets.UTF_8))), buffer)
  }
}
Example 43
Source File: GenericAvroSerializerSuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.serializer import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.nio.ByteBuffer import com.esotericsoftware.kryo.io.{Input, Output} import org.apache.avro.{Schema, SchemaBuilder} import org.apache.avro.generic.GenericData.Record import org.apache.spark.{SharedSparkContext, SparkFunSuite} class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext { conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") val schema : Schema = SchemaBuilder .record("testRecord").fields() .requiredString("data") .endRecord() val record = new Record(schema) record.put("data", "test data") test("schema compression and decompression") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema)))) } test("record serialization and deserialization") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val outputStream = new ByteArrayOutputStream() val output = new Output(outputStream) genericSer.serializeDatum(record, output) output.flush() output.close() val input = new Input(new ByteArrayInputStream(outputStream.toByteArray)) assert(genericSer.deserializeDatum(input) === record) } test("uses schema fingerprint to decrease message size") { val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema) val output = new Output(new ByteArrayOutputStream()) val beginningNormalPosition = output.total() genericSerFull.serializeDatum(record, output) output.flush() val normalLength = output.total - beginningNormalPosition conf.registerAvroSchemas(schema) val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema) val beginningFingerprintPosition = output.total() genericSerFinger.serializeDatum(record, output) val fingerprintLength = output.total - beginningFingerprintPosition assert(fingerprintLength < normalLength) } test("caches previously seen schemas") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val compressedSchema = genericSer.compress(schema) val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema)) assert(compressedSchema.eq(genericSer.compress(schema))) assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema)))) } }
Example 44
Source File: MqttConfig.scala From akka-iot-mqtt-v2 with GNU Lesser General Public License v3.0 | 5 votes |
package akkaiot

import scala.concurrent.duration._

import java.io.Serializable
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
import java.io.ObjectInputStream
import java.io.ObjectOutputStream

import com.sandinh.paho.akka._
import com.sandinh.paho.akka.MqttPubSub._

object MqttConfig {
  val topic = "akka-iot-mqtt-topic"

  // Pub-Sub config
  val psConfig = PSConfig(
    brokerUrl = "tcp://test.mosquitto.org:1883",
    userName = null,
    password = null,
    stashTimeToLive = 1.minute,
    stashCapacity = 8000,
    reconnectDelayMin = 10.millis,
    reconnectDelayMax = 30.seconds,
    cleanSession = false
  )

  // Serialize object to byte array
  def writeToByteArray(obj: Any): Array[Byte] = {
    val baos = new ByteArrayOutputStream
    val oos = new ObjectOutputStream(baos)
    try {
      oos.writeObject(obj)
      baos.toByteArray
    } finally {
      try {
        oos.close
      } catch {
        case _: Throwable => // Do nothing
      }
    }
  }

  // Deserialize object from byte array
  def readFromByteArray[A](bytes: Array[Byte]): A = {
    val bais = new ByteArrayInputStream(bytes)
    val ois = new ObjectInputStream(bais)
    try {
      val obj = ois.readObject
      obj.asInstanceOf[A]
    } finally {
      try {
        ois.close
      } catch {
        case _: Throwable => // Do nothing
      }
    }
  }
}
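A round-trip sketch using the two helpers above, with a hypothetical SensorReading case class standing in for the project's real work/result messages:

object MqttConfigExample {

  // Hypothetical message type; Scala case classes are Serializable by default.
  case class SensorReading(deviceId: String, temperature: Double)

  def main(args: Array[String]): Unit = {
    val original = SensorReading("device-42", 21.5)

    // Java-serialize into a byte array and back using the helpers defined above.
    val bytes: Array[Byte] = MqttConfig.writeToByteArray(original)
    val restored = MqttConfig.readFromByteArray[SensorReading](bytes)

    println(restored == original) // true
  }
}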
Example 45
Source File: EventHistoryFileReportingSuite.scala From sparklens with Apache License 2.0 | 5 votes |
package com.qubole.sparklens.app

import java.io.{ByteArrayOutputStream, PrintStream}

import com.qubole.sparklens.TestUtils
import org.scalatest.FunSuite

class EventHistoryFileReportingSuite extends FunSuite {

  test("Reporting from sparklens and event-history should be same") {
    val eventHistoryFile = s"${System.getProperty("user.dir")}" +
      s"/src/test/event-history-test-files/local-1532512550423"

    // corresponding sparklens dump is in same location and name, but additional suffix
    val sparklensDump = TestUtils.getFileContents(eventHistoryFile + ".sparklens.json")

    validateOutput(outputFromSparklensDump(sparklensDump),
      outputFromEventHistoryReport(eventHistoryFile))
  }

  private def outputFromSparklensDump(dump: String): String = {
    val out = new ByteArrayOutputStream()
    Console.withOut(new PrintStream(out)) {
      ReporterApp.startAnalysersFromString(dump)
    }
    out.toString
  }

  private def outputFromEventHistoryReport(file: String): String = {
    val out = new ByteArrayOutputStream()
    Console.withOut(new PrintStream(out)) {
      new EventHistoryReporter(file)
    }
    out.toString
  }

  private def validateOutput(file1: String, file2: String) = {
    assert(file1.size == file2.size,
      "output size is different between eventlogs report and sparklens.json report")
    assert(file1.lines.zip(file2.lines).filterNot(x => x._1 == x._2).size == 0,
      "Report lines are not matching between eventlogs report and sparklens.json report")
  }
}
Example 46
Source File: CompatibilitySuite.scala From sparklens with Apache License 2.0 | 5 votes |
import java.io.{ByteArrayOutputStream, FileNotFoundException, PrintStream}

import com.qubole.sparklens.TestUtils
import com.qubole.sparklens.app.ReporterApp
import org.scalatest.FunSuite

import scala.util.control.Breaks._

class CompatibilitySuite extends FunSuite {

  test("should be able to report on previously generated sparklens dumps") {
    breakable {
      // run for the versions of sparklens output saved
      (1 to 100).foreach(x => {
        try {
          val testInput = TestUtils.getFileContents(
            s"${System.getProperty("user.dir")}/src/test/compatibility-files/version-${x}.json")
          val testOut = new ByteArrayOutputStream()
          Console.withOut(new PrintStream(testOut)) {
            ReporterApp.startAnalysersFromString(testInput)
          }
          val testOutput = testOut.toString
          val olderOutput = TestUtils.getFileContents(
            s"${System.getProperty("user.dir")}/src/test/compatibility-files/version-${x}.output")
          olderOutput.split("\n").foreach(line => {
            assert(testOutput.contains(line))
          })
        } catch {
          case e: FileNotFoundException => break
        }
      })
    }
  }
}
Example 47
Source File: RawTextSender.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util import java.io.{ByteArrayOutputStream, IOException} import java.net.ServerSocket import java.nio.ByteBuffer import scala.io.Source import org.apache.spark.{SparkConf, Logging} import org.apache.spark.serializer.KryoSerializer import org.apache.spark.util.IntParam private[streaming] object RawTextSender extends Logging { def main(args: Array[String]) { if (args.length != 4) { System.err.println("Usage: RawTextSender <port> <file> <blockSize> <bytesPerSec>") System.exit(1) } // Parse the arguments using a pattern match val Array(IntParam(port), file, IntParam(blockSize), IntParam(bytesPerSec)) = args // Repeat the input data multiple times to fill in a buffer val lines = Source.fromFile(file).getLines().toArray val bufferStream = new ByteArrayOutputStream(blockSize + 1000) val ser = new KryoSerializer(new SparkConf()).newInstance() val serStream = ser.serializeStream(bufferStream) var i = 0 while (bufferStream.size < blockSize) { serStream.writeObject(lines(i)) i = (i + 1) % lines.length } val array = bufferStream.toByteArray val countBuf = ByteBuffer.wrap(new Array[Byte](4)) countBuf.putInt(array.length) countBuf.flip() val serverSocket = new ServerSocket(port) logInfo("Listening on port " + port) while (true) { val socket = serverSocket.accept() logInfo("Got a new connection") val out = new RateLimitedOutputStream(socket.getOutputStream, bytesPerSec) try { while (true) { out.write(countBuf.array) out.write(array) } } catch { case e: IOException => logError("Client disconnected") } finally { socket.close() } } } }
Example 48
Source File: RateLimitedOutputStreamSuite.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util import java.io.ByteArrayOutputStream import java.util.concurrent.TimeUnit._ import org.apache.spark.SparkFunSuite class RateLimitedOutputStreamSuite extends SparkFunSuite { private def benchmark[U](f: => U): Long = { val start = System.nanoTime f System.nanoTime - start } test("write") { val underlying = new ByteArrayOutputStream val data = "X" * 41000 val stream = new RateLimitedOutputStream(underlying, desiredBytesPerSec = 10000) val elapsedNs = benchmark { stream.write(data.getBytes("UTF-8")) } val seconds = SECONDS.convert(elapsedNs, NANOSECONDS) assert(seconds >= 4, s"Seconds value ($seconds) is less than 4.") assert(seconds <= 30, s"Took more than 30 seconds ($seconds) to write data.") assert(underlying.toString("UTF-8") === data) } }
Example 49
Source File: PythonRDDSuite.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.api.python

import java.io.{ByteArrayOutputStream, DataOutputStream}

import org.apache.spark.SparkFunSuite

class PythonRDDSuite extends SparkFunSuite {

  test("Writing large strings to the worker") {
    val input: List[String] = List("a" * 100000)
    val buffer = new DataOutputStream(new ByteArrayOutputStream)
    PythonRDD.writeIteratorToStream(input.iterator, buffer)
  }

  test("Handle nulls gracefully") {
    val buffer = new DataOutputStream(new ByteArrayOutputStream)
    // Should not have NPE when write an Iterator with null in it
    // The correctness will be tested in Python
    PythonRDD.writeIteratorToStream(Iterator("a", null), buffer)
    PythonRDD.writeIteratorToStream(Iterator(null, "a"), buffer)
    PythonRDD.writeIteratorToStream(Iterator("a".getBytes, null), buffer)
    PythonRDD.writeIteratorToStream(Iterator(null, "a".getBytes), buffer)
    PythonRDD.writeIteratorToStream(Iterator((null, null), ("a", null), (null, "b")), buffer)
    PythonRDD.writeIteratorToStream(
      Iterator((null, null), ("a".getBytes, null), (null, "b".getBytes)), buffer)
  }
}
Example 50
Source File: Serialization.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.commons.serialization import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} trait Serialization { def deserialize[T](bytes: Array[Byte]): T = { val bufferIn = new ByteArrayInputStream(bytes) val streamIn = new ObjectInputStream(bufferIn) try { streamIn.readObject().asInstanceOf[T] } finally { streamIn.close() } } def serialize[T](objectToSerialize: T): Array[Byte] = { val byteArrayOutputStream: ByteArrayOutputStream = new ByteArrayOutputStream() val oos = new ObjectOutputStream(byteArrayOutputStream) try { oos.writeObject(objectToSerialize) oos.flush() byteArrayOutputStream.toByteArray } finally { oos.close() } } def serializeDeserialize[T](obj: T): T = deserialize[T](serialize[T](obj)) } object Serialization extends Serialization
Example 51
Source File: MavenAddManagedDependenciesSubscriberTest.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.maven.report import java.io.{ByteArrayOutputStream, File} import ch.qos.logback.classic.spi.LoggingEvent import org.scalatest.{FlatSpecLike, Matchers} class MavenAddManagedDependenciesSubscriberTest extends FlatSpecLike with Matchers { val projectRoot = new File(getClass.getClassLoader.getResource(".").getFile) "MavenAddManagedDependenciesSubscriber" should "not accept unexpected events" in { val outputStream = new ByteArrayOutputStream val subscriber = new MavenAddDependenciesSubscriber(projectRoot) subscriber.accept("hahah") subscriber.dumpTo(outputStream) outputStream.toByteArray should be (Array.empty[Byte]) val loggingEvent = new LoggingEvent loggingEvent.setLoggerName("fake") loggingEvent.setMessage("Some random message") subscriber.accept(loggingEvent) subscriber.dumpTo(outputStream) outputStream.toByteArray should be (Array.empty[Byte]) val loggingEvent2 = new LoggingEvent loggingEvent2.setLoggerName("MavenAddManagedDependenciesRule") loggingEvent2.setMessage("Some random message") subscriber.accept(loggingEvent2) subscriber.dumpTo(outputStream) outputStream.toByteArray should be (Array.empty[Byte]) } "MavenAddManagedDependenciesSubscriber" should "accept expected events" in { val outputStream = new ByteArrayOutputStream val subscriber = new MavenAddManagedDependenciesSubscriber(projectRoot) val loggingEvent = new LoggingEvent loggingEvent.setLoggerName("MavenAddManagedDependenciesRule") loggingEvent.setMessage("{} added managed dependency {} to {}") loggingEvent.setArgumentArray(Array("MavenAddManagedDependenciesSubscriber", "xxxx:yyyy:zzzz", new File(projectRoot, "pom.xml"))) subscriber.accept(loggingEvent) subscriber.dumpTo(outputStream) val result = new String(outputStream.toByteArray) result should include ("#### File [pom.xml](pom.xml)") result should include ("xxxx:yyyy:zzzz") } }
Example 52
Source File: MavenExcludeDependenciesSubscriberTest.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.maven.report import java.io.{ByteArrayOutputStream, File} import ch.qos.logback.classic.spi.LoggingEvent import org.scalatest.{FlatSpecLike, Matchers} class MavenExcludeDependenciesSubscriberTest extends FlatSpecLike with Matchers { val projectRoot = new File(getClass.getClassLoader.getResource(".").getFile) "MavenExcludeDependenciesSubscriber" should "not accept unexpected events" in { val outputStream = new ByteArrayOutputStream val subscriber = new MavenExcludeDependenciesSubscriber(projectRoot) subscriber.accept("hahah") subscriber.dumpTo(outputStream) outputStream.toByteArray should be (Array.empty[Byte]) val loggingEvent = new LoggingEvent loggingEvent.setLoggerName("fake") loggingEvent.setMessage("Some random message") subscriber.accept(loggingEvent) subscriber.dumpTo(outputStream) outputStream.toByteArray should be (Array.empty[Byte]) val loggingEvent2 = new LoggingEvent loggingEvent2.setLoggerName("MavenExcludeDependenciesRule") loggingEvent2.setMessage("Some random message") subscriber.accept(loggingEvent2) subscriber.dumpTo(outputStream) outputStream.toByteArray should be (Array.empty[Byte]) } "MavenExcludeDependenciesSubscriber" should "accept expected events" in { val outputStream = new ByteArrayOutputStream val subscriber = new MavenExcludeDependenciesSubscriber(projectRoot) val loggingEvent = new LoggingEvent loggingEvent.setLoggerName("MavenExcludeDependenciesRule") loggingEvent.setMessage("{} excluded {} from {} in {}") loggingEvent.setArgumentArray(Array( "MavenExcludeDependenciesRule", Set("zzzz:yyyy:xxxx"), "xxxx:yyyy:zzzz", new File(projectRoot, "pom.xml") )) subscriber.accept(loggingEvent) subscriber.dumpTo(outputStream) val result = new String(outputStream.toByteArray) result should include ("#### File [pom.xml](pom.xml)") result should include ("xxxx:yyyy:zzzz") result should include ("zzzz:yyyy:xxxx") } }
Example 53
Source File: MavenRemoveDependenciesSubscriberTest.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.maven.report import java.io.{ByteArrayOutputStream, File} import ch.qos.logback.classic.spi.LoggingEvent import org.scalatest.{FlatSpecLike, Matchers} class MavenRemoveDependenciesSubscriberTest extends FlatSpecLike with Matchers { val projectRoot = new File(getClass.getClassLoader.getResource(".").getFile) "MavenRemoveDependenciesSubscriber" should "not accept unexpected events" in { val outputStream = new ByteArrayOutputStream val subscriber = new MavenRemoveDependenciesSubscriber(projectRoot) subscriber.accept("hahah") subscriber.dumpTo(outputStream) outputStream.toByteArray should be (Array.empty[Byte]) val loggingEvent = new LoggingEvent loggingEvent.setLoggerName("fake") loggingEvent.setMessage("Some random message") subscriber.accept(loggingEvent) subscriber.dumpTo(outputStream) outputStream.toByteArray should be (Array.empty[Byte]) val loggingEvent2 = new LoggingEvent loggingEvent2.setLoggerName("MavenRemoveDependenciesRule") loggingEvent2.setMessage("Some random message") subscriber.accept(loggingEvent2) subscriber.dumpTo(outputStream) outputStream.toByteArray should be (Array.empty[Byte]) } "MavenRemoveDependenciesSubscriber" should "accept expected events" in { val outputStream = new ByteArrayOutputStream val subscriber = new MavenRemoveDependenciesSubscriber(projectRoot) val loggingEvent = new LoggingEvent loggingEvent.setLoggerName("MavenRemoveDependenciesRule") loggingEvent.setMessage("{} removed dependency {} from {}") loggingEvent.setArgumentArray(Array("MavenRemoveDependenciesRule", "xxxx:yyyy:zzzz", new File(projectRoot, "pom.xml"))) subscriber.accept(loggingEvent) subscriber.dumpTo(outputStream) val result = new String(outputStream.toByteArray) result should include ("#### File [pom.xml](pom.xml)") result should include ("xxxx:yyyy:zzzz") } }
Example 54
Source File: MavenAddDependenciesSubscriberTest.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.maven.report import java.io.{ByteArrayOutputStream, File} import ch.qos.logback.classic.spi.LoggingEvent import org.scalatest.{FlatSpecLike, Matchers} class MavenAddDependenciesSubscriberTest extends FlatSpecLike with Matchers { val projectRoot = new File(getClass.getClassLoader.getResource(".").getFile) "MavenAddDependenciesSubscriber" should "not accept unexpected events" in { val outputStream = new ByteArrayOutputStream val subscriber = new MavenAddDependenciesSubscriber(projectRoot) subscriber.accept("hahah") subscriber.dumpTo(outputStream) outputStream.toByteArray should be (Array.empty[Byte]) val loggingEvent = new LoggingEvent loggingEvent.setLoggerName("fake") loggingEvent.setMessage("Some random message") subscriber.accept(loggingEvent) subscriber.dumpTo(outputStream) outputStream.toByteArray should be (Array.empty[Byte]) val loggingEvent2 = new LoggingEvent loggingEvent2.setLoggerName("MavenAddDependenciesRule") loggingEvent2.setMessage("Some random message") subscriber.accept(loggingEvent2) subscriber.dumpTo(outputStream) outputStream.toByteArray should be (Array.empty[Byte]) } "MavenAddDependenciesSubscriber" should "accept expected events" in { val outputStream = new ByteArrayOutputStream val subscriber = new MavenAddDependenciesSubscriber(projectRoot) val loggingEvent = new LoggingEvent loggingEvent.setLoggerName("MavenAddDependenciesRule") loggingEvent.setMessage("{} added dependency {} to {}") loggingEvent.setArgumentArray(Array("MavenAddDependenciesRule", "xxxx:yyyy:zzzz", new File(projectRoot, "pom.xml"))) subscriber.accept(loggingEvent) subscriber.dumpTo(outputStream) val result = new String(outputStream.toByteArray) result should include ("#### File [pom.xml](pom.xml)") result should include ("xxxx:yyyy:zzzz") } }
Example 55
Source File: MavenDependenciesMappingSubscriberTest.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.maven.report import java.io.{ByteArrayOutputStream, File} import ch.qos.logback.classic.spi.LoggingEvent import org.scalatest.{FlatSpecLike, Matchers} class MavenDependenciesMappingSubscriberTest extends FlatSpecLike with Matchers { val projectRoot = new File(getClass.getClassLoader.getResource(".").getFile) "MavenDependenciesMappingSubscriber" should "not accept unexpected events" in { val outputStream = new ByteArrayOutputStream val subscriber = new MavenDependenciesMappingSubscriber(projectRoot) subscriber.accept("hahah") subscriber.dumpTo(outputStream) outputStream.toByteArray should be (Array.empty[Byte]) val loggingEvent = new LoggingEvent loggingEvent.setLoggerName("fake") loggingEvent.setMessage("Some random message") subscriber.accept(loggingEvent) subscriber.dumpTo(outputStream) outputStream.toByteArray should be (Array.empty[Byte]) val loggingEvent2 = new LoggingEvent loggingEvent2.setLoggerName("MavenDependenciesMappingRule") loggingEvent2.setMessage("Some random message") subscriber.accept(loggingEvent2) subscriber.dumpTo(outputStream) outputStream.toByteArray should be (Array.empty[Byte]) } "MavenDependenciesMappingSubscriber" should "accept expected events" in { val outputStream = new ByteArrayOutputStream val subscriber = new MavenDependenciesMappingSubscriber(projectRoot) val loggingEvent = new LoggingEvent loggingEvent.setLoggerName("MavenDependenciesMappingRule") loggingEvent.setMessage("{} mapped {} to {} in {}") loggingEvent.setArgumentArray(Array( "MavenDependenciesMappingRule", Set("zzzz:yyyy:xxxx"), Set("xxxx:yyyy:zzzz"), new File(projectRoot, "pom.xml") )) subscriber.accept(loggingEvent) subscriber.dumpTo(outputStream) val result = new String(outputStream.toByteArray) result should include ("#### File [pom.xml](pom.xml)") result should include ("xxxx:yyyy:zzzz") result should include ("zzzz:yyyy:xxxx") } }
Example 56
Source File: ArtifactsSummarySubscriberTest.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.maven.report import java.io.{ByteArrayOutputStream, File} import ch.qos.logback.classic.spi.LoggingEvent import org.scalatest.{FlatSpecLike, Matchers} class ArtifactsSummarySubscriberTest extends FlatSpecLike with Matchers { val projectRoot = new File(getClass.getClassLoader.getResource(".").getFile) "ArtifactsSummarySubscriber" should "not accept unexpected events" in { val outputStream = new ByteArrayOutputStream val subscriber = new ArtifactsSummarySubscriber(projectRoot) subscriber.accept("hahah") subscriber.dumpTo(outputStream) outputStream.toByteArray should be (Array.empty[Byte]) val loggingEvent = new LoggingEvent loggingEvent.setMessage("Some random message") subscriber.accept(loggingEvent) subscriber.dumpTo(outputStream) outputStream.toByteArray should be (Array.empty[Byte]) } "ArtifactsSummarySubscriber" should "accept expected events" in { val outputStream = new ByteArrayOutputStream val subscriber = new ArtifactsSummarySubscriber(projectRoot) val loggingEvent = new LoggingEvent loggingEvent.setMessage("Found maven pom {} for artifact {}") loggingEvent.setArgumentArray(Array(new File(projectRoot, "abc/pom.xml"), "abc")) subscriber.accept(loggingEvent) subscriber.dumpTo(outputStream) val result = new String(outputStream.toByteArray) result should include ("abc/pom.xml") result should include ("abc") } }
Example 57
Source File: UpgradeSummarySubscriberTest.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.report.impl import java.io.ByteArrayOutputStream import ch.qos.logback.classic.spi.LoggingEvent import org.scalatest.{FlatSpecLike, Matchers} class UpgradeSummarySubscriberTest extends FlatSpecLike with Matchers { "UpgradeSummarySubscriber" should "not accept unexpected events" in { val outputStream = new ByteArrayOutputStream val subscriber = new UpgradeSummarySubscriber subscriber.accept("hahah") subscriber.dumpTo(outputStream) outputStream.toByteArray should be (Array.empty[Byte]) val loggingEvent = new LoggingEvent loggingEvent.setLoggerName("fake") loggingEvent.setMessage("Some random message") subscriber.accept(loggingEvent) subscriber.dumpTo(outputStream) outputStream.toByteArray should be (Array.empty[Byte]) } "UpgradeSummarySubscriber" should "accept expected events" in { val outputStream = new ByteArrayOutputStream val subscriber = new UpgradeSummarySubscriber val loggingEvent = new LoggingEvent loggingEvent.setMessage("Rule some_rule was applied to 3 files") subscriber.accept(loggingEvent) subscriber.dumpTo(outputStream) val result = new String(outputStream.toByteArray) result should include ("|[some_rule](#some_rule) | impacted 3 file(s) |") } }
Example 58
Source File: ProjectDetailsSubscriberTest.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.report.impl import java.io.ByteArrayOutputStream import ch.qos.logback.classic.spi.LoggingEvent import org.scalatest.{FlatSpecLike, Matchers} class ProjectDetailsSubscriberTest extends FlatSpecLike with Matchers { "ProjectDetailsSubscriber" should "not accept unexpected events" in { val outputStream = new ByteArrayOutputStream val subscriber = new ProjectDetailsSubscriber subscriber.accept("hahah") subscriber.dumpTo(outputStream) outputStream.toByteArray should be (Array.empty[Byte]) val loggingEvent = new LoggingEvent loggingEvent.setMessage("Some random message") subscriber.accept(loggingEvent) subscriber.dumpTo(outputStream) outputStream.toByteArray should be (Array.empty[Byte]) } "ProjectDetailsSubscriber" should "accept expected events" in { val outputStream = new ByteArrayOutputStream val subscriber = new ProjectDetailsSubscriber val loggingEvent = new LoggingEvent loggingEvent.setMessage("Starting upgrade Scala project to 2.5.4-RELEASE, pom pom.xml with taskId None") subscriber.accept(loggingEvent) subscriber.dumpTo(outputStream) val result = new String(outputStream.toByteArray) result should include ("pom.xml") result should include ("2.5.4-RELEASE") result should include ("Scala project upgrade report") result should include ("Upgrade job ID | None") result should include ("Full upgrade log | [link](raptor-upgrade-debug.log)") result should include ("Upgrade warnings only log | [link](raptor-upgrade-warn.log)") outputStream.reset() val loggingEvent2 = new LoggingEvent loggingEvent2.setMessage("Starting upgrade Scala project to 2.5.4-RELEASE, pom pom.xml with taskId Some(1234)") subscriber.accept(loggingEvent2) subscriber.dumpTo(outputStream) val result2 = new String(outputStream.toByteArray) result2 should include ("pom.xml") result2 should include ("2.5.4-RELEASE") result2 should include ("Scala project upgrade report") result2 should include ("Upgrade job ID | Some(1234)") result2 should include ("Full upgrade log | [link](raptor-upgrade-debug-1234.log)") result2 should include ("Upgrade warnings only log | [link](raptor-upgrade-warn-1234.log)") } }
Example 59
Source File: ManualChangesSummarySubscriberTest.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.report.impl import java.io.ByteArrayOutputStream import ch.qos.logback.classic.spi.LoggingEvent import org.scalatest.{FlatSpecLike, Matchers} class ManualChangesSummarySubscriberTest extends FlatSpecLike with Matchers { "ManualChangesSummarySubscriber" should "not accept unexpected events" in { val outputStream = new ByteArrayOutputStream val subscriber = new ManualChangesSummarySubscriber subscriber.accept("hahaha") subscriber.dumpTo(outputStream) outputStream.toByteArray should be (Array.empty[Byte]) val loggingEvent = new LoggingEvent loggingEvent.setMessage("Some random message") subscriber.accept(loggingEvent) subscriber.dumpTo(outputStream) outputStream.toByteArray should be (Array.empty[Byte]) } "ManualChangesSummarySubscriber" should "accept expected events" in { val outputStream = new ByteArrayOutputStream val subscriber = new ManualChangesSummarySubscriber val loggingEvent = new LoggingEvent loggingEvent.setMessage("Rule blahblah requires 1000 manual changes") subscriber.accept(loggingEvent) subscriber.dumpTo(outputStream) val result = new String(outputStream.toByteArray) result should include ("|[blahblah](#blahblah) | 1000 manual changes required |") } }
Example 60
Source File: JavaSerde.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.core.serde import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectOutputStream} import akka.actor.ExtendedActorSystem import akka.serialization.JavaSerializer import akka.util.ClassLoaderObjectInputStream class JavaSerde(system: ExtendedActorSystem) extends Serde[AnyRef] { override def identifier: Int = 101 override def close(): Unit = () override def fromBytes(bytes: Array[Byte]): AnyRef = { val in = new ClassLoaderObjectInputStream(system.dynamicAccess.classLoader, new ByteArrayInputStream(bytes)) val obj = JavaSerializer.currentSystem.withValue(system) { in.readObject } in.close() obj } override def toBytes(o: AnyRef): Array[Byte] = { val bos = new ByteArrayOutputStream val out = new ObjectOutputStream(bos) JavaSerializer.currentSystem.withValue(system) { out.writeObject(o) } out.close() bos.toByteArray } }
Example 61
Source File: SeqSerde.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.core.serde.collection import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream} import akka.actor.ExtendedActorSystem import com.typesafe.config.Config import io.amient.affinity.core.serde.{AbstractWrapSerde, Serde, Serdes} class SeqSerde(serdes: Serdes) extends AbstractWrapSerde(serdes) with Serde[Seq[Any]] { def this(system: ExtendedActorSystem) = this(Serde.tools(system)) def this(config: Config) = this(Serde.tools(config)) override def identifier: Int = 141 override def close(): Unit = () override protected def fromBytes(bytes: Array[Byte]): Seq[Any] = { val di = new DataInputStream(new ByteArrayInputStream(bytes)) val numItems = di.readInt() val result = ((1 to numItems) map { _ => val len = di.readInt() val item = new Array[Byte](len) di.read(item) fromBinaryWrapped(item) }).toList di.close() result } override def toBytes(seq: Seq[Any]): Array[Byte] = { val os = new ByteArrayOutputStream() val d = new DataOutputStream(os) d.writeInt(seq.size) for (a: Any <- seq) a match { case ref: AnyRef => val item = toBinaryWrapped(ref) d.writeInt(item.length) d.write(item) } os.close os.toByteArray } }
Example 62
Source File: SetSerde.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.core.serde.collection import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream} import akka.actor.ExtendedActorSystem import com.typesafe.config.Config import io.amient.affinity.core.serde.{AbstractWrapSerde, Serde, Serdes} class SetSerde(serdes: Serdes) extends AbstractWrapSerde(serdes) with Serde[Set[Any]] { def this(system: ExtendedActorSystem) = this(Serde.tools(system)) def this(config: Config) = this(Serde.tools(config)) override def identifier: Int = 142 override protected def fromBytes(bytes: Array[Byte]): Set[Any] = { val di = new DataInputStream(new ByteArrayInputStream(bytes)) val numItems = di.readInt() val result = ((1 to numItems) map { _ => val len = di.readInt() val item = new Array[Byte](len) di.read(item) fromBinaryWrapped(item) }).toSet di.close() result } override def toBytes(set: Set[Any]): Array[Byte] = { val os = new ByteArrayOutputStream() val d = new DataOutputStream(os) d.writeInt(set.size) for (a: Any <- set) a match { case ref: AnyRef => val item = toBinaryWrapped(ref) d.writeInt(item.length) d.write(item) } os.close os.toByteArray } override def close() = () }
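Both collection serdes above use the same wire layout: an item count followed by length-prefixed byte blocks. A standalone sketch of that framing with plain JDK streams (no Akka or wrapped serdes involved), using readFully to avoid the short read that a bare read(item) can hit:

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}
import java.nio.charset.StandardCharsets

object LengthPrefixedFramingExample {

  def pack(items: Seq[Array[Byte]]): Array[Byte] = {
    val bos = new ByteArrayOutputStream()
    val out = new DataOutputStream(bos)
    out.writeInt(items.size)        // item count
    items.foreach { item =>
      out.writeInt(item.length)     // length prefix
      out.write(item)               // payload
    }
    out.close()
    bos.toByteArray
  }

  def unpack(bytes: Array[Byte]): Seq[Array[Byte]] = {
    val in = new DataInputStream(new ByteArrayInputStream(bytes))
    val count = in.readInt()
    val result = (1 to count).map { _ =>
      val item = new Array[Byte](in.readInt())
      in.readFully(item)            // guarantees the whole block is read
      item
    }
    in.close()
    result
  }

  def main(args: Array[String]): Unit = {
    val packed = pack(Seq("a", "bb", "ccc").map(_.getBytes(StandardCharsets.UTF_8)))
    println(unpack(packed).map(new String(_, StandardCharsets.UTF_8))) // Vector(a, bb, ccc)
  }
}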
Example 63
Source File: ResponseHelper.scala From OUTDATED_ledger-wallet-android with MIT License | 5 votes |
package co.ledger.wallet.core.net import java.io.{ByteArrayOutputStream, BufferedInputStream} import co.ledger.wallet.core.utils.io.IOUtils import co.ledger.wallet.core.utils.logs.Logger import org.json.{JSONArray, JSONObject} import co.ledger.wallet.core.net.HttpRequestExecutor.defaultExecutionContext import scala.concurrent.Future import scala.io.Source import scala.util.{Failure, Success} object ResponseHelper { implicit class ResponseFuture(f: Future[HttpClient#Response]) { def json: Future[(JSONObject, HttpClient#Response)] = { f.string.map { case (body, response) => Logger.d("Converting to json") (new JSONObject(body), response) } } def jsonArray: Future[(JSONArray, HttpClient#Response)] = { f.string.map { case (body, response) => (new JSONArray(body), response) } } def string: Future[(String, HttpClient#Response)] = { f.map { response => Logger.d("Converting to string") (Source.fromInputStream(response.body).mkString, response) } } def bytes: Future[(Array[Byte], HttpClient#Response)] = { f.map { response => val input = new BufferedInputStream(response.body) val output = new ByteArrayOutputStream() IOUtils.copy(input, output) val result = output.toByteArray input.close() output.close() (result, response) } } def noResponseBody: Future[HttpClient#Response] = { f.andThen { case Success(response) => response.body.close() response case Failure(cause) => throw cause } } } }
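The bytes helper above drains the response body through the project's IOUtils.copy; a dependency-free sketch of the same buffered-copy pattern into a ByteArrayOutputStream:

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream}
import java.nio.charset.StandardCharsets

object DrainInputStreamExample {

  def toByteArray(input: InputStream): Array[Byte] = {
    val output = new ByteArrayOutputStream()
    val chunk = new Array[Byte](4096)
    var read = input.read(chunk)
    while (read != -1) {
      output.write(chunk, 0, read)  // only write the bytes actually read
      read = input.read(chunk)
    }
    output.toByteArray              // closing a ByteArrayOutputStream is a no-op
  }

  def main(args: Array[String]): Unit = {
    val source = new ByteArrayInputStream("response body".getBytes(StandardCharsets.UTF_8))
    println(new String(toByteArray(source), StandardCharsets.UTF_8)) // response body
  }
}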
Example 64
Source File: RawTextSender.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util

import java.io.{ByteArrayOutputStream, IOException}
import java.net.ServerSocket
import java.nio.ByteBuffer

import scala.io.Source

import org.apache.spark.{SparkConf, Logging}
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.util.IntParam

private[streaming] object RawTextSender extends Logging {
  def main(args: Array[String]) {
    if (args.length != 4) {
      // scalastyle:off println
      System.err.println("Usage: RawTextSender <port> <file> <blockSize> <bytesPerSec>")
      // scalastyle:on println
      System.exit(1)
    }
    // Parse the arguments using a pattern match
    val Array(IntParam(port), file, IntParam(blockSize), IntParam(bytesPerSec)) = args

    // Repeat the input data multiple times to fill in a buffer
    val lines = Source.fromFile(file).getLines().toArray
    val bufferStream = new ByteArrayOutputStream(blockSize + 1000)
    val ser = new KryoSerializer(new SparkConf()).newInstance()
    val serStream = ser.serializeStream(bufferStream)
    var i = 0
    while (bufferStream.size < blockSize) {
      serStream.writeObject(lines(i))
      i = (i + 1) % lines.length
    }
    val array = bufferStream.toByteArray

    val countBuf = ByteBuffer.wrap(new Array[Byte](4))
    countBuf.putInt(array.length)
    countBuf.flip()

    val serverSocket = new ServerSocket(port)
    logInfo("Listening on port " + port)

    while (true) {
      val socket = serverSocket.accept()
      logInfo("Got a new connection")
      val out = new RateLimitedOutputStream(socket.getOutputStream, bytesPerSec)
      try {
        while (true) {
          out.write(countBuf.array)
          out.write(array)
        }
      } catch {
        case e: IOException =>
          logError("Client disconnected")
      } finally {
        socket.close()
      }
    }
  }
}
Example 65
Source File: RateLimitedOutputStreamSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util

import java.io.ByteArrayOutputStream
import java.util.concurrent.TimeUnit._

import org.apache.spark.SparkFunSuite

class RateLimitedOutputStreamSuite extends SparkFunSuite {

  private def benchmark[U](f: => U): Long = {
    val start = System.nanoTime
    f
    System.nanoTime - start
  }

  test("write") {
    val underlying = new ByteArrayOutputStream
    val data = "X" * 41000
    // desiredBytesPerSec is the number of bytes per second the stream should let through
    val stream = new RateLimitedOutputStream(underlying, desiredBytesPerSec = 10000)
    val elapsedNs = benchmark { stream.write(data.getBytes("UTF-8")) }

    val seconds = SECONDS.convert(elapsedNs, NANOSECONDS)
    assert(seconds >= 4, s"Seconds value ($seconds) is less than 4.")
    assert(seconds <= 30, s"Took more than 30 seconds ($seconds) to write data.")
    assert(underlying.toString("UTF-8") === data)
  }
}
Example 66
Source File: PythonRDDSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.api.python

import java.io.{ByteArrayOutputStream, DataOutputStream}

import org.apache.spark.SparkFunSuite

class PythonRDDSuite extends SparkFunSuite {

  test("Writing large strings to the worker") {
    val input: List[String] = List("a" * 100000)
    val buffer = new DataOutputStream(new ByteArrayOutputStream)
    PythonRDD.writeIteratorToStream(input.iterator, buffer)
  }

  test("Handle nulls gracefully") {
    val buffer = new DataOutputStream(new ByteArrayOutputStream)
    // Should not have NPE when write an Iterator with null in it
    // The correctness will be tested in Python
    PythonRDD.writeIteratorToStream(Iterator("a", null), buffer)
    PythonRDD.writeIteratorToStream(Iterator(null, "a"), buffer)
    PythonRDD.writeIteratorToStream(Iterator("a".getBytes, null), buffer)
    PythonRDD.writeIteratorToStream(Iterator(null, "a".getBytes), buffer)
    PythonRDD.writeIteratorToStream(Iterator((null, null), ("a", null), (null, "b")), buffer)
    PythonRDD.writeIteratorToStream(
      Iterator((null, null), ("a".getBytes, null), (null, "b".getBytes)), buffer)
  }
}
Example 67
Source File: GenericAvroSerializerSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Output, Input}
import org.apache.avro.{SchemaBuilder, Schema}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SparkFunSuite, SharedSparkContext}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema: Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }

  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
}
Example 68
Source File: ByteBufferOutputStream.scala From kraps-rpc with Apache License 2.0 | 5 votes |
package net.neoremind.kraps.util import java.io.ByteArrayOutputStream import java.nio.ByteBuffer class ByteBufferOutputStream(capacity: Int) extends ByteArrayOutputStream(capacity) { def this() = this(32) def getCount(): Int = count private[this] var closed: Boolean = false override def write(b: Int): Unit = { require(!closed, "cannot write to a closed ByteBufferOutputStream") super.write(b) } override def write(b: Array[Byte], off: Int, len: Int): Unit = { require(!closed, "cannot write to a closed ByteBufferOutputStream") super.write(b, off, len) } override def reset(): Unit = { require(!closed, "cannot reset a closed ByteBufferOutputStream") super.reset() } override def close(): Unit = { if (!closed) { super.close() closed = true } } def toByteBuffer: ByteBuffer = { require(closed, "can only call toByteBuffer() after ByteBufferOutputStream has been closed") ByteBuffer.wrap(buf, 0, count) } }
Example 69
Source File: Unpacker.scala From haystack-traces with Apache License 2.0 | 5 votes |
package com.expedia.www.haystack.trace.commons.packer import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream} import java.nio.ByteBuffer import java.util.zip.GZIPInputStream import com.expedia.open.tracing.buffer.SpanBuffer import com.github.luben.zstd.ZstdInputStream import org.apache.commons.io.IOUtils import org.json4s.jackson.Serialization import org.xerial.snappy.SnappyInputStream object Unpacker { import PackedMessage._ private def readMetadata(packedDataBytes: Array[Byte]): Array[Byte] = { val byteBuffer = ByteBuffer.wrap(packedDataBytes) val magicBytesExist = MAGIC_BYTES.indices forall { idx => byteBuffer.get() == MAGIC_BYTES.apply(idx) } if (magicBytesExist) { val headerLength = byteBuffer.getInt val metadataBytes = new Array[Byte](headerLength) byteBuffer.get(metadataBytes, 0, headerLength) metadataBytes } else { null } } private def unpack(compressedStream: InputStream) = { val outputStream = new ByteArrayOutputStream() IOUtils.copy(compressedStream, outputStream) outputStream.toByteArray } def readSpanBuffer(packedDataBytes: Array[Byte]): SpanBuffer = { var parsedDataBytes: Array[Byte] = null val metadataBytes = readMetadata(packedDataBytes) if (metadataBytes != null) { val packedMetadata = Serialization.read[PackedMetadata](new String(metadataBytes)) val compressedDataOffset = MAGIC_BYTES.length + 4 + metadataBytes.length packedMetadata.t match { case PackerType.SNAPPY => parsedDataBytes = unpack( new SnappyInputStream( new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset))) case PackerType.GZIP => parsedDataBytes = unpack( new GZIPInputStream( new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset))) case PackerType.ZSTD => parsedDataBytes = unpack( new ZstdInputStream( new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset))) case _ => return SpanBuffer.parseFrom( new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset)) } } else { parsedDataBytes = packedDataBytes } SpanBuffer.parseFrom(parsedDataBytes) } }
Example 70
Source File: Packer.scala From haystack-traces with Apache License 2.0 | 5 votes |
package com.expedia.www.haystack.trace.commons.packer import java.io.{ByteArrayInputStream, ByteArrayOutputStream, OutputStream} import java.util.zip.GZIPOutputStream import com.expedia.www.haystack.trace.commons.packer.PackerType.PackerType import com.github.luben.zstd.ZstdOutputStream import com.google.protobuf.GeneratedMessageV3 import org.apache.commons.io.IOUtils import org.xerial.snappy.SnappyOutputStream object PackerType extends Enumeration { type PackerType = Value val GZIP, SNAPPY, NONE, ZSTD = Value } case class PackedMetadata(t: PackerType) abstract class Packer[T <: GeneratedMessageV3] { val packerType: PackerType protected def compressStream(stream: OutputStream): OutputStream private def pack(protoObj: T): Array[Byte] = { val outStream = new ByteArrayOutputStream val compressedStream = compressStream(outStream) if (compressedStream != null) { IOUtils.copy(new ByteArrayInputStream(protoObj.toByteArray), compressedStream) compressedStream.close() // this flushes the data to final outStream outStream.toByteArray } else { protoObj.toByteArray } } def apply(protoObj: T): PackedMessage[T] = { PackedMessage(protoObj, pack, PackedMetadata(packerType)) } } class NoopPacker[T <: GeneratedMessageV3] extends Packer[T] { override val packerType = PackerType.NONE override protected def compressStream(stream: OutputStream): OutputStream = null } class SnappyPacker[T <: GeneratedMessageV3] extends Packer[T] { override val packerType = PackerType.SNAPPY override protected def compressStream(stream: OutputStream): OutputStream = new SnappyOutputStream(stream) } class ZstdPacker[T <: GeneratedMessageV3] extends Packer[T] { override val packerType = PackerType.ZSTD override protected def compressStream(stream: OutputStream): OutputStream = new ZstdOutputStream(stream) } class GzipPacker[T <: GeneratedMessageV3] extends Packer[T] { override val packerType = PackerType.GZIP override protected def compressStream(stream: OutputStream): OutputStream = new GZIPOutputStream(stream) }
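The packers above differ only in which compressing OutputStream they wrap around the buffer. A self-contained GZIP round trip using just the JDK, mirroring the pack/unpack pair (the sample payload is made up):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.charset.StandardCharsets
import java.util.zip.{GZIPInputStream, GZIPOutputStream}

object GzipRoundTripExample {

  def compress(bytes: Array[Byte]): Array[Byte] = {
    val bos = new ByteArrayOutputStream()
    val gzip = new GZIPOutputStream(bos)
    gzip.write(bytes)
    gzip.close()                  // close() flushes the GZIP trailer into the underlying buffer
    bos.toByteArray
  }

  def decompress(bytes: Array[Byte]): Array[Byte] = {
    val in = new GZIPInputStream(new ByteArrayInputStream(bytes))
    val out = new ByteArrayOutputStream()
    val chunk = new Array[Byte](4096)
    var read = in.read(chunk)
    while (read != -1) {
      out.write(chunk, 0, read)
      read = in.read(chunk)
    }
    in.close()
    out.toByteArray
  }

  def main(args: Array[String]): Unit = {
    val original = ("some repetitive payload " * 50).getBytes(StandardCharsets.UTF_8)
    val packed = compress(original)
    println(s"${original.length} bytes -> ${packed.length} bytes compressed")
    println(decompress(packed).sameElements(original)) // true
  }
}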
Example 71
Source File: ParquetIOTest.scala From ratatool with Apache License 2.0 | 5 votes |
package com.spotify.ratatool.io import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File} import java.nio.file.Files import com.spotify.ratatool.Schemas import com.spotify.ratatool.avro.specific.TestRecord import com.spotify.ratatool.scalacheck._ import org.apache.commons.io.FileUtils import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers class ParquetIOTest extends AnyFlatSpec with Matchers { private val genericSchema = Schemas.avroSchema private val genericGen = genericRecordOf(genericSchema) private val genericData = (1 to 100).flatMap(_ => genericGen.sample) private val specificSchema = TestRecord.getClassSchema private val specificGen = specificRecordOf[TestRecord] private val specificData = (1 to 100).flatMap(_ => specificGen.sample) "ParquetIO" should "work with generic record and stream" in { val out = new ByteArrayOutputStream() ParquetIO.writeToOutputStream(genericData, genericSchema, out) val in = new ByteArrayInputStream(out.toByteArray) val result = ParquetIO.readFromInputStream(in).toList result should equal (genericData) } it should "work with generic record and file" in { val dir = Files.createTempDirectory("ratatool-") val file = new File(dir.toString, "temp.parquet") ParquetIO.writeToFile(genericData, genericSchema, file) val result = ParquetIO.readFromFile(file).toList result should equal (genericData) FileUtils.deleteDirectory(dir.toFile) } it should "work with specific record and stream" in { val out = new ByteArrayOutputStream() ParquetIO.writeToOutputStream(specificData, specificSchema, out) val in = new ByteArrayInputStream(out.toByteArray) val result = ParquetIO.readFromInputStream[TestRecord](in).toList result.map(FixRandomData(_)) should equal (specificData.map(FixRandomData(_))) } it should "work with specific record and file" in { val dir = Files.createTempDirectory("ratatool-") val file = new File(dir.toString, "temp.parquet") ParquetIO.writeToFile(specificData, specificSchema, file) val result = ParquetIO.readFromFile[TestRecord](file).toList result.map(FixRandomData(_)) should equal (specificData.map(FixRandomData(_))) FileUtils.deleteDirectory(dir.toFile) } }
Example 72
Source File: AvroIOTest.scala From ratatool with Apache License 2.0 | 5 votes |
package com.spotify.ratatool.io import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File} import com.spotify.ratatool.Schemas import com.spotify.ratatool.avro.specific.TestRecord import org.apache.avro.generic.GenericRecord import com.spotify.ratatool.scalacheck._ import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers class AvroIOTest extends AnyFlatSpec with Matchers { private val genericSchema = Schemas.avroSchema private val genericGen = genericRecordOf(genericSchema) private val genericData = (1 to 100).flatMap(_ => genericGen.sample) private val specificSchema = TestRecord.getClassSchema private val specificGen = specificRecordOf[TestRecord] private val specificData = (1 to 100).flatMap(_ => specificGen.sample) "AvroIO" should "work with generic record and stream" in { val out = new ByteArrayOutputStream() AvroIO.writeToOutputStream(genericData, genericSchema, out) val in = new ByteArrayInputStream(out.toByteArray) val result = AvroIO.readFromInputStream[GenericRecord](in).toList result should equal (genericData) } it should "work with generic record and file" in { val file = File.createTempFile("ratatool-", ".avro") file.deleteOnExit() AvroIO.writeToFile(genericData, genericSchema, file) val result = AvroIO.readFromFile[GenericRecord](file).toList result should equal (genericData) } it should "work with specific record and stream" in { val out = new ByteArrayOutputStream() AvroIO.writeToOutputStream(specificData, specificSchema, out) val in = new ByteArrayInputStream(out.toByteArray) val result = AvroIO.readFromInputStream[TestRecord](in).toList result.map(FixRandomData(_)) should equal (specificData.map(FixRandomData(_))) } it should "work with specific record and file" in { val file = File.createTempFile("ratatool-", ".avro") file.deleteOnExit() AvroIO.writeToFile(specificData, specificSchema, file) val result = AvroIO.readFromFile[TestRecord](file).toList result.map(FixRandomData(_)) should equal (specificData.map(FixRandomData(_))) } }
Example 73
Source File: TableRowJsonIOTest.scala From ratatool with Apache License 2.0 | 5 votes |
package com.spotify.ratatool.io import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File} import com.spotify.ratatool.Schemas import com.spotify.ratatool.scalacheck._ import org.scalacheck.Gen import scala.jdk.CollectionConverters._ import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers class TableRowJsonIOTest extends AnyFlatSpec with Matchers { private def floatGen = Gen.choose[Float](0.0F, 1.0F) private val schema = Schemas.tableSchema private val data = Gen.listOfN(100, tableRowOf(schema) .amend(Gen.oneOf( Gen.const(null), floatGen ))(_.getRecord("nullable_fields").set("float_field")) .amend(floatGen)(_.getRecord("required_fields").set("float_field")) .amend(Gen.nonEmptyListOf(floatGen) .map(_.asJava) )(_.getRecord("repeated_fields").set("float_field")) ).sample.get "TableRowJsonIO" should "work with stream" in { val out = new ByteArrayOutputStream() TableRowJsonIO.writeToOutputStream(data, out) val in = new ByteArrayInputStream(out.toByteArray) val result = TableRowJsonIO.readFromInputStream(in).toList.map(_.toString) result should equal (data.map(_.toString)) } it should "work with file" in { val file = File.createTempFile("ratatool-", ".json") file.deleteOnExit() TableRowJsonIO.writeToFile(data, file) val result = TableRowJsonIO.readFromFile(file).toList.map(_.toString) result should equal (data.map(_.toString)) } }
Example 74
Source File: GenericSerde.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.kafka import java.io.ByteArrayOutputStream import com.sksamuel.avro4s.{AvroFormat, AvroInputStream, AvroOutputStream, AvroSchema, BinaryFormat, DataFormat, Decoder, Encoder, JsonFormat, SchemaFor} import org.apache.avro.Schema import org.apache.kafka.common.serialization.{Deserializer, Serde, Serializer} class GenericSerde[T >: Null : SchemaFor : Encoder : Decoder](avroFormat: AvroFormat = BinaryFormat) extends Serde[T] with Deserializer[T] with Serializer[T] with Serializable { val schema: Schema = AvroSchema[T] override def serializer(): Serializer[T] = this override def deserializer(): Deserializer[T] = this override def deserialize(topic: String, data: Array[Byte]): T = { if (data == null) null else { val avroInputStream = avroFormat match { case BinaryFormat => AvroInputStream.binary[T] case JsonFormat => AvroInputStream.json[T] case DataFormat => AvroInputStream.data[T] } val input = avroInputStream.from(data).build(schema) val result = input.iterator.next() input.close() result } } override def close(): Unit = () override def configure(configs: java.util.Map[String, _], isKey: Boolean): Unit = () override def serialize(topic: String, data: T): Array[Byte] = { val baos = new ByteArrayOutputStream() val avroOutputStream = avroFormat match { case BinaryFormat => AvroOutputStream.binary[T] case JsonFormat => AvroOutputStream.json[T] case DataFormat => AvroOutputStream.data[T] } val output = avroOutputStream.to(baos).build() output.write(data) output.close() baos.toByteArray } }
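A usage sketch for the serde above, assuming a hypothetical Click case class and avro4s's automatic derivation of SchemaFor, Encoder and Decoder for case classes:

import com.sksamuel.avro4s.BinaryFormat

object GenericSerdeExample {

  // Hypothetical record type; avro4s derives the Avro schema and codecs for it.
  case class Click(userId: String, timestamp: Long)

  def main(args: Array[String]): Unit = {
    val serde = new GenericSerde[Click](BinaryFormat)

    // The topic argument is part of the Kafka Serde interface but unused by this implementation.
    val bytes = serde.serialize("clicks", Click("user-1", 1234567890L))
    val restored = serde.deserialize("clicks", bytes)

    println(restored) // Click(user-1,1234567890)
  }
}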
Example 75
Source File: GithubIssue193.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.github import java.io.ByteArrayOutputStream import com.sksamuel.avro4s.{AvroFixed, AvroInputStream, AvroOutputStream} import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.should.Matchers case class Data(uuid: Option[UUID]) case class UUID(@AvroFixed(8) bytes: Array[Byte]) class GithubIssue193 extends AnyFunSuite with Matchers { test("Converting data with an optional fixed type field to GenericRecord fails #193") { val baos = new ByteArrayOutputStream() val output = AvroOutputStream.data[Data].to(baos).build() output.write(Data(Some(UUID(Array[Byte](0, 1, 2, 3, 4, 5, 6, 7))))) output.write(Data(None)) output.write(Data(Some(UUID(Array[Byte](7, 6, 5, 4, 3, 2, 1, 0))))) output.close() val input = AvroInputStream.data[Data].from(baos.toByteArray).build val datas = input.iterator.toList datas.head.uuid.get.bytes should equal(Array[Byte](0, 1, 2, 3, 4, 5, 6, 7)) datas(1).uuid shouldBe None datas.last.uuid.get.bytes should equal(Array[Byte](7, 6, 5, 4, 3, 2, 1, 0)) input.close() } }
Example 76
Source File: GithubIssue191.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.github import java.io.ByteArrayOutputStream import com.sksamuel.avro4s.{AvroOutputStream, AvroSchema} import org.apache.avro.file.{DataFileReader, SeekableByteArrayInput} import org.apache.avro.generic.{GenericDatumReader, GenericRecord} import org.apache.avro.util.Utf8 import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.should.Matchers final case class SN(value: String) extends AnyVal final case class SimpleUser(name: String, sn: Option[SN]) class GithubIssue191 extends AnyFunSuite with Matchers { test("writing out AnyVal in an option") { implicit val schema = AvroSchema[SimpleUser] val bytes = new ByteArrayOutputStream val out = AvroOutputStream.data[SimpleUser].to(bytes).build() out.write(SimpleUser("Tom", Some(SN("123")))) out.close() val datumReader = new GenericDatumReader[GenericRecord](schema) val dataFileReader = new DataFileReader[GenericRecord](new SeekableByteArrayInput(bytes.toByteArray), datumReader) val record = new Iterator[GenericRecord] { override def hasNext: Boolean = dataFileReader.hasNext override def next(): GenericRecord = dataFileReader.next }.toList.head record.getSchema shouldBe schema record.get("name") shouldBe new Utf8("Tom") record.get("sn") shouldBe new Utf8("123") } }
Example 77
Source File: GithubIssue235.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.github import java.io.ByteArrayOutputStream import com.sksamuel.avro4s.{Decoder, Encoder, RecordFormat, SchemaFor} import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord} import org.apache.avro.io.{DecoderFactory, EncoderFactory} import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.should.Matchers case class Label(value: String) extends AnyVal case class Value[A](label: Label, value: A) sealed trait OneOrTwo[A] case class One[A](value: Value[A]) extends OneOrTwo[A] case class Two[A](first: Value[A], second: Value[A]) extends OneOrTwo[A] case class OneOrTwoWrapper[A](t: OneOrTwo[A]) object Bug { def apply[T <: Product](a: T)( implicit schemaFor: SchemaFor[T], encoder: Encoder[T], decoder: Decoder[T] ): Unit = { val format = RecordFormat[T] val schema = schemaFor.schema val datumReader = new GenericDatumReader[GenericRecord](schema) val datumWriter = new GenericDatumWriter[GenericRecord](schema) val stream = new ByteArrayOutputStream() val bEncoder = EncoderFactory.get().binaryEncoder(stream, null) datumWriter.write(format.to(a), bEncoder) bEncoder.flush() val bytes = stream.toByteArray val bDecoder = DecoderFactory.get().binaryDecoder(bytes, null) val record = datumReader.read(null, bDecoder) require(format.from(record) == a) } } class GithubIssue235 extends AnyFunSuite with Matchers { test("Broken typeclass derivation upgrading from 1.9.0 to 2.0.1 #235") { val o = OneOrTwoWrapper(One(Value(Label("lbl"), "foo"))) Bug(o) } }
Example 78
Source File: GithubIssue485.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.github import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} import com.sksamuel.avro4s.record.decoder.CPWrapper import com.sksamuel.avro4s.{AvroSchema, Decoder, DefaultFieldMapper} import org.apache.avro.generic.GenericData import org.apache.avro.util.Utf8 import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.should.Matchers import shapeless.Coproduct class GithubIssue485 extends AnyFunSuite with Matchers { test("Serializable Coproduct Decoder #485") { val baos = new ByteArrayOutputStream() val oos = new ObjectOutputStream(baos) oos.writeObject(Decoder[CPWrapper]) oos.close() val decoder = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray)).readObject().asInstanceOf[Decoder[CPWrapper]] val schema = AvroSchema[CPWrapper] val record = new GenericData.Record(schema) record.put("u", new Utf8("wibble")) decoder.decode(record) shouldBe CPWrapper(Coproduct[CPWrapper.ISBG]("wibble")) } }
Example 79
Source File: GithubIssue484.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.github import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} import com.sksamuel.avro4s.record.decoder.ScalaEnumClass import com.sksamuel.avro4s.schema.Colours import com.sksamuel.avro4s.{AvroSchema, Decoder, DefaultFieldMapper} import org.apache.avro.generic.GenericData import org.apache.avro.generic.GenericData.EnumSymbol import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.should.Matchers class GithubIssue484 extends AnyFunSuite with Matchers { test("Serializable Scala Enum Decoder #484") { val baos = new ByteArrayOutputStream() val oos = new ObjectOutputStream(baos) oos.writeObject(Decoder[ScalaEnumClass]) oos.close() val decoder = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray)) .readObject() .asInstanceOf[Decoder[ScalaEnumClass]] val schema = AvroSchema[ScalaEnumClass] val record = new GenericData.Record(schema) record.put("colour", new EnumSymbol(schema.getField("colour").schema(), "Green")) decoder.decode(record) shouldBe ScalaEnumClass(Colours.Green) } }
Example 80
Source File: GithubIssue432.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.github import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} import com.sksamuel.avro4s.Encoder import org.scalatest.{FunSuite, Matchers} class GithubIssue432 extends FunSuite with Matchers { test("Serializable Encoder[BigDecimal] #432") { val oos = new ObjectOutputStream(new ByteArrayOutputStream()) oos.writeObject(Encoder.bigDecimalEncoder) oos.close() } test("Deserialized Encoder[BigDecimal] works") { val baos = new ByteArrayOutputStream() val oos = new ObjectOutputStream(baos) oos.writeObject(Encoder.bigDecimalEncoder) oos.close() val ois = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray)) val encoder = ois.readObject().asInstanceOf[Encoder[BigDecimal]] encoder.encode(12.34) } }
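The three GitHub-issue tests above (485, 484, 432) repeat the same java.io serialization round trip. A generic helper capturing that pattern might look like the sketch below; it is illustrative only and not part of avro4s.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

object JavaSerializationRoundTrip {
  // Write a value with ObjectOutputStream, then read it back from the resulting bytes.
  def roundTrip[T](value: T): T = {
    val baos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(baos)
    oos.writeObject(value)
    oos.close()
    val ois = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray))
    ois.readObject().asInstanceOf[T]
  }
}

// JavaSerializationRoundTrip.roundTrip(BigDecimal(12.34)) returns an equal BigDecimal read from the serialized bytes.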
Example 81
Source File: BinaryOutputStreamTest.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.streams.output import java.io.ByteArrayOutputStream import com.sksamuel.avro4s.{AvroInputStream, AvroOutputStream, AvroSchema, Encoder} import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec case class Work(name: String, year: Int, style: Style) case class Composer(name: String, birthplace: String, works: Seq[Work]) class BinaryStreamsTest extends AnyWordSpec with Matchers { val ennio = Composer("ennio morricone", "rome", Seq(Work("legend of 1900", 1986, Style.Classical), Work("ecstasy of gold", 1969, Style.Classical))) val hans = Composer("hans zimmer", "frankfurt", Seq(Work("batman begins", 2007, Style.Modern), Work("dunkirk", 2017, Style.Modern))) "Avro binary streams" should { "not write schemas" in { implicit val schema = AvroSchema[Composer] implicit val encoder = Encoder[Composer] val baos = new ByteArrayOutputStream() val output = AvroOutputStream.binary[Composer].to(baos).build() output.write(ennio) output.write(hans) output.close() // the schema should not be written in a binary stream new String(baos.toByteArray) should not include "birthplace" new String(baos.toByteArray) should not include "compositions" new String(baos.toByteArray) should not include "year" new String(baos.toByteArray) should not include "style" } "read and write" in { implicit val schema = AvroSchema[Composer] implicit val encoder = Encoder[Composer] val baos = new ByteArrayOutputStream() val output = AvroOutputStream.binary[Composer].to(baos).build() output.write(ennio) output.write(hans) output.close() val in = AvroInputStream.binary[Composer].from(baos.toByteArray).build(schema) in.iterator.toList shouldBe List(ennio, hans) in.close() } } }
Example 82
Source File: AvroDataOutputStreamCodecTest.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.streams.output import java.io.ByteArrayOutputStream import com.sksamuel.avro4s.{AvroOutputStream, AvroSchema} import org.apache.avro.file.CodecFactory import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class AvroDataOutputStreamCodecTest extends AnyWordSpec with Matchers { case class Composer(name: String, birthplace: String, compositions: Seq[String]) val schema = AvroSchema[Composer] val ennio = Composer("ennio morricone", "rome", Seq("legend of 1900", "ecstasy of gold")) "AvroDataOutputStream" should { "include schema" in { val baos = new ByteArrayOutputStream() val output = AvroOutputStream.data[Composer].to(baos).build() output.write(ennio) output.close() new String(baos.toByteArray) should include("birthplace") new String(baos.toByteArray) should include("compositions") } "include deflate codec in metadata when serialized with deflate" in { val baos = new ByteArrayOutputStream() val output = AvroOutputStream.data[Composer].to(baos).withCodec(CodecFactory.deflateCodec(CodecFactory.DEFAULT_DEFLATE_LEVEL)).build() output.write(ennio) output.close() new String(baos.toByteArray) should include("deflate") } "include bzip2 codec in metadata when serialized with bzip2" in { val baos = new ByteArrayOutputStream() val output = AvroOutputStream.data[Composer].to(baos).withCodec(CodecFactory.bzip2Codec).build() output.write(ennio) output.close() new String(baos.toByteArray) should include("bzip2") } } }
Example 83
Source File: OutputStreamTest.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.streams.output import java.io.ByteArrayOutputStream import com.sksamuel.avro4s._ import org.apache.avro.file.{DataFileReader, SeekableByteArrayInput} import org.apache.avro.generic.{GenericDatumReader, GenericRecord} import org.apache.avro.io.DecoderFactory import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.should.Matchers trait OutputStreamTest extends AnyFunSuite with Matchers { def readData[T: SchemaFor](out: ByteArrayOutputStream): GenericRecord = readData(out.toByteArray) def readData[T: SchemaFor](bytes: Array[Byte]): GenericRecord = { val datumReader = new GenericDatumReader[GenericRecord](AvroSchema[T]) val dataFileReader = new DataFileReader[GenericRecord](new SeekableByteArrayInput(bytes), datumReader) dataFileReader.next } def writeData[T: Encoder : SchemaFor](t: T): ByteArrayOutputStream = { val out = new ByteArrayOutputStream val avro = AvroOutputStream.data[T].to(out).build() avro.write(t) avro.close() out } def readBinary[T: SchemaFor](out: ByteArrayOutputStream): GenericRecord = readBinary(out.toByteArray) def readBinary[T: SchemaFor](bytes: Array[Byte]): GenericRecord = { val datumReader = new GenericDatumReader[GenericRecord](AvroSchema[T]) val decoder = DecoderFactory.get().binaryDecoder(new SeekableByteArrayInput(bytes), null) datumReader.read(null, decoder) } def writeBinary[T: Encoder : SchemaFor](t: T): ByteArrayOutputStream = { val out = new ByteArrayOutputStream val avro = AvroOutputStream.binary[T].to(out).build() avro.write(t) avro.close() out } def readJson[T: SchemaFor](out: ByteArrayOutputStream): GenericRecord = readJson(out.toByteArray) def readJson[T: SchemaFor](bytes: Array[Byte]): GenericRecord = { val schema = AvroSchema[T] val datumReader = new GenericDatumReader[GenericRecord](schema) val decoder = DecoderFactory.get().jsonDecoder(schema, new SeekableByteArrayInput(bytes)) datumReader.read(null, decoder) } def writeJson[T: Encoder : SchemaFor](t: T): ByteArrayOutputStream = { val out = new ByteArrayOutputStream val avro = AvroOutputStream.json[T].to(out).build() avro.write(t) avro.close() out } def writeRead[T: Encoder : SchemaFor](t: T)(fn: GenericRecord => Any): Unit = { { val out = writeData(t) val record = readData(out) fn(record) } { val out = writeBinary(t) val record = readBinary(out) fn(record) } { val out = writeJson(t) val record = readJson(out) fn(record) } } }
Example 84
Source File: InputStreamTest.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.streams.input import java.io.ByteArrayOutputStream import com.sksamuel.avro4s._ import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.should.Matchers trait InputStreamTest extends AnyFunSuite with Matchers { def readData[T: SchemaFor: Decoder](out: ByteArrayOutputStream): T = readData(out.toByteArray) def readData[T: SchemaFor: Decoder](bytes: Array[Byte]): T = { AvroInputStream.data.from(bytes).build(implicitly[SchemaFor[T]].schema).iterator.next() } def writeData[T: Encoder: SchemaFor](t: T): ByteArrayOutputStream = { val out = new ByteArrayOutputStream val avro = AvroOutputStream.data[T].to(out).build() avro.write(t) avro.close() out } def readBinary[T: SchemaFor: Decoder](out: ByteArrayOutputStream): T = readBinary(out.toByteArray) def readBinary[T: SchemaFor: Decoder](bytes: Array[Byte]): T = { AvroInputStream.binary.from(bytes).build(implicitly[SchemaFor[T]].schema).iterator.next() } def writeBinary[T: Encoder](t: T): ByteArrayOutputStream = { val out = new ByteArrayOutputStream val avro = AvroOutputStream.binary[T].to(out).build() avro.write(t) avro.close() out } def writeRead[T: Encoder: Decoder: SchemaFor](t: T): Unit = { { val out = writeData(t) readData(out) shouldBe t } { val out = writeBinary(t) readBinary(out) shouldBe t } } def writeRead[T: Encoder: Decoder: SchemaFor](t: T, expected: T): Unit = { { val out = writeData(t) readData(out) shouldBe expected } { val out = writeBinary(t) readBinary(out) shouldBe expected } } }
Example 85
Source File: SchemaEvolutionTest.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.record.decoder import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import com.sksamuel.avro4s._ import org.apache.avro.SchemaBuilder import org.apache.avro.generic.GenericData import org.apache.avro.util.Utf8 import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.should.Matchers class SchemaEvolutionTest extends AnyFunSuite with Matchers { case class Version1(original: String) case class Version2(@AvroAlias("original") renamed: String) case class P1(name: String, age: Int = 18) case class P2(name: String) case class OptionalStringTest(a: String, b: Option[String]) case class DefaultStringTest(a: String, b: String = "foo") ignore("@AvroAlias should be used when a reader schema has a field missing from the write schema") { val v1schema = AvroSchema[Version1] val v1 = Version1("hello") val baos = new ByteArrayOutputStream() val output = AvroOutputStream.data[Version1].to(baos).build() output.write(v1) output.close() // we load using a v2 schema val is = new AvroDataInputStream[Version2](new ByteArrayInputStream(baos.toByteArray), Some(v1schema)) val v2 = is.iterator.toList.head v2.renamed shouldBe v1.original } test("when decoding, if the record and schema are missing a field and the target has a scala default, use that") { val f1 = RecordFormat[P1] val f2 = RecordFormat[P2] f1.from(f2.to(P2("foo"))) shouldBe P1("foo") } test("when decoding, if the record is missing a field that is present in the schema with a default, use the default from the schema") { val schema = SchemaBuilder.record("foo").fields().requiredString("a").endRecord() val record = new GenericData.Record(schema) record.put("a", new Utf8("hello")) Decoder[DefaultStringTest].decode(record) shouldBe DefaultStringTest("hello") } test("when decoding, if the record is missing a field that is present in the schema and the type is option, then set to None") { val schema1 = SchemaBuilder.record("foo").fields().requiredString("a").endRecord() val schema2 = SchemaBuilder.record("foo").fields().requiredString("a").optionalString("b").endRecord() val record = new GenericData.Record(schema1) record.put("a", new Utf8("hello")) Decoder[OptionalStringTest].decode(record) shouldBe OptionalStringTest("hello", None) } }
Example 86
Source File: Encoding.scala From avro4s with Apache License 2.0 | 5 votes |
package benchmarks import java.io.ByteArrayOutputStream import java.nio.ByteBuffer import benchmarks.record._ import com.sksamuel.avro4s._ import org.apache.avro.generic.{GenericDatumWriter, GenericRecord} import org.apache.avro.io.EncoderFactory import org.openjdk.jmh.annotations._ import org.openjdk.jmh.infra.Blackhole object Encoding extends BenchmarkHelpers { @State(Scope.Thread) class Setup { val record = RecordWithUnionAndTypeField(AttributeValue.Valid[Int](255, t)) val specificRecord = { import benchmarks.record.generated.AttributeValue._ import benchmarks.record.generated._ new RecordWithUnionAndTypeField(new ValidInt(255, t)) } val (avro4sEncoder, avro4sWriter) = { val schema = AvroSchema[RecordWithUnionAndTypeField] val encoder = Encoder[RecordWithUnionAndTypeField] val writer = new GenericDatumWriter[GenericRecord](schema) (encoder, writer) } val (handrolledEncoder, handrolledWriter) = { import benchmarks.handrolled_codecs._ implicit val codec: AttributeValueCodec[Int] = AttributeValueCodec[Int] implicit val schemaForValid = codec.schemaForValid val schema = AvroSchema[RecordWithUnionAndTypeField] val encoder = Encoder[RecordWithUnionAndTypeField] val writer = new GenericDatumWriter[GenericRecord](schema) (encoder, writer) } } } class Encoding extends CommonParams with BenchmarkHelpers { import Encoding._ def encode[T](value: T, encoder: Encoder[T], writer: GenericDatumWriter[GenericRecord]): ByteBuffer = { val outputStream = new ByteArrayOutputStream(512) val record = encoder.encode(value).asInstanceOf[GenericRecord] val enc = EncoderFactory.get().directBinaryEncoder(outputStream, null) writer.write(record, enc) ByteBuffer.wrap(outputStream.toByteArray) } @Benchmark def avroSpecificRecord(setup: Setup, blackhole: Blackhole) = blackhole.consume(setup.specificRecord.toByteBuffer) @Benchmark def avro4sGenerated(setup: Setup, blackhole: Blackhole) = blackhole.consume(encode(setup.record, setup.avro4sEncoder, setup.avro4sWriter)) @Benchmark def avro4sHandrolled(setup: Setup, blackhole: Blackhole) = blackhole.consume(encode(setup.record, setup.handrolledEncoder, setup.handrolledWriter)) }
Example 87
Source File: Decoding.scala From avro4s with Apache License 2.0 | 5 votes |
package benchmarks import java.io.ByteArrayOutputStream import java.nio.ByteBuffer import java.util.Collections import benchmarks.record._ import com.sksamuel.avro4s._ import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord} import org.apache.avro.io.{DecoderFactory, EncoderFactory} import org.apache.avro.util.ByteBufferInputStream import org.openjdk.jmh.annotations._ import org.openjdk.jmh.infra.Blackhole object Decoding extends BenchmarkHelpers { @State(Scope.Thread) class Setup { val avroBytes = { import benchmarks.record.generated.AttributeValue._ import benchmarks.record.generated._ new RecordWithUnionAndTypeField(new ValidInt(255, t)).toByteBuffer } val avro4sBytes = encode(RecordWithUnionAndTypeField(AttributeValue.Valid[Int](255, t))) val (handrolledDecoder, handrolledReader) = { import benchmarks.handrolled_codecs._ implicit val codec: Codec[AttributeValue[Int]] = AttributeValueCodec[Int] implicit val schemaFor: SchemaFor[AttributeValue[Int]] = SchemaFor[AttributeValue[Int]](codec.schema) val recordSchemaFor = SchemaFor[RecordWithUnionAndTypeField] val decoder = Decoder[RecordWithUnionAndTypeField].withSchema(recordSchemaFor) val reader = new GenericDatumReader[GenericRecord](recordSchemaFor.schema) (decoder, reader) } val (avro4sDecoder, avro4sReader) = { val decoder = Decoder[RecordWithUnionAndTypeField] val reader = new GenericDatumReader[GenericRecord](decoder.schema) (decoder, reader) } } def encode[T: Encoder: SchemaFor](value: T): ByteBuffer = { val outputStream = new ByteArrayOutputStream(512) val encoder = Encoder[T] val schema = AvroSchema[T] val record = encoder.encode(value).asInstanceOf[GenericRecord] val writer = new GenericDatumWriter[GenericRecord](schema) val enc = EncoderFactory.get().directBinaryEncoder(outputStream, null) writer.write(record, enc) ByteBuffer.wrap(outputStream.toByteArray) } } class Decoding extends CommonParams with BenchmarkHelpers { import Decoding._ def decode[T](bytes: ByteBuffer, decoder: Decoder[T], reader: GenericDatumReader[GenericRecord]): T = { val dec = DecoderFactory.get().binaryDecoder(new ByteBufferInputStream(Collections.singletonList(bytes.duplicate)), null) val record = reader.read(null, dec) decoder.decode(record) } @Benchmark def avroSpecificRecord(setup: Setup, blackhole: Blackhole) = { import benchmarks.record.generated._ blackhole.consume(RecordWithUnionAndTypeField.fromByteBuffer(setup.avroBytes.duplicate)) } @Benchmark def avro4sHandrolled(setup: Setup, blackhole: Blackhole) = blackhole.consume(decode(setup.avro4sBytes, setup.handrolledDecoder, setup.handrolledReader)) @Benchmark def avro4sGenerated(setup: Setup, blackhole: Blackhole) = blackhole.consume(decode(setup.avro4sBytes, setup.avro4sDecoder, setup.avro4sReader)) }
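Taken together, the Encoding and Decoding benchmarks form a binary Avro round trip through a ByteArrayOutputStream. Stripped of the benchmark harness and avro4s, the same round trip with plain Avro looks roughly like this; the schema is a stand-in, not the benchmarks' RecordWithUnionAndTypeField.

import java.io.ByteArrayOutputStream

import org.apache.avro.SchemaBuilder
import org.apache.avro.generic.{GenericData, GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DecoderFactory, EncoderFactory}

object AvroBinaryRoundTrip extends App {
  // Hypothetical schema standing in for the benchmark record type.
  val schema = SchemaBuilder.record("Sample").fields().requiredInt("value").endRecord()

  val record = new GenericData.Record(schema)
  record.put("value", 255)

  // Encode: GenericDatumWriter + binary encoder over a ByteArrayOutputStream.
  val out = new ByteArrayOutputStream(512)
  val writer = new GenericDatumWriter[GenericRecord](schema)
  val encoder = EncoderFactory.get().binaryEncoder(out, null)
  writer.write(record, encoder)
  encoder.flush()

  // Decode: GenericDatumReader + binary decoder over the produced bytes.
  val reader = new GenericDatumReader[GenericRecord](schema)
  val decoder = DecoderFactory.get().binaryDecoder(out.toByteArray, null)
  println(reader.read(null, decoder)) // {"value": 255}
}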
Example 88
Source File: StyleChecker.scala From big-data-scala-spark with MIT License | 5 votes |
import sbt.File import java.io.ByteArrayOutputStream import java.io.PrintStream import org.scalastyle._ import com.typesafe.config.ConfigFactory object StyleChecker { val maxResult = 100 class CustomTextOutput[T <: FileSpec](stream: PrintStream) extends Output[T] { private val messageHelper = new MessageHelper(ConfigFactory.load()) var fileCount: Int = _ override def message(m: Message[T]): Unit = m match { case StartWork() => case EndWork() => case StartFile(file) => stream.print("Checking file " + file + "...") fileCount = 0 case EndFile(file) => if (fileCount == 0) stream.println(" OK!") case StyleError(file, clazz, key, level, args, line, column, customMessage) => report(line, column, messageHelper.text(level.name), Output.findMessage(messageHelper, key, args, customMessage)) case StyleException(file, clazz, message, stacktrace, line, column) => report(line, column, "error", message) } private def report(line: Option[Int], column: Option[Int], level: String, message: String) { if (fileCount == 0) stream.println("") fileCount += 1 stream.println(" " + fileCount + ". " + level + pos(line, column) + ":") stream.println(" " + message) } private def pos(line: Option[Int], column: Option[Int]): String = line match { case Some(lineNumber) => " at line " + lineNumber + (column match { case Some(columnNumber) => " character " + columnNumber case None => "" }) case None => "" } } def score(outputResult: OutputResult) = { val penalties = outputResult.errors + outputResult.warnings scala.math.max(maxResult - penalties, 0) } def assess(sources: Seq[File], styleSheetPath: String): (String, Int) = { val configFile = new File(styleSheetPath).getAbsolutePath val messages = new ScalastyleChecker().checkFiles( ScalastyleConfiguration.readFromXml(configFile), Directory.getFiles(None, sources)) val output = new ByteArrayOutputStream() val outputResult = new CustomTextOutput(new PrintStream(output)).output(messages) val msg = s"""${output.toString} |Processed ${outputResult.files} file(s) |Found ${outputResult.errors} errors |Found ${outputResult.warnings} warnings |""".stripMargin (msg, score(outputResult)) } }
Example 91
Source File: JsDataSpec.scala From mist with Apache License 2.0 | 5 votes |
package mist.api.data import java.io.{ByteArrayOutputStream, ObjectOutputStream} import java.util import mist.api.encoding.defaultEncoders._ import mist.api.encoding.JsSyntax._ import org.scalatest._ import org.scalatest.prop.TableDrivenPropertyChecks._ class JsDataSpec extends FunSpec with Matchers { import java.{lang => jl, util => ju} val rawToData = Table( ("raw", "data"), (1, JsNumber(1)), ("str", JsString("str")), (1.2, JsNumber(1.2)), (List(1, 2), JsList(Seq(JsNumber(1), JsNumber(2)))), (Array(1, 2), JsList(Seq(JsNumber(1), JsNumber(2)))), (Map("key" -> "value"), JsMap(Map("key" -> JsString("value")))) ) val javaMap: ju.Map[String, jl.Integer] = { val m = new ju.HashMap[String, jl.Integer](1) m.put("test", new jl.Integer(42)) m } val javaRawToData = Table( ("raw", "data"), (new jl.Integer(42), JsNumber(42)), (new jl.Double(42.0), JsNumber(42.0)), (ju.Arrays.asList(new jl.Integer(42)), JsList(Seq(JsNumber(42)))), (javaMap, JsMap(Map("test"-> JsNumber(42)))) ) it("should parse raw any structure") { forAll(rawToData) { (raw: Any, jsLike: JsData) => JsData.fromScala(raw) shouldBe jsLike } } it("should parse raw any java structure") { forAll(javaRawToData){ (raw: Any, jsLike: JsData) => JsData.fromJava(raw) shouldBe jsLike } } describe("JsLikeMap") { // problem with MapLike - akka can't serialize it // scala.collection.immutable.MapLike$$anon$2 // java.io.NotSerializableException: scala.collection.immutable.MapLike$$anon$2 it("JsLikeMap should be serializable") { val map = Map("1" -> 1, "2" -> 2).mapValues(i => JsNumber(i)) val jslikeMap = JsMap(map) val bos = new ByteArrayOutputStream val out = new ObjectOutputStream(bos) out.writeObject(jslikeMap) out.close() } } it("should return untyped map") { val js = JsMap( "a" -> 1.js, "b" -> false.js, "c" -> JsList(Seq( JsMap("x" -> "y".js) )) ) val exp = Map( "a" -> 1, "b" -> false, "c" -> Seq( Map("x" -> "y") ) ) JsData.untyped(js) shouldBe exp } }
Example 92
Source File: ProtoMarshaller.scala From akka-grpc with Apache License 2.0 | 5 votes |
package akka.grpc.internal import java.io.{ ByteArrayInputStream, ByteArrayOutputStream, InputStream } import io.grpc.KnownLength import akka.annotation.InternalApi import akka.grpc.ProtobufSerializer @InternalApi class ProtoMarshaller[T <: com.google.protobuf.Message](u: ProtobufSerializer[T]) extends io.grpc.MethodDescriptor.Marshaller[T] { override def parse(stream: InputStream): T = { val baos = new ByteArrayOutputStream(math.max(64, stream.available())) val buffer = new Array[Byte](32 * 1024) // Blocking calls underneath... // we can't avoid it for the moment because we are relying on the Netty's Channel API var bytesRead = stream.read(buffer) while (bytesRead >= 0) { baos.write(buffer, 0, bytesRead) bytesRead = stream.read(buffer) } u.deserialize(akka.util.ByteString(baos.toByteArray)) } override def stream(value: T): InputStream = new ByteArrayInputStream(value.toByteArray) with KnownLength }
Example 93
Source File: Gzip.scala From akka-grpc with Apache License 2.0 | 5 votes |
package akka.grpc.internal import java.io.{ ByteArrayInputStream, ByteArrayOutputStream } import java.util.zip.{ GZIPInputStream, GZIPOutputStream } import akka.util.ByteString object Gzip extends Codec { override val name: String = "gzip" override def compress(uncompressed: ByteString): ByteString = { val baos = new ByteArrayOutputStream(uncompressed.size) val gzos = new GZIPOutputStream(baos) gzos.write(uncompressed.toArray) gzos.flush() gzos.close() ByteString(baos.toByteArray) } override def uncompress(compressed: ByteString): ByteString = { val gzis = new GZIPInputStream(new ByteArrayInputStream(compressed.toArray)) val baos = new ByteArrayOutputStream(compressed.size) val buffer = new Array[Byte](32 * 1024) var read = gzis.read(buffer) while (read != -1) { baos.write(buffer, 0, read) read = gzis.read(buffer) } ByteString(baos.toByteArray) } }
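A usage sketch of the codec above, exercised directly rather than through the gRPC plumbing; the repetitive payload is only there to make the compression visible.

import akka.grpc.internal.Gzip
import akka.util.ByteString

object GzipRoundTrip extends App {
  val original = ByteString("hello " * 1000) // repetitive payload compresses well
  val compressed = Gzip.compress(original)
  val restored = Gzip.uncompress(compressed)
  assert(restored == original)
  println(s"original=${original.size} bytes, compressed=${compressed.size} bytes")
}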
Example 94
Source File: Marshaller.scala From akka-grpc with Apache License 2.0 | 5 votes |
package akka.grpc.internal import java.io.{ ByteArrayInputStream, ByteArrayOutputStream, InputStream } import io.grpc.KnownLength import akka.annotation.InternalApi import akka.grpc.ProtobufSerializer @InternalApi final class Marshaller[T <: scalapb.GeneratedMessage](u: ProtobufSerializer[T]) extends io.grpc.MethodDescriptor.Marshaller[T] { override def parse(stream: InputStream): T = { val baos = new ByteArrayOutputStream(math.max(64, stream.available())) val buffer = new Array[Byte](32 * 1024) // Blocking calls underneath... // we can't avoid it for the moment because we are relying on the Netty's Channel API var bytesRead = stream.read(buffer) while (bytesRead >= 0) { baos.write(buffer, 0, bytesRead) bytesRead = stream.read(buffer) } u.deserialize(akka.util.ByteString(baos.toByteArray)) } override def stream(value: T): InputStream = new ByteArrayInputStream(value.toByteArray) with KnownLength }
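ProtoMarshaller and Marshaller above share the same drain-the-InputStream loop. Extracted as a standalone helper, the pattern reads as follows; this is illustrative only and not an akka-grpc API.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream}

object StreamUtil {
  // Drains an InputStream into a byte array using the same buffered loop as the marshallers above.
  def readFully(stream: InputStream): Array[Byte] = {
    val baos = new ByteArrayOutputStream(math.max(64, stream.available()))
    val buffer = new Array[Byte](32 * 1024)
    var bytesRead = stream.read(buffer)
    while (bytesRead >= 0) {
      baos.write(buffer, 0, bytesRead)
      bytesRead = stream.read(buffer)
    }
    baos.toByteArray
  }
}

// StreamUtil.readFully(new ByteArrayInputStream(Array[Byte](1, 2, 3))) yields Array(1, 2, 3).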
Example 95
Source File: Main.scala From akka-grpc with Apache License 2.0 | 5 votes |
package akka.grpc.gen import java.io.ByteArrayOutputStream import com.google.protobuf.compiler.PluginProtos.CodeGeneratorRequest import akka.grpc.gen.javadsl.{ JavaClientCodeGenerator, JavaInterfaceCodeGenerator, JavaServerCodeGenerator } import akka.grpc.gen.scaladsl.{ ScalaClientCodeGenerator, ScalaServerCodeGenerator, ScalaTraitCodeGenerator } // This is the protoc plugin that the gradle plugin uses object Main extends App { val inBytes: Array[Byte] = { val baos = new ByteArrayOutputStream(math.max(64, System.in.available())) val buffer = new Array[Byte](32 * 1024) var bytesRead = System.in.read(buffer) while (bytesRead >= 0) { baos.write(buffer, 0, bytesRead) bytesRead = System.in.read(buffer) } baos.toByteArray } val req = CodeGeneratorRequest.parseFrom(inBytes) val KeyValueRegex = """([^=]+)=(.*)""".r val parameters = req.getParameter .split(",") .flatMap { case KeyValueRegex(key, value) => Some((key.toLowerCase, value)) case _ => None } .toMap private val languageScala: Boolean = parameters.get("language").map(_.equalsIgnoreCase("scala")).getOrElse(false) private val generateClient: Boolean = parameters.get("generate_client").map(!_.equalsIgnoreCase("false")).getOrElse(true) private val generateServer: Boolean = parameters.get("generate_server").map(!_.equalsIgnoreCase("false")).getOrElse(true) private val extraGenerators: List[String] = parameters.getOrElse("extra_generators", "").split(";").toList.filter(_ != "") private val logger = parameters.get("logfile").map(new FileLogger(_)).getOrElse(SilencedLogger) val out = { val codeGenerators = if (languageScala) { // Scala if (generateClient && generateServer) Seq(ScalaTraitCodeGenerator, ScalaClientCodeGenerator, ScalaServerCodeGenerator) else if (generateClient) Seq(ScalaTraitCodeGenerator, ScalaClientCodeGenerator) else if (generateServer) Seq(ScalaTraitCodeGenerator, ScalaServerCodeGenerator) else throw new IllegalArgumentException("At least one of generateClient or generateServer must be enabled") } else { // Java if (generateClient && generateServer) Seq(JavaInterfaceCodeGenerator, JavaClientCodeGenerator, JavaServerCodeGenerator) else if (generateClient) Seq(JavaInterfaceCodeGenerator, JavaClientCodeGenerator) else if (generateServer) Seq(JavaInterfaceCodeGenerator, JavaServerCodeGenerator) else throw new IllegalArgumentException("At least one of generateClient or generateServer must be enabled") } val loadedExtraGenerators = extraGenerators.map(cls => Class.forName(cls).getDeclaredConstructor().newInstance().asInstanceOf[CodeGenerator]) (codeGenerators ++ loadedExtraGenerators).foreach { g => val gout = g.run(req, logger) System.out.write(gout.toByteArray) System.out.flush() } } }
Example 96
Source File: Main.scala From akka-grpc with Apache License 2.0 | 5 votes |
package akka.grpc.scalapb import java.io.ByteArrayOutputStream import scalapb.ScalaPbCodeGenerator object Main extends App { val inBytes: Array[Byte] = { val baos = new ByteArrayOutputStream(math.max(64, System.in.available())) val buffer = Array.ofDim[Byte](32 * 1024) var bytesRead = System.in.read(buffer) while (bytesRead >= 0) { baos.write(buffer, 0, bytesRead) bytesRead = System.in.read(buffer) } baos.toByteArray } val outBytes = ScalaPbCodeGenerator.run(inBytes) System.out.write(outBytes) System.out.flush() }
Example 97
Source File: TestingTypedCount.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.{ImperativeAggregate, TypedImperativeAggregate} import org.apache.spark.sql.hive.execution.TestingTypedCount.State import org.apache.spark.sql.types._ @ExpressionDescription( usage = "_FUNC_(expr) - A testing aggregate function resembles COUNT " + "but implements ObjectAggregateFunction.") case class TestingTypedCount( child: Expression, mutableAggBufferOffset: Int = 0, inputAggBufferOffset: Int = 0) extends TypedImperativeAggregate[TestingTypedCount.State] { def this(child: Expression) = this(child, 0, 0) override def children: Seq[Expression] = child :: Nil override def dataType: DataType = LongType override def nullable: Boolean = false override def createAggregationBuffer(): State = TestingTypedCount.State(0L) override def update(buffer: State, input: InternalRow): State = { if (child.eval(input) != null) { buffer.count += 1 } buffer } override def merge(buffer: State, input: State): State = { buffer.count += input.count buffer } override def eval(buffer: State): Any = buffer.count override def serialize(buffer: State): Array[Byte] = { val byteStream = new ByteArrayOutputStream() val dataStream = new DataOutputStream(byteStream) dataStream.writeLong(buffer.count) byteStream.toByteArray } override def deserialize(storageFormat: Array[Byte]): State = { val byteStream = new ByteArrayInputStream(storageFormat) val dataStream = new DataInputStream(byteStream) TestingTypedCount.State(dataStream.readLong()) } override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): ImperativeAggregate = copy(mutableAggBufferOffset = newMutableAggBufferOffset) override def withNewInputAggBufferOffset(newInputAggBufferOffset: Int): ImperativeAggregate = copy(inputAggBufferOffset = newInputAggBufferOffset) override val prettyName: String = "typed_count" } object TestingTypedCount { case class State(var count: Long) }
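The serialize/deserialize pair above round-trips the aggregation state through a byte array. The same pattern in isolation, with the aggregate machinery stripped away, is just a Long written and read with the java.io data streams:

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

object CountStateRoundTrip extends App {
  // Mirrors TestingTypedCount's buffer (de)serialization: one Long through DataOutputStream/DataInputStream.
  def serialize(count: Long): Array[Byte] = {
    val byteStream = new ByteArrayOutputStream()
    val dataStream = new DataOutputStream(byteStream)
    dataStream.writeLong(count)
    dataStream.flush()
    byteStream.toByteArray
  }

  def deserialize(bytes: Array[Byte]): Long =
    new DataInputStream(new ByteArrayInputStream(bytes)).readLong()

  assert(deserialize(serialize(42L)) == 42L)
}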
Example 98
Source File: RawTextSender.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util import java.io.{ByteArrayOutputStream, IOException} import java.net.ServerSocket import java.nio.ByteBuffer import scala.io.Source import org.apache.spark.SparkConf import org.apache.spark.internal.Logging import org.apache.spark.serializer.KryoSerializer import org.apache.spark.util.IntParam private[streaming] object RawTextSender extends Logging { def main(args: Array[String]) { if (args.length != 4) { // scalastyle:off println System.err.println("Usage: RawTextSender <port> <file> <blockSize> <bytesPerSec>") // scalastyle:on println System.exit(1) } // Parse the arguments using a pattern match val Array(IntParam(port), file, IntParam(blockSize), IntParam(bytesPerSec)) = args // Repeat the input data multiple times to fill in a buffer val lines = Source.fromFile(file).getLines().toArray val bufferStream = new ByteArrayOutputStream(blockSize + 1000) val ser = new KryoSerializer(new SparkConf()).newInstance() val serStream = ser.serializeStream(bufferStream) var i = 0 while (bufferStream.size < blockSize) { serStream.writeObject(lines(i)) i = (i + 1) % lines.length } val array = bufferStream.toByteArray val countBuf = ByteBuffer.wrap(new Array[Byte](4)) countBuf.putInt(array.length) countBuf.flip() val serverSocket = new ServerSocket(port) logInfo("Listening on port " + port) while (true) { val socket = serverSocket.accept() logInfo("Got a new connection") val out = new RateLimitedOutputStream(socket.getOutputStream, bytesPerSec) try { while (true) { out.write(countBuf.array) out.write(array) } } catch { case e: IOException => logError("Client disconnected") } finally { socket.close() } } } }
Example 99
Source File: RateLimitedOutputStreamSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util import java.io.ByteArrayOutputStream import java.nio.charset.StandardCharsets import java.util.concurrent.TimeUnit._ import org.apache.spark.SparkFunSuite class RateLimitedOutputStreamSuite extends SparkFunSuite { private def benchmark[U](f: => U): Long = { val start = System.nanoTime f System.nanoTime - start } test("write") { val underlying = new ByteArrayOutputStream val data = "X" * 41000 val stream = new RateLimitedOutputStream(underlying, desiredBytesPerSec = 10000) val elapsedNs = benchmark { stream.write(data.getBytes(StandardCharsets.UTF_8)) } val seconds = SECONDS.convert(elapsedNs, NANOSECONDS) assert(seconds >= 4, s"Seconds value ($seconds) is less than 4.") assert(seconds <= 30, s"Took more than 30 seconds ($seconds) to write data.") assert(underlying.toString("UTF-8") === data) } }
Example 100
Source File: RBackendAuthHandler.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.api.r import java.io.{ByteArrayOutputStream, DataOutputStream} import java.nio.charset.StandardCharsets.UTF_8 import io.netty.channel.{Channel, ChannelHandlerContext, SimpleChannelInboundHandler} import org.apache.spark.internal.Logging import org.apache.spark.util.Utils private class RBackendAuthHandler(secret: String) extends SimpleChannelInboundHandler[Array[Byte]] with Logging { override def channelRead0(ctx: ChannelHandlerContext, msg: Array[Byte]): Unit = { // The R code adds a null terminator to serialized strings, so ignore it here. val clientSecret = new String(msg, 0, msg.length - 1, UTF_8) try { require(secret == clientSecret, "Auth secret mismatch.") ctx.pipeline().remove(this) writeReply("ok", ctx.channel()) } catch { case e: Exception => logInfo("Authentication failure.", e) writeReply("err", ctx.channel()) ctx.close() } } private def writeReply(reply: String, chan: Channel): Unit = { val out = new ByteArrayOutputStream() SerDe.writeString(new DataOutputStream(out), reply) chan.writeAndFlush(out.toByteArray()) } }
Example 101
Source File: ByteBufferOutputStream.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.ByteArrayOutputStream import java.nio.ByteBuffer private[spark] class ByteBufferOutputStream(capacity: Int) extends ByteArrayOutputStream(capacity) { def this() = this(32) def getCount(): Int = count private[this] var closed: Boolean = false override def write(b: Int): Unit = { require(!closed, "cannot write to a closed ByteBufferOutputStream") super.write(b) } override def write(b: Array[Byte], off: Int, len: Int): Unit = { require(!closed, "cannot write to a closed ByteBufferOutputStream") super.write(b, off, len) } override def reset(): Unit = { require(!closed, "cannot reset a closed ByteBufferOutputStream") super.reset() } override def close(): Unit = { if (!closed) { super.close() closed = true } } def toByteBuffer: ByteBuffer = { require(closed, "can only call toByteBuffer() after ByteBufferOutputStream has been closed") ByteBuffer.wrap(buf, 0, count) } }
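A small usage sketch: toByteBuffer may only be called after the stream is closed, and because the class is private[spark] the sketch assumes it runs from a hypothetical package under org.apache.spark.

package org.apache.spark.demo // hypothetical package, needed because ByteBufferOutputStream is private[spark]

import org.apache.spark.util.ByteBufferOutputStream

object ByteBufferOutputStreamDemo extends App {
  val out = new ByteBufferOutputStream()
  out.write(Array[Byte](1, 2, 3, 4))
  out.close()                   // required: toByteBuffer checks that the stream is closed
  val buffer = out.toByteBuffer // wraps the internal buffer without copying
  println(buffer.remaining())   // 4
}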
Example 102
Source File: PythonRDDSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.api.python import java.io.{ByteArrayOutputStream, DataOutputStream} import java.nio.charset.StandardCharsets import org.apache.spark.SparkFunSuite class PythonRDDSuite extends SparkFunSuite { test("Writing large strings to the worker") { val input: List[String] = List("a"*100000) val buffer = new DataOutputStream(new ByteArrayOutputStream) PythonRDD.writeIteratorToStream(input.iterator, buffer) } test("Handle nulls gracefully") { val buffer = new DataOutputStream(new ByteArrayOutputStream) // Should not have NPE when write an Iterator with null in it // The correctness will be tested in Python PythonRDD.writeIteratorToStream(Iterator("a", null), buffer) PythonRDD.writeIteratorToStream(Iterator(null, "a"), buffer) PythonRDD.writeIteratorToStream(Iterator("a".getBytes(StandardCharsets.UTF_8), null), buffer) PythonRDD.writeIteratorToStream(Iterator(null, "a".getBytes(StandardCharsets.UTF_8)), buffer) PythonRDD.writeIteratorToStream(Iterator((null, null), ("a", null), (null, "b")), buffer) PythonRDD.writeIteratorToStream(Iterator( (null, null), ("a".getBytes(StandardCharsets.UTF_8), null), (null, "b".getBytes(StandardCharsets.UTF_8))), buffer) } }
Example 103
Source File: GenericAvroSerializerSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.serializer import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.nio.ByteBuffer import com.esotericsoftware.kryo.io.{Input, Output} import org.apache.avro.{Schema, SchemaBuilder} import org.apache.avro.generic.GenericData.Record import org.apache.spark.{SharedSparkContext, SparkFunSuite} class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext { conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") val schema : Schema = SchemaBuilder .record("testRecord").fields() .requiredString("data") .endRecord() val record = new Record(schema) record.put("data", "test data") test("schema compression and decompression") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema)))) } test("record serialization and deserialization") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val outputStream = new ByteArrayOutputStream() val output = new Output(outputStream) genericSer.serializeDatum(record, output) output.flush() output.close() val input = new Input(new ByteArrayInputStream(outputStream.toByteArray)) assert(genericSer.deserializeDatum(input) === record) } test("uses schema fingerprint to decrease message size") { val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema) val output = new Output(new ByteArrayOutputStream()) val beginningNormalPosition = output.total() genericSerFull.serializeDatum(record, output) output.flush() val normalLength = output.total - beginningNormalPosition conf.registerAvroSchemas(schema) val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema) val beginningFingerprintPosition = output.total() genericSerFinger.serializeDatum(record, output) val fingerprintLength = output.total - beginningFingerprintPosition assert(fingerprintLength < normalLength) } test("caches previously seen schemas") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val compressedSchema = genericSer.compress(schema) val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema)) assert(compressedSchema.eq(genericSer.compress(schema))) assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema)))) } }
Example 104
Source File: TaskDescriptionSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.io.{ByteArrayOutputStream, DataOutputStream, UTFDataFormatException} import java.nio.ByteBuffer import java.util.Properties import scala.collection.mutable.HashMap import org.apache.spark.SparkFunSuite class TaskDescriptionSuite extends SparkFunSuite { test("encoding and then decoding a TaskDescription results in the same TaskDescription") { val originalFiles = new HashMap[String, Long]() originalFiles.put("fileUrl1", 1824) originalFiles.put("fileUrl2", 2) val originalJars = new HashMap[String, Long]() originalJars.put("jar1", 3) val originalProperties = new Properties() originalProperties.put("property1", "18") originalProperties.put("property2", "test value") // SPARK-19796 -- large property values (like a large job description for a long sql query) // can cause problems for DataOutputStream, make sure we handle correctly val sb = new StringBuilder() (0 to 10000).foreach(_ => sb.append("1234567890")) val largeString = sb.toString() originalProperties.put("property3", largeString) // make sure we've got a good test case intercept[UTFDataFormatException] { val out = new DataOutputStream(new ByteArrayOutputStream()) try { out.writeUTF(largeString) } finally { out.close() } } // Create a dummy byte buffer for the task. val taskBuffer = ByteBuffer.wrap(Array[Byte](1, 2, 3, 4)) val originalTaskDescription = new TaskDescription( taskId = 1520589, attemptNumber = 2, executorId = "testExecutor", name = "task for test", index = 19, originalFiles, originalJars, originalProperties, taskBuffer ) val serializedTaskDescription = TaskDescription.encode(originalTaskDescription) val decodedTaskDescription = TaskDescription.decode(serializedTaskDescription) // Make sure that all of the fields in the decoded task description match the original. assert(decodedTaskDescription.taskId === originalTaskDescription.taskId) assert(decodedTaskDescription.attemptNumber === originalTaskDescription.attemptNumber) assert(decodedTaskDescription.executorId === originalTaskDescription.executorId) assert(decodedTaskDescription.name === originalTaskDescription.name) assert(decodedTaskDescription.index === originalTaskDescription.index) assert(decodedTaskDescription.addedFiles.equals(originalFiles)) assert(decodedTaskDescription.addedJars.equals(originalJars)) assert(decodedTaskDescription.properties.equals(originalTaskDescription.properties)) assert(decodedTaskDescription.serializedTask.equals(taskBuffer)) } }
Example 105
Source File: TestResults.scala From mimir with Apache License 2.0 | 5 votes |
package mimir.util import java.nio.file.Files import java.nio.file.Paths import java.nio.charset.Charset import java.nio.charset.StandardCharsets import java.io.ByteArrayOutputStream import java.io.PrintWriter import org.rogach.scallop.ScallopConf object TestResults { def main(args: Array[String]) { val config = new TestResultConfig(args) println("running tests....") parseTestResults(config.sbtPath(),config.sbtCmd()) } def parseTestResults(sbtPath:String = "/opt/local/bin/sbt", sbtCmd:String = "test") = { val procOutput = runCommand(Seq(sbtPath,sbtCmd))._2.replaceAll("""\x1b\[[0-9;]*[a-zA-Z]""", "") val pattern = """(?m)^.*\[info\] Total.*$|^.*\[info\] Finished.*$|^.*\[info\] [\d]+ examp.*$""".r val header = "test_name,seconds,examples,expectations,failures,errors,skipped\n" val pattern2 = """\[info\] Total for specification (\w+)\s+\[info\] Finished in (.+)\R\[info\] (.+)\R""".r val pattern3 = """([a-zA-Z]+): (?:(\d+) minutes? )?(?:(\d+) seconds?[,:] )?(?:(\d+) ms[,:] )?(\d+) examples?, (?:(\d+) expectations?, )?(\d+) failures?, (\d+) errors?(?:, (\d+) skipped)?""".r val string = pattern2.findAllMatchIn(procOutput).map(mat => s"${mat.group(1)}: ${mat.group(2)}: ${mat.group(3)}") .map(nline => nline match { case pattern3(test_name,minutes,seconds,ms,examples,expectations,failures,errors,skipped) => { val allseconds = (minutes match { case "" => 0 case null => 0 case x => x.toInt*60 }) + (seconds match { case "" => 0 case null => 0 case x => x.toInt }) + (ms match { case "" => 0.0 case null => 0.0 case x => x.toDouble/1000.0 }) s"$test_name,$allseconds,$examples,$expectations,$failures,$errors,$skipped" } }).mkString("\n") val outStr = header + string println(outStr) Files.write(Paths.get("test_output.csv"), outStr.getBytes(StandardCharsets.UTF_8)) } import sys.process._ def runCommand(cmd: Seq[String]): (Int, String, String) = { val stdoutStream = new ByteArrayOutputStream val stderrStream = new ByteArrayOutputStream val stdoutWriter = new PrintWriter(stdoutStream) val stderrWriter = new PrintWriter(stderrStream) val exitValue = cmd.!(ProcessLogger(stdoutWriter.println, stderrWriter.println)) stdoutWriter.close() stderrWriter.close() (exitValue, stdoutStream.toString, stderrStream.toString) } } class TestResultConfig(arguments: Seq[String]) extends ScallopConf(arguments) { val experimental = opt[List[String]]("X", default = Some(List[String]())) val sparkHost = opt[String]("sparkHost", descr = "The IP or hostname of the spark master", default = Some("spark-master.local")) val sparkPort = opt[String]("sparkPort", descr = "The port of the spark master", default = Some("7077")) val sbtPath = opt[String]("sbtPath", descr = "The path to sbt binary", default = Some("/opt/local/bin/sbt")) val sbtCmd = opt[String]("sbtCmd", descr = "The sbt command to run", default = Some("test")) }
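The runCommand helper above is the reusable piece: it captures a child process's stdout and stderr into ByteArrayOutputStreams via ProcessLogger. Reduced to a single command it looks like the sketch below, which assumes an echo binary is on the PATH.

import java.io.{ByteArrayOutputStream, PrintWriter}
import scala.sys.process._

object CaptureProcessOutput extends App {
  // Same capture pattern as TestResults.runCommand, for one command.
  val stdout = new ByteArrayOutputStream
  val stderr = new ByteArrayOutputStream
  val stdoutWriter = new PrintWriter(stdout)
  val stderrWriter = new PrintWriter(stderr)
  val exitValue = Seq("echo", "hello").!(ProcessLogger(stdoutWriter.println, stderrWriter.println))
  stdoutWriter.close()
  stderrWriter.close()
  println(s"exit=$exitValue stdout=${stdout.toString.trim}")
}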
Example 106
Source File: SageMakerProtobufWriter.scala From sagemaker-spark with Apache License 2.0 | 5 votes |
package com.amazonaws.services.sagemaker.sparksdk.protobuf

import java.io.ByteArrayOutputStream

import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{BytesWritable, NullWritable}
import org.apache.hadoop.mapreduce.{RecordWriter, TaskAttemptContext}

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
import org.apache.spark.sql.execution.datasources.OutputWriter
import org.apache.spark.sql.types.StructType

// The class header and fields were missing from the snippet as captured; they are
// reconstructed below so that the two methods compile. The exact constructor
// signature and field definitions should be treated as assumptions, not as the
// verbatim sagemaker-spark source.
class SageMakerProtobufWriter(path: String,
                              context: TaskAttemptContext,
                              schema: StructType,
                              options: Map[String, String]) extends OutputWriter {

  private val byteArrayOutputStream = new ByteArrayOutputStream()

  // Assumed wiring: RecordIO-encoded protobuf records are written through RecordIOOutputFormat,
  // as in the RecordIOOutputFormatTests example below.
  private lazy val recordWriter: RecordWriter[NullWritable, BytesWritable] = {
    new RecordIOOutputFormat() {
      override def getDefaultWorkFile(context: TaskAttemptContext, extension: String): Path =
        new Path(path)
    }.getRecordWriter(context)
  }

  def write(row: Row): Unit = {
    val labelColumnName = options.getOrElse("labelColumnName", "label")
    val featuresColumnName = options.getOrElse("featuresColumnName", "features")

    val record = ProtobufConverter.rowToProtobuf(row, featuresColumnName, Some(labelColumnName))
    record.writeTo(byteArrayOutputStream)

    recordWriter.write(NullWritable.get(), new BytesWritable(byteArrayOutputStream.toByteArray))
    byteArrayOutputStream.reset()
  }

  override def close(): Unit = {
    recordWriter.close(context)
  }
}
Example 107
Source File: RecordIOOutputFormatTests.scala From sagemaker-spark with Apache License 2.0 | 5 votes |
package com.amazonaws.services.sagemaker.sparksdk.protobuf import java.io.ByteArrayOutputStream import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, FSDataOutputStream, Path} import org.apache.hadoop.io.{BytesWritable, NullWritable} import org.apache.hadoop.mapreduce.TaskAttemptContext import org.mockito.Matchers.any import org.mockito.Mockito.{verify, when} import org.scalatest.{BeforeAndAfter, FlatSpec} import org.scalatest.mock.MockitoSugar import com.amazonaws.services.sagemaker.sparksdk.protobuf.RecordIOOutputFormat.SageMakerProtobufRecordWriter class RecordIOOutputFormatTests extends FlatSpec with MockitoSugar with BeforeAndAfter { var sagemakerProtobufRecordWriter: SageMakerProtobufRecordWriter = _ var mockOutputStream : FSDataOutputStream = _ var byteArrayOutputStream: ByteArrayOutputStream = _ var mockTaskAttemptContext: TaskAttemptContext = _ var mockPath: Path = _ var mockFileSystem: FileSystem = _ before { byteArrayOutputStream = new ByteArrayOutputStream() mockOutputStream = mock[FSDataOutputStream] sagemakerProtobufRecordWriter = new SageMakerProtobufRecordWriter(mockOutputStream) mockTaskAttemptContext = mock[TaskAttemptContext] mockPath = mock[Path] mockFileSystem = mock[FileSystem] } it should "write an empty array of bytes" in { val bytesWritable = new BytesWritable(byteArrayOutputStream.toByteArray) val bytes = ProtobufConverter.byteArrayToRecordIOEncodedByteArray(bytesWritable.getBytes) sagemakerProtobufRecordWriter.write(NullWritable.get(), bytesWritable) verify(mockOutputStream).write(bytes, 0, bytes.length) } it should "write an array of bytes" in { val byteArray = Array[Byte](0, 0, 0, 0) byteArrayOutputStream.write(byteArray) val bytesWritable = new BytesWritable(byteArrayOutputStream.toByteArray) val bytes = ProtobufConverter.byteArrayToRecordIOEncodedByteArray(bytesWritable.getBytes) sagemakerProtobufRecordWriter.write(NullWritable.get(), bytesWritable) verify(mockOutputStream).write(bytes, 0, bytes.length) } it should "write an array of bytes, padding as necessary" in { byteArrayOutputStream.write(5) val bytesWritable = new BytesWritable(byteArrayOutputStream.toByteArray) val bytes = ProtobufConverter.byteArrayToRecordIOEncodedByteArray(bytesWritable.getBytes) sagemakerProtobufRecordWriter.write(NullWritable.get(), bytesWritable) verify(mockOutputStream).write(bytes, 0, bytes.length) } it should "write an array of bytes, padding only as much as necessary" in { byteArrayOutputStream.write(Array[Byte](0, 0, 0, 0, 0)) val bytesWritable = new BytesWritable(byteArrayOutputStream.toByteArray) val bytes = ProtobufConverter.byteArrayToRecordIOEncodedByteArray(bytesWritable.getBytes) sagemakerProtobufRecordWriter.write(NullWritable.get(), bytesWritable) verify(mockOutputStream).write(bytes, 0, bytes.length) } it should "create a record writer from a FSDataOutputStream created by the filesystem" in { val mockTaskAttemptContext = mock[TaskAttemptContext] val mockPath = mock[Path] val mockFileSystem = mock[FileSystem] when(mockPath.getFileSystem(any[Configuration])).thenReturn(mockFileSystem) new RecordIOOutputFormat() { override def getDefaultWorkFile(context: TaskAttemptContext, extension: String): Path = { mockPath } }.getRecordWriter(mockTaskAttemptContext) verify(mockFileSystem).create(mockPath, true) } }
Example 108
Source File: ProcessSpawner.scala From akka-http-health with MIT License | 5 votes |
package io.github.lhotari.akka.http.health import java.io.ByteArrayOutputStream import java.lang.System.getProperty import java.net.{URL, URLClassLoader} import org.apache.commons.io.IOUtils import scala.collection.JavaConverters._ import scala.reflect.runtime.universe._ case class ProcessResult(retval: Integer, output: String) trait ProcessSpawner { lazy val classpath = resolveClassPath() val sep = getProperty("file.separator") val javaExecutablePath = getProperty("java.home") + sep + "bin" + sep + "java" private def resolveClassPath() = { getClass.getClassLoader match { case urlClassLoader: URLClassLoader => urlClassLoader.getURLs.collect { case url: URL => url.getFile }.mkString(getProperty("path.separator")) case _ => getProperty("java.class.path") } } def executeInSeparateProcess[T](mainClassType: T, maxMemoryMB: Integer = 100, extraJvmOpts: Seq[String] = Nil, args: Seq[String] = Nil)(implicit tag: WeakTypeTag[T]): ProcessResult = { val className = tag.tpe.termSymbol.fullName val processBuilder = new ProcessBuilder(javaExecutablePath).redirectErrorStream(true) val commands = processBuilder.command() commands.add(s"-Xmx${maxMemoryMB}m") commands.addAll(extraJvmOpts.asJava) commands.add("-cp") commands.add(classpath) commands.add(className) commands.addAll(args.asJava) println(String.join(" ", commands)) val process = processBuilder.start() val output = new ByteArrayOutputStream() IOUtils.copy(process.getInputStream, output) ProcessResult(process.waitFor(), output.toString()) } }
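A minimal usage sketch for the ProcessSpawner trait above (EchoMain and the argument values are illustrative, not part of the project): any top-level object with a main method reachable on the current classpath can be spawned.

import io.github.lhotari.akka.http.health.ProcessSpawner

object EchoMain {
  def main(args: Array[String]): Unit = println(args.mkString(" "))
}

object ProcessSpawnerExample extends ProcessSpawner {
  def main(args: Array[String]): Unit = {
    // Spawns a second JVM with a 64 MB heap and captures its combined output.
    val result = executeInSeparateProcess(EchoMain, maxMemoryMB = 64, args = Seq("hello", "world"))
    println(s"exit code: ${result.retval}, output: ${result.output}")
  }
}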
Example 109
Source File: package.scala From pulsar4s with Apache License 2.0 | 5 votes |
package com.sksamuel.pulsar4s import java.io.ByteArrayOutputStream import java.io.ByteArrayInputStream import java.nio.charset.StandardCharsets import com.sksamuel.avro4s.AvroSchema import com.sksamuel.avro4s.AvroInputStream import com.sksamuel.avro4s.AvroOutputStream import com.sksamuel.avro4s.Decoder import com.sksamuel.avro4s.Encoder import com.sksamuel.avro4s.SchemaFor import org.apache.pulsar.client.api.Schema import org.apache.pulsar.common.schema.{SchemaInfo, SchemaType} import scala.annotation.implicitNotFound package object avro { @implicitNotFound("No Avro Schema for type ${T} found.") implicit def avroSchema[T: Manifest: SchemaFor: Encoder: Decoder]: Schema[T] = new Schema[T] { val schema: org.apache.avro.Schema = AvroSchema[T] override def clone(): Schema[T] = this override def encode(t: T): Array[Byte] = { val baos = new ByteArrayOutputStream val aos = AvroOutputStream.binary[T].to(baos).build(schema) aos.write(t) aos.flush() aos.close() baos.toByteArray() } override def decode(bytes: Array[Byte]): T = { val bais = new ByteArrayInputStream(bytes) val ais = AvroInputStream.binary[T].from(bais).build(schema) val first = ais.iterator.next() ais.close() first } override def getSchemaInfo: SchemaInfo = new SchemaInfo() .setName(manifest[T].runtimeClass.getCanonicalName) .setType(SchemaType.AVRO) .setSchema(schema.toString.getBytes(StandardCharsets.UTF_8)) } }
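A usage sketch for the implicit Schema derivation above (SensorReading is an illustrative case class; the required avro4s SchemaFor/Encoder/Decoder instances are derived for case classes):

import com.sksamuel.pulsar4s.avro._

case class SensorReading(id: String, value: Double)

object AvroSchemaRoundTrip {
  def main(args: Array[String]): Unit = {
    val schema = avroSchema[SensorReading]               // org.apache.pulsar.client.api.Schema[SensorReading]
    val bytes  = schema.encode(SensorReading("a", 1.0))  // Avro binary via a ByteArrayOutputStream
    val back   = schema.decode(bytes)
    assert(back == SensorReading("a", 1.0))
  }
}

The same implicit is what pulsar4s producers and consumers can pick up when they are parameterized with SensorReading.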
Example 110
Source File: RawTextSender.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util import java.io.{ByteArrayOutputStream, IOException} import java.net.ServerSocket import java.nio.ByteBuffer import scala.io.Source import org.apache.spark.{SparkConf, Logging} import org.apache.spark.serializer.KryoSerializer import org.apache.spark.util.IntParam private[streaming] object RawTextSender extends Logging { def main(args: Array[String]) { if (args.length != 4) { // scalastyle:off println System.err.println("Usage: RawTextSender <port> <file> <blockSize> <bytesPerSec>") // scalastyle:on println System.exit(1) } // Parse the arguments using a pattern match val Array(IntParam(port), file, IntParam(blockSize), IntParam(bytesPerSec)) = args // Repeat the input data multiple times to fill in a buffer val lines = Source.fromFile(file).getLines().toArray val bufferStream = new ByteArrayOutputStream(blockSize + 1000) val ser = new KryoSerializer(new SparkConf()).newInstance() val serStream = ser.serializeStream(bufferStream) var i = 0 while (bufferStream.size < blockSize) { serStream.writeObject(lines(i)) i = (i + 1) % lines.length } val array = bufferStream.toByteArray val countBuf = ByteBuffer.wrap(new Array[Byte](4)) countBuf.putInt(array.length) countBuf.flip() val serverSocket = new ServerSocket(port) logInfo("Listening on port " + port) while (true) { val socket = serverSocket.accept() logInfo("Got a new connection") val out = new RateLimitedOutputStream(socket.getOutputStream, bytesPerSec) try { while (true) { out.write(countBuf.array) out.write(array) } } catch { case e: IOException => logError("Client disconnected") } finally { socket.close() } } } }
Example 111
Source File: RateLimitedOutputStreamSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util import java.io.ByteArrayOutputStream import java.util.concurrent.TimeUnit._ import org.apache.spark.SparkFunSuite class RateLimitedOutputStreamSuite extends SparkFunSuite { private def benchmark[U](f: => U): Long = { val start = System.nanoTime f System.nanoTime - start } test("write") { val underlying = new ByteArrayOutputStream val data = "X" * 41000 val stream = new RateLimitedOutputStream(underlying, desiredBytesPerSec = 10000) val elapsedNs = benchmark { stream.write(data.getBytes("UTF-8")) } val seconds = SECONDS.convert(elapsedNs, NANOSECONDS) assert(seconds >= 4, s"Seconds value ($seconds) is less than 4.") assert(seconds <= 30, s"Took more than 30 seconds ($seconds) to write data.") assert(underlying.toString("UTF-8") === data) } }
Example 112
Source File: PythonRDDSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.api.python import java.io.{ByteArrayOutputStream, DataOutputStream} import org.apache.spark.SparkFunSuite class PythonRDDSuite extends SparkFunSuite { test("Writing large strings to the worker") { val input: List[String] = List("a"*100000) val buffer = new DataOutputStream(new ByteArrayOutputStream) PythonRDD.writeIteratorToStream(input.iterator, buffer) } test("Handle nulls gracefully") { val buffer = new DataOutputStream(new ByteArrayOutputStream) // Should not have NPE when write an Iterator with null in it // The correctness will be tested in Python PythonRDD.writeIteratorToStream(Iterator("a", null), buffer) PythonRDD.writeIteratorToStream(Iterator(null, "a"), buffer) PythonRDD.writeIteratorToStream(Iterator("a".getBytes, null), buffer) PythonRDD.writeIteratorToStream(Iterator(null, "a".getBytes), buffer) PythonRDD.writeIteratorToStream(Iterator((null, null), ("a", null), (null, "b")), buffer) PythonRDD.writeIteratorToStream( Iterator((null, null), ("a".getBytes, null), (null, "b".getBytes)), buffer) } }
Example 113
Source File: GenericAvroSerializerSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.serializer import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.nio.ByteBuffer import com.esotericsoftware.kryo.io.{Output, Input} import org.apache.avro.{SchemaBuilder, Schema} import org.apache.avro.generic.GenericData.Record import org.apache.spark.{SparkFunSuite, SharedSparkContext} class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext { conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") val schema : Schema = SchemaBuilder .record("testRecord").fields() .requiredString("data") .endRecord() val record = new Record(schema) record.put("data", "test data") test("schema compression and decompression") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema)))) } test("record serialization and deserialization") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val outputStream = new ByteArrayOutputStream() val output = new Output(outputStream) genericSer.serializeDatum(record, output) output.flush() output.close() val input = new Input(new ByteArrayInputStream(outputStream.toByteArray)) assert(genericSer.deserializeDatum(input) === record) } test("uses schema fingerprint to decrease message size") { val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema) val output = new Output(new ByteArrayOutputStream()) val beginningNormalPosition = output.total() genericSerFull.serializeDatum(record, output) output.flush() val normalLength = output.total - beginningNormalPosition conf.registerAvroSchemas(schema) val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema) val beginningFingerprintPosition = output.total() genericSerFinger.serializeDatum(record, output) val fingerprintLength = output.total - beginningFingerprintPosition assert(fingerprintLength < normalLength) } test("caches previously seen schemas") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val compressedSchema = genericSer.compress(schema) val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema)) assert(compressedSchema.eq(genericSer.compress(schema))) assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema)))) } }
Example 114
Source File: DecoratorTest.scala From Elysium with MIT License | 5 votes |
package nz.daved.elysium.core import java.io.{ByteArrayOutputStream, PrintStream} import org.scalatest.{FlatSpec, Matchers} class DecoratorTest extends FlatSpec with Matchers { "@before" should "deal with anonymous functions passed in" in { val out: ByteArrayOutputStream = new ByteArrayOutputStream() Console.withOut(new PrintStream(out)) { DecoratorTestObject.world() } out.toString.stripLineEnd shouldBe "hello world" } "@after" should "deal with anonymous functions passed in" in { val out: ByteArrayOutputStream = new ByteArrayOutputStream() Console.withOut(new PrintStream(out)) { DecoratorTestObject.hello() } out.toString.stripLineEnd shouldBe "hello world" } }
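The stdout-capture pattern used in this test can be factored into a small helper; a sketch using only the standard library:

import java.io.{ByteArrayOutputStream, PrintStream}

object CaptureStdout {
  // Runs the thunk with Console.out redirected and returns whatever it printed.
  def captured(thunk: => Unit): String = {
    val out = new ByteArrayOutputStream()
    Console.withOut(new PrintStream(out))(thunk)
    out.toString("UTF-8")
  }
}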
Example 115
Source File: SerializationTestHelper.scala From xmlconfect with Apache License 2.0 | 5 votes |
package com.mthaler.xmlconfect import java.io.{ ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream } object SerializationTestHelper { def serializeDeserialize[T](obj: T): T = { val bout = new ByteArrayOutputStream() val out = new ObjectOutputStream(bout) out.writeObject(obj) val bin = new ByteArrayInputStream(bout.toByteArray) val in = new ObjectInputStream(bin) in.readObject().asInstanceOf[T] } }
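Usage sketch for the helper above (Point is an illustrative case class; Scala case classes are Serializable by default):

import com.mthaler.xmlconfect.SerializationTestHelper

case class Point(x: Int, y: Int)

object RoundTripExample {
  def main(args: Array[String]): Unit = {
    // Serializes to a ByteArrayOutputStream and reads the copy back.
    val copy = SerializationTestHelper.serializeDeserialize(Point(1, 2))
    assert(copy == Point(1, 2))
  }
}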
Example 116
Source File: LandmarkIOTests.scala From scalismo with Apache License 2.0 | 5 votes |
package scalismo.io import java.io.{ByteArrayOutputStream, File, InputStream} import java.net.URLDecoder import breeze.linalg.DenseVector import scalismo.ScalismoTestSuite import scalismo.geometry._ import scalismo.statisticalmodel.MultivariateNormalDistribution import scala.io.Source import scala.language.implicitConversions import scala.collection.immutable.Seq class LandmarkIOTests extends ScalismoTestSuite { implicit def doubleToFloat(d: Double): Float = d.toFloat implicit def inputStreamToSource(s: InputStream): Source = Source.fromInputStream(s) describe("Spray LandmarkIO") { val csvName = "/landmarks.csv" def csvStream() = getClass.getResourceAsStream(csvName) val jsonName = "/landmarks.json" def jsonStream() = getClass.getResourceAsStream(jsonName) def distWithDefaultVectors(d1: Double, d2: Double, d3: Double): MultivariateNormalDistribution = { val axes = List(DenseVector[Double](1, 0, 0), DenseVector[Double](0, 1, 0), DenseVector[Double](0, 0, 1)) val devs = List(d1, d2, d3) val data = axes zip devs MultivariateNormalDistribution(DenseVector[Double](0, 0, 0), data) } val jsonLm1 = Landmark("one", Point(1, 2, 3)) val jsonLm2 = Landmark("two", Point(2, 3, 4), Some("Landmark two"), Some(distWithDefaultVectors(1, 4, 9))) val jsonLms = List(jsonLm1, jsonLm2) it("can serialize and deserialize simple landmarks using JSON") { val out = new ByteArrayOutputStream() LandmarkIO.writeLandmarksJsonToStream(jsonLms, out) val written = new String(out.toByteArray) val read = LandmarkIO.readLandmarksJsonFromSource[_3D](Source.fromString(written)).get read should equal(jsonLms) } it("can read simple landmarks from a JSON Stream") { val read = LandmarkIO.readLandmarksJsonFromSource[_3D](jsonStream()).get read should equal(jsonLms) } } }
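A standalone sketch mirroring the write call exercised by the test above (the landmark name and coordinates are illustrative):

import java.io.ByteArrayOutputStream
import scalismo.geometry._
import scalismo.io.LandmarkIO

object LandmarkJsonExample {
  def main(args: Array[String]): Unit = {
    val lms = List(Landmark("nose", Point(1.0, 2.0, 3.0)))
    val out = new ByteArrayOutputStream()
    LandmarkIO.writeLandmarksJsonToStream(lms, out)
    println(new String(out.toByteArray, "UTF-8"))   // the JSON payload held in memory
  }
}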
Example 117
Source File: RichSparkFunctionsSpec.scala From lighthouse with Apache License 2.0 | 5 votes |
package be.dataminded.lighthouse.pipeline import java.io.ByteArrayOutputStream import be.dataminded.lighthouse.testing.SharedSparkSession import better.files._ import org.apache.spark.sql.Dataset import org.apache.spark.storage.StorageLevel import org.scalatest.BeforeAndAfter import org.scalatest.funspec.AnyFunSpec import org.scalatest.matchers.should.Matchers class RichSparkFunctionsSpec extends AnyFunSpec with Matchers with SharedSparkSession with BeforeAndAfter { import spark.implicits._ describe("SparkFunctions with a DataSet inside should have extra functionality") { val function = SparkFunction.of(Seq(1, 2, 3, 4, 5).toDS()) it("can cache") { function.cache().run(spark).storageLevel should equal(StorageLevel.MEMORY_ONLY) } it("can drop the cache") { function.cache().dropCache().run(spark).storageLevel should equal(StorageLevel.NONE) } it("can be written to a sink") { function.write(OrcSink("target/output/orc")).run(spark) file"target/output/orc".exists should be(true) } it("can be written to multiple sinks") { function.write(OrcSink("target/output/orc"), OrcSink("target/output/orc2")).run(spark) file"target/output/orc".exists should be(true) file"target/output/orc2".exists should be(true) } it("is being cached when writing to multiple sinks for performance") { val result = function.write(OrcSink("target/output/orc"), OrcSink("target/output/orc2")).run(spark) result.storageLevel should equal(StorageLevel.MEMORY_ONLY) } it("can easily be counted") { function.count().run(spark) should equal(5) } it("can print the schema") { val stream = new ByteArrayOutputStream() Console.withOut(stream) { function.printSchema().run(spark) } stream.toString() should include("value: integer (nullable = false)") } it("can be be used as a Dataset") { function.as[Int].run(spark) shouldBe a[Dataset[_]] } } after { file"target/output/orc".delete(true) file"target/output/orc2".delete(true) } }
Example 118
Source File: SparkAvroDecoder.scala From cloudflow with Apache License 2.0 | 5 votes |
package cloudflow.spark.avro import org.apache.log4j.Logger import java.io.ByteArrayOutputStream import scala.reflect.runtime.universe._ import org.apache.avro.generic.{ GenericDatumReader, GenericDatumWriter, GenericRecord } import org.apache.avro.io.{ DecoderFactory, EncoderFactory } import org.apache.spark.sql.{ Dataset, Encoder, Row } import org.apache.spark.sql.catalyst.encoders.{ encoderFor, ExpressionEncoder, RowEncoder } import org.apache.spark.sql.catalyst.expressions.GenericRow import org.apache.spark.sql.types.StructType import org.apache.avro.Schema import cloudflow.spark.sql.SQLImplicits._ case class EncodedKV(key: String, value: Array[Byte]) case class SparkAvroDecoder[T: Encoder: TypeTag](avroSchema: String) { val encoder: Encoder[T] = implicitly[Encoder[T]] val sqlSchema: StructType = encoder.schema val encoderForDataColumns: ExpressionEncoder[Row] = RowEncoder(sqlSchema) @transient lazy val _avroSchema = new Schema.Parser().parse(avroSchema) @transient lazy val rowConverter = SchemaConverters.createConverterToSQL(_avroSchema, sqlSchema) @transient lazy val datumReader = new GenericDatumReader[GenericRecord](_avroSchema) @transient lazy val decoder = DecoderFactory.get def decode(bytes: Array[Byte]): Row = { val binaryDecoder = decoder.binaryDecoder(bytes, null) val record = datumReader.read(null, binaryDecoder) rowConverter(record).asInstanceOf[GenericRow] } } case class SparkAvroEncoder[T: Encoder: TypeTag](avroSchema: String) { @transient lazy val log = Logger.getLogger(getClass.getName) val BufferSize = 5 * 1024 // 5 Kb val encoder = implicitly[Encoder[T]] val sqlSchema = encoder.schema @transient lazy val _avroSchema = new Schema.Parser().parse(avroSchema) val recordName = "topLevelRecord" // ??? val recordNamespace = "recordNamespace" // ??? @transient lazy val converter = AvroConverter.createConverterToAvro(sqlSchema, recordName, recordNamespace) // Risk: This process is memory intensive. Might require thread-level buffers to optimize memory usage def rowToBytes(row: Row): Array[Byte] = { val genRecord = converter(row).asInstanceOf[GenericRecord] if (log.isDebugEnabled) log.debug(s"genRecord = $genRecord") val datumWriter = new GenericDatumWriter[GenericRecord](_avroSchema) val avroEncoder = EncoderFactory.get val byteArrOS = new ByteArrayOutputStream(BufferSize) val binaryEncoder = avroEncoder.binaryEncoder(byteArrOS, null) datumWriter.write(genRecord, binaryEncoder) binaryEncoder.flush() byteArrOS.toByteArray } def encode(dataset: Dataset[T]): Dataset[Array[Byte]] = dataset.toDF().mapPartitions(rows ⇒ rows.map(rowToBytes)).as[Array[Byte]] // Note to self: I'm not sure how heavy this chain of transformations is def encodeWithKey(dataset: Dataset[T], keyFun: T ⇒ String): Dataset[EncodedKV] = { val encoder = encoderFor[T] implicit val rowEncoder = RowEncoder(encoder.schema).resolveAndBind() dataset.map { value ⇒ val key = keyFun(value) val internalRow = encoder.toRow(value) val row = rowEncoder.fromRow(internalRow) val bytes = rowToBytes(row) EncodedKV(key, bytes) } } }
Example 119
Source File: BooApp.scala From boopickle with Apache License 2.0 | 5 votes |
package boopickle.perftests import java.io.ByteArrayOutputStream import java.util.zip.GZIPOutputStream import boopickle.BufferPool object BooApp { def main(args: Array[String]): Unit = { runTests() } def runTests(): Unit = { Tests.suites.zipWithIndex.foreach { case (suite, idx) => val header = s"${1 + idx}/${Tests.suites.size} : ${suite.name}" println(header) println("=" * header.length) println(f"${"Library"}%-10s ${"ops/s"}%-10s ${"%"}%-10s ${"size"}%-10s ${"%"}%-10s ${"size.gz"}%-10s ${"%"}%-10s") val tester = new PerfTester(suite) val res = tester.runSuite // zip result data to see how small it gets val resSizes = res.results.map { r => val rawSize = r.data.length val bs = new ByteArrayOutputStream() val gs = new GZIPOutputStream(bs) gs.write(r.data) gs.finish() bs.flush() val gzipped = bs.toByteArray.length (r, rawSize, gzipped) } val maxCount = resSizes.map(_._1.count).max val minSize = resSizes.map(_._2).min val minGZSize = resSizes.map(_._3).min resSizes.foreach { r => println( f"${r._1.name}%-10s ${r._1.count}%-10d ${f"${r._1.count * 100.0 / maxCount}%.1f%%"}%-10s ${r._2}%-10d ${f"${r._2 * 100.0 / minSize}%.0f%%"}%-10s ${r._3}%-10d ${f"${r._3 * 100.0 / minGZSize}%.0f%%"}%-10s") } println() // print out buffer pool usage println(s"""BufferPool: | allocations = ${BufferPool.allocOk} | misses = ${BufferPool.allocMiss} """.stripMargin) } } }
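The gzip measurement in the loop above boils down to a small helper; a sketch using only the JDK:

import java.io.ByteArrayOutputStream
import java.util.zip.GZIPOutputStream

object GzipSize {
  // Returns the gzip-compressed length of the given payload.
  def gzippedLength(data: Array[Byte]): Int = {
    val bs = new ByteArrayOutputStream()
    val gs = new GZIPOutputStream(bs)
    gs.write(data)
    gs.finish()
    bs.flush()
    bs.toByteArray.length
  }
}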
Example 120
Source File: package.scala From sttp with Apache License 2.0 | 5 votes |
package sttp.client import java.io.{ByteArrayOutputStream, InputStream, OutputStream} import java.nio.{Buffer, ByteBuffer} import scala.annotation.{implicitNotFound, tailrec} package object internal { private[client] def contentTypeWithCharset(ct: String, charset: String): String = s"$ct; charset=$charset" private[client] def charsetFromContentType(ct: String): Option[String] = ct.split(";").map(_.trim.toLowerCase).collectFirst { case s if s.startsWith("charset=") && s.substring(8).trim != "" => s.substring(8).trim } private[client] def transfer(is: InputStream, os: OutputStream): Unit = { var read = 0 val buf = new Array[Byte](1024) @tailrec def transfer(): Unit = { read = is.read(buf, 0, buf.length) if (read != -1) { os.write(buf, 0, read) transfer() } } transfer() } private[client] def toByteArray(is: InputStream): Array[Byte] = { val os = new ByteArrayOutputStream transfer(is, os) os.toByteArray } private[client] def concatByteBuffers(bb1: ByteBuffer, bb2: ByteBuffer): ByteBuffer = { val buf = ByteBuffer .allocate(bb1.array().length + bb2.array().length) .put(bb1) .put(bb2) // rewind() returns Buffer in Java8, and ByteBuffer in Java11 // calling the method from the base class to avoid NoSuchMethodError (buf: Buffer).rewind() buf } private[client] def sanitizeCharset(charset: String): String = { val c2 = charset.trim() val c3 = if (c2.startsWith("\"")) c2.substring(1) else c2 if (c3.endsWith("\"")) c3.substring(0, c3.length - 1) else c3 } @implicitNotFound( "This is a partial request, the method & url are not specified. Use " + ".get(...), .post(...) etc. to obtain a non-partial request." ) private[client] type IsIdInRequest[U[_]] = U[Unit] =:= Identity[Unit] private[client] val Utf8 = "utf-8" private[client] val Iso88591 = "iso-8859-1" private[client] val CrLf = "\r\n" }
Example 121
Source File: CuModule.scala From neuroflow with Apache License 2.0 | 5 votes |
package neuroflow.cuda import jcuda.driver.{CUfunction, CUmodule} import jcuda.driver.JCudaDriver._ import breeze.macros.arityize import java.io.{ByteArrayOutputStream, InputStream} import jcuda.{CudaException, Pointer} private def loadData(inputStream: InputStream): Array[Byte] = { val baos: ByteArrayOutputStream = new ByteArrayOutputStream try { val buffer = new Array[Byte](8192) var done = false while (!done) { val read: Int = inputStream.read(buffer) if (read == -1) { done = true } else { baos.write(buffer, 0, read) } } baos.write('\0') baos.flush() baos.toByteArray } finally { baos.close() } } }
Example 122
Source File: NumPyTest.scala From featran with Apache License 2.0 | 5 votes |
package com.spotify.featran.numpy import java.io.{ByteArrayOutputStream, OutputStream} import org.scalatest._ import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers class NumPyTest extends AnyFlatSpec with Matchers { private def test(f: OutputStream => Unit)(expectedFile: String): Unit = { val actual = { val baos = new ByteArrayOutputStream() f(baos) baos.toByteArray } val expected = { val in = this.getClass.getResourceAsStream(expectedFile) val out = new ByteArrayOutputStream(math.max(32, in.available())) val buf = new Array[Byte](8192) var r = in.read(buf) while (r != -1) { out.write(buf, 0, r) r = in.read(buf) } out.toByteArray } actual shouldBe expected } "NumPy" should "work with 1-dimensional arrays" in { val a1d = (0 until 10).toArray test(NumPy.write(_, a1d))("/a1d-int.npy") test(NumPy.write(_, a1d.map(_.toLong)))("/a1d-long.npy") test(NumPy.write(_, a1d.map(_.toFloat)))("/a1d-float.npy") test(NumPy.write(_, a1d.map(_.toDouble)))("/a1d-double.npy") // scalastyle:off no.whitespace.before.left.bracket the[IllegalArgumentException] thrownBy { test(NumPy.write(_, a1d, Seq(20)))("/a1d-int.npy") } should have message "requirement failed: Invalid shape, 20 != 10" // scalastyle:on no.whitespace.before.left.bracket } it should "work with 2-dimensional arrays" in { val a2d = (for { i <- 0 until 10 j <- 0 until 5 } yield i * 10 + j).toArray test(NumPy.write(_, a2d, Seq(10, 5)))("/a2d-int.npy") test(NumPy.write(_, a2d.map(_.toLong), Seq(10, 5)))("/a2d-long.npy") test(NumPy.write(_, a2d.map(_.toFloat), Seq(10, 5)))("/a2d-float.npy") test(NumPy.write(_, a2d.map(_.toDouble), Seq(10, 5)))("/a2d-double.npy") // scalastyle:off no.whitespace.before.left.bracket the[IllegalArgumentException] thrownBy { test(NumPy.write(_, a2d, Seq(20, 5)))("/a1d-int.npy") } should have message "requirement failed: Invalid shape, 20 * 5 != 50" // scalastyle:on no.whitespace.before.left.bracket } it should "work with iterators" in { val a2d = (0 until 10).map(i => (0 until 5).map(j => i * 10 + j).toArray) test(NumPy.write(_, a2d.iterator, 10, 5))("/a2d-int.npy") test(NumPy.write(_, a2d.iterator.map(_.map(_.toLong)), 10, 5))("/a2d-long.npy") test(NumPy.write(_, a2d.iterator.map(_.map(_.toFloat)), 10, 5))("/a2d-float.npy") test(NumPy.write(_, a2d.iterator.map(_.map(_.toDouble)), 10, 5))("/a2d-double.npy") // scalastyle:off no.whitespace.before.left.bracket the[IllegalArgumentException] thrownBy { test(NumPy.write(_, a2d.iterator, 10, 10))("/a2d-int.npy") } should have message "requirement failed: Invalid row size, expected: 10, actual: 5" the[IllegalArgumentException] thrownBy { test(NumPy.write(_, a2d.iterator, 20, 5))("/a2d-int.npy") } should have message "requirement failed: Invalid number of rows, expected: 20, actual: 10" // hit the header.length % 16 == 0 condition the[IllegalArgumentException] thrownBy { test(NumPy.write(_, a2d.iterator, 1000000000, 50))("/a2d-int.npy") } should have message "requirement failed: Invalid row size, expected: 50, actual: 5" // scalastyle:on no.whitespace.before.left.bracket } }
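A usage sketch for the NumPy writer exercised above (the array values are illustrative); the bytes collected in the ByteArrayOutputStream are the contents of a .npy file:

import java.io.ByteArrayOutputStream
import com.spotify.featran.numpy.NumPy

object NumPyWriteExample {
  def main(args: Array[String]): Unit = {
    val out = new ByteArrayOutputStream()
    NumPy.write(out, Array(1.0, 2.0, 3.0, 4.0), Seq(2, 2))   // 2x2 matrix of doubles
    println(s"wrote ${out.toByteArray.length} bytes of .npy data")
  }
}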
Example 123
Source File: GZip.scala From polynote with Apache License 2.0 | 5 votes |
package polynote.util import java.io.ByteArrayOutputStream import java.util.zip.GZIPOutputStream import zio.RIO import zio.blocking.{Blocking, effectBlocking} import zio.ZIO.effectTotal object GZip { def apply(bytes: => Array[Byte]): RIO[Blocking, Array[Byte]] = effectTotal(new ByteArrayOutputStream()).bracket(os => effectTotal(os.close())) { bos => effectBlocking { val os = new GZIPOutputStream(bos, true) os.write(bytes) os.flush() os.close() bos.toByteArray } } }
Example 124
Source File: StatsController.scala From recogito2 with Apache License 2.0 | 5 votes |
package controllers.document.stats import com.mohiva.play.silhouette.api.Silhouette import controllers.{BaseOptAuthController, Security, HasVisitLogging, HasPrettyPrintJSON} import java.io.{ByteArrayOutputStream, PrintWriter} import javax.inject.{Inject, Singleton} import kantan.csv._ import kantan.csv.ops._ import kantan.csv.CsvConfiguration.{Header, QuotePolicy} import kantan.csv.engine.commons._ import services.annotation.AnnotationService import services.document.DocumentService import services.user.UserService import services.user.Roles._ import services.visit.VisitService import org.webjars.play.WebJarsUtil import play.api.Configuration import play.api.mvc.{AnyContent, Request, Result, ControllerComponents} import play.api.libs.json._ import play.api.libs.functional.syntax._ import play.api.i18n.I18nSupport import plugins.PluginRegistry import scala.concurrent.{ExecutionContext, Future} @Singleton class StatsController @Inject() ( val components: ControllerComponents, val config: Configuration, val documents: DocumentService, val annotations: AnnotationService, val users: UserService, val silhouette: Silhouette[Security.Env], implicit val visitService: VisitService, implicit val webjars: WebJarsUtil, implicit val ctx: ExecutionContext ) extends BaseOptAuthController(components, config, documents, users) with HasVisitLogging with HasPrettyPrintJSON with I18nSupport { private val CSV_CONFIG = CsvConfiguration(',', '"', QuotePolicy.WhenNeeded, Header.None) implicit val tuple2Writes: Writes[Tuple2[String, Long]] = ( (JsPath \ "value").write[String] and (JsPath \ "count").write[Long] )(t => (t._1, t._2)) private def toCSV(stats: Seq[(String, Long)]): String = { val out = new ByteArrayOutputStream() val writer = out.asCsvWriter[(String, Long)](CSV_CONFIG) stats.foreach(writer.write(_)) writer.close() new String(out.toByteArray, "UTF-8") } def showDocumentStats(documentId: String, tab: Option[String]) = silhouette.UserAwareAction.async { implicit request => documentReadResponse(documentId, request.identity, { case (doc, accesslevel) => logDocumentView(doc.document, None, accesslevel) tab.map(_.toLowerCase) match { case Some(t) if t == "activity" => val plugins = PluginRegistry.listConfigs("document.stats.activity") Future.successful(Ok(views.html.document.stats.activity(doc, request.identity, accesslevel, plugins))) case Some(t) if t == "entities" => val plugins = PluginRegistry.listConfigs("document.stats.entities") Future.successful(Ok(views.html.document.stats.entities(doc, request.identity, accesslevel, plugins))) case Some(t) if t == "tags" => val plugins = PluginRegistry.listConfigs("document.stats.tags") Future.successful(Ok(views.html.document.stats.tags(doc, request.identity, accesslevel, plugins))) case _ => val plugins = PluginRegistry.listConfigs("document.stats.activity") Future.successful(Ok(views.html.document.stats.activity(doc, request.identity, accesslevel, plugins))) } }) } private def getTags(documentId: String)(action: (Seq[(String, Long)], Request[AnyContent]) => Result) = silhouette.UserAwareAction.async { implicit request => documentReadResponse(documentId, request.identity, { case (doc, accesslevel) => annotations.getTagStats(documentId).map { buckets => action(buckets, request.request) } } ) } def getTagsAsJSON(documentId: String) = getTags(documentId) { case (buckets, request) => jsonOk(Json.toJson(buckets))(request) } def getTagsAsCSV(documentId: String) = getTags(documentId) { case(buckets, request) => Ok(toCSV(buckets)).withHeaders(CONTENT_DISPOSITION -> { 
s"attachment; filename=${documentId}_tags.csv" }) } }
Example 125
Source File: DefineMacroCmd.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.op.cmd import java.io.{ObjectInputStream, ByteArrayInputStream, ObjectOutputStream, ByteArrayOutputStream} import dbis.piglet.plan.DataflowPlan import scala.collection.mutable.ListBuffer import dbis.piglet.op.{Pipe,PigOperator} case class DefineMacroCmd( out: Pipe, macroName: String, params: Option[List[String]], stmts: List[PigOperator] ) extends PigOperator(out) { var subPlan: Option[DataflowPlan] = None var inPipes = List[Pipe]() def deepClone(): DefineMacroCmd = { val baos = new ByteArrayOutputStream() val oos = new ObjectOutputStream(baos) oos.writeObject(this) val bais = new ByteArrayInputStream(baos.toByteArray()) val ois = new ObjectInputStream(bais) ois.readObject().asInstanceOf[DefineMacroCmd] } override def preparePlan: Unit = { def pipeParamPositions(): List[Int] = { val l = ListBuffer[Int]() inPipes.foreach(i => { val pos = params.get.indexOf(i.name.substring(1)) if (pos >= 0) l += pos }) l.toList } }
Example 126
Source File: package.scala From pbdirect with MIT License | 5 votes |
import java.io.ByteArrayOutputStream import java.util import cats.data.{NonEmptyList => NEL} import com.google.protobuf.{CodedInputStream, CodedOutputStream} package object pbdirect { implicit class PBWriterOps[A <: AnyRef](private val a: A) extends AnyVal { def toPB(implicit writer: PBWriter[A]): Array[Byte] = { val out = new ByteArrayOutputStream() val pbOut = CodedOutputStream.newInstance(out) val sizes = IdentityMaps.emptyJavaIdentityMap[Any, Int] writer.writeTo(NEL.one(1), a, pbOut, sizes) pbOut.flush() val bytes = out.toByteArray // remove the tag and return the content val input = CodedInputStream.newInstance(bytes) input.readTag() input.readByteArray() } } implicit class PBParserOps(private val bytes: Array[Byte]) extends AnyVal { def pbTo[A](implicit reader: PBParser[A]): A = { // wraps the bytes into a protobuf single field message val out = new ByteArrayOutputStream() val pbOut = CodedOutputStream.newInstance(out) pbOut.writeByteArray(1, bytes) pbOut.flush() reader.parse(NEL.one(1), out.toByteArray) } } }
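A round-trip sketch for the toPB/pbTo syntax defined above, following the usage pattern from the pbdirect README (Item is an illustrative case class; writer and parser instances are derived by pbdirect):

import pbdirect._

case class Item(name: Option[String], count: Option[Int], tags: List[String])

object PBDirectRoundTrip {
  def main(args: Array[String]): Unit = {
    val item  = Item(Some("widget"), Some(3), List("a", "b"))
    val bytes = item.toPB            // serialize to protobuf bytes
    val back  = bytes.pbTo[Item]     // parse them back
    assert(back == item)
  }
}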
Example 127
Source File: Avro4sJsonSupport.scala From kafka-serde-scala with Apache License 2.0 | 5 votes |
package io.github.azhur.kafkaserdeavro4s import java.io.ByteArrayOutputStream import java.util import com.sksamuel.avro4s.{ AvroJsonInputStream, AvroOutputStream, FromRecord, SchemaFor, ToRecord } import org.apache.avro.file.SeekableByteArrayInput import org.apache.kafka.common.errors.SerializationException import org.apache.kafka.common.serialization.{ Deserializer, Serde, Serializer } import scala.language.implicitConversions import scala.util.control.NonFatal import scala.util.{ Failure, Success } trait Avro4sJsonSupport { implicit def toSerializer[T >: Null](implicit schemaFor: SchemaFor[T], toRecord: ToRecord[T]): Serializer[T] = new Serializer[T] { override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {} override def close(): Unit = {} override def serialize(topic: String, data: T): Array[Byte] = if (data == null) null else { val baos = new ByteArrayOutputStream() try { val output = AvroOutputStream.json[T](baos) try { output.write(data) } finally { output.close() } baos.toByteArray } catch { case NonFatal(e) => throw new SerializationException(e) } finally { baos.close() } } } implicit def toDeserializer[T >: Null]( implicit schemaFor: SchemaFor[T], fromRecord: FromRecord[T], schemas: WriterReaderSchemas = WriterReaderSchemas() ): Deserializer[T] = new Deserializer[T] { override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {} override def close(): Unit = {} override def deserialize(topic: String, data: Array[Byte]): T = if (data == null) null else new AvroJsonInputStream[T](new SeekableByteArrayInput(data), schemas.writerSchema, schemas.readerSchema).singleEntity match { case Success(json) => json case Failure(error) => throw new SerializationException(error) } } implicit def toSerde[T >: Null]( implicit schemaFor: SchemaFor[T], toRecord: ToRecord[T], fromRecord: FromRecord[T], schemas: WriterReaderSchemas = WriterReaderSchemas() ): Serde[T] = new Serde[T] { override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {} override def close(): Unit = {} override def serializer(): Serializer[T] = toSerializer[T] override def deserializer(): Deserializer[T] = toDeserializer[T] } } object Avro4sJsonSupport extends Avro4sJsonSupport
Example 128
Source File: Avro4sDataSupport.scala From kafka-serde-scala with Apache License 2.0 | 5 votes |
package io.github.azhur.kafkaserdeavro4s import java.io.ByteArrayOutputStream import java.util import com.sksamuel.avro4s.{ AvroDataInputStream, AvroDataOutputStream, FromRecord, SchemaFor, ToRecord } import org.apache.avro.file.{ CodecFactory, SeekableByteArrayInput } import org.apache.kafka.common.errors.SerializationException import org.apache.kafka.common.serialization.{ Deserializer, Serde, Serializer } import scala.language.implicitConversions import scala.util.control.NonFatal import scala.util.{ Failure, Success } trait Avro4sDataSupport { implicit def toSerializer[T >: Null]( implicit schemaFor: SchemaFor[T], toRecord: ToRecord[T], codec: CodecFactory = CodecFactory.nullCodec() ): Serializer[T] = new Serializer[T] { override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {} override def close(): Unit = {} override def serialize(topic: String, data: T): Array[Byte] = if (data == null) null else { val baos = new ByteArrayOutputStream() try { val output = AvroDataOutputStream[T](baos, codec) try { output.write(data) } finally { output.close() } baos.toByteArray } catch { case NonFatal(e) => throw new SerializationException(e) } finally { baos.close() } } } implicit def toDeserializer[T >: Null]( implicit schemaFor: SchemaFor[T], fromRecord: FromRecord[T], schemas: WriterReaderSchemas = WriterReaderSchemas() ): Deserializer[T] = new Deserializer[T] { override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {} override def close(): Unit = {} override def deserialize(topic: String, data: Array[Byte]): T = if (data == null) null else { val it = new AvroDataInputStream[T](new SeekableByteArrayInput(data), schemas.writerSchema, schemas.readerSchema).tryIterator if (it.hasNext) { it.next() match { case Success(record) => record case Failure(err) => throw new SerializationException(err) } } else { throw new SerializationException("Empty avro4s data iterator") } } } implicit def toSerde[T >: Null](implicit schemaFor: SchemaFor[T], toRecord: ToRecord[T], fromRecord: FromRecord[T], codec: CodecFactory = CodecFactory.nullCodec()): Serde[T] = new Serde[T] { override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {} override def close(): Unit = {} override def serializer(): Serializer[T] = toSerializer[T] override def deserializer(): Deserializer[T] = toDeserializer[T] } } object Avro4sDataSupport extends Avro4sDataSupport
Example 129
Source File: Avro4sBinarySupport.scala From kafka-serde-scala with Apache License 2.0 | 5 votes |
package io.github.azhur.kafkaserdeavro4s import java.io.ByteArrayOutputStream import java.util import com.sksamuel.avro4s.{ AvroBinaryInputStream, AvroOutputStream, FromRecord, SchemaFor, ToRecord } import org.apache.avro.file.SeekableByteArrayInput import org.apache.kafka.common.errors.SerializationException import org.apache.kafka.common.serialization.{ Deserializer, Serde, Serializer } import scala.language.implicitConversions import scala.util.{ Failure, Success } import scala.util.control.NonFatal trait Avro4sBinarySupport { implicit def toSerializer[T >: Null](implicit schemaFor: SchemaFor[T], toRecord: ToRecord[T]): Serializer[T] = new Serializer[T] { override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {} override def close(): Unit = {} override def serialize(topic: String, data: T): Array[Byte] = if (data == null) null else { val baos = new ByteArrayOutputStream() try { val output = AvroOutputStream.binary[T](baos) try { output.write(data) } finally { output.close() } baos.toByteArray } catch { case NonFatal(e) => throw new SerializationException(e) } finally { baos.close() } } } implicit def toDeserializer[T >: Null]( implicit schemaFor: SchemaFor[T], fromRecord: FromRecord[T], schemas: WriterReaderSchemas = WriterReaderSchemas() ): Deserializer[T] = new Deserializer[T] { override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {} override def close(): Unit = {} override def deserialize(topic: String, data: Array[Byte]): T = if (data == null) null else { val it = new AvroBinaryInputStream[T](new SeekableByteArrayInput(data), schemas.writerSchema, schemas.readerSchema).tryIterator if (it.hasNext) { it.next() match { case Success(record) => record case Failure(err) => throw new SerializationException(err) } } else { throw new SerializationException("Empty avro4s binary iterator") } } } implicit def toSerde[T >: Null]( implicit schemaFor: SchemaFor[T], toRecord: ToRecord[T], fromRecord: FromRecord[T], schemas: WriterReaderSchemas = WriterReaderSchemas() ): Serde[T] = new Serde[T] { override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {} override def close(): Unit = {} override def serializer(): Serializer[T] = toSerializer[T] override def deserializer(): Deserializer[T] = toDeserializer[T] } } object Avro4sBinarySupport extends Avro4sBinarySupport
Example 130
Source File: Serdes.scala From tamer with MIT License | 5 votes |
package tamer import java.io.ByteArrayOutputStream import java.nio.ByteBuffer import com.sksamuel.avro4s._ import org.apache.avro.Schema import tamer.registry._ import zio.{RIO, Task} import zio.kafka.client.serde.{Deserializer, Serializer} sealed trait Serde[A] extends Any { def isKey: Boolean def schema: Schema def deserializer: Deserializer[Registry with Topic, A] def serializer: Serializer[Registry with Topic, A] final def serde: ZSerde[Registry with Topic, A] = ZSerde(deserializer)(serializer) } object Serde { private[this] final val Magic: Byte = 0x0 private[this] final val intByteSize = 4 final def apply[A <: Product: Decoder: Encoder: SchemaFor](isKey: Boolean = false) = new RecordSerde[A](isKey, SchemaFor[A].schema(DefaultFieldMapper)) final class RecordSerde[A: Decoder: Encoder](override final val isKey: Boolean, override final val schema: Schema) extends Serde[A] { private[this] def subject(topic: String): String = s"$topic-${if (isKey) "key" else "value"}" override final val deserializer: Deserializer[Registry with Topic, A] = Deserializer.byteArray.mapM { ba => val buffer = ByteBuffer.wrap(ba) if (buffer.get() != Magic) RIO.fail(SerializationError("Unknown magic byte!")) else { val id = buffer.getInt() for { env <- RIO.environment[Registry] _ <- env.registry.verifySchema(id, schema) res <- RIO.fromTry { val length = buffer.limit() - 1 - intByteSize val payload = new Array[Byte](length) buffer.get(payload, 0, length) AvroInputStream.binary[A].from(payload).build(schema).tryIterator.next } } yield res } } override final val serializer: Serializer[Registry with Topic, A] = Serializer.byteArray.contramapM { a => for { env <- RIO.environment[Registry with Topic] id <- env.registry.getOrRegisterId(subject(env.topic), schema) arr <- Task { val baos = new ByteArrayOutputStream baos.write(Magic.toInt) baos.write(ByteBuffer.allocate(intByteSize).putInt(id).array()) val ser = AvroOutputStream.binary[A].to(baos).build(schema) ser.write(a) ser.close() baos.toByteArray } } yield arr } } }
Example 131
Source File: TestUtils.scala From cats-effect with Apache License 2.0 | 5 votes |
package cats.effect.internals import java.io.{ByteArrayOutputStream, OutputStream, PrintStream} import java.nio.charset.StandardCharsets import scala.util.control.NonFatal def catchSystemErrInto[T](outStream: OutputStream)(thunk: => T): T = synchronized { val oldErr = System.err val fakeErr = new PrintStream(outStream) System.setErr(fakeErr) try { thunk } finally { System.setErr(oldErr) fakeErr.close() } } }
Example 132
Source File: CancelUtilsTests.scala From cats-effect with Apache License 2.0 | 5 votes |
package cats.effect.internals import java.io.ByteArrayOutputStream import cats.effect.IO import org.scalatest.matchers.should.Matchers import org.scalatest.funsuite.AnyFunSuite import scala.util.control.NonFatal class CancelUtilsTests extends AnyFunSuite with Matchers with TestUtils { test("cancelAll works for zero references") { CancelUtils.cancelAll().unsafeRunSync() } test("cancelAll works for one reference") { var wasCanceled = false CancelUtils.cancelAll(IO { wasCanceled = true }).unsafeRunSync() wasCanceled shouldBe true } test("cancelAll catches error from one reference") { val dummy = new RuntimeException("dummy") var wasCanceled1 = false var wasCanceled2 = false val io = CancelUtils.cancelAll( IO { wasCanceled1 = true }, IO(throw dummy), IO { wasCanceled2 = true } ) try { io.unsafeRunSync() fail("should have throw exception") } catch { case `dummy` => wasCanceled1 shouldBe true wasCanceled2 shouldBe true } } test("cancelAll catches the first error and logs the rest") { val dummy1 = new RuntimeException("dummy1") val dummy2 = new RuntimeException("dummy2") var wasCanceled1 = false var wasCanceled2 = false val io = CancelUtils.cancelAll( IO { wasCanceled1 = true }, IO(throw dummy1), IO(throw dummy2), IO { wasCanceled2 = true } ) val sysErr = new ByteArrayOutputStream() try { catchSystemErrInto(sysErr) { io.unsafeRunSync() } fail("should have throw exception") } catch { case NonFatal(error) => error shouldBe dummy1 sysErr.toString("utf-8") should include("dummy2") dummy1.getSuppressed shouldBe empty // ensure memory isn't leaked with addSuppressed dummy2.getSuppressed shouldBe empty // ensure memory isn't leaked with addSuppressed } } }
Example 133
Source File: JVMReprSpec.scala From incubator-toree with Apache License 2.0 | 5 votes |
package integration.interpreter.scala import java.util import java.io.ByteArrayOutputStream import jupyter.{Displayer, Displayers, MIMETypes} import org.apache.toree.global.StreamState import org.apache.toree.interpreter.Interpreter import org.apache.toree.interpreter.Results.Success import org.apache.toree.kernel.api.{DisplayMethodsLike, KernelLike} import org.apache.toree.kernel.interpreter.scala.ScalaInterpreter import org.mockito.Mockito.doReturn import org.scalatest.{BeforeAndAfter, FunSpec, Matchers} import org.scalatest.mock.MockitoSugar import scala.util.Random class JVMReprSpec extends FunSpec with Matchers with MockitoSugar with BeforeAndAfter { private val outputResult = new ByteArrayOutputStream() private var interpreter: Interpreter = _ before { val mockKernel = mock[KernelLike] val mockDisplayMethods = mock[DisplayMethodsLike] doReturn(mockDisplayMethods).when(mockKernel).display interpreter = new ScalaInterpreter().init(mockKernel) StreamState.setStreams(outputStream = outputResult) } after { interpreter.stop() outputResult.reset() } describe("ScalaInterpreter") { describe("#interpret") { it("should display Scala int as a text representation") { val (result, outputOrError) = interpreter.interpret("val a = 12") result should be(Success) outputOrError.isLeft should be(true) outputOrError.left.get should be(Map(MIMETypes.TEXT -> "12")) } it("should display Scala Some(str) as a text representation") { val (result, outputOrError) = interpreter.interpret("""val a = Some("str")""") result should be(Success) outputOrError.isLeft should be(true) outputOrError.left.get should be(Map(MIMETypes.TEXT -> "Some(str)")) } ignore("should use the Jupyter REPR API for display representation") { Displayers.register(classOf[DisplayerTest], new Displayer[DisplayerTest] { override def display(t: DisplayerTest): util.Map[String, String] = { val output = new util.HashMap[String, String]() output.put("text/plain", s"test object: ${t.id}") output.put("application/json", s"""{"id": ${t.id}""") output } }) val inst = DisplayerTest() interpreter.bind("inst", classOf[DisplayerTest].getName, inst, List()) val (result, outputOrError) = interpreter.interpret("""inst""") result should be(Success) outputOrError.isLeft should be(true) outputOrError.left.get should be(Map( MIMETypes.TEXT -> s"test object: ${inst.id}", "application/json" -> s"""{"id": ${inst.id}""" )) } } } } case class DisplayerTest(id: Long = new Random().nextLong())
Example 134
Source File: AvroMessageConverter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.jms.sink.converters import java.io.ByteArrayOutputStream import com.datamountaineer.streamreactor.connect.jms.config.JMSSetting import com.datamountaineer.streamreactor.connect.schemas.ConverterUtil import com.datamountaineer.streamreactor.connect.serialization.AvroSerializer import javax.jms.{BytesMessage, Session} import org.apache.kafka.connect.sink.SinkRecord class AvroMessageConverter extends JMSMessageConverter with ConverterUtil { override def convert(record: SinkRecord, session: Session, setting: JMSSetting): (String, BytesMessage) = { val converted = super[ConverterUtil].convert(record, setting.fields, setting.ignoreField) val avroRecord = convertValueToGenericAvro(converted) val avroSchema = avroData.fromConnectSchema(converted.valueSchema()) implicit val os = new ByteArrayOutputStream() AvroSerializer.write(avroRecord, avroSchema) val message = session.createBytesMessage() message.writeBytes(os.toByteArray) (setting.source, message) } }
Example 135
Source File: AvroSerializer.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.bloomberg.avro import java.io.ByteArrayOutputStream import com.datamountaineer.streamreactor.connect.bloomberg.BloombergData import com.datamountaineer.streamreactor.connect.bloomberg.avro.AvroSchemaGenerator._ import org.apache.avro.Schema import org.apache.avro.generic.GenericData.Record import org.apache.avro.generic.{GenericData, GenericDatumWriter, GenericRecord} import org.apache.avro.io.EncoderFactory import scala.collection.JavaConverters._ object AvroSerializer { private def recursive(record: GenericData.Record, schema: Schema, fieldName: String, value: Any): Unit = { value match { case _: Boolean => record.put(fieldName, value) case _: Int => record.put(fieldName, value) case _: Long => record.put(fieldName, value) case _: Double => record.put(fieldName, value) case _: Char => record.put(fieldName, value) case _: Float => record.put(fieldName, value) case _: String => record.put(fieldName, value) case list: java.util.List[_] => val tmpSchema = schema.getField(fieldName).schema() val itemSchema = if (tmpSchema.getType == Schema.Type.UNION) tmpSchema.getTypes.get(1) else tmpSchema require(itemSchema.getType == Schema.Type.ARRAY) //we might have a record not a primitive if (itemSchema.getElementType.getType == Schema.Type.RECORD) { val items = new GenericData.Array[GenericData.Record](list.size(), itemSchema) list.asScala.foreach { i => //only map is allowed val m = i.asInstanceOf[java.util.Map[String, Any]] items.add(m.toAvroRecord(itemSchema.getElementType)) } record.put(fieldName, items) } else { val items = new GenericData.Array[Any](list.size(), itemSchema) items.addAll(list) record.put(fieldName, items) } case map: java.util.LinkedHashMap[String @unchecked, _] => //record schema val fieldSchema = schema.getField(fieldName).schema() val nestedSchema = if (fieldSchema.getType == Schema.Type.UNION) fieldSchema.getTypes.get(1) else fieldSchema val nestedRecord = new Record(nestedSchema) map.entrySet().asScala.foreach(e => recursive(nestedRecord, nestedSchema, e.getKey, e.getValue)) record.put(fieldName, nestedRecord) } } } }
Example 136
Source File: CallableAction.scala From Waves with MIT License | 5 votes |
package com.wavesplatform.lang.v1.traits.domain import com.wavesplatform.common.state.ByteStr import com.wavesplatform.lang.v1.traits.domain.Recipient.Address sealed trait CallableAction case class AssetTransfer( recipient: Address, amount: Long, assetId: Option[ByteStr] ) extends CallableAction case class Issue( id: ByteStr, compiledScript: Option[ByteStr], decimals: Int, description: String, isReissuable: Boolean, name: String, quantity: Long, nonce: Long ) extends CallableAction object Issue { import java.io.ByteArrayOutputStream import com.wavesplatform.lang.utils.Serialize._ import com.wavesplatform.lang.v1.BaseGlobal private val Global: BaseGlobal = com.wavesplatform.lang.Global // Hack for IDEA def create( compiledScript: Option[ByteStr], decimals: Int, description: String, isReissuable: Boolean, name: String, quantity: Long, nonce: Long, parent: ByteStr ): Issue = { val id = calculateId(decimals, description, isReissuable, name, quantity, nonce, parent) Issue(id, compiledScript, decimals, description, isReissuable, name, quantity, nonce) } def calculateId( decimals: Int, description: String, isReissuable: Boolean, name: String, quantity: Long, nonce: Long, parent: ByteStr ): ByteStr = { val out = new ByteArrayOutputStream() out.writeString(name) out.writeString(description) out.writeInt(decimals) out.writeLong(quantity) out.writeShort(if (isReissuable) 1 else 0) out.writeLong(nonce) out.write(parent.arr) ByteStr(Global.blake2b256(out.toByteArray)) } } case class Reissue( assetId: ByteStr, isReissuable: Boolean, quantity: Long ) extends CallableAction case class Burn( assetId: ByteStr, quantity: Long ) extends CallableAction case class SponsorFee( assetId: ByteStr, minSponsoredAssetFee: Option[Long] ) extends CallableAction sealed trait DataOp extends CallableAction { val key: String } sealed trait DataItem[T] extends DataOp { val value: T } object DataItem { case class Lng(k: String, v: Long) extends DataItem[Long] { val key = k; val value = v } case class Bool(k: String, v: Boolean) extends DataItem[Boolean] { val key = k; val value = v } case class Bin(k: String, v: ByteStr) extends DataItem[ByteStr] { val key = k; val value = v } case class Str(k: String, v: String) extends DataItem[String] { val key = k; val value = v } case class Delete(key: String) extends DataOp }
Example 137
Source File: Serialize.scala From Waves with MIT License | 5 votes |
package com.wavesplatform.lang.utils import java.io.ByteArrayOutputStream import java.nio.ByteBuffer import java.nio.charset.StandardCharsets import com.wavesplatform.lang.v1.FunctionHeader import com.wavesplatform.lang.v1.FunctionHeader.{Native, User} import com.wavesplatform.lang.v1.Serde.{FH_NATIVE, FH_USER} object Serialize { implicit class ByteBufferOps(val self: ByteBuffer) extends AnyVal { def getBytes: Array[Byte] = { val len = self.getInt if (self.limit() < len || len < 0) { throw new Exception(s"Invalid array size ($len)") } val bytes = new Array[Byte](len) self.get(bytes) bytes } def getString: String = new String(getBytes, StandardCharsets.UTF_8) def getFunctionHeader: FunctionHeader = self.get() match { case FH_NATIVE => Native(self.getShort) case FH_USER => User(getString) case x => throw new RuntimeException(s"Unknown function header type: $x") } } implicit class ByteArrayOutputStreamOps(val self: ByteArrayOutputStream) extends AnyVal { def writeShort(value: Short): ByteArrayOutputStream = writeNumber(value, 2) def writeInt(value: Int): ByteArrayOutputStream = writeNumber(value, 4) def writeLong(value: Long): ByteArrayOutputStream = writeNumber(value, 8) def writeNumber(n: Long, byteCount: Int): ByteArrayOutputStream = { (byteCount - 1 to 0 by -1).foreach { i => self.write((n >> (8 * i) & 0xffL).toInt) } self } def writeString(x: String): ByteArrayOutputStream = { val bytes = x.getBytes(StandardCharsets.UTF_8) self.writeInt(bytes.length) self.write(bytes) self } def writeFunctionHeader(h: FunctionHeader): ByteArrayOutputStream = h match { case FunctionHeader.Native(id) => self.write(FH_NATIVE) self.writeShort(id) case FunctionHeader.User(internalName, _) => self.write(FH_USER) self.writeString(internalName) } } }
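A small round trip using only the ops defined above plus java.nio, showing how the length-prefixed string and the fixed-width number read back:

import java.io.ByteArrayOutputStream
import java.nio.ByteBuffer
import com.wavesplatform.lang.utils.Serialize._

object SerializeRoundTrip {
  def main(args: Array[String]): Unit = {
    val out = new ByteArrayOutputStream()
    out.writeString("waves").writeLong(42L)

    val buf = ByteBuffer.wrap(out.toByteArray)
    assert(buf.getString == "waves")   // length-prefixed UTF-8 string
    assert(buf.getLong == 42L)         // 8-byte big-endian number
  }
}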
Example 138
Source File: BasicMessagesRepoSpec.scala From Waves with MIT License | 5 votes |
package com.wavesplatform.network import java.io.ByteArrayOutputStream import com.google.protobuf.{ByteString, CodedOutputStream, WireFormat} import com.wavesplatform.TransactionGen import com.wavesplatform.common.state.ByteStr import com.wavesplatform.common.utils.EitherExt2 import com.wavesplatform.mining.MiningConstraints import com.wavesplatform.protobuf.block._ import com.wavesplatform.protobuf.transaction._ import com.wavesplatform.transaction.Asset.IssuedAsset import com.wavesplatform.transaction.smart.SetScriptTransaction import com.wavesplatform.transaction.{DataTransaction, Proofs, TxVersion} import org.scalatest._ class BasicMessagesRepoSpec extends FreeSpec with Matchers with TransactionGen { "PBBlockSpec max length" in { val maxSizedHeader = PBBlock.Header( Byte.MaxValue, ByteString.copyFrom(bytes64gen.sample.get), Long.MaxValue, ByteString.copyFrom(byteArrayGen(VanillaBlock.GenerationVRFSignatureLength).sample.get), Seq.fill(VanillaBlock.MaxFeaturesInBlock)(Short.MaxValue), Long.MaxValue, Byte.MaxValue, ByteString.copyFrom(bytes32gen.sample.get), Long.MaxValue, ByteString.copyFrom(bytes32gen.sample.get) ) val maxSignature = ByteString.copyFrom(bytes64gen.sample.get) val headerSize = maxSizedHeader.serializedSize val signatureSize = maxSignature.toByteArray.length val headerPBPrefix = new ByteArrayOutputStream() val codedHeaderPBPrefix = CodedOutputStream.newInstance(headerPBPrefix) codedHeaderPBPrefix.writeTag(PBBlock.HEADER_FIELD_NUMBER, WireFormat.WIRETYPE_LENGTH_DELIMITED) codedHeaderPBPrefix.writeUInt32NoTag(headerSize) codedHeaderPBPrefix.flush() val signaturePBPrefix = new ByteArrayOutputStream() val codedSignaturePBPrefix = CodedOutputStream.newInstance(signaturePBPrefix) codedSignaturePBPrefix.writeTag(PBBlock.SIGNATURE_FIELD_NUMBER, WireFormat.WIRETYPE_LENGTH_DELIMITED) codedSignaturePBPrefix.writeUInt32NoTag(maxSignature.toByteArray.length) codedSignaturePBPrefix.flush() val transactionPBPrefix = new ByteArrayOutputStream() val codedTransactionMaxLengthPBPrefix = CodedOutputStream.newInstance(transactionPBPrefix) codedTransactionMaxLengthPBPrefix.writeTag(PBBlock.TRANSACTIONS_FIELD_NUMBER, WireFormat.WIRETYPE_LENGTH_DELIMITED) codedTransactionMaxLengthPBPrefix.writeUInt32NoTag(MiningConstraints.MaxTxsSizeInBytes) codedTransactionMaxLengthPBPrefix.flush() val minPossibleTransactionSize = PBTransactions .protobuf( SetScriptTransaction .selfSigned( TxVersion.V2, accountGen.sample.get, None, 1L, 0L ) .explicitGet() ) .serializedSize val maxSize = headerPBPrefix.toByteArray.length + headerSize + signaturePBPrefix.toByteArray.length + signatureSize + MiningConstraints.MaxTxsSizeInBytes + (transactionPBPrefix.toByteArray.length * MiningConstraints.MaxTxsSizeInBytes / minPossibleTransactionSize) maxSize should be <= PBBlockSpec.maxLength } "PBTransactionSpec max length" in { val maxSizeTransaction = PBSignedTransaction( Some( PBTransaction( Byte.MaxValue, ByteString.copyFrom(bytes32gen.sample.get), Some(PBAmounts.fromAssetAndAmount(IssuedAsset(ByteStr(bytes32gen.sample.get)), Long.MaxValue)), Long.MaxValue, Byte.MaxValue ) ), Seq.fill(Proofs.MaxProofs)(ByteString.copyFrom(byteArrayGen(Proofs.MaxProofSize).sample.get)) ) val dataPBPrefix = new ByteArrayOutputStream() val codedDataPBPrefix = CodedOutputStream.newInstance(dataPBPrefix) codedDataPBPrefix.writeTag(Transaction.DATA_TRANSACTION_FIELD_NUMBER, WireFormat.WIRETYPE_LENGTH_DELIMITED) codedDataPBPrefix.writeUInt32NoTag(DataTransaction.MaxProtoBytes) codedDataPBPrefix.flush() val size = maxSizeTransaction.serializedSize 
+ dataPBPrefix.toByteArray.length + DataTransaction.MaxProtoBytes size should be <= PBTransactionSpec.maxLength } }
Example 139
Source File: TextDisplay.scala From almond with BSD 3-Clause "New" or "Revised" License | 5 votes |
package almond.display import java.io.{ByteArrayOutputStream, InputStream} import java.net.{HttpURLConnection, URL, URLConnection} import java.nio.charset.{Charset, StandardCharsets} import scala.util.Try abstract class TextDisplay extends UpdatableDisplay { def contentOrUrl: Either[URL, String] def content: Option[String] = contentOrUrl.right.toOption def url: Option[URL] = contentOrUrl.left.toOption def finalContent: String = contentOrUrl match { case Left(url) => TextDisplay.urlContent(url) case Right(c) => c } def withContent(code: String): UpdatableDisplay def withUrl(url: String): UpdatableDisplay } object TextDisplay { type Builder[T] = Display.Builder[String, T] private[almond] def readFully(is: InputStream): Array[Byte] = { val buffer = new ByteArrayOutputStream val data = Array.ofDim[Byte](16384) var nRead = 0 while ( { nRead = is.read(data, 0, data.length) nRead != -1 }) buffer.write(data, 0, nRead) buffer.flush() buffer.toByteArray } def urlContent(url: URL): String = { var conn: URLConnection = null val (rawContent, charsetOpt) = try { conn = url.openConnection() conn.setConnectTimeout(5000) // allow users to tweak that? val b = readFully(conn.getInputStream) val charsetOpt0 = conn match { case conn0: HttpURLConnection => conn0 .getContentType .split(';') .map(_.trim) .find(_.startsWith("charset=")) .map(_.stripPrefix("charset=")) .filter(Charset.isSupported) .map(Charset.forName) case _ => None } (b, charsetOpt0) } finally { if (conn != null) { Try(conn.getInputStream.close()) conn match { case conn0: HttpURLConnection => Try(conn0.getErrorStream.close()) Try(conn0.disconnect()) case _ => } } } new String(rawContent, charsetOpt.getOrElse(StandardCharsets.UTF_8)) } }
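The readFully helper above is the classic drain-an-InputStream loop. Below is a small self-contained sketch of the same pattern (the helper and object names are ours, not part of almond's API), useful when the stream's length is not known up front:

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream}

object ReadFullyDemo {
  // Reads the whole stream into memory, 16 KiB at a time.
  def readAll(is: InputStream): Array[Byte] = {
    val buffer = new ByteArrayOutputStream()
    val chunk = new Array[Byte](16384)
    var nRead = is.read(chunk)
    while (nRead != -1) {            // -1 signals end of stream
      buffer.write(chunk, 0, nRead)
      nRead = is.read(chunk)
    }
    buffer.toByteArray
  }

  def main(args: Array[String]): Unit = {
    val bytes = readAll(new ByteArrayInputStream("hello".getBytes("UTF-8")))
    println(new String(bytes, "UTF-8")) // prints: hello
  }
}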
Example 140
Source File: JupyterApiImpl.scala From almond with BSD 3-Clause "New" or "Revised" License | 5 votes |
package almond import java.io.ByteArrayOutputStream import java.nio.charset.StandardCharsets import almond.api.{FullJupyterApi, JupyterApi} import almond.internals.HtmlAnsiOutputStream import almond.interpreter.api.CommHandler import ammonite.util.Ref import pprint.{TPrint, TPrintColors} import scala.concurrent.Await import scala.concurrent.duration.Duration import scala.reflect.ClassTag final class JupyterApiImpl( execute: Execute, commHandlerOpt: => Option[CommHandler], replApi: ReplApiImpl, silent0: Ref[Boolean] ) extends FullJupyterApi { protected def printOnChange[T]( value: => T, ident: String, custom: Option[String], onChange: Option[(T => Unit) => Unit], onChangeOrError: Option[(Either[Throwable, T] => Unit) => Unit] )(implicit tprint: TPrint[T], tcolors: TPrintColors, classTagT: ClassTag[T] ): Iterator[String] = replApi.printSpecial(value, ident, custom, onChange, onChangeOrError, replApi.pprinter, Some(updatableResults))(tprint, tcolors, classTagT).getOrElse { replApi.Internal.print(value, ident, custom)(tprint, tcolors, classTagT) } override def silent(s: Boolean): Unit = silent0.update(s) override def silent: Boolean = silent0.apply() protected def ansiTextToHtml(text: String): String = { val baos = new ByteArrayOutputStream val haos = new HtmlAnsiOutputStream(baos) haos.write(text.getBytes(StandardCharsets.UTF_8)) haos.close() baos.toString("UTF-8") } def stdinOpt(prompt: String, password: Boolean): Option[String] = for (m <- execute.currentInputManagerOpt) yield Await.result(m.readInput(prompt, password), Duration.Inf) override def changingPublish = execute.currentPublishOpt.getOrElse(super.changingPublish) override def commHandler = commHandlerOpt.getOrElse(super.commHandler) protected def updatableResults0: JupyterApi.UpdatableResults = execute.updatableResults }
Example 141
Source File: ByteBufferOutputStream.scala From aloha with Apache License 2.0 | 5 votes |
package me.jrwang.aloha.rpc.serializer import java.io.ByteArrayOutputStream import java.nio.ByteBuffer class ByteBufferOutputStream(capacity: Int) extends ByteArrayOutputStream(capacity) { def this() = this(32) def getCount(): Int = count private[this] var closed: Boolean = false override def write(b: Int): Unit = { require(!closed, "cannot write to a closed ByteBufferOutputStream") super.write(b) } override def write(b: Array[Byte], off: Int, len: Int): Unit = { require(!closed, "cannot write to a closed ByteBufferOutputStream") super.write(b, off, len) } override def reset(): Unit = { require(!closed, "cannot reset a closed ByteBufferOutputStream") super.reset() } override def close(): Unit = { if (!closed) { super.close() closed = true } } def toByteBuffer: ByteBuffer = { require(closed, "can only call toByteBuffer() after ByteBufferOutputStream has been closed") ByteBuffer.wrap(buf, 0, count) } }
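A brief usage sketch of the class above (assuming the package shown in the example): the stream must be closed before toByteBuffer, which then wraps the internal array without copying.

import java.nio.ByteBuffer
import me.jrwang.aloha.rpc.serializer.ByteBufferOutputStream

object ByteBufferOutputStreamDemo {
  def main(args: Array[String]): Unit = {
    val out = new ByteBufferOutputStream()
    out.write("hello".getBytes("UTF-8"))
    out.close()                             // toByteBuffer requires a closed stream
    val buf: ByteBuffer = out.toByteBuffer  // wraps the internal buffer, no copy
    println(buf.remaining())                // prints: 5
  }
}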
Example 142
Source File: MarkdownReporterTest.scala From drunken-data-quality with Apache License 2.0 | 5 votes |
package de.frosner.ddq.reporters import java.io.{ByteArrayOutputStream, PrintStream} import de.frosner.ddq.constraints._ import de.frosner.ddq.core._ import de.frosner.ddq.testutils.{DummyConstraint, DummyConstraintResult} import org.apache.spark.sql.DataFrame import org.mockito.Mockito._ import org.scalatest.mock.MockitoSugar import org.scalatest.{FlatSpec, Matchers} class MarkdownReporterTest extends FlatSpec with Matchers with MockitoSugar { "A Markdown reporter" should "produce correct output for a check with constraints" in { val baos = new ByteArrayOutputStream() val markdownReporter = new MarkdownReporter(new PrintStream(baos)) val df = mock[DataFrame] val dfName = "myDf" val dfColumns = Array("1", "2") val dfCount = 5 when(df.columns).thenReturn(dfColumns) val header = s"Checking $dfName" val prologue = s"It has a total number of ${dfColumns.size} columns and $dfCount rows." val message1 = "1" val status1 = ConstraintSuccess val constraint1 = DummyConstraint(message1, status1) val result1 = constraint1.fun(df) val message2 = "2" val status2 = ConstraintFailure val constraint2 = DummyConstraint(message2, status2) val result2 = constraint2.fun(df) val message3 = "3" val status3 = ConstraintError(new IllegalArgumentException()) val constraint3 = DummyConstraint(message3, status3) val result3 = DummyConstraintResult(constraint3, message3, status3) val constraints = Map[Constraint, ConstraintResult[Constraint]]( constraint1 -> result1, constraint2 -> result2, constraint3 -> result3 ) val check = Check(df, Some(dfName), Option.empty, constraints.keys.toSeq) markdownReporter.report(CheckResult(constraints, check, dfCount)) val expectedOutput = s"""**$header** $prologue - *SUCCESS*: ${result1.message} - *FAILURE*: ${result2.message} - *ERROR*: ${result3.message} """ baos.toString shouldBe expectedOutput } it should "produce correct output for a check without constraint" in { val baos = new ByteArrayOutputStream() val markdownReporter = new MarkdownReporter(new PrintStream(baos)) val df = mock[DataFrame] val dfName = "myDf" val dfColumns = Array("1", "2") val dfCount = 5 when(df.columns).thenReturn(dfColumns) val header = s"Checking $dfName" val prologue = s"It has a total number of ${dfColumns.size} columns and $dfCount rows." val check = Check(df, Some(dfName), Option.empty, Seq.empty) markdownReporter.report(CheckResult(Map.empty, check, dfCount)) val expectedOutput = s"""**$header** $prologue Nothing to check! """ baos.toString shouldBe expectedOutput } }
Example 143
Source File: ConsoleReporterTest.scala From drunken-data-quality with Apache License 2.0 | 5 votes |
package de.frosner.ddq.reporters import java.io.{ByteArrayOutputStream, PrintStream} import de.frosner.ddq.constraints._ import de.frosner.ddq.core._ import de.frosner.ddq.testutils.{DummyConstraint, DummyConstraintResult} import org.apache.spark.sql.DataFrame import org.mockito.Mockito._ import org.scalatest.mock.MockitoSugar import org.scalatest.{FlatSpec, Matchers} class ConsoleReporterTest extends FlatSpec with Matchers with MockitoSugar { "A Console reporter" should "produce correct output for a check with constraints" in { val baos = new ByteArrayOutputStream() val consoleReporter = new ConsoleReporter(new PrintStream(baos)) val df = mock[DataFrame] val displayName = "myDf" val dfColumns = Array("1", "2") val dfCount = 5 when(df.columns).thenReturn(dfColumns) val header = s"Checking $displayName" val prologue = s"It has a total number of ${dfColumns.size} columns and $dfCount rows." val message1 = "1" val status1 = ConstraintSuccess val constraint1 = DummyConstraint(message1, status1) val result1 = constraint1.fun(df) val message2 = "2" val status2 = ConstraintFailure val constraint2 = DummyConstraint(message2, status2) val result2 = constraint2.fun(df) val message3 = "3" val status3 = ConstraintError(new IllegalArgumentException()) val constraint3 = DummyConstraint(message3, status3) val result3 = DummyConstraintResult(constraint3, message3, status3) val constraints = Map[Constraint, ConstraintResult[Constraint]]( constraint1 -> result1, constraint2 -> result2, constraint3 -> result3 ) val check = Check(df, Some(displayName), Option.empty, constraints.keys.toSeq) consoleReporter.report(CheckResult(constraints, check, dfCount)) val expectedOutput = s"""${Console.BLUE}$header${Console.RESET} ${Console.BLUE}$prologue${Console.RESET} ${Console.GREEN}- ${result1.message}${Console.RESET} ${Console.RED}- ${result2.message}${Console.RESET} ${Console.YELLOW}- ${result3.message}${Console.RESET} """ baos.toString shouldBe expectedOutput } it should "produce correct output for a check without constraint" in { val baos = new ByteArrayOutputStream() val consoleReporter = new ConsoleReporter(new PrintStream(baos)) val df = mock[DataFrame] val displayName = "myDf" val dfColumns = Array("1", "2") val dfCount = 5 when(df.columns).thenReturn(dfColumns) val header = s"Checking $displayName" val prologue = s"It has a total number of ${dfColumns.size} columns and $dfCount rows." val check = Check(df, Some(displayName), Option.empty, Seq.empty) consoleReporter.report(CheckResult(Map.empty, check, dfCount)) val expectedOutput = s"""${Console.BLUE}$header${Console.RESET} ${Console.BLUE}$prologue${Console.RESET} ${Console.BLUE}Nothing to check!${Console.RESET} """ baos.toString shouldBe expectedOutput } }
Example 144
Source File: ClientSpec.scala From scala-ipfs-api with MIT License | 5 votes |
package io.ipfs.api import java.io.{ByteArrayOutputStream, InputStream} import java.nio.file.{Files, Paths, StandardOpenOption} import java.util import io.ipfs.api.ClientSpec._ import org.specs2.mutable._ import scala.util.Random class ClientSpec extends Specification { isolated val client = new Client("localhost") "IPFS client" should { "show the version" in { client.version mustEqual "0.4.2" } "have an ID" in { client.id.ID.length mustNotEqual 0 } "store data" in { val name = randomName val add = store(name = name) add.length mustEqual 1 val added = add(0) added.Name mustEqual name added.Hash.length mustNotEqual 0 } "cat data" in { val data = randomBytes val added = store(data = data)(0) val in: InputStream = client.cat(added.Hash) util.Arrays.equals(toArray(in), data) mustEqual true } "dht put and get" in { val (key, value) = (random.nextString(10), random.nextString(10)) val puts: Array[DHTResponse] = client.dhtPut(key, value) puts.length mustNotEqual 0 client.dhtGet(key).Extra mustEqual value } } private def randomBytes = { val buffer = new Array[Byte](0x1500) random.nextBytes(buffer) buffer } private def store(name: String = randomName, data: Array[Byte] = randomBytes): Array[Add] = { val storePath = Paths.get(name) Files.write(storePath, data, StandardOpenOption.CREATE) client.add(Array(storePath)) } } object ClientSpec { val random = new Random(666) def randomName: String = random.nextInt()+".test.dat" def toArray(in: InputStream): Array[Byte] = { val out = new ByteArrayOutputStream() try { val buff = new Array[Byte](0x1000) var nRead = 0 while ( {nRead = in.read(buff);nRead} != -1) out.write(buff, 0, nRead) } finally { in.close() } out.toByteArray } }
Example 145
Source File: BigBgenDatasource.scala From glow with Apache License 2.0 | 5 votes |
package io.projectglow.bgen import java.io.ByteArrayOutputStream import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, SQLUtils} import org.apache.spark.sql.sources.DataSourceRegister import io.projectglow.common.logging.{HlsEventRecorder, HlsTagValues} import io.projectglow.sql.BigFileDatasource import io.projectglow.sql.util.ComDatabricksDataSource class BigBgenDatasource extends BigFileDatasource with DataSourceRegister { override def shortName(): String = "bigbgen" override def serializeDataFrame( options: Map[String, String], data: DataFrame): RDD[Array[Byte]] = { BigBgenDatasource.serializeDataFrame(options, data) } } class ComDatabricksBigBgenDatasource extends BigBgenDatasource with ComDatabricksDataSource object BigBgenDatasource extends HlsEventRecorder { import io.projectglow.common.BgenOptions._ private def parseOptions(options: Map[String, String]): BigBgenOptions = { val bitsPerProb = options.getOrElse(BITS_PER_PROB_KEY, BITS_PER_PROB_DEFAULT_VALUE).toInt val maxPloidy = options.getOrElse(MAX_PLOIDY_KEY, MAX_PLOIDY_VALUE).toInt val defaultPloidy = options.getOrElse(DEFAULT_PLOIDY_KEY, DEFAULT_PLOIDY_VALUE).toInt val defaultPhasing = options.getOrElse(DEFAULT_PHASING_KEY, DEFAULT_PHASING_VALUE).toBoolean BigBgenOptions(bitsPerProb, maxPloidy, defaultPloidy, defaultPhasing) } private def logBgenWrite(parsedOptions: BigBgenOptions): Unit = { val logOptions = Map( BITS_PER_PROB_KEY -> parsedOptions.bitsPerProb, MAX_PLOIDY_KEY -> parsedOptions.maxPloidy, DEFAULT_PLOIDY_KEY -> parsedOptions.defaultPloidy, DEFAULT_PHASING_KEY -> parsedOptions.defaultPhasing ) recordHlsEvent(HlsTagValues.EVENT_BGEN_WRITE, logOptions) } def serializeDataFrame(options: Map[String, String], data: DataFrame): RDD[Array[Byte]] = { val parsedOptions = parseOptions(options) logBgenWrite(parsedOptions) val dSchema = data.schema val numVariants = data.count val rawRdd = data.queryExecution.toRdd val inputRdd = if (rawRdd.getNumPartitions == 0) { logger.warn("Writing BGEN header only as the input DataFrame has zero partitions.") SQLUtils.createEmptyRDD(data.sparkSession) } else { rawRdd } inputRdd.mapPartitionsWithIndex { case (idx, it) => val baos = new ByteArrayOutputStream() val writeHeader = idx == 0 val writer = new BgenRecordWriter( baos, dSchema, writeHeader, numVariants, parsedOptions.bitsPerProb, parsedOptions.maxPloidy, parsedOptions.defaultPloidy, parsedOptions.defaultPhasing ) it.foreach { row => writer.write(row) } writer.close() Iterator(baos.toByteArray) } } } case class BigBgenOptions( bitsPerProb: Int, maxPloidy: Int, defaultPloidy: Int, defaultPhasing: Boolean)
Example 146
Source File: ModelStateSerde.scala From kafka-with-akka-streams-kafka-streams-tutorial with Apache License 2.0 | 5 votes |
package com.lightbend.scala.kafkastreams.store.store import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream} import java.util import com.lightbend.model.modeldescriptor.ModelDescriptor import com.lightbend.scala.modelServer.model.PMML.PMMLModel import com.lightbend.scala.modelServer.model.tensorflow.TensorFlowModel import com.lightbend.scala.modelServer.model.{ModelToServeStats, ModelWithDescriptor} import com.lightbend.scala.kafkastreams.store.StoreState import org.apache.kafka.common.serialization.{Deserializer, Serde, Serializer} class ModelStateSerde extends Serde[StoreState] { private val mserializer = new ModelStateSerializer() private val mdeserializer = new ModelStateDeserializer() override def deserializer() = mdeserializer override def serializer() = mserializer override def configure(configs: util.Map[String, _], isKey: Boolean) = {} override def close() = {} } object ModelStateDeserializer { val factories = Map( ModelDescriptor.ModelType.PMML.index -> PMMLModel, ModelDescriptor.ModelType.TENSORFLOW.index -> TensorFlowModel ) } class ModelStateDeserializer extends Deserializer[StoreState] { override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {} override def deserialize(topic: String, data: Array[Byte]): StoreState = { if(data != null) { val input = new DataInputStream(new ByteArrayInputStream(data)) new StoreState(ModelWithDescriptor.readModel(input), ModelWithDescriptor.readModel(input), ModelToServeStats.readServingInfo(input), ModelToServeStats.readServingInfo(input)) } else new StoreState() } override def close(): Unit = {} } class ModelStateSerializer extends Serializer[StoreState] { private val bos = new ByteArrayOutputStream() override def serialize(topic: String, state: StoreState): Array[Byte] = { bos.reset() val output = new DataOutputStream(bos) ModelWithDescriptor.writeModel(output, state.currentModel.orNull) ModelWithDescriptor.writeModel(output, state.newModel.orNull) ModelToServeStats.writeServingInfo(output, state.currentState.orNull) ModelToServeStats.writeServingInfo(output, state.newState.orNull) try { output.flush() output.close() } catch { case t: Throwable => } bos.toByteArray } override def close(): Unit = {} override def configure(configs: util.Map[String, _], isKey: Boolean) = {} }
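The serde above leans on the DataOutputStream/DataInputStream pairing over in-memory byte arrays; a minimal round-trip sketch of that pattern (names are ours) is shown below. Fields must be read back in exactly the order they were written.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

object DataStreamRoundTrip {
  def main(args: Array[String]): Unit = {
    val bos = new ByteArrayOutputStream()
    val out = new DataOutputStream(bos)
    out.writeLong(42L)
    out.writeUTF("model")
    out.flush()

    val in = new DataInputStream(new ByteArrayInputStream(bos.toByteArray))
    println(in.readLong())  // prints: 42
    println(in.readUTF())   // prints: model
  }
}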
Example 147
Source File: VwSparseMultilabelPredictorTest.scala From aloha with MIT License | 5 votes |
package com.eharmony.aloha.models.vw.jni.multilabel import java.io.{ByteArrayOutputStream, File, FileInputStream} import com.eharmony.aloha.ModelSerializationTestHelper import com.eharmony.aloha.io.sources.{Base64StringSource, ExternalSource, ModelSource} import org.apache.commons.codec.binary.Base64 import org.apache.commons.io.IOUtils import org.junit.Assert._ import org.junit.Test import org.junit.runner.RunWith import org.junit.runners.BlockJUnit4ClassRunner import vowpalWabbit.learner.{VWActionScoresLearner, VWLearners} @RunWith(classOf[BlockJUnit4ClassRunner]) class VwSparseMultilabelPredictorTest extends ModelSerializationTestHelper { import VwSparseMultilabelPredictorTest._ @Test def testSerializability(): Unit = { val predictor = getPredictor(getModelSource(), 3) val ds = serializeDeserializeRoundTrip(predictor) assertEquals(predictor, ds) assertEquals(predictor.vwParams(), ds.vwParams()) assertNotNull(ds.vwModel) } @Test def testVwParameters(): Unit = { val numLabelsInTrainingSet = 3 val predictor = getPredictor(getModelSource(), numLabelsInTrainingSet) predictor.vwParams() match { case Data(vwBinFilePath, ringSize) => checkVwBinFile(vwBinFilePath) checkVwRingSize(numLabelsInTrainingSet, ringSize.toInt) case ps => fail(s"Unexpected VW parameters format. Found string: $ps") } } } object VwSparseMultilabelPredictorTest { private val Data = """\s*-i\s+(\S+)\s+--ring_size\s+(\d+)\s+--testonly\s+--quiet""".r private def getModelSource(): ModelSource = { val f = File.createTempFile("i_dont", "care") f.deleteOnExit() val learner = VWLearners.create[VWActionScoresLearner](s"--quiet --csoaa_ldf mc --csoaa_rank -f ${f.getCanonicalPath}") learner.close() val baos = new ByteArrayOutputStream() IOUtils.copy(new FileInputStream(f), baos) val src = Base64StringSource(Base64.encodeBase64URLSafeString(baos.toByteArray)) ExternalSource(src.localVfs) } private def getPredictor(modelSrc: ModelSource, numLabelsInTrainingSet: Int) = VwSparseMultilabelPredictor[Any](modelSrc, Nil, Nil, numLabelsInTrainingSet) private def checkVwBinFile(vwBinFilePath: String): Unit = { val vwBinFile = new File(vwBinFilePath) assertTrue("VW binary file should have been written to disk", vwBinFile.exists()) vwBinFile.deleteOnExit() } private def checkVwRingSize(numLabelsInTrainingSet: Int, ringSize: Int): Unit = { assertEquals( "vw --ring_size parameter is incorrect:", numLabelsInTrainingSet + VwSparseMultilabelPredictor.AddlVwRingSize, ringSize.toInt ) } }
Example 148
Source File: PrintProtosTest.scala From aloha with MIT License | 5 votes |
package com.eharmony.aloha.cli.dataset import java.io.{ByteArrayOutputStream, IOException} import java.util.Arrays import com.eharmony.aloha.test.proto.Testing.{PhotoProto, UserProto} import com.eharmony.aloha.test.proto.Testing.GenderProto.{FEMALE, MALE} import com.google.protobuf.GeneratedMessage import org.apache.commons.codec.binary.Base64 import org.junit.runner.RunWith import org.junit.runners.BlockJUnit4ClassRunner import org.junit.{Ignore, Test} @RunWith(classOf[BlockJUnit4ClassRunner]) @Ignore class PrintProtosTest { @Test def testPrintProtos(): Unit = { System.out.println(alan) System.out.println(kate) } @throws(classOf[IOException]) def alan: String = { val t = UserProto.newBuilder. setId(1). setName("Alan"). setGender(MALE). setBmi(23). addAllPhotos(Arrays.asList( PhotoProto.newBuilder. setId(1). setAspectRatio(1). setHeight(1). build, PhotoProto.newBuilder. setId(2). setAspectRatio(2). setHeight(2).build )).build b64(t) } def kate: String = { val t = UserProto.newBuilder. setId(1). setName("Kate"). setGender(FEMALE). addAllPhotos(Arrays.asList( PhotoProto.newBuilder. setId(3). setAspectRatio(3). setHeight(3). build )).build b64(t) } def b64[M <: GeneratedMessage](p: M): String = { val baos: ByteArrayOutputStream = new ByteArrayOutputStream p.writeTo(baos) new String(Base64.encodeBase64(baos.toByteArray)) } }
Example 149
Source File: ReadableByString.scala From aloha with MIT License | 5 votes |
package com.eharmony.aloha.io import java.io.{InputStreamReader, ByteArrayOutputStream, Reader, InputStream} import org.apache.commons.io.IOUtils /* Note: this excerpt was truncated; the enclosing trait declaration is elided, so a minimal placeholder with the abstract members the method relies on (fromString, inputCharset) is shown for context. */ trait ReadableByString[A] { protected def inputCharset: String def fromString(s: String): A final def fromReader(r: Reader): A = { try { val baos = new ByteArrayOutputStream /* Don't need to close. */ IOUtils.copy(r, baos, inputCharset) fromString(new String(baos.toByteArray)) } finally { IOUtils.closeQuietly(r) } } }
Example 150
Source File: ContainerReadableByString.scala From aloha with MIT License | 5 votes |
package com.eharmony.aloha.io import scala.language.higherKinds import org.apache.commons.io.IOUtils import java.lang.String import java.io.{ByteArrayOutputStream, Reader, InputStreamReader, InputStream} /* Note: this excerpt was truncated; the enclosing trait declaration is elided, so a minimal placeholder with the abstract members the method relies on (fromString, inputCharset, the container type C) is shown for context. */ trait ContainerReadableByString[C[_]] { protected def inputCharset: String def fromString[A](s: String): C[A] def fromReader[A](r: Reader): C[A] = { try { val baos = new ByteArrayOutputStream /* Don't need to close. */ IOUtils.copy(r, baos, inputCharset) fromString[A](new String(baos.toByteArray)) } finally { IOUtils.closeQuietly(r) } } }
Example 151
Source File: SchrodingerExceptionTest.scala From aloha with MIT License | 5 votes |
package com.eharmony.aloha.ex import org.junit.{Before, Test} import org.junit.Assert._ import java.io.{PrintWriter, OutputStreamWriter, ByteArrayOutputStream, PrintStream} class SchrodingerExceptionTest { private[this] var ex: SchrodingerException = _ @Before def before() { ex = new SchrodingerException } @Test def testFillInStackTrace() { assertTrue(new SchrodingerException().fillInStackTrace().isInstanceOf[SchrodingerException]) } @Test(expected = classOf[SchrodingerException]) def testGetMessage() { ex.getMessage() } @Test(expected = classOf[SchrodingerException]) def testGetStackTrace() { ex.getStackTrace() } @Test(expected = classOf[SchrodingerException]) def testGetCause() { ex.getCause() } @Test(expected = classOf[SchrodingerException]) def testSetStackTrace() { ex.setStackTrace(Array.empty) } @Test(expected = classOf[SchrodingerException]) def testGetLocalizedMessage() { ex.getLocalizedMessage() } @Test(expected = classOf[SchrodingerException]) def testPrintStackTraceEmpty() { ex.printStackTrace() } @Test(expected = classOf[SchrodingerException]) def testPrintStackTraceStream() { val baos = new ByteArrayOutputStream() val ps = new PrintStream(baos) ex.printStackTrace(ps) } @Test(expected = classOf[SchrodingerException]) def testPrintStackTraceWriter() { val baos = new ByteArrayOutputStream() val osw = new OutputStreamWriter(baos) val ps = new PrintWriter(osw) ex.printStackTrace(ps) } @Test(expected = classOf[SchrodingerException]) def testInitCause() { ex.initCause(new Throwable) } @Test(expected = classOf[SchrodingerException]) def testToString() { ex.toString() } @Test def testNoThrowForSchrodingerExceptionWithSchrodingerExceptionCause() { new SchrodingerException(new SchrodingerException) } @Test def testNoThrowForSchrodingerExceptionWithExceptionCause() { new SchrodingerException(new Exception) } @Test(expected = classOf[SchrodingerException]) def testThrowForThrowableWithSchrodingerExceptionCause() { new Throwable(ex) } @Test(expected = classOf[SchrodingerException]) def testThrowForExceptionWithSchrodingerExceptionCause() { new Exception(ex) } @Test(expected = classOf[SchrodingerException]) def testThrowForRuntimeExceptionWithSchrodingerExceptionCause() { new RuntimeException(ex) } }
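The test above relies on printing a stack trace into an in-memory stream; outside of tests the same idiom is handy for turning a Throwable's stack trace into a String. A hedged sketch (names are ours):

import java.io.{ByteArrayOutputStream, PrintStream}

object StackTraceToString {
  def asString(t: Throwable): String = {
    val baos = new ByteArrayOutputStream()
    val ps = new PrintStream(baos, true, "UTF-8")  // autoflush, explicit charset
    t.printStackTrace(ps)
    ps.close()
    baos.toString("UTF-8")
  }

  def main(args: Array[String]): Unit =
    println(asString(new Exception("boom")).split('\n')(0)) // java.lang.Exception: boom
}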
Example 152
Source File: FileBasedLedgerDataExportSpec.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.ledger.participant.state.kvutils.export import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream} import java.time.Instant import com.daml.ledger.participant.state.v1 import com.google.protobuf.ByteString import org.scalatest.mockito.MockitoSugar import org.scalatest.{Matchers, WordSpec} class FileBasedLedgerDataExportSpec extends WordSpec with Matchers with MockitoSugar { // XXX SC remove in Scala 2.13; see notes in ConfSpec import scala.collection.GenTraversable, org.scalatest.enablers.Containing private[this] implicit def `fixed sig containingNatureOfGenTraversable`[ E: org.scalactic.Equality, TRAV]: Containing[TRAV with GenTraversable[E]] = Containing.containingNatureOfGenTraversable[E, GenTraversable] "addParentChild" should { "add entry to correlation ID mapping" in { val instance = new FileBasedLedgerDataExporter(mock[DataOutputStream]) instance.addParentChild("parent", "child") instance.correlationIdMapping should contain("child" -> "parent") } } "addToWriteSet" should { "append to existing data" in { val instance = new FileBasedLedgerDataExporter(mock[DataOutputStream]) instance.addParentChild("parent", "child") instance.addToWriteSet("child", Seq(keyValuePairOf("a", "b"))) instance.addToWriteSet("child", Seq(keyValuePairOf("c", "d"))) instance.bufferedKeyValueDataPerCorrelationId should contain( "parent" -> Seq(keyValuePairOf("a", "b"), keyValuePairOf("c", "d"))) } } "finishedProcessing" should { "remove all data such as submission info, write-set and child correlation IDs" in { val dataOutputStream = new DataOutputStream(new ByteArrayOutputStream()) val instance = new FileBasedLedgerDataExporter(dataOutputStream) instance.addSubmission( ByteString.copyFromUtf8("an envelope"), "parent", Instant.now(), v1.ParticipantId.assertFromString("id")) instance.addParentChild("parent", "parent") instance.addToWriteSet("parent", Seq(keyValuePairOf("a", "b"))) instance.finishedProcessing("parent") instance.inProgressSubmissions shouldBe empty instance.bufferedKeyValueDataPerCorrelationId shouldBe empty instance.correlationIdMapping shouldBe empty } } "serialized submission" should { "be readable back" in { val baos = new ByteArrayOutputStream() val dataOutputStream = new DataOutputStream(baos) val instance = new FileBasedLedgerDataExporter(dataOutputStream) val expectedRecordTimeInstant = Instant.now() val expectedParticipantId = v1.ParticipantId.assertFromString("id") instance.addSubmission( ByteString.copyFromUtf8("an envelope"), "parent", expectedRecordTimeInstant, v1.ParticipantId.assertFromString("id")) instance.addParentChild("parent", "parent") instance.addToWriteSet("parent", Seq(keyValuePairOf("a", "b"))) instance.finishedProcessing("parent") val dataInputStream = new DataInputStream(new ByteArrayInputStream(baos.toByteArray)) val (actualSubmissionInfo, actualWriteSet) = Serialization.readEntry(dataInputStream) actualSubmissionInfo.submissionEnvelope should be(ByteString.copyFromUtf8("an envelope")) actualSubmissionInfo.correlationId should be("parent") actualSubmissionInfo.recordTimeInstant should be(expectedRecordTimeInstant) actualSubmissionInfo.participantId should be(expectedParticipantId) actualWriteSet should be(Seq(keyValuePairOf("a", "b"))) } } private def keyValuePairOf(key: String, value: String): (ByteString, ByteString) = ByteString.copyFromUtf8(key) -> ByteString.copyFromUtf8(value) }
Example 153
Source File: DevboxSetupMain.scala From devbox with Apache License 2.0 | 5 votes |
package devbox.agent import java.io.ByteArrayOutputStream import scala.util.control.NonFatal object DevboxSetupMain { def main(args: Array[String]): Unit = { val baos = new ByteArrayOutputStream() os.Internals.transfer(System.in, baos) val buffer = baos.toByteArray val allSetupFilesAndCommands = upickle.default.readBinary[Seq[Either[(String, Array[Byte]), String]]](buffer) val userName = sys.env.getOrElse("DEVBOX_USER", os.proc("whoami").call().out.trim) allSetupFilesAndCommands.foreach{ case Left((destination, bytes)) => // we run as root, so we need to expand ~ to DEVBOX_USER here val expandedDestination = destination match{ case s"~/$rest" => os.root / "home" / userName / os.SubPath(rest) case dest => os.Path(dest) } try { os.write.over(expandedDestination, bytes, createFolders = true) os.perms.set(expandedDestination, "rwxrwxrwx") } catch { case NonFatal(e) => println(s"Error writing file $destination: ${e.getMessage}") } case Right(cmd) => println("Running remote command: " + cmd) os.proc("bash", "-c", cmd).call() } } }
Example 154
Source File: BytecodeUtils.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.graphx.util import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import scala.collection.mutable.HashSet import scala.language.existentials import org.apache.xbean.asm5.{ClassReader, ClassVisitor, MethodVisitor} import org.apache.xbean.asm5.Opcodes._ import org.apache.spark.util.Utils /* Note: this excerpt was truncated; the enclosing BytecodeUtils object and its skipClass helper are elided, so a minimal placeholder filter is shown here to keep the visitor self-contained. */ private[graphx] object BytecodeUtils { private def skipClass(className: String): Boolean = className.startsWith("scala/") || className.startsWith("org/apache/spark/") private class MethodInvocationFinder(className: String, methodName: String) extends ClassVisitor(ASM5) { val methodsInvoked = new HashSet[(Class[_], String)] override def visitMethod(access: Int, name: String, desc: String, sig: String, exceptions: Array[String]): MethodVisitor = { if (name == methodName) { new MethodVisitor(ASM5) { override def visitMethodInsn( op: Int, owner: String, name: String, desc: String, itf: Boolean) { if (op == INVOKEVIRTUAL || op == INVOKESPECIAL || op == INVOKESTATIC) { if (!skipClass(owner)) { methodsInvoked.add((Utils.classForName(owner.replace("/", ".")), name)) } } } } } else { null } } } }
Example 155
Source File: RawTextSender.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util import java.io.{ByteArrayOutputStream, IOException} import java.net.ServerSocket import java.nio.ByteBuffer import scala.io.Source import org.apache.spark.SparkConf import org.apache.spark.internal.Logging import org.apache.spark.serializer.KryoSerializer import org.apache.spark.util.IntParam private[streaming] object RawTextSender extends Logging { def main(args: Array[String]) { if (args.length != 4) { // scalastyle:off println System.err.println("Usage: RawTextSender <port> <file> <blockSize> <bytesPerSec>") // scalastyle:on println System.exit(1) } // Parse the arguments using a pattern match val Array(IntParam(port), file, IntParam(blockSize), IntParam(bytesPerSec)) = args // Repeat the input data multiple times to fill in a buffer val lines = Source.fromFile(file).getLines().toArray val bufferStream = new ByteArrayOutputStream(blockSize + 1000) val ser = new KryoSerializer(new SparkConf()).newInstance() val serStream = ser.serializeStream(bufferStream) var i = 0 while (bufferStream.size < blockSize) { serStream.writeObject(lines(i)) i = (i + 1) % lines.length } val array = bufferStream.toByteArray val countBuf = ByteBuffer.wrap(new Array[Byte](4)) countBuf.putInt(array.length) countBuf.flip() val serverSocket = new ServerSocket(port) logInfo("Listening on port " + port) while (true) { val socket = serverSocket.accept() logInfo("Got a new connection") val out = new RateLimitedOutputStream(socket.getOutputStream, bytesPerSec) try { while (true) { out.write(countBuf.array) out.write(array) } } catch { case e: IOException => logError("Client disconnected") } finally { socket.close() } } } }
Example 156
Source File: RateLimitedOutputStreamSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util import java.io.ByteArrayOutputStream import java.nio.charset.StandardCharsets import java.util.concurrent.TimeUnit._ import org.apache.spark.SparkFunSuite class RateLimitedOutputStreamSuite extends SparkFunSuite { private def benchmark[U](f: => U): Long = { val start = System.nanoTime f System.nanoTime - start } test("write") { val underlying = new ByteArrayOutputStream val data = "X" * 41000 val stream = new RateLimitedOutputStream(underlying, desiredBytesPerSec = 10000) val elapsedNs = benchmark { stream.write(data.getBytes(StandardCharsets.UTF_8)) } val seconds = SECONDS.convert(elapsedNs, NANOSECONDS) assert(seconds >= 4, s"Seconds value ($seconds) is less than 4.") assert(seconds <= 30, s"Took more than 30 seconds ($seconds) to write data.") assert(underlying.toString("UTF-8") === data) } }
Example 157
Source File: ByteBufferOutputStream.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.ByteArrayOutputStream import java.nio.ByteBuffer private[spark] class ByteBufferOutputStream(capacity: Int) extends ByteArrayOutputStream(capacity) { def this() = this(32) def getCount(): Int = count private[this] var closed: Boolean = false override def write(b: Int): Unit = { require(!closed, "cannot write to a closed ByteBufferOutputStream") super.write(b) } override def write(b: Array[Byte], off: Int, len: Int): Unit = { require(!closed, "cannot write to a closed ByteBufferOutputStream") super.write(b, off, len) } override def reset(): Unit = { require(!closed, "cannot reset a closed ByteBufferOutputStream") super.reset() } override def close(): Unit = { if (!closed) { super.close() closed = true } } def toByteBuffer: ByteBuffer = { require(closed, "can only call toByteBuffer() after ByteBufferOutputStream has been closed") ByteBuffer.wrap(buf, 0, count) } }
Example 158
Source File: PortableDataStream.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.input import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream} import scala.collection.JavaConverters._ import com.google.common.io.{ByteStreams, Closeables} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.{InputSplit, JobContext, RecordReader, TaskAttemptContext} import org.apache.hadoop.mapreduce.lib.input.{CombineFileInputFormat, CombineFileRecordReader, CombineFileSplit} /* Note: this excerpt was truncated; only the class header is restored here for context, and the members toArray relies on (open() and path) are defined elsewhere in the class. */ class PortableDataStream(isplit: CombineFileSplit, context: TaskAttemptContext, index: Integer) extends Serializable { def toArray(): Array[Byte] = { val stream = open() try { ByteStreams.toByteArray(stream) } finally { Closeables.close(stream, true) } } def getPath(): String = path }
Example 159
Source File: PythonRDDSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.api.python import java.io.{ByteArrayOutputStream, DataOutputStream} import java.nio.charset.StandardCharsets import org.apache.spark.SparkFunSuite class PythonRDDSuite extends SparkFunSuite { test("Writing large strings to the worker") { val input: List[String] = List("a"*100000) val buffer = new DataOutputStream(new ByteArrayOutputStream) PythonRDD.writeIteratorToStream(input.iterator, buffer) } test("Handle nulls gracefully") { val buffer = new DataOutputStream(new ByteArrayOutputStream) // Should not have NPE when write an Iterator with null in it // The correctness will be tested in Python PythonRDD.writeIteratorToStream(Iterator("a", null), buffer) PythonRDD.writeIteratorToStream(Iterator(null, "a"), buffer) PythonRDD.writeIteratorToStream(Iterator("a".getBytes(StandardCharsets.UTF_8), null), buffer) PythonRDD.writeIteratorToStream(Iterator(null, "a".getBytes(StandardCharsets.UTF_8)), buffer) PythonRDD.writeIteratorToStream(Iterator((null, null), ("a", null), (null, "b")), buffer) PythonRDD.writeIteratorToStream(Iterator( (null, null), ("a".getBytes(StandardCharsets.UTF_8), null), (null, "b".getBytes(StandardCharsets.UTF_8))), buffer) } }
Example 160
Source File: GenericAvroSerializerSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.serializer import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.nio.ByteBuffer import com.esotericsoftware.kryo.io.{Input, Output} import org.apache.avro.{Schema, SchemaBuilder} import org.apache.avro.generic.GenericData.Record import org.apache.spark.{SharedSparkContext, SparkFunSuite} class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext { conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") val schema : Schema = SchemaBuilder .record("testRecord").fields() .requiredString("data") .endRecord() val record = new Record(schema) record.put("data", "test data") test("schema compression and decompression") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema)))) } test("record serialization and deserialization") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val outputStream = new ByteArrayOutputStream() val output = new Output(outputStream) genericSer.serializeDatum(record, output) output.flush() output.close() val input = new Input(new ByteArrayInputStream(outputStream.toByteArray)) assert(genericSer.deserializeDatum(input) === record) } test("uses schema fingerprint to decrease message size") { val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema) val output = new Output(new ByteArrayOutputStream()) val beginningNormalPosition = output.total() genericSerFull.serializeDatum(record, output) output.flush() val normalLength = output.total - beginningNormalPosition conf.registerAvroSchemas(schema) val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema) val beginningFingerprintPosition = output.total() genericSerFinger.serializeDatum(record, output) val fingerprintLength = output.total - beginningFingerprintPosition assert(fingerprintLength < normalLength) } test("caches previously seen schemas") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val compressedSchema = genericSer.compress(schema) val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema)) assert(compressedSchema.eq(genericSer.compress(schema))) assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema)))) } }
Example 161
Source File: SerializerPropertiesSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.serializer import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import scala.util.Random import org.scalatest.Assertions import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.serializer.KryoTest.RegistratorWithoutAutoReset class SerializerPropertiesSuite extends SparkFunSuite { import SerializerPropertiesSuite._ test("JavaSerializer does not support relocation") { // Per a comment on the SPARK-4550 JIRA ticket, Java serialization appears to write out the // full class name the first time an object is written to an output stream, but subsequent // references to the class write a more compact identifier; this prevents relocation. val ser = new JavaSerializer(new SparkConf()) testSupportsRelocationOfSerializedObjects(ser, generateRandomItem) } test("KryoSerializer supports relocation when auto-reset is enabled") { val ser = new KryoSerializer(new SparkConf) assert(ser.newInstance().asInstanceOf[KryoSerializerInstance].getAutoReset()) testSupportsRelocationOfSerializedObjects(ser, generateRandomItem) } test("KryoSerializer does not support relocation when auto-reset is disabled") { val conf = new SparkConf().set("spark.kryo.registrator", classOf[RegistratorWithoutAutoReset].getName) val ser = new KryoSerializer(conf) assert(!ser.newInstance().asInstanceOf[KryoSerializerInstance].getAutoReset()) testSupportsRelocationOfSerializedObjects(ser, generateRandomItem) } } object SerializerPropertiesSuite extends Assertions { def generateRandomItem(rand: Random): Any = { val randomFunctions: Seq[() => Any] = Seq( () => rand.nextInt(), () => rand.nextString(rand.nextInt(10)), () => rand.nextDouble(), () => rand.nextBoolean(), () => (rand.nextInt(), rand.nextString(rand.nextInt(10))), () => MyCaseClass(rand.nextInt(), rand.nextString(rand.nextInt(10))), () => { val x = MyCaseClass(rand.nextInt(), rand.nextString(rand.nextInt(10))) (x, x) } ) randomFunctions(rand.nextInt(randomFunctions.size)).apply() } def testSupportsRelocationOfSerializedObjects( serializer: Serializer, generateRandomItem: Random => Any): Unit = { if (!serializer.supportsRelocationOfSerializedObjects) { return } val NUM_TRIALS = 5 val rand = new Random(42) for (_ <- 1 to NUM_TRIALS) { val items = { // Make sure that we have duplicate occurrences of the same object in the stream: val randomItems = Seq.fill(10)(generateRandomItem(rand)) randomItems ++ randomItems.take(5) } val baos = new ByteArrayOutputStream() val serStream = serializer.newInstance().serializeStream(baos) def serializeItem(item: Any): Array[Byte] = { val itemStartOffset = baos.toByteArray.length serStream.writeObject(item) serStream.flush() val itemEndOffset = baos.toByteArray.length baos.toByteArray.slice(itemStartOffset, itemEndOffset).clone() } val itemsAndSerializedItems: Seq[(Any, Array[Byte])] = { val serItems = items.map { item => (item, serializeItem(item)) } serStream.close() rand.shuffle(serItems) } val reorderedSerializedData: Array[Byte] = itemsAndSerializedItems.flatMap(_._2).toArray val deserializedItemsStream = serializer.newInstance().deserializeStream( new ByteArrayInputStream(reorderedSerializedData)) assert(deserializedItemsStream.asIterator.toSeq === itemsAndSerializedItems.map(_._1)) deserializedItemsStream.close() } } } private case class MyCaseClass(foo: Int, bar: String)
Example 162
Source File: BookSerializer.scala From akka-serialization-test with Apache License 2.0 | 5 votes |
package com.github.dnvriend.serializer.avro import java.io.ByteArrayOutputStream import com.github.dnvriend.domain.BookStore.{ ChangedBookV3, ChangedBookV2, ChangedBookV1 } import com.sksamuel.avro4s.{ AvroBinaryOutputStream, AvroInputStream, AvroOutputStream } abstract class BookSerializer[T] extends AvroSerializer[T] { final val Manifest = "ChangedBook" } class BookSerializerV1 extends BookSerializer[ChangedBookV1] { override def identifier: Int = 200011 override def toBinary(o: AnyRef): Array[Byte] = { val output = new ByteArrayOutputStream val avro = AvroOutputStream[ChangedBookV1](output) avro.write(o.asInstanceOf[ChangedBookV1]) avro.close() output.toByteArray } override def fromBinary(bytes: Array[Byte], manifest: String): AnyRef = { if (Manifest == manifest) { val is = AvroInputStream[ChangedBookV1](bytes) val events = is.iterator.toList is.close() events(0) } else throw new IllegalArgumentException(s"Unable to handle manifest $manifest, required $Manifest") } } class BookSerializerV2 extends BookSerializer[ChangedBookV2] { override def identifier: Int = 200012 override def toBinary(o: AnyRef): Array[Byte] = { val output = new ByteArrayOutputStream val avro = AvroBinaryOutputStream[ChangedBookV2](output) avro.write(o.asInstanceOf[ChangedBookV2]) avro.close() output.toByteArray } override def fromBinary(bytes: Array[Byte], manifest: String): AnyRef = { // if (Manifest == manifest) { println("Manifest " + manifest) val is = AvroInputStream[ChangedBookV2](bytes) val events = is.iterator.toList is.close() events(0) // } else throw new IllegalArgumentException(s"Unable to handle manifest $manifest, required $Manifest") } } class BookSerializerV3 extends BookSerializer[ChangedBookV3] { override def identifier: Int = 200013 override def toBinary(o: AnyRef): Array[Byte] = { val output = new ByteArrayOutputStream val avro = AvroOutputStream[ChangedBookV3](output) avro.write(o.asInstanceOf[ChangedBookV3]) avro.close() output.toByteArray } override def fromBinary(bytes: Array[Byte], manifest: String): AnyRef = { // if (Manifest == manifest) { println("Manifest " + manifest) val is = AvroInputStream[ChangedBookV3](bytes) val events = is.iterator.toList is.close() events(0) // } else throw new IllegalArgumentException(s"Unable to handle manifest $manifest, required $Manifest") } }
Example 163
Source File: MovieChangedSerializer.scala From akka-serialization-test with Apache License 2.0 | 5 votes |
package com.github.dnvriend.serializer.avro import java.io.ByteArrayOutputStream import com.github.dnvriend.domain.Movie.MovieChanged import com.sksamuel.avro4s.{ AvroInputStream, AvroOutputStream } class MovieChangedSerializer extends AvroSerializer[MovieChanged] { override def identifier: Int = 100011 final val Manifest = classOf[MovieChanged].getName override def toBinary(o: AnyRef): Array[Byte] = { val output = new ByteArrayOutputStream val avro = AvroOutputStream[MovieChanged](output) avro.write(o.asInstanceOf[MovieChanged]) avro.close() output.toByteArray } override def fromBinary(bytes: Array[Byte], manifest: String): AnyRef = { if (Manifest == manifest) { val is = AvroInputStream[MovieChanged](bytes) val events = is.iterator.toList is.close() events(0) } else throw new IllegalArgumentException(s"Unable to handle manifest $manifest, required $Manifest") } }
Example 164
Source File: PLYReadWriteTests.scala From scalismo-faces with Apache License 2.0 | 5 votes |
package scalismo.faces.io import java.io.{ByteArrayInputStream, ByteArrayOutputStream, OutputStreamWriter} import java.nio.ByteOrder import java.util.Scanner import scalismo.faces.FacesTestSuite import scalismo.faces.io.ply._ class PLYReadWriteTests extends FacesTestSuite { describe("Write-read cycles to string, big- and little endian") { def testRWEndianCycle[A:StringWriter:StringReader:EndianWriter:EndianReader](toWrite: IndexedSeq[A], bo: ByteOrder): Unit = { val N = toWrite.size val os = new ByteArrayOutputStream() val writer = new SequenceWriter[A] writer.write(toWrite, os, bo) val ba = os.toByteArray val is = new ByteArrayInputStream(ba) val reader = new FixedLengthSequenceReader[A] val read = reader.read(N, is, bo) read.zip(toWrite).foreach { p => p._1 shouldBe p._2 } } def testRWStringCycle[A:StringWriter:StringReader:EndianWriter:EndianReader](toWrite: IndexedSeq[A]): Unit = { val N = toWrite.size val os = new ByteArrayOutputStream() val osw = new OutputStreamWriter(os) val writer = new SequenceWriter[A] writer.write(toWrite, osw) osw.flush() val is = new ByteArrayInputStream(os.toByteArray) val isr = new Scanner(is) val reader = new FixedLengthSequenceReader[A] val read = reader.read(N, isr) read.zip(toWrite).foreach { p => p._1 shouldBe p._2 } } def testAllThreeCycles[A:StringWriter:StringReader:EndianWriter:EndianReader](toWrite: IndexedSeq[A]): Unit = { testRWStringCycle(toWrite) testRWEndianCycle(toWrite, ByteOrder.BIG_ENDIAN) testRWEndianCycle(toWrite, ByteOrder.LITTLE_ENDIAN) } it("should result in the same sequence of bytes") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toByte testAllThreeCycles(toWrite) } it("should result in the same sequence of char") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toChar testAllThreeCycles(toWrite) } it("should result in the same sequence of short") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toShort testAllThreeCycles(toWrite) } it("should result in the same sequence of int") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toInt testAllThreeCycles(toWrite) } it("should result in the same sequence of long") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toLong testAllThreeCycles(toWrite) } it("should result in the same sequence of float") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toFloat testAllThreeCycles(toWrite) } it("should result in the same sequence of double") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255) testAllThreeCycles(toWrite) } } }
Example 165
Source File: RewriteSwaggerConfigPlugin.scala From matcher with MIT License | 5 votes |
import java.io.{BufferedInputStream, ByteArrayOutputStream} import java.nio.charset.StandardCharsets import java.nio.file.Files import Dependencies.Version import org.apache.commons.compress.archivers.ArchiveStreamFactory import org.apache.commons.io.IOUtils import sbt.Keys._ import sbt._ // See https://github.com/swagger-api/swagger-ui/issues/5710 object RewriteSwaggerConfigPlugin extends AutoPlugin { override val trigger = PluginTrigger.NoTrigger override def projectSettings: Seq[Def.Setting[_]] = inConfig(Compile)( Seq( resourceGenerators += Def.task { val jarName = s"swagger-ui-${Version.swaggerUi}.jar" val indexHtmlPath = s"META-INF/resources/webjars/swagger-ui/${Version.swaggerUi}/index.html" val outputFile = resourceManaged.value / indexHtmlPath val html = (Compile / dependencyClasspath).value .find(_.data.getName == jarName) .flatMap(jar => fileContentFromJar(jar.data, indexHtmlPath)) .map { new String(_, StandardCharsets.UTF_8) } val resource = s"$jarName:$indexHtmlPath" html match { case None => throw new RuntimeException(s"Can't find $resource") case Some(html) => val doc = org.jsoup.parser.Parser.parse(html, "127.0.0.1") import scala.collection.JavaConverters._ doc .body() .children() .asScala .find { el => el.tagName() == "script" && el.html().contains("SwaggerUIBundle") } match { case None => throw new RuntimeException("Can't patch script in index.html") case Some(el) => val update = """ const ui = SwaggerUIBundle({ url: "/api-docs/swagger.json", dom_id: '#swagger-ui', deepLinking: true, presets: [ SwaggerUIBundle.presets.apis ], plugins: [ SwaggerUIBundle.plugins.DownloadUrl ], layout: "BaseLayout", operationsSorter: "alpha" }); window.ui = ui; """ // Careful! ^ will be inserted as one-liner el.text(update) } Files.createDirectories(outputFile.getParentFile.toPath) IO.write(outputFile, doc.outerHtml()) } Seq(outputFile) }.taskValue )) private def fileContentFromJar(jar: File, fileName: String): Option[Array[Byte]] = { val fs = new BufferedInputStream(Files.newInputStream(jar.toPath)) val factory = new ArchiveStreamFactory() val ais = factory.createArchiveInputStream(fs) try Iterator .continually(ais.getNextEntry) .takeWhile(_ != null) .filter(ais.canReadEntryData) .find(_.getName == fileName) .map { _ => val out = new ByteArrayOutputStream() IOUtils.copy(ais, out) out.toByteArray } finally fs.close() } }
Example 166
Source File: Block.scala From matcher with MIT License | 5 votes |
package com.wavesplatform.dex.it.config.genesis import java.io.ByteArrayOutputStream import com.google.common.primitives.{Bytes, Ints, Longs} import com.wavesplatform.dex.domain.account.{KeyPair, PublicKey} import com.wavesplatform.dex.domain.bytes.ByteStr import com.wavesplatform.dex.domain.crypto import com.wavesplatform.dex.domain.crypto.Authorized import com.wavesplatform.dex.domain.error.ValidationError.GenericError import monix.eval.Coeval case class Block(timestamp: Long, version: Byte, reference: ByteStr, signerData: SignerData, consensusData: NxtLikeConsensusBlockData, transactionData: Seq[GenesisTransaction]) extends Authorized { override val sender: PublicKey = signerData.generator private val maxLength: Int = 150 * 1024 private val transactionField: Array[Byte] = { val serTxCount = Array(transactionData.size.toByte) val byteBuffer = new ByteArrayOutputStream(transactionData.size * maxLength / 2) byteBuffer.write(serTxCount, 0, serTxCount.length) transactionData.foreach { tx => val txBytes = tx.bytes() val txSize = Bytes.ensureCapacity(Ints.toByteArray(txBytes.length), 4, 0) byteBuffer.write(txSize, 0, txSize.length) byteBuffer.write(txBytes, 0, txBytes.length) } byteBuffer.toByteArray } val bytes: Coeval[Array[Byte]] = Coeval.evalOnce { val txBytesSize = transactionField.length val txBytes = Bytes.ensureCapacity(Ints.toByteArray(txBytesSize), 4, 0) ++ transactionField val consensusField = Bytes.ensureCapacity(Longs.toByteArray(consensusData.baseTarget), 8, 0) ++ consensusData.generationSignature.arr val cBytesSize = consensusField.length val cBytes = Bytes.ensureCapacity(Ints.toByteArray(cBytesSize), 4, 0) ++ consensusField Array(version) ++ Bytes.ensureCapacity(Longs.toByteArray(timestamp), 8, 0) ++ reference.arr ++ cBytes ++ txBytes ++ signerData.generator.arr ++ signerData.signature.arr } } object Block { val MaxFeaturesInBlock: Int = 64 val GeneratorSignatureLength: Int = 32 def build(version: Byte, timestamp: Long, reference: ByteStr, consensusData: NxtLikeConsensusBlockData, transactionData: Seq[GenesisTransaction], signerData: SignerData): Either[GenericError, Block] = { (for { _ <- Either.cond(reference.arr.length == crypto.SignatureLength, (), "Incorrect reference") _ <- Either.cond(consensusData.generationSignature.arr.length == GeneratorSignatureLength, (), "Incorrect consensusData.generationSignature") _ <- Either.cond(signerData.generator.length == crypto.KeyLength, (), "Incorrect signer") } yield Block(timestamp, version, reference, signerData, consensusData, transactionData)).left.map(GenericError(_)) } def buildAndSign(version: Byte, timestamp: Long, reference: ByteStr, consensusData: NxtLikeConsensusBlockData, transactionData: Seq[GenesisTransaction], signer: KeyPair): Either[GenericError, Block] = build(version, timestamp, reference, consensusData, transactionData, SignerData(signer, ByteStr.empty)).right .map(unsigned => unsigned.copy(signerData = SignerData(signer, ByteStr(crypto.sign(signer, unsigned.bytes.value))))) }
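The transactionField value above concatenates length-prefixed payloads into a single ByteArrayOutputStream; a stripped-down sketch of that framing (helper names are ours, Guava's Ints as in the example):

import java.io.ByteArrayOutputStream
import com.google.common.primitives.Ints

object LengthPrefixedFraming {
  def frame(payloads: Seq[Array[Byte]]): Array[Byte] = {
    val out = new ByteArrayOutputStream()
    out.write(Array(payloads.size.toByte), 0, 1)   // element count, one byte as in Block
    payloads.foreach { p =>
      val size = Ints.toByteArray(p.length)        // 4-byte big-endian length prefix
      out.write(size, 0, size.length)
      out.write(p, 0, p.length)
    }
    out.toByteArray
  }

  def main(args: Array[String]): Unit =
    println(frame(Seq("ab".getBytes("UTF-8"), "cde".getBytes("UTF-8"))).length) // 1 + (4 + 2) + (4 + 3) = 14
}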
Example 167
Source File: Main.scala From jardiff with Apache License 2.0 | 5 votes |
package scala.tools.jardiff import java.io.{ByteArrayOutputStream, File, PrintWriter} import java.nio.file._ import org.apache.commons.cli import org.apache.commons.cli.{CommandLine, DefaultParser, HelpFormatter, Options} import org.eclipse.jgit.util.io.NullOutputStream import scala.collection.JavaConverters.collectionAsScalaIterableConverter import scala.util.Try import scala.util.control.NonFatal object Main { def main(args: Array[String]): Unit = { run(args) match { case ShowUsage(msg) => System.err.println(msg); sys.exit(-1) case Error(err) => err.printStackTrace(System.err); sys.exit(-1) case Success(diffFound) => sys.exit(if (diffFound) 1 else 0) } } private object Opts { val Help = new cli.Option("h", "help", false, "Display this message") val Git = new cli.Option("g", "git", true, "Directory to output a git repository containing the diff") Git.setArgName("dir") val NoCode = new cli.Option("c", "suppress-code", false, "Suppress method bodies") val Raw = new cli.Option("r", "raw", false, "Disable sorting and filtering of classfile contents") val NoPrivates = new cli.Option("p", "suppress-privates", false, "Display only non-private members") val ContextLines = new cli.Option("U", "unified", true, "Number of context lines in diff") val Quiet = new cli.Option("q", "quiet", false, "Don't output diffs to standard out") val Ignore = new cli.Option("i", "ignore", true, "File pattern to ignore rendered files in gitignore format") Ignore.setArgs(cli.Option.UNLIMITED_VALUES) ContextLines.setArgName("n") def apply(): Options = { new cli.Options().addOption(Help).addOption(Git).addOption(ContextLines).addOption(NoCode).addOption(Raw).addOption(NoPrivates).addOption(Quiet).addOption(Ignore) } } private implicit class RichCommandLine(val self: CommandLine) { def has(o: cli.Option): Boolean = self.hasOption(o.getOpt) def get(o: cli.Option): String = self.getOptionValue(o.getOpt) def getOptInt(o: cli.Option): Option[Int] = Option(self.getOptionValue(o.getOpt)).map(x => Try(x.toInt).getOrElse(throw new cli.ParseException("--" + o.getLongOpt + " requires an integer"))) } private def helpText: String = { val formatter = new HelpFormatter val baos = new ByteArrayOutputStream() val writer = new PrintWriter(baos) try { val footer = s" VERSION1 [VERSION2 ...]\n\nEach VERSION may designate a single file, a directory, JAR file or a `${File.pathSeparator}`-delimited classpath\n\n" formatter.printHelp(writer, 80, "jardiff", footer, Opts(), HelpFormatter.DEFAULT_LEFT_PAD, HelpFormatter.DEFAULT_DESC_PAD, "", true) writer.flush() baos.toString().replaceFirst("\\n", "") } finally { writer.close() } } def run(args: Array[String]): RunResult = { val parser = new DefaultParser try { val line = parser.parse(Opts(), args) val trailingArgs = line.getArgList if (line.has(Opts.Help)) { ShowUsage(helpText) } else { val gitRepo = if (line.has(Opts.Git)) Some(Paths.get(line.get(Opts.Git))) else None val diffOutputStream = if (line.has(Opts.Quiet)) NullOutputStream.INSTANCE else System.out val config = JarDiff.Config(gitRepo, !line.has(Opts.NoCode), line.has(Opts.Raw), !line.has(Opts.NoPrivates), line.getOptInt(Opts.ContextLines), diffOutputStream, Option(line.getOptionValues(Opts.Ignore.getOpt)).toList.flatten ) val paths = trailingArgs.asScala.toList.map(JarDiff.expandClassPath) paths match { case Nil => ShowUsage(helpText) case _ => val jarDiff = JarDiff(paths, config) val diffFound = jarDiff.diff() Success(diffFound) } } } catch { case exp: cli.ParseException => ShowUsage(helpText) case NonFatal(t) => Error(t) } } } 
sealed abstract class RunResult case class ShowUsage(msg: String) extends RunResult case class Error(err: Throwable) extends RunResult case class Success(diffFound: Boolean) extends RunResult
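The helpText method above captures commons-cli's HelpFormatter output by pointing a PrintWriter at a ByteArrayOutputStream and decoding the buffer afterwards. A minimal sketch of that capture pattern on its own, outside of jardiff (the usage text printed here is made up for illustration):

import java.io.{ByteArrayOutputStream, PrintWriter}
import java.nio.charset.StandardCharsets

object CaptureWriterOutput {
  def main(args: Array[String]): Unit = {
    val baos = new ByteArrayOutputStream()
    val writer = new PrintWriter(baos)
    try {
      // Anything printed through the writer ends up in the in-memory buffer.
      writer.println("usage: mytool [options] FILE...")
      writer.println("  -h, --help   Display this message")
      writer.flush()
    } finally writer.close()
    // Decode the captured bytes back into a String.
    println(new String(baos.toByteArray, StandardCharsets.UTF_8))
  }
}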
Example 168
Source File: JavaSerializationConverter.scala From scala-serialization with MIT License | 5 votes |
package com.komanov.serialization.converters import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} import com.komanov.serialization.converters.IoUtils.using import com.komanov.serialization.domain.{Site, SiteEvent, SiteEventData} object JavaSerializationConverter extends MyConverter { override def toByteArray(site: Site): Array[Byte] = { using(new ByteArrayOutputStream()) { baos => using(new ObjectOutputStream(baos)) { os => os.writeObject(site) os.flush() baos.toByteArray } } } override def fromByteArray(bytes: Array[Byte]): Site = { using(new ByteArrayInputStream(bytes)) { bais => using(new ObjectInputStream(bais)) { os => os.readObject().asInstanceOf[Site] } } } override def toByteArray(event: SiteEvent): Array[Byte] = { using(new ByteArrayOutputStream()) { baos => using(new ObjectOutputStream(baos)) { os => os.writeObject(event) os.flush() baos.toByteArray } } } override def siteEventFromByteArray(clazz: Class[_], bytes: Array[Byte]): SiteEvent = { using(new ByteArrayInputStream(bytes)) { bais => using(new ObjectInputStream(bais)) { os => os.readObject().asInstanceOf[SiteEvent] } } } }
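The converter above is built on the standard Java serialization round trip: an ObjectOutputStream writing into a ByteArrayOutputStream, and an ObjectInputStream reading the resulting bytes back. A self-contained sketch of that round trip (the Payload case class is a hypothetical stand-in for the Site domain type):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

object JavaSerializationRoundTrip {

  // Hypothetical serializable type; Scala case classes implement java.io.Serializable.
  case class Payload(id: Int, name: String)

  def main(args: Array[String]): Unit = {
    val original = Payload(42, "example")

    // Serialize to an in-memory byte array.
    val baos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(baos)
    oos.writeObject(original)
    oos.close()
    val bytes = baos.toByteArray

    // Deserialize from the same bytes.
    val ois = new ObjectInputStream(new ByteArrayInputStream(bytes))
    val copy = ois.readObject().asInstanceOf[Payload]
    ois.close()

    assert(copy == original)
  }
}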
Example 169
Source File: EventsReportGenerator.scala From scala-serialization with MIT License | 5 votes |
package com.komanov.serialization.converters import java.io.{ByteArrayOutputStream, File} import java.nio.file.{Files, StandardOpenOption} import java.util.zip.GZIPOutputStream import com.komanov.serialization.converters.IoUtils._ object EventsReportGenerator extends App { val flush = false val dir = new File(new File(System.getProperty("user.home"), "123"), "events") require(!flush || dir.exists() || dir.mkdirs()) val (raws, gzips, both) = (Seq.newBuilder[(String, Seq[Int])], Seq.newBuilder[(String, Seq[Int])], Seq.newBuilder[(String, Seq[Int])]) for ((converterName, converter) <- Converters.all if converter ne ScroogeConverter if converter ne ScalaPbConverter) { val results = Seq.newBuilder[(Int, Int)] for ((name, site, events) <- TestData.all) { val bytes = converter.toByteArray(site) val gzipLen = getGzipByteLength(bytes) val eventsAndBytes = events.map(e => e -> converter.toByteArray(e.event)) val eventsLen = eventsAndBytes.map(_._2.length).sum val eventsGzipLen = eventsAndBytes.map(_._2).map(getGzipByteLength).sum results += bytes.length -> gzipLen results += eventsLen -> eventsGzipLen if (flush) { val normalizedConverterName = converterName.toLowerCase().replace(" ", "-") Files.write(dir.getParentFile.toPath.resolve(s"site_${name}_$normalizedConverterName.bin"), bytes, StandardOpenOption.CREATE) for ((event, eventBytes) <- eventsAndBytes) { Files.write(dir.toPath.resolve(s"${name}_${normalizedConverterName}_${event.event.getClass.getSimpleName}.bin"), eventBytes, StandardOpenOption.CREATE) } } } raws += converterName -> results.result().map(_._1) gzips += converterName -> results.result().map(_._2) both += (converterName + " (rw)") -> results.result().map(_._1) both += (converterName + " (gz)") -> results.result().map(_._2) } println("Data Sizes (raw)") printHeaders printSizes(raws.result()) println("Data Sizes (gzip)") printHeaders printSizes(gzips.result()) println("Data Sizes") printHeaders printSizes(both.result()) private def printHeaders: Any = { println("Converter," + TestData.sites.flatMap(t => Seq(t._1, "ev " + t._1)).mkString(",")) } private def printSizes(all: Seq[(String, Seq[Int])]): Unit = { for ((name, list) <- all) { println(name + "," + list.mkString(",")) } } private def getGzipByteLength(bytes: Array[Byte]): Int = { using(new ByteArrayOutputStream()) { baos => using(new GZIPOutputStream(baos)) { os => os.write(bytes) } baos.toByteArray.length } } }
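The getGzipByteLength helper here measures how well a payload compresses by streaming it through a GZIPOutputStream backed by a ByteArrayOutputStream and counting the buffered bytes; the same helper reappears in the next example. A minimal standalone version of the measurement:

import java.io.ByteArrayOutputStream
import java.util.zip.GZIPOutputStream

object GzipSize {

  def gzipByteLength(bytes: Array[Byte]): Int = {
    val baos = new ByteArrayOutputStream()
    val gzip = new GZIPOutputStream(baos)
    gzip.write(bytes)
    gzip.close() // finishing the stream writes the gzip trailer into the buffer
    baos.toByteArray.length
  }

  def main(args: Array[String]): Unit = {
    val data = ("some highly repetitive payload " * 100).getBytes("UTF-8")
    println(s"raw=${data.length} gzip=${gzipByteLength(data)}")
  }
}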
Example 170
Source File: ReportGenerator.scala From scala-serialization with MIT License | 5 votes |
package com.komanov.serialization.converters import java.io.{ByteArrayOutputStream, File} import java.nio.file.{Files, StandardOpenOption} import java.util.zip.GZIPOutputStream import com.komanov.serialization.converters.IoUtils._ object ReportGenerator extends App { val flush = true val dir = new File(System.getProperty("user.home"), "123") require(!flush || dir.exists() || dir.mkdirs()) val (raws, gzips) = (Seq.newBuilder[(String, Seq[Int])], Seq.newBuilder[(String, Seq[Int])]) for ((converterName, converter) <- Converters.all if converter ne ScalaPbConverter if converter ne ScroogeConverter) { val results = Seq.newBuilder[(Int, Int)] for ((name, site) <- TestData.sites) { val bytes = converter.toByteArray(site) val gzipLen = getGzipByteLength(bytes) results += bytes.length -> gzipLen if (flush) { val normalizedConverterName = converterName.toLowerCase().replace(" ", "-") Files.write(dir.toPath.resolve(s"site_${name}_$normalizedConverterName.bin"), bytes, StandardOpenOption.CREATE) } } raws += converterName -> results.result().map(_._1) gzips += converterName -> results.result().map(_._2) } println("Data Sizes (raw)") printHeaders printSizes(raws.result()) println("Data Sizes (gzip)") printHeaders printSizes(gzips.result()) private def printHeaders: Any = { println("Converter," + TestData.sites.map(_._1).mkString(",")) } private def printSizes(all: Seq[(String, Seq[Int])]): Unit = { for ((name, list) <- all) { println(name + "," + list.mkString(",")) } } private def getGzipByteLength(bytes: Array[Byte]): Int = { using(new ByteArrayOutputStream()) { baos => using(new GZIPOutputStream(baos)) { os => os.write(bytes) } baos.toByteArray.length } } }
Example 171
Source File: SerializationTest.scala From scala-serialization with MIT License | 5 votes |
package com.komanov.serialization.converters import java.io.ByteArrayOutputStream import com.komanov.serialization.domain.SiteEventData import org.apache.commons.io.HexDump import org.specs2.mutable.SpecificationWithJUnit import org.specs2.specification.Scope import org.specs2.specification.core.Fragments class SerializationTest extends SpecificationWithJUnit { sequential doTest("JSON", JsonConverter) doTest("ScalaPB", ScalaPbConverter) doTest("Java Protobuf", JavaPbConverter) doTest("Java Thrift", JavaThriftConverter) doTest("Scrooge", ScroogeConverter) doTest("Serializable", JavaSerializationConverter) doTest("Pickling", PicklingConverter) doTest("BooPickle", BoopickleConverter) doTest("Chill", ChillConverter) "ScalaPB and Java Protobuf" should { Fragments.foreach(TestData.sites) { case (name, site) => s"be interoperable for site of $name" in new ctx { val javaMessage = JavaPbConverter.toByteArray(site) val scalaMessage = ScalaPbConverter.toByteArray(site) toHexDump(javaMessage) must be_===(toHexDump(scalaMessage)) } } Fragments.foreach(TestData.events) { case (name, events) => s"be interoperable events of $name" in new ctx { for (SiteEventData(_, event, _) <- events) { val javaMessage = JavaPbConverter.toByteArray(event) val scalaMessage = ScalaPbConverter.toByteArray(event) toHexDump(javaMessage) must be_===(toHexDump(scalaMessage)) } } } } "Scrooge and Java Thrift" should { Fragments.foreach(TestData.sites) { case (name, site) => s"be interoperable for site of $name" in new ctx { val javaMessage = JavaThriftConverter.toByteArray(site) val scalaMessage = ScroogeConverter.toByteArray(site) toHexDump(javaMessage) must be_===(toHexDump(scalaMessage)) } } Fragments.foreach(TestData.events) { case (name, events) => s"be interoperable events of $name" in new ctx { for (SiteEventData(_, event, _) <- events) { val javaMessage = JavaThriftConverter.toByteArray(event) val scalaMessage = ScroogeConverter.toByteArray(event) toHexDump(javaMessage) must be_===(toHexDump(scalaMessage)) } } } } class ctx extends Scope def toHexDump(arr: Array[Byte]): String = { if (arr.isEmpty) { "" } else { val baos = new ByteArrayOutputStream HexDump.dump(arr, 0, baos, 0) new String(baos.toByteArray) } } def doTest(converterName: String, converter: MyConverter) = { converterName should { Fragments.foreach(TestData.sites) { case (name, site) => s"serialize-parse site of $name" in new ctx { val bytes = converter.toByteArray(site) val parsed = converter.fromByteArray(bytes) parsed must be_===(site) } } Fragments.foreach(TestData.events) { case (name, events) => s"serialize-parse site events of $name" in new ctx { for (SiteEventData(_, event, _) <- events) { val bytes = converter.toByteArray(event) val parsed = converter.siteEventFromByteArray(event.getClass, bytes) parsed must be_===(event) } } } } } }
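The interoperability assertions above compare serialized payloads through toHexDump, which works around commons-io HexDump only writing to an OutputStream by collecting the dump in a ByteArrayOutputStream. The same trick in isolation:

import java.io.ByteArrayOutputStream

import org.apache.commons.io.HexDump

object HexDumpToString {

  def toHexDump(arr: Array[Byte]): String =
    if (arr.isEmpty) ""
    else {
      val baos = new ByteArrayOutputStream()
      HexDump.dump(arr, 0, baos, 0) // offset 0, start at index 0
      new String(baos.toByteArray)
    }

  def main(args: Array[String]): Unit =
    println(toHexDump("ByteArrayOutputStream".getBytes("UTF-8")))
}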
Example 172
Source File: avroMarshallers.scala From scalatest-embedded-kafka with MIT License | 5 votes |
package net.manub.embeddedkafka.avro import java.io.ByteArrayOutputStream import kafka.utils.VerifiableProperties import org.apache.avro.Schema import org.apache.avro.io._ import org.apache.avro.specific.{ SpecificDatumReader, SpecificDatumWriter, SpecificRecord } import org.apache.kafka.common.serialization.{Deserializer, Serializer} class KafkaAvroDeserializer[T <: SpecificRecord](schema: Schema) extends Deserializer[T] with NoOpConfiguration with NoOpClose { private val reader = new SpecificDatumReader[T](schema) override def deserialize(topic: String, data: Array[Byte]): T = { val decoder = DecoderFactory.get().binaryDecoder(data, null) reader.read(null.asInstanceOf[T], decoder) } } class KafkaAvroSerializer[T <: SpecificRecord]() extends Serializer[T] with NoOpConfiguration with NoOpClose { private def toBytes(nullableData: T): Array[Byte] = Option(nullableData).fold[Array[Byte]](null) { data => val writer: DatumWriter[T] = new SpecificDatumWriter[T](data.getSchema) val out = new ByteArrayOutputStream() val encoder = EncoderFactory.get.binaryEncoder(out, null) writer.write(data, encoder) encoder.flush() out.close() out.toByteArray } override def serialize(topic: String, data: T): Array[Byte] = toBytes(data) } sealed trait NoOpConfiguration { def configure(configs: java.util.Map[String, _], isKey: Boolean): Unit = () } sealed trait NoOpClose { def close(): Unit = () }
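The serializer above follows the usual Avro pattern: a DatumWriter feeds a binary encoder that drains into a ByteArrayOutputStream, and the buffered bytes become the Kafka record value. A sketch of the same pattern with a GenericRecord, so it runs without generated SpecificRecord classes (the schema below is purely illustrative):

import java.io.ByteArrayOutputStream

import org.apache.avro.Schema
import org.apache.avro.generic.{GenericData, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.EncoderFactory

object GenericAvroToBytes {
  def main(args: Array[String]): Unit = {
    val schema = new Schema.Parser().parse(
      """{"type":"record","name":"User","fields":[{"name":"name","type":"string"}]}""")

    val record: GenericRecord = new GenericData.Record(schema)
    record.put("name", "avro")

    val writer = new GenericDatumWriter[GenericRecord](schema)
    val out = new ByteArrayOutputStream()
    val encoder = EncoderFactory.get().binaryEncoder(out, null)
    writer.write(record, encoder)
    encoder.flush()

    println(s"encoded ${out.toByteArray.length} bytes")
  }
}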
Example 173
Source File: StreamingSpec.scala From seals with Apache License 2.0 | 5 votes |
package com.example.streaming import java.io.{ ByteArrayInputStream, ByteArrayOutputStream } import shapeless.record._ import cats.effect.IO import org.scalatest.matchers.should.Matchers import org.scalatest.flatspec.AnyFlatSpec import fs2.Stream import scodec.Codec import scodec.bits.BitVector import scodec.stream.CodecError import dev.tauri.seals._ import dev.tauri.seals.scodec.Codecs._ import dev.tauri.seals.scodec.StreamCodecs._ class StreamingSpec extends AnyFlatSpec with Matchers { import Main.{ Animal, Elephant, Quokka, Quagga, Grey } val animals = Vector[Animal]( Elephant("Dumbo", tuskLength = 35.0f), Quokka("Nellie"), Quagga("Ford", speed = 120.0) ) val transformedAnimals = Vector[Animal]( Elephant("Dumbo", tuskLength = 35.0f + 17.0f), Quokka("Nellie", Grey) ) val animalStream = Stream.emits[IO, Animal](animals) val encoder = streamEncoderFromReified[Animal] val decoder = streamDecoderFromReified[Animal] "Encoding/decoding" should "work correctly" in { val tsk: IO[Unit] = for { bv <- encoder.encode[IO](animalStream).compile.fold(BitVector.empty)(_ ++ _) as <- decoder.decode[IO](Stream(bv)).compile.toVector } yield { as should === (animals) } tsk.unsafeRunSync() } it should "fail with incompatible models" in { val mod = Reified[Record.`'Elephant -> Elephant, 'Quokka -> Quokka`.T].model val bv: BitVector = Codec[Model].encode(mod).getOrElse(fail) val tsk: IO[Unit] = for { as <- decoder.decode[IO](Stream(bv)).compile.toVector } yield { as should === (Vector.empty) } val ex = intercept[CodecError] { tsk.unsafeRunSync() } ex.err.message should include ("incompatible models") } "Transformation" should "work correctly" in { val tsk: IO[Unit] = for { ibv <- encoder.encode[IO](animalStream).compile.fold(BitVector.empty)(_ ++ _) is = new ByteArrayInputStream(ibv.toByteArray) os = new ByteArrayOutputStream _ <- Main.transform(is, os)(Main.transformer) obv = BitVector(os.toByteArray()) transformed <- decoder.decode[IO](Stream(obv)).compile.fold(Vector.empty[Animal])(_ :+ _) } yield { transformed should === (transformedAnimals) } tsk.unsafeRunSync() } }
Example 174
Source File: StreamHandlerTest.scala From scala-js-java-logging with BSD 3-Clause "New" or "Revised" License | 5 votes |
package org.scalajs.testsuite.javalib.util.logging import java.io.ByteArrayOutputStream import java.util.logging._ import org.junit.Test import org.junit.Assert._ class StreamHandlerTest { object TestFormatter extends SimpleFormatter { override def getHead(h: Handler): String = "header" override def getTail(h: Handler): String = "footer" } @Test def test_logging():Unit = { val o = new ByteArrayOutputStream() val sh = new StreamHandler(o, new SimpleFormatter()) sh.publish(new LogRecord(Level.INFO, "message")) sh.flush() assertTrue(o.toString.contains("message")) } @Test def test_default_level():Unit = { val o = new ByteArrayOutputStream() val sh = new StreamHandler(o, new SimpleFormatter()) // Defaults to level INFO sh.publish(new LogRecord(Level.FINER, "message")) sh.flush() assertFalse(o.toString.contains("message")) } @Test def test_default_config():Unit = { val o = new ByteArrayOutputStream() val sh = new StreamHandler(o, new SimpleFormatter()) assertNull(sh.getEncoding) assertNull(sh.getFilter) assertNotNull(sh.getFormatter) assertNotNull(sh.getErrorManager) } @Test def test_default_constructor_config():Unit = { val sh = new StreamHandler() assertNull(sh.getEncoding) assertNull(sh.getFilter) assertNotNull(sh.getFormatter) assertNotNull(sh.getErrorManager) } @Test def test_no_logging_for_level():Unit = { val o = new ByteArrayOutputStream() val sh = new StreamHandler(o, new SimpleFormatter()) sh.setLevel(Level.WARNING) sh.publish(new LogRecord(Level.INFO, "message")) sh.flush() // No output under the given level assertTrue(o.toString.isEmpty) } @Test def test_no_errors_if_no_stream():Unit = { val sh = new StreamHandler() sh.publish(new LogRecord(Level.INFO, "message")) sh.flush() } @Test def test_print_head():Unit = { val o = new ByteArrayOutputStream() val sh = new StreamHandler(o, TestFormatter) assertTrue(o.toString.isEmpty) sh.publish(new LogRecord(Level.INFO, "message")) sh.flush() assertTrue(o.toString.contains("header")) assertTrue(!o.toString.contains("footer")) } @Test def test_print_tail():Unit = { val o = new ByteArrayOutputStream() val sh = new StreamHandler(o, TestFormatter) assertTrue(o.toString.isEmpty) sh.close() assertTrue(o.toString.contains("header")) assertTrue(o.toString.contains("footer")) } }
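Outside of a test suite, the same capture idea is handy whenever java.util.logging output needs to be inspected as a string: point a StreamHandler at a ByteArrayOutputStream and read the buffer after flushing. A small sketch:

import java.io.ByteArrayOutputStream
import java.util.logging.{Level, LogRecord, SimpleFormatter, StreamHandler}

object CaptureLogOutput {
  def main(args: Array[String]): Unit = {
    val buffer = new ByteArrayOutputStream()
    val handler = new StreamHandler(buffer, new SimpleFormatter())

    handler.publish(new LogRecord(Level.INFO, "captured message"))
    handler.flush() // StreamHandler buffers internally until flushed

    println(buffer.toString.contains("captured message")) // true
  }
}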
Example 175
Source File: CaptureOutputStream.scala From spatial with MIT License | 5 votes |
package utils.io import java.io.{ByteArrayOutputStream, OutputStream, PrintStream} class CaptureOutputStream extends OutputStream { val data = new ByteArrayOutputStream() override def write(b: Int): Unit = data.write(b) override def write(b: Array[Byte]): Unit = data.write(b) override def write(b: Array[Byte], off: Int, len: Int): Unit = data.write(b,off,len) def dump: String = new java.lang.String(data.toByteArray, java.nio.charset.StandardCharsets.UTF_8) } class CaptureStream(__out: CaptureOutputStream, paired: PrintStream) extends PrintStream(__out) { def this(paired: PrintStream) = this(new CaptureOutputStream(), paired) def dump: String = __out.dump //TODO[5]: For some reason this duplicates the printing //override def print(s: String): Unit = { paired.print(s); super.print(s) } //override def println(s: String): Unit = { paired.println(s); super.println(s) } }
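For one-off captures that don't need a reusable stream class like the one above, Scala's Console.withOut can temporarily redirect standard output into a ByteArrayOutputStream for the duration of a block. A minimal sketch of that alternative:

import java.io.{ByteArrayOutputStream, PrintStream}

object CaptureStdout {
  def main(args: Array[String]): Unit = {
    val buffer = new ByteArrayOutputStream()
    val result = Console.withOut(new PrintStream(buffer)) {
      println("goes into the buffer") // routed to the redirected Console.out
      42                              // the block's value is returned unchanged
    }
    println(s"result=$result, captured=${buffer.toString.trim}")
  }
}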
Example 176
Source File: ShowSchemaMainTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.cli import java.io.{ByteArrayOutputStream, PrintStream} import org.scalatest.{Matchers, WordSpec} class ShowSchemaMainTest extends WordSpec with Matchers { "SchemaMain" should { "display schema for specified avro source" in { val baos = new ByteArrayOutputStream val out = new PrintStream(baos) ShowSchemaMain(Seq("--source", "avro:" + getClass.getResource("/test.avro").getFile), out) new String(baos.toByteArray).trim shouldBe """{"type":"record","name":"row","namespace":"namespace","fields":[{"name":"name","type":"string"},{"name":"job","type":"string"},{"name":"location","type":"string"}]}""" } } }
Example 177
Source File: ArrowConverters.scala From flint with Apache License 2.0 | 5 votes |
package com.twosigma.flint.arrow

import java.io.ByteArrayOutputStream
import java.nio.channels.Channels

import org.apache.arrow.memory.BufferAllocator
import org.apache.arrow.vector._
import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel
import org.apache.spark.TaskContext
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.UnsafeRow
import org.apache.spark.sql.types._
import com.twosigma.flint.util.Utils
import org.apache.arrow.vector.ipc.{ ArrowFileReader, ArrowFileWriter }
import org.apache.arrow.vector.ipc.message.ArrowRecordBatch

trait ClosableIterator[T] extends Iterator[T] with AutoCloseable

class ConcatClosableIterator[T](iters: Iterator[ClosableIterator[T]])
  extends ClosableIterator[T] {

  var curIter: ClosableIterator[T] = _

  private def advance(): Unit = {
    require(curIter == null || !curIter.hasNext, "Should not advance if curIter is not empty")
    require(iters.hasNext, "Should not advance if iters doesn't have next")
    closeCurrent()
    curIter = iters.next()
  }

  private def closeCurrent(): Unit = if (curIter != null) curIter.close()

  override def close(): Unit = closeCurrent()

  override def hasNext: Boolean = {
    if (curIter == null || !curIter.hasNext) {
      if (iters.hasNext) {
        advance()
        hasNext
      } else {
        false
      }
    } else {
      true
    }
  }

  override def next(): T = curIter.next()
}

// The opening of the enclosing object is missing from this excerpt; it is restored here so the
// otherwise orphaned byteArrayToBatch definition and the trailing brace compile.
object ArrowConverters {

  def byteArrayToBatch(
    batchBytes: Array[Byte],
    allocator: BufferAllocator
  ): ArrowRecordBatch = {
    val in = new ByteArrayReadableSeekableByteChannel(batchBytes)
    val reader = new ArrowFileReader(in, allocator)

    // Read a batch from a byte stream, ensure the reader is closed
    Utils.tryWithSafeFinally {
      val root = reader.getVectorSchemaRoot // throws IOException
      val unloader = new VectorUnloader(root)
      reader.loadNextBatch() // throws IOException
      unloader.getRecordBatch
    } {
      reader.close()
    }
  }
}
Example 178
Source File: ArrowSummarizer.scala From flint with Apache License 2.0 | 5 votes |
package com.twosigma.flint.rdd.function.summarize.summarizer import java.io.ByteArrayOutputStream import java.nio.channels.Channels import java.util import com.twosigma.flint.arrow.{ ArrowFieldWriter, ArrowPayload, ArrowUtils, ArrowWriter } import org.apache.arrow.memory.{ BufferAllocator, RootAllocator } import org.apache.arrow.vector.VectorSchemaRoot import org.apache.arrow.vector.ipc.ArrowFileWriter import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.GenericArrayData import org.apache.spark.sql.types.StructType import scala.collection.JavaConverters._ case class ArrowSummarizer(inputSchema: StructType, outputSchema: StructType, includeBaseRows: Boolean) extends Summarizer[InternalRow, ArrowSummarizerState, ArrowSummarizerResult] { private[this] val size = outputSchema.size require(size > 0, "Cannot create summarizer with no input columns") // This function will allocate memory from the BufferAllocator to initialize arrow vectors. override def zero(): ArrowSummarizerState = { new ArrowSummarizerState(false, null, null, null, null) } private def init(u: ArrowSummarizerState): Unit = { if (!u.initialized) { val arrowSchema = ArrowUtils.toArrowSchema(outputSchema) val allocator = new RootAllocator(Int.MaxValue) val root = VectorSchemaRoot.create(arrowSchema, allocator) val arrowWriter = ArrowWriter.create(inputSchema, outputSchema, root) u.initialized = true u.baseRows = new util.ArrayList[InternalRow]() u.allocator = allocator u.root = root u.arrowWriter = arrowWriter } } override def add(u: ArrowSummarizerState, row: InternalRow): ArrowSummarizerState = { if (!u.initialized) { init(u) } if (includeBaseRows) { u.baseRows.add(row) } u.arrowWriter.write(row) u } override def merge( u1: ArrowSummarizerState, u2: ArrowSummarizerState ): ArrowSummarizerState = throw new UnsupportedOperationException() // This can only be called once override def render(u: ArrowSummarizerState): ArrowSummarizerResult = { if (u.initialized) { val out = new ByteArrayOutputStream() val writer = new ArrowFileWriter(u.root, null, Channels.newChannel(out)) u.arrowWriter.finish() writer.writeBatch() writer.close() u.root.close() u.allocator.close() val rows = u.baseRows.toArray.asInstanceOf[Array[Any]] ArrowSummarizerResult(rows, out.toByteArray) } else { ArrowSummarizerResult(Array.empty, Array.empty) } } override def close(u: ArrowSummarizerState): Unit = { if (u.initialized) { u.arrowWriter.reset() u.root.close() u.allocator.close() } } }
Example 179
Source File: ConcatArrowAndExplodeSpec.scala From flint with Apache License 2.0 | 5 votes |
package com.twosigma.flint.timeseries import java.io.ByteArrayOutputStream import java.nio.channels.Channels import java.util.concurrent.TimeUnit import com.twosigma.flint.arrow.ArrowUtils import org.apache.arrow.memory.RootAllocator import org.apache.arrow.vector.ipc.ArrowFileWriter import org.apache.arrow.vector.{ BigIntVector, Float8Vector, VectorSchemaRoot } import org.apache.spark.sql.functions.{ array, col, lit, struct } import org.apache.spark.sql.types._ class ConcatArrowAndExplodeSpec extends TimeSeriesSuite { "ConcatArrowAndExplode" should "work" in { val batchSize = 10 var df = spark.range(1000, 2000, 1000).toDF("time") val columns = (0 until batchSize).map(v => struct((df("time") + v).as("time"), lit(v.toDouble).as("v"))) df = df.withColumn("base_rows", array(columns: _*)) val allocator = new RootAllocator(Long.MaxValue) val schema1 = StructType(Seq(StructField("v1", DoubleType))) val root1 = VectorSchemaRoot.create(ArrowUtils.toArrowSchema(schema1), allocator) val vector1 = root1.getVector("v1").asInstanceOf[Float8Vector] vector1.allocateNew() for (i <- 0 until batchSize) { vector1.set(i, i + 10.0) } vector1.setValueCount(batchSize) val out1 = new ByteArrayOutputStream() val arrowWriter1 = new ArrowFileWriter(root1, null, Channels.newChannel(out1)) arrowWriter1.writeBatch() arrowWriter1.close() root1.close() df = df.withColumn("f1_schema", struct(lit(0.0).as("v1"))) df = df.withColumn("f1_data", lit(out1.toByteArray)) val schema2 = StructType(Seq(StructField("v2", DoubleType), StructField("v3", LongType))) val root2 = VectorSchemaRoot.create(ArrowUtils.toArrowSchema(schema2), allocator) val vector2 = root2.getVector("v2").asInstanceOf[Float8Vector] val vector3 = root2.getVector("v3").asInstanceOf[BigIntVector] vector2.allocateNew() vector3.allocateNew() for (i <- 0 until batchSize) { vector2.set(i, i + 20.0) } vector2.setValueCount(batchSize) for (i <- 0 until batchSize) { vector3.set(i, i + 30L) } vector3.setValueCount(batchSize) val out2 = new ByteArrayOutputStream() val arrowWriter2 = new ArrowFileWriter(root2, null, Channels.newChannel(out2)) arrowWriter2.writeBatch() arrowWriter2.close() root2.close() df = df.withColumn("f2_schema", struct(lit(0.0).as("v2"), lit(0L).as("v3"))) df = df.withColumn("f2_data", lit(out2.toByteArray)) var tsrdd = TimeSeriesRDD.fromDF(df)(isSorted = false, timeUnit = TimeUnit.NANOSECONDS) tsrdd = tsrdd.concatArrowAndExplode("base_rows", Seq("f1_schema", "f2_schema"), Seq("f1_data", "f2_data")) tsrdd.toDF.show() var expected = spark.range(1000, 1000 + batchSize).toDF("time") expected = expected.withColumn("v", col("time") - 1000.0) expected = expected.withColumn("v1", col("time") - 1000 + 10.0) expected = expected.withColumn("v2", col("time") - 1000 + 20.0) expected = expected.withColumn("v3", col("time") - 1000 + 30) val expectedTsrdd = TimeSeriesRDD.fromDF(expected)(isSorted = false, timeUnit = TimeUnit.NANOSECONDS) assertEquals(tsrdd, expectedTsrdd) } }
Example 180
Source File: get_features_from_peinfo.scala From gsoc_relationship with Apache License 2.0 | 5 votes |
import com.datastax.spark.connector._ import play.api.libs.json.Json import play.api.libs.json._ import java.io.{ByteArrayOutputStream, ByteArrayInputStream} import java.util.zip.{GZIPOutputStream, GZIPInputStream} import Array.concat import org.apache.spark.sql.types._ import org.apache.spark.ml.linalg.SQLDataTypes.VectorType import org.apache.spark.ml.linalg._ import org.apache.spark.sql.Row import org.apache.spark.ml.feature.MinMaxScaler import org.apache.spark.ml.linalg.DenseVector import PreProcessingConfig._ case class peinfo_results_by_service_name_class(service_name: String, sha256: String) case class peinfo_results_by_sha256_class(sha256: String, service_name: String, results: Array[Byte]) case class peinfo_join_results_class(sha256: String, service_name: String, results: String) case class peinfo_int_final_array_rdd_class(sha256: String, array_results: Array[Double]) case class peinfo_binaray_final_array_rdd_class(sha256:String, array_results :Array[Double]) case class peinfo_final_array_rdd_class(sha256:String, array_results: Array[Double]) def unzip(x: Array[Byte]) : String = { val inputStream = new GZIPInputStream(new ByteArrayInputStream(x)) val output = scala.io.Source.fromInputStream(inputStream).mkString return output } def findAllIntinpeinfo( peinfo_json_results : JsLookupResult, time: Double): Array[Double]= { val entropy = peinfo_json_results \\ "entropy" ; val virt_address = peinfo_json_results \\ "virt_address"; val virt_size = peinfo_json_results \\ "virt_size"; val size = peinfo_json_results \\ "size"; var i= 0; var List = Array.iterate(0.0,17)(a=>a*0) for (k <- ( peinfo_json_results \\ "section_name")){ k.as[String] match { case ".text\u0000\u0000\u0000" => { List(0)=entropy(i).as[Double]; List(1)=Integer.parseInt(virt_address(i).as[String].substring(2), 16).toDouble; List(2)=virt_size(i).as[Double]; List(3)=size(i).as[Double] } case ".data\u0000\u0000\u0000" => { List(4)=entropy(i).as[Double]; List(5)=Integer.parseInt(virt_address(i).as[String].substring(2), 16).toDouble; List(6)=virt_size(i).as[Double]; List(7)=size(i).as[Double] } case ".rsrc\u0000\u0000\u0000" => { List(8)=entropy(i).as[Double]; List(9)=Integer.parseInt(virt_address(i).as[String].substring(2), 16).toDouble; List(10)=virt_size(i).as[Double]; List(11)=size(i).as[Double] } case ".rdata\u0000\u0000" => { List(12)=entropy(i).as[Double]; List(13)=Integer.parseInt(virt_address(i).as[String].substring(2), 16).toDouble; List(14)=virt_size(i).as[Double]; List(15)=size(i).as[Double] } case other => {} } i = i + 1 } List(16)= time return List.toArray } val peinfo_results_by_service_name_meta = sc.cassandraTable[peinfo_results_by_service_name_class](keyspace,service_name_table).where("service_name=?","peinfo") val peinfo_results_by_service_name_rdd = peinfo_results_by_service_name_meta.keyBy(x=> (x.sha256,x.service_name)) val peinfo_results_by_sha256_meta = sc.cassandraTable[peinfo_results_by_sha256_class](keyspace,sha256_table) val peinfo_results_by_sha256_rdd = peinfo_results_by_sha256_meta.keyBy(x => (x.sha256,x.service_name)) val peinfo_join_results = peinfo_results_by_service_name_rdd.join(peinfo_results_by_sha256_rdd).map(x=> (new peinfo_join_results_class(x._1._1,x._1._2, unzip(x._2._2.results)))).distinct().cache() val peinfo_int_final_array_rdd = peinfo_join_results.map(x=>(x.sha256,(Json.parse(x.results) \ "pe_sections"),{if ((Json.parse(x.results) \ "timestamp").isInstanceOf[JsUndefined]) 0.0 else (Json.parse(x.results) \ "timestamp" \\ "timestamp")(0).as[Double]})).filter(x=> 
!x._2.isInstanceOf[JsUndefined]).map(x=>new peinfo_int_final_array_rdd_class(x._1,findAllIntinpeinfo(x._2,x._3))) val peinfo_dllfunction_list= peinfo_join_results.map(x=>Json.parse(x.results) \ "imports").filter(x=> !x.isInstanceOf[JsUndefined]).flatMap(x=>x.as[List[Map[String, String]]].map(x=>(x("dll")+"."+x("function")))).toDF("func_name").groupBy("func_name").count.sort(desc("count")).filter("count > 10000").rdd.map(r => r.getString(0)).collect().toList implicit def bool2int(b:Boolean) = if (b) 1 else 0 def findAllBininpeinfo_dllfunction(peinfo_dllfunction : Seq[String]) : Array[Double] ={ val forlist = for (family <- peinfo_dllfunction_list) yield { (peinfo_dllfunction.contains(family):Int).toDouble } return (forlist).toArray } val List502 = Array.iterate(0.0,502)(a=>0.0) val peinfo_binaray_final_array_rdd = peinfo_join_results.map(x=>(x.sha256,(Json.parse(x.results) \ "imports"))).map(x=>new peinfo_binaray_final_array_rdd_class(x._1,{if (x._2.isInstanceOf[JsUndefined]) List502 else findAllBininpeinfo_dllfunction(x._2.as[Seq[Map[String, String]]].map(x=>(x("dll")+"."+x("function"))))})) val peinfo_int_final_array_rdd_before_join = peinfo_int_final_array_rdd.map(x=>(x.sha256,x.array_results)) val peinfo_binaray_final_array_rdd_before_join = peinfo_binaray_final_array_rdd.map(x=>(x.sha256,x.array_results)) val peinfo_array_rdd_by_join = peinfo_int_final_array_rdd_before_join.join(peinfo_binaray_final_array_rdd_before_join).map(x=> (x._1,concat(x._2._1,x._2._2))) val peinfo_final_array_rdd = peinfo_array_rdd_by_join.map(x=>new peinfo_final_array_rdd_class(x._1,x._2)) val peinfo_schema = new StructType().add("sha256", StringType).add("peinfo",VectorType) val peinfo_vector_rdd = peinfo_final_array_rdd.map(x=>(x.sha256,Vectors.dense(x.array_results))) val peinfo_vector_rowrdd = peinfo_vector_rdd.map(p => Row(p._1,p._2)) val peinfo_vector_dataframe = spark.createDataFrame(peinfo_vector_rowrdd, peinfo_schema) val peinfo_scaler = new MinMaxScaler() .setInputCol("peinfo") .setOutputCol("scaled_peinfo") val peinfo_scalerModel = peinfo_scaler.fit(peinfo_vector_dataframe) val peinfo_scaledData_df = peinfo_scalerModel.transform(peinfo_vector_dataframe) val peinfo_scaledData_rdd = peinfo_scaledData_df.select("sha256","scaled_peinfo").rdd.map(row=>(row.getAs[String]("sha256"),row.getAs[DenseVector]("scaled_peinfo"))).map(x=>new peinfo_final_array_rdd_class(x._1,x._2.toArray)) peinfo_scaledData_rdd.toDF().write.format("parquet").save(peinfo_final_array_file)
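The unzip helper in this script decompresses the GZIP blobs stored in Cassandra by wrapping a GZIPInputStream around the raw bytes; the write direction is the mirror image, a GZIPOutputStream over a ByteArrayOutputStream. A round-trip sketch of the two helpers together (the sample JSON string is illustrative):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.zip.{GZIPInputStream, GZIPOutputStream}

object GzipRoundTrip {

  def gzip(text: String): Array[Byte] = {
    val baos = new ByteArrayOutputStream()
    val out = new GZIPOutputStream(baos)
    out.write(text.getBytes("UTF-8"))
    out.close() // close() finishes the gzip stream before the buffer is read
    baos.toByteArray
  }

  def unzip(bytes: Array[Byte]): String = {
    val in = new GZIPInputStream(new ByteArrayInputStream(bytes))
    scala.io.Source.fromInputStream(in, "UTF-8").mkString
  }

  def main(args: Array[String]): Unit = {
    val original = """{"service":"peinfo","ok":true}"""
    assert(unzip(gzip(original)) == original)
  }
}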
Example 181
Source File: get_features_from_objdump.scala From gsoc_relationship with Apache License 2.0 | 5 votes |
import com.datastax.spark.connector._ import play.api.libs.json.Json import play.api.libs.json._ import java.io.{ByteArrayOutputStream, ByteArrayInputStream} import java.util.zip.{GZIPOutputStream, GZIPInputStream} import PreProcessingConfig._ case class objdump_results_by_service_name_class(service_name: String, sha256: String) case class objdump_results_by_sha256_class(sha256: String, service_name: String, results: Array[Byte]) case class objdump_join_results_class(sha256: String, service_name: String, results: String) case class objdump_binaray_final_array_rdd_class(sha256: String, array_results: Array[Double]) val objdump_main_list = sc.textFile(objdump_x86Opcodes_file).collect.toList def unzip(x: Array[Byte]) : String = { val inputStream = new GZIPInputStream(new ByteArrayInputStream(x)) val output = scala.io.Source.fromInputStream(inputStream).mkString return output } def combineAllObjdumpInOne( malwarelist :Seq[play.api.libs.json.JsValue]) : List[String] ={ if (malwarelist(0).toString() == "null") return List("null") var begin = malwarelist(0).as[List[String]] for (i <- 1 to (malwarelist.size-1)){ if (malwarelist(i).toString() == "null") begin = begin else begin = begin ::: malwarelist(i).as[List[String]] } return begin } def convertToList( malwarelist :Seq[play.api.libs.json.JsValue]) : List[String] = { if (malwarelist(0).toString() == "null") return List("null") else { return malwarelist(0).as[List[String]] } } def findAllBininobjdump_main_list(malware :List[String]) : Array[Double] ={ if (malware == List("null")) return (List.fill(10000)(0.0)).toArray else { val forlist = for ( one <- malware ) yield { objdump_main_list.indexOf(one) + 1.0 } if (forlist.size < 10000){ return (List.concat(forlist,List.fill(10000-forlist.size)(0.0))).toArray } else return forlist.toArray } } val objdump_results_by_service_name_meta = sc.cassandraTable[objdump_results_by_service_name_class](keyspace,service_name_table).where("service_name=?","objdump") val objdump_results_by_service_name_rdd = objdump_results_by_service_name_meta.keyBy(x=> (x.sha256,x.service_name)) val objdump_results_by_sha256_meta = sc.cassandraTable[objdump_results_by_sha256_class](keyspace,sha256_table) val objdump_results_by_sha256_rdd = objdump_results_by_sha256_meta.keyBy(x => (x.sha256,x.service_name)) val objdump_join_results = objdump_results_by_service_name_rdd.join(objdump_results_by_sha256_rdd).map(x=> (new objdump_join_results_class(x._1._1,x._1._2, unzip(x._2._2.results)))).distinct() val objdump_binaray_final_array_rdd = objdump_join_results.map(x=>(x.sha256,(Json.parse(x.results) \\ "opcodes"))).filter(x=> (x._2.size > 0)).map(x=>(x._1,if ( x._2.size == 1 ) convertToList(x._2) else combineAllObjdumpInOne(x._2))).map(x=>(x._1,findAllBininobjdump_main_list(x._2))) objdump_binaray_final_array_rdd.toDF().write.format("parquet").save(objdump_binaray_final_array_file)
Example 182
Source File: get_VT_signatures.scala From gsoc_relationship with Apache License 2.0 | 5 votes |
import com.datastax.spark.connector._ import play.api.libs.json.Json import java.io.{ByteArrayOutputStream, ByteArrayInputStream} import java.util.zip.{GZIPOutputStream, GZIPInputStream} import PreProcessingConfig._ case class VT_results_by_service_name_class(service_name: String, sha256: String) case class VT_results_by_sha256_class(sha256: String, service_name: String, results: Array[Byte] ) case class VT_join_results_class(sha256: String, service_name: String, results: String) case class VT_sample_signatures_initial_seq_rdd_class(sha256: String, seq_results: Seq[String]) case class VT_sample_signatures_final_array_rdd_class(sha256:String, array_results:Array[Double]) def unzip(x: Array[Byte]) : String = { val inputStream = new GZIPInputStream(new ByteArrayInputStream(x)) val output = scala.io.Source.fromInputStream(inputStream).mkString return output } def deleteNumberInSampleSignatures(x: String): Boolean = { val regex = "[0-9]".r return regex.findFirstIn(x).isEmpty } val VT_results_by_service_name_meta = sc.cassandraTable[VT_results_by_service_name_class](keyspace,service_name_table).where("service_name=?","virustotal") val VT_results_by_service_name_rdd = VT_results_by_service_name_meta.keyBy(x=> (x.sha256,x.service_name)) val VT_results_by_sha256_meta = sc.cassandraTable[VT_results_by_sha256_class](keyspace,sha256_table) val VT_results_by_sha256_rdd = VT_results_by_sha256_meta.keyBy(x => (x.sha256,x.service_name)) val VT_join_results = VT_results_by_service_name_rdd.join(VT_results_by_sha256_rdd).map(x => (new VT_join_results_class(x._1._1,x._1._2, unzip(x._2._2.results)))).distinct().cache() val sample_signatures_rdd = VT_join_results.flatMap(x=>Json.parse(x.results) \ "scans" \\ "result").map(x=>Json.stringify(x)).filter( x=> !(x == "null")) val sample_signatures_split_rdd = sample_signatures_rdd.flatMap(x=>x.replaceAll("""["]""","").replaceAll("""\![a-zA-Z0-9\s\+]+""","").replaceAll("""@[a-zA-Z0-9\s\+]+""","").replaceAll("""~[a-zA-Z0-9\s\+]+""","").replaceAll("""[\(|\[|{][a-zA-Z0-9\s\+]*[\)|\]|}]""","").replaceAll("""(\.|\!|\:|\_|\-|\\|/|\[|\])"""," ").split(" ")).filter(x=>(x.size>3)).filter(x=>deleteNumberInSampleSignatures(x)).map(x=>x.toLowerCase()) val signatures_prefix_rdd = sc.textFile(VT_signatures_prefix_suffix_file).map(x=>x.toLowerCase()) val family_signatures_subtract_rdd = sample_signatures_split_rdd.subtract(signatures_prefix_rdd) val family_signatures_sorted_rdd = sc.parallelize(family_signatures_subtract_rdd.countByValue().toSeq).filter(x=>(x._2>50)).sortBy(x=>x._2,false) val family_signatures_list = family_signatures_sorted_rdd.keys.collect().toList val VT_sample_signatures_rdd = VT_join_results.map(x=>(x.sha256,(Json.parse(x.results) \ "scans" \\ "result").map(_.toString).filter( s => !(s== "null")).flatMap(x=>x.replaceAll("""["]""","").replaceAll("""\![a-zA-Z0-9\s\+]+""","").replaceAll("""@[a-zA-Z0-9\s\+]+""","").replaceAll("""~[a-zA-Z0-9\s\+]+""","").replaceAll("""[\(|\[|{][a-zA-Z0-9\s\+]*[\)|\]|}]""","").replaceAll("""(\.|\!|\:|\_|\-|\\|/|\[|\])"""," ").split(" ")).filter(x=>(x.size>3)).filter(x=>deleteNumberInSampleSignatures(x)).map(x=>x.toLowerCase()))) val VT_sample_signatures_initial_seq_rdd = VT_sample_signatures_rdd.map(x=>new VT_sample_signatures_initial_seq_rdd_class(x._1, x._2)) implicit def bool2int(b:Boolean) = if (b) 1 else 0 def findAllInFamilySignatures(sample_signatures_seq : Seq[String]) : Array[Double] ={ val forlist = for (family <- family_signatures_list) yield { (sample_signatures_seq.contains(family):Int).toDouble } return 
forlist.toArray } val VT_sample_signatures_final_array_rdd = VT_sample_signatures_initial_seq_rdd.map(x=>new VT_sample_signatures_final_array_rdd_class(x.sha256,findAllInFamilySignatures(x.seq_results))) VT_sample_signatures_final_array_rdd.toDF().write.format("parquet").save(VT_sample_signatures_final_array_file)
Example 183
Source File: HelperMethods.scala From gsoc_relationship with Apache License 2.0 | 5 votes |
package com.holmesprocessing.analytics.relationship.knowledgeBase

import play.api.libs.json.Json
import java.util.zip.{GZIPOutputStream, GZIPInputStream}
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream

// The opening of the enclosing object is missing from this excerpt; it is restored here so
// that the score method and the trailing brace compile.
object HelperMethods {

  def score(ruleset_1: String, ruleset_2: String): Double = {
    val split_1 = ruleset_1.split(",").toSeq
    val split_2 = ruleset_2.split(",").toSeq
    if (split_1.length > 0 && split_2.length > 0) {
      return split_1.intersect(split_2).length.toDouble / split_1.union(split_2).distinct.length.toDouble
    } else {
      return 0
    }
  }
}
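The score function is a Jaccard-style similarity over comma-separated rule names: the number of shared entries divided by the number of distinct entries in the union. A tiny usage sketch (assuming the object wrapper restored above):

object ScoreExample {
  def main(args: Array[String]): Unit = {
    import com.holmesprocessing.analytics.relationship.knowledgeBase.HelperMethods

    println(HelperMethods.score("a,b,c", "b,c,d")) // 2 shared / 4 distinct = 0.5
    println(HelperMethods.score("a,b", "c,d"))     // no overlap, so 0.0
  }
}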
Example 184
Source File: KryoInitSpec.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.core.akka import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import com.esotericsoftware.kryo.Kryo import com.esotericsoftware.kryo.io.{Input, Output} import com.romix.scala.serialization.kryo.{ EnumerationSerializer, ScalaImmutableAbstractMapSerializer, ScalaMutableMapSerializer } import org.scalatest.matchers.should.Matchers import org.scalatest.flatspec.AnyFlatSpecLike class KryoInitSpec extends Matchers with AnyFlatSpecLike { "The custom KryoInit" should "register serializers" in { val kryo = new Kryo() new KryoInit().customize(kryo) kryo.getDefaultSerializer(classOf[scala.Enumeration#Value]) shouldBe an[ EnumerationSerializer ] kryo.getDefaultSerializer(classOf[collection.immutable.Map[_, _]]) shouldBe a[ ScalaImmutableAbstractMapSerializer ] kryo.getDefaultSerializer(classOf[collection.immutable.Map[_, _]]) shouldBe a[ ScalaImmutableAbstractMapSerializer ] kryo.getDefaultSerializer(classOf[collection.mutable.HashMap[_, _]]) shouldBe a[ ScalaMutableMapSerializer ] } it should "serialize immutable maps" in { val kryo = new Kryo() new KryoInit().customize(kryo) val map1 = Map( "Rome" -> "Italy", "London" -> "England", "Paris" -> "France", "New York" -> "USA", "Tokyo" -> "Japan", "Peking" -> "China", "Brussels" -> "Belgium" ) val map2 = map1 + ("Moscow" -> "Russia") val map3 = map2 + ("Berlin" -> "Germany") val map4 = map3 + ("Germany" -> "Berlin", "Russia" -> "Moscow") roundTrip(map1, kryo) roundTrip(map2, kryo) roundTrip(map3, kryo) roundTrip(map4, kryo) } def roundTrip[T](obj: T, kryo: Kryo): T = { val outStream = new ByteArrayOutputStream() val output = new Output(outStream, 4096) kryo.writeClassAndObject(output, obj) output.flush() val input = new Input(new ByteArrayInputStream(outStream.toByteArray), 4096) val obj1 = kryo.readClassAndObject(input) assert(obj == obj1) obj1.asInstanceOf[T] } }
Example 185
Source File: BytecodeUtils.scala From graphx-algorithm with GNU General Public License v2.0 | 5 votes |
package org.apache.spark.graphx.util

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import scala.collection.mutable.HashSet
import scala.language.existentials

import org.apache.spark.util.Utils

import com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.{ClassReader, ClassVisitor, MethodVisitor}
import com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.Opcodes._

// The excerpt omits the opening of the enclosing object (and most of its members). A minimal
// object wrapper and the skipClass helper referenced below are restored so the visitor compiles;
// the helper's body is a reconstruction that skips JDK and Scala library classes.
private[graphx] object BytecodeUtils {

  private def skipClass(className: String): Boolean = {
    val c = className
    c.startsWith("scala/") || c.startsWith("java/") || c.startsWith("sun/")
  }

  private class MethodInvocationFinder(className: String, methodName: String)
    extends ClassVisitor(ASM4) {

    val methodsInvoked = new HashSet[(Class[_], String)]

    override def visitMethod(access: Int, name: String, desc: String,
                             sig: String, exceptions: Array[String]): MethodVisitor = {
      if (name == methodName) {
        new MethodVisitor(ASM4) {
          override def visitMethodInsn(op: Int, owner: String, name: String, desc: String) {
            if (op == INVOKEVIRTUAL || op == INVOKESPECIAL || op == INVOKESTATIC) {
              if (!skipClass(owner)) {
                methodsInvoked.add((Class.forName(owner.replace("/", ".")), name))
              }
            }
          }
        }
      } else {
        null
      }
    }
  }
}
Example 186
Source File: RulesTxtDeploymentServiceSpec.scala From smui with Apache License 2.0 | 5 votes |
package models import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.util.zip.ZipInputStream import org.apache.commons.io.IOUtils import org.scalatest.{FlatSpec, Matchers} class RulesTxtDeploymentServiceSpec extends FlatSpec with Matchers with ApplicationTestBase { private lazy val service = injector.instanceOf[RulesTxtDeploymentService] private var inputIds: Seq[SearchInputId] = Seq.empty override protected def beforeAll(): Unit = { super.beforeAll() createTestCores() inputIds = createTestRule() } private def rulesFileContent(ruleIds: Seq[SearchInputId]): String = s"""aerosmith => | SYNONYM: mercury | DOWN(10): battery | UP(10): notebook | FILTER: zz top | @{ | "_log" : "${ruleIds.head}" | }@ | |mercury => | SYNONYM: aerosmith | DOWN(10): battery | UP(10): notebook | FILTER: zz top | @{ | "_log" : "${ruleIds.head}" | }@ | |shipping => | DECORATE: REDIRECT http://xyz.com/shipping | @{ | "_log" : "${ruleIds.last}" | }@""".stripMargin "RulesTxtDeploymentService" should "generate rules files with correct file names" in { val rulesTxt = service.generateRulesTxtContentWithFilenames(core1Id, "LIVE", logDebug = false) rulesTxt.solrIndexId shouldBe core1Id rulesTxt.decompoundRules shouldBe empty rulesTxt.regularRules.content.trim shouldBe rulesFileContent(inputIds) rulesTxt.regularRules.sourceFileName shouldBe "/tmp/search-management-ui_rules-txt.tmp" rulesTxt.regularRules.destinationFileName shouldBe "/usr/bin/solr/liveCore/conf/rules.txt" } it should "validate the rules files correctly" in { val rulesTxt = service.generateRulesTxtContentWithFilenames(core1Id, "LIVE", logDebug = false) service.validateCompleteRulesTxts(rulesTxt, logDebug = false) shouldBe empty val badRulesTxt = rulesTxt.copy(regularRules = rulesTxt.regularRules.copy(content = "a very bad rules file")) service.validateCompleteRulesTxts(badRulesTxt, logDebug = false) shouldBe List("Line 1: Missing input for instruction") } it should "provide a zip file with all rules files" in { val out = new ByteArrayOutputStream() service.writeAllRulesTxtFilesAsZipFileToStream(out) val bytes = out.toByteArray val zipStream = new ZipInputStream(new ByteArrayInputStream(bytes)) val firstEntry = zipStream.getNextEntry firstEntry.getName shouldBe "rules_core1.txt" IOUtils.toString(zipStream, "UTF-8").trim shouldBe rulesFileContent(inputIds) val secondEntry = zipStream.getNextEntry secondEntry.getName shouldBe "rules_core2.txt" IOUtils.toString(zipStream, "UTF-8").trim shouldBe "" } }
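The last test reads the generated archive back by layering a ZipInputStream over the raw bytes; the service writes it in the opposite direction, presumably with a ZipOutputStream on top of the ByteArrayOutputStream handed in. A sketch of building such an in-memory zip (the entry names mimic the test but are otherwise arbitrary):

import java.io.ByteArrayOutputStream
import java.util.zip.{ZipEntry, ZipOutputStream}

object InMemoryZip {
  def main(args: Array[String]): Unit = {
    val baos = new ByteArrayOutputStream()
    val zip = new ZipOutputStream(baos)

    zip.putNextEntry(new ZipEntry("rules_core1.txt"))
    zip.write("aerosmith =>\n  SYNONYM: mercury\n".getBytes("UTF-8"))
    zip.closeEntry()

    zip.putNextEntry(new ZipEntry("rules_core2.txt"))
    zip.closeEntry() // an empty entry, like the second core in the test

    zip.close()
    println(s"zip archive of ${baos.toByteArray.length} bytes")
  }
}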
Example 187
Source File: AggregatorTest.scala From noether with Apache License 2.0 | 5 votes |
package com.spotify.noether import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} import com.twitter.algebird.Aggregator import org.scalatest._ import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers trait AggregatorTest extends AnyFlatSpec with Matchers { def run[A, B, C](aggregator: Aggregator[A, B, C])(as: Seq[A]): C = { val bs = as.map(aggregator.prepare _ compose ensureSerializable) val b = ensureSerializable(aggregator.reduce(bs)) ensureSerializable(aggregator.present(b)) } private def serializeToByteArray(value: Any): Array[Byte] = { val buffer = new ByteArrayOutputStream() val oos = new ObjectOutputStream(buffer) oos.writeObject(value) buffer.toByteArray } private def deserializeFromByteArray(encodedValue: Array[Byte]): AnyRef = { val ois = new ObjectInputStream(new ByteArrayInputStream(encodedValue)) ois.readObject() } private def ensureSerializable[T](value: T): T = deserializeFromByteArray(serializeToByteArray(value)).asInstanceOf[T] }
Example 188
Source File: TestingTypedCount.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.{ImperativeAggregate, TypedImperativeAggregate} import org.apache.spark.sql.hive.execution.TestingTypedCount.State import org.apache.spark.sql.types._ @ExpressionDescription( usage = "_FUNC_(expr) - A testing aggregate function resembles COUNT " + "but implements ObjectAggregateFunction.") case class TestingTypedCount( child: Expression, mutableAggBufferOffset: Int = 0, inputAggBufferOffset: Int = 0) extends TypedImperativeAggregate[TestingTypedCount.State] { def this(child: Expression) = this(child, 0, 0) override def children: Seq[Expression] = child :: Nil override def dataType: DataType = LongType override def nullable: Boolean = false override def createAggregationBuffer(): State = TestingTypedCount.State(0L) override def update(buffer: State, input: InternalRow): State = { if (child.eval(input) != null) { buffer.count += 1 } buffer } override def merge(buffer: State, input: State): State = { buffer.count += input.count buffer } override def eval(buffer: State): Any = buffer.count override def serialize(buffer: State): Array[Byte] = { val byteStream = new ByteArrayOutputStream() val dataStream = new DataOutputStream(byteStream) dataStream.writeLong(buffer.count) byteStream.toByteArray } override def deserialize(storageFormat: Array[Byte]): State = { val byteStream = new ByteArrayInputStream(storageFormat) val dataStream = new DataInputStream(byteStream) TestingTypedCount.State(dataStream.readLong()) } override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): ImperativeAggregate = copy(mutableAggBufferOffset = newMutableAggBufferOffset) override def withNewInputAggBufferOffset(newInputAggBufferOffset: Int): ImperativeAggregate = copy(inputAggBufferOffset = newInputAggBufferOffset) override val prettyName: String = "typed_count" } object TestingTypedCount { case class State(var count: Long) }
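The serialize/deserialize pair above is nothing more than a DataOutputStream/DataInputStream round trip over in-memory streams, writing the single long counter. The same pattern in isolation:

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

object DataStreamRoundTrip {
  def main(args: Array[String]): Unit = {
    // Write a primitive value into an in-memory buffer.
    val baos = new ByteArrayOutputStream()
    val out = new DataOutputStream(baos)
    out.writeLong(12345L)
    out.flush()

    // Read it back from the captured bytes.
    val in = new DataInputStream(new ByteArrayInputStream(baos.toByteArray))
    assert(in.readLong() == 12345L)
  }
}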
Example 189
Source File: SapSQLContextSuite.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql import java.io.{ByteArrayOutputStream, ObjectOutputStream} import org.apache.spark.sql.parser.SapParserException import org.apache.spark.util.DummyRelationUtils._ import org.mockito.Mockito import org.scalatest.FunSuite class SapSQLContextSuite extends FunSuite with GlobalSapSQLContext { test("SQL contexts do not support hive functions") { val rdd = sc.parallelize(Seq(Row("1"), Row("2"))) sqlc.createDataFrame(rdd, 'a.string, needsConversion = false) .registerTempTable("foo") intercept[AnalysisException] { sqlc.sql("SELECT int(a) FROM foo") } } test ("Check Spark Version"){ val sap_sqlc = sqlContext.asInstanceOf[CommonSapSQLContext] // current spark runtime version shall be supported sap_sqlc.checkSparkVersion(List(org.apache.spark.SPARK_VERSION)) // runtime exception for an unsupported version intercept[RuntimeException]{ sap_sqlc.checkSparkVersion(List("some.unsupported.version")) } } test("Slightly different versions") { val sap_sqlc = sqlContext.asInstanceOf[CommonSapSQLContext] val spy_sap_sqlc = Mockito.spy(sap_sqlc) Mockito.when(spy_sap_sqlc.getCurrentSparkVersion()) .thenReturn(org.apache.spark.SPARK_VERSION + "-CDH") // should not throw! spy_sap_sqlc.checkSparkVersion(spy_sap_sqlc.supportedVersions) Mockito.when(spy_sap_sqlc.getCurrentSparkVersion()) .thenReturn("something- " + org.apache.spark.SPARK_VERSION) // should not throw! spy_sap_sqlc.checkSparkVersion(spy_sap_sqlc.supportedVersions) } test("Ensure SapSQLContext stays serializable"){ // relevant for Bug 92818 // Remember that all class references in SapSQLContext must be serializable! val oos = new ObjectOutputStream(new ByteArrayOutputStream()) oos.writeObject(sqlContext) oos.close() } test("Rand function") { sqlContext.sql( s""" |CREATE TABLE test (name varchar(20), age integer) |USING com.sap.spark.dstest |OPTIONS ( |tableName "test" |) """.stripMargin) sqlContext.sql("SELECT * FROM test WHERE rand() < 0.1") } test("test version fields") { val sapSqlContext = sqlContext.asInstanceOf[CommonSapSQLContext] assert(sapSqlContext.EXTENSIONS_VERSION.isEmpty) assert(sapSqlContext.DATASOURCES_VERSION.isEmpty) } }
Example 190
Source File: SQLRunnerSuite.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package com.sap.spark.cli

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream}

import org.apache.spark.SparkContext
import org.apache.spark.sql.{GlobalSapSQLContext, SQLContext}
import org.scalatest.{BeforeAndAfterEach, FunSuite, ShouldMatchers}

// The class declaration and the opening of the first test are missing from this excerpt;
// both are reconstructed here (the exact mixins and the first test's name are assumptions)
// so that the remaining code compiles.
class SQLRunnerSuite
  extends FunSuite
  with BeforeAndAfterEach
  with GlobalSapSQLContext
  with ShouldMatchers {

  test("can parse command line options") {
    // good call
    val goodOpts =
      SQLRunner.parseOpts(List("a.sql", "b.sql", "-o", "output.csv"))

    goodOpts.sqlFiles should be(List("a.sql", "b.sql"))
    goodOpts.output should be(Some("output.csv"))

    // bad call
    val badOpts = SQLRunner.parseOpts(List())

    badOpts.sqlFiles should be(List())
    badOpts.output should be(None)

    // ugly call
    val uglyOpts =
      SQLRunner.parseOpts(List("a.sql", "-o", "output.csv", "b.sql"))

    uglyOpts.sqlFiles should be(List("a.sql", "b.sql"))
    uglyOpts.output should be(Some("output.csv"))
  }

  def runSQLTest(input: String, expectedOutput: String): Unit = {
    val inputStream: InputStream = new ByteArrayInputStream(input.getBytes())
    val outputStream = new ByteArrayOutputStream()

    SQLRunner.sql(inputStream, outputStream)

    val output = outputStream.toString
    output should be(expectedOutput)
  }

  test("can run dummy query") {
    val input = "SELECT 1;"
    val output = "1\n"

    runSQLTest(input, output)
  }

  test("can run multiple dummy queries") {
    val input = """
        |SELECT 1;SELECT 2;
        |SELECT 3;
      """.stripMargin
    val output = "1\n2\n3\n"

    runSQLTest(input, output)
  }

  test("can run a basic example with tables") {
    val input = """
        |SELECT * FROM DEMO_TABLE;
        |SELECT * FROM DEMO_TABLE LIMIT 1;
        |DROP TABLE DEMO_TABLE;
      """.stripMargin
    val output = "1,a\n2,b\n3,c\n1,a\n"

    runSQLTest(input, output)
  }

  test("can run an example with comments") {
    val input = """
        |SELECT * FROM DEMO_TABLE; -- this is the first query
        |SELECT * FROM DEMO_TABLE LIMIT 1;
        |-- now let's drop a table
        |DROP TABLE DEMO_TABLE;
      """.stripMargin
    val output = "1,a\n2,b\n3,c\n1,a\n"

    runSQLTest(input, output)
  }
}
Example 191
Source File: StyleChecker.scala From sbt-coursera with BSD 3-Clause "New" or "Revised" License | 5 votes |
package ch.epfl.lamp import sbt.File import java.io.ByteArrayOutputStream import java.io.PrintStream import org.scalastyle._ import Settings._ object StyleChecker { val maxResult = 100 class CustomTextOutput[T <: FileSpec]() extends Output[T] { private val messageHelper = new MessageHelper(this.getClass().getClassLoader()) var fileCount: Int = _ override def message(m: Message[T]): Unit = m match { case StartWork() => case EndWork() => case StartFile(file) => print("Checking file " + file + "...") fileCount = 0 case EndFile(file) => if (fileCount == 0) println(" OK!") case StyleError(file, clazz, key, level, args, line, column, customMessage) => report(line, column, messageHelper.text(level.name), Output.findMessage(messageHelper, clazz, key, args, customMessage)) case StyleException(file, clazz, message, stacktrace, line, column) => report(line, column, "error", message) } private def report(line: Option[Int], column: Option[Int], level: String, message: String) { if (fileCount == 0) println("") fileCount += 1 println(" " + fileCount + ". " + level + pos(line, column) + ":") println(" " + message) } private def pos(line: Option[Int], column: Option[Int]): String = line match { case Some(line) => " at line " + line + (column match { case Some(column) => " character " + column case None => "" }) case None => "" } } def score(outputResult: OutputResult) = { val penalties = outputResult.errors + outputResult.warnings scala.math.max(maxResult - penalties, 0) } def assess(sources: Seq[File], styleSheetPath: String, courseId: String): (String, Int) = { val configFile = new File(styleSheetPath).getAbsolutePath val messages = new ScalastyleChecker().checkFiles( ScalastyleConfiguration.readFromXml(configFile), Directory.getFiles(None, sources)) val output = new ByteArrayOutputStream() val outputResult = Console.withOut(new PrintStream(output)) { new CustomTextOutput().output(messages) } val msg = output.toString + "Processed " + outputResult.files + " file(s)\n" + "Found " + outputResult.errors + " errors\n" + "Found " + outputResult.warnings + " warnings\n" + (if (outputResult.errors + outputResult.warnings > 0) "Consult the style guide at %s/wiki/ScalaStyleGuide".format(baseURL(courseId)) else "") (msg, score(outputResult)) } }
Example 192
Source File: RegisterNodeSerializerTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.kryo import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import com.esotericsoftware.kryo.Kryo import com.esotericsoftware.kryo.io.{Input, Output} import justin.db.actors.protocol.RegisterNode import justin.db.consistenthashing.NodeId import org.scalatest.{FlatSpec, Matchers} class RegisterNodeSerializerTest extends FlatSpec with Matchers { behavior of "RegisterNode Serializer" it should "serialize/deserialize correctly" in { // kryo init val kryo = new Kryo() kryo.register(classOf[RegisterNode], RegisterNodeSerializer) // object val serializedData = RegisterNode(NodeId(1)) // serialization val bos = new ByteArrayOutputStream() val output = new Output(bos) val _ = kryo.writeObject(output, serializedData) output.flush() // deserialization val bis = new ByteArrayInputStream(bos.toByteArray) val input = new Input(bis) val deserializedData = kryo.readObject(input, classOf[RegisterNode]) serializedData shouldBe deserializedData } }
Example 193
Source File: DataSerializerTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.kryo import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.util.UUID import com.esotericsoftware.kryo.Kryo import com.esotericsoftware.kryo.io.{Input, Output} import justin.db.Data import justin.db.consistenthashing.NodeId import justin.db.vectorclocks.{Counter, VectorClock} import org.scalatest.{FlatSpec, Matchers} class DataSerializerTest extends FlatSpec with Matchers { behavior of "Data Serializer" it should "serialize/deserialize correctly" in { // kryo init val kryo = new Kryo() kryo.register(classOf[justin.db.Data], DataSerializer) // object val vClock = VectorClock[NodeId](Map(NodeId(1) -> Counter(3))) val timestamp = System.currentTimeMillis() val serializedData = Data(id = UUID.randomUUID(), value = "some value", vClock, timestamp) // serialization val bos = new ByteArrayOutputStream() val output = new Output(bos) val _ = kryo.writeObject(output, serializedData) output.flush() // deserialization val bis = new ByteArrayInputStream(bos.toByteArray) val input = new Input(bis) val deserializedData = kryo.readObject(input, classOf[Data]) serializedData shouldBe deserializedData } }
Example 194
Source File: StorageNodeWriteDataLocalSerializerTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.kryo import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.util.UUID import com.esotericsoftware.kryo.Kryo import com.esotericsoftware.kryo.io.{Input, Output} import justin.db.Data import justin.db.actors.protocol.StorageNodeWriteDataLocal import justin.db.consistenthashing.NodeId import justin.db.vectorclocks.{Counter, VectorClock} import org.scalatest.{FlatSpec, Matchers} class StorageNodeWriteDataLocalSerializerTest extends FlatSpec with Matchers { behavior of "StorageNodeWriteDataLocal Serializer" it should "serialize/deserialize StorageNodeWriteDataLocal" in { // kryo init val kryo = new Kryo() kryo.register(classOf[StorageNodeWriteDataLocal], StorageNodeWriteDataLocalSerializer) // object val data = Data( id = UUID.randomUUID(), value = "some value", vclock = VectorClock[NodeId](Map(NodeId(1) -> Counter(3))), timestamp = System.currentTimeMillis() ) val serializedData = StorageNodeWriteDataLocal(data) // serialization val bos = new ByteArrayOutputStream() val output = new Output(bos) val _ = kryo.writeObject(output, serializedData) output.flush() // deserialization val bis = new ByteArrayInputStream(bos.toByteArray) val input = new Input(bis) val deserializedData = kryo.readObject(input, classOf[StorageNodeWriteDataLocal]) serializedData shouldBe deserializedData } }
Example 195
Source File: StorageNodeLocalReadSerializerTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.kryo

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.UUID

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import justin.db.actors.protocol.StorageNodeLocalRead
import org.scalatest.{FlatSpec, Matchers}

class StorageNodeLocalReadSerializerTest extends FlatSpec with Matchers {

  behavior of "StorageNodeLocalReader Serializer"

  it should "serialize/deserialize correctly" in {
    // kryo init
    val kryo = new Kryo()
    kryo.register(classOf[StorageNodeLocalRead], StorageNodeLocalReadSerializer)

    // object
    val serializedData = StorageNodeLocalRead(UUID.randomUUID())

    // serialization
    val bos    = new ByteArrayOutputStream()
    val output = new Output(bos)
    val _      = kryo.writeObject(output, serializedData)
    output.flush()

    // deserialization
    val bis   = new ByteArrayInputStream(bos.toByteArray)
    val input = new Input(bis)
    val deserializedData = kryo.readObject(input, classOf[StorageNodeLocalRead])

    serializedData shouldBe deserializedData
  }
}
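StorageNodeLocalReadSerializer itself is not part of this listing either. A minimal sketch, assuming the message holds a single UUID field named id (an assumption), could encode it as two longs, the same pattern UUIDSerializer uses in the RocksDBStorage example below:

package justin.db.kryo

import java.util.UUID

import com.esotericsoftware.kryo.{Kryo, Serializer}
import com.esotericsoftware.kryo.io.{Input, Output}
import justin.db.actors.protocol.StorageNodeLocalRead

// Hypothetical sketch: the UUID is written as its two 64-bit halves.
object StorageNodeLocalReadSerializer extends Serializer[StorageNodeLocalRead] {
  override def write(kryo: Kryo, output: Output, msg: StorageNodeLocalRead): Unit = {
    output.writeLong(msg.id.getMostSignificantBits)  // `id` is an assumed field name
    output.writeLong(msg.id.getLeastSignificantBits)
  }

  override def read(kryo: Kryo, input: Input, `type`: Class[StorageNodeLocalRead]): StorageNodeLocalRead =
    StorageNodeLocalRead(new UUID(input.readLong(), input.readLong()))
}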
Example 196
Source File: RocksDBStorage.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.storage

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File}
import java.util.UUID

import com.esotericsoftware.kryo.io.{Input, Output}
import com.esotericsoftware.kryo.{Kryo, Serializer}
import justin.db.storage.PluggableStorageProtocol.{Ack, StorageGetData}
import org.rocksdb.{FlushOptions, Options, RocksDB}

import scala.concurrent.Future

// TODO:
// The current version stores all data in a single file (it ignores data originality).
// Data should eventually be split by ring partitionId.
// This might be an issue during possible data movements between nodes.
final class RocksDBStorage(dir: File) extends PluggableStorageProtocol {
  import RocksDBStorage._

  {
    RocksDB.loadLibrary()
  }

  private[this] val kryo = new Kryo()

  private[this] val db: RocksDB = {
    val options: Options = new Options().setCreateIfMissing(true)
    RocksDB.open(options, dir.getPath)
  }

  override def get(id: UUID)(resolveOriginality: (UUID) => PluggableStorageProtocol.DataOriginality): Future[PluggableStorageProtocol.StorageGetData] = {
    val key: Array[Byte]       = uuid2bytes(kryo, id)
    val dataBytes: Array[Byte] = db.get(key)

    val justinDataOpt = Option(dataBytes).map { dataBytes =>
      val input = new Input(new ByteArrayInputStream(dataBytes))
      JustinDataSerializer.read(kryo, input, classOf[JustinData])
    }

    Future.successful(justinDataOpt.map(StorageGetData.Single).getOrElse(StorageGetData.None))
  }

  override def put(data: JustinData)(resolveOriginality: (UUID) => PluggableStorageProtocol.DataOriginality): Future[PluggableStorageProtocol.Ack] = {
    val key: Array[Byte] = uuid2bytes(kryo, data.id)
    val dataBytes: Array[Byte] = {
      val output = new Output(new ByteArrayOutputStream())
      JustinDataSerializer.write(kryo, output, data)
      output.getBuffer
    }

    db.put(key, dataBytes)
    db.flush(new FlushOptions().setWaitForFlush(true))

    Ack.future
  }
}

object RocksDBStorage {

  def uuid2bytes(kryo: Kryo, id: UUID): Array[Byte] = {
    val output = new Output(new ByteArrayOutputStream(), 16)
    UUIDSerializer.write(kryo, output, id)
    output.getBuffer
  }

  object UUIDSerializer extends Serializer[UUID] {
    override def read(kryo: Kryo, input: Input, `type`: Class[UUID]): UUID = {
      new UUID(input.readLong, input.readLong)
    }

    override def write(kryo: Kryo, output: Output, uuid: UUID): Unit = {
      output.writeLong(uuid.getMostSignificantBits)
      output.writeLong(uuid.getLeastSignificantBits)
    }
  }

  object JustinDataSerializer extends Serializer[JustinData] {
    override def read(kryo: Kryo, input: Input, `type`: Class[JustinData]): JustinData = {
      JustinData(
        id        = UUIDSerializer.read(kryo, input, classOf[UUID]),
        value     = input.readString(),
        vclock    = input.readString(),
        timestamp = input.readLong()
      )
    }

    override def write(kryo: Kryo, output: Output, data: JustinData): Unit = {
      UUIDSerializer.write(kryo, output, data.id)
      output.writeString(data.value)
      output.writeString(data.vclock)
      output.writeLong(data.timestamp)
    }
  }
}
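One detail worth noting in put: Output.getBuffer returns Kryo's whole internal buffer, so the stored array can be larger than the bytes actually written (the Output(OutputStream) constructor allocates a default-sized buffer). Deserialization still works because the reader only consumes what it needs, but a more compact variant could return exactly the written bytes via Output.toBytes. A minimal sketch under that assumption (the helper object and method names are hypothetical):

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.Output
import justin.db.storage.{JustinData, RocksDBStorage}

object KryoBytes {
  // Hypothetical helper: serializes JustinData and returns only the bytes written
  // so far (Output.toBytes copies the buffer up to the current write position).
  def dataToBytes(kryo: Kryo, data: JustinData): Array[Byte] = {
    val output = new Output(256, -1) // initial size 256, growth unbounded
    RocksDBStorage.JustinDataSerializer.write(kryo, output, data)
    output.toBytes
  }
}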
Example 197
Source File: JustinDataSerializerTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.storage

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.UUID

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import justin.db.storage.RocksDBStorage.JustinDataSerializer
import org.scalatest.{FlatSpec, Matchers}

class JustinDataSerializerTest extends FlatSpec with Matchers {

  behavior of "JustinDataSerializer"

  it should "serialize/deserialize JustinData with Kryo" in {
    val kryo = new Kryo()
    val data = JustinData(
      id        = UUID.randomUUID,
      value     = "to jest przykladowa wartość",
      vclock    = "vclock-value",
      timestamp = 1234124L
    )

    // serialize
    val output = new Output(new ByteArrayOutputStream())
    JustinDataSerializer.write(kryo, output, data)
    val dataBytes = output.getBuffer

    // deserialize
    val input = new Input(new ByteArrayInputStream(dataBytes))
    JustinDataSerializer.read(kryo, input, classOf[JustinData]) shouldBe data
  }
}
Example 198
Source File: AvroRandomExtractor.scala From streamliner-examples with Apache License 2.0 | 5 votes |
package com.memsql.spark.examples.avro

import com.memsql.spark.etl.api._
import com.memsql.spark.etl.utils.PhaseLogger
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.sql.{SQLContext, DataFrame, Row}
import org.apache.spark.sql.types._

import org.apache.avro.Schema
import org.apache.avro.generic.GenericData
import org.apache.avro.io.{DatumWriter, EncoderFactory}
import org.apache.avro.specific.SpecificDatumWriter

import java.io.ByteArrayOutputStream

// Generates an RDD of byte arrays, where each is a serialized Avro record.
class AvroRandomExtractor extends Extractor {
  var count: Int = 1
  var generator: AvroRandomGenerator = null
  var writer: DatumWriter[GenericData.Record] = null
  var avroSchema: Schema = null

  def schema: StructType = StructType(StructField("bytes", BinaryType, false) :: Nil)

  val parser: Schema.Parser = new Schema.Parser()

  override def initialize(ssc: StreamingContext, sqlContext: SQLContext, config: PhaseConfig, batchInterval: Long, logger: PhaseLogger): Unit = {
    val userConfig = config.asInstanceOf[UserExtractConfig]
    val avroSchemaJson = userConfig.getConfigJsValue("avroSchema") match {
      case Some(s) => s
      case None => throw new IllegalArgumentException("avroSchema must be set in the config")
    }

    count = userConfig.getConfigInt("count").getOrElse(1)
    avroSchema = parser.parse(avroSchemaJson.toString)

    writer = new SpecificDatumWriter(avroSchema)
    generator = new AvroRandomGenerator(avroSchema)
  }

  override def next(ssc: StreamingContext, time: Long, sqlContext: SQLContext, config: PhaseConfig, batchInterval: Long, logger: PhaseLogger): Option[DataFrame] = {
    val rdd = sqlContext.sparkContext.parallelize((1 to count).map(_ => Row({
      val out = new ByteArrayOutputStream
      val encoder = EncoderFactory.get().binaryEncoder(out, null)
      val avroRecord: GenericData.Record = generator.next().asInstanceOf[GenericData.Record]

      writer.write(avroRecord, encoder)
      encoder.flush
      out.close
      out.toByteArray
    })))

    Some(sqlContext.createDataFrame(rdd, schema))
  }
}
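Each row produced above holds a binary-encoded Avro record. As a complement, here is a minimal standalone sketch (not part of the project) of decoding such a byte array back into a GenericRecord with Avro's standard GenericDatumReader/DecoderFactory API, assuming the same schema the extractor was configured with:

import org.apache.avro.Schema
import org.apache.avro.generic.{GenericDatumReader, GenericRecord}
import org.apache.avro.io.DecoderFactory

object AvroBytesDecoder {
  // Decodes one binary-encoded Avro record; `schema` must match the writer's schema.
  def decode(bytes: Array[Byte], schema: Schema): GenericRecord = {
    val reader  = new GenericDatumReader[GenericRecord](schema)
    val decoder = DecoderFactory.get().binaryDecoder(bytes, null)
    reader.read(null, decoder)
  }
}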
Example 199
Source File: ResponseHelper.scala From ledger-manager-chrome with MIT License | 5 votes |
package co.ledger.wallet.core.net

import java.io.{ByteArrayOutputStream, StringWriter}
import java.nio.charset.Charset

import org.json.{JSONArray, JSONObject}

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future
import scala.scalajs.js
import scala.util.{Failure, Success}

object ResponseHelper {

  implicit class ResponseFuture(f: Future[HttpClient#Response]) {

    def json: Future[(JSONObject, HttpClient#Response)] = {
      f.string.map {
        case (body, response) => (new JSONObject(body), response)
      }
    }

    def jsonArray: Future[(JSONArray, HttpClient#Response)] = {
      f.string.map {
        case (body, response) => (new JSONArray(body), response)
      }
    }

    def string: Future[(String, HttpClient#Response)] = {
      f.bytes.map {
        case (body, response) =>
          val writer = new StringWriter(body.length)
          body foreach { (char) =>
            writer.append(char.toChar)
          }
          (writer.toString, response)
      }
    }

    def bytes: Future[(Array[Byte], HttpClient#Response)] = {
      f.map { response =>
        val input = response.body
        val output = new ByteArrayOutputStream()
        val buffer = new Array[Byte](4096)
        var read = 0
        while ({read = input.read(buffer); read} > 0) {
          output.write(buffer, 0, read)
        }
        val result = output.toByteArray
        input.close()
        output.close()
        (result, response)
      }
    }

    def noResponseBody: Future[HttpClient#Response] = {
      f.andThen {
        case Success(response) =>
          response.body.close()
          response
        case Failure(cause) =>
          throw cause
      }
    }
  }
}
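The bytes helper above is the usual read-loop pattern for draining an InputStream into a ByteArrayOutputStream. A minimal standalone sketch of the same pattern (object and method names are illustrative only):

import java.io.{ByteArrayOutputStream, InputStream}

object StreamBytes {
  // Copies the stream into memory in 4 KiB chunks and returns the collected bytes.
  def readAll(input: InputStream): Array[Byte] = {
    val output = new ByteArrayOutputStream()
    val buffer = new Array[Byte](4096)
    var read   = input.read(buffer)
    while (read > 0) {
      output.write(buffer, 0, read)
      read = input.read(buffer)
    }
    output.toByteArray
  }
}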
Example 200
Source File: utils.scala From spark-http-stream with BSD 2-Clause "Simplified" License | 5 votes |
package org.apache.spark.sql.execution.streaming.http

import org.apache.spark.sql.types.StructField
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.types.TimestampType
import org.apache.spark.SparkConf
import org.apache.commons.io.IOUtils
import org.apache.spark.serializer.KryoSerializer
import java.io.InputStream
import com.esotericsoftware.kryo.io.Input
import java.io.ByteArrayOutputStream

class WrongArgumentException(name: String, value: Any)
  extends RuntimeException(s"wrong argument: $name=$value") {
}

class MissingRequiredArgumentException(map: Map[String, String], paramName: String)
  extends RuntimeException(s"missing required argument: $paramName, all parameters=$map") {
}

class InvalidSerializerNameException(serializerName: String)
  extends RuntimeException(s"invalid serializer name: $serializerName") {
}

object SchemaUtils {
  def buildSchema(schema: StructType, includesTimestamp: Boolean, timestampColumnName: String = "_TIMESTAMP_"): StructType = {
    if (!includesTimestamp)
      schema;
    else
      StructType(schema.fields.toSeq :+ StructField(timestampColumnName, TimestampType, false));
  }
}

object Params {
  // The excerpt references `kryoSerializer` without showing its definition, so a
  // Spark KryoSerializer instance is assumed here to make the snippet self-contained.
  val kryoSerializer = new KryoSerializer(new SparkConf())

  def deserialize(bytes: Array[Byte]): Any = {
    val kryo = kryoSerializer.newKryo();
    val input = new Input();
    input.setBuffer(bytes);
    kryo.readClassAndObject(input);
  }
}
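The excerpt ends with Params.deserialize. For completeness, a matching serialize counterpart could look like the following sketch; it is not shown in the listing, so the original implementation may differ, and the object name here is hypothetical:

import java.io.ByteArrayOutputStream

import com.esotericsoftware.kryo.io.Output
import org.apache.spark.SparkConf
import org.apache.spark.serializer.KryoSerializer

object ParamsSerialize {
  private val kryoSerializer = new KryoSerializer(new SparkConf())

  // Writes the object together with its class tag so that a
  // readClassAndObject-based deserializer (like Params.deserialize) can restore it.
  def serialize(obj: Any): Array[Byte] = {
    val kryo   = kryoSerializer.newKryo()
    val bos    = new ByteArrayOutputStream()
    val output = new Output(bos)
    kryo.writeClassAndObject(output, obj)
    output.flush()
    bos.toByteArray
  }
}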