com.esotericsoftware.kryo.Kryo Scala Examples

The following examples show how to use com.esotericsoftware.kryo.Kryo. Each example is taken from an open-source project; the source file and originating project are noted above each snippet.
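
Before looking at the project-specific code, the minimal round trip below shows the plain Kryo API that all of these examples build on: create a Kryo instance, register a class, write an object to an Output and read it back from an Input. This is a generic sketch rather than code from any of the projects on this page; note the no-arg constructor, which Kryo's default instantiation relies on.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}

// A simple mutable class with a no-arg constructor, which Kryo's default instantiation requires.
class Point(var x: Double, var y: Double) {
  def this() = this(0.0, 0.0)
}

object KryoRoundTrip extends App {
  val kryo = new Kryo()
  // Registration assigns the class a small numeric ID; it is mandatory only when
  // setRegistrationRequired(true) is used, but it keeps writeClassAndObject output compact.
  kryo.register(classOf[Point])

  // Serialize to an in-memory byte array.
  val bos = new ByteArrayOutputStream()
  val output = new Output(bos)
  kryo.writeObject(output, new Point(1.0, 2.0))
  output.close()

  // Deserialize it back.
  val input = new Input(new ByteArrayInputStream(bos.toByteArray))
  val restored = kryo.readObject(input, classOf[Point])
  input.close()

  println(s"restored point: (${restored.x}, ${restored.y})")
}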
Example 1
Source File: GraphKryoRegistrator.scala    From spark1.52   with Apache License 2.0
package org.apache.spark.graphx

import com.esotericsoftware.kryo.Kryo

import org.apache.spark.serializer.KryoRegistrator
import org.apache.spark.util.BoundedPriorityQueue
import org.apache.spark.util.collection.BitSet

import org.apache.spark.graphx.impl._
import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap
import org.apache.spark.util.collection.OpenHashSet


@deprecated("Register GraphX classes with Kryo using GraphXUtils.registerKryoClasses", "1.2.0")
class GraphKryoRegistrator extends KryoRegistrator {

  def registerClasses(kryo: Kryo) {
    kryo.register(classOf[Edge[Object]])
    kryo.register(classOf[(VertexId, Object)])
    kryo.register(classOf[EdgePartition[Object, Object]])
    kryo.register(classOf[BitSet])
    kryo.register(classOf[VertexIdToIndexMap])
    kryo.register(classOf[VertexAttributeBlock[Object]])
    kryo.register(classOf[PartitionStrategy])
    kryo.register(classOf[BoundedPriorityQueue[Object]])
    kryo.register(classOf[EdgeDirection])
    kryo.register(classOf[GraphXPrimitiveKeyOpenHashMap[VertexId, Int]])
    kryo.register(classOf[OpenHashSet[Int]])
    kryo.register(classOf[OpenHashSet[Long]])
  }
} 
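
A registrator like this only takes effect once Spark is configured to use Kryo and pointed at the registrator class. The settings below mirror the ones used in the Spark test suites further down this page (a minimal sketch; the deprecation notice above recommends GraphXUtils.registerKryoClasses(conf) instead).

import org.apache.spark.SparkConf
import org.apache.spark.graphx.GraphKryoRegistrator

val conf = new SparkConf()
  .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
  .set("spark.kryo.registrator", classOf[GraphKryoRegistrator].getName)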
Example 2
Source File: LinearTransformLayer.scala    From deepspark   with GNU General Public License v2.0
package com.github.nearbydelta.deepspark.layer

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import com.github.nearbydelta.deepspark.data._

import scala.collection.parallel.ParSeq


      val dGdx: DataVec = weight.value.t * dGdX

      (dGdW, dGdx)
    }.unzip

    (external, ParSeq(weight -= dW))
  }

  override def initiateBy(builder: WeightBuilder): this.type = {
    if (NIn > 0 && NOut > 0) {
      val range = act.initialize(NIn, NOut)
      builder.buildMatrix(weight, NOut, NIn, range)
    }

    this
  }

  override def loss: Double = weight.loss

  override def read(kryo: Kryo, input: Input): Unit = {
    act = kryo.readClassAndObject(input).asInstanceOf[Activation]
    weight.read(kryo, input)
    super.read(kryo, input)
  }

  override def write(kryo: Kryo, output: Output): Unit = {
    kryo.writeClassAndObject(output, act)
    weight.write(kryo, output)
    super.write(kryo, output)
  }
} 
Example 3
Source File: Ledger.scala    From deepspark   with GNU General Public License v2.0
package com.github.nearbydelta.deepspark.word.layer

import breeze.linalg.DenseVector
import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import com.github.nearbydelta.deepspark.data._
import com.github.nearbydelta.deepspark.layer.InputLayer
import com.github.nearbydelta.deepspark.word._
import org.apache.spark.SparkContext
import org.apache.spark.broadcast.Broadcast

import scala.reflect.{ClassTag, classTag}


trait Ledger[OutInfo] extends InputLayer[Array[Int], OutInfo] {
  @transient implicit override protected val evidenceI: ClassTag[Array[Int]] = classTag[Array[Int]]
  @transient var algorithm: LedgerAlgorithm = _
  var bcModel: Broadcast[LedgerModel] = _
  @transient var builder: LedgerBuilder = _
  var dimension: Int = 0
  @transient var model: LedgerModel = _
  protected var padID = -1

  def withModel(model: LedgerModel, builder: LedgerBuilder): this.type = {
    this.model = model
    this.builder = builder
    this.padID = model.padID
    this.dimension = model.dimension
    this.algorithm = builder.getUpdater(this.model.vectors)
    this
  }

  protected def pad =
    if (padID == -1) null
    else if (bcModel != null) vectorOf(bcModel.value.padID)
    else vectorOf(padID)

  protected def updateWord(word: Int, dx: DataVec): Unit =
    if (word != -1 && algorithm != null) {
      val vec = algorithm.delta.getOrElseUpdate(word, DenseVector.zeros[Double](dimension))
      vec += dx
    }

  protected def vectorOf(str: Int) =
    if (bcModel != null) bcModel.value.vectorAt(str)
    else model.vectorAt(str)

  override def broadcast(sc: SparkContext): Unit = {
    bcModel = sc.broadcast(model)
  }

  override def loss: Double = algorithm.loss

  override def read(kryo: Kryo, input: Input): Unit = {
    builder = kryo.readClassAndObject(input).asInstanceOf[LedgerBuilder]
    val model = new LedgerModel
    model.read(kryo, input)

    require(model.size > 0, "Model is empty!")
    withModel(model, builder)
    super.read(kryo, input)
  }

  override def unbroadcast(): Unit = {
    bcModel.unpersist(blocking = false)
  }

  @deprecated
  override def withInput(in: Int): this.type = this

  @deprecated
  override def withOutput(out: Int): this.type = this

  override def write(kryo: Kryo, output: Output): Unit = {
    kryo.writeClassAndObject(output, builder)
    model.write(kryo, output)
    super.write(kryo, output)
  }
} 
Example 4
Source File: FixedLedger.scala    From deepspark   with GNU General Public License v2.0
package com.github.nearbydelta.deepspark.word.layer

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import com.github.nearbydelta.deepspark.data._
import com.github.nearbydelta.deepspark.layer.InputLayer
import com.github.nearbydelta.deepspark.word._
import org.apache.spark.SparkContext
import org.apache.spark.broadcast.Broadcast

import scala.collection.parallel.ParSeq
import scala.reflect.{ClassTag, classTag}


trait FixedLedger[OutInfo] extends InputLayer[Array[Int], OutInfo] {
  @transient implicit override protected val evidenceI: ClassTag[Array[Int]] = classTag[Array[Int]]
  var bcModel: Broadcast[LedgerModel] = _
  @transient var model: LedgerModel = _
  protected var padID = -1

  def withModel(model: LedgerModel): this.type = {
    this.model = model
    this.padID = model.padID
    this
  }

  protected def pad =
    if (padID == -1) null
    else if (bcModel != null) vectorOf(bcModel.value.padID)
    else vectorOf(padID)

  protected def vectorOf(str: Int) =
    if (bcModel != null) bcModel.value.vectorAt(str)
    else model.vectorAt(str)

  override def backprop(seq: ParSeq[((Array[Int], OutInfo), DataVec)]): (ParSeq[DataVec], ParSeq[() ⇒ Unit]) =
    (null, ParSeq())

  override def broadcast(sc: SparkContext): Unit = {
    bcModel = sc.broadcast(model)
  }

  override def loss: Double = 0.0

  override def read(kryo: Kryo, input: Input): Unit = {
    val model = new LedgerModel
    model.read(kryo, input)
    withModel(model)
    super.read(kryo, input)
  }

  override def unbroadcast(): Unit = {
    bcModel.unpersist(blocking = false)
  }

  @deprecated
  override def withInput(in: Int): this.type = this

  @deprecated
  override def withOutput(out: Int): this.type = this

  override def write(kryo: Kryo, output: Output): Unit = {
    model.write(kryo, output)
    super.write(kryo, output)
  }
} 
Example 5
Source File: GeneralNetwork.scala    From deepspark   with GNU General Public License v2.0
package com.github.nearbydelta.deepspark.network

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import com.github.nearbydelta.deepspark.data._
import com.github.nearbydelta.deepspark.layer.InputLayer
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

import scala.collection.mutable.ArrayBuffer
import scala.collection.parallel.ParSeq


class GeneralNetwork[In, Out](var inputLayer: InputLayer[In, _]) extends Network[In, Out] {
  @deprecated(message = "This is for kryo deserialization. Please use this(inputlayer)")
  def this() = this(null)

  override def NOut: Int =
    layerSeq.lastOption match {
      case Some(x) ⇒ x.NOut
      case None if inputLayer != null ⇒ inputLayer.NOut
      case None ⇒ 0
    }

  override def backward(error: ParSeq[DataVec]): ArrayBuffer[() ⇒ Unit] = {
    val (upper, fseq) = backwardSeq(error)
    val (x, f) = inputLayer backward upper
    fseq ++= f.seq
    fseq
  }

  override def broadcast(sc: SparkContext): Unit = {
    inputLayer.broadcast(sc)
    super.broadcast(sc)
  }

  override def forward(in: In) = {
    val out = inputLayer.forward(in)
    forwardSingle(out)
  }

  override def forward(in: ParSeq[In]): ParSeq[DataVec] = {
    val out = inputLayer.forward(in)
    forwardSeq(out)
  }

  override def forward(in: RDD[(Long, In)]): RDD[(Long, DataVec)] = {
    val out = inputLayer.forward(in)
    broadcast(in.context)
    forwardRDD(out)
  }

  override def initiateBy(builder: WeightBuilder): this.type = {
    inputLayer.initiateBy(builder)
    super.initiateBy(builder)
    this
  }

  override def loss: Double = super.loss + inputLayer.loss

  override def read(kryo: Kryo, input: Input): Unit = {
    inputLayer = kryo.readClassAndObject(input).asInstanceOf[InputLayer[In, _]]
    super.read(kryo, input)
  }

  override def setUpdatable(bool: Boolean): Network[In, Out] = {
    inputLayer.setUpdatable(bool)
    super.setUpdatable(bool)
  }

  override def unbroadcast(): Unit = {
    inputLayer.unbroadcast()
    super.unbroadcast()
  }

  override def write(kryo: Kryo, output: Output): Unit = {
    kryo.writeClassAndObject(output, inputLayer)
    super.write(kryo, output)
  }
} 
Example 6
Source File: KryoSerializerDistributedSuite.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.serializer

import org.apache.spark.util.Utils

import com.esotericsoftware.kryo.Kryo

import org.apache.spark._
import org.apache.spark.serializer.KryoDistributedTest._

class KryoSerializerDistributedSuite extends SparkFunSuite {

  test("kryo objects are serialised consistently in different processes") {
    val conf = new SparkConf(false)
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .set("spark.kryo.registrator", classOf[AppJarRegistrator].getName)
      .set("spark.task.maxFailures", "1")

    val jar = TestUtils.createJarWithClasses(List(AppJarRegistrator.customClassName))
    conf.setJars(List(jar.getPath))

    val sc = new SparkContext("local-cluster[2,1,1024]", "test", conf)
    val original = Thread.currentThread.getContextClassLoader
    val loader = new java.net.URLClassLoader(Array(jar), Utils.getContextOrSparkClassLoader)
    SparkEnv.get.serializer.setDefaultClassLoader(loader)

    val cachedRDD = sc.parallelize((0 until 10).map((_, new MyCustomClass)), 3).cache()

    // Randomly mix the keys so that the join below will require a shuffle with each partition
    // sending data to multiple other partitions.
    val shuffledRDD = cachedRDD.map { case (i, o) => (i * i * i - 10 * i * i, o)}

    // Join the two RDDs, and force evaluation
    assert(shuffledRDD.join(cachedRDD).collect().size == 1)

    LocalSparkContext.stop(sc)
  }
}

object KryoDistributedTest {
  class MyCustomClass

  class AppJarRegistrator extends KryoRegistrator {
    override def registerClasses(k: Kryo) {
      val classLoader = Thread.currentThread.getContextClassLoader
      // scalastyle:off classforname
      k.register(Class.forName(AppJarRegistrator.customClassName, true, classLoader))
      // scalastyle:on classforname
    }
  }

  object AppJarRegistrator {
    val customClassName = "KryoSerializerDistributedSuiteCustomClass"
  }
} 
Example 7
Source File: SparkSqlSerializer.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.sql.execution

import java.nio.ByteBuffer
import java.util.{HashMap => JavaHashMap}

import scala.reflect.ClassTag
import com.esotericsoftware.kryo.io.{Input, Output}
import com.esotericsoftware.kryo.{Kryo, Serializer}
import com.twitter.chill.ResourcePool
import org.apache.spark.serializer.{KryoSerializer, SerializerInstance}
import org.apache.spark.sql.types.{Decimal, StructField, StructType}
import org.apache.spark.util.MutablePair
import org.apache.spark.{SparkConf, SparkEnv}


//private[sql]
class SparkSqlSerializer(conf: SparkConf) extends KryoSerializer(conf) {
  override def newKryo(): Kryo = {
    val kryo = super.newKryo()
    kryo.setRegistrationRequired(false)
    kryo.register(classOf[MutablePair[_, _]])
    kryo.register(classOf[org.apache.spark.sql.catalyst.expressions.GenericRow])
    kryo.register(classOf[org.apache.spark.sql.catalyst.expressions.GenericInternalRow])
    kryo.register(classOf[org.apache.spark.sql.catalyst.expressions.GenericMutableRow])
    kryo.register(classOf[java.math.BigDecimal], new JavaBigDecimalSerializer)
    kryo.register(classOf[BigDecimal], new ScalaBigDecimalSerializer)

    kryo.register(classOf[Decimal])
    kryo.register(classOf[JavaHashMap[_, _]])

    // APS
    kryo.register(classOf[StructType])
    kryo.register(classOf[StructField])

    kryo.setReferences(false)
    kryo
  }
}

private[execution] class KryoResourcePool(size: Int)
  extends ResourcePool[SerializerInstance](size) {

  val ser: SparkSqlSerializer = {
    val sparkConf = Option(SparkEnv.get).map(_.conf).getOrElse(new SparkConf())
    new SparkSqlSerializer(sparkConf)
  }

  def newInstance(): SerializerInstance = ser.newInstance()
}

//private[sql]
object SparkSqlSerializer {
  @transient lazy val resourcePool = new KryoResourcePool(30)

  private[this] def acquireRelease[O](fn: SerializerInstance => O): O = {
    val kryo = resourcePool.borrow
    try {
      fn(kryo)
    } finally {
      resourcePool.release(kryo)
    }
  }

  def serialize[T: ClassTag](o: T): Array[Byte] =
    acquireRelease { k =>
      k.serialize(o).array()
    }

  def deserialize[T: ClassTag](bytes: Array[Byte]): T =
    acquireRelease { k =>
      k.deserialize[T](ByteBuffer.wrap(bytes))
    }
}

private[sql] class JavaBigDecimalSerializer extends Serializer[java.math.BigDecimal] {
  def write(kryo: Kryo, output: Output, bd: java.math.BigDecimal) {
    // TODO: There are probably more efficient representations than strings...
    output.writeString(bd.toString)
  }

  def read(kryo: Kryo, input: Input, tpe: Class[java.math.BigDecimal]): java.math.BigDecimal = {
    new java.math.BigDecimal(input.readString())
  }
}

private[sql] class ScalaBigDecimalSerializer extends Serializer[BigDecimal] {
  def write(kryo: Kryo, output: Output, bd: BigDecimal) {
    // TODO: There are probably more efficient representations than strings...
    output.writeString(bd.toString)
  }

  def read(kryo: Kryo, input: Input, tpe: Class[BigDecimal]): BigDecimal = {
    new java.math.BigDecimal(input.readString())
  }
} 
Example 8
Source File: GraphKryoRegistrator.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.graphx

import com.esotericsoftware.kryo.Kryo

import org.apache.spark.serializer.KryoRegistrator
import org.apache.spark.util.BoundedPriorityQueue
import org.apache.spark.util.collection.BitSet

import org.apache.spark.graphx.impl._
import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap
import org.apache.spark.util.collection.OpenHashSet


@deprecated("Register GraphX classes with Kryo using GraphXUtils.registerKryoClasses", "1.2.0")
class GraphKryoRegistrator extends KryoRegistrator {

  def registerClasses(kryo: Kryo) {
    kryo.register(classOf[Edge[Object]])
    kryo.register(classOf[(VertexId, Object)])
    kryo.register(classOf[EdgePartition[Object, Object]])
    kryo.register(classOf[BitSet])
    kryo.register(classOf[VertexIdToIndexMap])
    kryo.register(classOf[VertexAttributeBlock[Object]])
    kryo.register(classOf[PartitionStrategy])
    kryo.register(classOf[BoundedPriorityQueue[Object]])
    kryo.register(classOf[EdgeDirection])
    kryo.register(classOf[GraphXPrimitiveKeyOpenHashMap[VertexId, Int]])
    kryo.register(classOf[OpenHashSet[Int]])
    kryo.register(classOf[OpenHashSet[Long]])
  }
} 
Example 9
Source File: KryoSerializerDistributedSuite.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.serializer

import com.esotericsoftware.kryo.Kryo

import org.apache.spark._
import org.apache.spark.internal.config
import org.apache.spark.serializer.KryoDistributedTest._
import org.apache.spark.util.Utils

class KryoSerializerDistributedSuite extends SparkFunSuite with LocalSparkContext {

  test("kryo objects are serialised consistently in different processes") {
    val conf = new SparkConf(false)
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .set("spark.kryo.registrator", classOf[AppJarRegistrator].getName)
      .set(config.MAX_TASK_FAILURES, 1)
      .set(config.BLACKLIST_ENABLED, false)

    val jar = TestUtils.createJarWithClasses(List(AppJarRegistrator.customClassName))
    conf.setJars(List(jar.getPath))

    sc = new SparkContext("local-cluster[2,1,1024]", "test", conf)
    val original = Thread.currentThread.getContextClassLoader
    val loader = new java.net.URLClassLoader(Array(jar), Utils.getContextOrSparkClassLoader)
    SparkEnv.get.serializer.setDefaultClassLoader(loader)

    val cachedRDD = sc.parallelize((0 until 10).map((_, new MyCustomClass)), 3).cache()

    // Randomly mix the keys so that the join below will require a shuffle with each partition
    // sending data to multiple other partitions.
    val shuffledRDD = cachedRDD.map { case (i, o) => (i * i * i - 10 * i * i, o)}

    // Join the two RDDs, and force evaluation
    assert(shuffledRDD.join(cachedRDD).collect().size == 1)
  }
}

object KryoDistributedTest {
  class MyCustomClass

  class AppJarRegistrator extends KryoRegistrator {
    override def registerClasses(k: Kryo) {
      val classLoader = Thread.currentThread.getContextClassLoader
      // scalastyle:off classforname
      k.register(Class.forName(AppJarRegistrator.customClassName, true, classLoader))
      // scalastyle:on classforname
    }
  }

  object AppJarRegistrator {
    val customClassName = "KryoSerializerDistributedSuiteCustomClass"
  }
} 
Example 10
Source File: BasicLayer.scala    From deepspark   with GNU General Public License v2.0
package com.github.nearbydelta.deepspark.layer

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import com.github.nearbydelta.deepspark.data._

import scala.collection.parallel.ParSeq


      val dGdx: DataVec = weight.value.t * dGdX

      (dGdX, dGdW, dGdx)
    }.unzip3

    (external, ParSeq(bias -= dX, weight -= dW))
  }

  override def initiateBy(builder: WeightBuilder): this.type = {
    if (NIn > 0 && NOut > 0) {
      val range = act.initialize(NIn, NOut)
      builder.buildMatrix(weight, NOut, NIn, range)
      builder.buildVector(bias, NOut, range)
    }

    this
  }

  override def loss: Double = weight.loss + bias.loss

  override def read(kryo: Kryo, input: Input): Unit = {
    act = kryo.readClassAndObject(input).asInstanceOf[Activation]
    weight.read(kryo, input)
    bias.read(kryo, input)
    super.read(kryo, input)
  }

  override def write(kryo: Kryo, output: Output): Unit = {
    kryo.writeClassAndObject(output, act)
    weight.write(kryo, output)
    bias.write(kryo, output)
    super.write(kryo, output)
  }
} 
Example 11
Source File: OpKryoRegistrator.scala    From TransmogrifAI   with BSD 3-Clause "New" or "Revised" License
package com.salesforce.op.utils.kryo

import java.util.TreeMap

import com.esotericsoftware.kryo.{Kryo, Registration}
import com.esotericsoftware.kryo.serializers.DefaultSerializers.TreeMapSerializer
import com.salesforce.op.utils.stats.StreamingHistogram
import com.salesforce.op.utils.stats.StreamingHistogram.{StreamingHistogramBuilder, StreamingHistogramComparator}
import com.twitter.chill.algebird.AlgebirdRegistrar
import com.twitter.chill.avro.AvroSerializer
import org.apache.avro.generic.GenericData
import org.apache.avro.specific.SpecificRecordBase
import org.apache.spark.serializer.KryoRegistrator

import scala.collection.mutable.{WrappedArray => MWrappedArray}
import scala.reflect._


class OpKryoRegistrator extends KryoRegistrator {

  protected def doAvroRegistration[T <: SpecificRecordBase : ClassTag](kryo: Kryo): Registration =
    kryo.register(classTag[T].runtimeClass, AvroSerializer.SpecificRecordBinarySerializer[T])

  protected def doClassRegistration(kryo: Kryo)(seqPC: Class[_]*): Unit =
    seqPC.foreach { pC =>
      kryo.register(pC)
      // also register arrays of that class
      val arrayType = java.lang.reflect.Array.newInstance(pC, 0).getClass
      kryo.register(arrayType)
    }

  final override def registerClasses(kryo: Kryo): Unit = {
    doClassRegistration(kryo)(
      classOf[org.apache.avro.generic.GenericData],
      scala.collection.immutable.Map.empty[Any, Any].getClass
    )
    doClassRegistration(kryo)(
      OpKryoClasses.ArraysOfPrimitives: _*
    )
    // Avro generic-data array deserialization fails - hence providing workaround
    kryo.register(
      classOf[GenericData.Array[_]],
      new GenericJavaCollectionSerializer(classOf[java.util.ArrayList[_]])
    )

    new AlgebirdRegistrar().apply(kryo)
    registerCustomClasses(kryo)

    // Streaming histogram registration
    kryo.register(classOf[StreamingHistogram])
    kryo.register(classOf[StreamingHistogramBuilder])
    kryo.register(classOf[StreamingHistogramComparator])
    kryo.register(classOf[TreeMap[_, _]], new TreeMapSerializer())

    // Mutable wrapped arrays
    OpKryoClasses.WrappedArrays.foreach(kryo.register)

  }

  protected def registerCustomClasses(kryo: Kryo): Unit = ()
}

private[op] object OpKryoClasses {

  lazy val ArraysOfPrimitives: Seq[Class[_]] = Seq(
    Class.forName("[Z"), Class.forName("[B"), Class.forName("[C"), Class.forName("[S"),
    Class.forName("[I"), Class.forName("[J"), Class.forName("[F"), Class.forName("[D")
  )

  lazy val WrappedArrays: Seq[Class[_]] = Seq(
    MWrappedArray.make(Array[Boolean]()).getClass,
    MWrappedArray.make(Array[Byte]()).getClass,
    MWrappedArray.make(Array[Char]()).getClass,
    MWrappedArray.make(Array[Double]()).getClass,
    MWrappedArray.make(Array[Float]()).getClass,
    MWrappedArray.make(Array[Int]()).getClass,
    MWrappedArray.make(Array[Long]()).getClass,
    MWrappedArray.make(Array[Short]()).getClass,
    MWrappedArray.make(Array[String]()).getClass
  )
} 
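
Since registerClasses is final and ends by calling registerCustomClasses, the intended extension point appears to be overriding that hook in a subclass. A sketch under that assumption, with MyRecord as a purely hypothetical application class:

import com.esotericsoftware.kryo.Kryo
import com.salesforce.op.utils.kryo.OpKryoRegistrator

// Hypothetical domain class, used only to illustrate the hook.
case class MyRecord(id: Int, name: String)

class MyProjectKryoRegistrator extends OpKryoRegistrator {
  override protected def registerCustomClasses(kryo: Kryo): Unit =
    doClassRegistration(kryo)(classOf[MyRecord])
}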
Example 12
Source File: KryoSerializerDistributedSuite.scala    From iolap   with Apache License 2.0
package org.apache.spark.serializer

import org.apache.spark.util.Utils

import com.esotericsoftware.kryo.Kryo

import org.apache.spark._
import org.apache.spark.serializer.KryoDistributedTest._

class KryoSerializerDistributedSuite extends SparkFunSuite {

  test("kryo objects are serialised consistently in different processes") {
    val conf = new SparkConf(false)
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .set("spark.kryo.registrator", classOf[AppJarRegistrator].getName)
      .set("spark.task.maxFailures", "1")

    val jar = TestUtils.createJarWithClasses(List(AppJarRegistrator.customClassName))
    conf.setJars(List(jar.getPath))

    val sc = new SparkContext("local-cluster[2,1,512]", "test", conf)
    val original = Thread.currentThread.getContextClassLoader
    val loader = new java.net.URLClassLoader(Array(jar), Utils.getContextOrSparkClassLoader)
    SparkEnv.get.serializer.setDefaultClassLoader(loader)

    val cachedRDD = sc.parallelize((0 until 10).map((_, new MyCustomClass)), 3).cache()

    // Randomly mix the keys so that the join below will require a shuffle with each partition
    // sending data to multiple other partitions.
    val shuffledRDD = cachedRDD.map { case (i, o) => (i * i * i - 10 * i * i, o)}

    // Join the two RDDs, and force evaluation
    assert(shuffledRDD.join(cachedRDD).collect().size == 1)

    LocalSparkContext.stop(sc)
  }
}

object KryoDistributedTest {
  class MyCustomClass

  class AppJarRegistrator extends KryoRegistrator {
    override def registerClasses(k: Kryo) {
      val classLoader = Thread.currentThread.getContextClassLoader
      k.register(Class.forName(AppJarRegistrator.customClassName, true, classLoader))
    }
  }

  object AppJarRegistrator {
    val customClassName = "KryoSerializerDistributedSuiteCustomClass"
  }
} 
Example 13
Source File: GraphKryoRegistrator.scala    From iolap   with Apache License 2.0
package org.apache.spark.graphx

import com.esotericsoftware.kryo.Kryo

import org.apache.spark.serializer.KryoRegistrator
import org.apache.spark.util.BoundedPriorityQueue
import org.apache.spark.util.collection.BitSet

import org.apache.spark.graphx.impl._
import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap
import org.apache.spark.util.collection.OpenHashSet


@deprecated("Register GraphX classes with Kryo using GraphXUtils.registerKryoClasses", "1.2.0")
class GraphKryoRegistrator extends KryoRegistrator {

  def registerClasses(kryo: Kryo) {
    kryo.register(classOf[Edge[Object]])
    kryo.register(classOf[(VertexId, Object)])
    kryo.register(classOf[EdgePartition[Object, Object]])
    kryo.register(classOf[BitSet])
    kryo.register(classOf[VertexIdToIndexMap])
    kryo.register(classOf[VertexAttributeBlock[Object]])
    kryo.register(classOf[PartitionStrategy])
    kryo.register(classOf[BoundedPriorityQueue[Object]])
    kryo.register(classOf[EdgeDirection])
    kryo.register(classOf[GraphXPrimitiveKeyOpenHashMap[VertexId, Int]])
    kryo.register(classOf[OpenHashSet[Int]])
    kryo.register(classOf[OpenHashSet[Long]])
  }
} 
Example 14
Source File: KryoSerializerDistributedSuite.scala    From multi-tenancy-spark   with Apache License 2.0
package org.apache.spark.serializer

import com.esotericsoftware.kryo.Kryo

import org.apache.spark._
import org.apache.spark.internal.config
import org.apache.spark.serializer.KryoDistributedTest._
import org.apache.spark.util.Utils

class KryoSerializerDistributedSuite extends SparkFunSuite with LocalSparkContext {

  test("kryo objects are serialised consistently in different processes") {
    val conf = new SparkConf(false)
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .set("spark.kryo.registrator", classOf[AppJarRegistrator].getName)
      .set(config.MAX_TASK_FAILURES, 1)
      .set(config.BLACKLIST_ENABLED, false)

    val jar = TestUtils.createJarWithClasses(List(AppJarRegistrator.customClassName))
    conf.setJars(List(jar.getPath))

    sc = new SparkContext("local-cluster[2,1,1024]", "test", conf)
    val original = Thread.currentThread.getContextClassLoader
    val loader = new java.net.URLClassLoader(Array(jar), Utils.getContextOrSparkClassLoader)
    SparkEnv.get(sc._sparkUser).serializer.setDefaultClassLoader(loader)

    val cachedRDD = sc.parallelize((0 until 10).map((_, new MyCustomClass)), 3).cache()

    // Randomly mix the keys so that the join below will require a shuffle with each partition
    // sending data to multiple other partitions.
    val shuffledRDD = cachedRDD.map { case (i, o) => (i * i * i - 10 * i * i, o)}

    // Join the two RDDs, and force evaluation
    assert(shuffledRDD.join(cachedRDD).collect().size == 1)
  }
}

object KryoDistributedTest {
  class MyCustomClass

  class AppJarRegistrator extends KryoRegistrator {
    override def registerClasses(k: Kryo) {
      val classLoader = Thread.currentThread.getContextClassLoader
      // scalastyle:off classforname
      k.register(Class.forName(AppJarRegistrator.customClassName, true, classLoader))
      // scalastyle:on classforname
    }
  }

  object AppJarRegistrator {
    val customClassName = "KryoSerializerDistributedSuiteCustomClass"
  }
} 
Example 15
Source File: HailKryoRegistrator.scala    From hail   with MIT License
package is.hail.kryo

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.serializers.JavaSerializer
import is.hail.annotations.{Region, UnsafeIndexedSeq, UnsafeRow}
import is.hail.utils.{Interval, SerializableHadoopConfiguration}
import is.hail.variant.Locus
import org.apache.spark.serializer.KryoRegistrator
import org.apache.spark.sql.catalyst.expressions.GenericRow

class HailKryoRegistrator extends KryoRegistrator {
  override def registerClasses(kryo: Kryo) {
    kryo.register(classOf[SerializableHadoopConfiguration], new JavaSerializer())
    kryo.register(classOf[UnsafeRow])
    kryo.register(classOf[GenericRow])
    kryo.register(classOf[Locus])
    kryo.register(classOf[Interval])
    kryo.register(classOf[UnsafeIndexedSeq])
    kryo.register(classOf[Region])
  }
} 
Example 16
Source File: TimeSeriesKryoRegistrator.scala    From spark-timeseries   with Apache License 2.0
package com.cloudera.sparkts

import com.esotericsoftware.kryo.{Serializer, Kryo}
import com.esotericsoftware.kryo.io.{Output, Input}

import org.apache.spark.SparkConf
import org.apache.spark.serializer.{KryoRegistrator, KryoSerializer}
import com.cloudera.sparkts.TimeSeriesUtils._

import java.time._

class TimeSeriesKryoRegistrator extends KryoRegistrator {
  def registerClasses(kryo: Kryo): Unit = {
    kryo.register(classOf[TimeSeries[_]])
    kryo.register(classOf[UniformDateTimeIndex])
    kryo.register(classOf[IrregularDateTimeIndex])
    kryo.register(classOf[BusinessDayFrequency])
    kryo.register(classOf[DayFrequency])
    kryo.register(classOf[ZonedDateTime], new DateTimeSerializer)
  }
}

class DateTimeSerializer extends Serializer[ZonedDateTime] {
  def write(kryo: Kryo, out: Output, dt: ZonedDateTime): Unit = {
    out.writeLong(zonedDateTimeToLong(dt), true)
  }

  def read(kryo: Kryo, in: Input, clazz: Class[ZonedDateTime]): ZonedDateTime = {
    longToZonedDateTime(in.readLong(true), ZoneId.systemDefault())
  }
}

object TimeSeriesKryoRegistrator {
  def registerKryoClasses(conf: SparkConf): Unit = {
    conf.set("spark.serializer", classOf[KryoSerializer].getName)
    conf.set("spark.kryo.registrator", classOf[TimeSeriesKryoRegistrator].getName)
  }
} 
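
Using the helper object above is then a one-liner before the SparkContext is created (a minimal sketch):

import com.cloudera.sparkts.TimeSeriesKryoRegistrator
import org.apache.spark.{SparkConf, SparkContext}

val conf = new SparkConf().setAppName("timeseries-example").setMaster("local[*]")
TimeSeriesKryoRegistrator.registerKryoClasses(conf)
val sc = new SparkContext(conf)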
Example 17
Source File: IkeKryoRegistrator.scala    From ike   with Apache License 2.0
package org.allenai.ike

import org.allenai.ike.patterns.NamedPattern

import com.esotericsoftware.kryo.io.{ Input, Output }
import com.esotericsoftware.kryo.{ Kryo, Serializer }
import org.apache.spark.serializer.KryoRegistrator


class IkeKryoRegistrator extends KryoRegistrator {
  override def registerClasses(kryo: Kryo): Unit = {
    OptionSerializers.register(kryo)
    kryo.register(Class.forName("scala.collection.immutable.Nil$"))

    val classes: Array[Class[_]] = Array(
      classOf[BlackLabResult],
      classOf[Interval],
      classOf[WordData],
      classOf[java.time.Instant],
      classOf[java.time.LocalDate],
      classOf[java.time.Year]
    )

    classes.foreach(kryo.register)
  }
} 
Example 18
Source File: KryoSerializerDistributedSuite.scala    From SparkCore   with Apache License 2.0
package org.apache.spark.serializer

import org.apache.spark.util.Utils

import com.esotericsoftware.kryo.Kryo
import org.scalatest.FunSuite

import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkEnv, TestUtils}
import org.apache.spark.serializer.KryoDistributedTest._

class KryoSerializerDistributedSuite extends FunSuite {

  test("kryo objects are serialised consistently in different processes") {
    val conf = new SparkConf(false)
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .set("spark.kryo.registrator", classOf[AppJarRegistrator].getName)
      .set("spark.task.maxFailures", "1")

    val jar = TestUtils.createJarWithClasses(List(AppJarRegistrator.customClassName))
    conf.setJars(List(jar.getPath))

    val sc = new SparkContext("local-cluster[2,1,512]", "test", conf)
    val original = Thread.currentThread.getContextClassLoader
    val loader = new java.net.URLClassLoader(Array(jar), Utils.getContextOrSparkClassLoader)
    SparkEnv.get.serializer.setDefaultClassLoader(loader)

    val cachedRDD = sc.parallelize((0 until 10).map((_, new MyCustomClass)), 3).cache()

    // Randomly mix the keys so that the join below will require a shuffle with each partition
    // sending data to multiple other partitions.
    val shuffledRDD = cachedRDD.map { case (i, o) => (i * i * i - 10 * i * i, o)}

    // Join the two RDDs, and force evaluation
    assert(shuffledRDD.join(cachedRDD).collect().size == 1)

    LocalSparkContext.stop(sc)
  }
}

object KryoDistributedTest {
  class MyCustomClass

  class AppJarRegistrator extends KryoRegistrator {
    override def registerClasses(k: Kryo) {
      val classLoader = Thread.currentThread.getContextClassLoader
      k.register(Class.forName(AppJarRegistrator.customClassName, true, classLoader))
    }
  }

  object AppJarRegistrator {
    val customClassName = "KryoSerializerDistributedSuiteCustomClass"
  }
} 
Example 19
Source File: ReadingWritingData.scala    From Spark-RSVD   with Apache License 2.0
package com.criteo.rsvd

import java.nio.ByteBuffer

import com.esotericsoftware.kryo.Kryo
import com.typesafe.scalalogging.slf4j.StrictLogging
import de.javakaffee.kryoserializers.UnmodifiableCollectionsSerializer
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.io.{BytesWritable, NullWritable}
import org.apache.spark.mllib.linalg.distributed.MatrixEntry
import org.apache.spark.rdd.RDD
import org.apache.spark.serializer.{KryoRegistrator, KryoSerializer}
import org.apache.spark.{SparkConf, SparkContext}

import scala.reflect.ClassTag

object ReadingWritingData extends StrictLogging {

  def getInputDataSizeMB(inputPathPattern: String, sc: SparkContext): Int = {
    val fs = FileSystem.get(sc.hadoopConfiguration)
    val path = new Path(inputPathPattern)
    (fs.globStatus(path).map(f => f.getLen).sum / 1024 / 1024).toInt
  }

  def loadMatrixEntries(inputPath: String,
                        singlePartitionSizeMB: Int,
                        sc: SparkContext): RDD[MatrixEntry] = {

    logger.info(s"Input matrix path: $inputPath")
    val inputDataSizeMB = getInputDataSizeMB(inputPath + "*", sc)
    val numPartitions = math.max(1, inputDataSizeMB / singlePartitionSizeMB)
    makeRddFromKryoFile[MatrixEntry](sc, inputPath, Some(numPartitions))
  }

  def makeRddFromKryoFile[T: ClassTag](
      sc: SparkContext,
      path: String,
      minPartitionsOpt: Option[Int] = None): RDD[T] = {
    val minPartitions = minPartitionsOpt.getOrElse(sc.defaultMinPartitions)
    val serializer = new KryoSerializer(sc.getConf)
    sc.sequenceFile(path,
                    classOf[NullWritable],
                    classOf[BytesWritable],
                    minPartitions)
      .mapPartitions { it =>
        val instance = serializer.newInstance()
        it.flatMap {
          case (_, v) =>
            instance.deserialize[Array[T]](ByteBuffer.wrap(v.getBytes))
        }
      }
  }

  object RandomizedSVDKryoRegistrator extends KryoRegistrator {

    def registerClasses(kryo: Kryo): Unit = {
      UnmodifiableCollectionsSerializer.registerSerializers(kryo)
      kryo.register(classOf[MatrixEntry])
      kryo.register(classOf[Array[MatrixEntry]])
    }
  }

  def appendBasicRegistratorToSparkConf(sparkConf: SparkConf): SparkConf =
    appendRegistratorToSparkConf(sparkConf,
                                 RandomizedSVDKryoRegistrator.getClass.getName)

  def appendRegistratorToSparkConf(sparkConf: SparkConf,
                                   registratorName: String): SparkConf = {
    val oldValue = sparkConf.get("spark.kryo.registrator", "")
    if (oldValue == "") {
      sparkConf.set("spark.kryo.registrator", registratorName)
    } else {
      sparkConf.set("spark.kryo.registrator", oldValue + "," + registratorName)
    }
  }

} 
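
A typical call site appends the registrator to a SparkConf before creating the context and then loads the matrix entries; the input path below is only a placeholder (a sketch based on the methods above):

import com.criteo.rsvd.ReadingWritingData
import org.apache.spark.{SparkConf, SparkContext}

val conf = ReadingWritingData.appendBasicRegistratorToSparkConf(
  new SparkConf().setAppName("rsvd-input"))
val sc = new SparkContext(conf)
// "/tmp/matrix-entries" stands in for an actual Kryo-serialized sequence-file location.
val entries = ReadingWritingData.loadMatrixEntries("/tmp/matrix-entries", singlePartitionSizeMB = 100, sc = sc)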
Example 20
Source File: KryoSerializerInit.scala    From nexus-kg   with Apache License 2.0
package ch.epfl.bluebrain.nexus.kg.serializers

import java.nio.file.Path

import com.esotericsoftware.kryo.{Kryo, Serializer}
import com.esotericsoftware.kryo.io.{Input, Output}
import io.altoo.akka.serialization.kryo.DefaultKryoInitializer
import io.altoo.akka.serialization.kryo.serializer.scala.ScalaKryo

class PathSerializer extends Serializer[Path] {

  override def write(kryo: Kryo, output: Output, path: Path): Unit =
    output.writeString(path.toString)

  override def read(kryo: Kryo, input: Input, `type`: Class[Path]): Path =
    Path.of(input.readString())
}

class KryoSerializerInit extends DefaultKryoInitializer {

  override def postInit(kryo: ScalaKryo): Unit = {
    super.postInit(kryo)
    kryo.addDefaultSerializer(classOf[Path], classOf[PathSerializer])
    kryo.register(classOf[Path], new PathSerializer)
    ()
  }
} 
Example 21
Source File: SparkSerializer.scala    From mleap   with Apache License 2.0
package com.truecar.mleap.spark.benchmark.util

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import com.twitter.chill.ScalaKryoInstantiator


object SparkSerializer {
  def apply(): SparkSerializer = {
    val kryoInstantiator = new ScalaKryoInstantiator()
    kryoInstantiator.setRegistrationRequired(false)
    val kryo = kryoInstantiator.newKryo()
    kryo.setClassLoader(Thread.currentThread.getContextClassLoader)

    SparkSerializer(kryo)
  }
}

case class SparkSerializer(kryo: Kryo) {
  def write[T](obj: T, output: Output) = {
    kryo.writeClassAndObject(output, obj)
  }

  def read[T](input: Input): T = {
    kryo.readClassAndObject(input).asInstanceOf[T]
  }
} 
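
A quick round trip with the helper looks like the sketch below; because registration is not required, arbitrary Scala objects can be written.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import com.esotericsoftware.kryo.io.{Input, Output}
import com.truecar.mleap.spark.benchmark.util.SparkSerializer

val serializer = SparkSerializer()

// Write a value to an in-memory Output.
val bos = new ByteArrayOutputStream()
val output = new Output(bos)
serializer.write(Map("a" -> 1, "b" -> 2), output)
output.close()

// Read it back from an Input over the same bytes.
val input = new Input(new ByteArrayInputStream(bos.toByteArray))
val restored = serializer.read[Map[String, Int]](input)
input.close()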
Example 22
Source File: ModelStateSerializerKryo.scala    From model-serving-tutorial   with Apache License 2.0
package com.lightbend.modelserving.spark

import com.esotericsoftware.kryo.io.{Input, Output}
import com.esotericsoftware.kryo.{Kryo, Serializer}
import com.lightbend.model.winerecord.WineRecord
import com.lightbend.modelserving.model.ModelFactoryResolver
import org.apache.spark.serializer.KryoRegistrator


class ModelStateSerializerKryo extends Serializer[ModelState] {

  override def write(kryo: Kryo, output: Output, value: ModelState): Unit = {
    val start = System.currentTimeMillis()
    output.writeLong(value.name.length)
    output.write(value.name.getBytes)
    output.writeLong(value.model.getType.value.toLong)
    val bytes = value.model.toBytes
    output.writeLong(bytes.length)
    output.write(bytes)
    println(s"KRYO serialization in ${System.currentTimeMillis() - start} ms")
  }
}

object ModelStateSerializerKryo{

  // Model Factory resolver
  private var resolver : ModelFactoryResolver[WineRecord, Double] = _

  // This method has to be invoked before execution starts
  def setResolver(res : ModelFactoryResolver[WineRecord, Double]) : Unit = resolver = res
  // Ensure that resolver is set
  private def validateResolver() : Unit = if(resolver == null) throw new Exception("Model factory resolver is not set")
}

class ModelStateRegistrator extends KryoRegistrator {
  override def registerClasses(kryo: Kryo) {
    kryo.register(classOf[ModelState], new ModelStateSerializerKryo())
  }
} 
Example 23
Source File: JTraversableSerializer.scala    From scio   with Apache License 2.0
package com.spotify.scio.coders.instances.kryo

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, InputChunked, Output, OutputChunked}
import com.twitter.chill.KSerializer

import scala.jdk.CollectionConverters._
import scala.collection.mutable
import scala.collection.compat._


private[coders] class JTraversableSerializer[T, C <: Traversable[T]](
  val bufferSize: Int = 64 * 1024
)(implicit cbf: Factory[T, C])
    extends KSerializer[C] {
  override def write(kser: Kryo, out: Output, obj: C): Unit = {
    val i = obj.iterator
    val chunked = new OutputChunked(out, bufferSize)
    while (i.hasNext) {
      chunked.writeBoolean(true)
      kser.writeClassAndObject(chunked, i.next())
    }
    chunked.writeBoolean(false)
    chunked.endChunks()
    chunked.flush()
  }

  override def read(kser: Kryo, in: Input, cls: Class[C]): C = {
    val b = cbf.newBuilder
    val chunked = new InputChunked(in, bufferSize)
    while (chunked.readBoolean()) {
      b += kser.readClassAndObject(chunked).asInstanceOf[T]
    }
    b.result()
  }
}

// workaround for Java Iterable/Collection missing proper equality check
abstract private[coders] class JWrapperCBF[T] extends Factory[T, Iterable[T]] {
  def asScala(xs: java.util.List[T]): Iterable[T]

  class JIterableWrapperBuilder extends mutable.Builder[T, Iterable[T]] {
    private val xs = new java.util.ArrayList[T]()

    override def addOne(elem: T): this.type = {
      xs.add(elem)
      this
    }

    override def clear(): Unit = xs.clear()
    override def result(): Iterable[T] = asScala(xs)
  }

  override def fromSpecific(it: IterableOnce[T]): Iterable[T] = {
    val b = new JIterableWrapperBuilder
    it.foreach(b += _)
    b.result()
  }

  override def newBuilder: mutable.Builder[T, Iterable[T]] = new JIterableWrapperBuilder
}

private[coders] class JIterableWrapperCBF[T] extends JWrapperCBF[T] {
  override def asScala(xs: java.util.List[T]): Iterable[T] =
    xs.asInstanceOf[java.lang.Iterable[T]].asScala
}

private[coders] class JCollectionWrapperCBF[T] extends JWrapperCBF[T] {
  override def asScala(xs: java.util.List[T]): Iterable[T] =
    xs.asInstanceOf[java.util.Collection[T]].asScala
} 
Example 24
Source File: JodaSerializer.scala    From scio   with Apache License 2.0
package com.spotify.scio.coders.instances.kryo

import com.esotericsoftware.kryo.io.{Input, Output}
import com.esotericsoftware.kryo.{Kryo, Serializer}
import org.joda.time.{DateTime, DateTimeZone, LocalDate, LocalDateTime, LocalTime}
import org.joda.time.chrono.ISOChronology

private[coders] class JodaLocalDateSerializer extends Serializer[LocalDate] {
  setImmutable(true)

  def write(kryo: Kryo, output: Output, ld: LocalDate): Unit = {
    output.writeInt(ld.getYear, /* optimizePositive = */ false)
    output.writeByte(ld.getMonthOfYear)
    output.writeByte(ld.getDayOfMonth)
  }

  def read(kryo: Kryo, input: Input, tpe: Class[LocalDate]): LocalDate = {
    val year = input.readInt(/* optimizePositive = */ false)
    val month = input.readByte().toInt
    val day = input.readByte().toInt

    new LocalDate(year, month, day)
  }
}

private[coders] class JodaDateTimeSerializer extends Serializer[DateTime] {
  setImmutable(true)

  def write(kryo: Kryo, output: Output, dt: DateTime): Unit = {
    output.writeLong(dt.getMillis)
    output.writeString(dt.getZone.getID)
  }

  def read(kryo: Kryo, input: Input, tpe: Class[DateTime]): DateTime = {
    val millis = input.readLong()
    val zone = DateTimeZone.forID(input.readString())
    new DateTime(millis, zone)
  }
} 
Example 25
Source File: ByteStringSerializer.scala    From scio   with Apache License 2.0
package com.spotify.scio.coders.instances.kryo

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import com.google.protobuf.ByteString
import com.twitter.chill.KSerializer

private[coders] class ByteStringSerializer extends KSerializer[ByteString] {
  override def read(kryo: Kryo, input: Input, tpe: Class[ByteString]): ByteString = {
    val n = input.readInt()
    ByteString.copyFrom(input.readBytes(n))
  }

  override def write(kryo: Kryo, output: Output, byteStr: ByteString): Unit = {
    val len = byteStr.size
    output.writeInt(len)
    val bytes = byteStr.iterator
    while (bytes.hasNext) {
      output.write(bytes.nextByte())
    }
  }
} 
Example 26
Source File: AvroSerializer.scala    From scio   with Apache License 2.0
package com.spotify.scio.coders.instances.kryo

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import com.twitter.chill.KSerializer
import org.apache.avro.Schema
import org.apache.avro.generic.GenericRecord
import org.apache.avro.specific.SpecificRecordBase
import org.apache.beam.sdk.coders.AvroCoder

import scala.collection.mutable.{Map => MMap}
import scala.util.Try

private[coders] class GenericAvroSerializer extends KSerializer[GenericRecord] {
  private lazy val cache: MMap[String, AvroCoder[GenericRecord]] = MMap()

  private def getCoder(schemaStr: String): AvroCoder[GenericRecord] =
    cache.getOrElseUpdate(schemaStr, AvroCoder.of(new Schema.Parser().parse(schemaStr)))
  private def getCoder(schemaStr: String, schema: Schema): AvroCoder[GenericRecord] =
    cache.getOrElseUpdate(schemaStr, AvroCoder.of(schema))

  override def write(kryo: Kryo, out: Output, obj: GenericRecord): Unit = {
    val schemaStr = obj.getSchema.toString
    val coder = this.getCoder(schemaStr, obj.getSchema)
    // write schema before every record in case it's not in reader serializer's cache
    out.writeString(schemaStr)
    coder.encode(obj, out)
  }

  override def read(kryo: Kryo, in: Input, cls: Class[GenericRecord]): GenericRecord = {
    val coder = this.getCoder(in.readString())
    coder.decode(in)
  }
}

private[coders] class SpecificAvroSerializer[T <: SpecificRecordBase] extends KSerializer[T] {
  private lazy val cache: MMap[Class[T], AvroCoder[T]] = MMap()

  private def getCoder(cls: Class[T]): AvroCoder[T] =
    cache.getOrElseUpdate(
      cls,
      Try(cls.getConstructor().newInstance().getSchema)
        .map(AvroCoder.of(cls, _))
        .getOrElse(AvroCoder.of(cls))
    )

  override def write(kser: Kryo, out: Output, obj: T): Unit =
    this.getCoder(obj.getClass.asInstanceOf[Class[T]]).encode(obj, out)

  override def read(kser: Kryo, in: Input, cls: Class[T]): T =
    this.getCoder(cls).decode(in)
} 
Example 27
Source File: KVSerializer.scala    From scio   with Apache License 2.0
package com.spotify.scio.coders.instances.kryo

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import com.twitter.chill.KSerializer
import org.apache.beam.sdk.values.KV

private[coders] class KVSerializer[K, V] extends KSerializer[KV[K, V]] {
  override def write(kser: Kryo, out: Output, obj: KV[K, V]): Unit = {
    kser.writeClassAndObject(out, obj.getKey)
    kser.writeClassAndObject(out, obj.getValue)
  }

  override def read(kser: Kryo, in: Input, cls: Class[KV[K, V]]): KV[K, V] = {
    val k = kser.readClassAndObject(in).asInstanceOf[K]
    val v = kser.readClassAndObject(in).asInstanceOf[V]
    KV.of(k, v)
  }
} 
Example 28
Source File: GrpcSerializers.scala    From scio   with Apache License 2.0
package com.spotify.scio.coders.instances.kryo

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import com.twitter.chill.KSerializer
import io.grpc.{Metadata, Status, StatusRuntimeException}

private[coders] object GrpcSerializers {

  class StatusSerializer extends KSerializer[Status] {
    override def write(kryo: Kryo, output: Output, status: Status): Unit = {
      output.writeInt(status.getCode().value())
      output.writeString(status.getDescription)
      kryo.writeClassAndObject(output, status.getCause)
    }

    override def read(kryo: Kryo, input: Input, `type`: Class[Status]): Status = {
      val code = input.readInt()
      val description = input.readString()
      val cause = kryo.readClassAndObject(input).asInstanceOf[Throwable]

      Status
        .fromCodeValue(code)
        .withDescription(description)
        .withCause(cause)
    }
  }

  class StatusRuntimeExceptionSerializer extends KSerializer[StatusRuntimeException] {
    lazy val statusSer = new StatusSerializer()

    override def write(kryo: Kryo, output: Output, e: StatusRuntimeException): Unit = {
      kryo.writeObject(output, e.getStatus, statusSer)
      kryo.writeObjectOrNull(output, e.getTrailers, classOf[Metadata])
    }

    override def read(
      kryo: Kryo,
      input: Input,
      `type`: Class[StatusRuntimeException]
    ): StatusRuntimeException = {
      val status = kryo.readObject(input, classOf[Status], statusSer)
      val trailers = kryo.readObjectOrNull(input, classOf[Metadata])

      new StatusRuntimeException(status, trailers)
    }
  }
} 
Example 29
Source File: JTraversableSerializer.scala    From scio   with Apache License 2.0
package com.spotify.scio.coders.instances.kryo

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, InputChunked, Output, OutputChunked}
import com.twitter.chill.KSerializer

import scala.jdk.CollectionConverters._
import scala.collection.generic.CanBuildFrom
import scala.collection.mutable


private[coders] class JTraversableSerializer[T, C <: Traversable[T]](
  val bufferSize: Int = 64 * 1024
)(implicit cbf: CanBuildFrom[C, T, C])
    extends KSerializer[C] {
  override def write(kser: Kryo, out: Output, obj: C): Unit = {
    val i = obj.toIterator
    val chunked = new OutputChunked(out, bufferSize)
    while (i.hasNext) {
      chunked.writeBoolean(true)
      kser.writeClassAndObject(chunked, i.next())
    }
    chunked.writeBoolean(false)
    chunked.endChunks()
    chunked.flush()
  }

  override def read(kser: Kryo, in: Input, cls: Class[C]): C = {
    val b = cbf()
    val chunked = new InputChunked(in, bufferSize)
    while (chunked.readBoolean()) {
      b += kser.readClassAndObject(chunked).asInstanceOf[T]
    }
    b.result()
  }
}

// workaround for Java Iterable/Collection missing proper equality check
abstract private[coders] class JWrapperCBF[T] extends CanBuildFrom[Iterable[T], T, Iterable[T]] {
  override def apply(from: Iterable[T]): mutable.Builder[T, Iterable[T]] = {
    val b = new JIterableWrapperBuilder
    from.foreach(b += _)
    b
  }
  override def apply(): mutable.Builder[T, Iterable[T]] =
    new JIterableWrapperBuilder
  def asScala(xs: java.util.List[T]): Iterable[T]

  class JIterableWrapperBuilder extends mutable.Builder[T, Iterable[T]] {
    private val xs = new java.util.ArrayList[T]()

    override def +=(elem: T): this.type = {
      xs.add(elem)
      this
    }

    override def clear(): Unit = xs.clear()
    override def result(): Iterable[T] = asScala(xs)
  }
}

private[coders] class JIterableWrapperCBF[T] extends JWrapperCBF[T] {
  override def asScala(xs: java.util.List[T]): Iterable[T] =
    xs.asInstanceOf[java.lang.Iterable[T]].asScala
}

private[coders] class JCollectionWrapperCBF[T] extends JWrapperCBF[T] {
  override def asScala(xs: java.util.List[T]): Iterable[T] =
    xs.asInstanceOf[java.util.Collection[T]].asScala
} 
Example 30
Source File: ShapeSerializer.scala    From Simba   with Apache License 2.0
package org.apache.spark.sql.simba

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.Input
import com.esotericsoftware.kryo.io.Output
import java.io._

import org.apache.spark.sql.simba.spatial._
import org.apache.spark.sql.simba.util.KryoShapeSerializer


object ShapeSerializer {
  private[simba] val kryo = new Kryo()

  kryo.register(classOf[Shape], new KryoShapeSerializer)
  kryo.register(classOf[Point], new KryoShapeSerializer)
  kryo.register(classOf[MBR], new KryoShapeSerializer)
  kryo.register(classOf[Polygon], new KryoShapeSerializer)
  kryo.register(classOf[Circle], new KryoShapeSerializer)
  kryo.register(classOf[LineSegment], new KryoShapeSerializer)
  kryo.addDefaultSerializer(classOf[Shape], new KryoShapeSerializer)
  kryo.setReferences(false)

  def deserialize(data: Array[Byte]): Shape = {
    val in = new ByteArrayInputStream(data)
    val input = new Input(in)
    val res = kryo.readObject(input, classOf[Shape])
    input.close()
    res
  }

  def serialize(o: Shape): Array[Byte] = {
    val out = new ByteArrayOutputStream()
    val output = new Output(out)
    kryo.writeObject(output, o)
    output.close()
    out.toByteArray
  }
}

class ShapeSerializer {

} 
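
Round-tripping a shape through the byte-array helpers would look roughly like the sketch below; the Point construction is an assumption about Simba's spatial API (a point wrapping its coordinates as an Array[Double]).

import org.apache.spark.sql.simba.ShapeSerializer
import org.apache.spark.sql.simba.spatial.Point

// Assumed constructor: coordinates passed as Array[Double].
val point = Point(Array(1.0, 2.0))
val bytes = ShapeSerializer.serialize(point)
val restored = ShapeSerializer.deserialize(bytes)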
Example 31
Source File: FlagsSerializer.scala    From spark-bam   with Apache License 2.0
package org.hammerlab.bam.check.full.error

import com.esotericsoftware.kryo.io.{ Input, Output }
import com.esotericsoftware.kryo.{ Kryo, Serializer }

import scala.collection.immutable.BitSet


class FlagsSerializer
  extends Serializer[Flags] {
  override def read(kryo: Kryo, input: Input, clz: Class[Flags]): Flags = {
    kryo
      .readClassAndObject(input)
      .asInstanceOf[(BitSet, Int)]
  }

  override def write(kryo: Kryo, output: Output, flags: Flags): Unit =
    kryo.writeClassAndObject(output, flags: (BitSet, Int))
} 
Example 32
Source File: FeaturePoint.scala    From spark-pip   with Apache License 2.0
package com.esri

import com.esotericsoftware.kryo.io.{Input, Output}
import com.esotericsoftware.kryo.{Kryo, KryoSerializable}
import com.vividsolutions.jts.geom.Geometry


case class FeaturePoint(var geom: Geometry, var attr: Array[String]) extends Feature with KryoSerializable {

  def this() = this(null, null)

  override def toRowCols(cellSize: Double): Seq[(RowCol, FeaturePoint)] = {
    val coordinate = geom.getCoordinate
    val c = (coordinate.x / cellSize).floor.toInt
    val r = (coordinate.y / cellSize).floor.toInt
    Seq((RowCol(r, c), this))
  }

  override def write(kryo: Kryo, output: Output): Unit = {
    val coordinate = geom.getCoordinate
    output.writeDouble(coordinate.x)
    output.writeDouble(coordinate.y)
    output.writeInt(attr.length)
    attr.foreach(output.writeString)
  }

  override def read(kryo: Kryo, input: Input): Unit = {
    val x = input.readDouble()
    val y = input.readDouble()
    geom = GeomFact.createPoint(x, y)
    val len = input.readInt()
    attr = Array.ofDim[String](len)
    for (i <- 0 until len)
      attr(i) = input.readString()
  }
} 
Example 33
Source File: FeaturePolygonTest.scala    From spark-pip   with Apache License 2.0
package com.esri

import java.io.ByteArrayOutputStream

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import org.geotools.geometry.jts.WKTReader2
import org.scalatest._

import scala.io.Source


class FeaturePolygonTest extends FlatSpec with Matchers {

  it should "read zero area geometry" in {
    val kryo = new Kryo()
    kryo.register(classOf[FeaturePolygon])

    val reader = new WKTReader2()
    Source
      .fromFile("/tmp/world.tsv")
      .getLines()
      .foreach(line => {
        val tokens = line.split("\t")
        val geom = reader.read(tokens(14))
        FeaturePolygon(geom, Array.empty[String])
          .toRowCols(4.0)
          .foreach {
            case (rowcol, feature) => {
              feature.geom.getGeometryType should endWith("Polygon")

              val baos = new ByteArrayOutputStream(4096)
              val output = new Output(baos)
              kryo.writeObject(output, feature)
              output.flush()

              val obj = kryo.readObject[FeaturePolygon](new Input(baos.toByteArray), classOf[FeaturePolygon])
              obj.geom.equalsExact(feature.geom, 0.000001)
            }
          }
      })
  }
} 
Example 34
Source File: KryoStringEventBatch.scala    From maha   with Apache License 2.0
// Copyright 2017, Yahoo Holdings Inc.
// Licensed under the terms of the Apache License 2.0. Please see LICENSE file in project root for terms.
package com.yahoo.maha.log

import com.esotericsoftware.kryo.{Kryo, Serializer}
import com.esotericsoftware.kryo.io.{Input, Output}
import com.yahoo.maha.data.StringEventBatch
import org.slf4j.{Logger, LoggerFactory}


object KryoStringEventBatch {
  private val logger: Logger = LoggerFactory.getLogger(classOf[KryoStringEventBatch])
}

class KryoStringEventBatch extends Serializer[StringEventBatch] {
  KryoStringEventBatch.logger.info("Created instance of " + this.getClass.getSimpleName)

  override def write(kryo: Kryo, output: Output, stringEventBatch: StringEventBatch): Unit = {
    val size: Int = stringEventBatch.getEvents.size
    output.writeInt(size)
    stringEventBatch.getEvents.stream().forEach(output.writeString(_))
  }

  override def read(kryo: Kryo, input: Input, `type`: Class[StringEventBatch]): StringEventBatch = {
    val size: Int = input.readInt
    val builder: StringEventBatch.Builder = new StringEventBatch.Builder(size)
    var i: Int = 0
    while ( i < size) {
      builder.add(input.readString)
      i += 1
    }
    builder.build.asInstanceOf[StringEventBatch]
  }
} 
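
To use it, register the serializer for StringEventBatch on a Kryo instance (a minimal sketch):

import com.esotericsoftware.kryo.Kryo
import com.yahoo.maha.data.StringEventBatch
import com.yahoo.maha.log.KryoStringEventBatch

val kryo = new Kryo()
kryo.register(classOf[StringEventBatch], new KryoStringEventBatch)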
Example 35
Source File: RegisterNodeSerializerTest.scala    From JustinDB   with Apache License 2.0
package justin.db.kryo

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import justin.db.actors.protocol.RegisterNode
import justin.db.consistenthashing.NodeId
import org.scalatest.{FlatSpec, Matchers}

class RegisterNodeSerializerTest extends FlatSpec with Matchers {

  behavior of "RegisterNode Serializer"

  it should "serialize/deserialize correctly" in {
    // kryo init
    val kryo = new Kryo()
    kryo.register(classOf[RegisterNode], RegisterNodeSerializer)

    // object
    val serializedData = RegisterNode(NodeId(1))

    // serialization
    val bos    = new ByteArrayOutputStream()
    val output = new Output(bos)
    val _      = kryo.writeObject(output, serializedData)
    output.flush()

    // deserialization
    val bis              = new ByteArrayInputStream(bos.toByteArray)
    val input            = new Input(bis)
    val deserializedData = kryo.readObject(input, classOf[RegisterNode])

    serializedData shouldBe deserializedData
  }
} 
Example 36
Source File: StorageNodeReadResponseSerializer.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.kryo

import java.util.UUID

import com.esotericsoftware.kryo.io.{Input, Output}
import com.esotericsoftware.kryo.{Kryo, Serializer}
import justin.db.Data
import justin.db.actors.protocol._

object StorageNodeReadResponseSerializer extends Serializer[StorageNodeReadResponse] {

  private object Discriminator {
    val Found      = 1
    val Conflicted = 2
    val NotFound   = 3
    val Failed     = 4
  }

  override def write(kryo: Kryo, output: Output, readResponse: StorageNodeReadResponse): Unit = readResponse match {
    case StorageNodeFoundRead(data)           =>
      output.writeInt(Discriminator.Found)
      DataSerializer.write(kryo, output, data)
    case StorageNodeConflictedRead(conflicts) =>
      output.writeInt(Discriminator.Conflicted)
      ListOfDataSerializer.write(kryo, output, conflicts)
    case StorageNodeNotFoundRead(id)          =>
      output.writeInt(Discriminator.NotFound)
      output.writeString(id.toString)
    case StorageNodeFailedRead(id)            =>
      output.writeInt(Discriminator.Failed)
      output.writeString(id.toString)
  }

  override def read(kryo: Kryo, input: Input, `type`: Class[StorageNodeReadResponse]): StorageNodeReadResponse = {
    input.readInt() match {
      case Discriminator.Found      => StorageNodeFoundRead(DataSerializer.read(kryo, input, classOf[Data]))
      case Discriminator.Conflicted => StorageNodeConflictedRead(ListOfDataSerializer.read(kryo, input, classOf[List[Data]]))
      case Discriminator.NotFound   => StorageNodeNotFoundRead(UUID.fromString(input.readString()))
      case Discriminator.Failed     => StorageNodeFailedRead(UUID.fromString(input.readString()))
    }
  }
} 
Example 37
Source File: ListOfDataSerializer.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.kryo

import com.esotericsoftware.kryo.io.{Input, Output}
import com.esotericsoftware.kryo.{Kryo, Serializer}
import justin.db.Data

object ListOfDataSerializer extends Serializer[List[Data]] {
  override def write(kryo: Kryo, output: Output, listOfData: List[Data]): Unit = {
    val length = listOfData.size
    output.writeInt(length, true)
    if(length != 0) {
      val it = listOfData.iterator
      while(it.hasNext)
        DataSerializer.write(kryo, output, it.next())
    }
  }

  override def read(kryo: Kryo, input: Input, `type`: Class[List[Data]]): List[Data] = {
    var length = input.readInt(true)
    var result = List.empty[Data]

    while(length > 0) {
      result = result :+ DataSerializer.read(kryo, input, classOf[Data])
      length -= 1
    }
    result
  }
} 
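The read above rebuilds the list with repeated appends (result :+ ...), which is quadratic in the number of elements. A sketch of a drop-in replacement for that method using a ListBuffer (same wire format, same DataSerializer):

import scala.collection.mutable.ListBuffer

override def read(kryo: Kryo, input: Input, `type`: Class[List[Data]]): List[Data] = {
  var length = input.readInt(true)
  val buffer = ListBuffer.empty[Data]
  while (length > 0) {
    buffer += DataSerializer.read(kryo, input, classOf[Data])
    length -= 1
  }
  buffer.toList
}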
Example 38
Source File: DataSerializer.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.kryo

import java.util.UUID

import com.esotericsoftware.kryo.io.{Input, Output}
import com.esotericsoftware.kryo.{Kryo, Serializer}
import justin.db.Data
import justin.db.versioning.NodeIdVectorClockBase64

object DataSerializer extends Serializer[Data] {
  override def write(kryo: Kryo, output: Output, data: Data): Unit = {
    output.writeString(data.id.toString) // UUID
    output.writeString(data.value)       // Value
    output.writeString(new NodeIdVectorClockBase64().encode(data.vclock).get) // Vector Clock
    output.writeLong(data.timestamp)    // Timestamp
  }

  override def read(kryo: Kryo, input: Input, `type`: Class[Data]): Data = {
    Data(
      id        = UUID.fromString(input.readString()), // UUID
      value     = input.readString(),                  // Value
      vclock    = new NodeIdVectorClockBase64().decode(input.readString()).get, // Vector Clock
      timestamp = input.readLong()                     // Timestamp
    )
  }
} 
Example 39
Source File: StorageNodeWriteResponseSerializer.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.kryo

import java.util.UUID

import com.esotericsoftware.kryo.io.{Input, Output}
import com.esotericsoftware.kryo.{Kryo, Serializer}
import justin.db.Data
import justin.db.actors.protocol.{StorageNodeConflictedWrite, StorageNodeFailedWrite, StorageNodeSuccessfulWrite, StorageNodeWriteResponse}

object StorageNodeWriteResponseSerializer extends Serializer[StorageNodeWriteResponse] {

  private object Discriminator {
    val SuccessfulWrite = 1
    val FailedWrite     = 2
    val ConflictedWrite = 3
  }

  override def write(kryo: Kryo, output: Output, response: StorageNodeWriteResponse): Unit = response match {
    case StorageNodeSuccessfulWrite(id)               =>
      output.writeInt(Discriminator.SuccessfulWrite)
      output.writeString(id.toString) // UUID
    case StorageNodeFailedWrite(id)                   =>
      output.writeInt(Discriminator.FailedWrite)
      output.writeString(id.toString) // UUID
    case StorageNodeConflictedWrite(oldData, newData) =>
      output.writeInt(Discriminator.ConflictedWrite)
      DataSerializer.write(kryo, output, oldData)
      DataSerializer.write(kryo, output, newData)
  }

  override def read(kryo: Kryo, input: Input, `type`: Class[StorageNodeWriteResponse]): StorageNodeWriteResponse = input.readInt() match {
    case Discriminator.SuccessfulWrite =>
      StorageNodeSuccessfulWrite(UUID.fromString(input.readString()))
    case Discriminator.FailedWrite     =>
      StorageNodeFailedWrite(UUID.fromString(input.readString()))
    case Discriminator.ConflictedWrite =>
      StorageNodeConflictedWrite(
        oldData = DataSerializer.read(kryo, input, classOf[Data]),
        newData = DataSerializer.read(kryo, input, classOf[Data])
      )
  }
} 
Example 40
Source File: StorageNodeWriteDataLocalSerializer.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.kryo

import java.util.UUID

import com.esotericsoftware.kryo.io.{Input, Output}
import com.esotericsoftware.kryo.{Kryo, Serializer}
import justin.db.Data
import justin.db.actors.protocol.StorageNodeWriteDataLocal
import justin.db.versioning.NodeIdVectorClockBase64

object StorageNodeWriteDataLocalSerializer extends Serializer[StorageNodeWriteDataLocal] {
  override def write(kryo: Kryo, output: Output, local: StorageNodeWriteDataLocal): Unit = {
    output.writeString(local.data.id.toString) // UUID
    output.writeString(local.data.value)       // Value
    output.writeString(new NodeIdVectorClockBase64().encode(local.data.vclock).get)  // Vector Clock
    output.writeLong(local.data.timestamp)     // Timestamp
  }

  override def read(kryo: Kryo, input: Input, `type`: Class[StorageNodeWriteDataLocal]): StorageNodeWriteDataLocal = {
    val id = UUID.fromString(input.readString()) // UUID
    val value = input.readString()               // Value
    val vectorClock = new NodeIdVectorClockBase64().decode(input.readString()).get // Vector Clock
    val timestamp = input.readLong()             // Timestamp

    StorageNodeWriteDataLocal(Data(id, value, vectorClock, timestamp))
  }
} 
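StorageNodeWriteDataLocalSerializer duplicates the field-by-field logic of DataSerializer (Example 38) and produces the same byte layout. A possible refactor, sketched here rather than taken from the project, simply delegates to DataSerializer:

object StorageNodeWriteDataLocalSerializer extends Serializer[StorageNodeWriteDataLocal] {
  override def write(kryo: Kryo, output: Output, local: StorageNodeWriteDataLocal): Unit =
    DataSerializer.write(kryo, output, local.data)

  override def read(kryo: Kryo, input: Input, `type`: Class[StorageNodeWriteDataLocal]): StorageNodeWriteDataLocal =
    StorageNodeWriteDataLocal(DataSerializer.read(kryo, input, classOf[Data]))
}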
Example 41
Source File: SerializerInit.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.kryo

import com.esotericsoftware.kryo.Kryo
import com.typesafe.scalalogging.StrictLogging

class SerializerInit extends StrictLogging {

  def customize(kryo: Kryo): Unit = {
    logger.info("Initialized Kryo")

    // cluster
    kryo.register(classOf[justin.db.actors.protocol.RegisterNode], RegisterNodeSerializer, 50)

    // write -- request
    kryo.register(classOf[justin.db.actors.protocol.StorageNodeWriteDataLocal], StorageNodeWriteDataLocalSerializer, 60)

    // write -- responses
    kryo.register(classOf[justin.db.actors.protocol.StorageNodeFailedWrite],     StorageNodeWriteResponseSerializer, 70)
    kryo.register(classOf[justin.db.actors.protocol.StorageNodeSuccessfulWrite], StorageNodeWriteResponseSerializer, 71)
    kryo.register(classOf[justin.db.actors.protocol.StorageNodeConflictedWrite], StorageNodeWriteResponseSerializer, 72)

    // read - request
    kryo.register(classOf[justin.db.actors.protocol.StorageNodeLocalRead], StorageNodeLocalReadSerializer, 80)

    // read - responses
    kryo.register(classOf[justin.db.actors.protocol.StorageNodeFoundRead],      StorageNodeReadResponseSerializer, 90)
    kryo.register(classOf[justin.db.actors.protocol.StorageNodeConflictedRead], StorageNodeReadResponseSerializer, 91)
    kryo.register(classOf[justin.db.actors.protocol.StorageNodeNotFoundRead],   StorageNodeReadResponseSerializer, 92)
    kryo.register(classOf[justin.db.actors.protocol.StorageNodeFailedRead],     StorageNodeReadResponseSerializer, 93)

    ()
  }
} 
Example 42
Source File: KryoStringEventBatchTest.scala    From maha   with Apache License 2.0 5 votes vote down vote up
// Copyright 2017, Yahoo Holdings Inc.
// Licensed under the terms of the Apache License 2.0. Please see LICENSE file in project root for terms.
package com.yahoo.maha.log



import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import com.yahoo.maha.data.StringEventBatch
import org.junit.{Assert, Test}

class KryoStringEventBatchTest {
  @Test
  @throws[Exception]
  def test(): Unit = {
    val kryo = new Kryo
    kryo.register(classOf[StringEventBatch], new KryoStringEventBatch)
    val builder = new StringEventBatch.Builder(3)
    builder.add("one")
    builder.add("two")
    builder.add("three")
    val recordList = builder.build.asInstanceOf[StringEventBatch]
    val output = new Output(new Array[Byte](1024 * 1024 + 1))
    kryo.writeObject(output, recordList)
    System.out.println("output.position=" + output.position)
    val input = new Input(output.getBuffer, 0, output.total.toInt)
    val resultRecordList = kryo.readObject(input, classOf[StringEventBatch])
    Assert.assertEquals(resultRecordList.getEvents.get(0), "one")
    Assert.assertEquals(resultRecordList.getEvents.get(1), "two")
    Assert.assertEquals(resultRecordList.getEvents.get(2), "three")
    val output2 = new Output(new Array[Byte](1024 * 1024 + 1))
    kryo.writeObject(output2, resultRecordList)
  }
} 
Example 43
Source File: SerializerInitTest.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.kryo

import com.esotericsoftware.kryo.Kryo
import org.scalatest.{FlatSpec, Matchers}

class SerializerInitTest extends FlatSpec with Matchers {

  behavior of "SerializerInit"

  it should "init Kryo serializer" in {
    val kryo = new Kryo()
    val serializerInit = new SerializerInit()
    serializerInit.customize(kryo)

    // cluster
    val classId_50 = 50
    kryo.getRegistration(classId_50).getId          shouldBe 50
    kryo.getRegistration(classId_50).getSerializer  shouldBe RegisterNodeSerializer
    kryo.getRegistration(classId_50).getType        shouldBe classOf[justin.db.actors.protocol.RegisterNode]


    // write -- request
    val classId_60 = 60
    kryo.getRegistration(classId_60).getId          shouldBe 60
    kryo.getRegistration(classId_60).getSerializer  shouldBe StorageNodeWriteDataLocalSerializer
    kryo.getRegistration(classId_60).getType        shouldBe classOf[justin.db.actors.protocol.StorageNodeWriteDataLocal]

    // write -- responses
    val classId_70 = 70
    kryo.getRegistration(classId_70).getId          shouldBe 70
    kryo.getRegistration(classId_70).getSerializer  shouldBe StorageNodeWriteResponseSerializer
    kryo.getRegistration(classId_70).getType        shouldBe classOf[justin.db.actors.protocol.StorageNodeFailedWrite]

    val classId_71 = 71
    kryo.getRegistration(classId_71).getId          shouldBe 71
    kryo.getRegistration(classId_71).getSerializer  shouldBe StorageNodeWriteResponseSerializer
    kryo.getRegistration(classId_71).getType        shouldBe classOf[justin.db.actors.protocol.StorageNodeSuccessfulWrite]

    val classId_72 = 72
    kryo.getRegistration(classId_72).getId          shouldBe 72
    kryo.getRegistration(classId_72).getSerializer  shouldBe StorageNodeWriteResponseSerializer
    kryo.getRegistration(classId_72).getType        shouldBe classOf[justin.db.actors.protocol.StorageNodeConflictedWrite]

    // read - request
    val classId_80 = 80
    kryo.getRegistration(classId_80).getId          shouldBe 80
    kryo.getRegistration(classId_80).getSerializer  shouldBe StorageNodeLocalReadSerializer
    kryo.getRegistration(classId_80).getType        shouldBe classOf[justin.db.actors.protocol.StorageNodeLocalRead]

    // read - responses
    val classId_90 = 90
    kryo.getRegistration(classId_90).getId          shouldBe 90
    kryo.getRegistration(classId_90).getSerializer  shouldBe StorageNodeReadResponseSerializer
    kryo.getRegistration(classId_90).getType        shouldBe classOf[justin.db.actors.protocol.StorageNodeFoundRead]

    val classId_91 = 91
    kryo.getRegistration(classId_91).getId          shouldBe 91
    kryo.getRegistration(classId_91).getSerializer  shouldBe StorageNodeReadResponseSerializer
    kryo.getRegistration(classId_91).getType        shouldBe classOf[justin.db.actors.protocol.StorageNodeConflictedRead]

    val classId_92 = 92
    kryo.getRegistration(classId_92).getId          shouldBe 92
    kryo.getRegistration(classId_92).getSerializer  shouldBe StorageNodeReadResponseSerializer
    kryo.getRegistration(classId_92).getType        shouldBe classOf[justin.db.actors.protocol.StorageNodeNotFoundRead]

    val classId_93 = 93
    kryo.getRegistration(classId_93).getId          shouldBe 93
    kryo.getRegistration(classId_93).getSerializer  shouldBe StorageNodeReadResponseSerializer
    kryo.getRegistration(classId_93).getType        shouldBe classOf[justin.db.actors.protocol.StorageNodeFailedRead]
  }
} 
Example 44
Source File: NodeSuite.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.types

import java.io._

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import com.twitter.chill.AllScalaRegistrar
import org.scalatest.FunSuite

class NodeSuite extends FunSuite {

  def isSerializable(input: Any): Boolean = {
    val byteArray = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(byteArray)
    oos.writeObject(input)
    oos.close()
    val ois = new ObjectInputStream(new ByteArrayInputStream(byteArray.toByteArray))
    val output = ois.readObject()
    ois.close()
    input == output
  }

  def isKryoSerializable(input: Any): Boolean = {
    val kryo = new Kryo()
    kryo.setRegistrationRequired(false)
    new AllScalaRegistrar().apply(kryo)
    kryo.register(input.getClass)
    kryo.setReferences(false)
    kryo.setClassLoader(input.getClass.getClassLoader)
    val byteArray = new ByteArrayOutputStream()
    val kryoOutput = new Output(byteArray)
    kryo.writeObject(kryoOutput, input)
    kryoOutput.flush()
    val kryoInput = new Input(new ByteArrayInputStream(byteArray.toByteArray))
    val output = kryo.readObject(kryoInput, input.getClass)
    kryoInput.close()
    kryoOutput.close()
    input == output
  }

  test("Node is serializable") {
    assert(isSerializable(Node(Seq(1), IntegerType)))
    assert(isSerializable(Node(Seq(1L), LongType)))
    assert(isSerializable(Node(Seq("1"), StringType)))
    assert(isSerializable(Node(Seq(1, 2), IntegerType)))
    assert(isSerializable(Node(Seq(1L, 2L), LongType)))
    // assert(isSerializable(Node(Seq("1", "2", 1))))
    assert(isSerializable(Node(Seq(1), IntegerType, 1, 1, true)))
  }

  test("Node is serializable with Kryo") {
    assert(isKryoSerializable(Node(Seq(1), IntegerType)))
    assert(isKryoSerializable(Node(Seq(1L), LongType)))
    assert(isKryoSerializable(Node(Seq("1"), StringType)))
    assert(isKryoSerializable(Node(Seq(1, 2), IntegerType)))
    assert(isKryoSerializable(Node(Seq(1L, 2L), LongType)))
    // assert(isKryoSerializable(Node(Seq("1", "2", 1))))
    assert(isKryoSerializable(Node(Seq(1), IntegerType, 1, 1, true)))
  }

} 
Example 45
Source File: GraphKryoRegistrator.scala    From graphx-algorithm   with GNU General Public License v2.0 5 votes vote down vote up
package org.apache.spark.graphx

import com.esotericsoftware.kryo.Kryo

import org.apache.spark.serializer.KryoRegistrator
import org.apache.spark.util.BoundedPriorityQueue
import org.apache.spark.util.collection.BitSet

import org.apache.spark.graphx.impl._
import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap
import org.apache.spark.util.collection.OpenHashSet


@deprecated("Register GraphX classes with Kryo using GraphXUtils.registerKryoClasses", "1.2.0")
class GraphKryoRegistrator extends KryoRegistrator {

  def registerClasses(kryo: Kryo) {
    kryo.register(classOf[Edge[Object]])
    kryo.register(classOf[(VertexId, Object)])
    kryo.register(classOf[EdgePartition[Object, Object]])
    kryo.register(classOf[BitSet])
    kryo.register(classOf[VertexIdToIndexMap])
    kryo.register(classOf[VertexAttributeBlock[Object]])
    kryo.register(classOf[PartitionStrategy])
    kryo.register(classOf[BoundedPriorityQueue[Object]])
    kryo.register(classOf[EdgeDirection])
    kryo.register(classOf[GraphXPrimitiveKeyOpenHashMap[VertexId, Int]])
    kryo.register(classOf[OpenHashSet[Int]])
    kryo.register(classOf[OpenHashSet[Long]])
  }
} 
Example 46
Source File: KryoInitSpec.scala    From hydra   with Apache License 2.0 5 votes vote down vote up
package hydra.core.akka

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import com.romix.scala.serialization.kryo.{
  EnumerationSerializer,
  ScalaImmutableAbstractMapSerializer,
  ScalaMutableMapSerializer
}
import org.scalatest.matchers.should.Matchers
import org.scalatest.flatspec.AnyFlatSpecLike

class KryoInitSpec extends Matchers with AnyFlatSpecLike {

  "The custom KryoInit" should "register serializers" in {
    val kryo = new Kryo()
    new KryoInit().customize(kryo)
    kryo.getDefaultSerializer(classOf[scala.Enumeration#Value]) shouldBe an[
      EnumerationSerializer
    ]
    kryo.getDefaultSerializer(classOf[collection.immutable.Map[_, _]]) shouldBe a[
      ScalaImmutableAbstractMapSerializer
    ]
    kryo.getDefaultSerializer(classOf[collection.immutable.Map[_, _]]) shouldBe a[
      ScalaImmutableAbstractMapSerializer
    ]
    kryo.getDefaultSerializer(classOf[collection.mutable.HashMap[_, _]]) shouldBe a[
      ScalaMutableMapSerializer
    ]
  }

  it should "serialize immutable maps" in {
    val kryo = new Kryo()
    new KryoInit().customize(kryo)
    val map1 = Map(
      "Rome" -> "Italy",
      "London" -> "England",
      "Paris" -> "France",
      "New York" -> "USA",
      "Tokyo" -> "Japan",
      "Peking" -> "China",
      "Brussels" -> "Belgium"
    )
    val map2 = map1 + ("Moscow" -> "Russia")
    val map3 = map2 + ("Berlin" -> "Germany")
    val map4 = map3 + ("Germany" -> "Berlin", "Russia" -> "Moscow")
    roundTrip(map1, kryo)
    roundTrip(map2, kryo)
    roundTrip(map3, kryo)
    roundTrip(map4, kryo)
  }

  def roundTrip[T](obj: T, kryo: Kryo): T = {
    val outStream = new ByteArrayOutputStream()
    val output = new Output(outStream, 4096)
    kryo.writeClassAndObject(output, obj)
    output.flush()

    val input = new Input(new ByteArrayInputStream(outStream.toByteArray), 4096)
    val obj1 = kryo.readClassAndObject(input)

    assert(obj == obj1)

    obj1.asInstanceOf[T]
  }

} 
Example 47
Source File: KryoInit.scala    From hydra   with Apache License 2.0 5 votes vote down vote up
package hydra.core.akka

import com.esotericsoftware.kryo.Kryo
import com.romix.scala.serialization.kryo._
import hydra.common.logging.LoggingAdapter

import scala.collection._

class KryoInit extends LoggingAdapter {

  def customize(kryo: Kryo): Unit = {
    log.debug("Initializing Kryo...")
    kryo.addDefaultSerializer(
      classOf[scala.Enumeration#Value],
      classOf[EnumerationSerializer]
    )
    kryo.addDefaultSerializer(
      classOf[mutable.HashMap[_, _]],
      classOf[ScalaMutableMapSerializer]
    )
    kryo.addDefaultSerializer(
      classOf[scala.collection.immutable.Map[_, _]],
      classOf[ScalaImmutableAbstractMapSerializer]
    )
    kryo.register(
      classOf[immutable.Map[_, _]],
      new ScalaImmutableAbstractMapSerializer(),
      420
    )
  }
} 
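A small round trip exercising the EnumerationSerializer default registered by customize above; the Weekday enumeration is only an illustrative stand-in, and the sketch assumes the romix EnumerationSerializer handles Enumeration#Value instances written with writeClassAndObject:

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}

object Weekday extends Enumeration {
  val Mon, Tue, Wed = Value
}

val kryo = new Kryo()
new KryoInit().customize(kryo)

val baos = new ByteArrayOutputStream()
val output = new Output(baos, 4096)
kryo.writeClassAndObject(output, Weekday.Tue)
output.flush()

val input = new Input(new ByteArrayInputStream(baos.toByteArray), 4096)
kryo.readClassAndObject(input) == Weekday.Tue  // expected to be true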
Example 48
Source File: MemoryContextStore.scala    From dbpedia-spotlight-model   with Apache License 2.0 5 votes vote down vote up
package org.dbpedia.spotlight.db.memory

import java.util.{HashMap, Map}

import com.esotericsoftware.kryo.io.{Input, Output}
import com.esotericsoftware.kryo.{Kryo, KryoException, KryoSerializable}
import org.apache.commons.lang.NotImplementedException
import org.dbpedia.spotlight.db.model.{ContextStore, TokenTypeStore}
import org.dbpedia.spotlight.model.{DBpediaResource, TokenType}



  def calculateTotalTokenCounts() {
    var i = 0
    while (i < counts.size) {

      if (counts(i).isInstanceOf[Array[Short]]) {
        var j = 0

        while (j < counts(i).size) {
          totalTokenCounts(i) += qc(counts(i)(j))
          j += 1
        }

      }
      i += 1
    }
  }


  def read(kryo: Kryo, input: Input) {
    val size = input.readInt()

    tokens = new Array[Array[Int]](size)
    counts = new Array[Array[Short]](size)
    totalTokenCounts = new Array[Int](size)

    var i = 0
    var j = 0

    while(i < size) {
      val subsize = input.readInt()

      if (subsize > 0) {
        tokens(i) = new Array[Int](subsize)
        counts(i) = new Array[Short](subsize)

        j = 0
        while(j < subsize) {
          tokens(i)(j) = input.readInt()
          j += 1
        }

        j = 0
        while(j < subsize) {
          counts(i)(j) = input.readShort()
          j += 1
        }
      }

      i += 1
    }

    if (input.readChar() != '#')
      throw new KryoException("Error in deserializing context store...")

  }

} 
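Only the read side of MemoryContextStore is shown above. A matching write, inferred from that format (entry count, then a per-resource token array followed by a count array, terminated by a '#' marker), might look like the sketch below; it is meant as a method of the same class and is not the project's actual implementation:

  def write(kryo: Kryo, output: Output) {
    output.writeInt(tokens.length)

    var i = 0
    while (i < tokens.length) {
      if (tokens(i) == null) {
        output.writeInt(0)
      } else {
        val subsize = tokens(i).length
        output.writeInt(subsize)

        var j = 0
        while (j < subsize) {
          output.writeInt(tokens(i)(j))
          j += 1
        }

        j = 0
        while (j < subsize) {
          output.writeShort(counts(i)(j))
          j += 1
        }
      }
      i += 1
    }

    output.writeChar('#')
  }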
Example 49
Source File: KryoMLMatrixSerializer.scala    From MatRel   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.matfast.util

import com.esotericsoftware.kryo.{Kryo, Serializer}
import com.esotericsoftware.kryo.io.{Input, Output}

import org.apache.spark.sql.matfast.matrix._

class KryoMLMatrixSerializer extends Serializer[MLMatrix]{

  private def getTypeInt(m: MLMatrix): Short = m match {
    case _: SparseMatrix => 0
    case _: DenseMatrix => 1
    case _ => -1
  }

  override def write(kryo: Kryo, output: Output, matrix: MLMatrix) {
    output.writeShort(getTypeInt(matrix))
    matrix match {
      case dense: DenseMatrix =>
        output.writeInt(dense.numRows, true)
        output.writeInt(dense.numCols, true)
        output.writeInt(dense.values.length, true)
        dense.values.foreach(output.writeDouble)
        output.writeBoolean(dense.isTransposed)
      case sp: SparseMatrix =>
        output.writeInt(sp.numRows, true)
        output.writeInt(sp.numCols, true)
        output.writeInt(sp.colPtrs.length, true)
        sp.colPtrs.foreach(x => output.writeInt(x, true))
        output.writeInt(sp.rowIndices.length, true)
        sp.rowIndices.foreach(x => output.writeInt(x, true))
        output.writeInt(sp.values.length, true)
        sp.values.foreach(output.writeDouble)
        output.writeBoolean(sp.isTransposed)
    }
  }

  override def read(kryo: Kryo, input: Input, typ: Class[MLMatrix]): MLMatrix = {
    val typInt = input.readShort()
    if (typInt == 1) { // DenseMatrix
      val numRows = input.readInt(true)
      val numCols = input.readInt(true)
      val dim = input.readInt(true)
      val values = Array.ofDim[Double](dim)
      for (i <- 0 until dim) values(i) = input.readDouble()
      val isTransposed = input.readBoolean()
      new DenseMatrix(numRows, numCols, values, isTransposed)
    } else if (typInt == 0) { // SparseMatrix
      val numRows = input.readInt(true)
      val numCols = input.readInt(true)
      val colPtrsDim = input.readInt(true)
      val colPtrs = Array.ofDim[Int](colPtrsDim)
      for (i <- 0 until colPtrsDim) colPtrs(i) = input.readInt(true)
      val rowIndicesDim = input.readInt(true)
      val rowIndices = Array.ofDim[Int](rowIndicesDim)
      for (i <- 0 until rowIndicesDim) rowIndices(i) = input.readInt(true)
      val valueDim = input.readInt(true)
      val values = Array.ofDim[Double](valueDim)
      for (i <- 0 until valueDim) values(i) = input.readDouble()
      val isTransposed = input.readBoolean()
      new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values, isTransposed)
    } else null
  }
} 
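A sketch of wiring this serializer up and round-tripping a dense matrix; it assumes DenseMatrix has the (numRows, numCols, values, isTransposed) constructor used in read above:

import java.io.ByteArrayOutputStream

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import org.apache.spark.sql.matfast.matrix.{DenseMatrix, SparseMatrix}

val kryo = new Kryo()
val serializer = new KryoMLMatrixSerializer
kryo.register(classOf[DenseMatrix], serializer)
kryo.register(classOf[SparseMatrix], serializer)

val dense = new DenseMatrix(2, 2, Array(1.0, 2.0, 3.0, 4.0), false)

val baos = new ByteArrayOutputStream()
val output = new Output(baos, 4096)
kryo.writeObject(output, dense)
output.flush()

// The copy should hold the same dimensions and values as dense
val copy = kryo.readObject(new Input(baos.toByteArray), classOf[DenseMatrix])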
Example 50
Source File: MatfastSerializer.scala    From MatRel   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.matfast.util

import java.math.BigDecimal
import java.nio.ByteBuffer
import java.util.{HashMap => JavaHashMap}

import scala.reflect.ClassTag

import com.esotericsoftware.kryo.{Kryo, Serializer}
import com.esotericsoftware.kryo.io.{Input, Output}
import com.twitter.chill.ResourcePool

import org.apache.spark.{SparkConf, SparkEnv}
import org.apache.spark.serializer.{KryoSerializer, SerializerInstance}
import org.apache.spark.sql.matfast.matrix._
import org.apache.spark.sql.types.Decimal
import org.apache.spark.util.MutablePair


private[matfast] class MatfastSerializer(conf: SparkConf) extends KryoSerializer(conf) {
  override def newKryo(): Kryo = {
    val kryo = super.newKryo()
    kryo.setRegistrationRequired(false)
    kryo.register(classOf[MutablePair[_, _]])
    kryo.register(classOf[org.apache.spark.sql.catalyst.expressions.GenericRow])
    kryo.register(classOf[org.apache.spark.sql.catalyst.expressions.GenericInternalRow])
    kryo.register(classOf[java.math.BigDecimal], new JavaBigDecimalSerializer)
    kryo.register(classOf[BigDecimal], new ScalaBigDecimalSerializer)

    kryo.register(classOf[Decimal])
    kryo.register(classOf[JavaHashMap[_, _]])
    kryo.register(classOf[DenseMatrix])
    kryo.register(classOf[SparseMatrix])

    kryo.setReferences(false)
    kryo
  }
}

private[matfast] class KryoResourcePool(size: Int) extends ResourcePool[SerializerInstance](size) {
  val ser: MatfastSerializer = {
    val sparkConf = Option(SparkEnv.get).map(_.conf).getOrElse(new SparkConf())
    new MatfastSerializer(sparkConf)
  }

  def newInstance(): SerializerInstance = ser.newInstance()
}

private[matfast] object MatfastSerializer {
  @transient lazy val resourcePool = new KryoResourcePool(50)

  private[this] def acquireRelease[O](fn: SerializerInstance => O): O = {
    val kryo = resourcePool.borrow()
    try {
      fn(kryo)
    } finally {
      resourcePool.release(kryo)
    }
  }

  def serialize[T: ClassTag](o: T): Array[Byte] = {
    acquireRelease { k =>
      k.serialize(o).array()
    }
  }

  def deserialize[T: ClassTag](bytes: Array[Byte]): T =
    acquireRelease { k =>
      k.deserialize[T](ByteBuffer.wrap(bytes))
    }
}

private[matfast] class JavaBigDecimalSerializer extends Serializer[java.math.BigDecimal] {
  def write(kryo: Kryo, output: Output, bd: java.math.BigDecimal) {
    output.writeString(bd.toString)
  }

  def read(kryo: Kryo, input: Input, tpe: Class[java.math.BigDecimal]): java.math.BigDecimal = {
    new java.math.BigDecimal(input.readString())
  }
}

private[matfast] class ScalaBigDecimalSerializer extends Serializer[BigDecimal] {
  def write(kryo: Kryo, output: Output, bd: BigDecimal): Unit = {
    output.writeString(bd.toString)
  }

  def read(kryo: Kryo, input: Input, tpe: Class[BigDecimal]): BigDecimal = {
    new java.math.BigDecimal(input.readString())
  }
} 
Example 51
Source File: KryoSerializerDistributedSuite.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.serializer

import com.esotericsoftware.kryo.Kryo

import org.apache.spark._
import org.apache.spark.internal.config
import org.apache.spark.serializer.KryoDistributedTest._
import org.apache.spark.util.Utils

class KryoSerializerDistributedSuite extends SparkFunSuite with LocalSparkContext {

  test("kryo objects are serialised consistently in different processes") {
    val conf = new SparkConf(false)
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .set("spark.kryo.registrator", classOf[AppJarRegistrator].getName)
      .set(config.MAX_TASK_FAILURES, 1)
      .set(config.BLACKLIST_ENABLED, false)

    val jar = TestUtils.createJarWithClasses(List(AppJarRegistrator.customClassName))
    conf.setJars(List(jar.getPath))

    sc = new SparkContext("local-cluster[2,1,1024]", "test", conf)
    val original = Thread.currentThread.getContextClassLoader
    val loader = new java.net.URLClassLoader(Array(jar), Utils.getContextOrSparkClassLoader)
    SparkEnv.get.serializer.setDefaultClassLoader(loader)

    val cachedRDD = sc.parallelize((0 until 10).map((_, new MyCustomClass)), 3).cache()

    // Randomly mix the keys so that the join below will require a shuffle with each partition
    // sending data to multiple other partitions.
    val shuffledRDD = cachedRDD.map { case (i, o) => (i * i * i - 10 * i * i, o)}

    // Join the two RDDs, and force evaluation
    assert(shuffledRDD.join(cachedRDD).collect().size == 1)
  }
}

object KryoDistributedTest {
  class MyCustomClass

  class AppJarRegistrator extends KryoRegistrator {
    override def registerClasses(k: Kryo) {
      val classLoader = Thread.currentThread.getContextClassLoader
      // scalastyle:off classforname
      k.register(Class.forName(AppJarRegistrator.customClassName, true, classLoader))
      // scalastyle:on classforname
    }
  }

  object AppJarRegistrator {
    val customClassName = "KryoSerializerDistributedSuiteCustomClass"
  }
} 
Example 52
Source File: DataSerializerTest.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.kryo

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.UUID

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import justin.db.Data
import justin.db.consistenthashing.NodeId
import justin.db.vectorclocks.{Counter, VectorClock}
import org.scalatest.{FlatSpec, Matchers}

class DataSerializerTest extends FlatSpec with Matchers {

  behavior of "Data Serializer"

  it should "serialize/deserialize correctly" in {
    // kryo init
    val kryo = new Kryo()
    kryo.register(classOf[justin.db.Data], DataSerializer)

    // object
    val vClock         = VectorClock[NodeId](Map(NodeId(1) -> Counter(3)))
    val timestamp      = System.currentTimeMillis()
    val serializedData = Data(id = UUID.randomUUID(), value = "some value", vClock, timestamp)

    // serialization
    val bos    = new ByteArrayOutputStream()
    val output = new Output(bos)
    val _      = kryo.writeObject(output, serializedData)
    output.flush()

    // deserialization
    val bis              = new ByteArrayInputStream(bos.toByteArray)
    val input            = new Input(bis)
    val deserializedData = kryo.readObject(input, classOf[Data])

    serializedData shouldBe deserializedData
  }
} 
Example 53
Source File: StorageNodeWriteDataLocalSerializerTest.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.kryo

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.UUID

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import justin.db.Data
import justin.db.actors.protocol.StorageNodeWriteDataLocal
import justin.db.consistenthashing.NodeId
import justin.db.vectorclocks.{Counter, VectorClock}
import org.scalatest.{FlatSpec, Matchers}

class StorageNodeWriteDataLocalSerializerTest extends FlatSpec with Matchers {

  behavior of "StorageNodeWriteDataLocal Serializer"

  it should "serialize/deserialize StorageNodeWriteDataLocal" in {
    // kryo init
    val kryo = new Kryo()
    kryo.register(classOf[StorageNodeWriteDataLocal], StorageNodeWriteDataLocalSerializer)

    // object
    val data = Data(
      id        = UUID.randomUUID(),
      value     = "some value",
      vclock    = VectorClock[NodeId](Map(NodeId(1) -> Counter(3))),
      timestamp = System.currentTimeMillis()
    )
    val serializedData = StorageNodeWriteDataLocal(data)

    // serialization
    val bos    = new ByteArrayOutputStream()
    val output = new Output(bos)
    val _      = kryo.writeObject(output, serializedData)
    output.flush()

    // deserialization
    val bis              = new ByteArrayInputStream(bos.toByteArray)
    val input            = new Input(bis)
    val deserializedData = kryo.readObject(input, classOf[StorageNodeWriteDataLocal])

    serializedData shouldBe deserializedData
  }
} 
Example 54
Source File: StorageNodeLocalReadSerializerTest.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.kryo

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.UUID

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import justin.db.actors.protocol.StorageNodeLocalRead
import org.scalatest.{FlatSpec, Matchers}

class StorageNodeLocalReadSerializerTest extends FlatSpec with Matchers {

  behavior of "StorageNodeLocalReader Serializer"

  it should "serialize/deserialize correctly" in {
    // kryo init
    val kryo = new Kryo()
    kryo.register(classOf[StorageNodeLocalRead], StorageNodeLocalReadSerializer)

    // object
    val serializedData = StorageNodeLocalRead(UUID.randomUUID())

    // serialization
    val bos    = new ByteArrayOutputStream()
    val output = new Output(bos)
    val _      = kryo.writeObject(output, serializedData)
    output.flush()

    // deserialization
    val bis              = new ByteArrayInputStream(bos.toByteArray)
    val input            = new Input(bis)
    val deserializedData = kryo.readObject(input, classOf[StorageNodeLocalRead])

    serializedData shouldBe deserializedData
  }
} 
Example 55
Source File: RocksDBStorage.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.storage

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File}
import java.util.UUID

import com.esotericsoftware.kryo.io.{Input, Output}
import com.esotericsoftware.kryo.{Kryo, Serializer}
import justin.db.storage.PluggableStorageProtocol.{Ack, StorageGetData}
import org.rocksdb.{FlushOptions, Options, RocksDB}

import scala.concurrent.Future

// TODO:
// The current version stores all data in a single file (it does not take data origin into account).
// Data should eventually be split by ring partitionId.
// This might be an issue during possible data movements between nodes.
final class RocksDBStorage(dir: File) extends PluggableStorageProtocol {
  import RocksDBStorage._

  {
    RocksDB.loadLibrary()
  }

  private[this] val kryo = new Kryo()

  private[this] val db: RocksDB = {
    val options: Options = new Options().setCreateIfMissing(true)
    RocksDB.open(options, dir.getPath)
  }

  override def get(id: UUID)(resolveOriginality: (UUID) => PluggableStorageProtocol.DataOriginality): Future[PluggableStorageProtocol.StorageGetData] = {
    val key: Array[Byte] = uuid2bytes(kryo, id)
    val dataBytes: Array[Byte] = db.get(key)

    val justinDataOpt = Option(dataBytes).map { dataBytes =>
      val input = new Input(new ByteArrayInputStream(dataBytes))
      JustinDataSerializer.read(kryo, input, classOf[JustinData])
    }

    Future.successful(justinDataOpt.map(StorageGetData.Single).getOrElse(StorageGetData.None))
  }

  override def put(data: JustinData)(resolveOriginality: (UUID) => PluggableStorageProtocol.DataOriginality): Future[PluggableStorageProtocol.Ack] = {
    val key: Array[Byte] = uuid2bytes(kryo, data.id)
    val dataBytes: Array[Byte] = {
      val output = new Output(new ByteArrayOutputStream())
      JustinDataSerializer.write(kryo, output, data)
      output.getBuffer
    }

    db.put(key, dataBytes)
    db.flush(new FlushOptions().setWaitForFlush(true))

    Ack.future
  }
}

object RocksDBStorage {

  def uuid2bytes(kryo: Kryo, id: UUID): Array[Byte] = {
    val output = new Output(new ByteArrayOutputStream(), 16)
    UUIDSerializer.write(kryo, output, id)
    output.getBuffer
  }

  object UUIDSerializer extends Serializer[UUID] {
    override def read(kryo: Kryo, input: Input, `type`: Class[UUID]): UUID = {
      new UUID(input.readLong, input.readLong)
    }

    override def write(kryo: Kryo, output: Output, uuid: UUID): Unit = {
      output.writeLong(uuid.getMostSignificantBits)
      output.writeLong(uuid.getLeastSignificantBits)
    }
  }

  object JustinDataSerializer extends Serializer[JustinData] {
    override def read(kryo: Kryo, input: Input, `type`: Class[JustinData]): JustinData = {
      JustinData(
        id        = UUIDSerializer.read(kryo, input, classOf[UUID]),
        value     = input.readString(),
        vclock    = input.readString(),
        timestamp = input.readLong()
      )
    }

    override def write(kryo: Kryo, output: Output, data: JustinData): Unit = {
      UUIDSerializer.write(kryo, output, data.id)
      output.writeString(data.value)
      output.writeString(data.vclock)
      output.writeLong(data.timestamp)
    }
  }
} 
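Note that uuid2bytes and put both return output.getBuffer, which is the whole internal buffer (exactly 16 bytes for the UUID case, but the full default-sized buffer for JustinDataSerializer), so stored values may carry trailing unused bytes; the reads above tolerate this because they stop once every field has been consumed. When only the written bytes are wanted, Kryo's Output.toBytes trims to the current position, as this small sketch illustrates:

import java.io.ByteArrayOutputStream

import com.esotericsoftware.kryo.io.Output

val output = new Output(new ByteArrayOutputStream(), 64)
output.writeLong(42L)

output.getBuffer.length  // 64: the capacity of the internal buffer
output.toBytes.length    // 8: only the bytes actually written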
Example 56
Source File: JustinDataSerializerTest.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.storage

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.UUID

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import justin.db.storage.RocksDBStorage.JustinDataSerializer
import org.scalatest.{FlatSpec, Matchers}

class JustinDataSerializerTest extends FlatSpec with Matchers {

  behavior of "JustinDataSerializer"

  it should "serialize/deserialize JustinData with Kryo" in {
    val kryo = new Kryo()
    val data = JustinData(
      id        = UUID.randomUUID,
      value     = "to jest przykladowa wartość",
      vclock    = "vclock-value",
      timestamp = 1234124L
    )

    // serialize
    val output = new Output(new ByteArrayOutputStream())
    JustinDataSerializer.write(kryo, output, data)
    val dataBytes = output.getBuffer

    // deserialize
    val input = new Input(new ByteArrayInputStream(dataBytes))
    JustinDataSerializer.read(kryo, input, classOf[JustinData]) shouldBe data
  }
} 
Example 57
Source File: UUIDSerializerTest.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.storage

import java.io.ByteArrayInputStream
import java.util.UUID

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.Input
import justin.db.storage.RocksDBStorage.UUIDSerializer
import org.scalatest.{FlatSpec, Matchers}

class UUIDSerializerTest extends FlatSpec with Matchers {

  behavior of "UUIDSerializer"

  it should "serialize/deserialize UUID with Kryo" in {
    val uuid = UUID.randomUUID()
    val kryo = new Kryo()

    // serialize
    val bytes = RocksDBStorage.uuid2bytes(kryo, uuid)

    // deserialize
    val input = new Input(new ByteArrayInputStream(bytes))
    val id = UUIDSerializer.read(kryo, input, classOf[UUID])

    uuid shouldBe id
  }
} 
Example 58
Source File: WritableSerializer.scala    From spark-util   with Apache License 2.0 5 votes vote down vote up
package org.hammerlab.hadoop.kryo

import java.io.{ DataInputStream, DataOutputStream }

import com.esotericsoftware.kryo
import com.esotericsoftware.kryo.io.{ Input, Output }
import com.esotericsoftware.kryo.{ Kryo, Serializer }
import org.apache.hadoop.io.Writable


class WritableSerializer[T <: Writable](ctorArgs: Any*)
  extends kryo.Serializer[T] {
  override def read(kryo: Kryo, input: Input, clz: Class[T]): T = {
    val t = clz.newInstance()
    t.readFields(new DataInputStream(input))
    t
  }

  override def write(kryo: Kryo, output: Output, t: T): Unit = {
    t.write(new DataOutputStream(output))
  }
} 
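A sketch of registering this serializer for a concrete Writable, using Hadoop's Text as an illustration; any Writable with a no-argument constructor works, since read instantiates via clz.newInstance():

import java.io.ByteArrayOutputStream

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import org.apache.hadoop.io.Text

val kryo = new Kryo()
kryo.register(classOf[Text], new WritableSerializer[Text]())

val baos = new ByteArrayOutputStream()
val output = new Output(baos, 4096)
kryo.writeObject(output, new Text("hello"))
output.flush()

val copy = kryo.readObject(new Input(baos.toByteArray), classOf[Text])
// copy.toString == "hello"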
Example 59
Source File: SerializableSerializer.scala    From spark-util   with Apache License 2.0 5 votes vote down vote up
package org.hammerlab.hadoop.kryo

import java.io.{ ObjectInputStream, ObjectOutputStream }

import com.esotericsoftware.kryo.io.{ Input, Output }
import com.esotericsoftware.kryo.{ Kryo, Serializer }


case class SerializableSerializer[T <: Serializable]()
  extends Serializer[T] {
  override def read(kryo: Kryo, input: Input, `type`: Class[T]): T =
    new ObjectInputStream(input)
      .readObject()
      .asInstanceOf[T]

  override def write(kryo: Kryo, output: Output, t: T): Unit =
    new ObjectOutputStream(output)
      .writeObject(t)
} 
Example 60
Source File: Serializer.scala    From spark-util   with Apache License 2.0 5 votes vote down vote up
package org.hammerlab.kryo

import com.esotericsoftware.kryo
import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{ Input, Output }

sealed trait Serializer[T] {
  def apply(k: Kryo): kryo.Serializer[T]
}

object Serializer {
  implicit class Kryo[T](serializer: kryo.Serializer[T]) extends Serializer[T] {
    override def apply(k: kryo.Kryo): kryo.Serializer[T] = serializer
  }
  implicit class Fn[T](fn: kryo.Kryo ⇒ kryo.Serializer[T]) extends Serializer[T] {
    override def apply(k: kryo.Kryo): kryo.Serializer[T] = fn(k)
  }

  def apply[T](r: (kryo.Kryo, Input) ⇒ T, w: (kryo.Kryo, Output, T) ⇒ Unit): Serializer[T] =
    new kryo.Serializer[T] {
      override def read(k: kryo.Kryo, input: Input, `type`: Class[T]): T = r(k, input)
      override def write(k: kryo.Kryo, output: Output, t: T): Unit = w(k, output, t)
    }

  def apply[T](fn: kryo.Kryo ⇒ kryo.Serializer[T]): Serializer[T] = fn
} 
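The function-pair apply above lets a serializer be defined inline from a read closure and a write closure. A sketch using java.util.UUID as an illustrative payload type:

import java.util.UUID

import org.hammerlab.kryo.Serializer

val uuidSerializer: Serializer[UUID] =
  Serializer[UUID](
    (_, input) => new UUID(input.readLong(), input.readLong()),
    (_, output, uuid) => {
      output.writeLong(uuid.getMostSignificantBits)
      output.writeLong(uuid.getLeastSignificantBits)
    }
  )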
Example 61
Source File: Registration.scala    From spark-util   with Apache License 2.0 5 votes vote down vote up
package org.hammerlab.kryo

import com.esotericsoftware.kryo
import com.esotericsoftware.kryo.Kryo
import org.apache.spark.serializer.KryoRegistrator
import org.hammerlab.kryo.spark.Registrator

import scala.reflect.ClassTag


  implicit def classNameWithImplicits(
    className: String
  ):
    ClassWithSerializerToRegister[_] =
    ClassWithSerializerToRegister(
      Class.forName(className),
      None,
      None
    )
} 
Example 62
Source File: SerializableSerializerTest.scala    From spark-util   with Apache License 2.0 5 votes vote down vote up
package org.hammerlab.hadoop.kryo

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream }

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{ Input, Output }
import org.hammerlab.test.Suite

class SerializableSerializerTest
  extends Suite {
  test("serde") {
    val kryo = new Kryo()
    kryo.setRegistrationRequired(true)
    val baos = new ByteArrayOutputStream()
    val output = new Output(baos)

    val foo = new Foo
    foo.n = 123
    foo.s = "abc"

    intercept[IllegalArgumentException] {
      kryo.writeClassAndObject(output, foo)
    }
    .getMessage should startWith("Class is not registered: org.hammerlab.hadoop.kryo.Foo")

    kryo.register(classOf[Foo], SerializableSerializer[Foo]())

    kryo.writeClassAndObject(output, foo)

    output.close()

    val bytes = baos.toByteArray
    bytes.length should be(93)

    val bais = new ByteArrayInputStream(bytes)

    val input = new Input(bais)
    val after = kryo.readClassAndObject(input).asInstanceOf[Foo]

    after.n should be(foo.n)
    after.s should be(foo.s)
  }
}

class Foo
  extends Serializable {

  var n = 0
  var s = ""

  private def writeObject(out: ObjectOutputStream): Unit = {
    out.writeInt(n)
    out.writeUTF(s)
  }

  private def readObject(in: ObjectInputStream): Unit = {
    n = in.readInt()
    s = in.readUTF()
  }
} 
Example 63
Source File: L4-3ProtonFlux.scala    From prosparkstreaming   with Apache License 2.0 5 votes vote down vote up
package org.apress.prospark

import com.esotericsoftware.kryo.{KryoSerializable,Kryo}
import com.esotericsoftware.kryo.io.{Output, Input}

class ProtonFlux(
    var year: Int,
    var bin0_57to1_78: Double,
    var bin3_40to17_6: Double,
    var bin22_0to31_0: Double,
    var bin1_894to2_605: Double,
    var bin4_200to6_240: Double,
    var bin3_256to8_132: Double,
    var bin3_276to8_097: Double,
    var bin6_343to42_03: Double,
    var bin17_88to26_81: Double,
    var bin30_29to69_47: Double,
    var bin132_8to242_0: Double
  ) extends KryoSerializable {
  
  def this(year: String, bin0_57to1_78: String, bin3_40to17_6: String, 
      bin22_0to31_0: String, bin1_894to2_605: String, bin4_200to6_240: String, 
      bin3_256to8_132: String, bin3_276to8_097: String, bin6_343to42_03: String,
      bin17_88to26_81: String, bin30_29to69_47: String, bin132_8to242_0: String) {
    this(year.toInt, bin0_57to1_78.toDouble, bin3_40to17_6.toDouble,
        bin22_0to31_0.toDouble, bin1_894to2_605.toDouble, bin4_200to6_240.toDouble, 
        bin3_256to8_132.toDouble, bin3_276to8_097.toDouble, bin6_343to42_03.toDouble,
        bin17_88to26_81.toDouble, bin30_29to69_47.toDouble, bin132_8to242_0.toDouble)
  }
  
  def isSolarStorm = (bin0_57to1_78 > 1.0 || bin3_40to17_6 > 1.0 
    || bin22_0to31_0 > 1.0 || bin1_894to2_605 > 1.0 || bin4_200to6_240 > 1.0 
    || bin3_256to8_132 > 1.0 || bin3_276to8_097 > 1.0 || bin6_343to42_03 > 1.0
    || bin17_88to26_81 > 1.0 || bin30_29to69_47 > 1.0 || bin132_8to242_0 > 1.0)

  override def write(kryo: Kryo, output: Output) {
    output.writeInt(year)
    output.writeDouble(bin0_57to1_78)
    output.writeDouble(bin3_40to17_6)
    output.writeDouble(bin22_0to31_0)
    output.writeDouble(bin1_894to2_605)
    output.writeDouble(bin4_200to6_240)
    output.writeDouble(bin3_256to8_132)
    output.writeDouble(bin3_276to8_097)
    output.writeDouble(bin6_343to42_03)
    output.writeDouble(bin17_88to26_81)
    output.writeDouble(bin30_29to69_47)
    output.writeDouble(bin132_8to242_0)
  }

  override def read(kryo: Kryo, input: Input) {
    year = input.readInt()
    bin0_57to1_78 = input.readDouble()
    bin3_40to17_6 = input.readDouble()
    bin22_0to31_0 = input.readDouble()
    bin1_894to2_605 = input.readDouble()
    bin4_200to6_240 = input.readDouble()
    bin3_256to8_132 = input.readDouble()
    bin3_276to8_097 = input.readDouble()
    bin6_343to42_03 = input.readDouble()
    bin17_88to26_81 = input.readDouble()
    bin30_29to69_47 = input.readDouble()
    bin132_8to242_0 = input.readDouble()
  }

} 
Example 64
Source File: StormSerializationFramework.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.experiments.storm.util

import java.lang.{Integer => JInteger}
import java.util.{Map => JMap}

import akka.actor.ExtendedActorSystem
import backtype.storm.serialization.SerializationFactory
import backtype.storm.utils.ListDelegate
import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}

import org.apache.gearpump.cluster.UserConfig
import org.apache.gearpump.experiments.storm.topology.GearpumpTuple
import org.apache.gearpump.experiments.storm.util.StormConstants._
import org.apache.gearpump.serializer.{SerializationFramework, Serializer}

class StormSerializationFramework extends SerializationFramework {
  private var stormConfig: JMap[AnyRef, AnyRef] = null
  private var pool: ThreadLocal[Serializer] = null

  override def init(system: ExtendedActorSystem, config: UserConfig): Unit = {
    implicit val actorSystem = system
    stormConfig = config.getValue[JMap[AnyRef, AnyRef]](STORM_CONFIG).get
    pool = new ThreadLocal[Serializer]() {
      override def initialValue(): Serializer = {
        val kryo = SerializationFactory.getKryo(stormConfig)
        new StormSerializer(kryo)
      }
    }
  }

  override def get(): Serializer = {
    pool.get()
  }
}


class StormSerializer(kryo: Kryo) extends Serializer {
  // -1 means the max buffer size is 2147483647
  private val output = new Output(4096, -1)
  private val input = new Input

  override def serialize(message: Any): Array[Byte] = {
    val tuple = message.asInstanceOf[GearpumpTuple]
    output.clear()
    output.writeInt(tuple.sourceTaskId)
    output.writeString(tuple.sourceStreamId)
    val listDelegate = new ListDelegate
    listDelegate.setDelegate(tuple.values)
    kryo.writeObject(output, listDelegate)
    output.toBytes
  }

  override def deserialize(msg: Array[Byte]): Any = {
    input.setBuffer(msg)
    val sourceTaskId: JInteger = input.readInt
    val sourceStreamId: String = input.readString
    val listDelegate = kryo.readObject[ListDelegate](input, classOf[ListDelegate])
    new GearpumpTuple(listDelegate.getDelegate, sourceTaskId, sourceStreamId, null)
  }
} 
Example 65
Source File: StormSerializerPoolSpec.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.experiments.storm.util

import java.util.{HashMap => JHashMap, List => JList, Map => JMap}
import scala.collection.JavaConverters._

import akka.actor.ExtendedActorSystem
import backtype.storm.utils.Utils
import com.esotericsoftware.kryo.Kryo
import org.scalacheck.Gen
import org.scalatest.mock.MockitoSugar
import org.scalatest.prop.PropertyChecks
import org.scalatest.{Matchers, PropSpec}

import org.apache.gearpump.cluster.UserConfig
import org.apache.gearpump.experiments.storm.topology.GearpumpTuple
import org.apache.gearpump.experiments.storm.util.StormConstants._
import org.apache.gearpump.streaming.MockUtil

class StormSerializerPoolSpec extends PropSpec with PropertyChecks with Matchers with MockitoSugar {

  property("StormSerializerPool should create and manage StormSerializer") {
    val taskContext = MockUtil.mockTaskContext
    val serializerPool = new StormSerializationFramework
    val system = taskContext.system.asInstanceOf[ExtendedActorSystem]
    implicit val actorSystem = system
    val stormConfig = Utils.readDefaultConfig.asInstanceOf[JMap[AnyRef, AnyRef]]
    val config = UserConfig.empty.withValue[JMap[AnyRef, AnyRef]](STORM_CONFIG, stormConfig)
    serializerPool.init(system, config)
    serializerPool.get shouldBe a[StormSerializer]
  }

  property("StormSerializer should serialize and deserialize GearpumpTuple") {
    val tupleGen = for {
      values <- Gen.listOf[String](Gen.alphaStr).map(_.asJava.asInstanceOf[JList[AnyRef]])
      sourceTaskId <- Gen.chooseNum[Int](0, Int.MaxValue)
      sourceStreamId <- Gen.alphaStr
    } yield new GearpumpTuple(values, new Integer(sourceTaskId), sourceStreamId, null)

    val kryo = new Kryo
    forAll(tupleGen) { (tuple: GearpumpTuple) =>
      val serializer = new StormSerializer(kryo)
      serializer.deserialize(serializer.serialize(tuple)) shouldBe tuple
    }
  }
} 
Example 66
Source File: KryoSerializerDistributedSuite.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.serializer

import com.esotericsoftware.kryo.Kryo

import org.apache.spark._
import org.apache.spark.internal.config
import org.apache.spark.serializer.KryoDistributedTest._
import org.apache.spark.util.Utils

class KryoSerializerDistributedSuite extends SparkFunSuite with LocalSparkContext {

  test("kryo objects are serialised consistently in different processes") {
    val conf = new SparkConf(false)
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .set("spark.kryo.registrator", classOf[AppJarRegistrator].getName)
      .set(config.MAX_TASK_FAILURES, 1)
      .set(config.BLACKLIST_ENABLED, false)

    val jar = TestUtils.createJarWithClasses(List(AppJarRegistrator.customClassName))
    conf.setJars(List(jar.getPath))

    sc = new SparkContext("local-cluster[2,1,1024]", "test", conf)
    val original = Thread.currentThread.getContextClassLoader
    val loader = new java.net.URLClassLoader(Array(jar), Utils.getContextOrSparkClassLoader)
    SparkEnv.get.serializer.setDefaultClassLoader(loader)

    val cachedRDD = sc.parallelize((0 until 10).map((_, new MyCustomClass)), 3).cache()

    // Randomly mix the keys so that the join below will require a shuffle with each partition
    // sending data to multiple other partitions.
    val shuffledRDD = cachedRDD.map { case (i, o) => (i * i * i - 10 * i * i, o)}

    // Join the two RDDs, and force evaluation
    assert(shuffledRDD.join(cachedRDD).collect().size == 1)
  }
}

object KryoDistributedTest {
  class MyCustomClass

  class AppJarRegistrator extends KryoRegistrator {
    override def registerClasses(k: Kryo) {
      val classLoader = Thread.currentThread.getContextClassLoader
      // scalastyle:off classforname
      k.register(Class.forName(AppJarRegistrator.customClassName, true, classLoader))
      // scalastyle:on classforname
    }
  }

  object AppJarRegistrator {
    val customClassName = "KryoSerializerDistributedSuiteCustomClass"
  }
}