org.apache.spark.serializer.SerializerInstance Scala Examples
The following examples show how to use org.apache.spark.serializer.SerializerInstance.
You can go to the original project or source file by following the links above each example.
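Before the project-specific examples, here is a minimal, self-contained sketch of the SerializerInstance API itself. A SerializerInstance is obtained from a Serializer (JavaSerializer and KryoSerializer are the built-in implementations); the object name and sample value below are illustrative, not taken from any of the projects that follow.

import java.nio.ByteBuffer

import org.apache.spark.SparkConf
import org.apache.spark.serializer.{JavaSerializer, SerializerInstance}

// Round trip: serialize[T] produces a ByteBuffer and deserialize[T] reads it
// back; both take an implicit ClassTag for T.
object RoundTripSketch {
  def main(args: Array[String]): Unit = {
    val ser: SerializerInstance = new JavaSerializer(new SparkConf()).newInstance()
    val buf: ByteBuffer = ser.serialize(Seq(1, 2, 3))
    val back: Seq[Int] = ser.deserialize[Seq[Int]](buf)
    assert(back == Seq(1, 2, 3))
  }
}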
Example 1
Source File: TaskDescription.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.scheduler

import java.nio.ByteBuffer

import scala.collection.mutable
import scala.collection.mutable.HashSet
import scala.util.control.NonFatal

import org.apache.spark._
import org.apache.spark.internal.Logging
import org.apache.spark.serializer.SerializerInstance
import org.apache.spark.util.SerializableBuffer

private[spark] class TaskDescription(
    val taskId: Long,
    val attemptNumber: Int,
    val executorId: String,
    val name: String,
    val index: Int, // Index within this task's TaskSet
    val isFutureTask: Boolean,
    @transient private val _task: Task[_],
    @transient private val _addedFiles: mutable.Map[String, Long],
    @transient private val _addedJars: mutable.Map[String, Long],
    @transient private val _ser: SerializerInstance)
  extends Serializable with Logging {

  // Because ByteBuffers are not serializable, wrap the task in a SerializableBuffer
  private var buffer: SerializableBuffer = _

  def prepareSerializedTask(): Unit = {
    if (_task != null) {
      val serializedTask: ByteBuffer = try {
        Task.serializeWithDependencies(_task, _addedFiles, _addedJars, _ser)
      } catch {
        // If the task cannot be serialized, then there is no point in re-attempting
        // the task as it will always fail. So just abort the task set.
        case NonFatal(e) =>
          val msg = s"Failed to serialize the task $taskId, not attempting to retry it."
          logError(msg, e)
          // FIXME(shivaram): We don't have a handle to the taskSet here to abort it.
          throw new TaskNotSerializableException(e)
      }
      if (serializedTask.limit > TaskSetManager.TASK_SIZE_TO_WARN_KB * 1024) {
        logWarning(s"Stage ${_task.stageId} contains a task of very large size " +
          s"(${serializedTask.limit / 1024} KB). The maximum recommended task size is " +
          s"${TaskSetManager.TASK_SIZE_TO_WARN_KB} KB.")
      }
      buffer = new SerializableBuffer(serializedTask)
    } else {
      buffer = new SerializableBuffer(ByteBuffer.allocate(0))
    }
  }

  def serializedTask: ByteBuffer = buffer.value

  override def toString: String = "TaskDescription(TID=%d, index=%d)".format(taskId, index)
}
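The comment in prepareSerializedTask exists because java.nio.ByteBuffer does not implement java.io.Serializable, so the task bytes are wrapped in SerializableBuffer. That class is private[spark]; the following is a minimal sketch of the same wrapping pattern, with a made-up class name (WrappedBuffer), not Spark's actual implementation.

import java.io.{ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer

// Copy the buffer's bytes out during Java serialization and wrap them again
// on the way back in.
class WrappedBuffer(@transient var buffer: ByteBuffer) extends Serializable {

  private def writeObject(out: ObjectOutputStream): Unit = {
    val bytes = new Array[Byte](buffer.remaining())
    buffer.duplicate().get(bytes) // duplicate() leaves the original position untouched
    out.writeInt(bytes.length)
    out.write(bytes)
  }

  private def readObject(in: ObjectInputStream): Unit = {
    val bytes = new Array[Byte](in.readInt())
    in.readFully(bytes)
    buffer = ByteBuffer.wrap(bytes)
  }
}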
Example 2
Source File: TaskResult.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.scheduler

import java.io._
import java.nio.ByteBuffer

import scala.collection.mutable.ArrayBuffer

import org.apache.spark.SparkEnv
import org.apache.spark.serializer.SerializerInstance
import org.apache.spark.storage.BlockId
import org.apache.spark.util.{AccumulatorV2, Utils}

// Task result. Also contains updates to accumulator variables.
private[spark] sealed trait TaskResult[T]

// A TaskResult that contains the task's return value and accumulator updates.
// (Excerpt: the Externalizable read/write plumbing is elided.)
private[spark] class DirectTaskResult[T](
    var valueBytes: ByteBuffer,
    var accumUpdates: Seq[AccumulatorV2[_, _]])
  extends TaskResult[T] with Externalizable {

  private var valueObjectDeserialized = false
  private var valueObject: T = _

  def value(resultSer: SerializerInstance = null): T = {
    if (valueObjectDeserialized) {
      valueObject
    } else {
      // This should not run when holding a lock because it may cost dozens of
      // seconds for a large value.
      val ser = if (resultSer == null) SparkEnv.get.serializer.newInstance() else resultSer
      valueObject = ser.deserialize(valueBytes)
      valueObjectDeserialized = true
      valueObject
    }
  }
}
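The value method deserializes the result bytes at most once and caches the object, which matters because large results can take seconds to deserialize. A standalone sketch of the same deserialize-once pattern, using illustrative names (CachedResult is not part of Spark):

import java.nio.ByteBuffer

import scala.reflect.ClassTag

import org.apache.spark.serializer.SerializerInstance

// Keep the raw bytes, deserialize on first access, serve the cache afterwards.
class CachedResult[T: ClassTag](valueBytes: ByteBuffer, ser: SerializerInstance) {
  private var cached: Option[T] = None

  def value: T = cached.getOrElse {
    val v = ser.deserialize[T](valueBytes.duplicate()) // duplicate() keeps the buffer re-readable
    cached = Some(v)
    v
  }
}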
Example 3
Source File: MatfastSerializer.scala From MatRel with Apache License 2.0
package org.apache.spark.sql.matfast.util

import java.math.BigDecimal
import java.nio.ByteBuffer
import java.util.{HashMap => JavaHashMap}

import scala.reflect.ClassTag

import com.esotericsoftware.kryo.{Kryo, Serializer}
import com.esotericsoftware.kryo.io.{Input, Output}
import com.twitter.chill.ResourcePool

import org.apache.spark.{SparkConf, SparkEnv}
import org.apache.spark.serializer.{KryoSerializer, SerializerInstance}
import org.apache.spark.sql.matfast.matrix._
import org.apache.spark.sql.types.Decimal
import org.apache.spark.util.MutablePair

private[matfast] class MatfastSerializer(conf: SparkConf) extends KryoSerializer(conf) {

  override def newKryo(): Kryo = {
    val kryo = super.newKryo()
    kryo.setRegistrationRequired(false)
    kryo.register(classOf[MutablePair[_, _]])
    kryo.register(classOf[org.apache.spark.sql.catalyst.expressions.GenericRow])
    kryo.register(classOf[org.apache.spark.sql.catalyst.expressions.GenericInternalRow])
    kryo.register(classOf[java.math.BigDecimal], new JavaBigDecimalSerializer)
    kryo.register(classOf[BigDecimal], new ScalaBigDecimalSerializer)
    kryo.register(classOf[Decimal])
    kryo.register(classOf[JavaHashMap[_, _]])
    kryo.register(classOf[DenseMatrix])
    kryo.register(classOf[SparseMatrix])
    kryo.setReferences(false)
    kryo
  }
}

private[matfast] class KryoResourcePool(size: Int) extends ResourcePool[SerializerInstance](size) {

  val ser: MatfastSerializer = {
    val sparkConf = Option(SparkEnv.get).map(_.conf).getOrElse(new SparkConf())
    new MatfastSerializer(sparkConf)
  }

  def newInstance(): SerializerInstance = ser.newInstance()
}

private[matfast] object MatfastSerializer {
  @transient lazy val resourcePool = new KryoResourcePool(50)

  private[this] def acquireRelease[O](fn: SerializerInstance => O): O = {
    val kryo = resourcePool.borrow()
    try {
      fn(kryo)
    } finally {
      resourcePool.release(kryo)
    }
  }

  def serialize[T: ClassTag](o: T): Array[Byte] = {
    acquireRelease { k =>
      k.serialize(o).array()
    }
  }

  def deserialize[T: ClassTag](bytes: Array[Byte]): T =
    acquireRelease { k =>
      k.deserialize[T](ByteBuffer.wrap(bytes))
    }
}

private[matfast] class JavaBigDecimalSerializer extends Serializer[java.math.BigDecimal] {
  def write(kryo: Kryo, output: Output, bd: java.math.BigDecimal) {
    output.writeString(bd.toString)
  }
  def read(kryo: Kryo, input: Input, tpe: Class[java.math.BigDecimal]): java.math.BigDecimal = {
    new java.math.BigDecimal(input.readString())
  }
}

private[matfast] class ScalaBigDecimalSerializer extends Serializer[BigDecimal] {
  def write(kryo: Kryo, output: Output, bd: BigDecimal): Unit = {
    output.writeString(bd.toString)
  }
  def read(kryo: Kryo, input: Input, tpe: Class[BigDecimal]): BigDecimal = {
    new java.math.BigDecimal(input.readString())
  }
}
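A hypothetical round trip through the pooled serializer above. The sample value is illustrative; Spark's KryoSerializer base class already registers the common Scala collections, so nothing beyond the registrations in newKryo is needed. Because the object is private[matfast], the sketch lives inside that package.

package org.apache.spark.sql.matfast

import org.apache.spark.sql.matfast.util.MatfastSerializer

object MatfastRoundTrip {
  def main(args: Array[String]): Unit = {
    // serialize borrows a pooled SerializerInstance and releases it afterwards
    val bytes: Array[Byte] = MatfastSerializer.serialize(Seq(1.0, 2.0, 3.0))
    val restored: Seq[Double] = MatfastSerializer.deserialize[Seq[Double]](bytes)
    assert(restored == Seq(1.0, 2.0, 3.0))
  }
}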
Example 4
Source File: SerializerFactory.scala From spark-http-stream with BSD 2-Clause "Simplified" License
package org.apache.spark.sql.execution.streaming.http

import java.io.InputStream
import java.io.OutputStream
import java.nio.ByteBuffer

import scala.reflect.ClassTag

import com.fasterxml.jackson.databind.ObjectMapper

import org.apache.spark.SparkConf
import org.apache.spark.serializer.DeserializationStream
import org.apache.spark.serializer.JavaSerializer
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.serializer.SerializationStream
import org.apache.spark.serializer.SerializerInstance

object SerializerFactory {
  val DEFAULT = new SerializerFactory {
    override def getSerializerInstance(serializerName: String): SerializerInstance = {
      serializerName.toLowerCase() match {
        case "kryo" ⇒
          new KryoSerializer(new SparkConf()).newInstance();
        case "java" ⇒
          new JavaSerializer(new SparkConf()).newInstance();
        case _ ⇒
          throw new InvalidSerializerNameException(serializerName);
      }
    }
  }
}

trait SerializerFactory {
  def getSerializerInstance(serializerName: String): SerializerInstance;
}
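A hypothetical use of the factory; the serializer name and payload are illustrative:

import java.nio.ByteBuffer

import org.apache.spark.serializer.SerializerInstance
import org.apache.spark.sql.execution.streaming.http.SerializerFactory

object FactorySketch {
  def main(args: Array[String]): Unit = {
    // "kryo" and "java" are the two names the DEFAULT factory understands.
    val ser: SerializerInstance = SerializerFactory.DEFAULT.getSerializerInstance("kryo")
    val buf: ByteBuffer = ser.serialize("hello")
    assert(ser.deserialize[String](buf) == "hello")
  }
}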
Example 5
Source File: RemoteShuffleUtils.scala From OAP with Apache License 2.0
package org.apache.spark.shuffle.remote

import java.util.UUID

import org.apache.hadoop.fs.Path

import org.apache.spark.SparkEnv
import org.apache.spark.executor.ShuffleWriteMetrics
import org.apache.spark.serializer.{SerializerInstance, SerializerManager}
import org.apache.spark.shuffle.ShuffleWriteMetricsReporter
import org.apache.spark.storage.{BlockId, TempLocalBlockId, TempShuffleBlockId}

object RemoteShuffleUtils {

  val env = SparkEnv.get

  def getRemoteWriter(
      blockId: BlockId,
      file: Path,
      serializerManager: SerializerManager,
      serializerInstance: SerializerInstance,
      bufferSize: Int,
      writeMetrics: ShuffleWriteMetricsReporter): RemoteBlockObjectWriter = {
    val syncWrites = false // env.blockManager.conf.getBoolean("spark.shuffle.sync", false)
    new RemoteBlockObjectWriter(
      file,
      serializerManager,
      serializerInstance,
      bufferSize,
      syncWrites,
      writeMetrics,
      blockId)
  }
}
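A hypothetical wiring of getRemoteWriter, assuming a running SparkEnv (executor side). The path and buffer size are made-up values, and the sketch sits in an org.apache.spark package because several of the types involved, such as the ShuffleWriteMetrics constructor, are private[spark].

package org.apache.spark.shuffle.remote

import java.util.UUID

import org.apache.hadoop.fs.Path

import org.apache.spark.SparkEnv
import org.apache.spark.executor.ShuffleWriteMetrics
import org.apache.spark.storage.TempShuffleBlockId

object RemoteWriterSketch {
  def example(): RemoteBlockObjectWriter = {
    val env = SparkEnv.get // assumes an initialized Spark environment
    val blockId = TempShuffleBlockId(UUID.randomUUID())
    RemoteShuffleUtils.getRemoteWriter(
      blockId,
      new Path(s"/tmp/remote-shuffle/${blockId.name}"), // illustrative location
      env.serializerManager,
      env.serializer.newInstance(),
      bufferSize = 32 * 1024, // illustrative; mirrors Spark's default file buffer
      writeMetrics = new ShuffleWriteMetrics())
  }
}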
Example 6
Source File: TaskResult.scala From sparkoscope with Apache License 2.0
package org.apache.spark.scheduler

import java.io._
import java.nio.ByteBuffer

import scala.collection.mutable.ArrayBuffer

import org.apache.spark.SparkEnv
import org.apache.spark.serializer.SerializerInstance
import org.apache.spark.storage.BlockId
import org.apache.spark.util.{AccumulatorV2, Utils}

// Task result. Also contains updates to accumulator variables.
private[spark] sealed trait TaskResult[T]

// A TaskResult that contains the task's return value and accumulator updates.
// (Excerpt: the Externalizable read/write plumbing is elided.)
private[spark] class DirectTaskResult[T](
    var valueBytes: ByteBuffer,
    var accumUpdates: Seq[AccumulatorV2[_, _]])
  extends TaskResult[T] with Externalizable {

  private var valueObjectDeserialized = false
  private var valueObject: T = _

  def value(resultSer: SerializerInstance = null): T = {
    if (valueObjectDeserialized) {
      valueObject
    } else {
      // This should not run when holding a lock because it may cost dozens of
      // seconds for a large value.
      val ser = if (resultSer == null) SparkEnv.get.serializer.newInstance() else resultSer
      valueObject = ser.deserialize(valueBytes)
      valueObjectDeserialized = true
      valueObject
    }
  }
}
Example 7
Source File: TaskResult.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.scheduler

import java.io._
import java.nio.ByteBuffer

import scala.collection.mutable.ArrayBuffer

import org.apache.spark.SparkEnv
import org.apache.spark.serializer.SerializerInstance
import org.apache.spark.storage.BlockId
import org.apache.spark.util.{AccumulatorV2, Utils}

// Task result. Also contains updates to accumulator variables.
private[spark] sealed trait TaskResult[T]

// A TaskResult that contains the task's return value and accumulator updates.
// (Excerpt: the Externalizable plumbing, and the `user` member this fork uses
// to resolve the submitting tenant's SparkEnv, are elided.)
private[spark] class DirectTaskResult[T](
    var valueBytes: ByteBuffer,
    var accumUpdates: Seq[AccumulatorV2[_, _]])
  extends TaskResult[T] with Externalizable {

  private var valueObjectDeserialized = false
  private var valueObject: T = _

  def value(resultSer: SerializerInstance = null): T = {
    if (valueObjectDeserialized) {
      valueObject
    } else {
      // This should not run when holding a lock because it may cost dozens of
      // seconds for a large value.
      // Unlike stock Spark, this fork looks up the per-user SparkEnv.
      val ser = if (resultSer == null) SparkEnv.get(user).serializer.newInstance() else resultSer
      valueObject = ser.deserialize(valueBytes)
      valueObjectDeserialized = true
      valueObject
    }
  }
}
Example 8
Source File: TaskResult.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.scheduler

import java.io._
import java.nio.ByteBuffer

import scala.collection.mutable.ArrayBuffer

import org.apache.spark.SparkEnv
import org.apache.spark.serializer.SerializerInstance
import org.apache.spark.storage.BlockId
import org.apache.spark.util.{AccumulatorV2, Utils}

// Task result. Also contains updates to accumulator variables.
private[spark] sealed trait TaskResult[T]

// A TaskResult that contains the task's return value and accumulator updates.
// (Excerpt: the Externalizable read/write plumbing is elided.)
private[spark] class DirectTaskResult[T](
    var valueBytes: ByteBuffer,
    var accumUpdates: Seq[AccumulatorV2[_, _]])
  extends TaskResult[T] with Externalizable {

  private var valueObjectDeserialized = false
  private var valueObject: T = _

  def value(resultSer: SerializerInstance = null): T = {
    if (valueObjectDeserialized) {
      valueObject
    } else {
      // This should not run when holding a lock because it may cost dozens of
      // seconds for a large value.
      val ser = if (resultSer == null) SparkEnv.get.serializer.newInstance() else resultSer
      valueObject = ser.deserialize(valueBytes)
      valueObjectDeserialized = true
      valueObject
    }
  }
}
Example 9
Source File: SparkSqlSerializer.scala From BigDatalog with Apache License 2.0
package org.apache.spark.sql.execution

import java.nio.ByteBuffer
import java.util.{HashMap => JavaHashMap}

import scala.reflect.ClassTag

import com.esotericsoftware.kryo.io.{Input, Output}
import com.esotericsoftware.kryo.{Kryo, Serializer}
import com.twitter.chill.ResourcePool

import org.apache.spark.serializer.{KryoSerializer, SerializerInstance}
import org.apache.spark.sql.types.{Decimal, StructField, StructType}
import org.apache.spark.util.MutablePair
import org.apache.spark.{SparkConf, SparkEnv}

private[sql] class SparkSqlSerializer(conf: SparkConf) extends KryoSerializer(conf) {

  override def newKryo(): Kryo = {
    val kryo = super.newKryo()
    kryo.setRegistrationRequired(false)
    kryo.register(classOf[MutablePair[_, _]])
    kryo.register(classOf[org.apache.spark.sql.catalyst.expressions.GenericRow])
    kryo.register(classOf[org.apache.spark.sql.catalyst.expressions.GenericInternalRow])
    kryo.register(classOf[org.apache.spark.sql.catalyst.expressions.GenericMutableRow])
    kryo.register(classOf[java.math.BigDecimal], new JavaBigDecimalSerializer)
    kryo.register(classOf[BigDecimal], new ScalaBigDecimalSerializer)
    kryo.register(classOf[Decimal])
    kryo.register(classOf[JavaHashMap[_, _]])

    // APS
    kryo.register(classOf[StructType])
    kryo.register(classOf[StructField])

    kryo.setReferences(false)
    kryo
  }
}

private[execution] class KryoResourcePool(size: Int) extends ResourcePool[SerializerInstance](size) {

  val ser: SparkSqlSerializer = {
    val sparkConf = Option(SparkEnv.get).map(_.conf).getOrElse(new SparkConf())
    new SparkSqlSerializer(sparkConf)
  }

  def newInstance(): SerializerInstance = ser.newInstance()
}

private[sql] object SparkSqlSerializer {
  @transient lazy val resourcePool = new KryoResourcePool(30)

  private[this] def acquireRelease[O](fn: SerializerInstance => O): O = {
    val kryo = resourcePool.borrow
    try {
      fn(kryo)
    } finally {
      resourcePool.release(kryo)
    }
  }

  def serialize[T: ClassTag](o: T): Array[Byte] =
    acquireRelease { k =>
      k.serialize(o).array()
    }

  def deserialize[T: ClassTag](bytes: Array[Byte]): T =
    acquireRelease { k =>
      k.deserialize[T](ByteBuffer.wrap(bytes))
    }
}

private[sql] class JavaBigDecimalSerializer extends Serializer[java.math.BigDecimal] {
  def write(kryo: Kryo, output: Output, bd: java.math.BigDecimal) {
    // TODO: There are probably more efficient representations than strings...
    output.writeString(bd.toString)
  }
  def read(kryo: Kryo, input: Input, tpe: Class[java.math.BigDecimal]): java.math.BigDecimal = {
    new java.math.BigDecimal(input.readString())
  }
}

private[sql] class ScalaBigDecimalSerializer extends Serializer[BigDecimal] {
  def write(kryo: Kryo, output: Output, bd: BigDecimal) {
    // TODO: There are probably more efficient representations than strings...
    output.writeString(bd.toString)
  }
  def read(kryo: Kryo, input: Input, tpe: Class[BigDecimal]): BigDecimal = {
    new java.math.BigDecimal(input.readString())
  }
}
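Kryo instances are not thread-safe, which is why the companion object routes every call through a ResourcePool: each serialize or deserialize borrows a pooled SerializerInstance and releases it in a finally block. A hypothetical round trip follows; the sample map is illustrative, and the sketch lives in org.apache.spark.sql because the object is private[sql].

package org.apache.spark.sql

import org.apache.spark.sql.execution.SparkSqlSerializer

object SqlSerializerRoundTrip {
  def main(args: Array[String]): Unit = {
    val bytes: Array[Byte] = SparkSqlSerializer.serialize(Map("a" -> 1, "b" -> 2))
    val back: Map[String, Int] = SparkSqlSerializer.deserialize[Map[String, Int]](bytes)
    assert(back == Map("a" -> 1, "b" -> 2))
  }
}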