java.io.ObjectOutputStream Scala Examples
The following examples show how to use java.io.ObjectOutputStream.
Each example is taken from an open-source project; follow the link above it to reach the original project and source file.
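Before the project-specific examples, here is a minimal, self-contained sketch of the basic pattern they all build on: wrap an OutputStream in an ObjectOutputStream to serialize an object, then read it back with an ObjectInputStream. The Note class and the ObjectStreamRoundTrip object below are illustrative only and do not come from any of the projects listed here.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

// Illustrative type; anything passed to writeObject must be Serializable
// (Scala case classes already are).
case class Note(id: Int, text: String)

object ObjectStreamRoundTrip extends App {
  val original = Note(1, "hello")

  // Serialize into an in-memory buffer.
  val baos = new ByteArrayOutputStream()
  val oos = new ObjectOutputStream(baos)
  oos.writeObject(original)
  oos.close()

  // Deserialize from the same bytes.
  val ois = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray))
  val copy = ois.readObject().asInstanceOf[Note]
  ois.close()

  assert(copy == original) // case classes compare structurally
}

Many of the examples that follow extend this pattern with custom writeObject/readObject methods, so that fields which are not themselves serializable (a Hadoop Configuration, a ByteBuffer, RDD partition references) can be written out and rebuilt on deserialization.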
Example 1
Source File: QueueInputDStream.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import java.io.{NotSerializableException, ObjectInputStream, ObjectOutputStream}

import scala.collection.mutable.{ArrayBuffer, Queue}
import scala.reflect.ClassTag

import org.apache.spark.rdd.{RDD, UnionRDD}
import org.apache.spark.streaming.{StreamingContext, Time}

private[streaming] class QueueInputDStream[T: ClassTag](
    ssc: StreamingContext,
    val queue: Queue[RDD[T]],
    oneAtATime: Boolean,
    defaultRDD: RDD[T]
  ) extends InputDStream[T](ssc) {

  override def start() { }

  override def stop() { }

  private def readObject(in: ObjectInputStream): Unit = {
    throw new NotSerializableException("queueStream doesn't support checkpointing. " +
      "Please don't use queueStream when checkpointing is enabled.")
  }

  private def writeObject(oos: ObjectOutputStream): Unit = {
    logWarning("queueStream doesn't support checkpointing")
  }

  override def compute(validTime: Time): Option[RDD[T]] = {
    val buffer = new ArrayBuffer[RDD[T]]()
    queue.synchronized {
      if (oneAtATime && queue.nonEmpty) {
        buffer += queue.dequeue()
      } else {
        buffer ++= queue
        queue.clear()
      }
    }
    if (buffer.nonEmpty) {
      if (oneAtATime) {
        Some(buffer.head)
      } else {
        Some(new UnionRDD(context.sc, buffer.toSeq))
      }
    } else if (defaultRDD != null) {
      Some(defaultRDD)
    } else {
      Some(ssc.sparkContext.emptyRDD)
    }
  }
}
Example 2
Source File: DStreamCheckpointData.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import java.io.{IOException, ObjectInputStream, ObjectOutputStream}

import scala.collection.mutable.HashMap
import scala.reflect.ClassTag

import org.apache.hadoop.fs.{FileSystem, Path}

import org.apache.spark.internal.Logging
import org.apache.spark.streaming.Time
import org.apache.spark.util.Utils

private[streaming] class DStreamCheckpointData[T: ClassTag](dstream: DStream[T])
  extends Serializable with Logging {
  protected val data = new HashMap[Time, AnyRef]()

  // Mapping of the batch time to the checkpointed RDD file of that time
  @transient private var timeToCheckpointFile = new HashMap[Time, String]
  // Mapping of the batch time to the time of the oldest checkpointed RDD
  // in that batch's checkpoint data
  @transient private var timeToOldestCheckpointFileTime = new HashMap[Time, Time]

  @transient private var fileSystem: FileSystem = null
  protected[streaming] def currentCheckpointFiles = data.asInstanceOf[HashMap[Time, String]]

  def restore() {
    // Create RDDs from the checkpoint data
    currentCheckpointFiles.foreach {
      case (time, file) =>
        logInfo("Restoring checkpointed RDD for time " + time + " from file '" + file + "'")
        dstream.generatedRDDs += ((time, dstream.context.sparkContext.checkpointFile[T](file)))
    }
  }

  override def toString: String = {
    "[\n" + currentCheckpointFiles.size + " checkpoint files \n" +
      currentCheckpointFiles.mkString("\n") + "\n]"
  }

  @throws(classOf[IOException])
  private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException {
    logDebug(this.getClass().getSimpleName + ".writeObject used")
    if (dstream.context.graph != null) {
      dstream.context.graph.synchronized {
        if (dstream.context.graph.checkpointInProgress) {
          oos.defaultWriteObject()
        } else {
          val msg = "Object of " + this.getClass.getName + " is being serialized " +
            " possibly as a part of closure of an RDD operation. This is because " +
            " the DStream object is being referred to from within the closure. " +
            " Please rewrite the RDD operation inside this DStream to avoid this. " +
            " This has been enforced to avoid bloating of Spark tasks " +
            " with unnecessary objects."
          throw new java.io.NotSerializableException(msg)
        }
      }
    } else {
      throw new java.io.NotSerializableException(
        "Graph is unexpectedly null when DStream is being serialized.")
    }
  }

  @throws(classOf[IOException])
  private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException {
    logDebug(this.getClass().getSimpleName + ".readObject used")
    ois.defaultReadObject()
    timeToOldestCheckpointFileTime = new HashMap[Time, Time]
    timeToCheckpointFile = new HashMap[Time, String]
  }
}
Example 3
Source File: SerializableConfiguration.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.util

import java.io.{ObjectInputStream, ObjectOutputStream}

import org.apache.hadoop.conf.Configuration

private[spark] class SerializableConfiguration(@transient var value: Configuration)
  extends Serializable {

  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.defaultWriteObject()
    value.write(out)
  }

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    value = new Configuration(false)
    value.readFields(in)
  }
}
Example 4
Source File: SerializableJobConf.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.util

import java.io.{ObjectInputStream, ObjectOutputStream}

import org.apache.hadoop.mapred.JobConf

private[spark] class SerializableJobConf(@transient var value: JobConf)
  extends Serializable {

  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.defaultWriteObject()
    value.write(out)
  }

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    value = new JobConf(false)
    value.readFields(in)
  }
}
Example 5
Source File: SerializableBuffer.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.util

import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer
import java.nio.channels.Channels

private[spark] class SerializableBuffer(@transient var buffer: ByteBuffer)
  extends Serializable {

  def value: ByteBuffer = buffer

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    val length = in.readInt()
    buffer = ByteBuffer.allocate(length)
    var amountRead = 0
    val channel = Channels.newChannel(in)
    while (amountRead < length) {
      val ret = channel.read(buffer)
      if (ret == -1) {
        throw new EOFException("End of file before fully reading buffer")
      }
      amountRead += ret
    }
    buffer.rewind() // Allow us to read it later
  }

  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.writeInt(buffer.limit())
    if (Channels.newChannel(out).write(buffer) != buffer.limit()) {
      throw new IOException("Could not fully write buffer to output stream")
    }
    buffer.rewind() // Allow us to write it again later
  }
}
Example 6
Source File: CartesianRDD.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd

import java.io.{IOException, ObjectOutputStream}

import scala.reflect.ClassTag

import org.apache.spark._
import org.apache.spark.util.Utils

private[spark] class CartesianPartition(
    idx: Int,
    @transient private val rdd1: RDD[_],
    @transient private val rdd2: RDD[_],
    s1Index: Int,
    s2Index: Int
  ) extends Partition {
  var s1 = rdd1.partitions(s1Index)
  var s2 = rdd2.partitions(s2Index)
  override val index: Int = idx

  @throws(classOf[IOException])
  private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException {
    // Update the reference to parent split at the time of task serialization
    s1 = rdd1.partitions(s1Index)
    s2 = rdd2.partitions(s2Index)
    oos.defaultWriteObject()
  }
}

private[spark] class CartesianRDD[T: ClassTag, U: ClassTag](
    sc: SparkContext,
    var rdd1 : RDD[T],
    var rdd2 : RDD[U])
  extends RDD[(T, U)](sc, Nil)
  with Serializable {

  val numPartitionsInRdd2 = rdd2.partitions.length

  override def getPartitions: Array[Partition] = {
    // create the cross product split
    val array = new Array[Partition](rdd1.partitions.length * rdd2.partitions.length)
    for (s1 <- rdd1.partitions; s2 <- rdd2.partitions) {
      val idx = s1.index * numPartitionsInRdd2 + s2.index
      array(idx) = new CartesianPartition(idx, rdd1, rdd2, s1.index, s2.index)
    }
    array
  }

  override def getPreferredLocations(split: Partition): Seq[String] = {
    val currSplit = split.asInstanceOf[CartesianPartition]
    (rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2)).distinct
  }

  override def compute(split: Partition, context: TaskContext): Iterator[(T, U)] = {
    val currSplit = split.asInstanceOf[CartesianPartition]
    for (x <- rdd1.iterator(currSplit.s1, context);
         y <- rdd2.iterator(currSplit.s2, context)) yield (x, y)
  }

  override def getDependencies: Seq[Dependency[_]] = List(
    new NarrowDependency(rdd1) {
      def getParents(id: Int): Seq[Int] = List(id / numPartitionsInRdd2)
    },
    new NarrowDependency(rdd2) {
      def getParents(id: Int): Seq[Int] = List(id % numPartitionsInRdd2)
    }
  )

  override def clearDependencies() {
    super.clearDependencies()
    rdd1 = null
    rdd2 = null
  }
}
Example 7
Source File: UnionRDD.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd

import java.io.{IOException, ObjectOutputStream}

import scala.collection.mutable.ArrayBuffer
import scala.collection.parallel.{ForkJoinTaskSupport, ThreadPoolTaskSupport}
import scala.concurrent.forkjoin.ForkJoinPool
import scala.reflect.ClassTag

import org.apache.spark.{Dependency, Partition, RangeDependency, SparkContext, TaskContext}
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.util.Utils

private[spark] class UnionPartition[T: ClassTag](
    idx: Int,
    @transient private val rdd: RDD[T],
    val parentRddIndex: Int,
    @transient private val parentRddPartitionIndex: Int)
  extends Partition {

  var parentPartition: Partition = rdd.partitions(parentRddPartitionIndex)

  def preferredLocations(): Seq[String] = rdd.preferredLocations(parentPartition)

  override val index: Int = idx

  @throws(classOf[IOException])
  private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException {
    // Update the reference to parent split at the time of task serialization
    parentPartition = rdd.partitions(parentRddPartitionIndex)
    oos.defaultWriteObject()
  }
}

object UnionRDD {
  private[spark] lazy val partitionEvalTaskSupport =
    new ForkJoinTaskSupport(new ForkJoinPool(8))
}

@DeveloperApi
class UnionRDD[T: ClassTag](
    sc: SparkContext,
    var rdds: Seq[RDD[T]])
  extends RDD[T](sc, Nil) {  // Nil since we implement getDependencies

  // visible for testing
  private[spark] val isPartitionListingParallel: Boolean =
    rdds.length > conf.getInt("spark.rdd.parallelListingThreshold", 10)

  override def getPartitions: Array[Partition] = {
    val parRDDs = if (isPartitionListingParallel) {
      val parArray = rdds.par
      parArray.tasksupport = UnionRDD.partitionEvalTaskSupport
      parArray
    } else {
      rdds
    }
    val array = new Array[Partition](parRDDs.map(_.partitions.length).seq.sum)
    var pos = 0
    for ((rdd, rddIndex) <- rdds.zipWithIndex; split <- rdd.partitions) {
      array(pos) = new UnionPartition(pos, rdd, rddIndex, split.index)
      pos += 1
    }
    array
  }

  override def getDependencies: Seq[Dependency[_]] = {
    val deps = new ArrayBuffer[Dependency[_]]
    var pos = 0
    for (rdd <- rdds) {
      deps += new RangeDependency(rdd, 0, pos, rdd.partitions.length)
      pos += rdd.partitions.length
    }
    deps
  }

  override def compute(s: Partition, context: TaskContext): Iterator[T] = {
    val part = s.asInstanceOf[UnionPartition[T]]
    parent[T](part.parentRddIndex).iterator(part.parentPartition, context)
  }

  override def getPreferredLocations(s: Partition): Seq[String] =
    s.asInstanceOf[UnionPartition[T]].preferredLocations()

  override def clearDependencies() {
    super.clearDependencies()
    rdds = null
  }
}
Example 8
Source File: PartitionerAwareUnionRDD.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd

import java.io.{IOException, ObjectOutputStream}

import scala.reflect.ClassTag

import org.apache.spark.{OneToOneDependency, Partition, SparkContext, TaskContext}
import org.apache.spark.util.Utils

private[spark] class PartitionerAwareUnionRDD[T: ClassTag](
    sc: SparkContext,
    var rdds: Seq[RDD[T]]
  ) extends RDD[T](sc, rdds.map(x => new OneToOneDependency(x))) {
  require(rdds.nonEmpty)
  require(rdds.forall(_.partitioner.isDefined))
  require(rdds.flatMap(_.partitioner).toSet.size == 1,
    "Parent RDDs have different partitioners: " + rdds.flatMap(_.partitioner))

  override val partitioner = rdds.head.partitioner

  override def getPartitions: Array[Partition] = {
    val numPartitions = partitioner.get.numPartitions
    (0 until numPartitions).map { index =>
      new PartitionerAwareUnionRDDPartition(rdds, index)
    }.toArray
  }

  // Get the location where most of the partitions of parent RDDs are located
  override def getPreferredLocations(s: Partition): Seq[String] = {
    logDebug("Finding preferred location for " + this + ", partition " + s.index)
    val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents
    val locations = rdds.zip(parentPartitions).flatMap {
      case (rdd, part) =>
        val parentLocations = currPrefLocs(rdd, part)
        logDebug("Location of " + rdd + " partition " + part.index + " = " + parentLocations)
        parentLocations
    }
    val location = if (locations.isEmpty) {
      None
    } else {
      // Find the location that maximum number of parent partitions prefer
      Some(locations.groupBy(x => x).maxBy(_._2.length)._1)
    }
    logDebug("Selected location for " + this + ", partition " + s.index + " = " + location)
    location.toSeq
  }

  override def compute(s: Partition, context: TaskContext): Iterator[T] = {
    val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents
    rdds.zip(parentPartitions).iterator.flatMap {
      case (rdd, p) => rdd.iterator(p, context)
    }
  }

  override def clearDependencies() {
    super.clearDependencies()
    rdds = null
  }

  // Get the *current* preferred locations from the DAGScheduler (as opposed to the static ones)
  private def currPrefLocs(rdd: RDD[_], part: Partition): Seq[String] = {
    rdd.context.getPreferredLocs(rdd, part.index).map(tl => tl.host)
  }
}
Example 9
Source File: DnnStorage.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.tensor

import java.io.{IOException, ObjectInputStream, ObjectOutputStream}

import com.intel.analytics.bigdl.mkl.Memory
import com.intel.analytics.bigdl.nn.mkldnn.MemoryOwner

import scala.reflect._

private[bigdl] class Pointer(val address: Long)

object DnnStorage {
  private[tensor] val CACHE_LINE_SIZE = System.getProperty("bigdl.cache.line", "64").toInt
  private[bigdl] val FLOAT_BYTES: Int = 4
  private[bigdl] val INT8_BYTES: Int = 1
  private[bigdl] val INT_BYTES: Int = 4

  import java.util.concurrent.ConcurrentHashMap
  private val nativeStorages: ConcurrentHashMap[Long, Boolean] = new ConcurrentHashMap()

  def checkAndSet(pointer: Long): Boolean = {
    nativeStorages.replace(pointer, false, true)
  }

  def add(key: Long): Unit = {
    nativeStorages.put(key, false)
  }

  def get(): Map[Long, Boolean] = {
    import scala.collection.JavaConverters._
    nativeStorages.asScala.toMap
  }
}
Example 10
Source File: JavaSerializationConverter.scala From scala-serialization with MIT License | 5 votes |
package com.komanov.serialization.converters

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import com.komanov.serialization.converters.IoUtils.using
import com.komanov.serialization.domain.{Site, SiteEvent, SiteEventData}

object JavaSerializationConverter extends MyConverter {

  override def toByteArray(site: Site): Array[Byte] = {
    using(new ByteArrayOutputStream()) { baos =>
      using(new ObjectOutputStream(baos)) { os =>
        os.writeObject(site)
        os.flush()
        baos.toByteArray
      }
    }
  }

  override def fromByteArray(bytes: Array[Byte]): Site = {
    using(new ByteArrayInputStream(bytes)) { bais =>
      using(new ObjectInputStream(bais)) { os =>
        os.readObject().asInstanceOf[Site]
      }
    }
  }

  override def toByteArray(event: SiteEvent): Array[Byte] = {
    using(new ByteArrayOutputStream()) { baos =>
      using(new ObjectOutputStream(baos)) { os =>
        os.writeObject(event)
        os.flush()
        baos.toByteArray
      }
    }
  }

  override def siteEventFromByteArray(clazz: Class[_], bytes: Array[Byte]): SiteEvent = {
    using(new ByteArrayInputStream(bytes)) { bais =>
      using(new ObjectInputStream(bais)) { os =>
        os.readObject().asInstanceOf[SiteEvent]
      }
    }
  }
}
Example 11
Source File: TestableQueueInputDStream.scala From SparkUnitTestingExamples with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming

import java.io.{ObjectInputStream, ObjectOutputStream}

import org.apache.spark.rdd.{RDD, UnionRDD}
import org.apache.spark.streaming.dstream.InputDStream

import scala.collection.mutable.{ArrayBuffer, Queue}
import scala.reflect.ClassTag

class TestableQueueInputDStream[T: ClassTag](
    ssc: StreamingContext,
    val queue: Queue[RDD[T]],
    oneAtATime: Boolean,
    defaultRDD: RDD[T]
  ) extends InputDStream[T](ssc) {

  override def start() { }

  override def stop() { }

  private def readObject(in: ObjectInputStream): Unit = {
    logWarning("queueStream doesn't support checkpointing")
  }

  private def writeObject(oos: ObjectOutputStream): Unit = {
    logWarning("queueStream doesn't support checkpointing")
  }

  override def compute(validTime: Time): Option[RDD[T]] = {
    val buffer = new ArrayBuffer[RDD[T]]()
    queue.synchronized {
      if (oneAtATime && queue.nonEmpty) {
        buffer += queue.dequeue()
      } else {
        buffer ++= queue
        queue.clear()
      }
    }
    if (buffer.nonEmpty) {
      if (oneAtATime) {
        Some(buffer.head)
      } else {
        Some(new UnionRDD(context.sc, buffer.toSeq))
      }
    } else if (defaultRDD != null) {
      Some(defaultRDD)
    } else {
      Some(ssc.sparkContext.emptyRDD)
    }
  }
}
Example 12
Source File: File.scala From nescala with GNU General Public License v2.0 | 5 votes |
package com.owlandrews.nescala.helpers

import com.owlandrews.nescala.Console

object File {
  import java.io.File
  import java.net.URL
  import java.io.{FileFilter, FileInputStream, FileOutputStream, ObjectInputStream, ObjectOutputStream}
  import javax.imageio.ImageIO

  import scala.util.Try
  import scala.xml.XML
  import scala.language.postfixOps

  import sys.process._

  import com.typesafe.config.ConfigFactory

  def Download(url: String, filename: String) = (for{
    url <- Try(new URL(url))
    conn <- Try(url.openConnection().connect())
    file <- Try(new File(filename))
  } yield Try(url #> file !!)) map {x => new File(filename)}

  def Writer(filename: String)(op: java.io.PrintWriter => Unit) = {
    val p = new java.io.PrintWriter(new File(filename))
    try op(p)
    finally p.close()
  }

  def Write(filename: String, content: String) = {
    val res = new java.io.PrintWriter(new File(filename))
    res.write(content)
    res.close()
  }

  def Filter = new FileFilter {
    override def accept(pathname: File): Boolean = pathname.getName.toLowerCase.endsWith(".nes")
  }

  def Image(file:Try[File]) = file.map(ImageIO.read)

  def Image(filename:String) = Try(ImageIO.read(resource(filename)))

  def Xml(filename:String) = XML.load(resource("/database.xml"))

  def Config(filename:String) = {
    val file = new File(filename)
    file.exists() match {
      case true => ConfigFactory.parseFile(file)
      case false => ConfigFactory.empty()
    }
  }

  def SaveState(console:Console) = {
    val fos = new FileOutputStream(s"$ApplicationFolder/${console.cartridge.CRC}.save")
    val oos = new ObjectOutputStream(fos)
    oos.writeObject(console)
    oos.close()
  }

  def LoadState(crc:String):Try[Console] = Try {
    val fis = new FileInputStream(s"$ApplicationFolder/$crc.save")
    val ois = new ObjectInputStreamWithCustomClassLoader(fis)
    val console = ois.readObject.asInstanceOf[Console]
    ois.close()
    console
  }

  // Taken from: https://gist.github.com/ramn/5566596
  private class ObjectInputStreamWithCustomClassLoader(fileInputStream: FileInputStream)
    extends ObjectInputStream(fileInputStream) {
    override def resolveClass(desc: java.io.ObjectStreamClass): Class[_] = {
      try {
        Class.forName(desc.getName, false, getClass.getClassLoader)
      } catch {
        case ex: ClassNotFoundException => super.resolveClass(desc)
      }
    }
  }

  lazy val ApplicationFolder: File = {
    val settingDirectory = System.getProperty("user.home") + "/.nescala"
    val settings = new java.io.File(settingDirectory)
    if (!settings.exists()) settings.mkdir()
    settings
  }

  private def resource(filename:String) = getClass.getResourceAsStream(filename)
}
Example 13
Source File: HBasePartitioner.scala From Backup-Repo with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hbase

import java.io.{IOException, ObjectInputStream, ObjectOutputStream}

import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.serializer.JavaSerializer
import org.apache.spark.util.{CollectionsUtils, Utils}
import org.apache.spark.{Partitioner, SparkEnv}

object HBasePartitioner {
  implicit object HBaseRawOrdering extends Ordering[HBaseRawType] {
    def compare(a: HBaseRawType, b: HBaseRawType) = Bytes.compareTo(a, b)
  }
}

class HBasePartitioner (var splitKeys: Array[HBaseRawType]) extends Partitioner {
  import HBasePartitioner.HBaseRawOrdering

  type t = HBaseRawType

  lazy private val len = splitKeys.length

  // For pre-split table splitKeys(0) = bytes[0], to remove it,
  // otherwise partition 0 always be empty and
  // we will miss the last region's date when bulk load
  lazy private val realSplitKeys = if (splitKeys.isEmpty) splitKeys else splitKeys.tail

  def numPartitions = if (len == 0) 1 else len

  @transient private val binarySearch: ((Array[t], t) => Int) = CollectionsUtils.makeBinarySearch[t]

  def getPartition(key: Any): Int = {
    val k = key.asInstanceOf[t]
    var partition = 0
    if (len <= 128 && len > 0) {
      // If we have less than 128 partitions naive search
      val ordering = implicitly[Ordering[t]]
      while (partition < realSplitKeys.length && ordering.gt(k, realSplitKeys(partition))) {
        partition += 1
      }
    } else {
      // Determine which binary search method to use only once.
      partition = binarySearch(realSplitKeys, k)
      // binarySearch either returns the match location or -[insertion point]-1
      if (partition < 0) {
        partition = -partition - 1
      }
      if (partition > realSplitKeys.length) {
        partition = realSplitKeys.length
      }
    }
    partition
  }

  override def equals(other: Any): Boolean = other match {
    case r: HBasePartitioner =>
      r.splitKeys.sameElements(splitKeys)
    case _ =>
      false
  }

  override def hashCode(): Int = {
    val prime = 31
    var result = 1
    var i = 0
    while (i < splitKeys.length) {
      result = prime * result + splitKeys(i).hashCode
      i += 1
    }
    result = prime * result
    result
  }
}
Example 14
Source File: WriSer.scala From flint with Apache License 2.0 | 5 votes |
package com.twosigma.flint.hadoop

import java.io.{ DataInputStream, DataOutputStream, ObjectInputStream, ObjectOutputStream }
import java.io.IOException

import scala.reflect.{ classTag, ClassTag }

import org.apache.hadoop.io.Writable

// Note: we could make this implement InputSplit, but we do not because many input splits do a
// cast to their specific InputSplit, so we do not want to risk it. Further, this currently works
// for any Writable.
case class WriSer[T <: Writable: ClassTag](@transient var get: T) extends Serializable {
  def this() = this(null.asInstanceOf[T])

  @throws(classOf[IOException])
  private def writeObject(out: ObjectOutputStream) {
    out.writeObject(classTag[T])
    get.write(new DataOutputStream(out))
  }

  @throws(classOf[IOException])
  @throws(classOf[ClassNotFoundException])
  private def readObject(in: ObjectInputStream) {
    get = in.readObject.asInstanceOf[ClassTag[T]].runtimeClass.newInstance.asInstanceOf[T]
    get.readFields(new DataInputStream(in))
  }
}
Example 15
Source File: AggregatorTest.scala From noether with Apache License 2.0 | 5 votes |
package com.spotify.noether

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import com.twitter.algebird.Aggregator
import org.scalatest._
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

trait AggregatorTest extends AnyFlatSpec with Matchers {
  def run[A, B, C](aggregator: Aggregator[A, B, C])(as: Seq[A]): C = {
    val bs = as.map(aggregator.prepare _ compose ensureSerializable)
    val b = ensureSerializable(aggregator.reduce(bs))
    ensureSerializable(aggregator.present(b))
  }

  private def serializeToByteArray(value: Any): Array[Byte] = {
    val buffer = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(buffer)
    oos.writeObject(value)
    buffer.toByteArray
  }

  private def deserializeFromByteArray(encodedValue: Array[Byte]): AnyRef = {
    val ois = new ObjectInputStream(new ByteArrayInputStream(encodedValue))
    ois.readObject()
  }

  private def ensureSerializable[T](value: T): T =
    deserializeFromByteArray(serializeToByteArray(value)).asInstanceOf[T]
}
Example 16
Source File: MapJoinPartitionsRDD.scala From spark-vlbfgs with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd

import java.io.{IOException, ObjectOutputStream}

import scala.reflect.ClassTag

import org.apache.spark._
import org.apache.spark.util.Utils

class MapJoinPartitionsPartition(
    idx: Int,
    @transient private val rdd1: RDD[_],
    @transient private val rdd2: RDD[_],
    s2IdxArr: Array[Int]) extends Partition {

  var s1 = rdd1.partitions(idx)
  var s2Arr = s2IdxArr.map(s2Idx => rdd2.partitions(s2Idx))
  override val index: Int = idx

  @throws(classOf[IOException])
  private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException {
    s1 = rdd1.partitions(idx)
    s2Arr = s2IdxArr.map(s2Idx => rdd2.partitions(s2Idx))
    oos.defaultWriteObject()
  }
}

class MapJoinPartitionsRDD[A: ClassTag, B: ClassTag, V: ClassTag](
    sc: SparkContext,
    var idxF: (Int) => Array[Int],
    var f: (Int, Iterator[A], Array[(Int, Iterator[B])]) => Iterator[V],
    var rdd1: RDD[A],
    var rdd2: RDD[B])
  extends RDD[V](sc, Nil) {

  override def getPartitions: Array[Partition] = {
    val array = new Array[Partition](rdd1.partitions.length)
    for (s1 <- rdd1.partitions) {
      val idx = s1.index
      array(idx) = new MapJoinPartitionsPartition(idx, rdd1, rdd2, idxF(idx))
    }
    array
  }

  override def getDependencies: Seq[Dependency[_]] = List(
    new OneToOneDependency(rdd1),
    new NarrowDependency(rdd2) {
      override def getParents(partitionId: Int): Seq[Int] = {
        idxF(partitionId)
      }
    }
  )

  override def getPreferredLocations(s: Partition): Seq[String] = {
    val fp = firstParent[A]
    // println(s"pref loc: ${fp.preferredLocations(fp.partitions(s.index))}")
    fp.preferredLocations(fp.partitions(s.index))
  }

  override def compute(split: Partition, context: TaskContext): Iterator[V] = {
    val currSplit = split.asInstanceOf[MapJoinPartitionsPartition]
    f(currSplit.s1.index,
      rdd1.iterator(currSplit.s1, context),
      currSplit.s2Arr.map(s2 => (s2.index, rdd2.iterator(s2, context)))
    )
  }

  override def clearDependencies() {
    super.clearDependencies()
    rdd1 = null
    rdd2 = null
    idxF = null
    f = null
  }
}
Example 17
Source File: MapJoinPartitionsRDDV2.scala From spark-vlbfgs with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd

import java.io.{IOException, ObjectOutputStream}

import org.apache.spark.serializer.Serializer
import org.apache.spark.{TaskContext, _}
import org.apache.spark.util.Utils

import scala.reflect.ClassTag

class MapJoinPartitionsPartitionV2(
    idx: Int,
    @transient private val rdd1: RDD[_],
    @transient private val rdd2: RDD[_],
    s2IdxArr: Array[Int]) extends Partition {

  var s1 = rdd1.partitions(idx)
  var s2Arr = s2IdxArr.map(s2Idx => rdd2.partitions(s2Idx))
  override val index: Int = idx

  @throws(classOf[IOException])
  private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException {
    s1 = rdd1.partitions(idx)
    s2Arr = s2IdxArr.map(s2Idx => rdd2.partitions(s2Idx))
    oos.defaultWriteObject()
  }
}

class MapJoinPartitionsRDDV2[A: ClassTag, B: ClassTag, V: ClassTag](
    sc: SparkContext,
    var idxF: (Int) => Array[Int],
    var f: (Int, Iterator[A], Array[(Int, Iterator[B])]) => Iterator[V],
    var rdd1: RDD[A],
    var rdd2: RDD[B],
    preservesPartitioning: Boolean = false)
  extends RDD[V](sc, Nil) {

  var rdd2WithPid = rdd2.mapPartitionsWithIndex((pid, iter) => iter.map(x => (pid, x)))

  private val serializer: Serializer = SparkEnv.get.serializer

  override def getPartitions: Array[Partition] = {
    val array = new Array[Partition](rdd1.partitions.length)
    for (s1 <- rdd1.partitions) {
      val idx = s1.index
      array(idx) = new MapJoinPartitionsPartitionV2(idx, rdd1, rdd2, idxF(idx))
    }
    array
  }

  override def getDependencies: Seq[Dependency[_]] = List(
    new OneToOneDependency(rdd1),
    new ShuffleDependency[Int, B, B](
      rdd2WithPid.asInstanceOf[RDD[_ <: Product2[Int, B]]],
      new IdentityPartitioner(rdd2WithPid.getNumPartitions), serializer)
  )

  override def getPreferredLocations(s: Partition): Seq[String] = {
    val fp = firstParent[A]
    // println(s"pref loc: ${fp.preferredLocations(fp.partitions(s.index))}")
    fp.preferredLocations(fp.partitions(s.index))
  }

  override def compute(split: Partition, context: TaskContext): Iterator[V] = {
    val currSplit = split.asInstanceOf[MapJoinPartitionsPartitionV2]
    val rdd2Dep = dependencies(1).asInstanceOf[ShuffleDependency[Int, Any, Any]]
    val rdd2PartIter = currSplit.s2Arr.map(s2 => (s2.index,
      SparkEnv.get.shuffleManager
        .getReader[Int, B](rdd2Dep.shuffleHandle, s2.index, s2.index + 1, context)
        .read().map(x => x._2)
      ))
    val rdd1Iter = rdd1.iterator(currSplit.s1, context)
    f(currSplit.s1.index, rdd1Iter, rdd2PartIter)
  }

  override def clearDependencies() {
    super.clearDependencies()
    rdd1 = null
    rdd2 = null
    rdd2WithPid = null
    idxF = null
    f = null
  }
}

private[spark] class IdentityPartitioner(val numParts: Int) extends Partitioner {
  require(numPartitions > 0)

  override def getPartition(key: Any): Int = key.asInstanceOf[Int]

  override def numPartitions: Int = numParts
}
Example 18
Source File: SapSQLContextSuite.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql

import java.io.{ByteArrayOutputStream, ObjectOutputStream}

import org.apache.spark.sql.parser.SapParserException
import org.apache.spark.util.DummyRelationUtils._
import org.mockito.Mockito
import org.scalatest.FunSuite

class SapSQLContextSuite extends FunSuite with GlobalSapSQLContext {

  test("SQL contexts do not support hive functions") {
    val rdd = sc.parallelize(Seq(Row("1"), Row("2")))
    sqlc.createDataFrame(rdd, 'a.string, needsConversion = false)
      .registerTempTable("foo")

    intercept[AnalysisException] {
      sqlc.sql("SELECT int(a) FROM foo")
    }
  }

  test ("Check Spark Version"){
    val sap_sqlc = sqlContext.asInstanceOf[CommonSapSQLContext]
    // current spark runtime version shall be supported
    sap_sqlc.checkSparkVersion(List(org.apache.spark.SPARK_VERSION))

    // runtime exception for an unsupported version
    intercept[RuntimeException]{
      sap_sqlc.checkSparkVersion(List("some.unsupported.version"))
    }
  }

  test("Slightly different versions") {
    val sap_sqlc = sqlContext.asInstanceOf[CommonSapSQLContext]
    val spy_sap_sqlc = Mockito.spy(sap_sqlc)
    Mockito.when(spy_sap_sqlc.getCurrentSparkVersion())
      .thenReturn(org.apache.spark.SPARK_VERSION + "-CDH")
    // should not throw!
    spy_sap_sqlc.checkSparkVersion(spy_sap_sqlc.supportedVersions)

    Mockito.when(spy_sap_sqlc.getCurrentSparkVersion())
      .thenReturn("something- " + org.apache.spark.SPARK_VERSION)
    // should not throw!
    spy_sap_sqlc.checkSparkVersion(spy_sap_sqlc.supportedVersions)
  }

  test("Ensure SapSQLContext stays serializable"){
    // relevant for Bug 92818
    // Remember that all class references in SapSQLContext must be serializable!
    val oos = new ObjectOutputStream(new ByteArrayOutputStream())
    oos.writeObject(sqlContext)
    oos.close()
  }

  test("Rand function") {
    sqlContext.sql(
      s"""
         |CREATE TABLE test (name varchar(20), age integer)
         |USING com.sap.spark.dstest
         |OPTIONS (
         |tableName "test"
         |)
       """.stripMargin)

    sqlContext.sql("SELECT * FROM test WHERE rand() < 0.1")
  }

  test("test version fields") {
    val sapSqlContext = sqlContext.asInstanceOf[CommonSapSQLContext]
    assert(sapSqlContext.EXTENSIONS_VERSION.isEmpty)
    assert(sapSqlContext.DATASOURCES_VERSION.isEmpty)
  }
}
Example 19
Source File: SerializableSerializer.scala From spark-util with Apache License 2.0 | 5 votes |
package org.hammerlab.hadoop.kryo

import java.io.{ ObjectInputStream, ObjectOutputStream }

import com.esotericsoftware.kryo.io.{ Input, Output }
import com.esotericsoftware.kryo.{ Kryo, Serializer }

case class SerializableSerializer[T <: Serializable]()
  extends Serializer[T] {

  override def read(kryo: Kryo, input: Input, `type`: Class[T]): T =
    new ObjectInputStream(input)
      .readObject()
      .asInstanceOf[T]

  override def write(kryo: Kryo, output: Output, t: T): Unit =
    new ObjectOutputStream(output)
      .writeObject(t)
}
Example 20
Source File: Configuration.scala From spark-util with Apache License 2.0 | 5 votes |
package org.hammerlab.hadoop

import java.io.{ ObjectInputStream, ObjectOutputStream }

import org.apache.hadoop.conf
import org.apache.hadoop.conf.{ Configuration ⇒ HadoopConfiguration }
import org.apache.spark.SparkContext
import org.apache.spark.broadcast.Broadcast
import org.hammerlab.hadoop.kryo.WritableSerializer
import org.hammerlab.kryo._

class Configuration(@transient var value: HadoopConfiguration)
  extends Serializable {

  private def writeObject(out: ObjectOutputStream): Unit = {
    value.write(out)
  }

  private def readObject(in: ObjectInputStream): Unit = {
    value = new HadoopConfiguration(false)
    value.readFields(in)
  }
}

object Configuration
  extends Registrar {

  def apply(loadDefaults: Boolean = true): Configuration =
    new HadoopConfiguration(loadDefaults)

  def apply(conf: HadoopConfiguration): Configuration =
    new Configuration(conf)

  implicit def wrapConfiguration(conf: HadoopConfiguration): Configuration =
    apply(conf)

  implicit def unwrapConfiguration(conf: Configuration): HadoopConfiguration =
    conf.value

  implicit def unwrapConfigurationBroadcast(confBroadcast: Broadcast[Configuration]): Configuration =
    confBroadcast.value

  implicit def sparkContextToHadoopConfiguration(sc: SparkContext): Configuration =
    sc.hadoopConfiguration

  implicit class Ops(val conf: HadoopConfiguration) extends AnyVal {
    def serializable: Configuration = conf
  }

  register(
    cls[conf.Configuration] → new WritableSerializer[conf.Configuration],
    cls[Configuration] → serializeAs[Configuration, conf.Configuration]
  )
}
Example 21
Source File: SerializableSerializerTest.scala From spark-util with Apache License 2.0 | 5 votes |
package org.hammerlab.hadoop.kryo

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream }

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{ Input, Output }
import org.hammerlab.test.Suite

class SerializableSerializerTest
  extends Suite {

  test("serde") {
    val kryo = new Kryo()
    kryo.setRegistrationRequired(true)
    val baos = new ByteArrayOutputStream()
    val output = new Output(baos)

    val foo = new Foo
    foo.n = 123
    foo.s = "abc"

    intercept[IllegalArgumentException] {
      kryo.writeClassAndObject(output, foo)
    }
    .getMessage should startWith("Class is not registered: org.hammerlab.hadoop.kryo.Foo")

    kryo.register(classOf[Foo], SerializableSerializer[Foo]())

    kryo.writeClassAndObject(output, foo)

    output.close()

    val bytes = baos.toByteArray
    bytes.length should be(93)

    val bais = new ByteArrayInputStream(bytes)

    val input = new Input(bais)
    val after = kryo.readClassAndObject(input).asInstanceOf[Foo]

    after.n should be(foo.n)
    after.s should be(foo.s)
  }
}

class Foo
  extends Serializable {

  var n = 0
  var s = ""

  private def writeObject(out: ObjectOutputStream): Unit = {
    out.writeInt(n)
    out.writeUTF(s)
  }

  private def readObject(in: ObjectInputStream): Unit = {
    n = in.readInt()
    s = in.readUTF()
  }
}
Example 22
Source File: DesignSerializationTest.scala From airframe with Apache License 2.0 | 5 votes |
package wvlet.airframe

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import DesignTest._
import wvlet.airspec.AirSpec

object DesignSerializationTest {
  def serialize(d: Design): Array[Byte] = {
    val b  = new ByteArrayOutputStream()
    val oo = new ObjectOutputStream(b)
    oo.writeObject(d)
    oo.close()
    b.toByteArray
  }

  def deserialize(b: Array[Byte]): Design = {
    val in  = new ByteArrayInputStream(b)
    val oi  = new ObjectInputStream(in)
    val obj = oi.readObject().asInstanceOf[Design]
    obj.asInstanceOf[Design]
  }
}

class DesignSerializationTest extends AirSpec {
  import DesignSerializationTest._

  def `be serializable`: Unit = {
    val b   = serialize(d1)
    val d1s = deserialize(b)
    d1s shouldBe (d1)
  }

  def `serialize instance binding`: Unit = {
    val d  = Design.blanc.bind[Message].toInstance(Hello("world"))
    val b  = serialize(d)
    val ds = deserialize(b)
    ds shouldBe (d)
  }
}
Example 23
Source File: SerializationTest.scala From airframe with Apache License 2.0 | 5 votes |
package wvlet.log

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import wvlet.log.io.IOUtil

object SerializationTest {
  trait A extends LogSupport {
    debug("new A")
    def hello = debug("hello")
  }
}

class SerializationTest extends Spec {
  import SerializationTest._

  def `logger should be serializable`: Unit = {
    val a = new A {}
    val b = new ByteArrayOutputStream()
    IOUtil.withResource(new ObjectOutputStream(b)) { out =>
      out.writeObject(a)
    }
    val ser = b.toByteArray
    IOUtil.withResource(new ObjectInputStream(new ByteArrayInputStream(ser))) { in =>
      debug("deserialization")
      val a = in.readObject().asInstanceOf[A]
      a.hello
    }
  }
}
Example 24
Source File: ObjectStreamUtil.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.compiler.flink.testutil

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

object ObjectStreamUtil {
  def serializeAndDeserialize[T](value: T): T = {
    val outputStream = new ByteArrayOutputStream()
    val objectOutputStream = new ObjectOutputStream(outputStream)
    objectOutputStream.writeObject(value)

    val bytes = outputStream.toByteArray
    val objectInputStream = new ObjectInputStream(new ByteArrayInputStream(bytes))
    objectInputStream.readObject().asInstanceOf[T]
  }
}
Example 25
Source File: Inputs.scala From perf_tester with Apache License 2.0 | 5 votes |
package org.perftester.process

import java.io.ObjectOutputStream

sealed trait Inputs {
  def writeTo(outputStream: ObjectOutputStream) = outputStream.synchronized {
    outputStream.writeObject(this)
  }
}

case class Run(className: String, args: Seq[String]) extends Inputs

case class ScalacGlobalConfig(id: String,
                              outputDirectory: Option[String],
                              classPath: Option[Seq[String]],
                              otherParams: Option[List[String]],
                              files: Option[List[String]])
    extends Inputs {

  override def toString =
    s"ScalacGlobalConfig[$id] ${option("outputDirectory", outputDirectory)}${option(
      "classPath", classPath)}${option("otherParams", otherParams)}${option("files", files)}"

  def option(name: String, value: Option[Any]) = value match {
    case None    => ""
    case Some(x) => s"$name=$x"
  }
}

case class ScalacRun(id: String) extends Inputs

case class ScalacRetire(id: String) extends Inputs

case object Gc extends Inputs

case object Exit extends Inputs
Example 26
Source File: Rule.scala From jigg with Apache License 2.0 | 5 votes |
package jigg.nlp.ccg.parser

import jigg.nlp.ccg.lexicon.{Category, Derivation, Point, UnaryChildPoint, BinaryChildrenPoints, AppliedRule}

import scala.collection.mutable.{HashMap, HashSet}
import java.io.{ObjectOutputStream, ObjectInputStream}

trait Rule {
  def unify(left:Category, right:Category): Option[Array[(Category, String)]]
  def raise(child:Category): Option[Array[(Category, String)]]
  def headFinder:HeadFinder
}

// rules are restricted to CFG rules extracted from the training CCGBank
case class CFGRule(val binaryRules:Map[(Int,Int), Array[(Category, String)]], // category ids -> (category, ruleType)
                   val unaryRules:Map[Int, Array[(Category, String)]],
                   override val headFinder:HeadFinder) extends Rule {
  def unify(left:Category, right:Category):Option[Array[(Category, String)]] = binaryRules.get((left.id, right.id))
  def raise(child:Category):Option[Array[(Category, String)]] = unaryRules.get(child.id)
}

object CFGRule {
  def extractRulesFromDerivations(derivations: Array[Derivation], headFinder:HeadFinder): CFGRule = {
    val binaryRules = new HashMap[(Int, Int), HashSet[(Category, String)]]
    val unaryRules = new HashMap[Int, HashSet[(Category, String)]]

    derivations.foreach { deriv =>
      deriv.foreachPoint({ point:Point =>
        deriv.get(point) match {
          case Some(AppliedRule(UnaryChildPoint(child), ruleType)) =>
            val parents = unaryRules.getOrElseUpdate(child.category.id, new HashSet[(Category, String)])
            parents += ((point.category, ruleType))
          case Some(AppliedRule(BinaryChildrenPoints(left, right), ruleType)) =>
            val parents = binaryRules.getOrElseUpdate((left.category.id, right.category.id), new HashSet[(Category, String)])
            parents += ((point.category, ruleType))
          case _ =>
        }})
    }
    new CFGRule(binaryRules.map { case (k, v) => k -> v.toArray }.toMap,
      unaryRules.map { case (k, v) => k -> v.toArray }.toMap,
      headFinder)
  }
}
Example 27
Source File: HadoopCheckpointStoreFactory.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.hadoop

import java.io.{ObjectInputStream, ObjectOutputStream}

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

import org.apache.gearpump.streaming.hadoop.lib.HadoopUtil
import org.apache.gearpump.streaming.hadoop.lib.rotation.{FileSizeRotation, Rotation}
import org.apache.gearpump.streaming.transaction.api.{CheckpointStore, CheckpointStoreFactory}

object HadoopCheckpointStoreFactory {
  val VERSION = 1
}

class HadoopCheckpointStoreFactory(
    dir: String,
    @transient private var hadoopConfig: Configuration,
    rotation: Rotation = new FileSizeRotation(128 * Math.pow(2, 20).toLong))
  extends CheckpointStoreFactory {
  import org.apache.gearpump.streaming.hadoop.HadoopCheckpointStoreFactory._

  private def readObject(in: ObjectInputStream): Unit = {
    in.defaultReadObject()
    hadoopConfig = new Configuration(false)
    hadoopConfig.readFields(in)
  }

  override def getCheckpointStore(name: String): CheckpointStore = {
    val dirPath = new Path(dir + Path.SEPARATOR + s"v$VERSION", name)
    val fs = HadoopUtil.getFileSystemForPath(dirPath, hadoopConfig)
    new HadoopCheckpointStore(dirPath, fs, hadoopConfig, rotation)
  }
}
Example 28
Source File: exercise10.scala From scala-for-the-Impatient with MIT License | 5 votes |
import collection.mutable.ArrayBuffer
import java.io.{ObjectInputStream, FileOutputStream, FileInputStream, ObjectOutputStream}

class Person(var name:String) extends Serializable{
  val friends = new ArrayBuffer[Person]()

  def addFriend(friend : Person){
    friends += friend
  }

  override def toString() = {
    var str = "My name is " + name + " and my friends name is "
    friends.foreach(str += _.name + ",")
    str
  }
}

object Test extends App{
  val p1 = new Person("Ivan")
  val p2 = new Person("F2")
  val p3 = new Person("F3")

  p1.addFriend(p2)
  p1.addFriend(p3)
  println(p1)

  val out = new ObjectOutputStream(new FileOutputStream("person.obj"))
  out.writeObject(p1)
  out.close()

  val in = new ObjectInputStream(new FileInputStream("person.obj"))
  val p = in.readObject().asInstanceOf[Person]
  println(p)
}
Example 29
Source File: FileUtils.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.wmexchanger.utils

import java.io.BufferedInputStream
import java.io.BufferedOutputStream
import java.io.File
import java.io.FileInputStream
import java.io.FileOutputStream
import java.io.FilenameFilter
import java.io.ObjectInputStream
import java.io.ObjectOutputStream
import java.io.PrintWriter

import org.clulab.wm.wmexchanger.utils.Closer.AutoCloser

import scala.io.Source

object FileUtils {
  def appendingPrintWriterFromFile(file: File): PrintWriter = Sinker.printWriterFromFile(file, append = true)

  def appendingPrintWriterFromFile(path: String): PrintWriter = Sinker.printWriterFromFile(path, append = true)

  def printWriterFromFile(file: File): PrintWriter = Sinker.printWriterFromFile(file, append = false)

  def printWriterFromFile(path: String): PrintWriter = Sinker.printWriterFromFile(path, append = false)

  // Output
  def newBufferedOutputStream(file: File): BufferedOutputStream =
    new BufferedOutputStream(new FileOutputStream(file))

  def newBufferedOutputStream(filename: String): BufferedOutputStream =
    newBufferedOutputStream(new File(filename))

  def newAppendingBufferedOutputStream(file: File): BufferedOutputStream =
    new BufferedOutputStream(new FileOutputStream(file, true))

  def newAppendingBufferedOutputStream(filename: String): BufferedOutputStream =
    newAppendingBufferedOutputStream(new File(filename))

  def newObjectOutputStream(filename: String): ObjectOutputStream =
    new ObjectOutputStream(newBufferedOutputStream(filename))

  // Input
  def newBufferedInputStream(file: File): BufferedInputStream =
    new BufferedInputStream(new FileInputStream(file))

  def newBufferedInputStream(filename: String): BufferedInputStream =
    newBufferedInputStream(new File(filename))

  def newObjectInputStream(filename: String): ObjectInputStream =
    new ObjectInputStream(newBufferedInputStream(filename))

  def findFiles(collectionDir: String, extension: String): Seq[File] = {
    val dir = new File(collectionDir)
    val filter = new FilenameFilter {
      def accept(dir: File, name: String): Boolean = name.endsWith(extension)
    }

    val result = Option(dir.listFiles(filter))
        .getOrElse(throw Sourcer.newFileNotFoundException(collectionDir))
    result
  }

  protected def getTextFromSource(source: Source): String = source.mkString

  def getTextFromFile(file: File): String =
    Sourcer.sourceFromFile(file).autoClose { source =>
      getTextFromSource(source)
    }
}
Example 30
Source File: TestObjSerialization.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.eidos.serialization.obj

import java.io.ByteArrayOutputStream
import java.io.ObjectOutputStream

import org.clulab.odin.EventMention
import org.clulab.odin.Mention
import org.clulab.odin.TextBoundMention
import org.clulab.wm.eidos.EidosSystem
import org.clulab.wm.eidos.test.TestUtils.Test
import org.clulab.wm.eidos.utils.Closer.AutoCloser
import org.clulab.wm.eidos.utils.FileUtils

class TestSerialization extends Test {
  val config = this.defaultConfig
  val reader = new EidosSystem(config)

  def serialize(original: Any, index: Int): Unit = {
    val copy = (new ByteArrayOutputStream()).autoClose { streamOut =>
      (new ObjectOutputStream(streamOut)).autoClose { encoder =>
        encoder.writeObject(original)
      }

      val bytes = streamOut.toByteArray
      FileUtils.load[Any](bytes, this)
    }

    if (original.isInstanceOf[Mention])
      require(original == copy)
  }

  behavior of "Standard Serializer"

  it should "serialize and deserialize mentions" in {
    val text = "Water trucking has decreased due to the cost of fuel last week." // "last week" added for time
    //val text = "Food shortages cause hunger."
    val annotatedDocument = reader.extractFromText(text)
    val mentionsOut = annotatedDocument.odinMentions

    mentionsOut.foreach {
      case eventMention: EventMention =>
        var index = 0 // test

        serialize(eventMention.labels, index)
        index += 1
        serialize(eventMention.tokenInterval, index)
        index += 1
        serialize(eventMention.trigger, index)
        index += 1
        serialize(eventMention.arguments, index)
        index += 1
        serialize(eventMention.paths, index)
        index += 1
        serialize(eventMention.sentence, index)
        index += 1
        serialize(eventMention.document, index)
        index += 1
        serialize(eventMention.keep, index)
        index += 1
        serialize(eventMention.foundBy, index)
        eventMention.attachments.foreach { attachment =>
          serialize(attachment, index); index += 1
        }
        serialize(eventMention.attachments, index)
        index += 1
        serialize(eventMention, index)
        index += 1
      case textBoundMention: TextBoundMention =>
        var index = 0 // test

        textBoundMention.attachments.foreach { attachment =>
          serialize(attachment, index); index += 1
        }
        serialize(textBoundMention.attachments, index)
        index += 1

        val hash = textBoundMention.hashCode
        val mention2 = textBoundMention.copy()
        val hash2 = mention2.hashCode

        serialize(mention2, index)
        index += 1
    }
  }
}
Example 38
Source File: SerializableBuffer.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream} import java.nio.ByteBuffer import java.nio.channels.Channels private[spark] class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable { def value = buffer private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { val length = in.readInt() buffer = ByteBuffer.allocate(length) var amountRead = 0 val channel = Channels.newChannel(in) while (amountRead < length) { val ret = channel.read(buffer) if (ret == -1) { throw new EOFException("End of file before fully reading buffer") } amountRead += ret } buffer.rewind() // Allow us to read it later } private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.writeInt(buffer.limit()) if (Channels.newChannel(out).write(buffer) != buffer.limit()) { throw new IOException("Could not fully write buffer to output stream") } buffer.rewind() // Allow us to write it again later } }
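SerializableBuffer above relies on the private writeObject and readObject hooks that ObjectOutputStream and ObjectInputStream invoke reflectively during serialization. Below is a simplified, self-contained sketch of the same pattern; BufferHolder is a made-up stand-in, not Spark code, and it copies the bytes of a non-serializable ByteBuffer by hand.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer

// The ByteBuffer field is transient because ByteBuffer is not Serializable;
// the hooks below copy its contents explicitly instead.
class BufferHolder(@transient var buffer: ByteBuffer) extends Serializable {
  private def writeObject(out: ObjectOutputStream): Unit = {
    val bytes = new Array[Byte](buffer.remaining())
    buffer.duplicate().get(bytes)
    out.writeInt(bytes.length)
    out.write(bytes)
  }
  private def readObject(in: ObjectInputStream): Unit = {
    val bytes = new Array[Byte](in.readInt())
    in.readFully(bytes)
    buffer = ByteBuffer.wrap(bytes)
  }
}

object BufferHolderDemo {
  def main(args: Array[String]): Unit = {
    val bos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(bos)
    oos.writeObject(new BufferHolder(ByteBuffer.wrap("hello".getBytes("UTF-8"))))
    oos.close()
    val ois = new ObjectInputStream(new ByteArrayInputStream(bos.toByteArray))
    val copy = ois.readObject().asInstanceOf[BufferHolder]
    ois.close()
    println(new String(copy.buffer.array(), "UTF-8")) // prints: hello
  }
}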
Example 39
Source File: CartesianRDD.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark._ import org.apache.spark.util.Utils private[spark] class CartesianPartition( idx: Int, @transient rdd1: RDD[_], @transient rdd2: RDD[_], s1Index: Int, s2Index: Int ) extends Partition { var s1 = rdd1.partitions(s1Index) var s2 = rdd2.partitions(s2Index) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization s1 = rdd1.partitions(s1Index) s2 = rdd2.partitions(s2Index) oos.defaultWriteObject() } } private[spark] class CartesianRDD[T: ClassTag, U: ClassTag]( sc: SparkContext, var rdd1 : RDD[T], var rdd2 : RDD[U]) extends RDD[Pair[T, U]](sc, Nil) with Serializable { val numPartitionsInRdd2 = rdd2.partitions.size override def getPartitions: Array[Partition] = { // create the cross product split val array = new Array[Partition](rdd1.partitions.size * rdd2.partitions.size) for (s1 <- rdd1.partitions; s2 <- rdd2.partitions) { val idx = s1.index * numPartitionsInRdd2 + s2.index array(idx) = new CartesianPartition(idx, rdd1, rdd2, s1.index, s2.index) } array } override def getPreferredLocations(split: Partition): Seq[String] = { val currSplit = split.asInstanceOf[CartesianPartition] (rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2)).distinct } override def compute(split: Partition, context: TaskContext) = { val currSplit = split.asInstanceOf[CartesianPartition] for (x <- rdd1.iterator(currSplit.s1, context); y <- rdd2.iterator(currSplit.s2, context)) yield (x, y) } override def getDependencies: Seq[Dependency[_]] = List( new NarrowDependency(rdd1) { def getParents(id: Int): Seq[Int] = List(id / numPartitionsInRdd2) }, new NarrowDependency(rdd2) { def getParents(id: Int): Seq[Int] = List(id % numPartitionsInRdd2) } ) override def clearDependencies() { super.clearDependencies() rdd1 = null rdd2 = null } }
Example 40
Source File: UnionRDD.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag import org.apache.spark.{Dependency, Partition, RangeDependency, SparkContext, TaskContext} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils private[spark] class UnionPartition[T: ClassTag]( idx: Int, @transient rdd: RDD[T], val parentRddIndex: Int, @transient parentRddPartitionIndex: Int) extends Partition { var parentPartition: Partition = rdd.partitions(parentRddPartitionIndex) def preferredLocations() = rdd.preferredLocations(parentPartition) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization parentPartition = rdd.partitions(parentRddPartitionIndex) oos.defaultWriteObject() } } @DeveloperApi class UnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]]) extends RDD[T](sc, Nil) { // Nil since we implement getDependencies override def getPartitions: Array[Partition] = { val array = new Array[Partition](rdds.map(_.partitions.size).sum) var pos = 0 for ((rdd, rddIndex) <- rdds.zipWithIndex; split <- rdd.partitions) { array(pos) = new UnionPartition(pos, rdd, rddIndex, split.index) pos += 1 } array } override def getDependencies: Seq[Dependency[_]] = { val deps = new ArrayBuffer[Dependency[_]] var pos = 0 for (rdd <- rdds) { deps += new RangeDependency(rdd, 0, pos, rdd.partitions.size) pos += rdd.partitions.size } deps } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val part = s.asInstanceOf[UnionPartition[T]] parent[T](part.parentRddIndex).iterator(part.parentPartition, context) } override def getPreferredLocations(s: Partition): Seq[String] = s.asInstanceOf[UnionPartition[T]].preferredLocations() override def clearDependencies() { super.clearDependencies() rdds = null } }
Example 41
Source File: PartitionerAwareUnionRDD.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark.{OneToOneDependency, Partition, SparkContext, TaskContext} import org.apache.spark.util.Utils private[spark] class PartitionerAwareUnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]] ) extends RDD[T](sc, rdds.map(x => new OneToOneDependency(x))) { require(rdds.length > 0) require(rdds.flatMap(_.partitioner).toSet.size == 1, "Parent RDDs have different partitioners: " + rdds.flatMap(_.partitioner)) override val partitioner = rdds.head.partitioner override def getPartitions: Array[Partition] = { val numPartitions = partitioner.get.numPartitions (0 until numPartitions).map(index => { new PartitionerAwareUnionRDDPartition(rdds, index) }).toArray } // Get the location where most of the partitions of parent RDDs are located override def getPreferredLocations(s: Partition): Seq[String] = { logDebug("Finding preferred location for " + this + ", partition " + s.index) val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents val locations = rdds.zip(parentPartitions).flatMap { case (rdd, part) => { val parentLocations = currPrefLocs(rdd, part) logDebug("Location of " + rdd + " partition " + part.index + " = " + parentLocations) parentLocations } } val location = if (locations.isEmpty) { None } else { // Find the location that maximum number of parent partitions prefer Some(locations.groupBy(x => x).maxBy(_._2.length)._1) } logDebug("Selected location for " + this + ", partition " + s.index + " = " + location) location.toSeq } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents rdds.zip(parentPartitions).iterator.flatMap { case (rdd, p) => rdd.iterator(p, context) } } override def clearDependencies() { super.clearDependencies() rdds = null } // Get the *current* preferred locations from the DAGScheduler (as opposed to the static ones) private def currPrefLocs(rdd: RDD[_], part: Partition): Seq[String] = { rdd.context.getPreferredLocs(rdd, part.index).map(tl => tl.host) } }
Example 42
Source File: NotSerializableFakeTask.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.io.{ObjectInputStream, ObjectOutputStream, IOException} import org.apache.spark.TaskContext private[spark] class NotSerializableFakeTask(myId: Int, stageId: Int) extends Task[Array[Byte]](stageId, 0) { override def runTask(context: TaskContext): Array[Byte] = Array.empty[Byte] override def preferredLocations: Seq[TaskLocation] = Seq[TaskLocation]() @throws(classOf[IOException]) private def writeObject(out: ObjectOutputStream): Unit = { if (stageId == 0) { throw new IllegalStateException("Cannot serialize") } } @throws(classOf[IOException]) private def readObject(in: ObjectInputStream): Unit = {} }
Example 43
Source File: SerializableUtils.scala From protobuf-generic with Apache License 2.0 | 5 votes |
package me.lyh.protobuf.generic.test import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} object SerializableUtils { private def serializeToByteArray(value: Serializable): Array[Byte] = { val buffer = new ByteArrayOutputStream() val oos = new ObjectOutputStream(buffer) oos.writeObject(value) buffer.toByteArray } private def deserializeFromByteArray(encodedValue: Array[Byte]): AnyRef = { val ois = new ObjectInputStream(new ByteArrayInputStream(encodedValue)) ois.readObject() } def ensureSerializable[T <: Serializable](value: T): T = deserializeFromByteArray(serializeToByteArray(value)).asInstanceOf[T] }
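A short usage sketch for the helper above (the JobConfig case class is hypothetical): ensureSerializable pushes the value through an ObjectOutputStream/ObjectInputStream pair, so a NotSerializableException surfaces immediately if any member cannot be serialized.

object EnsureSerializableSketch {
  case class JobConfig(name: String, retries: Int) // hypothetical serializable value

  def main(args: Array[String]): Unit = {
    val copy = SerializableUtils.ensureSerializable(JobConfig("nightly", 3))
    assert(copy == JobConfig("nightly", 3)) // the deserialized copy is an equal value
  }
}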
Example 45
Source File: GenericReader.scala From protobuf-generic with Apache License 2.0 | 5 votes |
package me.lyh.protobuf.generic import java.io.{InputStream, ObjectInputStream, ObjectOutputStream, OutputStream} import java.nio.ByteBuffer import java.util.{ArrayList => JArrayList, LinkedHashMap => JLinkedHashMap, TreeMap => JTreeMap} import com.google.protobuf.Descriptors.FieldDescriptor.Type import com.google.protobuf.{CodedInputStream, WireFormat} import scala.collection.JavaConverters._ object GenericReader { def of(schema: Schema): GenericReader = new GenericReader(schema) } class GenericReader(val schema: Schema) extends Serializable { def read(buf: Array[Byte]): GenericRecord = read(CodedInputStream.newInstance(buf), schema.root) def read(buf: ByteBuffer): GenericRecord = read(CodedInputStream.newInstance(buf), schema.root) def read(input: InputStream): GenericRecord = read(CodedInputStream.newInstance(input), schema.root) private def read(input: CodedInputStream, messageSchema: MessageSchema): GenericRecord = { val map = new JTreeMap[java.lang.Integer, Any]() while (!input.isAtEnd) { val tag = input.readTag() val id = WireFormat.getTagFieldNumber(tag) val field = messageSchema.fields(id) if (field.label == Label.REPEATED) { if (!map.containsKey(id)) { map.put(id, new JArrayList[Any]()) } val list = map.get(id).asInstanceOf[java.util.ArrayList[Any]] if (field.packed) { val bytesIn = CodedInputStream.newInstance(input.readByteBuffer()) while (!bytesIn.isAtEnd) { list.add(readValue(bytesIn, field)) } } else { list.add(readValue(input, field)) } } else { map.put(id, readValue(input, field)) } } val result = new JLinkedHashMap[String, Any]() map.asScala.foreach(kv => result.put(messageSchema.fields(kv._1).name, kv._2)) messageSchema.fields.valuesIterator.foreach { f => if (f.default.isDefined && !result.containsKey(f.name)) { result.put(f.name, f.default.get) } } result } private def readValue(in: CodedInputStream, field: Field): Any = field.`type` match { case Type.FLOAT => in.readFloat() case Type.DOUBLE => in.readDouble() case Type.FIXED32 => in.readFixed32() case Type.FIXED64 => in.readFixed64() case Type.INT32 => in.readInt32() case Type.INT64 => in.readInt64() case Type.UINT32 => in.readUInt32() case Type.UINT64 => in.readUInt64() case Type.SFIXED32 => in.readSFixed32() case Type.SFIXED64 => in.readSFixed64() case Type.SINT32 => in.readSInt32() case Type.SINT64 => in.readSInt64() case Type.BOOL => in.readBool() case Type.STRING => in.readString() case Type.BYTES => Base64.encode(in.readByteArray()) case Type.ENUM => schema.enums(field.schema.get).values(in.readEnum()) case Type.MESSAGE => val nestedIn = CodedInputStream.newInstance(in.readByteBuffer()) read(nestedIn, schema.messages(field.schema.get)) case Type.GROUP => throw new IllegalArgumentException("Unsupported type: GROUP") } private def readObject(in: ObjectInputStream): Unit = { val schema = Schema.fromJson(in.readUTF()) val schemaField = getClass.getDeclaredField("schema") schemaField.setAccessible(true) schemaField.set(this, schema) } private def writeObject(out: ObjectOutputStream): Unit = out.writeUTF(schema.toJson) }
Example 46
Source File: TemplateSpec.scala From cluster-broccoli with Apache License 2.0 | 5 votes |
package de.frosner.broccoli.models import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} import org.specs2.mutable.Specification import play.api.libs.json.Json import Template.{templateApiWrites, templatePersistenceReads} class TemplateSpec extends Specification { "A template" should { "extract only parameters specified in the parameters" in { Template("test", "Hallo {{id}}. I like {{person_name}}.", "desc", Map("id" -> ParameterInfo("id", None, None, None, ParameterType.Raw, None))).parameters === Set("id") } "not automatically extract parameters from a template" in { Template("test", "Hallo {{id}}, how is {{object}}", "desc", Map.empty).parameters === Set.empty } "create the template version correctly in" in { Template("test", "template JSON", "desc", Map.empty).version === "889df4c8118c30a28ed4f51674a0f19d" } "result in different template versions if the template JSON differs" in { Template("test", "template JSON", "desc", Map.empty).version !== Template("test", "template JSONs", "desc", Map.empty).version } "result in different template versions if the template parameter info differs" in { Template( id = "test", template = "template JSON {{id}}", description = "desc", parameterInfos = Map.empty ).version !== Template( id = "test", template = "template JSON {{id}}", description = "desc", parameterInfos = Map( "id" -> ParameterInfo("id", None, None, secret = Some(false), `type` = ParameterType.String, orderIndex = None) ) ).version } } "Template serialization" should { "work correctly" in { val originalTemplate = Template("test", "Hallo {{name}}", "desc", Map.empty) val bos = new ByteArrayOutputStream() val oos = new ObjectOutputStream(bos) oos.writeObject(originalTemplate) oos.close() val ois = new ObjectInputStream(new ByteArrayInputStream(bos.toByteArray)) val deserializedTemplate = ois.readObject() ois.close() originalTemplate === deserializedTemplate } } "Template back-end JSON serialization" should { "work" in { val template = Template( id = "t", template = "{{id}}", description = "d", parameterInfos = Map.empty ) Json .fromJson(Json.toJson(template)(Template.templatePersistenceWrites))(Template.templatePersistenceReads) .get === template } } }
Example 47
Source File: JavaSerializationBenchmark.scala From scala-commons with MIT License | 5 votes |
package com.avsystem.commons package rpc.akka.serialization import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} import org.openjdk.jmh.annotations.{Benchmark, BenchmarkMode, Fork, Measurement, Mode, Scope, State, Warmup} import org.openjdk.jmh.infra.Blackhole @Warmup(iterations = 5) @Measurement(iterations = 20) @Fork(1) @BenchmarkMode(Array(Mode.Throughput)) @State(Scope.Thread) class JavaSerializationBenchmark { val something = Something(42, Nested(4 :: 8 :: 15 :: 16 :: 23 :: 42 :: Nil, 0), "lol") val array = { val baos = new ByteArrayOutputStream() val o = new ObjectOutputStream(baos) o.writeObject(something) o.close() baos.toByteArray } @Benchmark def byteStringOutput(): Something = { val baos = new ByteArrayOutputStream() val o = new ObjectOutputStream(baos) o.writeObject(something) o.close() val array = baos.toByteArray new ObjectInputStream(new ByteArrayInputStream(array)).readObject().asInstanceOf[Something] } @Benchmark def writeTest(): Array[Byte] = { val baos = new ByteArrayOutputStream() val o = new ObjectOutputStream(baos) o.writeObject(something) o.close() baos.toByteArray } @Benchmark def readTest(): Something = { new ObjectInputStream(new ByteArrayInputStream(array)).readObject().asInstanceOf[Something] } }
Example 48
Source File: MessageSerializationSuite.scala From bahir with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.pubnub import java.io.ByteArrayInputStream import java.io.ByteArrayOutputStream import java.io.ObjectInputStream import java.io.ObjectOutputStream import com.google.gson.JsonParser import com.pubnub.api.models.consumer.pubsub.PNMessageResult import org.apache.spark.SparkFunSuite class MessageSerializationSuite extends SparkFunSuite { test("Full example") { checkMessageSerialization( "{\"message\":\"Hello, World!\"}", "channel1", "publisher1", "subscription1", System.currentTimeMillis * 10000 ) } test("Message from channel") { checkMessageSerialization("{\"message\":\"Hello, World!\"}", "c", "p", null, 13534398158620385L) } test("Message from subscription") { checkMessageSerialization("{\"message\":\"Hello, World!\"}", null, "p", "s", 13534397812467596L) } def checkMessageSerialization(payload: String, channel: String, publisher: String, subscription: String, timestamp: Long): Unit = { val builder = PNMessageResult.builder .message(if (payload != null) new JsonParser().parse(payload) else null) .channel(channel) .publisher(publisher) .subscription(subscription) .timetoken(timestamp) val pubNubMessage = builder.build() val sparkMessage = new SparkPubNubMessage sparkMessage.message = pubNubMessage // serializer val byteOutStream = new ByteArrayOutputStream val outputStream = new ObjectOutputStream(byteOutStream) outputStream.writeObject(sparkMessage) outputStream.flush() outputStream.close() byteOutStream.close() val serializedBytes = byteOutStream.toByteArray // deserialize val byteInStream = new ByteArrayInputStream(serializedBytes) val inputStream = new ObjectInputStream(byteInStream) val deserializedMessage = inputStream.readObject().asInstanceOf[SparkPubNubMessage] inputStream.close() byteInStream.close() assert(payload.equals(deserializedMessage.getPayload)) if (channel != null) { assert(channel.equals(deserializedMessage.getChannel)) } else { assert(deserializedMessage.getChannel == null) } if (subscription != null) { assert(subscription.equals(deserializedMessage.getSubscription)) } else { assert(deserializedMessage.getSubscription == null) } assert(publisher.equals(deserializedMessage.getPublisher)) val unixTimestamp = Math.ceil(timestamp / 10000).longValue() assert(unixTimestamp.equals(deserializedMessage.getTimestamp)) } }
Example 49
Source File: TestSerializationAndLazy.scala From incubator-daffodil with Apache License 2.0 | 5 votes |
package org.apache.daffodil.util import org.junit.Assert._ import java.io.ByteArrayOutputStream import java.io.ObjectOutputStream import java.io.ByteArrayInputStream import java.io.ObjectInputStream import org.junit.Test class ToSerialize extends Serializable { val v = 5 var lazyValWasEvaluated = false lazy val x = { // println("v is " + v) lazyValWasEvaluated = true 2 * v } } class TestSerializationAndLazy { @Test def testSerializeBeforeLazyEval(): Unit = { val instance = new ToSerialize val baos = new ByteArrayOutputStream val stream = new ObjectOutputStream(baos) stream.writeObject(instance) stream.flush() stream.close() assertFalse(instance.lazyValWasEvaluated) val ba = baos.toByteArray() val bais = new ByteArrayInputStream(ba) val istream = new ObjectInputStream(bais) val restoredInstance = istream.readObject() istream.close() assertTrue(restoredInstance.isInstanceOf[ToSerialize]) val ts = restoredInstance.asInstanceOf[ToSerialize] assertFalse(ts.lazyValWasEvaluated) ts.x assertTrue(ts.lazyValWasEvaluated) } }
Example 50
Source File: TMNodesWriter.scala From spark-nlp with Apache License 2.0 | 5 votes |
package com.johnsnowlabs.nlp.annotators.btm import java.io.{ByteArrayOutputStream, ObjectOutputStream} import com.johnsnowlabs.storage.{RocksDBConnection, StorageBatchWriter} class TMNodesWriter( override protected val connection: RocksDBConnection ) extends StorageBatchWriter[TrieNode] { def toBytes(content: TrieNode): Array[Byte] = { val stream: ByteArrayOutputStream = new ByteArrayOutputStream() val oos = new ObjectOutputStream(stream) oos.writeObject(content) oos.close() stream.toByteArray } def add(word: Int, value: TrieNode): Unit = { super.add(word.toString, value) } override protected def writeBufferSize: Int = 10000 }
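toBytes above covers only the writing side. A hypothetical inverse (not part of the shown file) would read the stored bytes back with a matching ObjectInputStream; it assumes TrieNode is the same Serializable type used by the writer and would live alongside toBytes so the type resolves.

import java.io.{ByteArrayInputStream, ObjectInputStream}

def fromBytes(bytes: Array[Byte]): TrieNode = {
  val ois = new ObjectInputStream(new ByteArrayInputStream(bytes))
  try ois.readObject().asInstanceOf[TrieNode] finally ois.close()
}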
Example 51
Source File: SerializableHadoopConfiguration.scala From hail with MIT License | 5 votes |
package is.hail.utils import java.io.{ObjectInputStream, ObjectOutputStream, Serializable} import org.apache.hadoop class SerializableHadoopConfiguration(@transient var value: hadoop.conf.Configuration) extends Serializable { private def writeObject(out: ObjectOutputStream) { out.defaultWriteObject() value.write(out) } private def readObject(in: ObjectInputStream) { value = new hadoop.conf.Configuration(false) value.readFields(in) } }
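A hedged usage sketch: hadoop.conf.Configuration is not Serializable, so the wrapper above lets it pass through Java serialization; the configuration key and value here are hypothetical.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
import org.apache.hadoop

object HadoopConfRoundTripSketch {
  def main(args: Array[String]): Unit = {
    val conf = new hadoop.conf.Configuration()
    conf.set("fs.defaultFS", "hdfs://namenode:8020") // hypothetical setting

    val bos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(bos)
    oos.writeObject(new SerializableHadoopConfiguration(conf))
    oos.close()

    val ois = new ObjectInputStream(new ByteArrayInputStream(bos.toByteArray))
    val copy = ois.readObject().asInstanceOf[SerializableHadoopConfiguration]
    ois.close()
    println(copy.value.get("fs.defaultFS")) // prints: hdfs://namenode:8020
  }
}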
Example 52
Source File: SpillingCollectIterator.scala From hail with MIT License | 5 votes |
package is.hail.utils import java.io.{ObjectInputStream, ObjectOutputStream} import is.hail.backend.spark.SparkBackend import is.hail.expr.ir.ExecuteContext import is.hail.io.fs.FS import org.apache.spark.rdd.RDD import scala.reflect.ClassTag import scala.reflect.classTag object SpillingCollectIterator { def apply[T: ClassTag](localTmpdir: String, fs: FS, rdd: RDD[T], sizeLimit: Int): SpillingCollectIterator[T] = { val nPartitions = rdd.partitions.length val x = new SpillingCollectIterator(localTmpdir, fs, nPartitions, sizeLimit) val ctc = classTag[T] SparkBackend.sparkContext("SpillingCollectIterator.apply").runJob( rdd, (_, it: Iterator[T]) => it.toArray(ctc), 0 until nPartitions, x.append _) x } } class SpillingCollectIterator[T: ClassTag] private (localTmpdir: String, fs: FS, nPartitions: Int, sizeLimit: Int) extends Iterator[T] { private[this] val files: Array[(String, Long)] = new Array(nPartitions) private[this] val buf: Array[Array[T]] = new Array(nPartitions) private[this] var _size: Long = 0L private[this] var i: Int = 0 private[this] var it: Iterator[T] = null private def append(partition: Int, a: Array[T]): Unit = synchronized { assert(buf(partition) == null) buf(partition) = a _size += a.length if (_size > sizeLimit) { val file = ExecuteContext.createTmpPathNoCleanup(localTmpdir, s"spilling-collect-iterator-$partition") using(fs.createNoCompression(file)) { os => var k = 0 while (k < buf.length) { val vals = buf(k) if (vals != null) { buf(k) = null val pos = os.getPosition val oos = new ObjectOutputStream(os) oos.writeInt(vals.length) var j = 0 while (j < vals.length) { oos.writeObject(vals(j)) j += 1 } files(k) = (file, pos) oos.flush() } k += 1 } } _size = 0 } } def hasNext: Boolean = { if (it == null || !it.hasNext) { if (i >= files.length) { it = null return false } else if (files(i) == null) { assert(buf(i) != null) it = buf(i).iterator buf(i) = null } else { val (filename, pos) = files(i) using(fs.openNoCompression(filename)) { is => is.seek(pos) using(new ObjectInputStream(is)) { ois => val length = ois.readInt() val arr = new Array[T](length) var j = 0 while (j < length) { arr(j) = ois.readObject().asInstanceOf[T] j += 1 } it = arr.iterator } } } i += 1 } it.hasNext } def next: T = { hasNext it.next } }
Example 53
Source File: QueueInputDStream.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import java.io.{NotSerializableException, ObjectInputStream, ObjectOutputStream} import scala.collection.mutable.{ArrayBuffer, Queue} import scala.reflect.ClassTag import org.apache.spark.rdd.{RDD, UnionRDD} import org.apache.spark.streaming.{StreamingContext, Time} private[streaming] class QueueInputDStream[T: ClassTag]( ssc: StreamingContext, val queue: Queue[RDD[T]], oneAtATime: Boolean, defaultRDD: RDD[T] ) extends InputDStream[T](ssc) { override def start() { } override def stop() { } private def readObject(in: ObjectInputStream): Unit = { throw new NotSerializableException("queueStream doesn't support checkpointing. " + "Please don't use queueStream when checkpointing is enabled.") } private def writeObject(oos: ObjectOutputStream): Unit = { logWarning("queueStream doesn't support checkpointing") } override def compute(validTime: Time): Option[RDD[T]] = { val buffer = new ArrayBuffer[RDD[T]]() queue.synchronized { if (oneAtATime && queue.nonEmpty) { buffer += queue.dequeue() } else { buffer ++= queue queue.clear() } } if (buffer.nonEmpty) { if (oneAtATime) { Some(buffer.head) } else { Some(new UnionRDD(context.sc, buffer.toSeq)) } } else if (defaultRDD != null) { Some(defaultRDD) } else { Some(ssc.sparkContext.emptyRDD) } } }
Example 54
Source File: SerializableConfiguration.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.{ObjectInputStream, ObjectOutputStream} import org.apache.hadoop.conf.Configuration private[spark] class SerializableConfiguration(@transient var value: Configuration) extends Serializable { private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.defaultWriteObject() value.write(out) } private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { value = new Configuration(false) value.readFields(in) } }
Example 55
Source File: SerializableJobConf.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.{ObjectInputStream, ObjectOutputStream} import org.apache.hadoop.mapred.JobConf private[spark] class SerializableJobConf(@transient var value: JobConf) extends Serializable { private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.defaultWriteObject() value.write(out) } private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { value = new JobConf(false) value.readFields(in) } }
Example 56
Source File: SerializableBuffer.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream} import java.nio.ByteBuffer import java.nio.channels.Channels private[spark] class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable { def value: ByteBuffer = buffer private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { val length = in.readInt() buffer = ByteBuffer.allocate(length) var amountRead = 0 val channel = Channels.newChannel(in) while (amountRead < length) { val ret = channel.read(buffer) if (ret == -1) { throw new EOFException("End of file before fully reading buffer") } amountRead += ret } buffer.rewind() // Allow us to read it later } private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.writeInt(buffer.limit()) if (Channels.newChannel(out).write(buffer) != buffer.limit()) { throw new IOException("Could not fully write buffer to output stream") } buffer.rewind() // Allow us to write it again later } }
Example 57
Source File: CartesianRDD.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark._ import org.apache.spark.util.Utils private[spark] class CartesianPartition( idx: Int, @transient private val rdd1: RDD[_], @transient private val rdd2: RDD[_], s1Index: Int, s2Index: Int ) extends Partition { var s1 = rdd1.partitions(s1Index) var s2 = rdd2.partitions(s2Index) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization s1 = rdd1.partitions(s1Index) s2 = rdd2.partitions(s2Index) oos.defaultWriteObject() } } private[spark] class CartesianRDD[T: ClassTag, U: ClassTag]( sc: SparkContext, var rdd1 : RDD[T], var rdd2 : RDD[U]) extends RDD[(T, U)](sc, Nil) with Serializable { val numPartitionsInRdd2 = rdd2.partitions.length override def getPartitions: Array[Partition] = { // create the cross product split val array = new Array[Partition](rdd1.partitions.length * rdd2.partitions.length) for (s1 <- rdd1.partitions; s2 <- rdd2.partitions) { val idx = s1.index * numPartitionsInRdd2 + s2.index array(idx) = new CartesianPartition(idx, rdd1, rdd2, s1.index, s2.index) } array } override def getPreferredLocations(split: Partition): Seq[String] = { val currSplit = split.asInstanceOf[CartesianPartition] (rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2)).distinct } override def compute(split: Partition, context: TaskContext): Iterator[(T, U)] = { val currSplit = split.asInstanceOf[CartesianPartition] for (x <- rdd1.iterator(currSplit.s1, context); y <- rdd2.iterator(currSplit.s2, context)) yield (x, y) } override def getDependencies: Seq[Dependency[_]] = List( new NarrowDependency(rdd1) { def getParents(id: Int): Seq[Int] = List(id / numPartitionsInRdd2) }, new NarrowDependency(rdd2) { def getParents(id: Int): Seq[Int] = List(id % numPartitionsInRdd2) } ) override def clearDependencies() { super.clearDependencies() rdd1 = null rdd2 = null } }
Example 58
Source File: UnionRDD.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.collection.mutable.ArrayBuffer import scala.collection.parallel.{ForkJoinTaskSupport, ThreadPoolTaskSupport} import scala.concurrent.forkjoin.ForkJoinPool import scala.reflect.ClassTag import org.apache.spark.{Dependency, Partition, RangeDependency, SparkContext, TaskContext} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils private[spark] class UnionPartition[T: ClassTag]( idx: Int, @transient private val rdd: RDD[T], val parentRddIndex: Int, @transient private val parentRddPartitionIndex: Int) extends Partition { var parentPartition: Partition = rdd.partitions(parentRddPartitionIndex) def preferredLocations(): Seq[String] = rdd.preferredLocations(parentPartition) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization parentPartition = rdd.partitions(parentRddPartitionIndex) oos.defaultWriteObject() } } object UnionRDD { private[spark] lazy val partitionEvalTaskSupport = new ForkJoinTaskSupport(new ForkJoinPool(8)) } @DeveloperApi class UnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]]) extends RDD[T](sc, Nil) { // Nil since we implement getDependencies // visible for testing private[spark] val isPartitionListingParallel: Boolean = rdds.length > conf.getInt("spark.rdd.parallelListingThreshold", 10) override def getPartitions: Array[Partition] = { val parRDDs = if (isPartitionListingParallel) { val parArray = rdds.par parArray.tasksupport = UnionRDD.partitionEvalTaskSupport parArray } else { rdds } val array = new Array[Partition](parRDDs.map(_.partitions.length).seq.sum) var pos = 0 for ((rdd, rddIndex) <- rdds.zipWithIndex; split <- rdd.partitions) { array(pos) = new UnionPartition(pos, rdd, rddIndex, split.index) pos += 1 } array } override def getDependencies: Seq[Dependency[_]] = { val deps = new ArrayBuffer[Dependency[_]] var pos = 0 for (rdd <- rdds) { deps += new RangeDependency(rdd, 0, pos, rdd.partitions.length) pos += rdd.partitions.length } deps } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val part = s.asInstanceOf[UnionPartition[T]] parent[T](part.parentRddIndex).iterator(part.parentPartition, context) } override def getPreferredLocations(s: Partition): Seq[String] = s.asInstanceOf[UnionPartition[T]].preferredLocations() override def clearDependencies() { super.clearDependencies() rdds = null } }
Example 59
Source File: PartitionerAwareUnionRDD.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark.{OneToOneDependency, Partition, SparkContext, TaskContext} import org.apache.spark.util.Utils private[spark] class PartitionerAwareUnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]] ) extends RDD[T](sc, rdds.map(x => new OneToOneDependency(x))) { require(rdds.nonEmpty) require(rdds.forall(_.partitioner.isDefined)) require(rdds.flatMap(_.partitioner).toSet.size == 1, "Parent RDDs have different partitioners: " + rdds.flatMap(_.partitioner)) override val partitioner = rdds.head.partitioner override def getPartitions: Array[Partition] = { val numPartitions = partitioner.get.numPartitions (0 until numPartitions).map { index => new PartitionerAwareUnionRDDPartition(rdds, index) }.toArray } // Get the location where most of the partitions of parent RDDs are located override def getPreferredLocations(s: Partition): Seq[String] = { logDebug("Finding preferred location for " + this + ", partition " + s.index) val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents val locations = rdds.zip(parentPartitions).flatMap { case (rdd, part) => val parentLocations = currPrefLocs(rdd, part) logDebug("Location of " + rdd + " partition " + part.index + " = " + parentLocations) parentLocations } val location = if (locations.isEmpty) { None } else { // Find the location that maximum number of parent partitions prefer Some(locations.groupBy(x => x).maxBy(_._2.length)._1) } logDebug("Selected location for " + this + ", partition " + s.index + " = " + location) location.toSeq } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents rdds.zip(parentPartitions).iterator.flatMap { case (rdd, p) => rdd.iterator(p, context) } } override def clearDependencies() { super.clearDependencies() rdds = null } // Get the *current* preferred locations from the DAGScheduler (as opposed to the static ones) private def currPrefLocs(rdd: RDD[_], part: Partition): Seq[String] = { rdd.context.getPreferredLocs(rdd, part.index).map(tl => tl.host) } }
Example 60
Source File: MqttConfig.scala From akka-iot-mqtt-v2 with GNU Lesser General Public License v3.0 | 5 votes |
package akkaiot import scala.concurrent.duration._ import java.io.Serializable import java.io.ByteArrayInputStream import java.io.ByteArrayOutputStream import java.io.ObjectInputStream import java.io.ObjectOutputStream import com.sandinh.paho.akka._ import com.sandinh.paho.akka.MqttPubSub._ object MqttConfig { val topic = "akka-iot-mqtt-topic" // Pub-Sub config val psConfig = PSConfig( brokerUrl = "tcp://test.mosquitto.org:1883", userName = null, password = null, stashTimeToLive = 1.minute, stashCapacity = 8000, reconnectDelayMin = 10.millis, reconnectDelayMax = 30.seconds, cleanSession = false ) // Serialize object to byte array def writeToByteArray(obj: Any): Array[Byte] = { val baos = new ByteArrayOutputStream val oos = new ObjectOutputStream(baos) try { oos.writeObject(obj) baos.toByteArray } finally { try { oos.close } catch { case _: Throwable => // Do nothing } } } // Deserialize object from byte array def readFromByteArray[A](bytes: Array[Byte]): A = { val bais = new ByteArrayInputStream(bytes) val ois = new ObjectInputStream(bais) try { val obj = ois.readObject obj.asInstanceOf[A] } finally { try { ois.close } catch { case _: Throwable => // Do nothing } } } }
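A small usage sketch for writeToByteArray and readFromByteArray above; WorkResult is a hypothetical serializable message type, not part of the shown object.

object MqttConfigSketch {
  case class WorkResult(deviceId: String, value: Double) // hypothetical payload

  def main(args: Array[String]): Unit = {
    val bytes: Array[Byte] = MqttConfig.writeToByteArray(WorkResult("sensor-7", 21.5))
    val restored: WorkResult = MqttConfig.readFromByteArray[WorkResult](bytes)
    println(restored) // WorkResult(sensor-7,21.5)
  }
}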
Example 61
Source File: QueueInputDStream.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import java.io.{NotSerializableException, ObjectInputStream, ObjectOutputStream} import scala.collection.mutable.{ArrayBuffer, Queue} import scala.reflect.ClassTag import org.apache.spark.rdd.{RDD, UnionRDD} import org.apache.spark.streaming.{Time, StreamingContext} private[streaming] class QueueInputDStream[T: ClassTag]( @transient ssc: StreamingContext, val queue: Queue[RDD[T]], oneAtATime: Boolean, defaultRDD: RDD[T] ) extends InputDStream[T](ssc) { override def start() { } override def stop() { } private def readObject(in: ObjectInputStream): Unit = { throw new NotSerializableException("queueStream doesn't support checkpointing. " + "Please don't use queueStream when checkpointing is enabled.") } private def writeObject(oos: ObjectOutputStream): Unit = { logWarning("queueStream doesn't support checkpointing") } override def compute(validTime: Time): Option[RDD[T]] = { val buffer = new ArrayBuffer[RDD[T]]() if (oneAtATime && queue.size > 0) { buffer += queue.dequeue() } else { buffer ++= queue.dequeueAll(_ => true) } if (buffer.size > 0) { if (oneAtATime) { Some(buffer.head) } else { Some(new UnionRDD(ssc.sc, buffer.toSeq)) } } else if (defaultRDD != null) { Some(defaultRDD) } else { None } } }
Example 62
Source File: SerializableBuffer.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream} import java.nio.ByteBuffer import java.nio.channels.Channels private[spark] class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable { def value: ByteBuffer = buffer private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { val length = in.readInt() buffer = ByteBuffer.allocate(length) var amountRead = 0 val channel = Channels.newChannel(in) while (amountRead < length) { val ret = channel.read(buffer) if (ret == -1) { throw new EOFException("End of file before fully reading buffer") } amountRead += ret } buffer.rewind() // Allow us to read it later } private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.writeInt(buffer.limit()) if (Channels.newChannel(out).write(buffer) != buffer.limit()) { throw new IOException("Could not fully write buffer to output stream") } buffer.rewind() // Allow us to write it again later } }
Example 63
Source File: CartesianRDD.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark._ import org.apache.spark.util.Utils private[spark] class CartesianPartition( idx: Int, @transient rdd1: RDD[_], @transient rdd2: RDD[_], s1Index: Int, s2Index: Int ) extends Partition { var s1 = rdd1.partitions(s1Index) var s2 = rdd2.partitions(s2Index) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization s1 = rdd1.partitions(s1Index) s2 = rdd2.partitions(s2Index) oos.defaultWriteObject() } } private[spark] class CartesianRDD[T: ClassTag, U: ClassTag]( sc: SparkContext, var rdd1 : RDD[T], var rdd2 : RDD[U]) extends RDD[Pair[T, U]](sc, Nil) with Serializable { val numPartitionsInRdd2 = rdd2.partitions.length override def getPartitions: Array[Partition] = { // create the cross product split val array = new Array[Partition](rdd1.partitions.length * rdd2.partitions.length) for (s1 <- rdd1.partitions; s2 <- rdd2.partitions) { val idx = s1.index * numPartitionsInRdd2 + s2.index array(idx) = new CartesianPartition(idx, rdd1, rdd2, s1.index, s2.index) } array } override def getPreferredLocations(split: Partition): Seq[String] = { val currSplit = split.asInstanceOf[CartesianPartition] (rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2)).distinct } override def compute(split: Partition, context: TaskContext): Iterator[(T, U)] = { val currSplit = split.asInstanceOf[CartesianPartition] for (x <- rdd1.iterator(currSplit.s1, context); y <- rdd2.iterator(currSplit.s2, context)) yield (x, y) } override def getDependencies: Seq[Dependency[_]] = List( new NarrowDependency(rdd1) { def getParents(id: Int): Seq[Int] = List(id / numPartitionsInRdd2) }, new NarrowDependency(rdd2) { def getParents(id: Int): Seq[Int] = List(id % numPartitionsInRdd2) } ) override def clearDependencies() { super.clearDependencies() rdd1 = null rdd2 = null } }
Example 64
Source File: UnionRDD.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag import org.apache.spark.{Dependency, Partition, RangeDependency, SparkContext, TaskContext} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils private[spark] class UnionPartition[T: ClassTag]( idx: Int, @transient rdd: RDD[T], val parentRddIndex: Int, @transient parentRddPartitionIndex: Int) extends Partition { var parentPartition: Partition = rdd.partitions(parentRddPartitionIndex) def preferredLocations(): Seq[String] = rdd.preferredLocations(parentPartition) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization parentPartition = rdd.partitions(parentRddPartitionIndex) oos.defaultWriteObject() } } @DeveloperApi class UnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]]) extends RDD[T](sc, Nil) { // Nil since we implement getDependencies override def getPartitions: Array[Partition] = { val array = new Array[Partition](rdds.map(_.partitions.length).sum) var pos = 0 for ((rdd, rddIndex) <- rdds.zipWithIndex; split <- rdd.partitions) { array(pos) = new UnionPartition(pos, rdd, rddIndex, split.index) pos += 1 } array } override def getDependencies: Seq[Dependency[_]] = { val deps = new ArrayBuffer[Dependency[_]] var pos = 0 for (rdd <- rdds) { deps += new RangeDependency(rdd, 0, pos, rdd.partitions.length) pos += rdd.partitions.length } deps } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val part = s.asInstanceOf[UnionPartition[T]] parent[T](part.parentRddIndex).iterator(part.parentPartition, context) } override def getPreferredLocations(s: Partition): Seq[String] = s.asInstanceOf[UnionPartition[T]].preferredLocations() override def clearDependencies() { super.clearDependencies() rdds = null } }
Example 65
Source File: PartitionerAwareUnionRDD.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark.{OneToOneDependency, Partition, SparkContext, TaskContext} import org.apache.spark.util.Utils private[spark] class PartitionerAwareUnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]] ) extends RDD[T](sc, rdds.map(x => new OneToOneDependency(x))) { require(rdds.length > 0) require(rdds.forall(_.partitioner.isDefined)) require(rdds.flatMap(_.partitioner).toSet.size == 1, "Parent RDDs have different partitioners: " + rdds.flatMap(_.partitioner)) override val partitioner = rdds.head.partitioner override def getPartitions: Array[Partition] = { val numPartitions = partitioner.get.numPartitions (0 until numPartitions).map(index => { new PartitionerAwareUnionRDDPartition(rdds, index) }).toArray } // Get the location where most of the partitions of parent RDDs are located override def getPreferredLocations(s: Partition): Seq[String] = { logDebug("Finding preferred location for " + this + ", partition " + s.index) val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents val locations = rdds.zip(parentPartitions).flatMap { case (rdd, part) => { val parentLocations = currPrefLocs(rdd, part) logDebug("Location of " + rdd + " partition " + part.index + " = " + parentLocations) parentLocations } } val location = if (locations.isEmpty) { None } else { // Find the location that maximum number of parent partitions prefer Some(locations.groupBy(x => x).maxBy(_._2.length)._1) } logDebug("Selected location for " + this + ", partition " + s.index + " = " + location) location.toSeq } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents rdds.zip(parentPartitions).iterator.flatMap { case (rdd, p) => rdd.iterator(p, context) } } override def clearDependencies() { super.clearDependencies() rdds = null } // Get the *current* preferred locations from the DAGScheduler (as opposed to the static ones) private def currPrefLocs(rdd: RDD[_], part: Partition): Seq[String] = { rdd.context.getPreferredLocs(rdd, part.index).map(tl => tl.host) } }
Example 66
Source File: Serialization.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.commons.serialization import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} trait Serialization { def deserialize[T](bytes: Array[Byte]): T = { val bufferIn = new ByteArrayInputStream(bytes) val streamIn = new ObjectInputStream(bufferIn) try { streamIn.readObject().asInstanceOf[T] } finally { streamIn.close() } } def serialize[T](objectToSerialize: T): Array[Byte] = { val byteArrayOutputStream: ByteArrayOutputStream = new ByteArrayOutputStream() val oos = new ObjectOutputStream(byteArrayOutputStream) try { oos.writeObject(objectToSerialize) oos.flush() byteArrayOutputStream.toByteArray } finally { oos.close() } } def serializeDeserialize[T](obj: T): T = deserialize[T](serialize[T](obj)) } object Serialization extends Serialization
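A brief sketch of the trait in use via its companion object (the Point case class is hypothetical); serializeDeserialize should hand back an equal copy of the input.

object SerializationSketch {
  case class Point(x: Int, y: Int) // hypothetical serializable value

  def main(args: Array[String]): Unit = {
    val copy = Serialization.serializeDeserialize(Point(1, 2))
    assert(copy == Point(1, 2))
  }
}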
Example 67
Source File: JavaSerde.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.core.serde import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectOutputStream} import akka.actor.ExtendedActorSystem import akka.serialization.JavaSerializer import akka.util.ClassLoaderObjectInputStream class JavaSerde(system: ExtendedActorSystem) extends Serde[AnyRef] { override def identifier: Int = 101 override def close(): Unit = () override def fromBytes(bytes: Array[Byte]): AnyRef = { val in = new ClassLoaderObjectInputStream(system.dynamicAccess.classLoader, new ByteArrayInputStream(bytes)) val obj = JavaSerializer.currentSystem.withValue(system) { in.readObject } in.close() obj } override def toBytes(o: AnyRef): Array[Byte] = { val bos = new ByteArrayOutputStream val out = new ObjectOutputStream(bos) JavaSerializer.currentSystem.withValue(system) { out.writeObject(o) } out.close() bos.toByteArray } }
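A hedged usage sketch for JavaSerde: it needs an ExtendedActorSystem, which a normally created ActorSystem can be cast to at runtime; the system name and sample value are made up.

import akka.actor.{ActorSystem, ExtendedActorSystem}

object JavaSerdeSketch {
  def main(args: Array[String]): Unit = {
    val system = ActorSystem("serde-demo").asInstanceOf[ExtendedActorSystem]
    try {
      val serde = new JavaSerde(system)
      val bytes = serde.toBytes(List("a", "b", "c")) // any Serializable AnyRef
      println(serde.fromBytes(bytes))                // prints: List(a, b, c)
    } finally {
      system.terminate()
    }
  }
}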
Example 68
Source File: QueueInputDStream.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import java.io.{NotSerializableException, ObjectInputStream, ObjectOutputStream} import scala.collection.mutable.{ArrayBuffer, Queue} import scala.reflect.ClassTag import org.apache.spark.rdd.{RDD, UnionRDD} import org.apache.spark.streaming.{Time, StreamingContext} private[streaming] class QueueInputDStream[T: ClassTag]( @transient ssc: StreamingContext, val queue: Queue[RDD[T]], oneAtATime: Boolean, defaultRDD: RDD[T] ) extends InputDStream[T](ssc) { override def start() { } override def stop() { } private def readObject(in: ObjectInputStream): Unit = { throw new NotSerializableException("queueStream doesn't support checkpointing. " + "Please don't use queueStream when checkpointing is enabled.") } private def writeObject(oos: ObjectOutputStream): Unit = { logWarning("queueStream doesn't support checkpointing") } override def compute(validTime: Time): Option[RDD[T]] = { val buffer = new ArrayBuffer[RDD[T]]() if (oneAtATime && queue.size > 0) { buffer += queue.dequeue() } else { buffer ++= queue.dequeueAll(_ => true) } if (buffer.size > 0) { if (oneAtATime) { Some(buffer.head) } else { Some(new UnionRDD(ssc.sc, buffer.toSeq)) } } else if (defaultRDD != null) { Some(defaultRDD) } else { None } } }
Example 69
Source File: SerializableConfiguration.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.{ObjectInputStream, ObjectOutputStream} import org.apache.hadoop.conf.Configuration private[spark] class SerializableConfiguration(@transient var value: Configuration) extends Serializable { private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.defaultWriteObject() value.write(out) } private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { value = new Configuration(false) value.readFields(in) } }
Example 70
Source File: SerializableJobConf.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.{ObjectInputStream, ObjectOutputStream} import org.apache.hadoop.mapred.JobConf private[spark] class SerializableJobConf(@transient var value: JobConf) extends Serializable { private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.defaultWriteObject() value.write(out) } private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { value = new JobConf(false) value.readFields(in) } }
Example 71
Source File: SerializableBuffer.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream} import java.nio.ByteBuffer import java.nio.channels.Channels private[spark] class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable { def value: ByteBuffer = buffer private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { val length = in.readInt() // ByteBuffer.allocate: a buffer must exist before it can be read or written; the static allocate() method creates one buffer = ByteBuffer.allocate(length) var amountRead = 0 val channel = Channels.newChannel(in) while (amountRead < length) { val ret = channel.read(buffer) if (ret == -1) { throw new EOFException("End of file before fully reading buffer") } amountRead += ret } buffer.rewind() // Allow us to read it later } private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.writeInt(buffer.limit()) if (Channels.newChannel(out).write(buffer) != buffer.limit()) { throw new IOException("Could not fully write buffer to output stream") } buffer.rewind() // Allow us to write it again later } }
Example 72
Source File: CartesianRDD.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark._ import org.apache.spark.util.Utils private[spark] class CartesianPartition( idx: Int, @transient rdd1: RDD[_], @transient rdd2: RDD[_], s1Index: Int, s2Index: Int ) extends Partition { var s1 = rdd1.partitions(s1Index) var s2 = rdd2.partitions(s2Index) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization s1 = rdd1.partitions(s1Index) s2 = rdd2.partitions(s2Index) oos.defaultWriteObject() } } private[spark] class CartesianRDD[T: ClassTag, U: ClassTag]( sc: SparkContext, var rdd1 : RDD[T], var rdd2 : RDD[U]) extends RDD[Pair[T, U]](sc, Nil) with Serializable { val numPartitionsInRdd2 = rdd2.partitions.length override def getPartitions: Array[Partition] = { // create the cross product split val array = new Array[Partition](rdd1.partitions.length * rdd2.partitions.length) for (s1 <- rdd1.partitions; s2 <- rdd2.partitions) { val idx = s1.index * numPartitionsInRdd2 + s2.index array(idx) = new CartesianPartition(idx, rdd1, rdd2, s1.index, s2.index) } array } override def getPreferredLocations(split: Partition): Seq[String] = { val currSplit = split.asInstanceOf[CartesianPartition] (rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2)).distinct } override def compute(split: Partition, context: TaskContext): Iterator[(T, U)] = { val currSplit = split.asInstanceOf[CartesianPartition] for (x <- rdd1.iterator(currSplit.s1, context); y <- rdd2.iterator(currSplit.s2, context)) yield (x, y) } override def getDependencies: Seq[Dependency[_]] = List( new NarrowDependency(rdd1) { def getParents(id: Int): Seq[Int] = List(id / numPartitionsInRdd2) }, new NarrowDependency(rdd2) { def getParents(id: Int): Seq[Int] = List(id % numPartitionsInRdd2) } ) override def clearDependencies() { super.clearDependencies() rdd1 = null rdd2 = null } }
Example 73
Source File: UnionRDD.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag import org.apache.spark.{Dependency, Partition, RangeDependency, SparkContext, TaskContext} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils private[spark] class UnionPartition[T: ClassTag]( idx: Int, @transient rdd: RDD[T], val parentRddIndex: Int, @transient parentRddPartitionIndex: Int) extends Partition { var parentPartition: Partition = rdd.partitions(parentRddPartitionIndex) def preferredLocations(): Seq[String] = rdd.preferredLocations(parentPartition) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization parentPartition = rdd.partitions(parentRddPartitionIndex) oos.defaultWriteObject() } } @DeveloperApi class UnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]]) extends RDD[T](sc, Nil) { // Nil since we implement getDependencies override def getPartitions: Array[Partition] = { val array = new Array[Partition](rdds.map(_.partitions.length).sum) var pos = 0 for ((rdd, rddIndex) <- rdds.zipWithIndex; split <- rdd.partitions) { array(pos) = new UnionPartition(pos, rdd, rddIndex, split.index) pos += 1 } array } override def getDependencies: Seq[Dependency[_]] = { val deps = new ArrayBuffer[Dependency[_]] var pos = 0 for (rdd <- rdds) { deps += new RangeDependency(rdd, 0, pos, rdd.partitions.length) pos += rdd.partitions.length } deps } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val part = s.asInstanceOf[UnionPartition[T]] parent[T](part.parentRddIndex).iterator(part.parentPartition, context) } override def getPreferredLocations(s: Partition): Seq[String] = s.asInstanceOf[UnionPartition[T]].preferredLocations() override def clearDependencies() { super.clearDependencies() rdds = null } }
Example 74
Source File: Github415.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.github import java.io.{FileOutputStream, ObjectOutputStream} import com.sksamuel.avro4s.Encoder import com.sksamuel.avro4s.github.Github415.PlaybackSession import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.must.Matchers class Github415 extends AnyFunSuite with Matchers { test("github 415") { val fileOut = new FileOutputStream("remove_me") val out = new ObjectOutputStream(fileOut) out.writeObject(Encoder[PlaybackSession]) } } object Github415 { object Rebuffers { case class Metrics(count: Int) case class EarlyLate(early: Metrics) case class Stats(session: Option[EarlyLate]) } case class Rebuffers(network: Option[Rebuffers.Stats]) case class PlaybackSession(rebuffers: Option[Rebuffers]) }
Example 75
Source File: GithubIssue485.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.github import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} import com.sksamuel.avro4s.record.decoder.CPWrapper import com.sksamuel.avro4s.{AvroSchema, Decoder, DefaultFieldMapper} import org.apache.avro.generic.GenericData import org.apache.avro.util.Utf8 import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.should.Matchers import shapeless.Coproduct class GithubIssue485 extends AnyFunSuite with Matchers { test("Serializable Coproduct Decoder #485") { val baos = new ByteArrayOutputStream() val oos = new ObjectOutputStream(baos) oos.writeObject(Decoder[CPWrapper]) oos.close() val decoder = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray)).readObject().asInstanceOf[Decoder[CPWrapper]] val schema = AvroSchema[CPWrapper] val record = new GenericData.Record(schema) record.put("u", new Utf8("wibble")) decoder.decode(record) shouldBe CPWrapper(Coproduct[CPWrapper.ISBG]("wibble")) } }
Example 76
Source File: GithubIssue484.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.github import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} import com.sksamuel.avro4s.record.decoder.ScalaEnumClass import com.sksamuel.avro4s.schema.Colours import com.sksamuel.avro4s.{AvroSchema, Decoder, DefaultFieldMapper} import org.apache.avro.generic.GenericData import org.apache.avro.generic.GenericData.EnumSymbol import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.should.Matchers class GithubIssue484 extends AnyFunSuite with Matchers { test("Serializable Scala Enum Decoder #484") { val baos = new ByteArrayOutputStream() val oos = new ObjectOutputStream(baos) oos.writeObject(Decoder[ScalaEnumClass]) oos.close() val decoder = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray)) .readObject() .asInstanceOf[Decoder[ScalaEnumClass]] val schema = AvroSchema[ScalaEnumClass] val record = new GenericData.Record(schema) record.put("colour", new EnumSymbol(schema.getField("colour").schema(), "Green")) decoder.decode(record) shouldBe ScalaEnumClass(Colours.Green) } }
Example 77
Source File: GithubIssue432.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.github import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} import com.sksamuel.avro4s.Encoder import org.scalatest.{FunSuite, Matchers} class GithubIssue432 extends FunSuite with Matchers { test("Serializable Encoder[BigDecimal] #432") { val oos = new ObjectOutputStream(new ByteArrayOutputStream()) oos.writeObject(Encoder.bigDecimalEncoder) oos.close() } test("Deserialized Encoder[BigDecimal] works") { val baos = new ByteArrayOutputStream() val oos = new ObjectOutputStream(baos) oos.writeObject(Encoder.bigDecimalEncoder) oos.close() val ois = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray)) val encoder = ois.readObject().asInstanceOf[Encoder[BigDecimal]] encoder.encode(12.34) } }
Example 78
Source File: JsDataSpec.scala From mist with Apache License 2.0 | 5 votes |
package mist.api.data import java.io.{ByteArrayOutputStream, ObjectOutputStream} import java.util import mist.api.encoding.defaultEncoders._ import mist.api.encoding.JsSyntax._ import org.scalatest._ import org.scalatest.prop.TableDrivenPropertyChecks._ class JsDataSpec extends FunSpec with Matchers { import java.{lang => jl, util => ju} val rawToData = Table( ("raw", "data"), (1, JsNumber(1)), ("str", JsString("str")), (1.2, JsNumber(1.2)), (List(1, 2), JsList(Seq(JsNumber(1), JsNumber(2)))), (Array(1, 2), JsList(Seq(JsNumber(1), JsNumber(2)))), (Map("key" -> "value"), JsMap(Map("key" -> JsString("value")))) ) val javaMap: ju.Map[String, jl.Integer] = { val m = new ju.HashMap[String, jl.Integer](1) m.put("test", new jl.Integer(42)) m } val javaRawToData = Table( ("raw", "data"), (new jl.Integer(42), JsNumber(42)), (new jl.Double(42.0), JsNumber(42.0)), (ju.Arrays.asList(new jl.Integer(42)), JsList(Seq(JsNumber(42)))), (javaMap, JsMap(Map("test"-> JsNumber(42)))) ) it("should parse raw any structure") { forAll(rawToData) { (raw: Any, jsLike: JsData) => JsData.fromScala(raw) shouldBe jsLike } } it("should parse raw any java structure") { forAll(javaRawToData){ (raw: Any, jsLike: JsData) => JsData.fromJava(raw) shouldBe jsLike } } describe("JsLikeMap") { // problem with MapLike - akka can't serialize it // scala.collection.immutable.MapLike$$anon$2 // java.io.NotSerializableException: scala.collection.immutable.MapLike$$anon$2 it("JsLikeMap should be serializable") { val map = Map("1" -> 1, "2" -> 2).mapValues(i => JsNumber(i)) val jslikeMap = JsMap(map) val bos = new ByteArrayOutputStream val out = new ObjectOutputStream(bos) out.writeObject(jslikeMap) out.close() } } it("should return untyped map") { val js = JsMap( "a" -> 1.js, "b" -> false.js, "c" -> JsList(Seq( JsMap("x" -> "y".js) )) ) val exp = Map( "a" -> 1, "b" -> false, "c" -> Seq( Map("x" -> "y") ) ) JsData.untyped(js) shouldBe exp } }
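The serialization concern noted in the test above comes from Map.mapValues returning a lazy, non-serializable MapLike view on Scala 2.12, which is why the result is wrapped in JsMap before being written. A standalone sketch of the usual workaround, with illustrative names and not taken from the mist sources:

import java.io.{ByteArrayOutputStream, ObjectOutputStream}

object MapValuesSerializationSketch extends App {
  // On Scala 2.12, Map#mapValues returns a lazy view (scala.collection.immutable.MapLike$$anon$2)
  // that is not Serializable; forcing it with .map(identity) yields a plain immutable Map.
  val lazyView     = Map("1" -> 1, "2" -> 2).mapValues(_ * 10)
  val materialized = lazyView.map(identity)

  val out = new ObjectOutputStream(new ByteArrayOutputStream())
  out.writeObject(materialized) // ok; writing lazyView directly fails with NotSerializableException on 2.12
  out.close()
}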
Example 79
Source File: QueueInputDStream.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import java.io.{NotSerializableException, ObjectInputStream, ObjectOutputStream} import scala.collection.mutable.{ArrayBuffer, Queue} import scala.reflect.ClassTag import org.apache.spark.rdd.{RDD, UnionRDD} import org.apache.spark.streaming.{StreamingContext, Time} private[streaming] class QueueInputDStream[T: ClassTag]( ssc: StreamingContext, val queue: Queue[RDD[T]], oneAtATime: Boolean, defaultRDD: RDD[T] ) extends InputDStream[T](ssc) { override def start() { } override def stop() { } private def readObject(in: ObjectInputStream): Unit = { throw new NotSerializableException("queueStream doesn't support checkpointing. " + "Please don't use queueStream when checkpointing is enabled.") } private def writeObject(oos: ObjectOutputStream): Unit = { logWarning("queueStream doesn't support checkpointing") } override def compute(validTime: Time): Option[RDD[T]] = { val buffer = new ArrayBuffer[RDD[T]]() queue.synchronized { if (oneAtATime && queue.nonEmpty) { buffer += queue.dequeue() } else { buffer ++= queue queue.clear() } } if (buffer.nonEmpty) { if (oneAtATime) { Some(buffer.head) } else { Some(new UnionRDD(context.sc, buffer.toSeq)) } } else if (defaultRDD != null) { Some(defaultRDD) } else { Some(ssc.sparkContext.emptyRDD) } } }
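QueueInputDStream is normally created through StreamingContext.queueStream rather than instantiated directly. A hedged driver sketch (the object name, batch interval and timeout are illustrative, not from the Spark sources):

import scala.collection.mutable.Queue

import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Seconds, StreamingContext}

object QueueStreamSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("queue-sketch").setMaster("local[2]")
    val ssc = new StreamingContext(conf, Seconds(1))

    val queue = Queue.empty[RDD[Int]]
    // queueStream builds the QueueInputDStream shown above; with oneAtATime = true
    // each batch dequeues at most one RDD from the queue.
    val stream = ssc.queueStream(queue, oneAtATime = true)
    stream.print()

    queue += ssc.sparkContext.parallelize(1 to 5)
    ssc.start()
    ssc.awaitTerminationOrTimeout(5000)
    ssc.stop(stopSparkContext = true)
  }
}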
Example 80
Source File: SerializableConfiguration.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.{ObjectInputStream, ObjectOutputStream} import org.apache.hadoop.conf.Configuration private[spark] class SerializableConfiguration(@transient var value: Configuration) extends Serializable { private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.defaultWriteObject() value.write(out) } private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { value = new Configuration(false) value.readFields(in) } }
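This wrapper exists because Hadoop's Configuration is not java.io.Serializable; writeObject and readObject delegate to Configuration's own write and readFields. Since the Spark class is private[spark], applications usually keep their own copy of it, as several later examples in this list do. A hedged usage sketch, assuming such a copy named SerializableConfiguration is visible from this object:

import org.apache.hadoop.conf.Configuration
import org.apache.spark.SparkContext

object HadoopConfBroadcastSketch {
  // Assumption: a copy of the SerializableConfiguration class above is on the classpath.
  def withHadoopConf(sc: SparkContext): Unit = {
    val bc = sc.broadcast(new SerializableConfiguration(sc.hadoopConfiguration))
    sc.parallelize(1 to 10).foreach { _ =>
      val conf: Configuration = bc.value.value // deserialized on the executor, safe to use inside the task
      // ... e.g. org.apache.hadoop.fs.FileSystem.get(conf)
    }
  }
}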
Example 81
Source File: SerializableJobConf.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.{ObjectInputStream, ObjectOutputStream} import org.apache.hadoop.mapred.JobConf private[spark] class SerializableJobConf(@transient var value: JobConf) extends Serializable { private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.defaultWriteObject() value.write(out) } private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { value = new JobConf(false) value.readFields(in) } }
Example 82
Source File: SerializableBuffer.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream} import java.nio.ByteBuffer import java.nio.channels.Channels private[spark] class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable { def value: ByteBuffer = buffer private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { val length = in.readInt() buffer = ByteBuffer.allocate(length) var amountRead = 0 val channel = Channels.newChannel(in) while (amountRead < length) { val ret = channel.read(buffer) if (ret == -1) { throw new EOFException("End of file before fully reading buffer") } amountRead += ret } buffer.rewind() // Allow us to read it later } private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.writeInt(buffer.limit()) if (Channels.newChannel(out).write(buffer) != buffer.limit()) { throw new IOException("Could not fully write buffer to output stream") } buffer.rewind() // Allow us to write it again later } }
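Here writeObject length-prefixes the buffer contents and readObject reallocates, refills and rewinds, so the restored buffer is immediately readable. A hedged round-trip sketch, assuming a standalone copy of the SerializableBuffer class above (with the private[spark] Utils.tryOrIOException helper inlined or replaced):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer

object SerializableBufferSketch extends App {
  val original = new SerializableBuffer(ByteBuffer.wrap(Array[Byte](1, 2, 3)))

  val baos = new ByteArrayOutputStream()
  val oos = new ObjectOutputStream(baos)
  oos.writeObject(original)
  oos.close()

  val ois = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray))
  val restored = ois.readObject().asInstanceOf[SerializableBuffer]
  println(restored.value.remaining()) // 3: the length-prefixed bytes were re-read and the buffer rewound
}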
Example 83
Source File: CartesianRDD.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark._ import org.apache.spark.util.Utils private[spark] class CartesianPartition( idx: Int, @transient private val rdd1: RDD[_], @transient private val rdd2: RDD[_], s1Index: Int, s2Index: Int ) extends Partition { var s1 = rdd1.partitions(s1Index) var s2 = rdd2.partitions(s2Index) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization s1 = rdd1.partitions(s1Index) s2 = rdd2.partitions(s2Index) oos.defaultWriteObject() } } private[spark] class CartesianRDD[T: ClassTag, U: ClassTag]( sc: SparkContext, var rdd1 : RDD[T], var rdd2 : RDD[U]) extends RDD[(T, U)](sc, Nil) with Serializable { val numPartitionsInRdd2 = rdd2.partitions.length override def getPartitions: Array[Partition] = { // create the cross product split val array = new Array[Partition](rdd1.partitions.length * rdd2.partitions.length) for (s1 <- rdd1.partitions; s2 <- rdd2.partitions) { val idx = s1.index * numPartitionsInRdd2 + s2.index array(idx) = new CartesianPartition(idx, rdd1, rdd2, s1.index, s2.index) } array } override def getPreferredLocations(split: Partition): Seq[String] = { val currSplit = split.asInstanceOf[CartesianPartition] (rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2)).distinct } override def compute(split: Partition, context: TaskContext): Iterator[(T, U)] = { val currSplit = split.asInstanceOf[CartesianPartition] for (x <- rdd1.iterator(currSplit.s1, context); y <- rdd2.iterator(currSplit.s2, context)) yield (x, y) } override def getDependencies: Seq[Dependency[_]] = List( new NarrowDependency(rdd1) { def getParents(id: Int): Seq[Int] = List(id / numPartitionsInRdd2) }, new NarrowDependency(rdd2) { def getParents(id: Int): Seq[Int] = List(id % numPartitionsInRdd2) } ) override def clearDependencies() { super.clearDependencies() rdd1 = null rdd2 = null } }
Example 84
Source File: UnionRDD.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.collection.mutable.ArrayBuffer import scala.collection.parallel.ForkJoinTaskSupport import scala.concurrent.forkjoin.ForkJoinPool import scala.reflect.ClassTag import org.apache.spark.{Dependency, Partition, RangeDependency, SparkContext, TaskContext} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils private[spark] class UnionPartition[T: ClassTag]( idx: Int, @transient private val rdd: RDD[T], val parentRddIndex: Int, @transient private val parentRddPartitionIndex: Int) extends Partition { var parentPartition: Partition = rdd.partitions(parentRddPartitionIndex) def preferredLocations(): Seq[String] = rdd.preferredLocations(parentPartition) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization parentPartition = rdd.partitions(parentRddPartitionIndex) oos.defaultWriteObject() } } object UnionRDD { private[spark] lazy val partitionEvalTaskSupport = new ForkJoinTaskSupport(new ForkJoinPool(8)) } @DeveloperApi class UnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]]) extends RDD[T](sc, Nil) { // Nil since we implement getDependencies // visible for testing private[spark] val isPartitionListingParallel: Boolean = rdds.length > conf.getInt("spark.rdd.parallelListingThreshold", 10) override def getPartitions: Array[Partition] = { val parRDDs = if (isPartitionListingParallel) { val parArray = rdds.par parArray.tasksupport = UnionRDD.partitionEvalTaskSupport parArray } else { rdds } val array = new Array[Partition](parRDDs.map(_.partitions.length).seq.sum) var pos = 0 for ((rdd, rddIndex) <- rdds.zipWithIndex; split <- rdd.partitions) { array(pos) = new UnionPartition(pos, rdd, rddIndex, split.index) pos += 1 } array } override def getDependencies: Seq[Dependency[_]] = { val deps = new ArrayBuffer[Dependency[_]] var pos = 0 for (rdd <- rdds) { deps += new RangeDependency(rdd, 0, pos, rdd.partitions.length) pos += rdd.partitions.length } deps } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val part = s.asInstanceOf[UnionPartition[T]] parent[T](part.parentRddIndex).iterator(part.parentPartition, context) } override def getPreferredLocations(s: Partition): Seq[String] = s.asInstanceOf[UnionPartition[T]].preferredLocations() override def clearDependencies() { super.clearDependencies() rdds = null } }
Example 85
Source File: PartitionerAwareUnionRDD.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark.{OneToOneDependency, Partition, SparkContext, TaskContext} import org.apache.spark.util.Utils private[spark] class PartitionerAwareUnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]] ) extends RDD[T](sc, rdds.map(x => new OneToOneDependency(x))) { require(rdds.nonEmpty) require(rdds.forall(_.partitioner.isDefined)) require(rdds.flatMap(_.partitioner).toSet.size == 1, "Parent RDDs have different partitioners: " + rdds.flatMap(_.partitioner)) override val partitioner = rdds.head.partitioner override def getPartitions: Array[Partition] = { val numPartitions = partitioner.get.numPartitions (0 until numPartitions).map { index => new PartitionerAwareUnionRDDPartition(rdds, index) }.toArray } // Get the location where most of the partitions of parent RDDs are located override def getPreferredLocations(s: Partition): Seq[String] = { logDebug("Finding preferred location for " + this + ", partition " + s.index) val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents val locations = rdds.zip(parentPartitions).flatMap { case (rdd, part) => val parentLocations = currPrefLocs(rdd, part) logDebug("Location of " + rdd + " partition " + part.index + " = " + parentLocations) parentLocations } val location = if (locations.isEmpty) { None } else { // Find the location that maximum number of parent partitions prefer Some(locations.groupBy(x => x).maxBy(_._2.length)._1) } logDebug("Selected location for " + this + ", partition " + s.index + " = " + location) location.toSeq } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents rdds.zip(parentPartitions).iterator.flatMap { case (rdd, p) => rdd.iterator(p, context) } } override def clearDependencies() { super.clearDependencies() rdds = null } // Get the *current* preferred locations from the DAGScheduler (as opposed to the static ones) private def currPrefLocs(rdd: RDD[_], part: Partition): Seq[String] = { rdd.context.getPreferredLocs(rdd, part.index).map(tl => tl.host) } }
Example 86
Source File: Estimator.scala From doddle-model with Apache License 2.0 | 5 votes |
package io.picnicml.doddlemodel.typeclasses import java.io.{FileOutputStream, ObjectOutputStream} // evidence needs to be serializable because it is persisted along with the actual // estimators within the io.picnicml.doddlemodel.pipeline.Pipeline trait Estimator[A] extends Serializable { def isFitted(model: A): Boolean def save(model: A, filePath: String): Unit = { val outputStream = new ObjectOutputStream(new FileOutputStream(filePath)) outputStream.writeObject(model) outputStream.close() } }
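Only saving is shown in the type class; restoring the persisted estimator is the mirror image with ObjectInputStream. A hedged sketch whose name and placement are illustrative rather than taken from the doddle-model codebase:

import java.io.{FileInputStream, ObjectInputStream}

object EstimatorLoader {
  // Reads back an object written by Estimator.save above and casts it to the expected type.
  def load[A](filePath: String): A = {
    val inputStream = new ObjectInputStream(new FileInputStream(filePath))
    try inputStream.readObject().asInstanceOf[A]
    finally inputStream.close()
  }
}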
Example 87
Source File: Terminator.scala From ingraph with Eclipse Public License 1.0 | 5 votes |
package ingraph.ire.messages import java.io.{IOException, ObjectInputStream, ObjectOutputStream} import akka.actor.ActorRef import ingraph.ire.datatypes.Tuple import ingraph.ire.util.AtomicUniqueCounter import scala.collection.mutable import scala.concurrent.{Future, Promise} class Terminator private(terminatorID: Int, val inputs: Iterable[ReteMessage => Unit], production: ActorRef) extends ReteMessage with Serializable { var lastMessageID = -1 def send(): Future[Iterable[Tuple]] = { val messageID = Terminator.idCounter.getNext lastMessageID = messageID val promise = Promise[Iterable[Tuple]] production ! ExpectTerminator(terminatorID, messageID, promise) val future = promise.future inputs.foreach(input => { input(Pause(messageID)) input(TerminatorMessage(terminatorID, messageID)) }) future } def resend(): Future[Iterable[Tuple]] = { val promise = Promise[Iterable[Tuple]] production ! ExpectTerminator(terminatorID, lastMessageID, promise) val future = promise.future inputs.foreach(input => { input(TerminatorMessage(terminatorID, lastMessageID)) }) future } @throws(classOf[IOException]) private def writeObject(out: ObjectOutputStream): Unit = {} @throws(classOf[IOException]) private def readObject(in: ObjectInputStream): Unit = {} } object Terminator { val idCounter = new AtomicUniqueCounter def apply(inputs: Iterable[ReteMessage => Unit], productionNode: ActorRef): Terminator = { val id = idCounter.getNext productionNode ! ExpectMoreTerminators(id, inputs) new Terminator(id, inputs, productionNode) } } trait TerminatorHandler { val expectedTerminatorCount: Int val terminatorCount = new mutable.HashMap[Int, Int] def forward(terminator: TerminatorMessage) def handleTerminator(terminator: TerminatorMessage): Unit = { val count = terminatorCount.getOrElse(terminator.messageID, 0) + 1 if (count >= expectedTerminatorCount) { forward(terminator) terminatorCount -= terminator.messageID } terminatorCount(terminator.messageID) = count } }
Example 88
Source File: QueueInputDStream.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import java.io.{NotSerializableException, ObjectInputStream, ObjectOutputStream} import scala.collection.mutable.{ArrayBuffer, Queue} import scala.reflect.ClassTag import org.apache.spark.rdd.{RDD, UnionRDD} import org.apache.spark.streaming.{Time, StreamingContext} private[streaming] class QueueInputDStream[T: ClassTag]( ssc: StreamingContext, val queue: Queue[RDD[T]], oneAtATime: Boolean, defaultRDD: RDD[T] ) extends InputDStream[T](ssc) { override def start() { } override def stop() { } private def readObject(in: ObjectInputStream): Unit = { throw new NotSerializableException("queueStream doesn't support checkpointing. " + "Please don't use queueStream when checkpointing is enabled.") } private def writeObject(oos: ObjectOutputStream): Unit = { logWarning("queueStream doesn't support checkpointing") } override def compute(validTime: Time): Option[RDD[T]] = { val buffer = new ArrayBuffer[RDD[T]]() if (oneAtATime && queue.size > 0) { buffer += queue.dequeue() } else { buffer ++= queue.dequeueAll(_ => true) } if (buffer.size > 0) { if (oneAtATime) { Some(buffer.head) } else { Some(new UnionRDD(context.sc, buffer.toSeq)) } } else if (defaultRDD != null) { Some(defaultRDD) } else { Some(ssc.sparkContext.emptyRDD) } } }
Example 89
Source File: SerializableConfiguration.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.{ObjectInputStream, ObjectOutputStream} import org.apache.hadoop.conf.Configuration private[spark] class SerializableConfiguration(@transient var value: Configuration) extends Serializable { private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.defaultWriteObject() value.write(out) } private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { value = new Configuration(false) value.readFields(in) } }
Example 90
Source File: SerializableJobConf.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.{ObjectInputStream, ObjectOutputStream} import org.apache.hadoop.mapred.JobConf private[spark] class SerializableJobConf(@transient var value: JobConf) extends Serializable { private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.defaultWriteObject() value.write(out) } private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { value = new JobConf(false) value.readFields(in) } }
Example 91
Source File: SerializableBuffer.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream} import java.nio.ByteBuffer import java.nio.channels.Channels private[spark] class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable { def value: ByteBuffer = buffer private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { val length = in.readInt() buffer = ByteBuffer.allocate(length) var amountRead = 0 val channel = Channels.newChannel(in) while (amountRead < length) { val ret = channel.read(buffer) if (ret == -1) { throw new EOFException("End of file before fully reading buffer") } amountRead += ret } buffer.rewind() // Allow us to read it later } private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.writeInt(buffer.limit()) if (Channels.newChannel(out).write(buffer) != buffer.limit()) { throw new IOException("Could not fully write buffer to output stream") } buffer.rewind() // Allow us to write it again later } }
Example 92
Source File: CartesianRDD.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark._ import org.apache.spark.util.Utils private[spark] class CartesianPartition( idx: Int, @transient private val rdd1: RDD[_], @transient private val rdd2: RDD[_], s1Index: Int, s2Index: Int ) extends Partition { var s1 = rdd1.partitions(s1Index) var s2 = rdd2.partitions(s2Index) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization s1 = rdd1.partitions(s1Index) s2 = rdd2.partitions(s2Index) oos.defaultWriteObject() } } private[spark] class CartesianRDD[T: ClassTag, U: ClassTag]( sc: SparkContext, var rdd1 : RDD[T], var rdd2 : RDD[U]) extends RDD[Pair[T, U]](sc, Nil) with Serializable { val numPartitionsInRdd2 = rdd2.partitions.length override def getPartitions: Array[Partition] = { // create the cross product split val array = new Array[Partition](rdd1.partitions.length * rdd2.partitions.length) for (s1 <- rdd1.partitions; s2 <- rdd2.partitions) { val idx = s1.index * numPartitionsInRdd2 + s2.index array(idx) = new CartesianPartition(idx, rdd1, rdd2, s1.index, s2.index) } array } override def getPreferredLocations(split: Partition): Seq[String] = { val currSplit = split.asInstanceOf[CartesianPartition] (rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2)).distinct } override def compute(split: Partition, context: TaskContext): Iterator[(T, U)] = { val currSplit = split.asInstanceOf[CartesianPartition] for (x <- rdd1.iterator(currSplit.s1, context); y <- rdd2.iterator(currSplit.s2, context)) yield (x, y) } override def getDependencies: Seq[Dependency[_]] = List( new NarrowDependency(rdd1) { def getParents(id: Int): Seq[Int] = List(id / numPartitionsInRdd2) }, new NarrowDependency(rdd2) { def getParents(id: Int): Seq[Int] = List(id % numPartitionsInRdd2) } ) override def clearDependencies() { super.clearDependencies() rdd1 = null rdd2 = null } }
Example 93
Source File: UnionRDD.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag import org.apache.spark.{Dependency, Partition, RangeDependency, SparkContext, TaskContext} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils private[spark] class UnionPartition[T: ClassTag]( idx: Int, @transient private val rdd: RDD[T], val parentRddIndex: Int, @transient private val parentRddPartitionIndex: Int) extends Partition { var parentPartition: Partition = rdd.partitions(parentRddPartitionIndex) def preferredLocations(): Seq[String] = rdd.preferredLocations(parentPartition) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization parentPartition = rdd.partitions(parentRddPartitionIndex) oos.defaultWriteObject() } } @DeveloperApi class UnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]]) extends RDD[T](sc, Nil) { // Nil since we implement getDependencies override def getPartitions: Array[Partition] = { val array = new Array[Partition](rdds.map(_.partitions.length).sum) var pos = 0 for ((rdd, rddIndex) <- rdds.zipWithIndex; split <- rdd.partitions) { array(pos) = new UnionPartition(pos, rdd, rddIndex, split.index) pos += 1 } array } override def getDependencies: Seq[Dependency[_]] = { val deps = new ArrayBuffer[Dependency[_]] var pos = 0 for (rdd <- rdds) { deps += new RangeDependency(rdd, 0, pos, rdd.partitions.length) pos += rdd.partitions.length } deps } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val part = s.asInstanceOf[UnionPartition[T]] parent[T](part.parentRddIndex).iterator(part.parentPartition, context) } override def getPreferredLocations(s: Partition): Seq[String] = s.asInstanceOf[UnionPartition[T]].preferredLocations() override def clearDependencies() { super.clearDependencies() rdds = null } }
Example 94
Source File: PartitionerAwareUnionRDD.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark.{OneToOneDependency, Partition, SparkContext, TaskContext} import org.apache.spark.util.Utils private[spark] class PartitionerAwareUnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]] ) extends RDD[T](sc, rdds.map(x => new OneToOneDependency(x))) { require(rdds.length > 0) require(rdds.forall(_.partitioner.isDefined)) require(rdds.flatMap(_.partitioner).toSet.size == 1, "Parent RDDs have different partitioners: " + rdds.flatMap(_.partitioner)) override val partitioner = rdds.head.partitioner override def getPartitions: Array[Partition] = { val numPartitions = partitioner.get.numPartitions (0 until numPartitions).map(index => { new PartitionerAwareUnionRDDPartition(rdds, index) }).toArray } // Get the location where most of the partitions of parent RDDs are located override def getPreferredLocations(s: Partition): Seq[String] = { logDebug("Finding preferred location for " + this + ", partition " + s.index) val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents val locations = rdds.zip(parentPartitions).flatMap { case (rdd, part) => { val parentLocations = currPrefLocs(rdd, part) logDebug("Location of " + rdd + " partition " + part.index + " = " + parentLocations) parentLocations } } val location = if (locations.isEmpty) { None } else { // Find the location that maximum number of parent partitions prefer Some(locations.groupBy(x => x).maxBy(_._2.length)._1) } logDebug("Selected location for " + this + ", partition " + s.index + " = " + location) location.toSeq } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents rdds.zip(parentPartitions).iterator.flatMap { case (rdd, p) => rdd.iterator(p, context) } } override def clearDependencies() { super.clearDependencies() rdds = null } // Get the *current* preferred locations from the DAGScheduler (as opposed to the static ones) private def currPrefLocs(rdd: RDD[_], part: Partition): Seq[String] = { rdd.context.getPreferredLocs(rdd, part.index).map(tl => tl.host) } }
Example 95
Source File: SerializationTestHelper.scala From xmlconfect with Apache License 2.0 | 5 votes |
package com.mthaler.xmlconfect import java.io.{ ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream } object SerializationTestHelper { def serializeDeserialize[T](obj: T): T = { val bout = new ByteArrayOutputStream() val out = new ObjectOutputStream(bout) out.writeObject(obj) val bin = new ByteArrayInputStream(bout.toByteArray) val in = new ObjectInputStream(bin) in.readObject().asInstanceOf[T] } }
Example 96
Source File: SerializableConfiguration.scala From spark-acid with Apache License 2.0 | 5 votes |
package com.qubole.spark.hiveacid.util import java.io.{ObjectInputStream, ObjectOutputStream} import org.apache.hadoop.conf.Configuration private[hiveacid] class SerializableConfiguration(@transient var value: Configuration) extends Serializable { private def writeObject(out: ObjectOutputStream): Unit = Util.tryOrIOException { out.defaultWriteObject() value.write(out) } private def readObject(in: ObjectInputStream): Unit = Util.tryOrIOException { value = new Configuration(false) value.readFields(in) } }
Example 97
Source File: DefineMacroCmd.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.op.cmd import java.io.{ObjectInputStream, ByteArrayInputStream, ObjectOutputStream, ByteArrayOutputStream} import dbis.piglet.plan.DataflowPlan import scala.collection.mutable.ListBuffer import dbis.piglet.op.{Pipe,PigOperator} case class DefineMacroCmd( out: Pipe, macroName: String, params: Option[List[String]], stmts: List[PigOperator] ) extends PigOperator(out) { var subPlan: Option[DataflowPlan] = None var inPipes = List[Pipe]() def deepClone(): DefineMacroCmd = { val baos = new ByteArrayOutputStream() val oos = new ObjectOutputStream(baos) oos.writeObject(this) val bais = new ByteArrayInputStream(baos.toByteArray()) val ois = new ObjectInputStream(bais) ois.readObject().asInstanceOf[DefineMacroCmd] } override def preparePlan: Unit = { def pipeParamPositions(): List[Int] = { val l = ListBuffer[Int]() inPipes.foreach(i => { val pos = params.get.indexOf(i.name.substring(1)) if (pos >= 0) l += pos }) l.toList } }
Example 98
Source File: SerializableConfiguration.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.datasources import java.io.{IOException, ObjectInputStream, ObjectOutputStream} import org.apache.hadoop.conf.Configuration import org.apache.yetus.audience.InterfaceAudience; import scala.util.control.NonFatal @InterfaceAudience.Private class SerializableConfiguration(@transient var value: Configuration) extends Serializable { private def writeObject(out: ObjectOutputStream): Unit = tryOrIOException { out.defaultWriteObject() value.write(out) } private def readObject(in: ObjectInputStream): Unit = tryOrIOException { value = new Configuration(false) value.readFields(in) } def tryOrIOException(block: => Unit) { try { block } catch { case e: IOException => throw e case NonFatal(t) => throw new IOException(t) } } }
Example 99
Source File: GBDTModel.scala From sona with Apache License 2.0 | 5 votes |
package com.tencent.angel.sona.tree.gbdt import java.io.{FileInputStream, FileOutputStream, ObjectInputStream, ObjectOutputStream} import com.tencent.angel.sona.tree.gbdt.tree.{GBDTParam, GBTNode} import com.tencent.angel.sona.tree.regression.RegTree import org.apache.spark.ml.linalg.Vector import scala.collection.mutable.ArrayBuffer object GBDTModel { type GBTTree = RegTree[GBTNode] def save(model: GBDTModel, path: String): Unit = { val oos = new ObjectOutputStream(new FileOutputStream(path)) oos.writeObject(model) oos.close() } def load(path: String): GBDTModel = { val ois = new ObjectInputStream(new FileInputStream(path)) ois.readObject().asInstanceOf[GBDTModel] } } import GBDTModel._ class GBDTModel(val param: GBDTParam) extends Serializable { private var forest: ArrayBuffer[GBTTree] = ArrayBuffer[GBTTree]() private var weights: ArrayBuffer[Float] = ArrayBuffer[Float]() def predict(instance: Vector): Array[Float] = { if (param.isRegression || param.numClass == 2) { var pred = 0.0f for (i <- forest.indices) pred += weights(i) * forest(i).predictBinary(instance) Array(pred) } else if (param.multiTree) { val preds = Array.ofDim[Float](param.numClass) for (i <- forest.indices) preds(i % param.numClass) += weights(i) * forest(i).predictBinary(instance) preds } else { val preds = Array.ofDim[Float](param.numClass) for (i <- forest.indices) { val p = forest(i).predictMulti(instance) val w = weights(i) for (k <- 0 until param.numClass) preds(k) += w * p(k) } preds } } def predict(instances: Array[Vector]): Array[Array[Float]] = { instances.map(predict) } def get(treeId: Int): GBTTree = forest(treeId) def add(tree: GBTTree, weight: Float): Unit = { forest += tree weights += weight } def keepFirstTrees(num: Int): Unit = { forest = forest.slice(0, num) weights = weights.slice(0, num) } def numTree: Int = forest.size }
Example 100
Source File: SerializableConfiguration.scala From glow with Apache License 2.0 | 5 votes |
package io.projectglow.sql.util import java.io.{ObjectInputStream, ObjectOutputStream} import org.apache.hadoop.conf.Configuration class SerializableConfiguration(@transient var value: Configuration) extends Serializable { private def writeObject(out: ObjectOutputStream): Unit = { out.defaultWriteObject() value.write(out) } private def readObject(in: ObjectInputStream): Unit = { value = new Configuration(false) value.readFields(in) } }
Example 101
Source File: ModelSerializationTestHelper.scala From aloha with MIT License | 5 votes |
package com.eharmony.aloha import java.io.{ObjectInputStream, ByteArrayInputStream, ByteArrayOutputStream, ObjectOutputStream} trait ModelSerializationTestHelper { def serializeDeserializeRoundTrip[A <: java.io.Serializable](a: A): A = { val baos = new ByteArrayOutputStream() val oos = new ObjectOutputStream(baos) oos.writeObject(a) val bais = new ByteArrayInputStream(baos.toByteArray) val ois = new ObjectInputStream(bais) val out = ois.readObject() out.asInstanceOf[A] } }
Example 102
Source File: TensorFlowModel.scala From model-serving-tutorial with Apache License 2.0 | 5 votes |
package com.lightbend.modelserving.model.tensorflow import java.io.{IOException, ObjectInputStream, ObjectOutputStream} import com.lightbend.model.modeldescriptor.ModelDescriptor import com.lightbend.modelserving.model.Model import org.tensorflow.{Graph, Session} override def getType: ModelDescriptor.ModelType = ModelDescriptor.ModelType.TENSORFLOW override def equals(obj: Any): Boolean = { obj match { case tfModel: TensorFlowModel[RECORD,RESULT] => tfModel.toBytes.toList == inputStream.toList case _ => false } } private def writeObject(output: ObjectOutputStream): Unit = { val start = System.currentTimeMillis() output.writeObject(bytes) println(s"TensorFlow java serialization in ${System.currentTimeMillis() - start} ms") } private def readObject(input: ObjectInputStream): Unit = { val start = System.currentTimeMillis() bytes = input.readObject().asInstanceOf[Array[Byte]] try{ graph = new Graph graph.importGraphDef(bytes) session = new Session(graph) println(s"TensorFlow java deserialization in ${System.currentTimeMillis() - start} ms") } catch { case t: Throwable => t.printStackTrace println(s"TensorFlow java deserialization failed in ${System.currentTimeMillis() - start} ms") println(s"Restored TensorFlow ${new String(bytes)}") } } }
Example 103
Source File: Serialization.scala From seahorse-workflow-executor with Apache License 2.0 | 5 votes |
package io.deepsense.commons.serialization import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} trait Serialization { def deserialize[T](bytes: Array[Byte]): T = { val bufferIn = new ByteArrayInputStream(bytes) val streamIn = new ObjectInputStream(bufferIn) try { streamIn.readObject().asInstanceOf[T] } finally { streamIn.close() } } def serialize[T](objectToSerialize: T): Array[Byte] = { val byteArrayOutputStream: ByteArrayOutputStream = new ByteArrayOutputStream() val oos = new ObjectOutputStream(byteArrayOutputStream) try { oos.writeObject(objectToSerialize) oos.flush() byteArrayOutputStream.toByteArray } finally { oos.close() } } def serializeDeserialize[T](obj: T): T = deserialize[T](serialize[T](obj)) } object Serialization extends Serialization
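A hedged usage sketch of the trait above; the Settings case class and its values are illustrative:

import io.deepsense.commons.serialization.Serialization

case class Settings(name: String, retries: Int)

object SerializationSketch extends App {
  // serializeDeserialize writes the value to a byte array and reads it back.
  val restored = Serialization.serializeDeserialize(Settings("job-1", 3))
  assert(restored == Settings("job-1", 3)) // the round trip preserves the value
}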
Example 104
Source File: IncrementalCache.scala From sbt-idea-plugin with Apache License 2.0 | 5 votes |
package org.jetbrains.sbtidea.packaging.artifact import java.io.{BufferedOutputStream, ByteArrayInputStream, ObjectInputStream, ObjectOutputStream} import java.nio.file.{Files, Path} import sbt.Keys.TaskStreams import scala.collection.mutable trait IncrementalCache extends AutoCloseable { def fileChanged(in: Path): Boolean } class DumbIncrementalCache extends IncrementalCache { override def fileChanged(in: Path): Boolean = true override def close(): Unit = () } class PersistentIncrementalCache(private val root: Path)(implicit private val streams: TaskStreams) extends IncrementalCache { private val FILENAME = "sbtidea.cache" private val myFile = root.resolve(FILENAME) private val myData = loadOrCreate() type Data = mutable.HashMap[String, Long] private def loadFromDisk(): Either[String, Data] = { if (!Files.exists(myFile) || Files.size(myFile) <= 0) return Left("Cache file is empty or doesn't exist") val data = Files.readAllBytes(myFile) using(new ObjectInputStream(new ByteArrayInputStream(data))) { stream => Right(stream.readObject().asInstanceOf[Data]) } } private def loadOrCreate(): Data = loadFromDisk() match { case Left(message) => streams.log.info(message) new Data() case Right(value) => value } private def saveToDisk(): Unit = { import java.nio.file.StandardOpenOption._ if (!Files.exists(myFile.getParent)) { Files.createDirectories(myFile.getParent) Files.createFile(myFile) } using(new ObjectOutputStream( new BufferedOutputStream( Files.newOutputStream(myFile, CREATE, WRITE, TRUNCATE_EXISTING)))) { stream => stream.writeObject(myData) } } override def close(): Unit = saveToDisk() override def fileChanged(in: Path): Boolean = { val newTimestamp = Files.getLastModifiedTime(in).toMillis val inStr = in.toString val lastTimestamp = myData.getOrElseUpdate(inStr, newTimestamp) val result = newTimestamp > lastTimestamp myData.put(inStr, newTimestamp) result } }
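PersistentIncrementalCache calls a using helper that is defined elsewhere in the plugin rather than in this snippet. A minimal loan-pattern sketch with the same shape, offered as an assumption about its behaviour rather than the plugin's actual definition:

object IOUtil {
  // Loan pattern: run the block, then close the resource even if the block throws.
  def using[R <: AutoCloseable, T](resource: R)(block: R => T): T =
    try block(resource)
    finally resource.close()
}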