org.apache.spark.network.buffer.ManagedBuffer Scala Examples
The following examples show how to use org.apache.spark.network.buffer.ManagedBuffer.
Each example is drawn from the source file and open-source project named in its header.
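As a quick orientation (this sketch is not taken from any of the projects below), ManagedBuffer is a reference-counted handle over block data. The simplest implementation, NioManagedBuffer, wraps an in-memory ByteBuffer, and callers pair retain() with release() around any use of the bytes:

import java.nio.ByteBuffer

import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer}

object ManagedBufferSketch {
  def main(args: Array[String]): Unit = {
    // Wrap an in-memory payload in the simplest ManagedBuffer implementation.
    val payload = ByteBuffer.wrap("hello".getBytes("UTF-8"))
    val buffer: ManagedBuffer = new NioManagedBuffer(payload)

    buffer.retain()                        // take a reference before handing the buffer off
    try {
      println(s"size = ${buffer.size()}")  // 5 bytes
      val in = buffer.createInputStream()  // read the same bytes as a stream
      println(in.read().toChar)            // 'h'
      in.close()
    } finally {
      buffer.release()                     // always pair retain() with release()
    }
  }
}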
Example 1
Source File: BlockTransferService.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.network

import java.io.Closeable
import java.nio.ByteBuffer

import scala.concurrent.{Future, Promise}
import scala.concurrent.duration.Duration
import scala.reflect.ClassTag

import org.apache.spark.internal.Logging
import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer}
import org.apache.spark.network.shuffle.{BlockFetchingListener, ShuffleClient}
import org.apache.spark.storage.{BlockId, StorageLevel}
import org.apache.spark.util.ThreadUtils

private[spark] abstract class BlockTransferService extends ShuffleClient with Closeable with Logging {

  def uploadBlockSync(
      hostname: String,
      port: Int,
      execId: String,
      blockId: BlockId,
      blockData: ManagedBuffer,
      level: StorageLevel,
      classTag: ClassTag[_]): Unit = {
    val future = uploadBlock(hostname, port, execId, blockId, blockData, level, classTag)
    ThreadUtils.awaitResult(future, Duration.Inf)
  }
}
Example 2
Source File: NettyBlockRpcServer.scala From BigDatalog with Apache License 2.0
package org.apache.spark.network.netty

import java.nio.ByteBuffer

import scala.collection.JavaConverters._

import org.apache.spark.Logging
import org.apache.spark.network.BlockDataManager
import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer}
import org.apache.spark.network.client.{RpcResponseCallback, TransportClient}
import org.apache.spark.network.server.{OneForOneStreamManager, RpcHandler, StreamManager}
import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, OpenBlocks, StreamHandle, UploadBlock}
import org.apache.spark.serializer.Serializer
import org.apache.spark.storage.{BlockId, StorageLevel}

class NettyBlockRpcServer(
    appId: String,
    serializer: Serializer,
    blockManager: BlockDataManager)
  extends RpcHandler with Logging {

  private val streamManager = new OneForOneStreamManager()

  override def receive(
      client: TransportClient,
      rpcMessage: ByteBuffer,
      responseContext: RpcResponseCallback): Unit = {
    val message = BlockTransferMessage.Decoder.fromByteBuffer(rpcMessage)
    logTrace(s"Received request: $message")

    message match {
      case openBlocks: OpenBlocks =>
        val blocks: Seq[ManagedBuffer] =
          openBlocks.blockIds.map(BlockId.apply).map(blockManager.getBlockData)
        val streamId = streamManager.registerStream(appId, blocks.iterator.asJava)
        logTrace(s"Registered streamId $streamId with ${blocks.size} buffers")
        responseContext.onSuccess(new StreamHandle(streamId, blocks.size).toByteBuffer)

      case uploadBlock: UploadBlock =>
        // StorageLevel is serialized as bytes using our JavaSerializer.
        val level: StorageLevel =
          serializer.newInstance().deserialize(ByteBuffer.wrap(uploadBlock.metadata))
        val data = new NioManagedBuffer(ByteBuffer.wrap(uploadBlock.blockData))
        blockManager.putBlockData(BlockId(uploadBlock.blockId), data, level)
        responseContext.onSuccess(ByteBuffer.allocate(0))
    }
  }

  override def getStreamManager(): StreamManager = streamManager
}
Example 3
Source File: BlockTransferService.scala From BigDatalog with Apache License 2.0
package org.apache.spark.network

import java.io.Closeable
import java.nio.ByteBuffer

import scala.concurrent.{Promise, Await, Future}
import scala.concurrent.duration.Duration

import org.apache.spark.Logging
import org.apache.spark.network.buffer.{NioManagedBuffer, ManagedBuffer}
import org.apache.spark.network.shuffle.{ShuffleClient, BlockFetchingListener}
import org.apache.spark.storage.{BlockManagerId, BlockId, StorageLevel}

private[spark] abstract class BlockTransferService extends ShuffleClient with Closeable with Logging {

  def uploadBlockSync(
      hostname: String,
      port: Int,
      execId: String,
      blockId: BlockId,
      blockData: ManagedBuffer,
      level: StorageLevel): Unit = {
    Await.result(uploadBlock(hostname, port, execId, blockId, blockData, level), Duration.Inf)
  }
}
Example 4
Source File: NettyStreamManager.scala From BigDatalog with Apache License 2.0
package org.apache.spark.rpc.netty

import java.io.File
import java.util.concurrent.ConcurrentHashMap

import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer}
import org.apache.spark.network.server.StreamManager
import org.apache.spark.rpc.RpcEnvFileServer
import org.apache.spark.util.Utils

private[netty] class NettyStreamManager(rpcEnv: NettyRpcEnv)
  extends StreamManager with RpcEnvFileServer {

  private val files = new ConcurrentHashMap[String, File]()
  private val jars = new ConcurrentHashMap[String, File]()

  override def getChunk(streamId: Long, chunkIndex: Int): ManagedBuffer = {
    throw new UnsupportedOperationException()
  }

  override def openStream(streamId: String): ManagedBuffer = {
    val Array(ftype, fname) = streamId.stripPrefix("/").split("/", 2)
    val file = ftype match {
      case "files" => files.get(fname)
      case "jars" => jars.get(fname)
      case _ => throw new IllegalArgumentException(s"Invalid file type: $ftype")
    }

    require(file != null && file.isFile(), s"File not found: $streamId")
    new FileSegmentManagedBuffer(rpcEnv.transportConf, file, 0, file.length())
  }

  override def addFile(file: File): String = {
    require(files.putIfAbsent(file.getName(), file) == null,
      s"File ${file.getName()} already registered.")
    s"${rpcEnv.address.toSparkURL}/files/${Utils.encodeFileNameToURIRawPath(file.getName())}"
  }

  override def addJar(file: File): String = {
    require(jars.putIfAbsent(file.getName(), file) == null,
      s"JAR ${file.getName()} already registered.")
    s"${rpcEnv.address.toSparkURL}/jars/${Utils.encodeFileNameToURIRawPath(file.getName())}"
  }
}
Example 5
Source File: BlockManagerManagedBuffer.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.storage

import java.io.InputStream
import java.nio.ByteBuffer
import java.util.concurrent.atomic.AtomicInteger

import org.apache.spark.network.buffer.ManagedBuffer

private[storage] class BlockManagerManagedBuffer(
    blockInfoManager: BlockInfoManager,
    blockId: BlockId,
    data: BlockData,
    dispose: Boolean) extends ManagedBuffer {

  private val refCount = new AtomicInteger(1)

  override def size(): Long = data.size

  override def nioByteBuffer(): ByteBuffer = data.toByteBuffer()

  override def createInputStream(): InputStream = data.toInputStream()

  override def convertToNetty(): Object = data.toNetty()

  override def retain(): ManagedBuffer = {
    refCount.incrementAndGet()
    val locked = blockInfoManager.lockForReading(blockId, blocking = false)
    assert(locked.isDefined)
    this
  }

  override def release(): ManagedBuffer = {
    blockInfoManager.unlock(blockId)
    if (refCount.decrementAndGet() == 0 && dispose) {
      data.dispose()
    }
    this
  }
}
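In Example 5, retain() both bumps a reference count and re-acquires a read lock from BlockInfoManager, so every retain() must be matched by exactly one release(). A hypothetical helper (not part of Spark) that a caller could use to keep that pairing exception-safe:

import org.apache.spark.network.buffer.ManagedBuffer

object ManagedBuffers {
  // Hypothetical helper: run `f` with an extra reference held, releasing it even on failure.
  def withRetained[T](buffer: ManagedBuffer)(f: ManagedBuffer => T): T = {
    buffer.retain()
    try f(buffer) finally buffer.release()
  }
}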
Example 6
Source File: BlockTransferService.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.network

import java.io.Closeable
import java.nio.ByteBuffer

import scala.concurrent.{Future, Promise}
import scala.concurrent.duration.Duration
import scala.reflect.ClassTag

import org.apache.spark.internal.Logging
import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer, NioManagedBuffer}
import org.apache.spark.network.shuffle.{BlockFetchingListener, ShuffleClient, TempFileManager}
import org.apache.spark.storage.{BlockId, StorageLevel}
import org.apache.spark.util.ThreadUtils

private[spark] abstract class BlockTransferService extends ShuffleClient with Closeable with Logging {

  def uploadBlockSync(
      hostname: String,
      port: Int,
      execId: String,
      blockId: BlockId,
      blockData: ManagedBuffer,
      level: StorageLevel,
      classTag: ClassTag[_]): Unit = {
    val future = uploadBlock(hostname, port, execId, blockId, blockData, level, classTag)
    ThreadUtils.awaitResult(future, Duration.Inf)
  }
}
Example 7
Source File: NettyStreamManager.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.rpc.netty

import java.io.File
import java.util.concurrent.ConcurrentHashMap

import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer}
import org.apache.spark.network.server.StreamManager
import org.apache.spark.rpc.RpcEnvFileServer
import org.apache.spark.util.Utils

private[netty] class NettyStreamManager(rpcEnv: NettyRpcEnv)
  extends StreamManager with RpcEnvFileServer {

  private val files = new ConcurrentHashMap[String, File]()
  private val jars = new ConcurrentHashMap[String, File]()
  private val dirs = new ConcurrentHashMap[String, File]()

  override def getChunk(streamId: Long, chunkIndex: Int): ManagedBuffer = {
    throw new UnsupportedOperationException()
  }

  override def openStream(streamId: String): ManagedBuffer = {
    val Array(ftype, fname) = streamId.stripPrefix("/").split("/", 2)
    val file = ftype match {
      case "files" => files.get(fname)
      case "jars" => jars.get(fname)
      case other =>
        val dir = dirs.get(ftype)
        require(dir != null, s"Invalid stream URI: $ftype not found.")
        new File(dir, fname)
    }

    if (file != null && file.isFile()) {
      new FileSegmentManagedBuffer(rpcEnv.transportConf, file, 0, file.length())
    } else {
      null
    }
  }

  override def addFile(file: File): String = {
    val existingPath = files.putIfAbsent(file.getName, file)
    require(existingPath == null || existingPath == file,
      s"File ${file.getName} was already registered with a different path " +
        s"(old path = $existingPath, new path = $file")
    s"${rpcEnv.address.toSparkURL}/files/${Utils.encodeFileNameToURIRawPath(file.getName())}"
  }

  override def addJar(file: File): String = {
    val existingPath = jars.putIfAbsent(file.getName, file)
    require(existingPath == null || existingPath == file,
      s"File ${file.getName} was already registered with a different path " +
        s"(old path = $existingPath, new path = $file")
    s"${rpcEnv.address.toSparkURL}/jars/${Utils.encodeFileNameToURIRawPath(file.getName())}"
  }

  override def addDirectory(baseUri: String, path: File): String = {
    val fixedBaseUri = validateDirectoryUri(baseUri)
    require(dirs.putIfAbsent(fixedBaseUri.stripPrefix("/"), path) == null,
      s"URI '$fixedBaseUri' already registered.")
    s"${rpcEnv.address.toSparkURL}$fixedBaseUri"
  }
}
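In Example 7, the URIs returned by addFile, addJar and addDirectory end in path-style suffixes such as /files/<name>, and openStream recovers the file by splitting that suffix into a type and a name. A small illustration with a made-up file name:

// Hypothetical stream ID as a client would pass it; openStream splits it into a
// type ("files") and a file name ("app.conf") and then looks the file up in `files`.
val streamId = "/files/app.conf"
val Array(ftype, fname) = streamId.stripPrefix("/").split("/", 2)
assert(ftype == "files" && fname == "app.conf")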
Example 8
Source File: NettyBlockRpcServer.scala From spark1.52 with Apache License 2.0
package org.apache.spark.network.netty

import java.nio.ByteBuffer

import scala.collection.JavaConversions._

import org.apache.spark.Logging
import org.apache.spark.network.BlockDataManager
import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer}
import org.apache.spark.network.client.{RpcResponseCallback, TransportClient}
import org.apache.spark.network.server.{OneForOneStreamManager, RpcHandler, StreamManager}
import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, OpenBlocks, StreamHandle, UploadBlock}
import org.apache.spark.serializer.Serializer
import org.apache.spark.storage.{BlockId, StorageLevel}

class NettyBlockRpcServer(
    serializer: Serializer,
    blockManager: BlockDataManager)
  extends RpcHandler with Logging {

  private val streamManager = new OneForOneStreamManager()

  override def receive(
      client: TransportClient,
      messageBytes: Array[Byte],
      responseContext: RpcResponseCallback): Unit = {
    // Decode the incoming message.
    val message = BlockTransferMessage.Decoder.fromByteArray(messageBytes)
    logTrace(s"Received request: $message")

    message match {
      // Serves requests to download (open) block files.
      case openBlocks: OpenBlocks =>
        val blocks: Seq[ManagedBuffer] =
          // openBlocks.blockIds holds the BlockIds; look up the corresponding block data.
          openBlocks.blockIds.map(BlockId.apply).map(blockManager.getBlockData)
        val streamId = streamManager.registerStream(blocks.iterator)
        logTrace(s"Registered streamId $streamId with ${blocks.size} buffers")
        responseContext.onSuccess(new StreamHandle(streamId, blocks.size).toByteArray)

      // Serves the RPC for uploading a block file.
      case uploadBlock: UploadBlock =>
        // StorageLevel is serialized as bytes using our JavaSerializer.
        val level: StorageLevel =
          serializer.newInstance().deserialize(ByteBuffer.wrap(uploadBlock.metadata))
        val data = new NioManagedBuffer(ByteBuffer.wrap(uploadBlock.blockData))
        // Store the block locally at the given storage level.
        blockManager.putBlockData(BlockId(uploadBlock.blockId), data, level)
        responseContext.onSuccess(new Array[Byte](0))
    }
  }

  override def getStreamManager(): StreamManager = streamManager
}
Example 9
Source File: HashShuffleManagerSuite.scala From iolap with Apache License 2.0
package org.apache.spark.shuffle.hash

import java.io.{File, FileWriter}

import scala.language.reflectiveCalls

import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkEnv, SparkFunSuite}
import org.apache.spark.executor.ShuffleWriteMetrics
import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer}
import org.apache.spark.serializer.JavaSerializer
import org.apache.spark.shuffle.FileShuffleBlockResolver
import org.apache.spark.storage.{ShuffleBlockId, FileSegment}

class HashShuffleManagerSuite extends SparkFunSuite with LocalSparkContext {
  private val testConf = new SparkConf(false)

  private def checkSegments(expected: FileSegment, buffer: ManagedBuffer) {
    assert(buffer.isInstanceOf[FileSegmentManagedBuffer])
    val segment = buffer.asInstanceOf[FileSegmentManagedBuffer]
    assert(expected.file.getCanonicalPath === segment.getFile.getCanonicalPath)
    assert(expected.offset === segment.getOffset)
    assert(expected.length === segment.getLength)
  }

  test("consolidated shuffle can write to shuffle group without messing existing offsets/lengths") {

    val conf = new SparkConf(false)
    // reset after EACH object write. This is to ensure that there are bytes appended after
    // an object is written. So if the codepaths assume writeObject is end of data, this should
    // flush those bugs out. This was common bug in ExternalAppendOnlyMap, etc.
    conf.set("spark.serializer.objectStreamReset", "1")
    conf.set("spark.serializer", "org.apache.spark.serializer.JavaSerializer")
    conf.set("spark.shuffle.manager", "org.apache.spark.shuffle.hash.HashShuffleManager")

    sc = new SparkContext("local", "test", conf)

    val shuffleBlockResolver =
      SparkEnv.get.shuffleManager.shuffleBlockResolver.asInstanceOf[FileShuffleBlockResolver]

    val shuffle1 = shuffleBlockResolver.forMapTask(1, 1, 1, new JavaSerializer(conf),
      new ShuffleWriteMetrics)
    for (writer <- shuffle1.writers) {
      writer.write("test1", "value")
      writer.write("test2", "value")
    }
    for (writer <- shuffle1.writers) {
      writer.commitAndClose()
    }

    val shuffle1Segment = shuffle1.writers(0).fileSegment()
    shuffle1.releaseWriters(success = true)

    val shuffle2 = shuffleBlockResolver.forMapTask(1, 2, 1, new JavaSerializer(conf),
      new ShuffleWriteMetrics)
    for (writer <- shuffle2.writers) {
      writer.write("test3", "value")
      writer.write("test4", "vlue")
    }
    for (writer <- shuffle2.writers) {
      writer.commitAndClose()
    }
    val shuffle2Segment = shuffle2.writers(0).fileSegment()
    shuffle2.releaseWriters(success = true)

    // Now comes the test:
    // Write to shuffle 3; and close it, but before registering it, check if the file lengths for
    // previous task (for shuffle1) is the same as 'segments'. Earlier, we were inferring length
    // of block based on remaining data in file : which could mess things up when there is
    // concurrent read and writes happening to the same shuffle group.

    val shuffle3 = shuffleBlockResolver.forMapTask(1, 3, 1, new JavaSerializer(testConf),
      new ShuffleWriteMetrics)
    for (writer <- shuffle3.writers) {
      writer.write("test3", "value")
      writer.write("test4", "value")
    }
    for (writer <- shuffle3.writers) {
      writer.commitAndClose()
    }
    // check before we register.
    checkSegments(shuffle2Segment, shuffleBlockResolver.getBlockData(ShuffleBlockId(1, 2, 0)))
    shuffle3.releaseWriters(success = true)
    checkSegments(shuffle2Segment, shuffleBlockResolver.getBlockData(ShuffleBlockId(1, 2, 0)))
    shuffleBlockResolver.removeShuffle(1)
  }

  def writeToFile(file: File, numBytes: Int) {
    val writer = new FileWriter(file, true)
    for (i <- 0 until numBytes) writer.write(i)
    writer.close()
  }
}
Example 10
Source File: IndexShuffleBlockResolver.scala From iolap with Apache License 2.0
package org.apache.spark.shuffle

import java.io._

import com.google.common.io.ByteStreams

import org.apache.spark.{SparkConf, SparkEnv}
import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer}
import org.apache.spark.network.netty.SparkTransportConf
import org.apache.spark.storage._
import org.apache.spark.util.Utils

import IndexShuffleBlockResolver.NOOP_REDUCE_ID

  def writeIndexFile(shuffleId: Int, mapId: Int, lengths: Array[Long]): Unit = {
    val indexFile = getIndexFile(shuffleId, mapId)
    val out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(indexFile)))
    Utils.tryWithSafeFinally {
      // We take in lengths of each block, need to convert it to offsets.
      var offset = 0L
      out.writeLong(offset)
      for (length <- lengths) {
        offset += length
        out.writeLong(offset)
      }
    } {
      out.close()
    }
  }

  override def getBlockData(blockId: ShuffleBlockId): ManagedBuffer = {
    // The block is actually going to be a range of a single map output file for this map, so
    // find out the consolidated file, then the offset within that from our index
    val indexFile = getIndexFile(blockId.shuffleId, blockId.mapId)

    val in = new DataInputStream(new FileInputStream(indexFile))
    try {
      ByteStreams.skipFully(in, blockId.reduceId * 8)
      val offset = in.readLong()
      val nextOffset = in.readLong()
      new FileSegmentManagedBuffer(
        transportConf,
        getDataFile(blockId.shuffleId, blockId.mapId),
        offset,
        nextOffset - offset)
    } finally {
      in.close()
    }
  }

  override def stop(): Unit = {}
}

private[spark] object IndexShuffleBlockResolver {
  // No-op reduce ID used in interactions with disk store and BlockObjectWriter.
  // The disk store currently expects puts to relate to a (map, reduce) pair, but in the sort
  // shuffle outputs for several reduces are glommed into a single file.
  // TODO: Avoid this entirely by having the DiskBlockObjectWriter not require a BlockId.
  val NOOP_REDUCE_ID = 0
}
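The index file in Example 10 stores cumulative offsets, so the segment for reducer r is the byte range from offsets(r) to offsets(r + 1) in the map output file; that is exactly what getBlockData reads after skipping reduceId * 8 bytes. A quick illustration with made-up block lengths:

// Illustrative values only; the real lengths come from the shuffle writers.
val lengths = Array(10L, 20L, 30L)
val offsets = lengths.scanLeft(0L)(_ + _)   // Array(0, 10, 30, 60), as written to the index file
val reduceId = 1
val start = offsets(reduceId)               // 10 -> the first readLong() after the skip
val length = offsets(reduceId + 1) - start  // 20 -> nextOffset - offset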
Example 11
Source File: NettyBlockRpcServer.scala From iolap with Apache License 2.0
package org.apache.spark.network.netty

import java.nio.ByteBuffer

import scala.collection.JavaConversions._

import org.apache.spark.Logging
import org.apache.spark.network.BlockDataManager
import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer}
import org.apache.spark.network.client.{RpcResponseCallback, TransportClient}
import org.apache.spark.network.server.{OneForOneStreamManager, RpcHandler, StreamManager}
import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, OpenBlocks, StreamHandle, UploadBlock}
import org.apache.spark.serializer.Serializer
import org.apache.spark.storage.{BlockId, StorageLevel}

class NettyBlockRpcServer(
    serializer: Serializer,
    blockManager: BlockDataManager)
  extends RpcHandler with Logging {

  private val streamManager = new OneForOneStreamManager()

  override def receive(
      client: TransportClient,
      messageBytes: Array[Byte],
      responseContext: RpcResponseCallback): Unit = {
    val message = BlockTransferMessage.Decoder.fromByteArray(messageBytes)
    logTrace(s"Received request: $message")

    message match {
      case openBlocks: OpenBlocks =>
        val blocks: Seq[ManagedBuffer] =
          openBlocks.blockIds.map(BlockId.apply).map(blockManager.getBlockData)
        val streamId = streamManager.registerStream(blocks.iterator)
        logTrace(s"Registered streamId $streamId with ${blocks.size} buffers")
        responseContext.onSuccess(new StreamHandle(streamId, blocks.size).toByteArray)

      case uploadBlock: UploadBlock =>
        // StorageLevel is serialized as bytes using our JavaSerializer.
        val level: StorageLevel =
          serializer.newInstance().deserialize(ByteBuffer.wrap(uploadBlock.metadata))
        val data = new NioManagedBuffer(ByteBuffer.wrap(uploadBlock.blockData))
        blockManager.putBlockData(BlockId(uploadBlock.blockId), data, level)
        responseContext.onSuccess(new Array[Byte](0))
    }
  }

  override def getStreamManager(): StreamManager = streamManager
}
Example 12
Source File: BlockTransferService.scala From iolap with Apache License 2.0
package org.apache.spark.network

import java.io.Closeable
import java.nio.ByteBuffer

import scala.concurrent.{Promise, Await, Future}
import scala.concurrent.duration.Duration

import org.apache.spark.Logging
import org.apache.spark.network.buffer.{NioManagedBuffer, ManagedBuffer}
import org.apache.spark.network.shuffle.{ShuffleClient, BlockFetchingListener}
import org.apache.spark.storage.{BlockManagerId, BlockId, StorageLevel}

private[spark] abstract class BlockTransferService extends ShuffleClient with Closeable with Logging {

  def uploadBlockSync(
      hostname: String,
      port: Int,
      execId: String,
      blockId: BlockId,
      blockData: ManagedBuffer,
      level: StorageLevel): Unit = {
    Await.result(uploadBlock(hostname, port, execId, blockId, blockData, level), Duration.Inf)
  }
}
Example 13
Source File: BlockManagerManagedBuffer.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.storage

import org.apache.spark.network.buffer.{ManagedBuffer, NettyManagedBuffer}
import org.apache.spark.util.io.ChunkedByteBuffer

private[storage] class BlockManagerManagedBuffer(
    blockInfoManager: BlockInfoManager,
    blockId: BlockId,
    chunkedBuffer: ChunkedByteBuffer) extends NettyManagedBuffer(chunkedBuffer.toNetty) {

  override def retain(): ManagedBuffer = {
    super.retain()
    val locked = blockInfoManager.lockForReading(blockId, blocking = false)
    assert(locked.isDefined)
    this
  }

  override def release(): ManagedBuffer = {
    blockInfoManager.unlock(blockId)
    super.release()
  }
}
Example 14
Source File: NettyBlockRpcServer.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.network.netty

import java.nio.ByteBuffer

import scala.collection.JavaConverters._
import scala.language.existentials
import scala.reflect.ClassTag

import org.apache.spark.internal.Logging
import org.apache.spark.network.BlockDataManager
import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer}
import org.apache.spark.network.client.{RpcResponseCallback, TransportClient}
import org.apache.spark.network.server.{OneForOneStreamManager, RpcHandler, StreamManager}
import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, OpenBlocks, StreamHandle, UploadBlock}
import org.apache.spark.serializer.Serializer
import org.apache.spark.storage.{BlockId, StorageLevel}

class NettyBlockRpcServer(
    appId: String,
    serializer: Serializer,
    blockManager: BlockDataManager)
  extends RpcHandler with Logging {

  private val streamManager = new OneForOneStreamManager()

  override def receive(
      client: TransportClient,
      rpcMessage: ByteBuffer,
      responseContext: RpcResponseCallback): Unit = {
    val message = BlockTransferMessage.Decoder.fromByteBuffer(rpcMessage)
    logTrace(s"Received request: $message")

    message match {
      case openBlocks: OpenBlocks =>
        val blocks: Seq[ManagedBuffer] =
          openBlocks.blockIds.map(BlockId.apply).map(blockManager.getBlockData)
        val streamId = streamManager.registerStream(appId, blocks.iterator.asJava)
        logTrace(s"Registered streamId $streamId with ${blocks.size} buffers")
        responseContext.onSuccess(new StreamHandle(streamId, blocks.size).toByteBuffer)

      case uploadBlock: UploadBlock =>
        // StorageLevel and ClassTag are serialized as bytes using our JavaSerializer.
        val (level: StorageLevel, classTag: ClassTag[_]) = {
          serializer
            .newInstance()
            .deserialize(ByteBuffer.wrap(uploadBlock.metadata))
            .asInstanceOf[(StorageLevel, ClassTag[_])]
        }
        val data = new NioManagedBuffer(ByteBuffer.wrap(uploadBlock.blockData))
        val blockId = BlockId(uploadBlock.blockId)
        blockManager.putBlockData(blockId, data, level, classTag)
        responseContext.onSuccess(ByteBuffer.allocate(0))
    }
  }

  override def getStreamManager(): StreamManager = streamManager
}
Example 15
Source File: NettyStreamManager.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.rpc.netty

import java.io.File
import java.util.concurrent.ConcurrentHashMap

import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer}
import org.apache.spark.network.server.StreamManager
import org.apache.spark.rpc.RpcEnvFileServer
import org.apache.spark.util.Utils

private[netty] class NettyStreamManager(rpcEnv: NettyRpcEnv)
  extends StreamManager with RpcEnvFileServer {

  private val files = new ConcurrentHashMap[String, File]()
  private val jars = new ConcurrentHashMap[String, File]()
  private val dirs = new ConcurrentHashMap[String, File]()

  override def getChunk(streamId: Long, chunkIndex: Int): ManagedBuffer = {
    throw new UnsupportedOperationException()
  }

  override def openStream(streamId: String): ManagedBuffer = {
    val Array(ftype, fname) = streamId.stripPrefix("/").split("/", 2)
    val file = ftype match {
      case "files" => files.get(fname)
      case "jars" => jars.get(fname)
      case other =>
        val dir = dirs.get(ftype)
        require(dir != null, s"Invalid stream URI: $ftype not found.")
        new File(dir, fname)
    }

    if (file != null && file.isFile()) {
      new FileSegmentManagedBuffer(rpcEnv.transportConf, file, 0, file.length())
    } else {
      null
    }
  }

  override def addFile(file: File): String = {
    val existingPath = files.putIfAbsent(file.getName, file)
    require(existingPath == null || existingPath == file,
      s"File ${file.getName} was already registered with a different path " +
        s"(old path = $existingPath, new path = $file")
    s"${rpcEnv.address.toSparkURL}/files/${Utils.encodeFileNameToURIRawPath(file.getName())}"
  }

  override def addJar(file: File): String = {
    val existingPath = jars.putIfAbsent(file.getName, file)
    require(existingPath == null || existingPath == file,
      s"File ${file.getName} was already registered with a different path " +
        s"(old path = $existingPath, new path = $file")
    s"${rpcEnv.address.toSparkURL}/jars/${Utils.encodeFileNameToURIRawPath(file.getName())}"
  }

  override def addDirectory(baseUri: String, path: File): String = {
    val fixedBaseUri = validateDirectoryUri(baseUri)
    require(dirs.putIfAbsent(fixedBaseUri.stripPrefix("/"), path) == null,
      s"URI '$fixedBaseUri' already registered.")
    s"${rpcEnv.address.toSparkURL}$fixedBaseUri"
  }
}
Example 16
Source File: NettyStreamManager.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.rpc.netty

import java.io.File
import java.util.concurrent.ConcurrentHashMap

import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer}
import org.apache.spark.network.server.StreamManager
import org.apache.spark.rpc.RpcEnvFileServer
import org.apache.spark.util.Utils

private[netty] class NettyStreamManager(rpcEnv: NettyRpcEnv)
  extends StreamManager with RpcEnvFileServer {

  private val files = new ConcurrentHashMap[String, File]()
  private val jars = new ConcurrentHashMap[String, File]()
  private val dirs = new ConcurrentHashMap[String, File]()

  override def getChunk(streamId: Long, chunkIndex: Int): ManagedBuffer = {
    throw new UnsupportedOperationException()
  }

  override def openStream(streamId: String): ManagedBuffer = {
    val Array(ftype, fname) = streamId.stripPrefix("/").split("/", 2)
    val file = ftype match {
      case "files" => files.get(fname)
      case "jars" => jars.get(fname)
      case other =>
        val dir = dirs.get(ftype)
        require(dir != null, s"Invalid stream URI: $ftype not found.")
        new File(dir, fname)
    }

    if (file != null && file.isFile()) {
      new FileSegmentManagedBuffer(rpcEnv.transportConf, file, 0, file.length())
    } else {
      null
    }
  }

  override def addFile(file: File): String = {
    val existingPath = files.putIfAbsent(file.getName, file)
    require(existingPath == null || existingPath == file,
      s"File ${file.getName} was already registered with a different path " +
        s"(old path = $existingPath, new path = $file")
    s"${rpcEnv.address.toSparkURL}/files/${Utils.encodeFileNameToURIRawPath(file.getName())}"
  }

  override def addJar(file: File): String = {
    val existingPath = jars.putIfAbsent(file.getName, file)
    require(existingPath == null || existingPath == file,
      s"File ${file.getName} was already registered with a different path " +
        s"(old path = $existingPath, new path = $file")
    s"${rpcEnv.address.toSparkURL}/jars/${Utils.encodeFileNameToURIRawPath(file.getName())}"
  }

  override def addDirectory(baseUri: String, path: File): String = {
    val fixedBaseUri = validateDirectoryUri(baseUri)
    require(dirs.putIfAbsent(fixedBaseUri.stripPrefix("/"), path) == null,
      s"URI '$fixedBaseUri' already registered.")
    s"${rpcEnv.address.toSparkURL}$fixedBaseUri"
  }
}
Example 17
Source File: HashShuffleManagerSuite.scala From SparkCore with Apache License 2.0
package org.apache.spark.shuffle.hash

import java.io.{File, FileWriter}

import scala.language.reflectiveCalls

import org.scalatest.FunSuite

import org.apache.spark.{SparkEnv, SparkContext, LocalSparkContext, SparkConf}
import org.apache.spark.executor.ShuffleWriteMetrics
import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer}
import org.apache.spark.serializer.JavaSerializer
import org.apache.spark.shuffle.FileShuffleBlockManager
import org.apache.spark.storage.{ShuffleBlockId, FileSegment}

class HashShuffleManagerSuite extends FunSuite with LocalSparkContext {
  private val testConf = new SparkConf(false)

  private def checkSegments(expected: FileSegment, buffer: ManagedBuffer) {
    assert(buffer.isInstanceOf[FileSegmentManagedBuffer])
    val segment = buffer.asInstanceOf[FileSegmentManagedBuffer]
    assert(expected.file.getCanonicalPath === segment.getFile.getCanonicalPath)
    assert(expected.offset === segment.getOffset)
    assert(expected.length === segment.getLength)
  }

  test("consolidated shuffle can write to shuffle group without messing existing offsets/lengths") {

    val conf = new SparkConf(false)
    // reset after EACH object write. This is to ensure that there are bytes appended after
    // an object is written. So if the codepaths assume writeObject is end of data, this should
    // flush those bugs out. This was common bug in ExternalAppendOnlyMap, etc.
    conf.set("spark.serializer.objectStreamReset", "1")
    conf.set("spark.serializer", "org.apache.spark.serializer.JavaSerializer")
    conf.set("spark.shuffle.manager", "org.apache.spark.shuffle.hash.HashShuffleManager")

    sc = new SparkContext("local", "test", conf)

    val shuffleBlockManager =
      SparkEnv.get.shuffleManager.shuffleBlockManager.asInstanceOf[FileShuffleBlockManager]

    val shuffle1 = shuffleBlockManager.forMapTask(1, 1, 1, new JavaSerializer(conf),
      new ShuffleWriteMetrics)
    for (writer <- shuffle1.writers) {
      writer.write("test1")
      writer.write("test2")
    }
    for (writer <- shuffle1.writers) {
      writer.commitAndClose()
    }

    val shuffle1Segment = shuffle1.writers(0).fileSegment()
    shuffle1.releaseWriters(success = true)

    val shuffle2 = shuffleBlockManager.forMapTask(1, 2, 1, new JavaSerializer(conf),
      new ShuffleWriteMetrics)
    for (writer <- shuffle2.writers) {
      writer.write("test3")
      writer.write("test4")
    }
    for (writer <- shuffle2.writers) {
      writer.commitAndClose()
    }
    val shuffle2Segment = shuffle2.writers(0).fileSegment()
    shuffle2.releaseWriters(success = true)

    // Now comes the test:
    // Write to shuffle 3; and close it, but before registering it, check if the file lengths for
    // previous task (for shuffle1) is the same as 'segments'. Earlier, we were inferring length
    // of block based on remaining data in file : which could mess things up when there is
    // concurrent read and writes happening to the same shuffle group.

    val shuffle3 = shuffleBlockManager.forMapTask(1, 3, 1, new JavaSerializer(testConf),
      new ShuffleWriteMetrics)
    for (writer <- shuffle3.writers) {
      writer.write("test3")
      writer.write("test4")
    }
    for (writer <- shuffle3.writers) {
      writer.commitAndClose()
    }
    // check before we register.
    checkSegments(shuffle2Segment, shuffleBlockManager.getBlockData(ShuffleBlockId(1, 2, 0)))
    shuffle3.releaseWriters(success = true)
    checkSegments(shuffle2Segment, shuffleBlockManager.getBlockData(ShuffleBlockId(1, 2, 0)))
    shuffleBlockManager.removeShuffle(1)
  }

  def writeToFile(file: File, numBytes: Int) {
    val writer = new FileWriter(file, true)
    for (i <- 0 until numBytes) writer.write(i)
    writer.close()
  }
}
Example 18
Source File: NettyBlockTransferSecuritySuite.scala From SparkCore with Apache License 2.0
package org.apache.spark.network.netty

import java.nio._
import java.util.concurrent.TimeUnit

import scala.concurrent.duration._
import scala.concurrent.{Await, Promise}
import scala.util.{Failure, Success, Try}

import org.apache.commons.io.IOUtils
import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer}
import org.apache.spark.network.shuffle.BlockFetchingListener
import org.apache.spark.network.{BlockDataManager, BlockTransferService}
import org.apache.spark.storage.{BlockId, ShuffleBlockId}
import org.apache.spark.{SecurityManager, SparkConf}
import org.mockito.Mockito._
import org.scalatest.mock.MockitoSugar
import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite, ShouldMatchers}

class NettyBlockTransferSecuritySuite extends FunSuite with MockitoSugar with ShouldMatchers {
  test("security default off") {
    val conf = new SparkConf()
      .set("spark.app.id", "app-id")
    testConnection(conf, conf) match {
      case Success(_) => // expected
      case Failure(t) => fail(t)
    }
  }

  test("security on same password") {
    val conf = new SparkConf()
      .set("spark.authenticate", "true")
      .set("spark.authenticate.secret", "good")
      .set("spark.app.id", "app-id")
    testConnection(conf, conf) match {
      case Success(_) => // expected
      case Failure(t) => fail(t)
    }
  }

  test("security on mismatch password") {
    val conf0 = new SparkConf()
      .set("spark.authenticate", "true")
      .set("spark.authenticate.secret", "good")
      .set("spark.app.id", "app-id")
    val conf1 = conf0.clone.set("spark.authenticate.secret", "bad")
    testConnection(conf0, conf1) match {
      case Success(_) => fail("Should have failed")
      case Failure(t) => t.getMessage should include ("Mismatched response")
    }
  }

  test("security mismatch auth off on server") {
    val conf0 = new SparkConf()
      .set("spark.authenticate", "true")
      .set("spark.authenticate.secret", "good")
      .set("spark.app.id", "app-id")
    val conf1 = conf0.clone.set("spark.authenticate", "false")
    testConnection(conf0, conf1) match {
      case Success(_) => fail("Should have failed")
      case Failure(t) => // any funny error may occur, server will interpret SASL token as RPC
    }
  }

  test("security mismatch auth off on client") {
    val conf0 = new SparkConf()
      .set("spark.authenticate", "false")
      .set("spark.authenticate.secret", "good")
      .set("spark.app.id", "app-id")
    val conf1 = conf0.clone.set("spark.authenticate", "true")
    testConnection(conf0, conf1) match {
      case Success(_) => fail("Should have failed")
      case Failure(t) => t.getMessage should include ("Expected SaslMessage")
    }
  }

  private def fetchBlock(
      self: BlockTransferService,
      from: BlockTransferService,
      execId: String,
      blockId: BlockId): Try[ManagedBuffer] = {

    val promise = Promise[ManagedBuffer]()

    self.fetchBlocks(from.hostName, from.port, execId, Array(blockId.toString),
      new BlockFetchingListener {
        override def onBlockFetchFailure(blockId: String, exception: Throwable): Unit = {
          promise.failure(exception)
        }

        override def onBlockFetchSuccess(blockId: String, data: ManagedBuffer): Unit = {
          promise.success(data.retain())
        }
      })

    Await.ready(promise.future, FiniteDuration(1000, TimeUnit.MILLISECONDS))
    promise.future.value.get
  }
}
Example 19
Source File: IndexShuffleBlockManager.scala From SparkCore with Apache License 2.0
package org.apache.spark.shuffle

import java.io._
import java.nio.ByteBuffer

import com.google.common.io.ByteStreams

import org.apache.spark.{SparkConf, SparkEnv}
import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer}
import org.apache.spark.network.netty.SparkTransportConf
import org.apache.spark.storage._

  def writeIndexFile(shuffleId: Int, mapId: Int, lengths: Array[Long]) = {
    val indexFile = getIndexFile(shuffleId, mapId)
    val out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(indexFile)))
    try {
      // We take in lengths of each block, need to convert it to offsets.
      var offset = 0L
      out.writeLong(offset)
      for (length <- lengths) {
        offset += length
        out.writeLong(offset)
      }
    } finally {
      out.close()
    }
  }

  override def getBytes(blockId: ShuffleBlockId): Option[ByteBuffer] = {
    Some(getBlockData(blockId).nioByteBuffer())
  }

  override def getBlockData(blockId: ShuffleBlockId): ManagedBuffer = {
    // The block is actually going to be a range of a single map output file for this map, so
    // find out the consolidated file, then the offset within that from our index
    val indexFile = getIndexFile(blockId.shuffleId, blockId.mapId)

    val in = new DataInputStream(new FileInputStream(indexFile))
    try {
      ByteStreams.skipFully(in, blockId.reduceId * 8)
      val offset = in.readLong()
      val nextOffset = in.readLong()
      new FileSegmentManagedBuffer(
        transportConf,
        getDataFile(blockId.shuffleId, blockId.mapId),
        offset,
        nextOffset - offset)
    } finally {
      in.close()
    }
  }

  override def stop() = {}
}
Example 20
Source File: NettyBlockRpcServer.scala From SparkCore with Apache License 2.0
package org.apache.spark.network.netty

import java.nio.ByteBuffer

import scala.collection.JavaConversions._

import org.apache.spark.Logging
import org.apache.spark.network.BlockDataManager
import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer}
import org.apache.spark.network.client.{RpcResponseCallback, TransportClient}
import org.apache.spark.network.server.{OneForOneStreamManager, RpcHandler, StreamManager}
import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, OpenBlocks, StreamHandle, UploadBlock}
import org.apache.spark.serializer.Serializer
import org.apache.spark.storage.{BlockId, StorageLevel}

class NettyBlockRpcServer(
    serializer: Serializer,
    blockManager: BlockDataManager)
  extends RpcHandler with Logging {

  private val streamManager = new OneForOneStreamManager()

  override def receive(
      client: TransportClient,
      messageBytes: Array[Byte],
      responseContext: RpcResponseCallback): Unit = {
    val message = BlockTransferMessage.Decoder.fromByteArray(messageBytes)
    logTrace(s"Received request: $message")

    message match {
      case openBlocks: OpenBlocks =>
        val blocks: Seq[ManagedBuffer] =
          openBlocks.blockIds.map(BlockId.apply).map(blockManager.getBlockData)
        val streamId = streamManager.registerStream(blocks.iterator)
        logTrace(s"Registered streamId $streamId with ${blocks.size} buffers")
        responseContext.onSuccess(new StreamHandle(streamId, blocks.size).toByteArray)

      case uploadBlock: UploadBlock =>
        // StorageLevel is serialized as bytes using our JavaSerializer.
        val level: StorageLevel =
          serializer.newInstance().deserialize(ByteBuffer.wrap(uploadBlock.metadata))
        val data = new NioManagedBuffer(ByteBuffer.wrap(uploadBlock.blockData))
        blockManager.putBlockData(BlockId(uploadBlock.blockId), data, level)
        responseContext.onSuccess(new Array[Byte](0))
    }
  }

  override def getStreamManager(): StreamManager = streamManager
}
Example 21
Source File: BlockTransferService.scala From SparkCore with Apache License 2.0
package org.apache.spark.network

import java.io.Closeable
import java.nio.ByteBuffer

import scala.concurrent.{Promise, Await, Future}
import scala.concurrent.duration.Duration

import org.apache.spark.Logging
import org.apache.spark.network.buffer.{NioManagedBuffer, ManagedBuffer}
import org.apache.spark.network.shuffle.{ShuffleClient, BlockFetchingListener}
import org.apache.spark.storage.{BlockManagerId, BlockId, StorageLevel}

private[spark] abstract class BlockTransferService extends ShuffleClient with Closeable with Logging {

  def uploadBlockSync(
      hostname: String,
      port: Int,
      execId: String,
      blockId: BlockId,
      blockData: ManagedBuffer,
      level: StorageLevel): Unit = {
    Await.result(uploadBlock(hostname, port, execId, blockId, blockData, level), Duration.Inf)
  }
}
Example 22
Source File: BlockManagerManagedBuffer.scala From sparkoscope with Apache License 2.0
package org.apache.spark.storage

import org.apache.spark.network.buffer.{ManagedBuffer, NettyManagedBuffer}
import org.apache.spark.util.io.ChunkedByteBuffer

private[storage] class BlockManagerManagedBuffer(
    blockInfoManager: BlockInfoManager,
    blockId: BlockId,
    chunkedBuffer: ChunkedByteBuffer) extends NettyManagedBuffer(chunkedBuffer.toNetty) {

  override def retain(): ManagedBuffer = {
    super.retain()
    val locked = blockInfoManager.lockForReading(blockId, blocking = false)
    assert(locked.isDefined)
    this
  }

  override def release(): ManagedBuffer = {
    blockInfoManager.unlock(blockId)
    super.release()
  }
}
Example 23
Source File: NettyBlockRpcServer.scala From sparkoscope with Apache License 2.0
package org.apache.spark.network.netty

import java.nio.ByteBuffer

import scala.collection.JavaConverters._
import scala.language.existentials
import scala.reflect.ClassTag

import org.apache.spark.internal.Logging
import org.apache.spark.network.BlockDataManager
import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer}
import org.apache.spark.network.client.{RpcResponseCallback, TransportClient}
import org.apache.spark.network.server.{OneForOneStreamManager, RpcHandler, StreamManager}
import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, OpenBlocks, StreamHandle, UploadBlock}
import org.apache.spark.serializer.Serializer
import org.apache.spark.storage.{BlockId, StorageLevel}

class NettyBlockRpcServer(
    appId: String,
    serializer: Serializer,
    blockManager: BlockDataManager)
  extends RpcHandler with Logging {

  private val streamManager = new OneForOneStreamManager()

  override def receive(
      client: TransportClient,
      rpcMessage: ByteBuffer,
      responseContext: RpcResponseCallback): Unit = {
    val message = BlockTransferMessage.Decoder.fromByteBuffer(rpcMessage)
    logTrace(s"Received request: $message")

    message match {
      case openBlocks: OpenBlocks =>
        val blocks: Seq[ManagedBuffer] =
          openBlocks.blockIds.map(BlockId.apply).map(blockManager.getBlockData)
        val streamId = streamManager.registerStream(appId, blocks.iterator.asJava)
        logTrace(s"Registered streamId $streamId with ${blocks.size} buffers")
        responseContext.onSuccess(new StreamHandle(streamId, blocks.size).toByteBuffer)

      case uploadBlock: UploadBlock =>
        // StorageLevel and ClassTag are serialized as bytes using our JavaSerializer.
        val (level: StorageLevel, classTag: ClassTag[_]) = {
          serializer
            .newInstance()
            .deserialize(ByteBuffer.wrap(uploadBlock.metadata))
            .asInstanceOf[(StorageLevel, ClassTag[_])]
        }
        val data = new NioManagedBuffer(ByteBuffer.wrap(uploadBlock.blockData))
        val blockId = BlockId(uploadBlock.blockId)
        blockManager.putBlockData(blockId, data, level, classTag)
        responseContext.onSuccess(ByteBuffer.allocate(0))
    }
  }

  override def getStreamManager(): StreamManager = streamManager
}
Example 24
Source File: BlockTransferService.scala From sparkoscope with Apache License 2.0
package org.apache.spark.network

import java.io.Closeable
import java.nio.ByteBuffer

import scala.concurrent.{Future, Promise}
import scala.concurrent.duration.Duration
import scala.reflect.ClassTag

import org.apache.spark.internal.Logging
import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer}
import org.apache.spark.network.shuffle.{BlockFetchingListener, ShuffleClient}
import org.apache.spark.storage.{BlockId, StorageLevel}
import org.apache.spark.util.ThreadUtils

private[spark] abstract class BlockTransferService extends ShuffleClient with Closeable with Logging {

  def uploadBlockSync(
      hostname: String,
      port: Int,
      execId: String,
      blockId: BlockId,
      blockData: ManagedBuffer,
      level: StorageLevel,
      classTag: ClassTag[_]): Unit = {
    val future = uploadBlock(hostname, port, execId, blockId, blockData, level, classTag)
    ThreadUtils.awaitResult(future, Duration.Inf)
  }
}
Example 25
Source File: NettyStreamManager.scala From sparkoscope with Apache License 2.0
package org.apache.spark.rpc.netty

import java.io.File
import java.util.concurrent.ConcurrentHashMap

import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer}
import org.apache.spark.network.server.StreamManager
import org.apache.spark.rpc.RpcEnvFileServer
import org.apache.spark.util.Utils

private[netty] class NettyStreamManager(rpcEnv: NettyRpcEnv)
  extends StreamManager with RpcEnvFileServer {

  private val files = new ConcurrentHashMap[String, File]()
  private val jars = new ConcurrentHashMap[String, File]()
  private val dirs = new ConcurrentHashMap[String, File]()

  override def getChunk(streamId: Long, chunkIndex: Int): ManagedBuffer = {
    throw new UnsupportedOperationException()
  }

  override def openStream(streamId: String): ManagedBuffer = {
    val Array(ftype, fname) = streamId.stripPrefix("/").split("/", 2)
    val file = ftype match {
      case "files" => files.get(fname)
      case "jars" => jars.get(fname)
      case other =>
        val dir = dirs.get(ftype)
        require(dir != null, s"Invalid stream URI: $ftype not found.")
        new File(dir, fname)
    }

    if (file != null && file.isFile()) {
      new FileSegmentManagedBuffer(rpcEnv.transportConf, file, 0, file.length())
    } else {
      null
    }
  }

  override def addFile(file: File): String = {
    val existingPath = files.putIfAbsent(file.getName, file)
    require(existingPath == null || existingPath == file,
      s"File ${file.getName} was already registered with a different path " +
        s"(old path = $existingPath, new path = $file")
    s"${rpcEnv.address.toSparkURL}/files/${Utils.encodeFileNameToURIRawPath(file.getName())}"
  }

  override def addJar(file: File): String = {
    val existingPath = jars.putIfAbsent(file.getName, file)
    require(existingPath == null || existingPath == file,
      s"File ${file.getName} was already registered with a different path " +
        s"(old path = $existingPath, new path = $file")
    s"${rpcEnv.address.toSparkURL}/jars/${Utils.encodeFileNameToURIRawPath(file.getName())}"
  }

  override def addDirectory(baseUri: String, path: File): String = {
    val fixedBaseUri = validateDirectoryUri(baseUri)
    require(dirs.putIfAbsent(fixedBaseUri.stripPrefix("/"), path) == null,
      s"URI '$fixedBaseUri' already registered.")
    s"${rpcEnv.address.toSparkURL}$fixedBaseUri"
  }
}
Example 26
Source File: BlockManagerManagedBuffer.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.storage

import org.apache.spark.network.buffer.{ManagedBuffer, NettyManagedBuffer}
import org.apache.spark.util.io.ChunkedByteBuffer

private[storage] class BlockManagerManagedBuffer(
    blockInfoManager: BlockInfoManager,
    blockId: BlockId,
    chunkedBuffer: ChunkedByteBuffer) extends NettyManagedBuffer(chunkedBuffer.toNetty) {

  override def retain(): ManagedBuffer = {
    super.retain()
    val locked = blockInfoManager.lockForReading(blockId, blocking = false)
    assert(locked.isDefined)
    this
  }

  override def release(): ManagedBuffer = {
    blockInfoManager.unlock(blockId)
    super.release()
  }
}
Example 27
Source File: NettyBlockRpcServer.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.network.netty

import java.nio.ByteBuffer

import scala.collection.JavaConverters._
import scala.language.existentials
import scala.reflect.ClassTag

import org.apache.spark.internal.Logging
import org.apache.spark.network.BlockDataManager
import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer}
import org.apache.spark.network.client.{RpcResponseCallback, TransportClient}
import org.apache.spark.network.server.{OneForOneStreamManager, RpcHandler, StreamManager}
import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, MapOutputReady, OpenBlocks, StreamHandle, UploadBlock}
import org.apache.spark.scheduler.MapStatus
import org.apache.spark.serializer.Serializer
import org.apache.spark.storage.{BlockId, StorageLevel}

class NettyBlockRpcServer(
    appId: String,
    serializer: Serializer,
    blockManager: BlockDataManager)
  extends RpcHandler with Logging {

  private val streamManager = new OneForOneStreamManager()

  override def receive(
      client: TransportClient,
      rpcMessage: ByteBuffer,
      responseContext: RpcResponseCallback): Unit = {
    val message = BlockTransferMessage.Decoder.fromByteBuffer(rpcMessage)
    logTrace(s"Received request: $message")

    message match {
      case openBlocks: OpenBlocks =>
        val blocks: Seq[ManagedBuffer] =
          openBlocks.blockIds.map(BlockId.apply).map(blockManager.getBlockData)
        val streamId = streamManager.registerStream(appId, blocks.iterator.asJava)
        logTrace(s"Registered streamId $streamId with ${blocks.size} buffers")
        responseContext.onSuccess(new StreamHandle(streamId, blocks.size).toByteBuffer)

      case uploadBlock: UploadBlock =>
        // StorageLevel and ClassTag are serialized as bytes using our JavaSerializer.
        val (level: StorageLevel, classTag: ClassTag[_]) = {
          serializer
            .newInstance()
            .deserialize(ByteBuffer.wrap(uploadBlock.metadata))
            .asInstanceOf[(StorageLevel, ClassTag[_])]
        }
        val data = new NioManagedBuffer(ByteBuffer.wrap(uploadBlock.blockData))
        val blockId = BlockId(uploadBlock.blockId)
        blockManager.putBlockData(blockId, data, level, classTag)
        responseContext.onSuccess(ByteBuffer.allocate(0))

      case mapOutputReady: MapOutputReady =>
        val mapStatus: MapStatus =
          serializer.newInstance().deserialize(ByteBuffer.wrap(mapOutputReady.serializedMapStatus))
        blockManager.mapOutputReady(
          mapOutputReady.shuffleId, mapOutputReady.mapId, mapOutputReady.numReduces, mapStatus)
    }
  }

  override def getStreamManager(): StreamManager = streamManager
}
Example 28
Source File: BlockTransferService.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.network

import java.io.Closeable
import java.nio.ByteBuffer

import scala.concurrent.{Future, Promise}
import scala.concurrent.duration.Duration
import scala.reflect.ClassTag

import org.apache.spark.internal.Logging
import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer}
import org.apache.spark.network.shuffle.{BlockFetchingListener, ShuffleClient}
import org.apache.spark.scheduler.MapStatus
import org.apache.spark.storage.{BlockId, StorageLevel}
import org.apache.spark.util.ThreadUtils

private[spark] abstract class BlockTransferService extends ShuffleClient with Closeable with Logging {

  def uploadBlockSync(
      hostname: String,
      port: Int,
      execId: String,
      blockId: BlockId,
      blockData: ManagedBuffer,
      level: StorageLevel,
      classTag: ClassTag[_]): Unit = {
    val future = uploadBlock(hostname, port, execId, blockId, blockData, level, classTag)
    ThreadUtils.awaitResult(future, Duration.Inf)
  }
}