com.google.common.io.ByteStreams Scala Examples
The following examples show how to use com.google.common.io.ByteStreams.
Each example is drawn from an open-source project; the source file, originating project, and license are noted in the header above each snippet.
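Most of the examples revolve around a handful of static ByteStreams helpers: copy(InputStream, OutputStream), toByteArray(InputStream), readFully(InputStream, Array[Byte]), and skipFully(InputStream, Long). As a quick orientation, here is a minimal, self-contained Scala sketch of copy and toByteArray; the object name and data are illustrative only and are not taken from the projects below.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.charset.StandardCharsets

import com.google.common.io.ByteStreams

object ByteStreamsBasics {
  def main(args: Array[String]): Unit = {
    val data = "hello, bytes".getBytes(StandardCharsets.UTF_8)

    // copy: drain an InputStream into an OutputStream, returning the number of bytes copied.
    val out = new ByteArrayOutputStream()
    val copied = ByteStreams.copy(new ByteArrayInputStream(data), out)
    assert(copied == data.length)

    // toByteArray: read an entire InputStream into a byte array.
    val roundTripped = ByteStreams.toByteArray(new ByteArrayInputStream(out.toByteArray))
    assert(roundTripped.sameElements(data))
  }
}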
Example 1
Source File: WholeTextFileRecordReader.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.input

import com.google.common.io.{ByteStreams, Closeables}
import org.apache.hadoop.conf.{Configurable => HConfigurable, Configuration}
import org.apache.hadoop.io.Text
import org.apache.hadoop.io.compress.CompressionCodecFactory
import org.apache.hadoop.mapreduce.InputSplit
import org.apache.hadoop.mapreduce.RecordReader
import org.apache.hadoop.mapreduce.TaskAttemptContext
import org.apache.hadoop.mapreduce.lib.input.{CombineFileRecordReader, CombineFileSplit}

// Note: the original source file also defines a Configurable helper trait and the
// WholeTextFileRecordReader class itself (which reads a whole file into a single record
// via ByteStreams.toByteArray); only the excerpt below is shown in this listing.
private[spark] class ConfigurableCombineFileRecordReader[K, V](
    split: InputSplit,
    context: TaskAttemptContext,
    recordReaderClass: Class[_ <: RecordReader[K, V] with HConfigurable])
  extends CombineFileRecordReader[K, V](
    split.asInstanceOf[CombineFileSplit],
    context,
    recordReaderClass
  ) with Configurable {

  override def initNextRecordReader(): Boolean = {
    val r = super.initNextRecordReader()
    if (r) {
      this.curReader.asInstanceOf[HConfigurable].setConf(getConf)
    }
    r
  }
}
Example 2
Source File: WholeTextFileRecordReader.scala From BigDatalog with Apache License 2.0
package org.apache.spark.input

import org.apache.hadoop.conf.{Configuration, Configurable => HConfigurable}
import com.google.common.io.{ByteStreams, Closeables}
import org.apache.hadoop.io.Text
import org.apache.hadoop.io.compress.CompressionCodecFactory
import org.apache.hadoop.mapreduce.InputSplit
import org.apache.hadoop.mapreduce.lib.input.{CombineFileSplit, CombineFileRecordReader}
import org.apache.hadoop.mapreduce.RecordReader
import org.apache.hadoop.mapreduce.TaskAttemptContext
import org.apache.spark.deploy.SparkHadoopUtil

private[spark] class ConfigurableCombineFileRecordReader[K, V](
    split: InputSplit,
    context: TaskAttemptContext,
    recordReaderClass: Class[_ <: RecordReader[K, V] with HConfigurable])
  extends CombineFileRecordReader[K, V](
    split.asInstanceOf[CombineFileSplit],
    context,
    recordReaderClass
  ) with Configurable {

  override def initNextRecordReader(): Boolean = {
    val r = super.initNextRecordReader()
    if (r) {
      this.curReader.asInstanceOf[HConfigurable].setConf(getConf)
    }
    r
  }
}
Example 3
Source File: ChunkedByteBufferSuite.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.io

import java.nio.ByteBuffer

import com.google.common.io.ByteStreams

import org.apache.spark.{SharedSparkContext, SparkFunSuite}
import org.apache.spark.internal.config
import org.apache.spark.network.util.ByteArrayWritableChannel
import org.apache.spark.util.io.ChunkedByteBuffer

class ChunkedByteBufferSuite extends SparkFunSuite with SharedSparkContext {

  test("no chunks") {
    val emptyChunkedByteBuffer = new ChunkedByteBuffer(Array.empty[ByteBuffer])
    assert(emptyChunkedByteBuffer.size === 0)
    assert(emptyChunkedByteBuffer.getChunks().isEmpty)
    assert(emptyChunkedByteBuffer.toArray === Array.empty)
    assert(emptyChunkedByteBuffer.toByteBuffer.capacity() === 0)
    assert(emptyChunkedByteBuffer.toNetty.capacity() === 0)
    emptyChunkedByteBuffer.toInputStream(dispose = false).close()
    emptyChunkedByteBuffer.toInputStream(dispose = true).close()
  }

  test("getChunks() duplicates chunks") {
    val chunkedByteBuffer = new ChunkedByteBuffer(Array(ByteBuffer.allocate(8)))
    chunkedByteBuffer.getChunks().head.position(4)
    assert(chunkedByteBuffer.getChunks().head.position() === 0)
  }

  test("copy() does not affect original buffer's position") {
    val chunkedByteBuffer = new ChunkedByteBuffer(Array(ByteBuffer.allocate(8)))
    chunkedByteBuffer.copy(ByteBuffer.allocate)
    assert(chunkedByteBuffer.getChunks().head.position() === 0)
  }

  test("writeFully() does not affect original buffer's position") {
    val chunkedByteBuffer = new ChunkedByteBuffer(Array(ByteBuffer.allocate(8)))
    chunkedByteBuffer.writeFully(new ByteArrayWritableChannel(chunkedByteBuffer.size.toInt))
    assert(chunkedByteBuffer.getChunks().head.position() === 0)
  }

  test("SPARK-24107: writeFully() write buffer which is larger than bufferWriteChunkSize") {
    try {
      sc.conf.set(config.BUFFER_WRITE_CHUNK_SIZE, 32L * 1024L * 1024L)
      val chunkedByteBuffer = new ChunkedByteBuffer(Array(ByteBuffer.allocate(40 * 1024 * 1024)))
      val byteArrayWritableChannel = new ByteArrayWritableChannel(chunkedByteBuffer.size.toInt)
      chunkedByteBuffer.writeFully(byteArrayWritableChannel)
      assert(byteArrayWritableChannel.length() === chunkedByteBuffer.size)
    } finally {
      sc.conf.remove(config.BUFFER_WRITE_CHUNK_SIZE)
    }
  }

  test("toArray()") {
    val empty = ByteBuffer.wrap(Array.empty[Byte])
    val bytes = ByteBuffer.wrap(Array.tabulate(8)(_.toByte))
    val chunkedByteBuffer = new ChunkedByteBuffer(Array(bytes, bytes, empty))
    assert(chunkedByteBuffer.toArray === bytes.array() ++ bytes.array())
  }

  test("toArray() throws UnsupportedOperationException if size exceeds 2GB") {
    val fourMegabyteBuffer = ByteBuffer.allocate(1024 * 1024 * 4)
    fourMegabyteBuffer.limit(fourMegabyteBuffer.capacity())
    val chunkedByteBuffer = new ChunkedByteBuffer(Array.fill(1024)(fourMegabyteBuffer))
    assert(chunkedByteBuffer.size === (1024L * 1024L * 1024L * 4L))
    intercept[UnsupportedOperationException] {
      chunkedByteBuffer.toArray
    }
  }

  test("toInputStream()") {
    val empty = ByteBuffer.wrap(Array.empty[Byte])
    val bytes1 = ByteBuffer.wrap(Array.tabulate(256)(_.toByte))
    val bytes2 = ByteBuffer.wrap(Array.tabulate(128)(_.toByte))
    val chunkedByteBuffer = new ChunkedByteBuffer(Array(empty, bytes1, bytes2))
    assert(chunkedByteBuffer.size === bytes1.limit() + bytes2.limit())

    val inputStream = chunkedByteBuffer.toInputStream(dispose = false)
    val bytesFromStream = new Array[Byte](chunkedByteBuffer.size.toInt)
    ByteStreams.readFully(inputStream, bytesFromStream)
    assert(bytesFromStream === bytes1.array() ++ bytes2.array())
    assert(chunkedByteBuffer.getChunks().head.position() === 0)
  }
}
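The toInputStream() test above relies on ByteStreams.readFully, which, unlike a bare InputStream.read, keeps reading until the target array is completely filled and throws EOFException if the stream runs out first. A minimal standalone sketch of that contract follows; the object name and data are illustrative only and are not part of Spark.

import java.io.{ByteArrayInputStream, EOFException}

import com.google.common.io.ByteStreams

object ReadFullySketch {
  def main(args: Array[String]): Unit = {
    val bytes = Array.tabulate[Byte](16)(_.toByte)

    // Fills the whole target array, looping over short reads internally.
    val target = new Array[Byte](16)
    ByteStreams.readFully(new ByteArrayInputStream(bytes), target)
    assert(target.sameElements(bytes))

    // Asking for more bytes than the stream holds fails loudly with EOFException.
    try {
      ByteStreams.readFully(new ByteArrayInputStream(bytes), new Array[Byte](32))
      assert(false, "expected EOFException")
    } catch {
      case _: EOFException => // expected
    }
  }
}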
Example 4
Source File: WholeTextFileRecordReader.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.input

import com.google.common.io.{ByteStreams, Closeables}
import org.apache.hadoop.conf.{Configurable => HConfigurable, Configuration}
import org.apache.hadoop.io.Text
import org.apache.hadoop.io.compress.CompressionCodecFactory
import org.apache.hadoop.mapreduce.InputSplit
import org.apache.hadoop.mapreduce.RecordReader
import org.apache.hadoop.mapreduce.TaskAttemptContext
import org.apache.hadoop.mapreduce.lib.input.{CombineFileRecordReader, CombineFileSplit}

private[spark] class ConfigurableCombineFileRecordReader[K, V](
    split: InputSplit,
    context: TaskAttemptContext,
    recordReaderClass: Class[_ <: RecordReader[K, V] with HConfigurable])
  extends CombineFileRecordReader[K, V](
    split.asInstanceOf[CombineFileSplit],
    context,
    recordReaderClass
  ) with Configurable {

  override def initNextRecordReader(): Boolean = {
    val r = super.initNextRecordReader()
    if (r) {
      this.curReader.asInstanceOf[HConfigurable].setConf(getConf)
    }
    r
  }
}
Example 5
Source File: AvroIO.scala From ratatool with Apache License 2.0
package com.spotify.ratatool.io

import java.io.{File, InputStream, OutputStream}
import java.nio.ByteBuffer
import java.nio.channels.SeekableByteChannel

import com.google.common.io.ByteStreams
import org.apache.avro.Schema
import org.apache.avro.file.{DataFileReader, DataFileWriter, SeekableByteArrayInput, SeekableInput}
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DatumReader, DatumWriter}
import org.apache.avro.reflect.{ReflectDatumReader, ReflectDatumWriter}
import org.apache.avro.specific.{SpecificDatumReader, SpecificDatumWriter, SpecificRecord}
import org.apache.beam.sdk.io.FileSystems
import org.apache.beam.sdk.io.fs.MatchResult.Metadata

import scala.jdk.CollectionConverters._
import scala.reflect.ClassTag

// Excerpt from object AvroIO; other members (readers, writers, createDatumWriter, ...)
// are omitted in this listing.
object AvroIO {

  def writeToOutputStream[T: ClassTag](data: Iterable[T],
                                       schema: Schema,
                                       os: OutputStream): Unit = {
    val fileWriter = new DataFileWriter(createDatumWriter[T]).create(schema, os)
    data.foreach(fileWriter.append)
    fileWriter.close()
  }

  def getAvroSchemaFromFile(path: String): Schema = {
    require(FileStorage(path).exists, s"File `$path` does not exist!")
    val files = FileStorage(path).listFiles.filter(_.resourceId.getFilename.endsWith(".avro"))
    require(files.nonEmpty, s"File `$path` does not contain avro files")
    val reader = new GenericDatumReader[GenericRecord]()
    val dfr = new DataFileReader[GenericRecord](AvroIO.getAvroSeekableInput(files.head), reader)
    dfr.getSchema
  }

  private def getAvroSeekableInput(meta: Metadata): SeekableInput = new SeekableInput {
    require(meta.isReadSeekEfficient)
    private val in = FileSystems.open(meta.resourceId()).asInstanceOf[SeekableByteChannel]
    override def read(b: Array[Byte], off: Int, len: Int): Int =
      in.read(ByteBuffer.wrap(b, off, len))
    override def tell(): Long = in.position()
    override def length(): Long = in.size()
    override def seek(p: Long): Unit = in.position(p)
    override def close(): Unit = in.close()
  }
}
Example 6
Source File: WholeTextFileRecordReader.scala From spark1.52 with Apache License 2.0
package org.apache.spark.input

import org.apache.hadoop.conf.{Configuration, Configurable => HConfigurable}
import com.google.common.io.{ByteStreams, Closeables}
import org.apache.hadoop.io.Text
import org.apache.hadoop.io.compress.CompressionCodecFactory
import org.apache.hadoop.mapreduce.InputSplit
import org.apache.hadoop.mapreduce.lib.input.{CombineFileSplit, CombineFileRecordReader}
import org.apache.hadoop.mapreduce.RecordReader
import org.apache.hadoop.mapreduce.TaskAttemptContext
import org.apache.spark.deploy.SparkHadoopUtil

private[spark] class ConfigurableCombineFileRecordReader[K, V](
    split: InputSplit,
    context: TaskAttemptContext,
    recordReaderClass: Class[_ <: RecordReader[K, V] with HConfigurable])
  extends CombineFileRecordReader[K, V](
    split.asInstanceOf[CombineFileSplit],
    context,
    recordReaderClass
  ) with Configurable {

  override def initNextRecordReader(): Boolean = {
    val r = super.initNextRecordReader()
    if (r) {
      this.curReader.asInstanceOf[HConfigurable].setConf(getConf)
    }
    r
  }
}
Example 7
Source File: WholeTextFileRecordReader.scala From iolap with Apache License 2.0
package org.apache.spark.input

import org.apache.hadoop.conf.{Configuration, Configurable => HConfigurable}
import com.google.common.io.{ByteStreams, Closeables}
import org.apache.hadoop.io.Text
import org.apache.hadoop.io.compress.CompressionCodecFactory
import org.apache.hadoop.mapreduce.InputSplit
import org.apache.hadoop.mapreduce.lib.input.{CombineFileSplit, CombineFileRecordReader}
import org.apache.hadoop.mapreduce.RecordReader
import org.apache.hadoop.mapreduce.TaskAttemptContext
import org.apache.spark.deploy.SparkHadoopUtil

private[spark] class ConfigurableCombineFileRecordReader[K, V](
    split: InputSplit,
    context: TaskAttemptContext,
    recordReaderClass: Class[_ <: RecordReader[K, V] with HConfigurable])
  extends CombineFileRecordReader[K, V](
    split.asInstanceOf[CombineFileSplit],
    context,
    recordReaderClass
  ) with Configurable {

  override def initNextRecordReader(): Boolean = {
    val r = super.initNextRecordReader()
    if (r) {
      this.curReader.asInstanceOf[HConfigurable].setConf(getConf)
    }
    r
  }
}
Example 8
Source File: IndexShuffleBlockResolver.scala From iolap with Apache License 2.0
package org.apache.spark.shuffle

import java.io._

import com.google.common.io.ByteStreams

import org.apache.spark.{SparkConf, SparkEnv}
import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer}
import org.apache.spark.network.netty.SparkTransportConf
import org.apache.spark.storage._
import org.apache.spark.util.Utils

import IndexShuffleBlockResolver.NOOP_REDUCE_ID

  // Excerpt from class IndexShuffleBlockResolver; the class declaration and helpers
  // such as getIndexFile/getDataFile/transportConf are omitted in this listing.

  def writeIndexFile(shuffleId: Int, mapId: Int, lengths: Array[Long]): Unit = {
    val indexFile = getIndexFile(shuffleId, mapId)
    val out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(indexFile)))
    Utils.tryWithSafeFinally {
      // We take in lengths of each block, need to convert it to offsets.
      var offset = 0L
      out.writeLong(offset)
      for (length <- lengths) {
        offset += length
        out.writeLong(offset)
      }
    } {
      out.close()
    }
  }

  override def getBlockData(blockId: ShuffleBlockId): ManagedBuffer = {
    // The block is actually going to be a range of a single map output file for this map, so
    // find out the consolidated file, then the offset within that from our index
    val indexFile = getIndexFile(blockId.shuffleId, blockId.mapId)

    val in = new DataInputStream(new FileInputStream(indexFile))
    try {
      ByteStreams.skipFully(in, blockId.reduceId * 8)
      val offset = in.readLong()
      val nextOffset = in.readLong()
      new FileSegmentManagedBuffer(
        transportConf,
        getDataFile(blockId.shuffleId, blockId.mapId),
        offset,
        nextOffset - offset)
    } finally {
      in.close()
    }
  }

  override def stop(): Unit = {}
}

private[spark] object IndexShuffleBlockResolver {
  // No-op reduce ID used in interactions with disk store and BlockObjectWriter.
  // The disk store currently expects puts to relate to a (map, reduce) pair, but in the sort
  // shuffle outputs for several reduces are glommed into a single file.
  // TODO: Avoid this entirely by having the DiskBlockObjectWriter not require a BlockId.
  val NOOP_REDUCE_ID = 0
}
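Example 8 writes an index file of cumulative offsets (a leading zero followed by one long per block) and later uses ByteStreams.skipFully to jump straight to the pair of longs that bounds a given block. The same pattern can be exercised on its own; the sketch below is illustrative only (the object and method names are made up and are not part of Spark).

import java.io.{BufferedOutputStream, DataInputStream, DataOutputStream, File, FileInputStream, FileOutputStream}

import com.google.common.io.ByteStreams

object IndexFileSketch {
  // Write an index file containing cumulative offsets for the given block lengths.
  def writeIndex(indexFile: File, lengths: Array[Long]): Unit = {
    val out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(indexFile)))
    try {
      var offset = 0L
      out.writeLong(offset)
      for (length <- lengths) {
        offset += length
        out.writeLong(offset)
      }
    } finally {
      out.close()
    }
  }

  // Read the (offset, length) of block i by skipping i longs (8 bytes each).
  def blockRange(indexFile: File, i: Int): (Long, Long) = {
    val in = new DataInputStream(new FileInputStream(indexFile))
    try {
      ByteStreams.skipFully(in, i * 8L)
      val offset = in.readLong()
      val nextOffset = in.readLong()
      (offset, nextOffset - offset)
    } finally {
      in.close()
    }
  }

  def main(args: Array[String]): Unit = {
    val f = File.createTempFile("index", ".bin")
    writeIndex(f, Array(10L, 20L, 30L))
    println(blockRange(f, 1)) // prints (10,20)
  }
}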
Example 9
Source File: ChunkedByteBufferSuite.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.io

import java.nio.ByteBuffer

import com.google.common.io.ByteStreams

import org.apache.spark.SparkFunSuite
import org.apache.spark.network.util.ByteArrayWritableChannel
import org.apache.spark.util.io.ChunkedByteBuffer

class ChunkedByteBufferSuite extends SparkFunSuite {

  test("no chunks") {
    val emptyChunkedByteBuffer = new ChunkedByteBuffer(Array.empty[ByteBuffer])
    assert(emptyChunkedByteBuffer.size === 0)
    assert(emptyChunkedByteBuffer.getChunks().isEmpty)
    assert(emptyChunkedByteBuffer.toArray === Array.empty)
    assert(emptyChunkedByteBuffer.toByteBuffer.capacity() === 0)
    assert(emptyChunkedByteBuffer.toNetty.capacity() === 0)
    emptyChunkedByteBuffer.toInputStream(dispose = false).close()
    emptyChunkedByteBuffer.toInputStream(dispose = true).close()
  }

  test("getChunks() duplicates chunks") {
    val chunkedByteBuffer = new ChunkedByteBuffer(Array(ByteBuffer.allocate(8)))
    chunkedByteBuffer.getChunks().head.position(4)
    assert(chunkedByteBuffer.getChunks().head.position() === 0)
  }

  test("copy() does not affect original buffer's position") {
    val chunkedByteBuffer = new ChunkedByteBuffer(Array(ByteBuffer.allocate(8)))
    chunkedByteBuffer.copy(ByteBuffer.allocate)
    assert(chunkedByteBuffer.getChunks().head.position() === 0)
  }

  test("writeFully() does not affect original buffer's position") {
    val chunkedByteBuffer = new ChunkedByteBuffer(Array(ByteBuffer.allocate(8)))
    chunkedByteBuffer.writeFully(new ByteArrayWritableChannel(chunkedByteBuffer.size.toInt))
    assert(chunkedByteBuffer.getChunks().head.position() === 0)
  }

  test("toArray()") {
    val empty = ByteBuffer.wrap(Array.empty[Byte])
    val bytes = ByteBuffer.wrap(Array.tabulate(8)(_.toByte))
    val chunkedByteBuffer = new ChunkedByteBuffer(Array(bytes, bytes, empty))
    assert(chunkedByteBuffer.toArray === bytes.array() ++ bytes.array())
  }

  test("toArray() throws UnsupportedOperationException if size exceeds 2GB") {
    val fourMegabyteBuffer = ByteBuffer.allocate(1024 * 1024 * 4)
    fourMegabyteBuffer.limit(fourMegabyteBuffer.capacity())
    val chunkedByteBuffer = new ChunkedByteBuffer(Array.fill(1024)(fourMegabyteBuffer))
    assert(chunkedByteBuffer.size === (1024L * 1024L * 1024L * 4L))
    intercept[UnsupportedOperationException] {
      chunkedByteBuffer.toArray
    }
  }

  test("toInputStream()") {
    val empty = ByteBuffer.wrap(Array.empty[Byte])
    val bytes1 = ByteBuffer.wrap(Array.tabulate(256)(_.toByte))
    val bytes2 = ByteBuffer.wrap(Array.tabulate(128)(_.toByte))
    val chunkedByteBuffer = new ChunkedByteBuffer(Array(empty, bytes1, bytes2))
    assert(chunkedByteBuffer.size === bytes1.limit() + bytes2.limit())

    val inputStream = chunkedByteBuffer.toInputStream(dispose = false)
    val bytesFromStream = new Array[Byte](chunkedByteBuffer.size.toInt)
    ByteStreams.readFully(inputStream, bytesFromStream)
    assert(bytesFromStream === bytes1.array() ++ bytes2.array())
    assert(chunkedByteBuffer.getChunks().head.position() === 0)
  }
}
Example 10
Source File: PortableDataStream.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.input

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

import scala.collection.JavaConverters._

import com.google.common.io.{ByteStreams, Closeables}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.{InputSplit, JobContext, RecordReader, TaskAttemptContext}
import org.apache.hadoop.mapreduce.lib.input.{CombineFileInputFormat, CombineFileRecordReader, CombineFileSplit}

  // Excerpt from class PortableDataStream; the class declaration and other members
  // (including open() and path) are omitted in this listing.

  def toArray(): Array[Byte] = {
    val stream = open()
    try {
      ByteStreams.toByteArray(stream)
    } finally {
      Closeables.close(stream, true)
    }
  }

  def getPath(): String = path
}
Example 11
Source File: ExampleData.scala From cuesheet with Apache License 2.0
package com.kakao.cuesheet.examples.util

import java.io.FileOutputStream

import com.google.common.io.{ByteStreams, Files}

import scala.util.control.NonFatal

object ExampleData {
  lazy val path: String = {
    try {
      val resource = "data.tsv"
      val tmpfile = Files.createTempDir().getAbsolutePath + resource
      val input = getClass.getResourceAsStream(resource)
      val output = new FileOutputStream(tmpfile)
      ByteStreams.copy(input, output)
      input.close()
      output.close()
      tmpfile
    } catch {
      case NonFatal(e) =>
        throw new RuntimeException("Could not copy example data file to temp directory", e)
    }
  }
}
Example 12
Source File: WholeTextFileRecordReader.scala From SparkCore with Apache License 2.0
package org.apache.spark.input

import org.apache.hadoop.conf.{Configuration, Configurable => HConfigurable}
import com.google.common.io.{ByteStreams, Closeables}
import org.apache.hadoop.io.Text
import org.apache.hadoop.io.compress.CompressionCodecFactory
import org.apache.hadoop.mapreduce.InputSplit
import org.apache.hadoop.mapreduce.lib.input.{CombineFileSplit, CombineFileRecordReader}
import org.apache.hadoop.mapreduce.RecordReader
import org.apache.hadoop.mapreduce.TaskAttemptContext
import org.apache.spark.deploy.SparkHadoopUtil

private[spark] class ConfigurableCombineFileRecordReader[K, V](
    split: InputSplit,
    context: TaskAttemptContext,
    recordReaderClass: Class[_ <: RecordReader[K, V] with HConfigurable])
  extends CombineFileRecordReader[K, V](
    split.asInstanceOf[CombineFileSplit],
    context,
    recordReaderClass
  ) with Configurable {

  override def initNextRecordReader(): Boolean = {
    val r = super.initNextRecordReader()
    if (r) {
      this.curReader.asInstanceOf[HConfigurable].setConf(getConf)
    }
    r
  }
}
Example 13
Source File: IndexShuffleBlockManager.scala From SparkCore with Apache License 2.0
package org.apache.spark.shuffle

import java.io._
import java.nio.ByteBuffer

import com.google.common.io.ByteStreams

import org.apache.spark.{SparkConf, SparkEnv}
import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer}
import org.apache.spark.network.netty.SparkTransportConf
import org.apache.spark.storage._

  // Excerpt from class IndexShuffleBlockManager; the class declaration and helpers
  // such as getIndexFile/getDataFile/transportConf are omitted in this listing.

  def writeIndexFile(shuffleId: Int, mapId: Int, lengths: Array[Long]) = {
    val indexFile = getIndexFile(shuffleId, mapId)
    val out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(indexFile)))
    try {
      // We take in lengths of each block, need to convert it to offsets.
      var offset = 0L
      out.writeLong(offset)
      for (length <- lengths) {
        offset += length
        out.writeLong(offset)
      }
    } finally {
      out.close()
    }
  }

  override def getBytes(blockId: ShuffleBlockId): Option[ByteBuffer] = {
    Some(getBlockData(blockId).nioByteBuffer())
  }

  override def getBlockData(blockId: ShuffleBlockId): ManagedBuffer = {
    // The block is actually going to be a range of a single map output file for this map, so
    // find out the consolidated file, then the offset within that from our index
    val indexFile = getIndexFile(blockId.shuffleId, blockId.mapId)

    val in = new DataInputStream(new FileInputStream(indexFile))
    try {
      ByteStreams.skipFully(in, blockId.reduceId * 8)
      val offset = in.readLong()
      val nextOffset = in.readLong()
      new FileSegmentManagedBuffer(
        transportConf,
        getDataFile(blockId.shuffleId, blockId.mapId),
        offset,
        nextOffset - offset)
    } finally {
      in.close()
    }
  }

  override def stop() = {}
}
Example 14
Source File: ChunkedByteBufferSuite.scala From sparkoscope with Apache License 2.0
package org.apache.spark.io

import java.nio.ByteBuffer

import com.google.common.io.ByteStreams

import org.apache.spark.SparkFunSuite
import org.apache.spark.network.util.ByteArrayWritableChannel
import org.apache.spark.util.io.ChunkedByteBuffer

class ChunkedByteBufferSuite extends SparkFunSuite {

  test("no chunks") {
    val emptyChunkedByteBuffer = new ChunkedByteBuffer(Array.empty[ByteBuffer])
    assert(emptyChunkedByteBuffer.size === 0)
    assert(emptyChunkedByteBuffer.getChunks().isEmpty)
    assert(emptyChunkedByteBuffer.toArray === Array.empty)
    assert(emptyChunkedByteBuffer.toByteBuffer.capacity() === 0)
    assert(emptyChunkedByteBuffer.toNetty.capacity() === 0)
    emptyChunkedByteBuffer.toInputStream(dispose = false).close()
    emptyChunkedByteBuffer.toInputStream(dispose = true).close()
  }

  test("getChunks() duplicates chunks") {
    val chunkedByteBuffer = new ChunkedByteBuffer(Array(ByteBuffer.allocate(8)))
    chunkedByteBuffer.getChunks().head.position(4)
    assert(chunkedByteBuffer.getChunks().head.position() === 0)
  }

  test("copy() does not affect original buffer's position") {
    val chunkedByteBuffer = new ChunkedByteBuffer(Array(ByteBuffer.allocate(8)))
    chunkedByteBuffer.copy(ByteBuffer.allocate)
    assert(chunkedByteBuffer.getChunks().head.position() === 0)
  }

  test("writeFully() does not affect original buffer's position") {
    val chunkedByteBuffer = new ChunkedByteBuffer(Array(ByteBuffer.allocate(8)))
    chunkedByteBuffer.writeFully(new ByteArrayWritableChannel(chunkedByteBuffer.size.toInt))
    assert(chunkedByteBuffer.getChunks().head.position() === 0)
  }

  test("toArray()") {
    val empty = ByteBuffer.wrap(Array.empty[Byte])
    val bytes = ByteBuffer.wrap(Array.tabulate(8)(_.toByte))
    val chunkedByteBuffer = new ChunkedByteBuffer(Array(bytes, bytes, empty))
    assert(chunkedByteBuffer.toArray === bytes.array() ++ bytes.array())
  }

  test("toArray() throws UnsupportedOperationException if size exceeds 2GB") {
    val fourMegabyteBuffer = ByteBuffer.allocate(1024 * 1024 * 4)
    fourMegabyteBuffer.limit(fourMegabyteBuffer.capacity())
    val chunkedByteBuffer = new ChunkedByteBuffer(Array.fill(1024)(fourMegabyteBuffer))
    assert(chunkedByteBuffer.size === (1024L * 1024L * 1024L * 4L))
    intercept[UnsupportedOperationException] {
      chunkedByteBuffer.toArray
    }
  }

  test("toInputStream()") {
    val empty = ByteBuffer.wrap(Array.empty[Byte])
    val bytes1 = ByteBuffer.wrap(Array.tabulate(256)(_.toByte))
    val bytes2 = ByteBuffer.wrap(Array.tabulate(128)(_.toByte))
    val chunkedByteBuffer = new ChunkedByteBuffer(Array(empty, bytes1, bytes2))
    assert(chunkedByteBuffer.size === bytes1.limit() + bytes2.limit())

    val inputStream = chunkedByteBuffer.toInputStream(dispose = false)
    val bytesFromStream = new Array[Byte](chunkedByteBuffer.size.toInt)
    ByteStreams.readFully(inputStream, bytesFromStream)
    assert(bytesFromStream === bytes1.array() ++ bytes2.array())
    assert(chunkedByteBuffer.getChunks().head.position() === 0)
  }
}
Example 15
Source File: WholeTextFileRecordReader.scala From sparkoscope with Apache License 2.0
package org.apache.spark.input

import com.google.common.io.{ByteStreams, Closeables}
import org.apache.hadoop.conf.{Configurable => HConfigurable, Configuration}
import org.apache.hadoop.io.Text
import org.apache.hadoop.io.compress.CompressionCodecFactory
import org.apache.hadoop.mapreduce.InputSplit
import org.apache.hadoop.mapreduce.RecordReader
import org.apache.hadoop.mapreduce.TaskAttemptContext
import org.apache.hadoop.mapreduce.lib.input.{CombineFileRecordReader, CombineFileSplit}

private[spark] class ConfigurableCombineFileRecordReader[K, V](
    split: InputSplit,
    context: TaskAttemptContext,
    recordReaderClass: Class[_ <: RecordReader[K, V] with HConfigurable])
  extends CombineFileRecordReader[K, V](
    split.asInstanceOf[CombineFileSplit],
    context,
    recordReaderClass
  ) with Configurable {

  override def initNextRecordReader(): Boolean = {
    val r = super.initNextRecordReader()
    if (r) {
      this.curReader.asInstanceOf[HConfigurable].setConf(getConf)
    }
    r
  }
}
Example 16
Source File: ChunkedByteBufferSuite.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.io

import java.nio.ByteBuffer

import com.google.common.io.ByteStreams

import org.apache.spark.SparkFunSuite
import org.apache.spark.network.util.ByteArrayWritableChannel
import org.apache.spark.util.io.ChunkedByteBuffer

class ChunkedByteBufferSuite extends SparkFunSuite {

  test("no chunks") {
    val emptyChunkedByteBuffer = new ChunkedByteBuffer(Array.empty[ByteBuffer])
    assert(emptyChunkedByteBuffer.size === 0)
    assert(emptyChunkedByteBuffer.getChunks().isEmpty)
    assert(emptyChunkedByteBuffer.toArray === Array.empty)
    assert(emptyChunkedByteBuffer.toByteBuffer.capacity() === 0)
    assert(emptyChunkedByteBuffer.toNetty.capacity() === 0)
    emptyChunkedByteBuffer.toInputStream(dispose = false).close()
    emptyChunkedByteBuffer.toInputStream(dispose = true).close()
  }

  test("getChunks() duplicates chunks") {
    val chunkedByteBuffer = new ChunkedByteBuffer(Array(ByteBuffer.allocate(8)))
    chunkedByteBuffer.getChunks().head.position(4)
    assert(chunkedByteBuffer.getChunks().head.position() === 0)
  }

  test("copy() does not affect original buffer's position") {
    val chunkedByteBuffer = new ChunkedByteBuffer(Array(ByteBuffer.allocate(8)))
    chunkedByteBuffer.copy(ByteBuffer.allocate)
    assert(chunkedByteBuffer.getChunks().head.position() === 0)
  }

  test("writeFully() does not affect original buffer's position") {
    val chunkedByteBuffer = new ChunkedByteBuffer(Array(ByteBuffer.allocate(8)))
    chunkedByteBuffer.writeFully(new ByteArrayWritableChannel(chunkedByteBuffer.size.toInt))
    assert(chunkedByteBuffer.getChunks().head.position() === 0)
  }

  test("toArray()") {
    val empty = ByteBuffer.wrap(Array.empty[Byte])
    val bytes = ByteBuffer.wrap(Array.tabulate(8)(_.toByte))
    val chunkedByteBuffer = new ChunkedByteBuffer(Array(bytes, bytes, empty))
    assert(chunkedByteBuffer.toArray === bytes.array() ++ bytes.array())
  }

  test("toArray() throws UnsupportedOperationException if size exceeds 2GB") {
    val fourMegabyteBuffer = ByteBuffer.allocate(1024 * 1024 * 4)
    fourMegabyteBuffer.limit(fourMegabyteBuffer.capacity())
    val chunkedByteBuffer = new ChunkedByteBuffer(Array.fill(1024)(fourMegabyteBuffer))
    assert(chunkedByteBuffer.size === (1024L * 1024L * 1024L * 4L))
    intercept[UnsupportedOperationException] {
      chunkedByteBuffer.toArray
    }
  }

  test("toInputStream()") {
    val empty = ByteBuffer.wrap(Array.empty[Byte])
    val bytes1 = ByteBuffer.wrap(Array.tabulate(256)(_.toByte))
    val bytes2 = ByteBuffer.wrap(Array.tabulate(128)(_.toByte))
    val chunkedByteBuffer = new ChunkedByteBuffer(Array(empty, bytes1, bytes2))
    assert(chunkedByteBuffer.size === bytes1.limit() + bytes2.limit())

    val inputStream = chunkedByteBuffer.toInputStream(dispose = false)
    val bytesFromStream = new Array[Byte](chunkedByteBuffer.size.toInt)
    ByteStreams.readFully(inputStream, bytesFromStream)
    assert(bytesFromStream === bytes1.array() ++ bytes2.array())
    assert(chunkedByteBuffer.getChunks().head.position() === 0)
  }
}
Example 17
Source File: WholeTextFileRecordReader.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.input

import com.google.common.io.{ByteStreams, Closeables}
import org.apache.hadoop.conf.{Configurable => HConfigurable, Configuration}
import org.apache.hadoop.io.Text
import org.apache.hadoop.io.compress.CompressionCodecFactory
import org.apache.hadoop.mapreduce.InputSplit
import org.apache.hadoop.mapreduce.RecordReader
import org.apache.hadoop.mapreduce.TaskAttemptContext
import org.apache.hadoop.mapreduce.lib.input.{CombineFileRecordReader, CombineFileSplit}

private[spark] class ConfigurableCombineFileRecordReader[K, V](
    split: InputSplit,
    context: TaskAttemptContext,
    recordReaderClass: Class[_ <: RecordReader[K, V] with HConfigurable])
  extends CombineFileRecordReader[K, V](
    split.asInstanceOf[CombineFileSplit],
    context,
    recordReaderClass
  ) with Configurable {

  override def initNextRecordReader(): Boolean = {
    val r = super.initNextRecordReader()
    if (r) {
      this.curReader.asInstanceOf[HConfigurable].setConf(getConf)
    }
    r
  }
}