java.io.EOFException Scala Examples

The following examples show how to use java.io.EOFException in Scala. Each example notes the project and source file it was taken from.
Example 1
Source File: FileBasedWriteAheadLogReader.scala    From iolap   with Apache License 2.0
package org.apache.spark.streaming.util

import java.io.{Closeable, EOFException}
import java.nio.ByteBuffer

import org.apache.hadoop.conf.Configuration
import org.apache.spark.Logging


private[streaming] class FileBasedWriteAheadLogReader(path: String, conf: Configuration)
  extends Iterator[ByteBuffer] with Closeable with Logging {

  private val instream = HdfsUtils.getInputStream(path, conf)
  private var closed = false
  private var nextItem: Option[ByteBuffer] = None

  override def hasNext: Boolean = synchronized {
    if (closed) {
      return false
    }

    if (nextItem.isDefined) { // handle the case where hasNext is called without calling next
      true
    } else {
      try {
        val length = instream.readInt()
        val buffer = new Array[Byte](length)
        instream.readFully(buffer)
        nextItem = Some(ByteBuffer.wrap(buffer))
        logTrace("Read next item " + nextItem.get)
        true
      } catch {
        case e: EOFException =>
          logDebug("Error reading next item, EOF reached", e)
          close()
          false
        case e: Exception =>
          logWarning("Error while trying to read data from HDFS.", e)
          close()
          throw e
      }
    }
  }

  override def next(): ByteBuffer = synchronized {
    val data = nextItem.getOrElse {
      close()
      throw new IllegalStateException(
        "next called without calling hasNext or after hasNext returned false")
    }
    nextItem = None // Ensure the next hasNext call loads new data.
    data
  }

  override def close(): Unit = synchronized {
    if (!closed) {
      instream.close()
    }
    closed = true
  }
} 
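
Usage note (not part of the original source): a minimal sketch of driving this reader, assuming a Hadoop Configuration and the path of an existing write-ahead log segment. The log path below is hypothetical, and because the class is private[streaming] the snippet would have to live under org.apache.spark.streaming.

import java.nio.ByteBuffer
import org.apache.hadoop.conf.Configuration

val hadoopConf = new Configuration()
val logPath = "hdfs:///checkpointDir/receivedData/0/log-0" // hypothetical WAL segment path

val reader = new FileBasedWriteAheadLogReader(logPath, hadoopConf)
try {
  // The reader is an Iterator[ByteBuffer], so it can be drained like any iterator.
  reader.foreach { (record: ByteBuffer) =>
    println(s"read record of ${record.remaining()} bytes")
  }
} finally {
  reader.close() // hasNext already closes the stream on EOF; closing again is a no-op
}
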
Example 2
Source File: MetadataStream.scala    From spark-bam   with Apache License 2.0
package org.hammerlab.bgzf.block

import java.io.{ Closeable, EOFException }

import hammerlab.iterator.SimpleIterator
import org.hammerlab.bgzf.block.Block.FOOTER_SIZE
import org.hammerlab.bgzf.block.Header.EXPECTED_HEADER_SIZE
import org.hammerlab.channel.ByteChannel
import org.hammerlab.io.Buffer


case class MetadataStream(ch: ByteChannel)
  extends SimpleIterator[Metadata]
    with Closeable {

  // Buffer for the standard bits of the header that we care about
  implicit val buf = Buffer(EXPECTED_HEADER_SIZE)

  override protected def _advance: Option[Metadata] = {

    val start = ch.position()

    buf.clear()
    val Header(actualHeaderSize, compressedSize) =
      try {
        Header(ch)
      } catch {
        case e: EOFException ⇒
          return None
      }

    val remainingBytes = compressedSize - actualHeaderSize

    ch.skip(remainingBytes - 4)
    val uncompressedSize = ch.getInt

    val dataLength = remainingBytes - FOOTER_SIZE

    if (dataLength == 2) {
      // Skip empty block at end of file
      None
    } else
      Some(
        Metadata(
          start,
          compressedSize,
          uncompressedSize
        )
      )
  }

  override def close(): Unit =
    ch.close()
} 
Example 3
Source File: TruncatableSeekableStream.scala    From spark-bam   with Apache License 2.0
package org.hammerlab.bam.check.seqdoop

import java.io.EOFException

import hammerlab.path._
import htsjdk.samtools.seekablestream.SeekableStream
import org.hammerlab.channel.SeekableByteChannel

import scala.math.min

case class TruncatableSeekableStream(channel: SeekableByteChannel,
                                     source: Path)
  extends SeekableStream {

  var limit = channel.size

  def clear(): Unit =
    limit = channel.size

  override def length(): Long = limit

  override def seek(position: Long): Unit =
    channel.seek(
      min(
        limit,
        position
      )
    )

  override def getSource: String = source.toString()

  override def position(): Long = channel.position()
  override def eof(): Boolean = channel.position() == length()

  def remaining: Long = length() - position()

  override def read(): Int =
    if (position() < length())
      channel.read()
    else
      -1

  override def read(b: Array[Byte],
                    off: Int,
                    len: Int): Int = {
    if (len > remaining) {
      channel.read(b, off, remaining.toInt)
      throw new EOFException(
        s"Attempting to read $len bytes from offset $off when channel is at ${position()} with length ${length()} (only $remaining bytes available)"
      )
    }
    channel.read(b, off, len)
  }

  override def close(): Unit = channel.close()
} 
Example 4
Source File: CopyCsvFileTrait.scala    From cloud-integration   with Apache License 2.0
package com.cloudera.spark.cloud.common

import java.io.{EOFException, FileNotFoundException}

import org.apache.hadoop.fs.Path



// The enclosing declaration and its scaladoc were elided from this snippet; the trait name below is
// taken from the file name. The helpers it calls (hasCSVTestFile, sourceCSVFilePath, isFilesystemDefined,
// filesystem, getConf, path, copyFile, logInfo, testCSVFile, deleteTestCSVFile) come from the
// cloud test-suite traits it extends in the original project.
trait CopyCsvFileTrait { // extends <test-suite traits elided here>

  override def prepareTestCSVFile(): Unit = {
    require(hasCSVTestFile(), "No CSV file")
    require(isFilesystemDefined, "Test FS is not defined; call initFS() first")
    // here the CSV file is copied over
    val source = sourceCSVFilePath.get
    if (source.toUri.getScheme == "wasb") {
      // source is already in Azure
      testCSVFile = sourceCSVFilePath
      deleteTestCSVFile = false
    } else {
      val srcStatus = source.getFileSystem(getConf).getFileStatus(source)
      if (srcStatus.getLen == 0) {
        throw new EOFException(s"File $source is an empty file")
      }
      // need to copy over
      val destFile = path(source.getName)
      testCSVFile = Some(destFile)
      var toCopy = true // copy unless an identical file is already present at the destination
      try {
        val status = filesystem.getFileStatus(destFile)
        if (status.getLen != srcStatus.getLen) {
          logInfo(s"Dest file exists, but length of $status != source data $srcStatus")
        } else {
          logInfo(s"Datafile exists; no copy needed: $status")
          toCopy = false
        }
      } catch {
        case _ : FileNotFoundException =>
          toCopy = true
      }
      if (toCopy) {
        copyFile(sourceCSVFilePath.get, destFile, getConf, true)
      }
    }
  }

} 
Example 5
Source File: SerializableBuffer.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.util

import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer
import java.nio.channels.Channels


private[spark]
class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable {
  def value: ByteBuffer = buffer

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    val length = in.readInt()
    buffer = ByteBuffer.allocate(length)
    var amountRead = 0
    val channel = Channels.newChannel(in)
    while (amountRead < length) {
      val ret = channel.read(buffer)
      if (ret == -1) {
        throw new EOFException("End of file before fully reading buffer")
      }
      amountRead += ret
    }
    buffer.rewind() // Allow us to read it later
  }

  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.writeInt(buffer.limit())
    if (Channels.newChannel(out).write(buffer) != buffer.limit()) {
      throw new IOException("Could not fully write buffer to output stream")
    }
    buffer.rewind() // Allow us to write it again later
  }
} 
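
Usage note (not from the original source): a minimal round-trip sketch showing how readObject and writeObject are exercised through plain Java serialization; it assumes the code sits inside the org.apache.spark package, since the class is private[spark].

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer

val original = new SerializableBuffer(ByteBuffer.wrap("hello".getBytes("UTF-8")))

// writeObject emits the buffer's length followed by its bytes.
val bytesOut = new ByteArrayOutputStream()
val objOut = new ObjectOutputStream(bytesOut)
objOut.writeObject(original)
objOut.close()

// readObject reallocates the buffer; it throws EOFException if the stream ends before `length` bytes arrive.
val objIn = new ObjectInputStream(new ByteArrayInputStream(bytesOut.toByteArray))
val copy = objIn.readObject().asInstanceOf[SerializableBuffer]
assert(copy.value.remaining() == original.value.remaining())
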
Example 6
Source File: FileBasedWriteAheadLogReader.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.streaming.util

import java.io.{IOException, Closeable, EOFException}
import java.nio.ByteBuffer

import org.apache.hadoop.conf.Configuration
import org.apache.spark.Logging


private[streaming] class FileBasedWriteAheadLogReader(path: String, conf: Configuration)
  extends Iterator[ByteBuffer] with Closeable with Logging {

  private val instream = HdfsUtils.getInputStream(path, conf)
  private var closed = (instream == null) // the file may be deleted as we're opening the stream
  private var nextItem: Option[ByteBuffer] = None

  override def hasNext: Boolean = synchronized {
    if (closed) {
      return false
    }

    if (nextItem.isDefined) { // handle the case where hasNext is called without calling next
      true
    } else {
      try {
        val length = instream.readInt()
        val buffer = new Array[Byte](length)
        instream.readFully(buffer)
        nextItem = Some(ByteBuffer.wrap(buffer))
        logTrace("Read next item " + nextItem.get)
        true
      } catch {
        case e: EOFException =>
          logDebug("Error reading next item, EOF reached", e)
          close()
          false
        case e: IOException =>
          logWarning("Error while trying to read data. If the file was deleted, " +
            "this should be okay.", e)
          close()
          if (HdfsUtils.checkFileExists(path, conf)) {
            // If file exists, this could be a legitimate error
            throw e
          } else {
            // File was deleted. This can occur when the daemon cleanup thread takes time to
            // delete the file during recovery.
            false
          }

        case e: Exception =>
          logWarning("Error while trying to read data from HDFS.", e)
          close()
          throw e
      }
    }
  }

  override def next(): ByteBuffer = synchronized {
    val data = nextItem.getOrElse {
      close()
      throw new IllegalStateException(
        "next called without calling hasNext or after hasNext returned false")
    }
    nextItem = None // Ensure the next hasNext call loads new data.
    data
  }

  override def close(): Unit = synchronized {
    if (!closed) {
      instream.close()
    }
    closed = true
  }
} 
Example 7
Source File: SerializableBuffer.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.util

import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer
import java.nio.channels.Channels


private[spark]
class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable {
  def value: ByteBuffer = buffer

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    val length = in.readInt()
    buffer = ByteBuffer.allocate(length)
    var amountRead = 0
    val channel = Channels.newChannel(in)
    while (amountRead < length) {
      val ret = channel.read(buffer)
      if (ret == -1) {
        throw new EOFException("End of file before fully reading buffer")
      }
      amountRead += ret
    }
    buffer.rewind() // Allow us to read it later
  }

  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.writeInt(buffer.limit())
    if (Channels.newChannel(out).write(buffer) != buffer.limit()) {
      throw new IOException("Could not fully write buffer to output stream")
    }
    buffer.rewind() // Allow us to write it again later
  }
} 
Example 8
Source File: FileBasedWriteAheadLogReader.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.streaming.util

import java.io.{Closeable, EOFException, IOException}
import java.nio.ByteBuffer

import org.apache.hadoop.conf.Configuration

import org.apache.spark.internal.Logging


private[streaming] class FileBasedWriteAheadLogReader(path: String, conf: Configuration)
  extends Iterator[ByteBuffer] with Closeable with Logging {

  private val instream = HdfsUtils.getInputStream(path, conf)
  private var closed = (instream == null) // the file may be deleted as we're opening the stream
  private var nextItem: Option[ByteBuffer] = None

  override def hasNext: Boolean = synchronized {
    if (closed) {
      return false
    }

    if (nextItem.isDefined) { // handle the case where hasNext is called without calling next
      true
    } else {
      try {
        val length = instream.readInt()
        val buffer = new Array[Byte](length)
        instream.readFully(buffer)
        nextItem = Some(ByteBuffer.wrap(buffer))
        logTrace("Read next item " + nextItem.get)
        true
      } catch {
        case e: EOFException =>
          logDebug("Error reading next item, EOF reached", e)
          close()
          false
        case e: IOException =>
          logWarning("Error while trying to read data. If the file was deleted, " +
            "this should be okay.", e)
          close()
          if (HdfsUtils.checkFileExists(path, conf)) {
            // If file exists, this could be a legitimate error
            throw e
          } else {
            // File was deleted. This can occur when the daemon cleanup thread takes time to
            // delete the file during recovery.
            false
          }

        case e: Exception =>
          logWarning("Error while trying to read data from HDFS.", e)
          close()
          throw e
      }
    }
  }

  override def next(): ByteBuffer = synchronized {
    val data = nextItem.getOrElse {
      close()
      throw new IllegalStateException(
        "next called without calling hasNext or after hasNext returned false")
    }
    nextItem = None // Ensure the next hasNext call loads new data.
    data
  }

  override def close(): Unit = synchronized {
    if (!closed) {
      instream.close()
    }
    closed = true
  }
} 
Example 9
Source File: SerializableBuffer.scala    From spark1.52   with Apache License 2.0
package org.apache.spark.util

import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer
import java.nio.channels.Channels


private[spark]
class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable {
  def value: ByteBuffer = buffer

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    val length = in.readInt()
    // A buffer must be allocated before it can be read or written; the static ByteBuffer.allocate() method does that
    buffer = ByteBuffer.allocate(length)
    var amountRead = 0
    val channel = Channels.newChannel(in)
    while (amountRead < length) {
      val ret = channel.read(buffer)
      if (ret == -1) {
        throw new EOFException("End of file before fully reading buffer")
      }
      amountRead += ret
    }
    buffer.rewind() // Allow us to read it later
  }

  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.writeInt(buffer.limit())
    if (Channels.newChannel(out).write(buffer) != buffer.limit()) {
      throw new IOException("Could not fully write buffer to output stream")
    }
    buffer.rewind() // Allow us to write it again later
  }
} 
Example 10
Source File: FileBasedWriteAheadLogReader.scala    From spark1.52   with Apache License 2.0
package org.apache.spark.streaming.util

import java.io.{Closeable, EOFException}
import java.nio.ByteBuffer

import org.apache.hadoop.conf.Configuration
import org.apache.spark.Logging


private[streaming] class FileBasedWriteAheadLogReader(path: String, conf: Configuration)
  extends Iterator[ByteBuffer] with Closeable with Logging {

  private val instream = HdfsUtils.getInputStream(path, conf)
  private var closed = false
  // None is an object, not a class: use None when there is no value and wrap a value in Some when there is one; both are subtypes of Option
  private var nextItem: Option[ByteBuffer] = None

  override def hasNext: Boolean = synchronized {
    if (closed) {
      // Once the reader is closed there is nothing left to read
      return false
    }
  
    if (nextItem.isDefined) { // handle the case where hasNext is called without calling next
      true
    } else {
      try {
        // Read the next item; if one exists, hasNext is indeed true
        val length = instream.readInt()
        val buffer = new Array[Byte](length)
        instream.readFully(buffer)
        nextItem = Some(ByteBuffer.wrap(buffer))
        logTrace("Read next item " + nextItem.get)
        true
      } catch {
        case e: EOFException =>
          logDebug("Error reading next item, EOF reached", e)
          close()
          false
        case e: Exception =>
          logWarning("Error while trying to read data from HDFS.", e)
          close()
          throw e
      }
    }
  }

  override def next(): ByteBuffer = synchronized {
    val data = nextItem.getOrElse {
      close()
      throw new IllegalStateException(
        "next called without calling hasNext or after hasNext returned false")
    }
    nextItem = None // Ensure the next hasNext call loads new data.
    data
  }

  override def close(): Unit = synchronized {
    if (!closed) {
      instream.close()
    }
    closed = true
  }
} 
Example 11
Source File: SerializableBuffer.scala    From iolap   with Apache License 2.0
package org.apache.spark.util

import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer
import java.nio.channels.Channels


private[spark]
class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable {
  def value: ByteBuffer = buffer

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    val length = in.readInt()
    buffer = ByteBuffer.allocate(length)
    var amountRead = 0
    val channel = Channels.newChannel(in)
    while (amountRead < length) {
      val ret = channel.read(buffer)
      if (ret == -1) {
        throw new EOFException("End of file before fully reading buffer")
      }
      amountRead += ret
    }
    buffer.rewind() // Allow us to read it later
  }

  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.writeInt(buffer.limit())
    if (Channels.newChannel(out).write(buffer) != buffer.limit()) {
      throw new IOException("Could not fully write buffer to output stream")
    }
    buffer.rewind() // Allow us to write it again later
  }
} 
Example 12
Source File: FileBasedWriteAheadLogReader.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.streaming.util

import java.io.{Closeable, EOFException, IOException}
import java.nio.ByteBuffer

import org.apache.hadoop.conf.Configuration

import org.apache.spark.internal.Logging


private[streaming] class FileBasedWriteAheadLogReader(path: String, conf: Configuration)
  extends Iterator[ByteBuffer] with Closeable with Logging {

  private val instream = HdfsUtils.getInputStream(path, conf)
  private var closed = (instream == null) // the file may be deleted as we're opening the stream
  private var nextItem: Option[ByteBuffer] = None

  override def hasNext: Boolean = synchronized {
    if (closed) {
      return false
    }

    if (nextItem.isDefined) { // handle the case where hasNext is called without calling next
      true
    } else {
      try {
        val length = instream.readInt()
        val buffer = new Array[Byte](length)
        instream.readFully(buffer)
        nextItem = Some(ByteBuffer.wrap(buffer))
        logTrace("Read next item " + nextItem.get)
        true
      } catch {
        case e: EOFException =>
          logDebug("Error reading next item, EOF reached", e)
          close()
          false
        case e: IOException =>
          logWarning("Error while trying to read data. If the file was deleted, " +
            "this should be okay.", e)
          close()
          if (HdfsUtils.checkFileExists(path, conf)) {
            // If file exists, this could be a legitimate error
            throw e
          } else {
            // File was deleted. This can occur when the daemon cleanup thread takes time to
            // delete the file during recovery.
            false
          }

        case e: Exception =>
          logWarning("Error while trying to read data from HDFS.", e)
          close()
          throw e
      }
    }
  }

  override def next(): ByteBuffer = synchronized {
    val data = nextItem.getOrElse {
      close()
      throw new IllegalStateException(
        "next called without calling hasNext or after hasNext returned false")
    }
    nextItem = None // Ensure the next hasNext call loads new data.
    data
  }

  override def close(): Unit = synchronized {
    if (!closed) {
      instream.close()
    }
    closed = true
  }
} 
Example 13
Source File: SerializableBuffer.scala    From multi-tenancy-spark   with Apache License 2.0
package org.apache.spark.util

import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer
import java.nio.channels.Channels


private[spark]
class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable {
  def value: ByteBuffer = buffer

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    val length = in.readInt()
    buffer = ByteBuffer.allocate(length)
    var amountRead = 0
    val channel = Channels.newChannel(in)
    while (amountRead < length) {
      val ret = channel.read(buffer)
      if (ret == -1) {
        throw new EOFException("End of file before fully reading buffer")
      }
      amountRead += ret
    }
    buffer.rewind() // Allow us to read it later
  }

  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.writeInt(buffer.limit())
    if (Channels.newChannel(out).write(buffer) != buffer.limit()) {
      throw new IOException("Could not fully write buffer to output stream")
    }
    buffer.rewind() // Allow us to write it again later
  }
} 
Example 14
Source File: FileBasedWriteAheadLogReader.scala    From multi-tenancy-spark   with Apache License 2.0
package org.apache.spark.streaming.util

import java.io.{Closeable, EOFException, IOException}
import java.nio.ByteBuffer

import org.apache.hadoop.conf.Configuration

import org.apache.spark.internal.Logging


private[streaming] class FileBasedWriteAheadLogReader(path: String, conf: Configuration)
  extends Iterator[ByteBuffer] with Closeable with Logging {

  private val instream = HdfsUtils.getInputStream(path, conf)
  private var closed = (instream == null) // the file may be deleted as we're opening the stream
  private var nextItem: Option[ByteBuffer] = None

  override def hasNext: Boolean = synchronized {
    if (closed) {
      return false
    }

    if (nextItem.isDefined) { // handle the case where hasNext is called without calling next
      true
    } else {
      try {
        val length = instream.readInt()
        val buffer = new Array[Byte](length)
        instream.readFully(buffer)
        nextItem = Some(ByteBuffer.wrap(buffer))
        logTrace("Read next item " + nextItem.get)
        true
      } catch {
        case e: EOFException =>
          logDebug("Error reading next item, EOF reached", e)
          close()
          false
        case e: IOException =>
          logWarning("Error while trying to read data. If the file was deleted, " +
            "this should be okay.", e)
          close()
          if (HdfsUtils.checkFileExists(path, conf)) {
            // If file exists, this could be a legitimate error
            throw e
          } else {
            // File was deleted. This can occur when the daemon cleanup thread takes time to
            // delete the file during recovery.
            false
          }

        case e: Exception =>
          logWarning("Error while trying to read data from HDFS.", e)
          close()
          throw e
      }
    }
  }

  override def next(): ByteBuffer = synchronized {
    val data = nextItem.getOrElse {
      close()
      throw new IllegalStateException(
        "next called without calling hasNext or after hasNext returned false")
    }
    nextItem = None // Ensure the next hasNext call loads new data.
    data
  }

  override def close(): Unit = synchronized {
    if (!closed) {
      instream.close()
    }
    closed = true
  }
} 
Example 15
Source File: WholeFileInputFormat.scala    From flink-tensorflow   with Apache License 2.0
package org.apache.flink.contrib.tensorflow.io

import java.io.{EOFException, IOException, InputStream}

import org.apache.flink.api.common.io.FileInputFormat
import org.apache.flink.configuration.Configuration
import org.apache.flink.core.fs._
import org.apache.flink.util.Preconditions.checkState


// The class declaration and its scaladoc were elided from this snippet; judging from the imports,
// the inherited fields used below (currentSplit, stream) and the companion object, the enclosing
// type is an abstract Flink FileInputFormat specialised to whole-file records:
abstract class WholeFileInputFormat[T] extends FileInputFormat[T] {

  @throws[IOException]
  def readRecord(reuse: T, filePath: Path, fileStream: FSDataInputStream, fileLength: Long): T

  // --------------------------------------------------------------------------------------------
  //  Lifecycle
  // --------------------------------------------------------------------------------------------

  override def nextRecord(reuse: T): T = {
    checkState(!reachedEnd())
    checkState(currentSplit != null && currentSplit.getStart == 0)
    checkState(stream != null)
    readRecord(reuse, currentSplit.getPath, stream, currentSplit.getLength)
  }

  override def reachedEnd(): Boolean = {
    stream.getPos != 0
  }
}

@SerialVersionUID(1L)
object WholeFileInputFormat {

  @throws[IOException]
  def readFully(fileStream: FSDataInputStream, fileLength: Long): Array[Byte] = {
    if(fileLength > Int.MaxValue) {
      throw new IllegalArgumentException("the file is too large to be fully read")
    }
    val buf = new Array[Byte](fileLength.toInt)
    readFully(fileStream, buf, 0, fileLength.toInt)
    buf
  }

  @throws[IOException]
  def readFully(inputStream: InputStream, buf: Array[Byte], off: Int, len: Int): Array[Byte] = {
    var bytesRead = 0
    while (bytesRead < len) {
      val read = inputStream.read(buf, off + bytesRead, len - bytesRead)
      if (read < 0) throw new EOFException("Premature end of stream")
      bytesRead += read
    }
    buf
  }
} 
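
Usage note (not part of the original source): a small sketch of the readFully helper on an in-memory stream; asking for more bytes than the stream holds is exactly what triggers the EOFException above.

import java.io.ByteArrayInputStream

val data = Array[Byte](1, 2, 3, 4, 5)

// Reads exactly five bytes into the target buffer.
val buf = WholeFileInputFormat.readFully(new ByteArrayInputStream(data), new Array[Byte](5), 0, 5)

// Requesting more bytes than remain throws java.io.EOFException("Premature end of stream"):
// WholeFileInputFormat.readFully(new ByteArrayInputStream(data), new Array[Byte](8), 0, 8)
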
Example 16
Source File: SerializableBuffer.scala    From SparkCore   with Apache License 2.0
package org.apache.spark.util

import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer
import java.nio.channels.Channels


private[spark]
class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable {
  def value = buffer

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    val length = in.readInt()
    buffer = ByteBuffer.allocate(length)
    var amountRead = 0
    val channel = Channels.newChannel(in)
    while (amountRead < length) {
      val ret = channel.read(buffer)
      if (ret == -1) {
        throw new EOFException("End of file before fully reading buffer")
      }
      amountRead += ret
    }
    buffer.rewind() // Allow us to read it later
  }

  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.writeInt(buffer.limit())
    if (Channels.newChannel(out).write(buffer) != buffer.limit()) {
      throw new IOException("Could not fully write buffer to output stream")
    }
    buffer.rewind() // Allow us to write it again later
  }
} 
Example 17
Source File: SerializableBuffer.scala    From sparkoscope   with Apache License 2.0
package org.apache.spark.util

import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer
import java.nio.channels.Channels


private[spark]
class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable {
  def value: ByteBuffer = buffer

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    val length = in.readInt()
    buffer = ByteBuffer.allocate(length)
    var amountRead = 0
    val channel = Channels.newChannel(in)
    while (amountRead < length) {
      val ret = channel.read(buffer)
      if (ret == -1) {
        throw new EOFException("End of file before fully reading buffer")
      }
      amountRead += ret
    }
    buffer.rewind() // Allow us to read it later
  }

  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.writeInt(buffer.limit())
    if (Channels.newChannel(out).write(buffer) != buffer.limit()) {
      throw new IOException("Could not fully write buffer to output stream")
    }
    buffer.rewind() // Allow us to write it again later
  }
} 
Example 18
Source File: FileBasedWriteAheadLogReader.scala    From sparkoscope   with Apache License 2.0
package org.apache.spark.streaming.util

import java.io.{Closeable, EOFException, IOException}
import java.nio.ByteBuffer

import org.apache.hadoop.conf.Configuration

import org.apache.spark.internal.Logging


private[streaming] class FileBasedWriteAheadLogReader(path: String, conf: Configuration)
  extends Iterator[ByteBuffer] with Closeable with Logging {

  private val instream = HdfsUtils.getInputStream(path, conf)
  private var closed = (instream == null) // the file may be deleted as we're opening the stream
  private var nextItem: Option[ByteBuffer] = None

  override def hasNext: Boolean = synchronized {
    if (closed) {
      return false
    }

    if (nextItem.isDefined) { // handle the case where hasNext is called without calling next
      true
    } else {
      try {
        val length = instream.readInt()
        val buffer = new Array[Byte](length)
        instream.readFully(buffer)
        nextItem = Some(ByteBuffer.wrap(buffer))
        logTrace("Read next item " + nextItem.get)
        true
      } catch {
        case e: EOFException =>
          logDebug("Error reading next item, EOF reached", e)
          close()
          false
        case e: IOException =>
          logWarning("Error while trying to read data. If the file was deleted, " +
            "this should be okay.", e)
          close()
          if (HdfsUtils.checkFileExists(path, conf)) {
            // If file exists, this could be a legitimate error
            throw e
          } else {
            // File was deleted. This can occur when the daemon cleanup thread takes time to
            // delete the file during recovery.
            false
          }

        case e: Exception =>
          logWarning("Error while trying to read data from HDFS.", e)
          close()
          throw e
      }
    }
  }

  override def next(): ByteBuffer = synchronized {
    val data = nextItem.getOrElse {
      close()
      throw new IllegalStateException(
        "next called without calling hasNext or after hasNext returned false")
    }
    nextItem = None // Ensure the next hasNext call loads new data.
    data
  }

  override def close(): Unit = synchronized {
    if (!closed) {
      instream.close()
    }
    closed = true
  }
} 
Example 19
Source File: HadoopCheckpointStoreReader.scala    From incubator-retired-gearpump   with Apache License 2.0
package org.apache.gearpump.streaming.hadoop.lib

import java.io.EOFException

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

import org.apache.gearpump.Time.MilliSeconds

class HadoopCheckpointStoreReader(
    path: Path,
    hadoopConfig: Configuration)
  extends Iterator[(MilliSeconds, Array[Byte])] {

  private val stream = HadoopUtil.getInputStream(path, hadoopConfig)
  private var nextTimeStamp: Option[MilliSeconds] = None
  private var nextData: Option[Array[Byte]] = None

  override def hasNext: Boolean = {
    if (nextTimeStamp.isDefined) {
      true
    } else {
      try {
        nextTimeStamp = Some(stream.readLong())
        val length = stream.readInt()
        val buffer = new Array[Byte](length)
        stream.readFully(buffer)
        nextData = Some(buffer)
        true
      } catch {
        case e: EOFException =>
          close()
          false
        case e: Exception =>
          close()
          throw e
      }
    }
  }

  override def next(): (MilliSeconds, Array[Byte]) = {
    val timeAndData = for {
      time <- nextTimeStamp
      data <- nextData
    } yield (time, data)
    nextTimeStamp = None
    nextData = None
    timeAndData.get
  }

  def close(): Unit = {
    stream.close()
  }
} 
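
Usage note (not from the original source): a sketch of replaying a checkpoint file; the path below is hypothetical, and hasNext closes the underlying stream itself once it hits EOF.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

val reader = new HadoopCheckpointStoreReader(new Path("/gearpump/checkpoints/task-0"), new Configuration())
while (reader.hasNext) {
  val (timestamp, bytes) = reader.next()
  println(s"checkpoint at $timestamp: ${bytes.length} bytes")
}
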
Example 20
Source File: Decode.scala    From scala-stellar-sdk   with Apache License 2.0
package stellar.sdk.model.xdr

import java.io.EOFException
import java.nio.ByteBuffer
import java.nio.charset.StandardCharsets
import java.time.Instant

import cats.Eval
import cats.data.{IndexedStateT, State}
import com.typesafe.scalalogging.LazyLogging

import scala.util.Try

trait Decode extends LazyLogging {

  private def decode[T](bs: Seq[Byte], len: Int)(decoder: Seq[Byte] => T): (Seq[Byte], T) = {
    if (bs.length < len) throw new EOFException("Insufficient data remains to parse.")
    val t = decoder(bs.take(len))
    logger.trace(s"Dropping {} to make {}", len, t)
    bs.drop(len) -> t
  }

  val int: State[Seq[Byte], Int] = State[Seq[Byte], Int] { bs =>
    decode(bs, 4) { in => ByteBuffer.wrap(in.toArray).getInt }
  }

  val long: State[Seq[Byte], Long] = State[Seq[Byte], Long] { bs =>
    decode(bs, 8) { in => ByteBuffer.wrap(in.toArray).getLong }
  }

  val instant: State[Seq[Byte], Instant] = long.map(Instant.ofEpochSecond)

  val bool: State[Seq[Byte], Boolean] = int.map(_ == 1)

  def bytes(len: Int): State[Seq[Byte], Seq[Byte]] = State[Seq[Byte], Seq[Byte]] { bs =>
    decode(bs, len) { _.take(len) }
  }

  val bytes: State[Seq[Byte], Seq[Byte]] = for {
    len <- int
    bs <- bytes(len)
  } yield bs

  def padded(multipleOf: Int = 4): State[Seq[Byte], Seq[Byte]] = for {
    len <- int
    bs <- bytes(len)
    _ <- bytes((multipleOf - (len % multipleOf)) % multipleOf)
  } yield bs

  val string: State[Seq[Byte], String] = padded().map(_.toArray).map(new String(_, StandardCharsets.UTF_8))

  def switch[T](zero: State[Seq[Byte], T], others: State[Seq[Byte], T]*): IndexedStateT[Eval, Seq[Byte], Seq[Byte], T] = int.flatMap {
    case 0 => zero
    case n =>  Try(others(n - 1)).getOrElse {
      throw new IllegalArgumentException(s"No parser defined for discriminant $n")
    }
  }

  // TODO (jem) - All switches should use this instead and Discriminators should be held in the parent (switcher not switchee).
  def switchInt[T](zero: State[Seq[Byte], T], others: State[Seq[Byte], T]*): State[Seq[Byte], (T, Int)] = int.flatMap {
    case 0 => zero.map(_ -> 0)
    case n => Try(others(n - 1).map(_ -> n)).getOrElse {
      throw new IllegalArgumentException(s"No parser defined for discriminant $n")
    }
  }

  def opt[T](parseT: State[Seq[Byte], T]): State[Seq[Byte], Option[T]] = bool.flatMap {
    case true => parseT.map(Some(_))
    case false => State.pure(None)
  }

  def arr[T](parseT: State[Seq[Byte], T]): State[Seq[Byte], Seq[T]] = int.flatMap(seq(_, parseT))

  // $COVERAGE-OFF$
  // For debugging XDR only.
  def log[T](t: T): State[Seq[Byte], Unit] = State[Seq[Byte], Unit] { bs =>
    logger.debug("{}\n", t)
    bs -> ()
  }
  // $COVERAGE-ON$

  def seq[T](qty: Int, parseT: State[Seq[Byte], T]): State[Seq[Byte], Seq[T]] = {
    (0 until qty).foldLeft(State.pure[Seq[Byte], Seq[T]](Seq.empty[T])) { case (state, _) =>
      for {
        ts <- state
        t <- parseT
      } yield ts :+ t
    }
  }

  def drop[T](parse: State[Seq[Byte], _])(t: T): State[Seq[Byte], T] = for {
    _ <- parse
  } yield t

  def widen[A, W, O <: W](s: State[A, O]): State[A, W] = s.map(w => w: W)
} 
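
Usage note (not part of the original source): a minimal sketch of composing and running these State-based decoders with cats; the twelve input bytes are hand-built, and running the parser on a truncated Seq[Byte] is what raises the EOFException above.

object XdrDecode extends Decode

// Four big-endian bytes for an Int, then eight for a Long.
val input: Seq[Byte] = Seq[Byte](0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 42)

val parser = for {
  i <- XdrDecode.int
  l <- XdrDecode.long
} yield (i, l)

val (remaining, (i, l)) = parser.run(input).value
// i == 7, l == 42L, and `remaining` is empty
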
Example 21
Source File: TypeInformationDataInputFormat.scala    From milan   with Apache License 2.0
package com.amazon.milan.compiler.flink.dataformats

import java.io.{ByteArrayInputStream, EOFException, InputStream}

import com.amazon.milan.dataformats.DataInputFormat
import com.amazon.milan.typeutil.TypeDescriptor
import org.apache.flink.api.common.ExecutionConfig
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.common.typeutils.TypeSerializer
import org.apache.flink.core.memory.DataInputViewStreamWrapper



class TypeInformationDataInputFormat[T](typeInfo: TypeInformation[T]) extends DataInputFormat[T] {
  @transient private lazy val serializer = this.createSerializer()

  override def getGenericArguments: List[TypeDescriptor[_]] = {
    // This class is not intended to be serialized by GenericTypedJsonSerializer, so this should not be called.
    throw new UnsupportedOperationException()
  }

  override def setGenericArguments(genericArgs: List[TypeDescriptor[_]]): Unit = {
    // This class is not intended to be deserialized by GenericTypedJsonDeserializer, so this should not be called.
    throw new UnsupportedOperationException()
  }

  override def readValue(bytes: Array[Byte], offset: Int, length: Int): Option[T] = {
    val input = new DataInputViewStreamWrapper(new ByteArrayInputStream(bytes, offset, length))
    Some(this.serializer.deserialize(input))
  }

  override def readValues(stream: InputStream): TraversableOnce[T] = {
    val input = new DataInputViewStreamWrapper(stream)
    Stream.continually(0)
      .map(_ =>
        try {
          Some(this.serializer.deserialize(input))
        }
        catch {
          case _: EOFException => None
        })
      .takeWhile(_.isDefined)
      .map(_.get)
  }

  private def createSerializer(): TypeSerializer[T] = {
    val config = new ExecutionConfig()
    this.typeInfo.createSerializer(config)
  }
} 
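
Usage note (not from the original source): a sketch that serializes a few Ints with Flink's own serializer and reads them back through readValues; the EOFException caught above is what terminates the resulting stream cleanly.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import org.apache.flink.api.common.ExecutionConfig
import org.apache.flink.api.common.typeinfo.BasicTypeInfo
import org.apache.flink.core.memory.DataOutputViewStreamWrapper

val typeInfo = BasicTypeInfo.INT_TYPE_INFO
val serializer = typeInfo.createSerializer(new ExecutionConfig())

// Write three values in the serializer's binary format.
val bytesOut = new ByteArrayOutputStream()
val output = new DataOutputViewStreamWrapper(bytesOut)
Seq(1, 2, 3).foreach(i => serializer.serialize(i, output))

// Read them back; deserialization stops when it hits EOFException.
val format = new TypeInformationDataInputFormat[java.lang.Integer](typeInfo)
val values = format.readValues(new ByteArrayInputStream(bytesOut.toByteArray)).toList
// values == List(1, 2, 3)
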
Example 22
Source File: SerializableBuffer.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.util

import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer
import java.nio.channels.Channels


private[spark]
class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable {
  def value: ByteBuffer = buffer

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    val length = in.readInt()
    buffer = ByteBuffer.allocate(length)
    var amountRead = 0
    val channel = Channels.newChannel(in)
    while (amountRead < length) {
      val ret = channel.read(buffer)
      if (ret == -1) {
        throw new EOFException("End of file before fully reading buffer")
      }
      amountRead += ret
    }
    buffer.rewind() // Allow us to read it later
  }

  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.writeInt(buffer.limit())
    if (Channels.newChannel(out).write(buffer) != buffer.limit()) {
      throw new IOException("Could not fully write buffer to output stream")
    }
    buffer.rewind() // Allow us to write it again later
  }
}