java.nio.channels.Channels Scala Examples

The following examples show how to use java.nio.channels.Channels. They are taken from open-source Scala projects; the source file, project, and license are listed above each example.
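As a quick orientation before the project examples: Channels is a utility class that bridges java.io streams and java.nio channels. Below is a minimal, self-contained sketch of the bridge methods used throughout this page; the object name and file paths are purely illustrative.

import java.io.{FileInputStream, FileOutputStream}
import java.nio.ByteBuffer
import java.nio.channels.Channels

object ChannelsBasics extends App {
  // Wrap a classic InputStream as a ReadableByteChannel and read into a ByteBuffer.
  val in = new FileInputStream("/tmp/input.bin")          // hypothetical path
  val readable = Channels.newChannel(in)
  val buf = ByteBuffer.allocate(4096)
  val bytesRead = readable.read(buf)                      // returns -1 at end of stream
  readable.close()

  // Wrap a classic OutputStream as a WritableByteChannel and write the buffer back out.
  val out = new FileOutputStream("/tmp/output.bin")       // hypothetical path
  val writable = Channels.newChannel(out)
  buf.flip()                                              // switch the buffer from writing to reading
  writable.write(buf)
  writable.close()

  // The reverse direction: view a channel as an ordinary InputStream.
  val asStream = Channels.newInputStream(Channels.newChannel(new FileInputStream("/tmp/input.bin")))
  asStream.close()
}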
Example 1
Source File: SerializableBuffer.scala    From SparkCore   with Apache License 2.0
package org.apache.spark.util

import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer
import java.nio.channels.Channels


private[spark]
class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable {
  def value: ByteBuffer = buffer

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    val length = in.readInt()
    buffer = ByteBuffer.allocate(length)
    var amountRead = 0
    val channel = Channels.newChannel(in)
    while (amountRead < length) {
      val ret = channel.read(buffer)
      if (ret == -1) {
        throw new EOFException("End of file before fully reading buffer")
      }
      amountRead += ret
    }
    buffer.rewind() // Allow us to read it later
  }

  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.writeInt(buffer.limit())
    if (Channels.newChannel(out).write(buffer) != buffer.limit()) {
      throw new IOException("Could not fully write buffer to output stream")
    }
    buffer.rewind() // Allow us to write it again later
  }
} 
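The pattern above (a length prefix followed by a channel-based copy loop inside readObject/writeObject) also works outside Spark. Here is a self-contained sketch of the same round trip without the Spark-internal Utils.tryOrIOException wrapper; the object name and payload are illustrative.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, EOFException, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer
import java.nio.channels.Channels

object BufferRoundTrip extends App {
  val original = ByteBuffer.wrap("hello channels".getBytes("UTF-8"))

  // Write: length prefix, then the buffer contents through a WritableByteChannel.
  val bos = new ByteArrayOutputStream()
  val oos = new ObjectOutputStream(bos)
  oos.writeInt(original.limit())
  Channels.newChannel(oos).write(original)
  oos.flush()

  // Read: allocate a buffer of the advertised length and fill it from a ReadableByteChannel.
  val ois = new ObjectInputStream(new ByteArrayInputStream(bos.toByteArray))
  val length = ois.readInt()
  val restored = ByteBuffer.allocate(length)
  val channel = Channels.newChannel(ois)
  var read = 0
  while (read < length) {
    val n = channel.read(restored)
    if (n == -1) throw new EOFException("End of stream before buffer was fully read")
    read += n
  }
  restored.rewind()
  assert(new String(restored.array(), "UTF-8") == "hello channels")
}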
Example 2
Source File: ProtobufUtilTest.scala    From scio   with Apache License 2.0
package com.spotify.scio.util

import java.io.File
import java.nio.channels.Channels
import java.nio.file.Files

import com.spotify.scio.ScioContext
import com.spotify.scio.avro._
import com.spotify.scio.coders.Coder
import com.spotify.scio.proto.Track.TrackPB
import org.apache.avro.file.DataFileStream
import org.apache.avro.generic.{GenericDatumReader, GenericRecord}
import org.apache.beam.sdk.io.{FileSystems, LocalResources}
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

import scala.jdk.CollectionConverters._

class ProtobufUtilTest extends AnyFlatSpec with Matchers {

  "ProtobufUtil" should "convert Message -> GenericRecords that can be written and read" in {
    val sc = ScioContext()

    val dir = Files.createTempDirectory("protobuf-util-")
    val (path1, path2) = (new File(s"$dir/1"), new File(s"$dir/2"))
    path1.deleteOnExit()
    path2.deleteOnExit()
    dir.toFile.deleteOnExit()

    implicit val grCoder: Coder[GenericRecord] = ProtobufUtil.AvroMessageCoder

    val messages = sc
      .parallelize(1 to 10)
      .map(i => TrackPB.newBuilder().setTrackId(i.toString).build())

    messages
      .map(ProtobufUtil.toAvro[TrackPB])
      .saveAsAvroFile(
        path1.getPath,
        suffix = ".protobuf",
        metadata = ProtobufUtil.schemaMetadataOf[TrackPB],
        schema = ProtobufUtil.AvroMessageSchema,
        numShards = 1
      )

    val protoWriteTap = messages.saveAsProtobufFile(path2.getPath, numShards = 1)

    val result = sc.run().waitUntilDone()

    val (tapFromAvroWrite, tapFromProtoWrite) = (
      ObjectFileTap[TrackPB](ScioUtil.addPartSuffix(path1.getPath)),
      protoWriteTap.get(result)
    )

    tapFromAvroWrite.value.toList should contain theSameElementsAs tapFromProtoWrite.value.toList
    getMetadata(path1) should contain theSameElementsAs getMetadata(path2)
  }

  private def getMetadata(dir: File): Map[String, AnyRef] = {
    val files = dir.listFiles()
    if (files.length != 1) {
      fail(s"Directory $dir should contain 1 Avro file. Instead, found ${files.toList}")
    }

    val dfs = new DataFileStream[GenericRecord](
      Channels.newInputStream(FileSystems.open(LocalResources.fromFile(files(0), false))),
      new GenericDatumReader[GenericRecord]
    )

    dfs.getMetaKeys.asScala.map(k => (k, dfs.getMetaString(k))).toMap
  }
} 
Example 3
Source File: TFRecordCodec.scala    From scio   with Apache License 2.0
package com.spotify.scio.tensorflow

import java.io.{InputStream, PushbackInputStream}
import java.nio.channels.Channels
import java.nio.{ByteBuffer, ByteOrder}
import java.util.zip.GZIPInputStream

import org.apache.beam.sdk.io.Compression
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.hash.Hashing
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.Ints
import org.apache.commons.compress.compressors.deflate._
import org.apache.commons.compress.compressors.gzip._

private object TFRecordCodec {
  private val headerLength: Int =
    (java.lang.Long.SIZE + java.lang.Integer.SIZE) / java.lang.Byte.SIZE
  private val footerLength: Int = java.lang.Integer.SIZE / java.lang.Byte.SIZE
  private val crc32c = Hashing.crc32c()

  private def mask(crc: Int): Int = ((crc >>> 15) | (crc << 17)) + 0xa282ead8

  def read(input: InputStream): Array[Byte] = {
    val headerBytes = readFully(input, headerLength)
    if (headerBytes != null) {
      val headerBuf =
        ByteBuffer.wrap(headerBytes).order(ByteOrder.LITTLE_ENDIAN)
      val length = headerBuf.getLong
      val maskedCrc32OfLength = headerBuf.getInt
      require(hashLong(length) == maskedCrc32OfLength, "Invalid masked CRC32 of length")

      val data = readFully(input, length.toInt)

      val footerBytes = readFully(input, footerLength)
      val footerBuf =
        ByteBuffer.wrap(footerBytes).order(ByteOrder.LITTLE_ENDIAN)
      val maskedCrc32OfData = footerBuf.getInt
      require(hashBytes(data) == maskedCrc32OfData, "Invalid masked CRC32 of data")
      data
    } else {
      null
    }
  }

  // InflaterInputStream#read may not fill the buffer fully even when more data is available
  private def readFully(input: InputStream, length: Int): Array[Byte] = {
    val data = Array.ofDim[Byte](length)
    var n = 0
    var off = 0
    do {
      n = input.read(data, off, data.length - off)
      if (n > 0) {
        off += n
      }
    } while (n > 0 && off < data.length)
    if (n <= 0) null else data
  }

  def wrapInputStream(stream: InputStream, compression: Compression): InputStream = {
    val deflateParam = new DeflateParameters()
    deflateParam.setWithZlibHeader(true)

    compression match {
      case Compression.AUTO =>
        val pushback = new PushbackInputStream(stream, 2)
        if (isInflaterInputStream(pushback)) {
          new DeflateCompressorInputStream(pushback, deflateParam)
        } else if (isGzipInputStream(pushback)) {
          new GzipCompressorInputStream(pushback)
        } else {
          pushback
        }
      case Compression.UNCOMPRESSED => stream
      case _ =>
        Channels.newInputStream(compression.readDecompressed(Channels.newChannel(stream)))
    }
  }

  private def hashLong(x: Long): Int = mask(crc32c.hashLong(x).asInt())
  private def hashBytes(x: Array[Byte]): Int = mask(crc32c.hashBytes(x).asInt())

  private def isGzipInputStream(pushback: PushbackInputStream): Boolean = {
    val b1 = pushback.read()
    val b2 = pushback.read()
    if (b2 != -1) pushback.unread(b2)
    if (b1 != -1) pushback.unread(b1)
    val zero: Byte = 0x00
    val header = Ints.fromBytes(zero, zero, b2.toByte, b1.toByte)
    (b1 != -1 && b2 != -1) && header == GZIPInputStream.GZIP_MAGIC
  }

  private def isInflaterInputStream(pushback: PushbackInputStream): Boolean = {
    val b1 = pushback.read()
    val b2 = pushback.read()
    if (b2 != -1) pushback.unread(b2)
    if (b1 != -1) pushback.unread(b1)
    (b1 != -1 && b2 != -1) && (b1 == 0x78 && (b1 * 256 + b2) % 31 == 0)
  }
} 
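A hedged sketch of how a codec like this might be driven to iterate over every record in a TFRecord stream. The file path is hypothetical, and the snippet assumes the caller lives in the same package, since TFRecordCodec is declared private.

import java.io.{BufferedInputStream, FileInputStream}
import org.apache.beam.sdk.io.Compression

// Open the raw stream and let the codec pick the right decompressor (Compression.AUTO).
val raw = new BufferedInputStream(new FileInputStream("/tmp/data.tfrecord"))  // hypothetical path
val in = TFRecordCodec.wrapInputStream(raw, Compression.AUTO)

// read() returns null at end of stream, so takeWhile(_ != null) terminates the iterator.
val records: Iterator[Array[Byte]] =
  Iterator.continually(TFRecordCodec.read(in)).takeWhile(_ != null)
records.foreach(bytes => println(s"record of ${bytes.length} bytes"))
in.close()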
Example 4
Source File: AttachmentService.scala    From BacklogMigration-Redmine   with MIT License
package com.nulabinc.backlog.r2b.exporter.service

import java.io.{File, FileOutputStream}
import java.net.{HttpURLConnection, URL}
import java.nio.channels.Channels

import com.nulabinc.backlog.migration.common.utils.ControlUtil.using
import com.nulabinc.backlog.migration.common.utils.Logging

object AttachmentService extends Logging {
  private val MAX_REDIRECT_COUNT = 10

  def download(url: URL, file: File): Unit = {
    val redirected = followRedirect(url)

    doDownload(redirected, file)
  }

  private def doDownload(url: URL, file: File): Unit =
    try {
      val rbc = Channels.newChannel(url.openStream())
      val fos = new FileOutputStream(file)
      fos.getChannel.transferFrom(rbc, 0, java.lang.Long.MAX_VALUE)

      rbc.close()
      fos.close()
    } catch {
      case e: Throwable => logger.warn("Download attachment failed: " + e.getMessage)
    }

  private def followRedirect(url: URL, count: Int = 0): URL =
    url.openConnection match {
      case http: HttpURLConnection =>
        http.setRequestMethod("GET")
        http.connect()
        using(http) { connection =>
          connection.getResponseCode match {
            case 301 | 302 | 303 =>
              val newUrl = new URL(connection.getHeaderField("Location"))
              if (count < MAX_REDIRECT_COUNT) followRedirect(newUrl, count + 1) else newUrl
            case _ =>
              url
          }
        }
      case _ =>
        url
    }
} 
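Note that doDownload above leaks the channel and stream if transferFrom throws. A sketch of the same Channels.newChannel + FileChannel.transferFrom idiom with explicit cleanup, using only JDK APIs; the URL and destination path are illustrative.

import java.io.{File, FileOutputStream}
import java.net.URL
import java.nio.channels.Channels

def downloadTo(url: URL, file: File): Unit = {
  // Wrap the HTTP response body as a ReadableByteChannel.
  val rbc = Channels.newChannel(url.openStream())
  try {
    val fos = new FileOutputStream(file)
    // transferFrom copies directly between channels, up to Long.MaxValue bytes.
    try fos.getChannel.transferFrom(rbc, 0, Long.MaxValue)
    finally fos.close()
  } finally rbc.close()
}

downloadTo(new URL("https://example.com/attachment.png"), new File("/tmp/attachment.png"))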
Example 5
Source File: SerializableBuffer.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.util

import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer
import java.nio.channels.Channels


private[spark]
class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable {
  def value: ByteBuffer = buffer

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    val length = in.readInt()
    buffer = ByteBuffer.allocate(length)
    var amountRead = 0
    val channel = Channels.newChannel(in)
    while (amountRead < length) {
      val ret = channel.read(buffer)
      if (ret == -1) {
        throw new EOFException("End of file before fully reading buffer")
      }
      amountRead += ret
    }
    buffer.rewind() // Allow us to read it later
  }

  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.writeInt(buffer.limit())
    if (Channels.newChannel(out).write(buffer) != buffer.limit()) {
      throw new IOException("Could not fully write buffer to output stream")
    }
    buffer.rewind() // Allow us to write it again later
  }
} 
Example 6
Source File: SerializableBuffer.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.util

import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer
import java.nio.channels.Channels


private[spark]
class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable {
  def value: ByteBuffer = buffer

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    val length = in.readInt()
    buffer = ByteBuffer.allocate(length)
    var amountRead = 0
    val channel = Channels.newChannel(in)
    while (amountRead < length) {
      val ret = channel.read(buffer)
      if (ret == -1) {
        throw new EOFException("End of file before fully reading buffer")
      }
      amountRead += ret
    }
    buffer.rewind() // Allow us to read it later
  }

  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.writeInt(buffer.limit())
    if (Channels.newChannel(out).write(buffer) != buffer.limit()) {
      throw new IOException("Could not fully write buffer to output stream")
    }
    buffer.rewind() // Allow us to write it again later
  }
} 
Example 7
Source File: SerializableBuffer.scala    From spark1.52   with Apache License 2.0
package org.apache.spark.util

import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer
import java.nio.channels.Channels


private[spark]
class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable {
  def value: ByteBuffer = buffer

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    val length = in.readInt()
    // ByteBuffer.allocate: a buffer must exist before it can be read or written; the static allocate() method creates one
    buffer = ByteBuffer.allocate(length)
    var amountRead = 0
    val channel = Channels.newChannel(in)
    while (amountRead < length) {
      val ret = channel.read(buffer)
      if (ret == -1) {
        throw new EOFException("End of file before fully reading buffer")
      }
      amountRead += ret
    }
    buffer.rewind() // Allow us to read it later
  }

  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.writeInt(buffer.limit())
    if (Channels.newChannel(out).write(buffer) != buffer.limit()) {
      throw new IOException("Could not fully write buffer to output stream")
    }
    buffer.rewind() // Allow us to write it again later
  }
} 
Example 8
Source File: SerializableBuffer.scala    From iolap   with Apache License 2.0
package org.apache.spark.util

import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer
import java.nio.channels.Channels


private[spark]
class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable {
  def value: ByteBuffer = buffer

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    val length = in.readInt()
    buffer = ByteBuffer.allocate(length)
    var amountRead = 0
    val channel = Channels.newChannel(in)
    while (amountRead < length) {
      val ret = channel.read(buffer)
      if (ret == -1) {
        throw new EOFException("End of file before fully reading buffer")
      }
      amountRead += ret
    }
    buffer.rewind() // Allow us to read it later
  }

  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.writeInt(buffer.limit())
    if (Channels.newChannel(out).write(buffer) != buffer.limit()) {
      throw new IOException("Could not fully write buffer to output stream")
    }
    buffer.rewind() // Allow us to write it again later
  }
} 
Example 9
Source File: SerializableBuffer.scala    From multi-tenancy-spark   with Apache License 2.0
package org.apache.spark.util

import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer
import java.nio.channels.Channels


private[spark]
class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable {
  def value: ByteBuffer = buffer

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    val length = in.readInt()
    buffer = ByteBuffer.allocate(length)
    var amountRead = 0
    val channel = Channels.newChannel(in)
    while (amountRead < length) {
      val ret = channel.read(buffer)
      if (ret == -1) {
        throw new EOFException("End of file before fully reading buffer")
      }
      amountRead += ret
    }
    buffer.rewind() // Allow us to read it later
  }

  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.writeInt(buffer.limit())
    if (Channels.newChannel(out).write(buffer) != buffer.limit()) {
      throw new IOException("Could not fully write buffer to output stream")
    }
    buffer.rewind() // Allow us to write it again later
  }
} 
Example 10
Source File: SerializableBuffer.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.util

import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer
import java.nio.channels.Channels


private[spark]
class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable {
  def value: ByteBuffer = buffer

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    val length = in.readInt()
    buffer = ByteBuffer.allocate(length)
    var amountRead = 0
    val channel = Channels.newChannel(in)
    while (amountRead < length) {
      val ret = channel.read(buffer)
      if (ret == -1) {
        throw new EOFException("End of file before fully reading buffer")
      }
      amountRead += ret
    }
    buffer.rewind() // Allow us to read it later
  }

  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.writeInt(buffer.limit())
    if (Channels.newChannel(out).write(buffer) != buffer.limit()) {
      throw new IOException("Could not fully write buffer to output stream")
    }
    buffer.rewind() // Allow us to write it again later
  }
} 
Example 11
Source File: SerializableBuffer.scala    From sparkoscope   with Apache License 2.0
package org.apache.spark.util

import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer
import java.nio.channels.Channels


private[spark]
class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable {
  def value: ByteBuffer = buffer

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    val length = in.readInt()
    buffer = ByteBuffer.allocate(length)
    var amountRead = 0
    val channel = Channels.newChannel(in)
    while (amountRead < length) {
      val ret = channel.read(buffer)
      if (ret == -1) {
        throw new EOFException("End of file before fully reading buffer")
      }
      amountRead += ret
    }
    buffer.rewind() // Allow us to read it later
  }

  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.writeInt(buffer.limit())
    if (Channels.newChannel(out).write(buffer) != buffer.limit()) {
      throw new IOException("Could not fully write buffer to output stream")
    }
    buffer.rewind() // Allow us to write it again later
  }
} 
Example 12
Source File: PythonSQLUtils.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.api.python

import java.io.InputStream
import java.nio.channels.Channels

import org.apache.spark.api.java.JavaRDD
import org.apache.spark.api.python.PythonRDDServer
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.catalyst.expressions.ExpressionInfo
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.execution.arrow.ArrowConverters
import org.apache.spark.sql.types.DataType

private[sql] object PythonSQLUtils {
  def parseDataType(typeText: String): DataType = CatalystSqlParser.parseDataType(typeText)

  // This is needed when generating SQL documentation for built-in functions.
  def listBuiltinFunctionInfos(): Array[ExpressionInfo] = {
    FunctionRegistry.functionSet.flatMap(f => FunctionRegistry.builtin.lookupFunction(f)).toArray
  }
}

private[sql] class ArrowRDDServer(sqlContext: SQLContext) extends PythonRDDServer {

  override protected def streamToRDD(input: InputStream): RDD[Array[Byte]] = {
    // Create array to consume iterator so that we can safely close the inputStream
    val batches = ArrowConverters.getBatchesFromStream(Channels.newChannel(input)).toArray
    // Parallelize the record batches to create an RDD
    JavaRDD.fromRDD(sqlContext.sparkContext.parallelize(batches, batches.length))
  }

} 
Example 13
Source File: CreateJacksonParser.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.catalyst.json

import java.io.{ByteArrayInputStream, InputStream, InputStreamReader}
import java.nio.channels.Channels
import java.nio.charset.Charset

import com.fasterxml.jackson.core.{JsonFactory, JsonParser}
import org.apache.hadoop.io.Text
import sun.nio.cs.StreamDecoder

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.unsafe.types.UTF8String

private[sql] object CreateJacksonParser extends Serializable {
  def string(jsonFactory: JsonFactory, record: String): JsonParser = {
    jsonFactory.createParser(record)
  }

  def utf8String(jsonFactory: JsonFactory, record: UTF8String): JsonParser = {
    val bb = record.getByteBuffer
    assert(bb.hasArray)

    val bain = new ByteArrayInputStream(
      bb.array(), bb.arrayOffset() + bb.position(), bb.remaining())

    jsonFactory.createParser(new InputStreamReader(bain, "UTF-8"))
  }

  def text(jsonFactory: JsonFactory, record: Text): JsonParser = {
    jsonFactory.createParser(record.getBytes, 0, record.getLength)
  }

  // Jackson parsers can be ranked by performance:
  // 1. Array based, with the array actually encoded in UTF-8. This is the fastest parser,
  //    but it doesn't allow setting the encoding explicitly; the actual encoding is detected
  //    automatically by checking the leading bytes of the array.
  // 2. InputStream based, with the stream actually encoded in UTF-8. The encoding is detected
  //    automatically by analyzing the first bytes of the input stream.
  // 3. Reader based. This is the slowest parser used here, but it allows creating a reader
  //    with a specific encoding.
  // This method creates a reader for an array with the given encoding and sizes the internal
  // decoding buffer according to the size of the input array.
  private def getStreamDecoder(enc: String, in: Array[Byte], length: Int): StreamDecoder = {
    val bais = new ByteArrayInputStream(in, 0, length)
    val byteChannel = Channels.newChannel(bais)
    val decodingBufferSize = Math.min(length, 8192)
    val decoder = Charset.forName(enc).newDecoder()

    StreamDecoder.forDecoder(byteChannel, decoder, decodingBufferSize)
  }

  def text(enc: String, jsonFactory: JsonFactory, record: Text): JsonParser = {
    val sd = getStreamDecoder(enc, record.getBytes, record.getLength)
    jsonFactory.createParser(sd)
  }

  def inputStream(jsonFactory: JsonFactory, is: InputStream): JsonParser = {
    jsonFactory.createParser(is)
  }

  def inputStream(enc: String, jsonFactory: JsonFactory, is: InputStream): JsonParser = {
    jsonFactory.createParser(new InputStreamReader(is, enc))
  }

  def internalRow(jsonFactory: JsonFactory, row: InternalRow): JsonParser = {
    val ba = row.getBinary(0)

    jsonFactory.createParser(ba, 0, ba.length)
  }

  def internalRow(enc: String, jsonFactory: JsonFactory, row: InternalRow): JsonParser = {
    val binary = row.getBinary(0)
    val sd = getStreamDecoder(enc, binary, binary.length)

    jsonFactory.createParser(sd)
  }
} 
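Outside Spark internals, a public-API alternative to the channel-plus-StreamDecoder bridge above is Channels.newReader, which handles charset decoding over a ReadableByteChannel. A minimal sketch; the input bytes and buffer size are illustrative.

import java.io.ByteArrayInputStream
import java.nio.channels.Channels
import java.nio.charset.Charset

val bytes = """{"a": 1}""".getBytes("UTF-16LE")
val channel = Channels.newChannel(new ByteArrayInputStream(bytes))
// newReader decodes the channel's bytes with the given decoder and minimum buffer capacity.
val reader = Channels.newReader(channel, Charset.forName("UTF-16LE").newDecoder(), 8192)
// A Jackson JsonFactory could then build a parser directly from this Reader:
// jsonFactory.createParser(reader)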
Example 14
Source File: GcsStore.scala    From fs2-blobstore   with Apache License 2.0
package blobstore.gcs

import java.nio.channels.Channels
import java.time.Instant
import java.util.Date

import blobstore.{Path, Store}
import cats.effect.{Blocker, ContextShift, Sync}
import com.google.api.gax.paging.Page
import com.google.cloud.storage.{Acl, Blob, BlobId, BlobInfo, Storage}
import com.google.cloud.storage.Storage.{BlobListOption, CopyRequest}
import fs2.{Chunk, Pipe, Stream}

import scala.jdk.CollectionConverters._

final class GcsStore[F[_]](storage: Storage, blocker: Blocker, acls: List[Acl] = Nil)(implicit F: Sync[F], CS: ContextShift[F]) extends Store[F] {

  private def _chunk(pg: Page[Blob]): Chunk[Path] = {
    val (dirs, files) = pg.getValues.asScala.toSeq.partition(_.isDirectory)
    val dirPaths = Chunk.seq(dirs.map(b => Path(root = b.getBucket, key = b.getName.stripSuffix("/"), size = None, isDir = true, lastModified = None)))
    val filePaths = Chunk.seq(files.map{b =>
      val size = Option(b.getSize: java.lang.Long).map(_.toLong) // Prevent throwing NPE (see https://github.com/scala/bug/issues/9634)
      val lastModified = Option(b.getUpdateTime: java.lang.Long).map(millis => Date.from(Instant.ofEpochMilli(millis))) // Prevent throwing NPE (see https://github.com/scala/bug/issues/9634)
      Path(b.getBucket, key = b.getName, size = size, isDir = false, lastModified = lastModified)
    })
    Chunk.concat(List(dirPaths, filePaths))
  }

  def list(path: Path): fs2.Stream[F, Path] = {
    Stream.unfoldChunkEval[F, () => Option[Page[Blob]], Path]{
      () => Some(storage.list(path.root, BlobListOption.currentDirectory(), BlobListOption.prefix(path.key)))
    }{getPage =>
      blocker.delay{
        getPage().map{pg =>
          if (pg.hasNextPage){
            (_chunk(pg), () => Some(pg.getNextPage))
          } else {
            (_chunk(pg), () => None)
          }
        }
      }
    }
  }

  def get(path: Path, chunkSize: Int): fs2.Stream[F, Byte] = {
    val is = blocker.delay(Channels.newInputStream(storage.get(path.root, path.key).reader()))
    fs2.io.readInputStream(is, chunkSize, blocker, closeAfterUse = true)
  }

  def put(path: Path): Pipe[F, Byte, Unit] = {
    val fos = Sync[F].delay{
      val builder = {
        val b = BlobInfo.newBuilder(path.root, path.key)
        if (acls.nonEmpty) b.setAcl(acls.asJava) else b
      }
      val blobInfo = builder.build()
      val writer = storage.writer(blobInfo)
      Channels.newOutputStream(writer)
    }
    fs2.io.writeOutputStream(fos, blocker, closeAfterUse = true)
  }

  def move(src: Path, dst: Path): F[Unit] = F.productR(copy(src, dst))(remove(src))

  def copy(src: Path, dst: Path): F[Unit] = {
    val req = CopyRequest.newBuilder().setSource(src.root, src.key).setTarget(BlobId.of(dst.root, dst.key)).build()
    F.void(blocker.delay(storage.copy(req).getResult))
  }

  def remove(path: Path): F[Unit] =
    F.void(blocker.delay(storage.delete(path.root, path.key)))
}


object GcsStore{
  def apply[F[_]](
    storage: Storage,
    blocker: Blocker,
    acls: List[Acl]
  )(implicit F: Sync[F], CS: ContextShift[F]): GcsStore[F] = new GcsStore(storage, blocker, acls)
} 
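Stripped of the fs2 plumbing, the core idea in GcsStore is that the GCS client exposes channels and Channels turns them into ordinary streams. A sketch against the plain google-cloud-storage client; the bucket and object names are hypothetical, and error handling (e.g. a missing object returning null) is omitted.

import java.nio.channels.Channels
import com.google.cloud.storage.{BlobId, BlobInfo, StorageOptions}

val storage = StorageOptions.getDefaultInstance.getService

// Read an object as an InputStream via its ReadChannel.
val in = Channels.newInputStream(storage.get("my-bucket", "path/to/object").reader())

// Write an object through an OutputStream backed by a WriteChannel.
val blobInfo = BlobInfo.newBuilder(BlobId.of("my-bucket", "path/to/new-object")).build()
val out = Channels.newOutputStream(storage.writer(blobInfo))
out.write("hello".getBytes("UTF-8"))
out.close()
in.close()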
Example 15
Source File: ConcatArrowAndExplodeSpec.scala    From flint   with Apache License 2.0
package com.twosigma.flint.timeseries

import java.io.ByteArrayOutputStream
import java.nio.channels.Channels
import java.util.concurrent.TimeUnit

import com.twosigma.flint.arrow.ArrowUtils
import org.apache.arrow.memory.RootAllocator
import org.apache.arrow.vector.ipc.ArrowFileWriter
import org.apache.arrow.vector.{ BigIntVector, Float8Vector, VectorSchemaRoot }
import org.apache.spark.sql.functions.{ array, col, lit, struct }
import org.apache.spark.sql.types._

class ConcatArrowAndExplodeSpec extends TimeSeriesSuite {

  "ConcatArrowAndExplode" should "work" in {

    val batchSize = 10

    var df = spark.range(1000, 2000, 1000).toDF("time")
    val columns = (0 until batchSize).map(v => struct((df("time") + v).as("time"), lit(v.toDouble).as("v")))
    df = df.withColumn("base_rows", array(columns: _*))

    val allocator = new RootAllocator(Long.MaxValue)

    val schema1 = StructType(Seq(StructField("v1", DoubleType)))
    val root1 = VectorSchemaRoot.create(ArrowUtils.toArrowSchema(schema1), allocator)
    val vector1 = root1.getVector("v1").asInstanceOf[Float8Vector]
    vector1.allocateNew()

    for (i <- 0 until batchSize) {
      vector1.set(i, i + 10.0)
    }
    vector1.setValueCount(batchSize)
    val out1 = new ByteArrayOutputStream()
    val arrowWriter1 = new ArrowFileWriter(root1, null, Channels.newChannel(out1))
    arrowWriter1.writeBatch()
    arrowWriter1.close()
    root1.close()
    df = df.withColumn("f1_schema", struct(lit(0.0).as("v1")))
    df = df.withColumn("f1_data", lit(out1.toByteArray))

    val schema2 = StructType(Seq(StructField("v2", DoubleType), StructField("v3", LongType)))
    val root2 = VectorSchemaRoot.create(ArrowUtils.toArrowSchema(schema2), allocator)
    val vector2 = root2.getVector("v2").asInstanceOf[Float8Vector]
    val vector3 = root2.getVector("v3").asInstanceOf[BigIntVector]
    vector2.allocateNew()
    vector3.allocateNew()

    for (i <- 0 until batchSize) {
      vector2.set(i, i + 20.0)
    }
    vector2.setValueCount(batchSize)

    for (i <- 0 until batchSize) {
      vector3.set(i, i + 30L)
    }
    vector3.setValueCount(batchSize)
    val out2 = new ByteArrayOutputStream()
    val arrowWriter2 = new ArrowFileWriter(root2, null, Channels.newChannel(out2))
    arrowWriter2.writeBatch()
    arrowWriter2.close()
    root2.close()
    df = df.withColumn("f2_schema", struct(lit(0.0).as("v2"), lit(0L).as("v3")))
    df = df.withColumn("f2_data", lit(out2.toByteArray))

    var tsrdd = TimeSeriesRDD.fromDF(df)(isSorted = false, timeUnit = TimeUnit.NANOSECONDS)
    tsrdd = tsrdd.concatArrowAndExplode("base_rows", Seq("f1_schema", "f2_schema"), Seq("f1_data", "f2_data"))
    tsrdd.toDF.show()

    var expected = spark.range(1000, 1000 + batchSize).toDF("time")
    expected = expected.withColumn("v", col("time") - 1000.0)
    expected = expected.withColumn("v1", col("time") - 1000 + 10.0)
    expected = expected.withColumn("v2", col("time") - 1000 + 20.0)
    expected = expected.withColumn("v3", col("time") - 1000 + 30)

    val expectedTsrdd = TimeSeriesRDD.fromDF(expected)(isSorted = false, timeUnit = TimeUnit.NANOSECONDS)
    assertEquals(tsrdd, expectedTsrdd)
  }

} 
Example 16
Source File: ArrowSummarizer.scala    From flint   with Apache License 2.0
package com.twosigma.flint.rdd.function.summarize.summarizer

import java.io.ByteArrayOutputStream
import java.nio.channels.Channels
import java.util

import com.twosigma.flint.arrow.{ ArrowFieldWriter, ArrowPayload, ArrowUtils, ArrowWriter }
import org.apache.arrow.memory.{ BufferAllocator, RootAllocator }
import org.apache.arrow.vector.VectorSchemaRoot
import org.apache.arrow.vector.ipc.ArrowFileWriter
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
import org.apache.spark.sql.catalyst.util.GenericArrayData
import org.apache.spark.sql.types.StructType

import scala.collection.JavaConverters._


case class ArrowSummarizer(inputSchema: StructType, outputSchema: StructType, includeBaseRows: Boolean)
  extends Summarizer[InternalRow, ArrowSummarizerState, ArrowSummarizerResult] {
  private[this] val size = outputSchema.size
  require(size > 0, "Cannot create summarizer with no input columns")

  // This function will allocate memory from the BufferAllocator to initialize arrow vectors.
  override def zero(): ArrowSummarizerState = {
    new ArrowSummarizerState(false, null, null, null, null)
  }

  private def init(u: ArrowSummarizerState): Unit = {
    if (!u.initialized) {
      val arrowSchema = ArrowUtils.toArrowSchema(outputSchema)
      val allocator = new RootAllocator(Int.MaxValue)
      val root = VectorSchemaRoot.create(arrowSchema, allocator)
      val arrowWriter = ArrowWriter.create(inputSchema, outputSchema, root)

      u.initialized = true
      u.baseRows = new util.ArrayList[InternalRow]()
      u.allocator = allocator
      u.root = root
      u.arrowWriter = arrowWriter
    }
  }

  override def add(u: ArrowSummarizerState, row: InternalRow): ArrowSummarizerState = {
    if (!u.initialized) {
      init(u)
    }

    if (includeBaseRows) {
      u.baseRows.add(row)
    }
    u.arrowWriter.write(row)
    u
  }

  override def merge(
    u1: ArrowSummarizerState,
    u2: ArrowSummarizerState
  ): ArrowSummarizerState = throw new UnsupportedOperationException()

  // This can only be called once
  override def render(u: ArrowSummarizerState): ArrowSummarizerResult = {
    if (u.initialized) {
      val out = new ByteArrayOutputStream()
      val writer = new ArrowFileWriter(u.root, null, Channels.newChannel(out))

      u.arrowWriter.finish()
      writer.writeBatch()

      writer.close()
      u.root.close()
      u.allocator.close()

      val rows = u.baseRows.toArray.asInstanceOf[Array[Any]]
      ArrowSummarizerResult(rows, out.toByteArray)
    } else {
      ArrowSummarizerResult(Array.empty, Array.empty)
    }
  }

  override def close(u: ArrowSummarizerState): Unit = {
    if (u.initialized) {
      u.arrowWriter.reset()
      u.root.close()
      u.allocator.close()
    }
  }
} 
Example 17
Source File: ArrowConverters.scala    From flint   with Apache License 2.0
package com.twosigma.flint.arrow

import java.io.ByteArrayOutputStream
import java.nio.channels.Channels

import org.apache.arrow.memory.BufferAllocator
import org.apache.arrow.vector._
import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel
import org.apache.spark.TaskContext
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.UnsafeRow
import org.apache.spark.sql.types._
import com.twosigma.flint.util.Utils
import org.apache.arrow.vector.ipc.{ ArrowFileReader, ArrowFileWriter }
import org.apache.arrow.vector.ipc.message.ArrowRecordBatch

trait ClosableIterator[T] extends Iterator[T] with AutoCloseable

class ConcatClosableIterator[T](iters: Iterator[ClosableIterator[T]])
  extends ClosableIterator[T] {
  var curIter: ClosableIterator[T] = _

  private def advance(): Unit = {
    require(curIter == null || !curIter.hasNext, "Should not advance if curIter is not empty")
    require(iters.hasNext, "Should not advance if iters doesn't have next")
    closeCurrent()
    curIter = iters.next()
  }

  private def closeCurrent(): Unit = if (curIter != null) curIter.close()

  override def close(): Unit = closeCurrent()

  override def hasNext: Boolean = {
    if (curIter == null || !curIter.hasNext) {
      if (iters.hasNext) {
        advance()
        hasNext
      } else {
        false
      }
    } else {
      true
    }
  }

  override def next(): T = curIter.next()
}


object ArrowConverters {

  def byteArrayToBatch(
    batchBytes: Array[Byte],
    allocator: BufferAllocator
  ): ArrowRecordBatch = {
    val in = new ByteArrayReadableSeekableByteChannel(batchBytes)
    val reader = new ArrowFileReader(in, allocator)

    // Read a batch from a byte stream, ensure the reader is closed
    Utils.tryWithSafeFinally {
      val root = reader.getVectorSchemaRoot
      // throws IOException
      val unloader = new VectorUnloader(root)
      reader.loadNextBatch() // throws IOException
      unloader.getRecordBatch
    } {
      reader.close()
    }
  }
} 
Example 18
Source File: FileManager.scala    From slide-desktop   with GNU General Public License v2.0
package slide

import java.io.{File, FileOutputStream}
import java.net.{URL, URLConnection}
import java.nio.channels.{Channels, ReadableByteChannel}

class FileManager {

    var currentFile: String = ""
    var numberOfDownloads: Int = 0

    def downloadFile(dlsite: String, path: String): Unit = {
        val url: URL = new URL(dlsite)
        val file: File = new File(path)

        if (isConnected(url)) {
            currentFile = path
            onDownloadStart()

            new Thread(new Runnable {
                override def run(): Unit = {
                    try {
                        val rbc: ReadableByteChannel = Channels.newChannel(url.openStream())
                        val fos: FileOutputStream = new FileOutputStream(file)

                        fos.getChannel.transferFrom(rbc, 0, java.lang.Long.MAX_VALUE)
                        fos.close()

                        numberOfDownloads += 1
                        onDownloadFinished()
                    } catch {
                        case e: Exception =>
                            println("Error: Could not download ADB, please run as Administrator")
                    }
                }
            }).start()
        }
    }

    def isConnected(site: URL): Boolean = {
        try {
            // test connection
            val conn: URLConnection = site.openConnection()
            conn.setConnectTimeout(5000)
            conn.getContent

            true
        } catch {
            case e: Exception => false
        }
    }

    def onDownloadStart(): Unit = {}

    def onDownloadFinished(): Unit = {}

    // var onDownloadStart: () => Unit = null
    // var onDownloadFinished: () => Unit = null
}
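The two no-op callbacks in FileManager are intended to be overridden by callers. A usage sketch, with a hypothetical download URL and destination path.

val manager = new FileManager {
  override def onDownloadStart(): Unit = println(s"Downloading $currentFile ...")
  override def onDownloadFinished(): Unit = println(s"Finished ($numberOfDownloads downloads so far)")
}

// downloadFile checks connectivity, then copies the stream on a background thread.
manager.downloadFile("https://example.com/platform-tools.zip", "platform-tools.zip")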