scala.collection.Iterator Scala Examples
The following examples show how to use scala.collection.Iterator.
Example 1
Source File: BGRImgToBatch.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.{MiniBatch, Transformer, Utils}
import com.intel.analytics.bigdl.tensor.{Storage, Tensor}

import scala.collection.Iterator

object BGRImgToBatch {
  def apply(batchSize: Int, toRGB: Boolean = true): BGRImgToBatch =
    new BGRImgToBatch(batchSize, toRGB)
}

class BGRImgToBatch(totalBatch: Int, toRGB: Boolean = true)
  extends Transformer[LabeledBGRImage, MiniBatch[Float]] {

  private val batchPerCore = Utils.getBatchSize(totalBatch)

  override def apply(prev: Iterator[LabeledBGRImage]): Iterator[MiniBatch[Float]] = {
    val batchSizePerCore = batchPerCore
    new Iterator[MiniBatch[Float]] {
      private val featureTensor: Tensor[Float] = Tensor[Float]()
      private val labelTensor: Tensor[Float] = Tensor[Float]()
      private var featureData: Array[Float] = null
      private var labelData: Array[Float] = null
      private var width = 0
      private var height = 0
      private val batchSize = batchSizePerCore

      override def hasNext: Boolean = prev.hasNext

      override def next(): MiniBatch[Float] = {
        if (prev.hasNext) {
          var i = 0
          while (i < batchSize && prev.hasNext) {
            val img = prev.next()
            if (featureData == null) {
              featureData = new Array[Float](batchSize * 3 * img.height() * img.width())
              labelData = new Array[Float](batchSize)
              height = img.height()
              width = img.width()
            }
            img.copyTo(featureData, i * img.width() * img.height() * 3, toRGB)
            labelData(i) = img.label()
            i += 1
          }
          if (labelTensor.nElement() != i) {
            featureTensor.set(Storage[Float](featureData),
              storageOffset = 1, sizes = Array(i, 3, height, width))
            labelTensor.set(Storage[Float](labelData),
              storageOffset = 1, sizes = Array(i))
          }
          MiniBatch(featureTensor, labelTensor)
        } else {
          null
        }
      }
    }
  }
}
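A minimal usage sketch (hypothetical, not from the BigDL sources): the transformer turns an iterator of labeled images into an iterator of mini-batches, with the per-core batch size derived by Utils.getBatchSize, so the exact split depends on the local core configuration. Note that featureTensor and labelTensor are reused across calls to next().

// Hypothetical usage sketch, assuming the LabeledBGRImage constructor and
// setLabel shown in the other examples on this page.
val images = Iterator.fill(8)(new LabeledBGRImage(32, 32).setLabel(1f))
val batches = BGRImgToBatch(batchSize = 4).apply(images)
val first = batches.next() // feature tensor shaped (n, 3, 32, 32)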
Example 2
Source File: ParRange.scala From scala-parallel-collections with Apache License 2.0

package scala
package collection.parallel.immutable

import scala.collection.immutable.Range
import scala.collection.parallel.Combiner
import scala.collection.parallel.SeqSplitter
import scala.collection.Iterator

// Excerpt: the enclosing ParRange class and its splitter are elided in the
// original listing; the fragment below picks up inside the splitter.

    override def map2combiner[S, That](f: Int => S, cb: Combiner[S, That]): Combiner[S, That] = {
      while (hasNext) {
        cb += f(next)
      }
      cb
    }
  }

  override def toString = s"Par$range"
}

object ParRange {
  def apply(start: Int, end: Int, step: Int, inclusive: Boolean) = new ParRange(
    if (inclusive) Range.inclusive(start, end, step)
    else Range(start, end, step)
  )
}
Example 3
Source File: BufferedSource.scala From perf_tester with Apache License 2.0

package scala.io

import java.io.{ InputStream, BufferedReader, InputStreamReader, PushbackReader }
import Source.DefaultBufSize
import scala.collection.{ Iterator, AbstractIterator }

// Excerpt: the enclosing BufferedSource class (which defines decachedReader
// and bufferSize) is elided in the original listing.

  override def mkString = {
    // Speed up slurping of whole data set in the simplest cases.
    val allReader = decachedReader
    val sb = new StringBuilder
    val buf = new Array[Char](bufferSize)
    var n = 0
    while (n != -1) {
      n = allReader.read(buf)
      if (n > 0) sb.appendAll(buf, 0, n)
    }
    sb.result
  }
}
Example 4
Source File: SystemProperties.scala From perf_tester with Apache License 2.0

package scala
package sys

import scala.collection.{ mutable, Iterator }
import scala.collection.JavaConverters._
import java.security.AccessControlException
import scala.language.implicitConversions

// Excerpt: the SystemProperties class is elided in the original listing; the
// members below belong to its companion object.

  def exclusively[T](body: => T) = this synchronized body

  implicit def systemPropertiesToCompanion(p: SystemProperties): SystemProperties.type = this

  private final val HeadlessKey            = "java.awt.headless"
  private final val PreferIPv4StackKey     = "java.net.preferIPv4Stack"
  private final val PreferIPv6AddressesKey = "java.net.preferIPv6Addresses"
  private final val NoTraceSuppressionKey  = "scala.control.noTraceSuppression"

  def help(key: String): String = key match {
    case HeadlessKey            => "system should not utilize a display device"
    case PreferIPv4StackKey     => "system should prefer IPv4 sockets"
    case PreferIPv6AddressesKey => "system should prefer IPv6 addresses"
    case NoTraceSuppressionKey  => "scala should not suppress any stack trace creation"
    case _                      => ""
  }

  lazy val headless: BooleanProp = BooleanProp.keyExists(HeadlessKey)
  lazy val preferIPv4Stack: BooleanProp = BooleanProp.keyExists(PreferIPv4StackKey)
  lazy val preferIPv6Addresses: BooleanProp = BooleanProp.keyExists(PreferIPv6AddressesKey)
  lazy val noTraceSuppression: BooleanProp = BooleanProp.valueIsTrue(NoTraceSuppressionKey)
}
Example 5
Source File: RandomAlterAspect.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.transform.vision.image.augmentation

import breeze.numerics.sqrt
import org.opencv.core.{CvType, Mat, Rect}
import com.intel.analytics.bigdl.dataset.Transformer
import com.intel.analytics.bigdl.dataset.image.LabeledBGRImage
import com.intel.analytics.bigdl.opencv.OpenCV
import org.opencv.imgproc.Imgproc

import scala.collection.Iterator
import com.intel.analytics.bigdl.opencv
import com.intel.analytics.bigdl.transform.vision.image.{FeatureTransformer, ImageFeature}
import com.intel.analytics.bigdl.transform.vision.image.opencv.OpenCVMat
import org.apache.spark.ml
import org.apache.spark.ml.feature
import org.opencv.core.Size

object RandomAlterAspect {
  def apply(min_area_ratio: Float = 0.08f,
            max_area_ratio: Int = 1,
            min_aspect_ratio_change: Float = 0.75f,
            interp_mode: String = "CUBIC",
            cropLength: Int = 224): RandomAlterAspect = {
    OpenCV.isOpenCVLoaded
    new RandomAlterAspect(min_area_ratio, max_area_ratio,
      min_aspect_ratio_change, interp_mode, cropLength)
  }
}

class RandomAlterAspect(min_area_ratio: Float = 0.08f,
                        max_area_ratio: Int = 1,
                        min_aspect_ratio_change: Float = 0.75f,
                        interp_mode: String = "CUBIC",
                        cropLength: Int = 224)
  extends FeatureTransformer {

  import com.intel.analytics.bigdl.utils.RandomGenerator.RNG

  @inline
  private def randRatio(min: Float, max: Float): Float = {
    val res = (RNG.uniform(1e-2, (max - min) * 1000 + 1) + min * 1000) / 1000
    res.toFloat
  }

  override protected def transformMat(feature: ImageFeature): Unit = {
    val h = feature.opencvMat().size().height
    val w = feature.opencvMat().size().width
    val area = h * w

    require(min_area_ratio <= max_area_ratio, "min_area_ratio should <= max_area_ratio")

    var attempt = 0
    while (attempt < 10) {
      val area_ratio = randRatio(min_area_ratio, max_area_ratio)
      val aspect_ratio_change = randRatio(min_aspect_ratio_change, 1 / min_aspect_ratio_change)
      val new_area = area_ratio * area
      var new_h = (sqrt(new_area) * aspect_ratio_change).toInt
      var new_w = (sqrt(new_area) / aspect_ratio_change).toInt
      if (randRatio(0, 1) < 0.5) {
        val tmp = new_h
        new_h = new_w
        new_w = tmp
      }
      if (new_h <= h && new_w <= w) {
        val y = RNG.uniform(1e-2, h - new_h + 1).toInt
        val x = RNG.uniform(1e-2, w - new_w + 1).toInt
        Crop.transform(feature.opencvMat(), feature.opencvMat(),
          x, y, x + new_w, y + new_h, false, false)
        Imgproc.resize(feature.opencvMat(), feature.opencvMat(),
          new Size(cropLength, cropLength), 0, 0, 2)
        attempt = 100
      }
      attempt += 1
    }
    if (attempt < 20) {
      val (new_h, new_w) = resizeImagePerShorterSize(feature.opencvMat(), cropLength)
      Imgproc.resize(feature.opencvMat(), feature.opencvMat(),
        new Size(cropLength, cropLength), 0, 0, 2)
    }
  }

  private def resizeImagePerShorterSize(img: Mat, shorter_size: Int): (Int, Int) = {
    val h = img.size().height
    val w = img.size().width
    var new_h = shorter_size
    var new_w = shorter_size

    if (h < w) {
      new_w = (w / h * shorter_size).toInt
    } else {
      new_h = (h / w * shorter_size).toInt
    }
    (new_h, new_w)
  }
}
Example 6
Source File: ChannelScaledNormalizer.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.transform.vision.image.augmentation

import com.intel.analytics.bigdl.dataset.image.LabeledBGRImage
import com.intel.analytics.bigdl.dataset.{LocalDataSet, Transformer}
import com.intel.analytics.bigdl.transform.vision.image.{FeatureTransformer, ImageFeature}
import com.intel.analytics.bigdl.transform.vision.image.opencv.OpenCVMat
import org.apache.log4j.Logger

import scala.collection.Iterator

object ChannelScaledNormalizer {
  def apply(meanR: Int, meanG: Int, meanB: Int, scale: Double): ChannelScaledNormalizer = {
    new ChannelScaledNormalizer(meanR, meanG, meanB, scale)
  }
}

class ChannelScaledNormalizer(meanR: Int, meanG: Int, meanB: Int, scale: Double)
  extends FeatureTransformer {

  override protected def transformMat(feature: ImageFeature): Unit = {
    val mat = feature.opencvMat()
    val toFloats = OpenCVMat.toFloatPixels(mat)
    val content = toFloats._1
    require(content.length % 3 == 0, "Content should be multiple of 3 channels")
    var i = 0
    val frameLength = content.length / 3
    val height = toFloats._2
    val width = toFloats._3
    val bufferContent = new Array[Float](width * height * 3)

    val channels = 3
    val mean = Array(meanR, meanG, meanB)
    var c = 0
    while (c < channels) {
      i = 0
      while (i < frameLength) {
        val data_index = c * frameLength + i
        bufferContent(data_index) = ((content(data_index) - mean(c)) * scale).toFloat
        i += 1
      }
      c += 1
    }
    if (mat != null) {
      mat.release()
    }
    val newMat = OpenCVMat.fromFloats(bufferContent, height, width)
    feature(ImageFeature.mat) = newMat
  }
}
Example 7
Source File: BGRImgToSample.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.{Sample, Transformer}
import com.intel.analytics.bigdl.tensor.Tensor

import scala.collection.Iterator

object BGRImgToSample {
  def apply(toRGB: Boolean = true): BGRImgToSample = {
    new BGRImgToSample(toRGB)
  }
}

class BGRImgToSample(toRGB: Boolean = true)
  extends Transformer[LabeledBGRImage, Sample[Float]] {

  private val featureBuffer = Tensor[Float]()
  private val labelBuffer = Tensor[Float](1)

  override def apply(prev: Iterator[LabeledBGRImage]): Iterator[Sample[Float]] = {
    prev.map(img => {
      labelBuffer.storage.array()(0) = img.label()
      if (featureBuffer.nElement() != 3 * img.height() * img.width()) {
        featureBuffer.resize(3, img.height(), img.width())
      }
      img.copyTo(featureBuffer.storage().array(), 0, toRGB)

      Sample(featureBuffer, labelBuffer)
    })
  }
}
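Note that featureBuffer and labelBuffer are allocated once and reused for every image, so each emitted Sample should be consumed before the next call to next(). A hypothetical usage sketch (the 16x16 image and label are made up):

// Hypothetical usage sketch; the buffers above are shared across iterations.
val toSample = BGRImgToSample(toRGB = true)
val samples = toSample.apply(Iterator(new LabeledBGRImage(16, 16).setLabel(2f)))
samples.foreach(sample => println(sample))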
Example 8
Source File: BGRImgRdmCropper.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.Transformer

import scala.collection.Iterator

object BGRImgRdmCropper {
  def apply(cropWidth: Int, cropHeight: Int, padding: Int): BGRImgRdmCropper =
    new BGRImgRdmCropper(cropHeight, cropWidth, padding)
}

class BGRImgRdmCropper(cropHeight: Int, cropWidth: Int, padding: Int)
  extends Transformer[LabeledBGRImage, LabeledBGRImage] {
  import com.intel.analytics.bigdl.utils.RandomGenerator.RNG

  private val buffer = new LabeledBGRImage(cropWidth, cropHeight)

  override def apply(prev: Iterator[LabeledBGRImage]): Iterator[LabeledBGRImage] = {
    prev.map(img => {
      // Pad the source image with `padding` zero pixels on each side first.
      val curImg = if (padding > 0) {
        val widthTmp = img.width()
        val heightTmp = img.height()
        val sourceTmp = img.content
        val padWidth = widthTmp + 2 * padding
        val padHeight = heightTmp + 2 * padding
        val temp = new LabeledBGRImage(padWidth, padHeight)
        val tempBuffer = temp.content
        val startIndex = (padding + padding * padWidth) * 3
        val frameLength = widthTmp * heightTmp
        var i = 0
        while (i < frameLength) {
          tempBuffer(startIndex + ((i / widthTmp) * padWidth + (i % widthTmp)) * 3 + 2) =
            sourceTmp(i * 3 + 2)
          tempBuffer(startIndex + ((i / widthTmp) * padWidth + (i % widthTmp)) * 3 + 1) =
            sourceTmp(i * 3 + 1)
          tempBuffer(startIndex + ((i / widthTmp) * padWidth + (i % widthTmp)) * 3) =
            sourceTmp(i * 3)
          i += 1
        }
        temp.setLabel(img.label())
        temp
      } else {
        img
      }

      val width = curImg.width()
      val height = curImg.height()
      val source = curImg.content

      val startW = RNG.uniform(0, width - cropWidth).toInt
      val startH = RNG.uniform(0, height - cropHeight).toInt
      val startIndex = (startW + startH * width) * 3
      val frameLength = cropWidth * cropHeight

      val target = buffer.content
      var i = 0
      while (i < frameLength) {
        target(i * 3 + 2) =
          source(startIndex + ((i / cropWidth) * width + (i % cropWidth)) * 3 + 2)
        target(i * 3 + 1) =
          source(startIndex + ((i / cropWidth) * width + (i % cropWidth)) * 3 + 1)
        target(i * 3) =
          source(startIndex + ((i / cropWidth) * width + (i % cropWidth)) * 3)
        i += 1
      }
      buffer.setLabel(curImg.label())
    })
  }
}
Example 9
Source File: GreyImgToSample.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.{Sample, Transformer}
import com.intel.analytics.bigdl.tensor.{Storage, Tensor}

import scala.collection.Iterator

object GreyImgToSample {
  def apply(): GreyImgToSample = {
    new GreyImgToSample()
  }
}

class GreyImgToSample() extends Transformer[LabeledGreyImage, Sample[Float]] {

  private val featureBuffer = Tensor[Float]()
  private val labelBuffer = Tensor[Float](1)
  private val featureSize = new Array[Int](2)

  override def apply(prev: Iterator[LabeledGreyImage]): Iterator[Sample[Float]] = {
    prev.map(img => {
      labelBuffer.storage.array()(0) = img.label()
      featureSize(0) = img.height()
      featureSize(1) = img.width()
      featureBuffer.set(Storage(img.content), sizes = featureSize)

      Sample(featureBuffer, labelBuffer)
    })
  }
}
Example 10
Source File: ColorJitter.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.Transformer
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.utils.RandomGenerator
import com.intel.analytics.bigdl.utils.RandomGenerator.RNG

import scala.collection.Iterator
import scala.util.Random

object ColorJitter {
  def apply(): ColorJitter = {
    new ColorJitter()
  }
}

class ColorJitter extends Transformer[LabeledBGRImage, LabeledBGRImage] {
  // TODO: make the bcs parameter configurable
  private val bcsParameters =
    Map("brightness" -> 0.4f, "contrast" -> 0.4f, "saturation" -> 0.4f)
  private var gs: Array[Float] = null

  private def grayScale(dst: Array[Float], img: Array[Float]): Array[Float] = {
    var i = 0
    while (i < img.length) {
      dst(i) = img(i) * 0.299f + img(i + 1) * 0.587f + img(i + 2) * 0.114f
      dst(i + 1) = dst(i)
      dst(i + 2) = dst(i)
      i += 3
    }
    dst
  }

  private def blend(img1: Array[Float], img2: Array[Float], alpha: Float): Array[Float] = {
    var i = 0
    while (i < img1.length) {
      img1(i) = img1(i) * alpha + (1 - alpha) * img2(i)
      i += 1
    }
    img1
  }

  private def saturation(variance: Float)(input: Array[Float]): Array[Float] = {
    if (gs == null || gs.length < input.length) gs = new Array[Float](input.length)
    grayScale(gs, input)
    val alpha = 1.0f + RNG.uniform(-variance, variance).toFloat
    blend(input, gs, alpha)
    input
  }

  private def brightness(variance: Float)(input: Array[Float]): Array[Float] = {
    if (gs == null || gs.length < input.length) gs = new Array[Float](input.length)
    java.util.Arrays.fill(gs, 0, gs.length, 0.0f)
    val alpha = 1.0f + RNG.uniform(-variance, variance).toFloat
    blend(input, gs, alpha)
    input
  }

  private def contrast(variance: Float)(input: Array[Float]): Array[Float] = {
    if (gs == null || gs.length < input.length) gs = new Array[Float](input.length)
    grayScale(gs, input)
    val mean = gs.sum / gs.length
    java.util.Arrays.fill(gs, 0, gs.length, mean)
    val alpha = 1.0f + RNG.uniform(-variance, variance).toFloat
    blend(input, gs, alpha)
    input
  }

  private val ts = Map(
    1 -> { brightness(bcsParameters.get("brightness").get)(_) },
    2 -> { contrast(bcsParameters.get("contrast").get)(_) },
    3 -> { saturation(bcsParameters.get("saturation").get)(_) }
  )

  private def randomOrder(input: Array[Float]): Unit = {
    val order = Tensor.randperm[Float](3)
    var i = 1
    while (i <= order.size(1)) {
      val idx = order(i).value().toInt
      ts(idx)(input)
      i += 1
    }
  }

  override def apply(prev: Iterator[LabeledBGRImage]): Iterator[LabeledBGRImage] = {
    prev.map(img => {
      val content = img.content
      require(content.length % 3 == 0)
      randomOrder(content)
      img
    })
  }
}
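The three adjustments run in a random order chosen by Tensor.randperm, and all of them mutate the image's float array in place. A hypothetical usage sketch:

// Hypothetical usage sketch: content length must be a multiple of 3 (BGR),
// which holds for any LabeledBGRImage; iterators are lazy, so pull elements
// to trigger the jitter.
val jitter = ColorJitter()
val jittered = jitter.apply(Iterator.fill(4)(new LabeledBGRImage(8, 8)))
jittered.foreach(img => println(img.content.take(3).mkString(", ")))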
Example 11
Source File: BytesToBGRImg.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import java.awt.Color
import java.awt.image.{BufferedImage, DataBufferByte}
import java.nio.ByteBuffer

import com.intel.analytics.bigdl.dataset.{ByteRecord, Transformer}

import scala.collection.Iterator

object BytesToBGRImg {
  def apply(normalize: Float = 255f, resizeW: Int = -1, resizeH: Int = -1): BytesToBGRImg =
    new BytesToBGRImg(normalize, resizeW, resizeH)
}

class BytesToBGRImg(normalize: Float, resizeW: Int = -1, resizeH: Int = -1)
  extends Transformer[ByteRecord, LabeledBGRImage] {

  private val buffer = new LabeledBGRImage()

  override def apply(prev: Iterator[ByteRecord]): Iterator[LabeledBGRImage] = {
    prev.map(rawData => {
      buffer.copy(getImgData(rawData, resizeW, resizeH), normalize).setLabel(rawData.label)
    })
  }

  private def getImgData(record: ByteRecord, resizeW: Int, resizeH: Int): Array[Byte] = {
    if (resizeW == -1) {
      record.data
    } else {
      val rawData = record.data
      val imgBuffer = ByteBuffer.wrap(rawData)
      val width = imgBuffer.getInt
      val height = imgBuffer.getInt
      val bufferedImage: BufferedImage =
        new BufferedImage(width, height, BufferedImage.TYPE_3BYTE_BGR)
      val outputImagePixelData =
        bufferedImage.getRaster.getDataBuffer.asInstanceOf[DataBufferByte].getData
      System.arraycopy(imgBuffer.array(), 8, outputImagePixelData, 0,
        outputImagePixelData.length)
      BGRImage.resizeImage(bufferedImage, resizeW, resizeH)
    }
  }
}
Example 12
Source File: Lighting.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.Transformer
import com.intel.analytics.bigdl.tensor.{Storage, Tensor}
import com.intel.analytics.bigdl.utils.RandomGenerator.RNG

import scala.collection.Iterator

object Lighting {
  def apply(): Lighting = {
    new Lighting()
  }
}

class Lighting extends Transformer[LabeledBGRImage, LabeledBGRImage] {
  val alphastd = 0.1f
  val eigval = Tensor[Float](Storage(Array(0.2175f, 0.0188f, 0.0045f)), 1, Array(3))
  val eigvec = Tensor[Float](Storage(Array(
    -0.5675f, 0.7192f, 0.4009f,
    -0.5808f, -0.0045f, -0.8140f,
    -0.5836f, -0.6948f, 0.4203f)), 1, Array(3, 3))

  def lighting(input: Array[Float]): Unit = {
    if (alphastd != 0) {
      val alpha = Tensor[Float](3).apply1(_ => RNG.uniform(0, alphastd).toFloat)
      val rgb = eigvec.clone
        .cmul(alpha.view(1, 3).expand(Array(3, 3)))
        .cmul(eigval.view(1, 3).expand(Array(3, 3)))
        .sum(2).squeeze
      var i = 0
      while (i < input.length) {
        input(i) = input(i) + rgb.storage().array()(0)
        input(i + 1) = input(i + 1) + rgb.storage().array()(1)
        input(i + 2) = input(i + 2) + rgb.storage().array()(2)
        i += 3
      }
    }
  }

  override def apply(prev: Iterator[LabeledBGRImage]): Iterator[LabeledBGRImage] = {
    prev.map(img => {
      lighting(img.content)
      img
    })
  }
}
Example 13
Source File: BGRImgToLocalSeqFile.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import java.nio.ByteBuffer
import java.nio.file.Path

import com.intel.analytics.bigdl.dataset.Transformer
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{Path => hadoopPath}
import org.apache.hadoop.io.{SequenceFile, Text}

import scala.collection.Iterator

object BGRImgToLocalSeqFile {
  def apply(blockSize: Int, baseFileName: Path, hasName: Boolean = false): BGRImgToLocalSeqFile = {
    new BGRImgToLocalSeqFile(blockSize, baseFileName, hasName)
  }
}

class BGRImgToLocalSeqFile(blockSize: Int, baseFileName: Path, hasName: Boolean = false)
  extends Transformer[(LabeledBGRImage, String), String] {

  private val conf: Configuration = new Configuration
  private var index = 0
  private val preBuffer: ByteBuffer = ByteBuffer.allocate(4 * 2)

  override def apply(prev: Iterator[(LabeledBGRImage, String)]): Iterator[String] = {
    new Iterator[String] {
      override def hasNext: Boolean = prev.hasNext

      override def next(): String = {
        val fileName = baseFileName + s"_$index.seq"
        val path = new hadoopPath(fileName)
        val writer = SequenceFile.createWriter(conf,
          SequenceFile.Writer.file(path),
          SequenceFile.Writer.keyClass(classOf[Text]),
          SequenceFile.Writer.valueClass(classOf[Text]))
        var i = 0
        while (i < blockSize && prev.hasNext) {
          val (image, imageName) = prev.next()

          preBuffer.putInt(image.width())
          preBuffer.putInt(image.height())
          val imageByteData = image.convertToByte()
          val data: Array[Byte] = new Array[Byte](preBuffer.capacity + imageByteData.length)
          System.arraycopy(preBuffer.array, 0, data, 0, preBuffer.capacity)
          System.arraycopy(imageByteData, 0, data, preBuffer.capacity, imageByteData.length)
          preBuffer.clear
          val imageKey = if (hasName) {
            s"${imageName}\n${image.label().toInt}"
          } else {
            s"${image.label().toInt}"
          }
          writer.append(new Text(imageKey), new Text(data))
          i += 1
        }
        writer.close()
        index += 1
        fileName
      }
    }
  }
}
Example 14
Source File: LocalImgReader.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import java.awt.color.ColorSpace

import com.intel.analytics.bigdl.dataset.Transformer

import scala.collection.Iterator

object LocalImgReader {
  Class.forName("javax.imageio.ImageIO")
  Class.forName("java.awt.color.ICC_ColorSpace")
  // Class.forName("sun.java2d.cmm.lcms.LCMS")
  ColorSpace.getInstance(ColorSpace.CS_sRGB).toRGB(Array[Float](0, 0, 0))

  def apply(scaleTo: Int = BGRImage.NO_SCALE, normalize: Float = 255f)
  : Transformer[LocalLabeledImagePath, LabeledBGRImage] =
    new LocalScaleImgReader(scaleTo, normalize)

  def apply(resizeW: Int, resizeH: Int, normalize: Float)
  : Transformer[LocalLabeledImagePath, LabeledBGRImage] =
    new LocalResizeImgReader(resizeW, resizeH, normalize)
}

// The LocalScaleImgReader class referenced above is elided in this excerpt.

class LocalResizeImgReader private[dataset](resizeW: Int, resizeH: Int, normalize: Float)
  extends Transformer[LocalLabeledImagePath, LabeledBGRImage] {

  private val buffer = new LabeledBGRImage()

  override def apply(prev: Iterator[LocalLabeledImagePath]): Iterator[LabeledBGRImage] = {
    prev.map(data => {
      val imgData = BGRImage.readImage(data.path, resizeW, resizeH)
      val label = data.label
      buffer.copy(imgData, normalize).setLabel(label)
    })
  }
}
Example 15
Source File: RowToByteRecords.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.example.imageclassification

import com.intel.analytics.bigdl.dataset.{ByteRecord, Transformer}
import org.apache.log4j.Logger
import org.apache.spark.sql.Row

import scala.collection.Iterator

object RowToByteRecords {
  val logger = Logger.getLogger(getClass)

  def apply(colName: String = "data"): RowToByteRecords = {
    new RowToByteRecords(colName)
  }
}

class RowToByteRecords(colName: String) extends Transformer[Row, ByteRecord] {
  override def apply(prev: Iterator[Row]): Iterator[ByteRecord] = {
    prev.map(img => {
      ByteRecord(img.getAs[Array[Byte]](colName), -1.0f)
    })
  }
}
Example 16
Source File: BGRImgCropper.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.Transformer

import scala.collection.Iterator

class BGRImgCropper(cropWidth: Int, cropHeight: Int, cropperMethod: CropperMethod = CropRandom)
  extends Transformer[LabeledBGRImage, LabeledBGRImage] {

  import com.intel.analytics.bigdl.utils.RandomGenerator.RNG

  private val buffer = new LabeledBGRImage(cropWidth, cropHeight)

  override def apply(prev: Iterator[LabeledBGRImage]): Iterator[LabeledBGRImage] = {
    prev.map(img => {
      val width = img.width()
      val height = img.height()
      val (startH, startW) = cropperMethod match {
        case CropRandom =>
          (math.ceil(RNG.uniform(1e-2, height - cropHeight)).toInt,
            math.ceil(RNG.uniform(1e-2, width - cropWidth)).toInt)
        case CropCenter =>
          ((height - cropHeight) / 2, (width - cropWidth) / 2)
      }
      val startIndex = (startW + startH * width) * 3
      val frameLength = cropWidth * cropHeight
      val source = img.content
      val target = buffer.content
      var i = 0
      while (i < frameLength) {
        target(i * 3 + 2) =
          source(startIndex + ((i / cropWidth) * width + (i % cropWidth)) * 3 + 2)
        target(i * 3 + 1) =
          source(startIndex + ((i / cropWidth) * width + (i % cropWidth)) * 3 + 1)
        target(i * 3) =
          source(startIndex + ((i / cropWidth) * width + (i % cropWidth)) * 3)
        i += 1
      }
      buffer.setLabel(img.label())
    })
  }
}
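Since no companion object appears in this excerpt, the class can be instantiated directly; CropCenter gives a deterministic crop, while CropRandom (the default) samples the offsets. A hypothetical usage sketch:

// Hypothetical usage sketch: center-crop 256x256 images down to 224x224.
val cropper = new BGRImgCropper(cropWidth = 224, cropHeight = 224, CropCenter)
val cropped = cropper.apply(Iterator(new LabeledBGRImage(256, 256).setLabel(1f)))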
Example 17
Source File: BytesToGreyImg.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.{ByteRecord, Transformer}

import scala.collection.Iterator

object BytesToGreyImg {
  def apply(row: Int, col: Int): BytesToGreyImg = new BytesToGreyImg(row, col)
}

class BytesToGreyImg(row: Int, col: Int)
  extends Transformer[ByteRecord, LabeledGreyImage] {

  private val buffer = new LabeledGreyImage(row, col)

  override def apply(prev: Iterator[ByteRecord]): Iterator[LabeledGreyImage] = {
    prev.map(rawData => {
      require(row * col == rawData.data.length)
      buffer.setLabel(rawData.label).copy(rawData.data, 255.0f)
    })
  }
}
Example 18
Source File: GreyImgToBatch.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.{Utils, MiniBatch, Transformer}
import com.intel.analytics.bigdl.tensor.{Storage, Tensor}

import scala.collection.Iterator

object GreyImgToBatch {
  def apply(batchSize: Int): GreyImgToBatch = {
    new GreyImgToBatch(batchSize)
  }
}

class GreyImgToBatch private[dataset](totalBatchSize: Int)
  extends Transformer[LabeledGreyImage, MiniBatch[Float]] {

  private def copyImage(img: GreyImage, storage: Array[Float], offset: Int): Unit = {
    val content = img.content
    val frameLength = img.width() * img.height()
    var j = 0
    while (j < frameLength) {
      storage(offset + j) = content(j)
      j += 1
    }
  }

  private val batchPerCore = Utils.getBatchSize(totalBatchSize)

  override def apply(prev: Iterator[LabeledGreyImage]): Iterator[MiniBatch[Float]] = {
    val batchSizePerCore = batchPerCore
    new Iterator[MiniBatch[Float]] {
      private val featureTensor: Tensor[Float] = Tensor[Float]()
      private val labelTensor: Tensor[Float] = Tensor[Float]()
      private var featureData: Array[Float] = null
      private var labelData: Array[Float] = null
      private val batchSize = batchSizePerCore
      private var width = 0
      private var height = 0

      override def hasNext: Boolean = prev.hasNext

      override def next(): MiniBatch[Float] = {
        if (prev.hasNext) {
          var i = 0
          while (i < batchSize && prev.hasNext) {
            val img = prev.next()
            if (featureData == null) {
              featureData = new Array[Float](batchSize * img.height() * img.width())
              labelData = new Array[Float](batchSize)
              height = img.height()
              width = img.width()
            }
            copyImage(img, featureData, i * img.width() * img.height())
            labelData(i) = img.label()
            i += 1
          }

          if (labelTensor.nElement() != i) {
            featureTensor.set(Storage[Float](featureData),
              storageOffset = 1, sizes = Array(i, height, width))
            labelTensor.set(Storage[Float](labelData),
              storageOffset = 1, sizes = Array(i))
          }
          MiniBatch(featureTensor, labelTensor)
        } else {
          null
        }
      }
    }
  }
}
Example 19
Source File: BGRImgToImageVector.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.Transformer
import org.apache.log4j.Logger
import org.apache.spark.mllib.linalg.DenseVector

import scala.collection.Iterator

object BGRImgToImageVector {
  val logger = Logger.getLogger(getClass)

  def apply(): BGRImgToImageVector = {
    new BGRImgToImageVector()
  }
}

class BGRImgToImageVector() extends Transformer[LabeledBGRImage, DenseVector] {

  private var featureData: Array[Float] = null

  override def apply(prev: Iterator[LabeledBGRImage]): Iterator[DenseVector] = {
    prev.map(img => {
      if (null == featureData) {
        featureData = new Array[Float](3 * img.height() * img.width())
      }
      img.copyTo(featureData, 0, true)
      new DenseVector(featureData.map(_.toDouble))
    })
  }
}
Example 20
Source File: GreyImgNormalizer.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.DataSet
import com.intel.analytics.bigdl.dataset.Transformer

import scala.collection.Iterator

object GreyImgNormalizer {
  def apply(dataSet: DataSet[LabeledGreyImage], samples: Int = Int.MaxValue)
  : GreyImgNormalizer = {
    var sum: Double = 0
    var total: Int = 0
    dataSet.shuffle()
    var iter = dataSet.toLocal().data(train = false)
    var i = 0
    while (i < math.min(samples, dataSet.size())) {
      val img = iter.next()
      img.content.foreach(e => {
        sum += e
        total += 1
      })
      i += 1
    }

    val mean = sum / total

    sum = 0
    i = 0
    iter = dataSet.toLocal().data(train = false)
    while (i < math.min(samples, dataSet.size())) {
      val img = iter.next()
      img.content.foreach(e => {
        val diff = e - mean
        sum += diff * diff
      })
      i += 1
    }
    val std = math.sqrt(sum / total).toFloat
    new GreyImgNormalizer(mean, std)
  }

  def apply(mean: Double, std: Double): GreyImgNormalizer = {
    new GreyImgNormalizer(mean, std)
  }
}

class GreyImgNormalizer(mean: Double, std: Double)
  extends Transformer[LabeledGreyImage, LabeledGreyImage] {

  def getMean(): Double = mean

  def getStd(): Double = std

  override def apply(prev: Iterator[LabeledGreyImage]): Iterator[LabeledGreyImage] = {
    prev.map(img => {
      var i = 0
      val content = img.content
      while (i < content.length) {
        content(i) = ((content(i) - mean) / std).toFloat
        i += 1
      }
      img
    })
  }
}
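The first apply overload makes two passes over the data set to estimate the mean and standard deviation; when the statistics are already known, the second overload skips the scan. A hypothetical usage sketch with made-up statistics:

// Hypothetical usage sketch; 33.3 and 78.6 are placeholder mean/std values.
val normalizer = GreyImgNormalizer(33.3, 78.6)
val normalized = normalizer.apply(Iterator(new LabeledGreyImage(28, 28)))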
Example 21
Source File: HFlip.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.Transformer
import com.intel.analytics.bigdl.utils.RandomGenerator

import scala.collection.Iterator

object HFlip {
  def apply(threshold: Double = 0.0): HFlip = {
    new HFlip(threshold)
  }
}

class HFlip(threshold: Double) extends Transformer[LabeledBGRImage, LabeledBGRImage] {
  override def apply(prev: Iterator[LabeledBGRImage]): Iterator[LabeledBGRImage] = {
    prev.map(img => {
      if (RandomGenerator.RNG.uniform(0, 1) >= threshold) {
        img.hflip()
      } else {
        img
      }
    })
  }
}
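An image is flipped when the uniform draw in [0, 1) is at least the threshold, so 0.0 (the default) flips everything and values near 1.0 flip almost nothing. A hypothetical usage sketch:

// Hypothetical usage sketch: flip roughly half of the images in place.
// Iterators are lazy, so pull elements to trigger the flips.
val flip = HFlip(0.5)
val maybeFlipped = flip.apply(Iterator.fill(10)(new LabeledBGRImage(8, 8)))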
Example 22
Source File: LocalSeqFileToBytes.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.DataSet.SeqFileFolder
import com.intel.analytics.bigdl.dataset.{ByteRecord, LocalSeqFilePath, Transformer}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.SequenceFile.Reader
import org.apache.hadoop.io.{SequenceFile, Text}

import scala.collection.Iterator

object LocalSeqFileToBytes {
  def apply(): LocalSeqFileToBytes = new LocalSeqFileToBytes()
}

class LocalSeqFileToBytes extends Transformer[LocalSeqFilePath, ByteRecord] {

  import org.apache.hadoop.fs.{Path => hPath}

  @transient
  private var key: Text = null

  @transient
  private var value: Text = null

  @transient
  private var reader: SequenceFile.Reader = null

  @transient
  private var oneRecordBuffer: ByteRecord = null

  override def apply(prev: Iterator[LocalSeqFilePath]): Iterator[ByteRecord] = {
    new Iterator[ByteRecord] {
      override def next(): ByteRecord = {
        if (oneRecordBuffer != null) {
          val res = oneRecordBuffer
          oneRecordBuffer = null
          return res
        }

        if (key == null) {
          key = new Text()
        }
        if (value == null) {
          value = new Text
        }
        if (reader == null || !reader.next(key, value)) {
          if (reader != null) {
            reader.close()
          }

          reader = new SequenceFile.Reader(new Configuration,
            Reader.file(new hPath(prev.next().path.toAbsolutePath.toString)))
          reader.next(key, value)
        }

        ByteRecord(value.copyBytes(), SeqFileFolder.readLabel(key).toFloat)
      }

      override def hasNext: Boolean = {
        if (oneRecordBuffer != null) {
          true
        } else if (reader == null) {
          prev.hasNext
        } else {
          if (reader.next(key, value)) {
            oneRecordBuffer = ByteRecord(value.copyBytes(),
              SeqFileFolder.readLabel(key).toFloat)
            true
          } else {
            prev.hasNext
          }
        }
      }
    }
  }
}
Example 23
Source File: GreyImgCropper.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.Transformer

import scala.collection.Iterator

object GreyImgCropper {
  def apply(cropWidth: Int, cropHeight: Int): GreyImgCropper = {
    new GreyImgCropper(cropWidth, cropHeight)
  }
}

class GreyImgCropper(cropWidth: Int, cropHeight: Int)
  extends Transformer[LabeledGreyImage, LabeledGreyImage] {

  import com.intel.analytics.bigdl.utils.RandomGenerator.RNG

  private val buffer = new LabeledGreyImage(cropWidth, cropHeight)

  override def apply(prev: Iterator[LabeledGreyImage]): Iterator[LabeledGreyImage] = {
    prev.map(img => {
      val width = img.width()
      val height = img.height()
      val startW = RNG.uniform(0, width - cropWidth).toInt
      val startH = RNG.uniform(0, height - cropHeight).toInt
      val startIndex = startW + startH * width
      val frameLength = cropWidth * cropHeight
      val source = img.content
      val target = buffer.content
      var i = 0
      while (i < frameLength) {
        target(i) = source(startIndex + (i / cropWidth) * width + (i % cropWidth))
        i += 1
      }
      buffer.setLabel(img.label())
    })
  }
}
Example 24
Source File: SentenceBiPadding.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.text

import com.intel.analytics.bigdl.dataset.Transformer
import com.intel.analytics.bigdl.dataset.text.utils.SentenceToken

import scala.collection.Iterator

class SentenceBiPadding(
  start: Option[String] = None,
  end: Option[String] = None)
  extends Transformer[String, String] {

  val sentenceStart = start.getOrElse(SentenceToken.start)
  val sentenceEnd = end.getOrElse(SentenceToken.end)

  override def apply(prev: Iterator[String]): Iterator[String] = {
    prev.map(x => {
      val sentence = sentenceStart + " " + x + " " + sentenceEnd
      sentence
    })
  }
}

object SentenceBiPadding {
  def apply(start: Option[String] = None,
            end: Option[String] = None): SentenceBiPadding =
    new SentenceBiPadding(start, end)
}
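The default boundary markers come from SentenceToken; custom ones can be passed as Options. A hypothetical usage sketch with explicit, made-up tokens:

// Hypothetical usage sketch with explicit boundary tokens.
val padder = SentenceBiPadding(start = Some("<s>"), end = Some("</s>"))
padder.apply(Iterator("the quick brown fox")).foreach(println)
// prints: <s> the quick brown fox </s>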
Example 25
Source File: SentenceSplitter.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.text

import java.io.FileInputStream
import java.net.{URI, URL}

import com.intel.analytics.bigdl.dataset.Transformer
import opennlp.tools.sentdetect.{SentenceDetector, SentenceDetectorME, SentenceModel}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

import scala.collection.Iterator

class SentenceSplitter(sentFile: Option[String] = None)
  extends Transformer[String, Array[String]] {

  var modelIn: FileInputStream = _
  var model: SentenceModel = _
  var sentenceDetector: SentenceDetector = _

  def this(sentFileURL: URL) {
    this(Some(sentFileURL.getPath))
  }

  def this(sentFile: String) {
    this(Some(sentFile))
  }

  def close(): Unit = {
    if (modelIn != null) {
      modelIn.close()
    }
  }

  override def apply(prev: Iterator[String]): Iterator[Array[String]] =
    prev.map(x => {
      if (!sentFile.isDefined) {
        x.split('.')
      } else {
        if (sentenceDetector == null) {
          val src: Path = new Path(sentFile.get)
          val fs = src.getFileSystem(new Configuration())
          val in = fs.open(src)
          model = new SentenceModel(in)
          sentenceDetector = new SentenceDetectorME(model)
        }
        sentenceDetector.sentDetect(x)
      }
    })
}

object SentenceSplitter {
  def apply(sentFile: Option[String] = None): SentenceSplitter =
    new SentenceSplitter(sentFile)

  def apply(sentFileURL: URL): SentenceSplitter =
    new SentenceSplitter(sentFileURL)

  def apply(sentFile: String): SentenceSplitter =
    new SentenceSplitter(sentFile)
}
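Without a model file the splitter falls back to a plain split on '.', which needs no OpenNLP model; passing a sentence-detector model path enables the ME detector instead. A hypothetical usage sketch of the fallback:

// Hypothetical usage sketch: no model, so sentences are split on '.'.
val splitter = SentenceSplitter()
splitter.apply(Iterator("First sentence. Second sentence."))
  .foreach(parts => println(parts.mkString(" | ")))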
Example 26
Source File: TextToLabeledSentence.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.text

import com.intel.analytics.bigdl.dataset.Transformer
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric

import scala.collection.Iterator
import scala.reflect.ClassTag

object TextToLabeledSentence {
  def apply[T: ClassTag](dictionary: Dictionary)
    (implicit ev: TensorNumeric[T])
  : TextToLabeledSentence[T] =
    new TextToLabeledSentence[T](dictionary)

  def apply[T: ClassTag](numSteps: Int)
    (implicit ev: TensorNumeric[T])
  : TextToSentenceWithSteps[T] =
    new TextToSentenceWithSteps[T](numSteps)
}

private[bigdl] class TextToSentenceWithSteps[T: ClassTag](numSteps: Int)
  (implicit ev: TensorNumeric[T])
  extends Transformer[Array[T], LabeledSentence[T]] {

  val xbuffer = new Array[T](numSteps)
  val ybuffer = new Array[T](numSteps)
  val buffer = new LabeledSentence[T]()

  override def apply(prev: Iterator[Array[T]]): Iterator[LabeledSentence[T]] = {
    prev.map(sentence => {
      require(sentence.length >= numSteps + 1,
        "input sentence length should be numSteps + 1, " +
          s"sentence.length = ${sentence.length}, numSteps = ${numSteps}")
      Array.copy(sentence, 0, xbuffer, 0, numSteps)
      Array.copy(sentence, 1, ybuffer, 0, numSteps)
      buffer.copy(xbuffer, ybuffer)
      buffer
    })
  }
}
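The numSteps variant shifts the word-index array by one position to produce (input, target) pairs for language-model training. TextToSentenceWithSteps is private[bigdl], so the hypothetical sketch below assumes it runs inside that package, and it assumes BigDL's NumericFloat implicit for TensorNumeric[Float]:

// Hypothetical usage sketch (inside the bigdl package).
import com.intel.analytics.bigdl.numeric.NumericFloat

val toLabeled = TextToLabeledSentence[Float](numSteps = 3)
val labeled = toLabeled.apply(Iterator(Array(0f, 1f, 2f, 3f)))
// for the input above: x = [0, 1, 2], y = [1, 2, 3]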
Example 27
Source File: LabeledSentenceToSample.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.text

import com.intel.analytics.bigdl.dataset.{Sample, Transformer}

import scala.collection.Iterator
import java.util

import com.intel.analytics.bigdl.tensor.{DoubleType, FloatType, Tensor}
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric

import scala.reflect.ClassTag

object LabeledSentenceToSample {
  def apply[T: ClassTag]
  (vocabLength: Int,
   fixDataLength: Option[Int] = None,
   fixLabelLength: Option[Int] = None)
  (implicit ev: TensorNumeric[T])
  : LabeledSentenceToSample[T] =
    new LabeledSentenceToSample[T](
      vocabLength,
      fixDataLength,
      fixLabelLength,
      true)

  def apply[T: ClassTag]
  (oneHot: Boolean,
   fixDataLength: Option[Int],
   fixLabelLength: Option[Int])
  (implicit ev: TensorNumeric[T])
  : LabeledSentenceToSample[T] =
    new LabeledSentenceToSample[T](
      vocabLength = 0,
      fixDataLength,
      fixLabelLength,
      oneHot)
}

// Excerpt: the LabeledSentenceToSample class definition is elided in the
// original listing; the fragment below comes from the body of its apply,
// filling one-hot feature buffers and shifted label buffers.

      val startTokenIndex = sentence.getData(0)
      val endTokenIndex =
        if (labelLength == 1) 0
        else ev.toType[Int](sentence.getLabel(sentence.labelLength - 1))

      var i = 0
      while (i < sentence.dataLength) {
        featureBuffer(i * vocabLength + ev.toType[Int](sentence.getData(i))) =
          ev.fromType[Float](1.0f)
        i += 1
      }
      while (i < dataLength) {
        featureBuffer(i * vocabLength + endTokenIndex) = ev.fromType[Float](1.0f)
        i += 1
      }

      i = 0
      while (i < sentence.labelLength) {
        labelBuffer(i) = ev.plus(sentence.label()(i), ev.fromType[Float](1.0f))
        i += 1
      }
      while (i < labelLength) {
        labelBuffer(i) = ev.plus(startTokenIndex, ev.fromType[Float](1.0f))
        i += 1
      }
    } else {
      feature.resize(dataLength).zero
      label.resize(labelLength).zero

      val featureBuffer = feature.storage().array()
      val labelBuffer = label.storage().array()

      Array.copy(sentence.data, 0, featureBuffer, 0, dataLength)
      Array.copy(sentence.label, 0, labelBuffer, 0, labelLength)
    }

    Sample[T](feature, label)
  })
}
}
Example 28
Source File: SentenceTokenizer.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.text

import java.io.FileInputStream
import java.net.{URI, URL}

import com.intel.analytics.bigdl.dataset.Transformer

import scala.collection.Iterator
import opennlp.tools.tokenize.{SimpleTokenizer, Tokenizer, TokenizerME, TokenizerModel}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

class SentenceTokenizer(tokenFile: Option[String] = None)
  extends Transformer[String, Array[String]] {

  var modelIn: FileInputStream = _
  var model: TokenizerModel = _
  var tokenizer: Tokenizer = _

  def this(tokenFile: URL) {
    this(Some(tokenFile.getPath))
  }

  def close(): Unit = {
    if (modelIn != null) {
      modelIn.close()
    }
  }

  override def apply(prev: Iterator[String]): Iterator[Array[String]] =
    prev.map(x => {
      if (tokenizer == null) {
        if (!tokenFile.isDefined) {
          tokenizer = SimpleTokenizer.INSTANCE
        } else {
          val src: Path = new Path(tokenFile.get)
          val fs = src.getFileSystem(new Configuration())
          val in = fs.open(src)
          model = new TokenizerModel(in)
          tokenizer = new TokenizerME(model)
        }
      }
      val words = tokenizer.tokenize(x)
      words
    })
}

object SentenceTokenizer {
  def apply(tokenFile: Option[String] = None): SentenceTokenizer =
    new SentenceTokenizer(tokenFile)

  def apply(tokenFile: URL): SentenceTokenizer =
    new SentenceTokenizer(tokenFile)
}