scala.collection.Iterator Scala Examples

The following examples show how to use scala.collection.Iterator. They are drawn from open-source projects; the header above each example names the original source file, project, and license.
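
Most of the BigDL examples below follow one pattern: a Transformer whose apply method turns one Iterator into another, either lazily via prev.map or by hand-rolling a new Iterator. A minimal, self-contained sketch of that pattern (the names here are illustrative and not taken from any project below):

import scala.collection.Iterator

trait SimpleTransformer[A, B] {
  def apply(prev: Iterator[A]): Iterator[B]
}

object Doubler extends SimpleTransformer[Int, Int] {
  // lazily double every element as it is pulled from the upstream iterator
  override def apply(prev: Iterator[Int]): Iterator[Int] = prev.map(_ * 2)
}

// Doubler(Iterator(1, 2, 3)).toList == List(2, 4, 6)
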
Example 1
Source File: BGRImgToBatch.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.{MiniBatch, Transformer, Utils}
import com.intel.analytics.bigdl.tensor.{Storage, Tensor}

import scala.collection.Iterator

object BGRImgToBatch {
  def apply(batchSize: Int, toRGB: Boolean = true): BGRImgToBatch
    = new BGRImgToBatch(batchSize, toRGB)
}


class BGRImgToBatch(totalBatch: Int, toRGB: Boolean = true)
  extends Transformer[LabeledBGRImage, MiniBatch[Float]] {

  private val batchPerCore = Utils.getBatchSize(totalBatch)

  override def apply(prev: Iterator[LabeledBGRImage]): Iterator[MiniBatch[Float]] = {
    val batchSizePerCore = batchPerCore

    new Iterator[MiniBatch[Float]] {
      private val featureTensor: Tensor[Float] = Tensor[Float]()
      private val labelTensor: Tensor[Float] = Tensor[Float]()
      private var featureData: Array[Float] = null
      private var labelData: Array[Float] = null
      private var width = 0
      private var height = 0
      private val batchSize = batchSizePerCore

      override def hasNext: Boolean = prev.hasNext

      override def next(): MiniBatch[Float] = {
        if (prev.hasNext) {
          var i = 0
          while (i < batchSize && prev.hasNext) {
            val img = prev.next()
            if (featureData == null) {
              featureData = new Array[Float](batchSize * 3 * img.height() * img.width())
              labelData = new Array[Float](batchSize)
              height = img.height()
              width = img.width()
            }
            img.copyTo(featureData, i * img.width() * img.height() * 3, toRGB)
            labelData(i) = img.label()
            i += 1
          }

          if (labelTensor.nElement() != i) { // re-wrap the arrays only when the batch size changes
            featureTensor.set(Storage[Float](featureData),
              storageOffset = 1, sizes = Array(i, 3, height, width))
            labelTensor.set(Storage[Float](labelData),
              storageOffset = 1, sizes = Array(i))
          }

          MiniBatch(featureTensor, labelTensor)
        } else {
          null
        }
      }
    }
  }
} 
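
A hedged usage sketch, not from the BigDL sources (images stands for an existing Iterator[LabeledBGRImage]). Note that featureTensor and labelTensor are reused, so each MiniBatch must be consumed before the next call to next():

val toBatch = BGRImgToBatch(batchSize = 128)
val batches: Iterator[MiniBatch[Float]] = toBatch(images)
while (batches.hasNext) {
  val batch = batches.next()
  // ... train on batch here; its tensors are overwritten by the next iteration
}
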
Example 2
Source File: ParRange.scala (from scala-parallel-collections, Apache License 2.0)
package scala
package collection.parallel.immutable

import scala.collection.immutable.Range
import scala.collection.parallel.Combiner
import scala.collection.parallel.SeqSplitter
import scala.collection.Iterator



    override def map2combiner[S, That](f: Int => S, cb: Combiner[S, That]): Combiner[S, That] = {
      while (hasNext) {
        cb += f(next)
      }
      cb
    }
  }

  override def toString = s"Par$range"
}

object ParRange {
  def apply(start: Int, end: Int, step: Int, inclusive: Boolean) = new ParRange(
    if (inclusive) Range.inclusive(start, end, step)
    else Range(start, end, step)
  )
} 
Example 3
Source File: BufferedSource.scala (from perf_tester, Apache License 2.0)
package scala.io

import java.io.{ InputStream, BufferedReader, InputStreamReader, PushbackReader }
import Source.DefaultBufSize
import scala.collection.{ Iterator, AbstractIterator }


  override def mkString = {
    // Speed up slurping of whole data set in the simplest cases.
    val allReader = decachedReader
    val sb = new StringBuilder
    val buf = new Array[Char](bufferSize)
    var n = 0
    while (n != -1) {
      n = allReader.read(buf)
      if (n>0) sb.appendAll(buf, 0, n)
    }
    sb.result
  }
} 
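
For context, this override speeds up slurping a whole source into a String; a standard-library usage sketch (the path is hypothetical):

import scala.io.Source

val src = Source.fromFile("/tmp/data.txt")
try println(src.mkString.length)
finally src.close()
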
Example 4
Source File: SystemProperties.scala (from perf_tester, Apache License 2.0)
package scala
package sys

import scala.collection.{ mutable, Iterator }
import scala.collection.JavaConverters._
import java.security.AccessControlException
import scala.language.implicitConversions



  def exclusively[T](body: => T) = this synchronized body

  implicit def systemPropertiesToCompanion(p: SystemProperties): SystemProperties.type = this

  private final val HeadlessKey            = "java.awt.headless"
  private final val PreferIPv4StackKey     = "java.net.preferIPv4Stack"
  private final val PreferIPv6AddressesKey = "java.net.preferIPv6Addresses"
  private final val NoTraceSuppressionKey  = "scala.control.noTraceSuppression"

  def help(key: String): String = key match {
    case HeadlessKey            => "system should not utilize a display device"
    case PreferIPv4StackKey     => "system should prefer IPv4 sockets"
    case PreferIPv6AddressesKey => "system should prefer IPv6 addresses"
    case NoTraceSuppressionKey  => "scala should not suppress any stack trace creation"
    case _                      => ""
  }

  lazy val headless: BooleanProp            = BooleanProp.keyExists(HeadlessKey)
  lazy val preferIPv4Stack: BooleanProp     = BooleanProp.keyExists(PreferIPv4StackKey)
  lazy val preferIPv6Addresses: BooleanProp = BooleanProp.keyExists(PreferIPv6AddressesKey)
  lazy val noTraceSuppression: BooleanProp  = BooleanProp.valueIsTrue(NoTraceSuppressionKey)
} 
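
A small usage sketch of the class and the lazily defined props above (what it prints depends on the running JVM):

import scala.sys.SystemProperties

val props = new SystemProperties // a mutable Map[String, String] view over System.getProperties
println(props.getOrElse("java.version", "?"))
println(SystemProperties.headless.isSet) // true only if -Djava.awt.headless was supplied
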
Example 5
Source File: RandomAlterAspect.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.transform.vision.image.augmentation

import breeze.numerics.sqrt
import org.opencv.core.{CvType, Mat, Rect}
import com.intel.analytics.bigdl.dataset.Transformer
import com.intel.analytics.bigdl.dataset.image.LabeledBGRImage
import com.intel.analytics.bigdl.opencv.OpenCV
import org.opencv.imgproc.Imgproc

import scala.collection.Iterator
import com.intel.analytics.bigdl.opencv
import com.intel.analytics.bigdl.transform.vision.image.{FeatureTransformer, ImageFeature}
import com.intel.analytics.bigdl.transform.vision.image.opencv.OpenCVMat
import org.apache.spark.ml
import org.apache.spark.ml.feature
import org.opencv.core.Size

object RandomAlterAspect {
  def apply(min_area_ratio: Float = 0.08f,
            max_area_ratio: Int = 1,
            min_aspect_ratio_change: Float = 0.75f,
            interp_mode: String = "CUBIC",
            cropLength: Int = 224): RandomAlterAspect = {
    OpenCV.isOpenCVLoaded // referencing this forces the OpenCV native library to load
    new RandomAlterAspect(min_area_ratio, max_area_ratio,
      min_aspect_ratio_change, interp_mode, cropLength)
  }
}


class RandomAlterAspect(min_area_ratio: Float = 0.08f,
                           max_area_ratio: Int = 1,
                           min_aspect_ratio_change: Float = 0.75f,
                           interp_mode: String = "CUBIC",
                           cropLength: Int = 224)
  extends FeatureTransformer {

  import com.intel.analytics.bigdl.utils.RandomGenerator.RNG

  @inline
  private def randRatio(min: Float, max: Float): Float = {
    val res = (RNG.uniform(1e-2, (max - min) * 1000 + 1) + min * 1000) / 1000
    res.toFloat
  }

  override protected def transformMat(feature: ImageFeature): Unit = {
    val h = feature.opencvMat().size().height
    val w = feature.opencvMat().size().width
    val area = h * w

    require(min_area_ratio <= max_area_ratio, "min_area_ratio should be <= max_area_ratio")

    var attempt = 0
    while (attempt < 10) {
      val area_ratio = randRatio(min_area_ratio, max_area_ratio)
      val aspect_ratio_change = randRatio(min_aspect_ratio_change, 1 / min_aspect_ratio_change)
      val new_area = area_ratio * area
      var new_h = (sqrt(new_area) * aspect_ratio_change).toInt
      var new_w = (sqrt(new_area) / aspect_ratio_change).toInt
      if (randRatio(0, 1) < 0.5) {
        val tmp = new_h
        new_h = new_w
        new_w = tmp
      }
      if (new_h <= h && new_w <= w) {
        val y = RNG.uniform(1e-2, h - new_h + 1).toInt
        val x = RNG.uniform(1e-2, w - new_w + 1).toInt
        Crop.transform(feature.opencvMat(),
          feature.opencvMat(), x, y, x + new_w, y + new_h, false, false)

        Imgproc.resize(feature.opencvMat(), feature.opencvMat(),
            new Size(cropLength, cropLength), 0, 0, 2)
        attempt = 100 // crop succeeded: exit the loop and skip the fallback branch below
      }
      attempt += 1
    }
    if (attempt < 20) { // all ten attempts failed (attempt == 10); fall back to a plain resize
      // note: the (new_h, new_w) returned here are not used by the resize below
      val (new_h, new_w) = resizeImagePerShorterSize(feature.opencvMat(), cropLength)
      Imgproc.resize(feature.opencvMat(),
        feature.opencvMat(), new Size(cropLength, cropLength), 0, 0, 2)
    }
  }

  private def resizeImagePerShorterSize(img: Mat, shorter_size: Int) : (Int, Int) = {
    val h = img.size().height
    val w = img.size().width
    var new_h = shorter_size
    var new_w = shorter_size

    if (h < w) {
      new_w = (w / h * shorter_size).toInt
    } else {
      new_h = (h / w * shorter_size).toInt
    }
    (new_h, new_w)
  }
} 
Example 6
Source File: ChannelScaledNormalizer.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.transform.vision.image.augmentation

import com.intel.analytics.bigdl.dataset.image.LabeledBGRImage
import com.intel.analytics.bigdl.dataset.{LocalDataSet, Transformer}
import com.intel.analytics.bigdl.transform.vision.image.{FeatureTransformer, ImageFeature}
import com.intel.analytics.bigdl.transform.vision.image.opencv.OpenCVMat
import org.apache.log4j.Logger

import scala.collection.Iterator

object ChannelScaledNormalizer {

  def apply(meanR: Int, meanG: Int, meanB: Int, scale: Double): ChannelScaledNormalizer = {
    new ChannelScaledNormalizer(meanR, meanG, meanB, scale)
  }
}



class ChannelScaledNormalizer(meanR: Int, meanG: Int, meanB: Int, scale: Double)
  extends FeatureTransformer {

  override protected def transformMat(feature: ImageFeature): Unit = {
    val mat = feature.opencvMat()
    val toFloats = OpenCVMat.toFloatPixels(mat)
    val content = toFloats._1
    require(content.length % 3 == 0, "Content should be multiple of 3 channels")
    var i = 0
    val frameLength = content.length / 3
    val height = toFloats._2
    val width = toFloats._3
    val bufferContent = new Array[Float](width * height * 3)

    val channels = 3
    val mean = Array(meanR, meanG, meanB)
    var c = 0
    while (c < channels) {
      i = 0
      while (i < frameLength) {
        val data_index = c * frameLength + i
        bufferContent(data_index) = ((content(data_index) - mean(c)) * scale).toFloat
        i += 1
      }
      c += 1
    }
    if (mat != null) {
      mat.release()
    }
    val newMat = OpenCVMat.fromFloats(bufferContent, height, width)
    feature(ImageFeature.mat) = newMat
  }

} 
Example 7
Source File: BGRImgToSample.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.{Sample, Transformer}
import com.intel.analytics.bigdl.tensor.Tensor

import scala.collection.Iterator

object BGRImgToSample {
  def apply(toRGB: Boolean = true): BGRImgToSample = {
    new BGRImgToSample(toRGB)
  }
}


class BGRImgToSample(toRGB: Boolean = true) extends Transformer[LabeledBGRImage, Sample[Float]] {

  private val featureBuffer = Tensor[Float]()
  private val labelBuffer = Tensor[Float](1)

  override def apply(prev: Iterator[LabeledBGRImage]): Iterator[Sample[Float]] = {
    prev.map(img => {
      labelBuffer.storage.array()(0) = img.label()
      if (featureBuffer.nElement() != 3 * img.height() * img.width()) {
        featureBuffer.resize(3, img.height(), img.width())
      }

      img.copyTo(featureBuffer.storage().array(), 0, toRGB)
      Sample(featureBuffer, labelBuffer)
    })
  }
} 
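
Usage sketch (images is an assumed Iterator[LabeledBGRImage]). Because featureBuffer and labelBuffer are reused, every emitted Sample aliases the same storage and should be consumed or copied before advancing the iterator:

val samples: Iterator[Sample[Float]] = BGRImgToSample(toRGB = true)(images)
samples.foreach { sample =>
  // consume or deep-copy the sample here; its buffers are overwritten on the next element
}
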
Example 8
Source File: BGRImgRdmCropper.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.Transformer

import scala.collection.Iterator

object BGRImgRdmCropper {
  def apply(cropWidth: Int, cropHeight: Int, padding: Int): BGRImgRdmCropper =
    new BGRImgRdmCropper(cropHeight, cropWidth, padding)
}


class BGRImgRdmCropper(cropHeight: Int, cropWidth: Int, padding: Int)
  extends Transformer[LabeledBGRImage, LabeledBGRImage] {
  import com.intel.analytics.bigdl.utils.RandomGenerator.RNG

  private val buffer = new LabeledBGRImage(cropWidth, cropHeight)

  override def apply(prev: Iterator[LabeledBGRImage]): Iterator[LabeledBGRImage] = {
    prev.map(img => {
      val curImg = if (padding > 0) {
          val widthTmp = img.width()
          val heightTmp = img.height()
          val sourceTmp = img.content
          val padWidth = widthTmp + 2 * padding
          val padHeight = heightTmp + 2 * padding
          val temp = new LabeledBGRImage(padWidth, padHeight)
          val tempBuffer = temp.content
          val startIndex = (padding + padding * padWidth) * 3
          val frameLength = widthTmp * heightTmp
          var i = 0
          while (i < frameLength) {
            tempBuffer(startIndex +
              ((i / widthTmp) * padWidth + (i % widthTmp)) * 3 + 2) = sourceTmp(i * 3 + 2)
            tempBuffer(startIndex +
              ((i / widthTmp) * padWidth + (i % widthTmp)) * 3 + 1) = sourceTmp(i * 3 + 1)
            tempBuffer(startIndex +
              ((i / widthTmp) * padWidth + (i % widthTmp)) * 3) = sourceTmp(i * 3)
            i += 1
          }
          temp.setLabel(img.label())
          temp
      } else {
        img
      }

      val width = curImg.width()
      val height = curImg.height()
      val source = curImg.content

      val startW = RNG.uniform(0, width - cropWidth).toInt
      val startH = RNG.uniform(0, height - cropHeight).toInt
      val startIndex = (startW + startH * width) * 3
      val frameLength = cropWidth * cropHeight

      val target = buffer.content
      var i = 0
      while (i < frameLength) {
        target(i * 3 + 2) =
          source(startIndex + ((i / cropWidth) * width + (i % cropWidth)) * 3 + 2)
        target(i * 3 + 1) =
          source(startIndex + ((i / cropWidth) * width + (i % cropWidth)) * 3 + 1)
        target(i * 3) =
          source(startIndex + ((i / cropWidth) * width + (i % cropWidth)) * 3)
        i += 1
      }
      buffer.setLabel(curImg.label())
    })
  }
} 
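
A hedged pipeline sketch chaining this cropper into the batcher from Example 1, assuming BigDL's -> composition operator on Transformer and an existing images iterator:

val pipeline = BGRImgRdmCropper(cropWidth = 32, cropHeight = 32, padding = 4) -> BGRImgToBatch(128)
val batches = pipeline(images) // Iterator[MiniBatch[Float]]
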
Example 9
Source File: GreyImgToSample.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.{Sample, Transformer}
import com.intel.analytics.bigdl.tensor.{Storage, Tensor}

import scala.collection.Iterator

object GreyImgToSample {
  def apply(): GreyImgToSample = {
    new GreyImgToSample()
  }
}


class GreyImgToSample() extends Transformer[LabeledGreyImage, Sample[Float]] {

  private val featureBuffer = Tensor[Float]()
  private val labelBuffer = Tensor[Float](1)
  private val featureSize = new Array[Int](2)

  override def apply(prev: Iterator[LabeledGreyImage]): Iterator[Sample[Float]] = {
    prev.map(img => {
      labelBuffer.storage.array()(0) = img.label()
      featureSize(0) = img.height()
      featureSize(1) = img.width()
      featureBuffer.set(Storage(img.content), sizes = featureSize)

      Sample(featureBuffer, labelBuffer)
    })
  }
} 
Example 10
Source File: ColorJitter.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.Transformer
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.utils.RandomGenerator
import com.intel.analytics.bigdl.utils.RandomGenerator.RNG

import scala.collection.Iterator
import scala.util.Random

object ColorJitter {
  def apply(): ColorJitter = {
    new ColorJitter()
  }
}


class ColorJitter extends Transformer[LabeledBGRImage, LabeledBGRImage] {
  // TODO: make the bcs parameter configurable
  private val bcsParameters = Map("brightness" -> 0.4f, "contrast" -> 0.4f, "saturation" -> 0.4f)
  private var gs: Array[Float] = null

  private def grayScale(dst: Array[Float], img: Array[Float]): Array[Float] = {
    var i = 0
    while (i < img.length) {
      dst(i) = img(i)*0.299f + img(i + 1)*0.587f + img(i + 2)*0.114f
      dst(i + 1) = dst(i)
      dst(i + 2) = dst(i)
      i += 3
    }
    dst
  }

  private def blend(img1: Array[Float], img2: Array[Float], alpha: Float): Array[Float] = {
    var i = 0
    while (i < img1.length) {
      img1(i) = img1(i) * alpha + (1 - alpha) * img2(i)
      i += 1
    }
    img1
  }

  private def saturation(variance: Float)(input: Array[Float]): Array[Float] = {
    if (gs == null || gs.length < input.length) gs = new Array[Float](input.length)
    grayScale(gs, input)
    val alpha = 1.0f + RNG.uniform(-variance, variance).toFloat
    blend(input, gs, alpha)
    input
  }

  private def brightness(variance: Float)(input: Array[Float]): Array[Float] = {
    if (gs == null || gs.length < input.length) gs = new Array[Float](input.length)
    java.util.Arrays.fill(gs, 0, gs.length, 0.0f)
    val alpha = 1.0f + RNG.uniform(-variance, variance).toFloat
    blend(input, gs, alpha)
    input
  }

  private def contrast(variance: Float)(input: Array[Float]): Array[Float] = {
    if (gs == null || gs.length < input.length) gs = new Array[Float](input.length)
    grayScale(gs, input)
    val mean = gs.sum / gs.length
    java.util.Arrays.fill(gs, 0, gs.length, mean)
    val alpha = 1.0f + RNG.uniform(-variance, variance).toFloat
    blend(input, gs, alpha)
    input
  }

  private val ts = Map(
    1 -> { brightness(bcsParameters("brightness"))(_) },
    2 -> { contrast(bcsParameters("contrast"))(_) },
    3 -> { saturation(bcsParameters("saturation"))(_) }
  )

  private def randomOrder(input: Array[Float]): Unit = {
    val order = Tensor.randperm[Float](3)
    var i = 1
    while (i <= order.size(1)) {
      val idx = order(i).value().toInt
      ts(idx)(input)
      i += 1
    }
  }

  override def apply(prev: Iterator[LabeledBGRImage]): Iterator[LabeledBGRImage] = {
    prev.map(img => {
      val content = img.content
      require(content.length % 3 == 0)
      randomOrder(content)
      img
    })
  }
} 
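
Usage sketch; the jitter mutates img.content in place and yields the same image objects (images is an assumed iterator):

val jittered: Iterator[LabeledBGRImage] = ColorJitter()(images)
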
Example 11
Source File: BytesToBGRImg.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.dataset.image

import java.awt.Color
import java.awt.image.{BufferedImage, DataBufferByte}
import java.nio.ByteBuffer

import com.intel.analytics.bigdl.dataset.{ByteRecord, Transformer}

import scala.collection.Iterator

object BytesToBGRImg {
  def apply(normalize: Float = 255f, resizeW : Int = -1, resizeH : Int = -1): BytesToBGRImg =
    new BytesToBGRImg(normalize, resizeW, resizeH)
}


class BytesToBGRImg(normalize: Float, resizeW : Int = -1, resizeH : Int = -1)
  extends Transformer[ByteRecord, LabeledBGRImage] {

  private val buffer = new LabeledBGRImage()

  override def apply(prev: Iterator[ByteRecord]): Iterator[LabeledBGRImage] = {
    prev.map(rawData => {
      buffer.copy(getImgData(rawData, resizeW, resizeH), normalize).setLabel(rawData.label)
    })
  }

  private def getImgData (record : ByteRecord, resizeW : Int, resizeH : Int)
  : Array[Byte] = {
    if (resizeW == -1) {
      return record.data
    } else {
      val rawData = record.data
      val imgBuffer = ByteBuffer.wrap(rawData)
      val width = imgBuffer.getInt
      val height = imgBuffer.getInt
      val bufferedImage: BufferedImage =
        new BufferedImage(width, height, BufferedImage.TYPE_3BYTE_BGR)
      val outputImagePixelData = bufferedImage.getRaster.getDataBuffer
        .asInstanceOf[DataBufferByte].getData
      System.arraycopy(imgBuffer.array(), 8,
        outputImagePixelData, 0, outputImagePixelData.length)
      BGRImage.resizeImage(bufferedImage, resizeW, resizeH)
    }
  }
} 
Example 12
Source File: Lighting.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.Transformer
import com.intel.analytics.bigdl.tensor.{Storage, Tensor}
import com.intel.analytics.bigdl.utils.RandomGenerator.RNG

import scala.collection.Iterator

object Lighting {
  def apply(): Lighting = {
    new Lighting()
  }
}


class Lighting extends Transformer[LabeledBGRImage, LabeledBGRImage] {
  val alphastd = 0.1f
  val eigval = Tensor[Float](Storage(Array(0.2175f, 0.0188f, 0.0045f)), 1, Array(3))
  val eigvec = Tensor[Float](Storage(Array(-0.5675f, 0.7192f, 0.4009f,
    -0.5808f, -0.0045f, -0.8140f,
    -0.5836f, -0.6948f, 0.4203f)), 1, Array(3, 3))

  def lighting(input: Array[Float]): Unit = {
    if (alphastd != 0) {
      val alpha = Tensor[Float](3).apply1(_ => RNG.uniform(0, alphastd).toFloat)
      val rgb = eigvec.clone
        .cmul(alpha.view(1, 3).expand(Array(3, 3)))
        .cmul(eigval.view(1, 3).expand(Array(3, 3)))
        .sum(2).squeeze
      var i = 0
      while (i < input.length) {
        input(i) = input(i) + rgb.storage().array()(0)
        input(i + 1) = input(i + 1) + rgb.storage().array()(1)
        input(i + 2) = input(i + 2) + rgb.storage().array()(2)
        i += 3
      }
    }
  }

  override def apply(prev: Iterator[LabeledBGRImage]): Iterator[LabeledBGRImage] = {
    prev.map(img => {
      lighting(img.content)
      img
    })
  }
} 
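
The eigval/eigvec constants are the widely used AlexNet-style PCA lighting values for ImageNet; the transform adds a random combination of the principal color components to every pixel, in place. Usage sketch (images is an assumed iterator):

val lit: Iterator[LabeledBGRImage] = Lighting()(images)
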
Example 13
Source File: BGRImgToLocalSeqFile.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.dataset.image

import java.nio.ByteBuffer
import java.nio.file.Path

import com.intel.analytics.bigdl.dataset.Transformer
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{Path => hadoopPath}
import org.apache.hadoop.io.{SequenceFile, Text}

import scala.collection.Iterator

object BGRImgToLocalSeqFile {
  def apply(blockSize: Int, baseFileName: Path, hasName: Boolean = false): BGRImgToLocalSeqFile = {
    new BGRImgToLocalSeqFile(blockSize, baseFileName, hasName)
  }
}


class BGRImgToLocalSeqFile(blockSize: Int, baseFileName: Path, hasName: Boolean = false) extends
  Transformer[(LabeledBGRImage, String), String] {
  private val conf: Configuration = new Configuration
  private var index = 0
  private val preBuffer: ByteBuffer = ByteBuffer.allocate(4 * 2)

  override def apply(prev: Iterator[(LabeledBGRImage, String)]): Iterator[String] = {
    new Iterator[String] {
      override def hasNext: Boolean = prev.hasNext

      override def next(): String = {
        val fileName = baseFileName + s"_$index.seq"
        val path = new hadoopPath(fileName)
        val writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(path),
          SequenceFile.Writer.keyClass(classOf[Text]),
          SequenceFile.Writer.valueClass(classOf[Text]))
        var i = 0
        while (i < blockSize && prev.hasNext) {
          val (image, imageName) = prev.next()

          preBuffer.putInt(image.width())
          preBuffer.putInt(image.height())
          val imageByteData = image.convertToByte()
          val data: Array[Byte] = new Array[Byte](preBuffer.capacity + imageByteData.length)
          System.arraycopy(preBuffer.array, 0, data, 0, preBuffer.capacity)
          System.arraycopy(imageByteData, 0, data, preBuffer.capacity, imageByteData.length)
          preBuffer.clear
          val imageKey = if (hasName) s"${imageName}\n${image.label().toInt}"
            else s"${image.label().toInt}"
          writer.append(new Text(imageKey), new Text(data))
          i += 1
        }
        writer.close()
        index += 1
        fileName
      }
    }
  }
} 
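
Usage sketch (the path and the input iterator are assumptions); each call to next() writes up to blockSize records into a fresh sequence file and returns its name:

import java.nio.file.Paths

val toSeq = BGRImgToLocalSeqFile(blockSize = 12800, baseFileName = Paths.get("/tmp/imagenet/train"))
toSeq(namedImages).foreach(println) // namedImages: Iterator[(LabeledBGRImage, String)]
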
Example 14
Source File: LocalImgReader.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.dataset.image

import java.awt.color.ColorSpace

import com.intel.analytics.bigdl.dataset.Transformer

import scala.collection.Iterator

object LocalImgReader {
  Class.forName("javax.imageio.ImageIO")
  Class.forName("java.awt.color.ICC_ColorSpace")
  // Class.forName("sun.java2d.cmm.lcms.LCMS")
  ColorSpace.getInstance(ColorSpace.CS_sRGB).toRGB(Array[Float](0, 0, 0))

  def apply(scaleTo: Int = BGRImage.NO_SCALE, normalize: Float = 255f)
  : Transformer[LocalLabeledImagePath, LabeledBGRImage]
  = new LocalScaleImgReader(scaleTo, normalize)

  def apply(resizeW: Int, resizeH: Int, normalize: Float)
  : Transformer[LocalLabeledImagePath, LabeledBGRImage]
  = new LocalResizeImgReader(resizeW, resizeH, normalize)
}


class LocalResizeImgReader private[dataset](resizeW: Int, resizeH: Int, normalize: Float)
  extends Transformer[LocalLabeledImagePath, LabeledBGRImage] {


  private val buffer = new LabeledBGRImage()

  override def apply(prev: Iterator[LocalLabeledImagePath]): Iterator[LabeledBGRImage] = {
    prev.map(data => {
      val imgData = BGRImage.readImage(data.path, resizeW, resizeH)
      val label = data.label
      buffer.copy(imgData, normalize).setLabel(label)
    })
  }
} 
Example 15
Source File: RowToByteRecords.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.example.imageclassification

import com.intel.analytics.bigdl.dataset.{ByteRecord, Transformer}
import org.apache.log4j.Logger
import org.apache.spark.sql.Row

import scala.collection.Iterator

object RowToByteRecords {
  val logger = Logger.getLogger(getClass)

  def apply(colName: String = "data"): RowToByteRecords = {
    new RowToByteRecords(colName)
  }
}


class RowToByteRecords(colName: String)
  extends Transformer[Row, ByteRecord] {

  override def apply(prev: Iterator[Row]): Iterator[ByteRecord] = {
    prev.map(
      img => {
        ByteRecord(img.getAs[Array[Byte]](colName), -1.0f)
      }
    )
  }
} 
Example 16
Source File: BGRImgCropper.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.Transformer

import scala.collection.Iterator


class BGRImgCropper(cropWidth: Int, cropHeight: Int, cropperMethod: CropperMethod = CropRandom)
  extends Transformer[LabeledBGRImage, LabeledBGRImage] {

  import com.intel.analytics.bigdl.utils.RandomGenerator.RNG

  private val buffer = new LabeledBGRImage(cropWidth, cropHeight)

  override def apply(prev: Iterator[LabeledBGRImage]): Iterator[LabeledBGRImage] = {
    prev.map(img => {
      val width = img.width()
      val height = img.height()
      val (startH, startW) = cropperMethod match {
        case CropRandom =>
          (math.ceil(RNG.uniform(1e-2, height - cropHeight)).toInt,
            math.ceil(RNG.uniform(1e-2, width - cropWidth)).toInt)
        case CropCenter =>
          ((height - cropHeight) / 2, (width - cropWidth) / 2)
      }
      val startIndex = (startW + startH * width) * 3
      val frameLength = cropWidth * cropHeight
      val source = img.content
      val target = buffer.content
      var i = 0
      while (i < frameLength) {
        target(i * 3 + 2) =
          source(startIndex + ((i / cropWidth) * width + (i % cropWidth)) * 3 + 2)
        target(i * 3 + 1) =
          source(startIndex + ((i / cropWidth) * width + (i % cropWidth)) * 3 + 1)
        target(i * 3) =
          source(startIndex + ((i / cropWidth) * width + (i % cropWidth)) * 3)
        i += 1
      }
      buffer.setLabel(img.label())
    })
  }
} 
Example 17
Source File: BytesToGreyImg.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.{ByteRecord, Transformer}

import scala.collection.Iterator

object BytesToGreyImg {
  def apply(row: Int, col: Int): BytesToGreyImg
  = new BytesToGreyImg(row, col)
}


class BytesToGreyImg(row: Int, col: Int)
  extends Transformer[ByteRecord, LabeledGreyImage] {
  private val buffer = new LabeledGreyImage(row, col)

  override def apply(prev: Iterator[ByteRecord]): Iterator[LabeledGreyImage] = {
    prev.map(rawData => {
      require(row * col == rawData.data.length)
      buffer.setLabel(rawData.label).copy(rawData.data, 255.0f)
    })
  }
} 
Example 18
Source File: GreyImgToBatch.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.{Utils, MiniBatch, Transformer}
import com.intel.analytics.bigdl.tensor.{Storage, Tensor}

import scala.collection.Iterator

object GreyImgToBatch {
  def apply(batchSize : Int) : GreyImgToBatch = {
    new GreyImgToBatch(batchSize)
  }
}


class GreyImgToBatch private[dataset](totalBatchSize: Int)
  extends Transformer[LabeledGreyImage, MiniBatch[Float]] {

  private def copyImage(img: GreyImage, storage: Array[Float], offset: Int): Unit = {
    val content = img.content
    val frameLength = img.width() * img.height()
    var j = 0
    while (j < frameLength) {
      storage(offset + j) = content(j)
      j += 1
    }
  }

  private val batchPerCore = Utils.getBatchSize(totalBatchSize)

  override def apply(prev: Iterator[LabeledGreyImage]): Iterator[MiniBatch[Float]] = {
    val batchSizePerCore = batchPerCore

    new Iterator[MiniBatch[Float]] {
      private val featureTensor: Tensor[Float] = Tensor[Float]()
      private val labelTensor: Tensor[Float] = Tensor[Float]()
      private var featureData: Array[Float] = null
      private var labelData: Array[Float] = null
      private val batchSize = batchSizePerCore
      private var width = 0
      private var height = 0

      override def hasNext: Boolean = prev.hasNext

      override def next(): MiniBatch[Float] = {
        if (prev.hasNext) {
          var i = 0
          while (i < batchSize && prev.hasNext) {
            val img = prev.next()
            if (featureData == null) {
              featureData = new Array[Float](batchSize * img.height() * img.width())
              labelData = new Array[Float](batchSize)
              height = img.height()
              width = img.width()
            }
            copyImage(img, featureData, i * img.width() * img.height())
            labelData(i) = img.label()
            i += 1
          }
          if (labelTensor.nElement() != i) { // re-wrap the arrays only when the batch size changes
            featureTensor.set(Storage[Float](featureData),
              storageOffset = 1, sizes = Array(i, height, width))
            labelTensor.set(Storage[Float](labelData),
              storageOffset = 1, sizes = Array(i))
          }
          MiniBatch(featureTensor, labelTensor)
        } else {
          null
        }
      }
    }
  }
} 
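
A hedged end-to-end sketch for grey images, chaining the byte decoder (Example 17), a normalizer (Example 20, here with the commonly quoted MNIST statistics), and this batcher, assuming BigDL's -> composition operator and an Iterator[ByteRecord] of 28x28 records:

val pipeline = BytesToGreyImg(28, 28) -> GreyImgNormalizer(0.1307, 0.3081) -> GreyImgToBatch(128)
val batches: Iterator[MiniBatch[Float]] = pipeline(byteRecords)
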
Example 19
Source File: BGRImgToImageVector.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.Transformer
import org.apache.log4j.Logger
import org.apache.spark.mllib.linalg.DenseVector

import scala.collection.Iterator

object BGRImgToImageVector {
  val logger = Logger.getLogger(getClass)

  def apply(): BGRImgToImageVector = {
    new BGRImgToImageVector()
  }
}


class BGRImgToImageVector()
  extends Transformer[LabeledBGRImage, DenseVector] {

  private var featureData: Array[Float] = null

  override def apply(prev: Iterator[LabeledBGRImage]): Iterator[DenseVector] = {
    prev.map(
      img => {
        if (null == featureData) {
          featureData = new Array[Float](3 * img.height() * img.width())
        }
        img.copyTo(featureData, 0, true)
        new DenseVector(featureData.map(_.toDouble))
      }
    )
  }
} 
Example 20
Source File: GreyImgNormalizer.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.DataSet
import com.intel.analytics.bigdl.dataset.Transformer

import scala.collection.Iterator

object GreyImgNormalizer {
  def apply(dataSet: DataSet[LabeledGreyImage], samples: Int = Int.MaxValue)
  : GreyImgNormalizer = {
    var sum: Double = 0
    var total: Int = 0
    dataSet.shuffle()
    var iter = dataSet.toLocal().data(train = false)
    var i = 0
    while (i < math.min(samples, dataSet.size())) {
      val img = iter.next()
      img.content.foreach(e => {
        sum += e
        total += 1
      })
      i += 1
    }

    val mean = sum / total

    sum = 0
    i = 0
    iter = dataSet.toLocal().data(train = false)
    while (i < math.min(samples, dataSet.size())) {
      val img = iter.next()
      img.content.foreach(e => {
        val diff = e - mean
        sum += diff * diff
      })
      i += 1
    }
    val std = math.sqrt(sum / total).toFloat
    new GreyImgNormalizer(mean, std)
  }

  def apply(mean : Double, std : Double): GreyImgNormalizer = {
    new GreyImgNormalizer(mean, std)
  }
}


class GreyImgNormalizer(mean : Double, std : Double)
  extends Transformer[LabeledGreyImage, LabeledGreyImage] {

  def getMean(): Double = mean

  def getStd(): Double = std

  override def apply(prev: Iterator[LabeledGreyImage]): Iterator[LabeledGreyImage] = {
    prev.map(img => {
      var i = 0
      val content = img.content
      while (i < content.length) {
        content(i) = ((content(i) - mean) / std).toFloat
        i += 1
      }
      img
    })
  }
} 
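
The first apply overload estimates mean and std in two passes over up to samples images; the second simply wraps known statistics. Usage sketch (the numbers are the commonly quoted MNIST statistics; greyImages is an assumed iterator):

val normalizer = GreyImgNormalizer(mean = 0.1307, std = 0.3081)
val normalized: Iterator[LabeledGreyImage] = normalizer(greyImages)
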
Example 21
Source File: HFlip.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.Transformer
import com.intel.analytics.bigdl.utils.RandomGenerator

import scala.collection.Iterator

object HFlip {
  def apply(threshold: Double = 0.0): HFlip = {
    new HFlip(threshold)
  }
}


class HFlip(threshold: Double) extends Transformer[LabeledBGRImage, LabeledBGRImage] {
  override def apply(prev: Iterator[LabeledBGRImage]): Iterator[LabeledBGRImage] = {
    prev.map(img => {
      if (RandomGenerator.RNG.uniform(0, 1) >= threshold) {
        img.hflip()
      } else {
        img
      }
    })
  }
} 
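
Note the threshold semantics: an image is flipped when a uniform draw is >= threshold, so HFlip(0.5) flips roughly half the images and the default HFlip() flips every image. Usage sketch (images is an assumed iterator):

val flipped: Iterator[LabeledBGRImage] = HFlip(0.5)(images)
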
Example 22
Source File: LocalSeqFileToBytes.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.DataSet.SeqFileFolder
import com.intel.analytics.bigdl.dataset.{ByteRecord, LocalSeqFilePath, Transformer}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.SequenceFile.Reader
import org.apache.hadoop.io.{SequenceFile, Text}

import scala.collection.Iterator

object LocalSeqFileToBytes {
  def apply(): LocalSeqFileToBytes = new LocalSeqFileToBytes()
}


class LocalSeqFileToBytes extends Transformer[LocalSeqFilePath, ByteRecord] {

  import org.apache.hadoop.fs.{Path => hPath}


  @transient
  private var key: Text = null

  @transient
  private var value: Text = null

  @transient
  private var reader: SequenceFile.Reader = null

  @transient
  private var oneRecordBuffer: ByteRecord = null

  override def apply(prev: Iterator[LocalSeqFilePath]): Iterator[ByteRecord] = {
    new Iterator[ByteRecord] {
      override def next(): ByteRecord = {
        if (oneRecordBuffer != null) {
          val res = oneRecordBuffer
          oneRecordBuffer = null
          return res
        }

        if (key == null) {
          key = new Text()
        }
        if (value == null) {
          value = new Text
        }
        if (reader == null || !reader.next(key, value)) {
          if (reader != null) {
            reader.close()
          }

          reader = new SequenceFile.Reader(new Configuration,
            Reader.file(new hPath(prev.next().path.toAbsolutePath.toString)))
          reader.next(key, value)
        }

        ByteRecord(value.copyBytes(), SeqFileFolder.readLabel(key).toFloat)
      }

      override def hasNext: Boolean = {
        if (oneRecordBuffer != null) {
          true
        } else if (reader == null) {
          prev.hasNext
        } else {
          if (reader.next(key, value)) {
            oneRecordBuffer = ByteRecord(value.copyBytes(),
              SeqFileFolder.readLabel(key).toFloat)
            return true
          } else {
            prev.hasNext
          }
        }
      }
    }
  }
} 
Example 23
Source File: GreyImgCropper.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.Transformer

import scala.collection.Iterator

object GreyImgCropper {
  def apply(cropWidth: Int, cropHeight: Int) : GreyImgCropper = {
    new GreyImgCropper(cropWidth, cropHeight)
  }
}


class GreyImgCropper(cropWidth: Int, cropHeight: Int)
  extends Transformer[LabeledGreyImage, LabeledGreyImage] {

  import com.intel.analytics.bigdl.utils.RandomGenerator.RNG

  private val buffer = new LabeledGreyImage(cropWidth, cropHeight)

  override def apply(prev: Iterator[LabeledGreyImage]): Iterator[LabeledGreyImage] = {
    prev.map(img => {
      val width = img.width()
      val height = img.height()
      val startW = RNG.uniform(0, width - cropWidth).toInt
      val startH = RNG.uniform(0, height - cropHeight).toInt
      val startIndex = startW + startH * width
      val frameLength = cropWidth * cropHeight
      val source = img.content
      val target = buffer.content
      var i = 0
      while (i < frameLength) {
        target(i) = source(startIndex + (i / cropWidth) * width +
          (i % cropWidth))
        i += 1
      }

      buffer.setLabel(img.label())
    })
  }
} 
Example 24
Source File: SentenceBiPadding.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.dataset.text

import com.intel.analytics.bigdl.dataset.Transformer
import com.intel.analytics.bigdl.dataset.text.utils.SentenceToken

import scala.collection.Iterator



class SentenceBiPadding(
  start: Option[String] = None,
  end: Option[String] = None)
  extends Transformer[String, String] {

  val sentenceStart = start.getOrElse(SentenceToken.start)
  val sentenceEnd = end.getOrElse(SentenceToken.end)

  override def apply(prev: Iterator[String]): Iterator[String] = {
    prev.map(x => {
      val sentence = sentenceStart + " " + x + " " + sentenceEnd
      sentence
    })
  }
}

object SentenceBiPadding {
  def apply(start: Option[String] = None,
            end: Option[String] = None):
    SentenceBiPadding = new SentenceBiPadding(start, end)
} 
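
Usage sketch; each sentence is wrapped with the default start/end markers from SentenceToken, or with the strings you pass in (sentences is an assumed Iterator[String]):

val padded: Iterator[String] = SentenceBiPadding()(sentences)
val custom: Iterator[String] = SentenceBiPadding(start = Some("<s>"), end = Some("</s>"))(sentences)
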
Example 25
Source File: SentenceSplitter.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.dataset.text

import java.io.FileInputStream
import java.net.{URI, URL}

import com.intel.analytics.bigdl.dataset.Transformer
import opennlp.tools.sentdetect.{SentenceDetector, SentenceDetectorME, SentenceModel}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

import scala.collection.Iterator


class SentenceSplitter(sentFile: Option[String] = None)
  extends Transformer[String, Array[String]] {

  var modelIn: FileInputStream = _
  var model: SentenceModel = _
  var sentenceDetector: SentenceDetector = _

  def this(sentFileURL: URL) {
    this(Some(sentFileURL.getPath))
  }

  def this(sentFile: String) {
    this(Some(sentFile))
  }

  def close(): Unit = {
    if (modelIn != null) {
      modelIn.close()
    }
  }

  override def apply(prev: Iterator[String]): Iterator[Array[String]] =
    prev.map(x => {
      if (!sentFile.isDefined) {
        x.split('.')
      } else {
        if (sentenceDetector == null) {
          val src: Path = new Path(sentFile.get)
          val fs = src.getFileSystem(new Configuration())
          val in = fs.open(src)

          model = new SentenceModel(in)
          sentenceDetector = new SentenceDetectorME(model)
        }
        sentenceDetector.sentDetect(x)
      }
    })
}

object SentenceSplitter {
  def apply(sentFile: Option[String] = None):
    SentenceSplitter = new SentenceSplitter(sentFile)
  def apply(sentFileURL: URL):
    SentenceSplitter = new SentenceSplitter(sentFileURL)
  def apply(sentFile: String):
    SentenceSplitter = new SentenceSplitter(sentFile)
} 
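
Usage sketch: with no model file the splitter falls back to a naive split on '.', otherwise it loads an OpenNLP SentenceModel from the given (possibly HDFS) path. docs is an assumed Iterator[String]; the model path is hypothetical:

val naive: Iterator[Array[String]] = SentenceSplitter()(docs)
val openNlp: Iterator[Array[String]] = SentenceSplitter("/models/en-sent.bin")(docs)
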
Example 26
Source File: TextToLabeledSentence.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.dataset.text

import com.intel.analytics.bigdl.dataset.Transformer
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric

import scala.collection.Iterator
import scala.reflect.ClassTag

object TextToLabeledSentence {
  def apply[T: ClassTag](dictionary: Dictionary)
           (implicit ev: TensorNumeric[T])
  : TextToLabeledSentence[T] =
    new TextToLabeledSentence[T](dictionary)
  def apply[T: ClassTag](numSteps: Int)(implicit ev: TensorNumeric[T])
  : TextToSentenceWithSteps[T] = new TextToSentenceWithSteps[T](numSteps)
}


private[bigdl] class TextToSentenceWithSteps[T: ClassTag](numSteps: Int)
  (implicit ev: TensorNumeric[T])
  extends Transformer[Array[T], LabeledSentence[T]] {
  val xbuffer = new Array[T](numSteps)
  val ybuffer = new Array[T](numSteps)
  val buffer = new LabeledSentence[T]()

  override def apply(prev: Iterator[Array[T]]): Iterator[LabeledSentence[T]] = {
    prev.map(sentence => {
      require(sentence.length >= numSteps + 1,
        "input sentence length should be numSteps + 1, " +
          s"sentence.length = ${sentence.length}, numSteps = ${numSteps}")
      Array.copy(sentence, 0, xbuffer, 0, numSteps)
      Array.copy(sentence, 1, ybuffer, 0, numSteps)

      buffer.copy(xbuffer, ybuffer)
      buffer
    })
  }
} 
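
The two Array.copy calls implement the usual language-model shift: data is the first numSteps tokens and the label is the same window advanced by one, so for numSteps = 3 and sentence Array(10f, 11f, 12f, 13f) the data is (10, 11, 12) and the label is (11, 12, 13). A hedged usage sketch (the NumericFloat import path and the tokenIds iterator are assumptions):

import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric.NumericFloat

val toSteps = TextToLabeledSentence[Float](numSteps = 3)
val labeled: Iterator[LabeledSentence[Float]] = toSteps(tokenIds) // tokenIds: Iterator[Array[Float]]
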
Example 27
Source File: LabeledSentenceToSample.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.dataset.text

import com.intel.analytics.bigdl.dataset.{Sample, Transformer}

import scala.collection.Iterator
import java.util

import com.intel.analytics.bigdl.tensor.{DoubleType, FloatType, Tensor}
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric

import scala.reflect.ClassTag

object LabeledSentenceToSample {
  def apply[T: ClassTag]
  (vocabLength: Int,
   fixDataLength: Option[Int] = None,
   fixLabelLength: Option[Int] = None)
  (implicit ev: TensorNumeric[T])
  : LabeledSentenceToSample[T] =
    new LabeledSentenceToSample[T](
      vocabLength,
      fixDataLength,
      fixLabelLength,
      true)
  def apply[T: ClassTag]
  (oneHot: Boolean,
   fixDataLength: Option[Int],
   fixLabelLength: Option[Int])
  (implicit ev: TensorNumeric[T])
  : LabeledSentenceToSample[T] =
    new LabeledSentenceToSample[T](
      vocabLength = 0,
      fixDataLength,
      fixLabelLength,
      oneHot)
}




        val startTokenIndex = sentence.getData(0)
        val endTokenIndex = if (labelLength == 1) 0
          else ev.toType[Int](sentence.getLabel(sentence.labelLength - 1))

        var i = 0
        while (i < sentence.dataLength) {
          featureBuffer(i * vocabLength + ev.toType[Int](sentence.getData(i)))
            = ev.fromType[Float](1.0f)
          i += 1
        }
        while (i < dataLength) {
          featureBuffer(i * vocabLength + endTokenIndex) = ev.fromType[Float](1.0f)
          i += 1
        }

        i = 0
        while (i < sentence.labelLength) {
          labelBuffer(i) = ev.plus(sentence.label()(i), ev.fromType[Float](1.0f))
          i += 1
        }
        while (i < labelLength) {
          labelBuffer(i) = ev.plus(startTokenIndex, ev.fromType[Float](1.0f))
          i += 1
        }
      } else {
        feature.resize(dataLength).zero
        label.resize(labelLength).zero

        val featureBuffer = feature.storage().array()
        val labelBuffer = label.storage().array()

        Array.copy(sentence.data, 0, featureBuffer, 0, dataLength)
        Array.copy(sentence.label, 0, labelBuffer, 0, labelLength)
      }
      Sample[T](feature, label)
    })
  }
} 
Example 28
Source File: SentenceTokenizer.scala (from BigDL, Apache License 2.0)
package com.intel.analytics.bigdl.dataset.text

import java.io.FileInputStream
import java.net.{URI, URL}

import com.intel.analytics.bigdl.dataset.Transformer

import scala.collection.Iterator
import opennlp.tools.tokenize.{SimpleTokenizer, Tokenizer, TokenizerME, TokenizerModel}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}



class SentenceTokenizer(tokenFile: Option[String] = None)
  extends Transformer[String, Array[String]] {

  var modelIn: FileInputStream = _
  var model: TokenizerModel = _

  var tokenizer: Tokenizer = _

  def this(tokenFile: URL) {
    this(Some(tokenFile.getPath))
  }

  def close(): Unit = {
    if (modelIn != null) {
      modelIn.close()
    }
  }

  override def apply(prev: Iterator[String]): Iterator[Array[String]] =
    prev.map(x => {
      if (tokenizer == null) {
        if (!tokenFile.isDefined) {
          tokenizer = SimpleTokenizer.INSTANCE
        } else {
          val src: Path = new Path(tokenFile.get)
          val fs = src.getFileSystem(new Configuration())
          val in = fs.open(src)
          model = new TokenizerModel(in)
          tokenizer = new TokenizerME(model)
        }
      }
      val words = tokenizer.tokenize(x)
      words
    })
}

object SentenceTokenizer {
  def apply(tokenFile: Option[String] = None):
    SentenceTokenizer = new SentenceTokenizer(tokenFile)
  def apply(tokenFile: URL):
    SentenceTokenizer = new SentenceTokenizer(tokenFile)
}
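
Usage sketch, mirroring the splitter in Example 25: with no model file it uses OpenNLP's SimpleTokenizer, otherwise a TokenizerME loaded from the given path (sentences is an assumed Iterator[String]):

val tokens: Iterator[Array[String]] = SentenceTokenizer()(sentences)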