scala.collection.Iterator Scala Examples
The following examples show how to use scala.collection.Iterator.
Example 1
Source File: BGRImgToBatch.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.{MiniBatch, Transformer, Utils}
import com.intel.analytics.bigdl.tensor.{Storage, Tensor}

import scala.collection.Iterator

object BGRImgToBatch {
  def apply(batchSize: Int, toRGB: Boolean = true): BGRImgToBatch =
    new BGRImgToBatch(batchSize, toRGB)
}

class BGRImgToBatch(totalBatch: Int, toRGB: Boolean = true)
  extends Transformer[LabeledBGRImage, MiniBatch[Float]] {

  private val batchPerCore = Utils.getBatchSize(totalBatch)

  override def apply(prev: Iterator[LabeledBGRImage]): Iterator[MiniBatch[Float]] = {
    val batchSizePerCore = batchPerCore
    new Iterator[MiniBatch[Float]] {
      private val featureTensor: Tensor[Float] = Tensor[Float]()
      private val labelTensor: Tensor[Float] = Tensor[Float]()
      private var featureData: Array[Float] = null
      private var labelData: Array[Float] = null
      private var width = 0
      private var height = 0
      private val batchSize = batchSizePerCore

      override def hasNext: Boolean = prev.hasNext

      override def next(): MiniBatch[Float] = {
        if (prev.hasNext) {
          var i = 0
          while (i < batchSize && prev.hasNext) {
            val img = prev.next()
            if (featureData == null) {
              featureData = new Array[Float](batchSize * 3 * img.height() * img.width())
              labelData = new Array[Float](batchSize)
              height = img.height()
              width = img.width()
            }
            img.copyTo(featureData, i * img.width() * img.height() * 3, toRGB)
            labelData(i) = img.label()
            i += 1
          }
          if (labelTensor.nElement() != i) {
            featureTensor.set(Storage[Float](featureData),
              storageOffset = 1, sizes = Array(i, 3, height, width))
            labelTensor.set(Storage[Float](labelData),
              storageOffset = 1, sizes = Array(i))
          }
          MiniBatch(featureTensor, labelTensor)
        } else {
          null
        }
      }
    }
  }
}
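A minimal usage sketch (hypothetical, not from the BigDL sources): the transformer turns an iterator of labeled images into an iterator of mini-batches, with the per-core batch size derived by Utils.getBatchSize, so the exact split depends on the local core configuration. Note that featureTensor and labelTensor are reused across calls to next().

// Hypothetical usage sketch, assuming the LabeledBGRImage constructor and
// setLabel shown in the other examples on this page.
val images = Iterator.fill(8)(new LabeledBGRImage(32, 32).setLabel(1f))
val batches = BGRImgToBatch(batchSize = 4).apply(images)
val first = batches.next() // feature tensor shaped (n, 3, 32, 32)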
Example 2
Source File: ParRange.scala From scala-parallel-collections with Apache License 2.0

package scala
package collection.parallel.immutable

import scala.collection.immutable.Range
import scala.collection.parallel.Combiner
import scala.collection.parallel.SeqSplitter
import scala.collection.Iterator

// Excerpt: the enclosing ParRange class and its splitter are elided in the
// original listing; the fragment below picks up inside the splitter.

    override def map2combiner[S, That](f: Int => S, cb: Combiner[S, That]): Combiner[S, That] = {
      while (hasNext) {
        cb += f(next)
      }
      cb
    }
  }

  override def toString = s"Par$range"
}

object ParRange {
  def apply(start: Int, end: Int, step: Int, inclusive: Boolean) = new ParRange(
    if (inclusive) Range.inclusive(start, end, step)
    else Range(start, end, step)
  )
}
Example 3
Source File: BufferedSource.scala From perf_tester with Apache License 2.0

package scala.io

import java.io.{ InputStream, BufferedReader, InputStreamReader, PushbackReader }
import Source.DefaultBufSize
import scala.collection.{ Iterator, AbstractIterator }

// Excerpt: the enclosing BufferedSource class (which defines decachedReader
// and bufferSize) is elided in the original listing.

  override def mkString = {
    // Speed up slurping of whole data set in the simplest cases.
    val allReader = decachedReader
    val sb = new StringBuilder
    val buf = new Array[Char](bufferSize)
    var n = 0
    while (n != -1) {
      n = allReader.read(buf)
      if (n > 0) sb.appendAll(buf, 0, n)
    }
    sb.result
  }
}
Example 4
Source File: SystemProperties.scala From perf_tester with Apache License 2.0

package scala
package sys

import scala.collection.{ mutable, Iterator }
import scala.collection.JavaConverters._
import java.security.AccessControlException
import scala.language.implicitConversions

// Excerpt: the SystemProperties class is elided in the original listing; the
// members below belong to its companion object.

  def exclusively[T](body: => T) = this synchronized body

  implicit def systemPropertiesToCompanion(p: SystemProperties): SystemProperties.type = this

  private final val HeadlessKey            = "java.awt.headless"
  private final val PreferIPv4StackKey     = "java.net.preferIPv4Stack"
  private final val PreferIPv6AddressesKey = "java.net.preferIPv6Addresses"
  private final val NoTraceSuppressionKey  = "scala.control.noTraceSuppression"

  def help(key: String): String = key match {
    case HeadlessKey            => "system should not utilize a display device"
    case PreferIPv4StackKey     => "system should prefer IPv4 sockets"
    case PreferIPv6AddressesKey => "system should prefer IPv6 addresses"
    case NoTraceSuppressionKey  => "scala should not suppress any stack trace creation"
    case _                      => ""
  }

  lazy val headless: BooleanProp = BooleanProp.keyExists(HeadlessKey)
  lazy val preferIPv4Stack: BooleanProp = BooleanProp.keyExists(PreferIPv4StackKey)
  lazy val preferIPv6Addresses: BooleanProp = BooleanProp.keyExists(PreferIPv6AddressesKey)
  lazy val noTraceSuppression: BooleanProp = BooleanProp.valueIsTrue(NoTraceSuppressionKey)
}
Example 5
Source File: RandomAlterAspect.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.transform.vision.image.augmentation

import breeze.numerics.sqrt
import org.opencv.core.{CvType, Mat, Rect}
import com.intel.analytics.bigdl.dataset.Transformer
import com.intel.analytics.bigdl.dataset.image.LabeledBGRImage
import com.intel.analytics.bigdl.opencv.OpenCV
import org.opencv.imgproc.Imgproc

import scala.collection.Iterator
import com.intel.analytics.bigdl.opencv
import com.intel.analytics.bigdl.transform.vision.image.{FeatureTransformer, ImageFeature}
import com.intel.analytics.bigdl.transform.vision.image.opencv.OpenCVMat
import org.apache.spark.ml
import org.apache.spark.ml.feature
import org.opencv.core.Size

object RandomAlterAspect {
  def apply(min_area_ratio: Float = 0.08f,
            max_area_ratio: Int = 1,
            min_aspect_ratio_change: Float = 0.75f,
            interp_mode: String = "CUBIC",
            cropLength: Int = 224): RandomAlterAspect = {
    OpenCV.isOpenCVLoaded
    new RandomAlterAspect(min_area_ratio, max_area_ratio,
      min_aspect_ratio_change, interp_mode, cropLength)
  }
}

class RandomAlterAspect(min_area_ratio: Float = 0.08f,
                        max_area_ratio: Int = 1,
                        min_aspect_ratio_change: Float = 0.75f,
                        interp_mode: String = "CUBIC",
                        cropLength: Int = 224)
  extends FeatureTransformer {

  import com.intel.analytics.bigdl.utils.RandomGenerator.RNG

  @inline
  private def randRatio(min: Float, max: Float): Float = {
    val res = (RNG.uniform(1e-2, (max - min) * 1000 + 1) + min * 1000) / 1000
    res.toFloat
  }

  override protected def transformMat(feature: ImageFeature): Unit = {
    val h = feature.opencvMat().size().height
    val w = feature.opencvMat().size().width
    val area = h * w

    require(min_area_ratio <= max_area_ratio, "min_area_ratio should <= max_area_ratio")

    var attempt = 0
    while (attempt < 10) {
      val area_ratio = randRatio(min_area_ratio, max_area_ratio)
      val aspect_ratio_change = randRatio(min_aspect_ratio_change, 1 / min_aspect_ratio_change)
      val new_area = area_ratio * area
      var new_h = (sqrt(new_area) * aspect_ratio_change).toInt
      var new_w = (sqrt(new_area) / aspect_ratio_change).toInt
      if (randRatio(0, 1) < 0.5) {
        val tmp = new_h
        new_h = new_w
        new_w = tmp
      }
      if (new_h <= h && new_w <= w) {
        val y = RNG.uniform(1e-2, h - new_h + 1).toInt
        val x = RNG.uniform(1e-2, w - new_w + 1).toInt
        Crop.transform(feature.opencvMat(), feature.opencvMat(),
          x, y, x + new_w, y + new_h, false, false)
        Imgproc.resize(feature.opencvMat(), feature.opencvMat(),
          new Size(cropLength, cropLength), 0, 0, 2)
        attempt = 100
      }
      attempt += 1
    }
    if (attempt < 20) {
      val (new_h, new_w) = resizeImagePerShorterSize(feature.opencvMat(), cropLength)
      Imgproc.resize(feature.opencvMat(), feature.opencvMat(),
        new Size(cropLength, cropLength), 0, 0, 2)
    }
  }

  private def resizeImagePerShorterSize(img: Mat, shorter_size: Int): (Int, Int) = {
    val h = img.size().height
    val w = img.size().width
    var new_h = shorter_size
    var new_w = shorter_size

    if (h < w) {
      new_w = (w / h * shorter_size).toInt
    } else {
      new_h = (h / w * shorter_size).toInt
    }
    (new_h, new_w)
  }
}
Example 6
Source File: ChannelScaledNormalizer.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.transform.vision.image.augmentation

import com.intel.analytics.bigdl.dataset.image.LabeledBGRImage
import com.intel.analytics.bigdl.dataset.{LocalDataSet, Transformer}
import com.intel.analytics.bigdl.transform.vision.image.{FeatureTransformer, ImageFeature}
import com.intel.analytics.bigdl.transform.vision.image.opencv.OpenCVMat
import org.apache.log4j.Logger

import scala.collection.Iterator

object ChannelScaledNormalizer {
  def apply(meanR: Int, meanG: Int, meanB: Int, scale: Double): ChannelScaledNormalizer = {
    new ChannelScaledNormalizer(meanR, meanG, meanB, scale)
  }
}

class ChannelScaledNormalizer(meanR: Int, meanG: Int, meanB: Int, scale: Double)
  extends FeatureTransformer {

  override protected def transformMat(feature: ImageFeature): Unit = {
    val mat = feature.opencvMat()
    val toFloats = OpenCVMat.toFloatPixels(mat)
    val content = toFloats._1
    require(content.length % 3 == 0, "Content should be multiple of 3 channels")
    var i = 0
    val frameLength = content.length / 3
    val height = toFloats._2
    val width = toFloats._3
    val bufferContent = new Array[Float](width * height * 3)

    val channels = 3
    val mean = Array(meanR, meanG, meanB)
    var c = 0
    while (c < channels) {
      i = 0
      while (i < frameLength) {
        val data_index = c * frameLength + i
        bufferContent(data_index) = ((content(data_index) - mean(c)) * scale).toFloat
        i += 1
      }
      c += 1
    }
    if (mat != null) {
      mat.release()
    }
    val newMat = OpenCVMat.fromFloats(bufferContent, height, width)
    feature(ImageFeature.mat) = newMat
  }
}
Example 7
Source File: BGRImgToSample.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.{Sample, Transformer}
import com.intel.analytics.bigdl.tensor.Tensor

import scala.collection.Iterator

object BGRImgToSample {
  def apply(toRGB: Boolean = true): BGRImgToSample = {
    new BGRImgToSample(toRGB)
  }
}

class BGRImgToSample(toRGB: Boolean = true)
  extends Transformer[LabeledBGRImage, Sample[Float]] {

  private val featureBuffer = Tensor[Float]()
  private val labelBuffer = Tensor[Float](1)

  override def apply(prev: Iterator[LabeledBGRImage]): Iterator[Sample[Float]] = {
    prev.map(img => {
      labelBuffer.storage.array()(0) = img.label()
      if (featureBuffer.nElement() != 3 * img.height() * img.width()) {
        featureBuffer.resize(3, img.height(), img.width())
      }
      img.copyTo(featureBuffer.storage().array(), 0, toRGB)

      Sample(featureBuffer, labelBuffer)
    })
  }
}
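Note that featureBuffer and labelBuffer are allocated once and reused for every image, so each emitted Sample should be consumed before the next call to next(). A hypothetical usage sketch (the 16x16 image and label are made up):

// Hypothetical usage sketch; the buffers above are shared across iterations.
val toSample = BGRImgToSample(toRGB = true)
val samples = toSample.apply(Iterator(new LabeledBGRImage(16, 16).setLabel(2f)))
samples.foreach(sample => println(sample))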
Example 8
Source File: BGRImgRdmCropper.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.Transformer

import scala.collection.Iterator

object BGRImgRdmCropper {
  def apply(cropWidth: Int, cropHeight: Int, padding: Int): BGRImgRdmCropper =
    new BGRImgRdmCropper(cropHeight, cropWidth, padding)
}

class BGRImgRdmCropper(cropHeight: Int, cropWidth: Int, padding: Int)
  extends Transformer[LabeledBGRImage, LabeledBGRImage] {
  import com.intel.analytics.bigdl.utils.RandomGenerator.RNG

  private val buffer = new LabeledBGRImage(cropWidth, cropHeight)

  override def apply(prev: Iterator[LabeledBGRImage]): Iterator[LabeledBGRImage] = {
    prev.map(img => {
      // Pad the source image with `padding` zero pixels on each side first.
      val curImg = if (padding > 0) {
        val widthTmp = img.width()
        val heightTmp = img.height()
        val sourceTmp = img.content
        val padWidth = widthTmp + 2 * padding
        val padHeight = heightTmp + 2 * padding
        val temp = new LabeledBGRImage(padWidth, padHeight)
        val tempBuffer = temp.content
        val startIndex = (padding + padding * padWidth) * 3
        val frameLength = widthTmp * heightTmp
        var i = 0
        while (i < frameLength) {
          tempBuffer(startIndex + ((i / widthTmp) * padWidth + (i % widthTmp)) * 3 + 2) =
            sourceTmp(i * 3 + 2)
          tempBuffer(startIndex + ((i / widthTmp) * padWidth + (i % widthTmp)) * 3 + 1) =
            sourceTmp(i * 3 + 1)
          tempBuffer(startIndex + ((i / widthTmp) * padWidth + (i % widthTmp)) * 3) =
            sourceTmp(i * 3)
          i += 1
        }
        temp.setLabel(img.label())
        temp
      } else {
        img
      }

      val width = curImg.width()
      val height = curImg.height()
      val source = curImg.content

      val startW = RNG.uniform(0, width - cropWidth).toInt
      val startH = RNG.uniform(0, height - cropHeight).toInt
      val startIndex = (startW + startH * width) * 3
      val frameLength = cropWidth * cropHeight

      val target = buffer.content
      var i = 0
      while (i < frameLength) {
        target(i * 3 + 2) =
          source(startIndex + ((i / cropWidth) * width + (i % cropWidth)) * 3 + 2)
        target(i * 3 + 1) =
          source(startIndex + ((i / cropWidth) * width + (i % cropWidth)) * 3 + 1)
        target(i * 3) =
          source(startIndex + ((i / cropWidth) * width + (i % cropWidth)) * 3)
        i += 1
      }
      buffer.setLabel(curImg.label())
    })
  }
}
Example 9
Source File: GreyImgToSample.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.{Sample, Transformer}
import com.intel.analytics.bigdl.tensor.{Storage, Tensor}

import scala.collection.Iterator

object GreyImgToSample {
  def apply(): GreyImgToSample = {
    new GreyImgToSample()
  }
}

class GreyImgToSample() extends Transformer[LabeledGreyImage, Sample[Float]] {

  private val featureBuffer = Tensor[Float]()
  private val labelBuffer = Tensor[Float](1)
  private val featureSize = new Array[Int](2)

  override def apply(prev: Iterator[LabeledGreyImage]): Iterator[Sample[Float]] = {
    prev.map(img => {
      labelBuffer.storage.array()(0) = img.label()
      featureSize(0) = img.height()
      featureSize(1) = img.width()
      featureBuffer.set(Storage(img.content), sizes = featureSize)

      Sample(featureBuffer, labelBuffer)
    })
  }
}
Example 10
Source File: ColorJitter.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.Transformer
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.utils.RandomGenerator
import com.intel.analytics.bigdl.utils.RandomGenerator.RNG

import scala.collection.Iterator
import scala.util.Random

object ColorJitter {
  def apply(): ColorJitter = {
    new ColorJitter()
  }
}

class ColorJitter extends Transformer[LabeledBGRImage, LabeledBGRImage] {
  // TODO: make the bcs parameter configurable
  private val bcsParameters =
    Map("brightness" -> 0.4f, "contrast" -> 0.4f, "saturation" -> 0.4f)
  private var gs: Array[Float] = null

  private def grayScale(dst: Array[Float], img: Array[Float]): Array[Float] = {
    var i = 0
    while (i < img.length) {
      dst(i) = img(i) * 0.299f + img(i + 1) * 0.587f + img(i + 2) * 0.114f
      dst(i + 1) = dst(i)
      dst(i + 2) = dst(i)
      i += 3
    }
    dst
  }

  private def blend(img1: Array[Float], img2: Array[Float], alpha: Float): Array[Float] = {
    var i = 0
    while (i < img1.length) {
      img1(i) = img1(i) * alpha + (1 - alpha) * img2(i)
      i += 1
    }
    img1
  }

  private def saturation(variance: Float)(input: Array[Float]): Array[Float] = {
    if (gs == null || gs.length < input.length) gs = new Array[Float](input.length)
    grayScale(gs, input)
    val alpha = 1.0f + RNG.uniform(-variance, variance).toFloat
    blend(input, gs, alpha)
    input
  }

  private def brightness(variance: Float)(input: Array[Float]): Array[Float] = {
    if (gs == null || gs.length < input.length) gs = new Array[Float](input.length)
    java.util.Arrays.fill(gs, 0, gs.length, 0.0f)
    val alpha = 1.0f + RNG.uniform(-variance, variance).toFloat
    blend(input, gs, alpha)
    input
  }

  private def contrast(variance: Float)(input: Array[Float]): Array[Float] = {
    if (gs == null || gs.length < input.length) gs = new Array[Float](input.length)
    grayScale(gs, input)
    val mean = gs.sum / gs.length
    java.util.Arrays.fill(gs, 0, gs.length, mean)
    val alpha = 1.0f + RNG.uniform(-variance, variance).toFloat
    blend(input, gs, alpha)
    input
  }

  private val ts = Map(
    1 -> { brightness(bcsParameters.get("brightness").get)(_) },
    2 -> { contrast(bcsParameters.get("contrast").get)(_) },
    3 -> { saturation(bcsParameters.get("saturation").get)(_) }
  )

  private def randomOrder(input: Array[Float]): Unit = {
    val order = Tensor.randperm[Float](3)
    var i = 1
    while (i <= order.size(1)) {
      val idx = order(i).value().toInt
      ts(idx)(input)
      i += 1
    }
  }

  override def apply(prev: Iterator[LabeledBGRImage]): Iterator[LabeledBGRImage] = {
    prev.map(img => {
      val content = img.content
      require(content.length % 3 == 0)
      randomOrder(content)
      img
    })
  }
}
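The three adjustments run in a random order chosen by Tensor.randperm, and all of them mutate the image's float array in place. A hypothetical usage sketch:

// Hypothetical usage sketch: content length must be a multiple of 3 (BGR),
// which holds for any LabeledBGRImage; iterators are lazy, so pull elements
// to trigger the jitter.
val jitter = ColorJitter()
val jittered = jitter.apply(Iterator.fill(4)(new LabeledBGRImage(8, 8)))
jittered.foreach(img => println(img.content.take(3).mkString(", ")))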
Example 11
Source File: BytesToBGRImg.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import java.awt.Color
import java.awt.image.{BufferedImage, DataBufferByte}
import java.nio.ByteBuffer

import com.intel.analytics.bigdl.dataset.{ByteRecord, Transformer}

import scala.collection.Iterator

object BytesToBGRImg {
  def apply(normalize: Float = 255f, resizeW: Int = -1, resizeH: Int = -1): BytesToBGRImg =
    new BytesToBGRImg(normalize, resizeW, resizeH)
}

class BytesToBGRImg(normalize: Float, resizeW: Int = -1, resizeH: Int = -1)
  extends Transformer[ByteRecord, LabeledBGRImage] {

  private val buffer = new LabeledBGRImage()

  override def apply(prev: Iterator[ByteRecord]): Iterator[LabeledBGRImage] = {
    prev.map(rawData => {
      buffer.copy(getImgData(rawData, resizeW, resizeH), normalize).setLabel(rawData.label)
    })
  }

  private def getImgData(record: ByteRecord, resizeW: Int, resizeH: Int): Array[Byte] = {
    if (resizeW == -1) {
      record.data
    } else {
      val rawData = record.data
      val imgBuffer = ByteBuffer.wrap(rawData)
      val width = imgBuffer.getInt
      val height = imgBuffer.getInt
      val bufferedImage: BufferedImage =
        new BufferedImage(width, height, BufferedImage.TYPE_3BYTE_BGR)
      val outputImagePixelData =
        bufferedImage.getRaster.getDataBuffer.asInstanceOf[DataBufferByte].getData
      System.arraycopy(imgBuffer.array(), 8, outputImagePixelData, 0,
        outputImagePixelData.length)
      BGRImage.resizeImage(bufferedImage, resizeW, resizeH)
    }
  }
}
Example 12
Source File: Lighting.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.Transformer
import com.intel.analytics.bigdl.tensor.{Storage, Tensor}
import com.intel.analytics.bigdl.utils.RandomGenerator.RNG

import scala.collection.Iterator

object Lighting {
  def apply(): Lighting = {
    new Lighting()
  }
}

class Lighting extends Transformer[LabeledBGRImage, LabeledBGRImage] {
  val alphastd = 0.1f
  val eigval = Tensor[Float](Storage(Array(0.2175f, 0.0188f, 0.0045f)), 1, Array(3))
  val eigvec = Tensor[Float](Storage(Array(
    -0.5675f, 0.7192f, 0.4009f,
    -0.5808f, -0.0045f, -0.8140f,
    -0.5836f, -0.6948f, 0.4203f)), 1, Array(3, 3))

  def lighting(input: Array[Float]): Unit = {
    if (alphastd != 0) {
      val alpha = Tensor[Float](3).apply1(_ => RNG.uniform(0, alphastd).toFloat)
      val rgb = eigvec.clone
        .cmul(alpha.view(1, 3).expand(Array(3, 3)))
        .cmul(eigval.view(1, 3).expand(Array(3, 3)))
        .sum(2).squeeze
      var i = 0
      while (i < input.length) {
        input(i) = input(i) + rgb.storage().array()(0)
        input(i + 1) = input(i + 1) + rgb.storage().array()(1)
        input(i + 2) = input(i + 2) + rgb.storage().array()(2)
        i += 3
      }
    }
  }

  override def apply(prev: Iterator[LabeledBGRImage]): Iterator[LabeledBGRImage] = {
    prev.map(img => {
      lighting(img.content)
      img
    })
  }
}
Example 13
Source File: BGRImgToLocalSeqFile.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import java.nio.ByteBuffer
import java.nio.file.Path

import com.intel.analytics.bigdl.dataset.Transformer
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{Path => hadoopPath}
import org.apache.hadoop.io.{SequenceFile, Text}

import scala.collection.Iterator

object BGRImgToLocalSeqFile {
  def apply(blockSize: Int, baseFileName: Path, hasName: Boolean = false): BGRImgToLocalSeqFile = {
    new BGRImgToLocalSeqFile(blockSize, baseFileName, hasName)
  }
}

class BGRImgToLocalSeqFile(blockSize: Int, baseFileName: Path, hasName: Boolean = false)
  extends Transformer[(LabeledBGRImage, String), String] {

  private val conf: Configuration = new Configuration
  private var index = 0
  private val preBuffer: ByteBuffer = ByteBuffer.allocate(4 * 2)

  override def apply(prev: Iterator[(LabeledBGRImage, String)]): Iterator[String] = {
    new Iterator[String] {
      override def hasNext: Boolean = prev.hasNext

      override def next(): String = {
        val fileName = baseFileName + s"_$index.seq"
        val path = new hadoopPath(fileName)
        val writer = SequenceFile.createWriter(conf,
          SequenceFile.Writer.file(path),
          SequenceFile.Writer.keyClass(classOf[Text]),
          SequenceFile.Writer.valueClass(classOf[Text]))
        var i = 0
        while (i < blockSize && prev.hasNext) {
          val (image, imageName) = prev.next()

          preBuffer.putInt(image.width())
          preBuffer.putInt(image.height())
          val imageByteData = image.convertToByte()
          val data: Array[Byte] = new Array[Byte](preBuffer.capacity + imageByteData.length)
          System.arraycopy(preBuffer.array, 0, data, 0, preBuffer.capacity)
          System.arraycopy(imageByteData, 0, data, preBuffer.capacity, imageByteData.length)
          preBuffer.clear
          val imageKey = if (hasName) {
            s"${imageName}\n${image.label().toInt}"
          } else {
            s"${image.label().toInt}"
          }
          writer.append(new Text(imageKey), new Text(data))
          i += 1
        }
        writer.close()
        index += 1
        fileName
      }
    }
  }
}
Example 14
Source File: LocalImgReader.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import java.awt.color.ColorSpace

import com.intel.analytics.bigdl.dataset.Transformer

import scala.collection.Iterator

object LocalImgReader {
  Class.forName("javax.imageio.ImageIO")
  Class.forName("java.awt.color.ICC_ColorSpace")
  // Class.forName("sun.java2d.cmm.lcms.LCMS")
  ColorSpace.getInstance(ColorSpace.CS_sRGB).toRGB(Array[Float](0, 0, 0))

  def apply(scaleTo: Int = BGRImage.NO_SCALE, normalize: Float = 255f)
  : Transformer[LocalLabeledImagePath, LabeledBGRImage] =
    new LocalScaleImgReader(scaleTo, normalize)

  def apply(resizeW: Int, resizeH: Int, normalize: Float)
  : Transformer[LocalLabeledImagePath, LabeledBGRImage] =
    new LocalResizeImgReader(resizeW, resizeH, normalize)
}

// The LocalScaleImgReader class referenced above is elided in this excerpt.

class LocalResizeImgReader private[dataset](resizeW: Int, resizeH: Int, normalize: Float)
  extends Transformer[LocalLabeledImagePath, LabeledBGRImage] {

  private val buffer = new LabeledBGRImage()

  override def apply(prev: Iterator[LocalLabeledImagePath]): Iterator[LabeledBGRImage] = {
    prev.map(data => {
      val imgData = BGRImage.readImage(data.path, resizeW, resizeH)
      val label = data.label
      buffer.copy(imgData, normalize).setLabel(label)
    })
  }
}
Example 15
Source File: RowToByteRecords.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.example.imageclassification

import com.intel.analytics.bigdl.dataset.{ByteRecord, Transformer}
import org.apache.log4j.Logger
import org.apache.spark.sql.Row

import scala.collection.Iterator

object RowToByteRecords {
  val logger = Logger.getLogger(getClass)

  def apply(colName: String = "data"): RowToByteRecords = {
    new RowToByteRecords(colName)
  }
}

class RowToByteRecords(colName: String) extends Transformer[Row, ByteRecord] {
  override def apply(prev: Iterator[Row]): Iterator[ByteRecord] = {
    prev.map(img => {
      ByteRecord(img.getAs[Array[Byte]](colName), -1.0f)
    })
  }
}
Example 16
Source File: BGRImgCropper.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.Transformer

import scala.collection.Iterator

class BGRImgCropper(cropWidth: Int, cropHeight: Int, cropperMethod: CropperMethod = CropRandom)
  extends Transformer[LabeledBGRImage, LabeledBGRImage] {

  import com.intel.analytics.bigdl.utils.RandomGenerator.RNG

  private val buffer = new LabeledBGRImage(cropWidth, cropHeight)

  override def apply(prev: Iterator[LabeledBGRImage]): Iterator[LabeledBGRImage] = {
    prev.map(img => {
      val width = img.width()
      val height = img.height()
      val (startH, startW) = cropperMethod match {
        case CropRandom =>
          (math.ceil(RNG.uniform(1e-2, height - cropHeight)).toInt,
            math.ceil(RNG.uniform(1e-2, width - cropWidth)).toInt)
        case CropCenter =>
          ((height - cropHeight) / 2, (width - cropWidth) / 2)
      }
      val startIndex = (startW + startH * width) * 3
      val frameLength = cropWidth * cropHeight
      val source = img.content
      val target = buffer.content
      var i = 0
      while (i < frameLength) {
        target(i * 3 + 2) =
          source(startIndex + ((i / cropWidth) * width + (i % cropWidth)) * 3 + 2)
        target(i * 3 + 1) =
          source(startIndex + ((i / cropWidth) * width + (i % cropWidth)) * 3 + 1)
        target(i * 3) =
          source(startIndex + ((i / cropWidth) * width + (i % cropWidth)) * 3)
        i += 1
      }
      buffer.setLabel(img.label())
    })
  }
}
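Since no companion object appears in this excerpt, the class can be instantiated directly; CropCenter gives a deterministic crop, while CropRandom (the default) samples the offsets. A hypothetical usage sketch:

// Hypothetical usage sketch: center-crop 256x256 images down to 224x224.
val cropper = new BGRImgCropper(cropWidth = 224, cropHeight = 224, CropCenter)
val cropped = cropper.apply(Iterator(new LabeledBGRImage(256, 256).setLabel(1f)))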
Example 17
Source File: BytesToGreyImg.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.{ByteRecord, Transformer}

import scala.collection.Iterator

object BytesToGreyImg {
  def apply(row: Int, col: Int): BytesToGreyImg = new BytesToGreyImg(row, col)
}

class BytesToGreyImg(row: Int, col: Int)
  extends Transformer[ByteRecord, LabeledGreyImage] {

  private val buffer = new LabeledGreyImage(row, col)

  override def apply(prev: Iterator[ByteRecord]): Iterator[LabeledGreyImage] = {
    prev.map(rawData => {
      require(row * col == rawData.data.length)
      buffer.setLabel(rawData.label).copy(rawData.data, 255.0f)
    })
  }
}
Example 18
Source File: GreyImgToBatch.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.{Utils, MiniBatch, Transformer}
import com.intel.analytics.bigdl.tensor.{Storage, Tensor}

import scala.collection.Iterator

object GreyImgToBatch {
  def apply(batchSize: Int): GreyImgToBatch = {
    new GreyImgToBatch(batchSize)
  }
}

class GreyImgToBatch private[dataset](totalBatchSize: Int)
  extends Transformer[LabeledGreyImage, MiniBatch[Float]] {

  private def copyImage(img: GreyImage, storage: Array[Float], offset: Int): Unit = {
    val content = img.content
    val frameLength = img.width() * img.height()
    var j = 0
    while (j < frameLength) {
      storage(offset + j) = content(j)
      j += 1
    }
  }

  private val batchPerCore = Utils.getBatchSize(totalBatchSize)

  override def apply(prev: Iterator[LabeledGreyImage]): Iterator[MiniBatch[Float]] = {
    val batchSizePerCore = batchPerCore
    new Iterator[MiniBatch[Float]] {
      private val featureTensor: Tensor[Float] = Tensor[Float]()
      private val labelTensor: Tensor[Float] = Tensor[Float]()
      private var featureData: Array[Float] = null
      private var labelData: Array[Float] = null
      private val batchSize = batchSizePerCore
      private var width = 0
      private var height = 0

      override def hasNext: Boolean = prev.hasNext

      override def next(): MiniBatch[Float] = {
        if (prev.hasNext) {
          var i = 0
          while (i < batchSize && prev.hasNext) {
            val img = prev.next()
            if (featureData == null) {
              featureData = new Array[Float](batchSize * img.height() * img.width())
              labelData = new Array[Float](batchSize)
              height = img.height()
              width = img.width()
            }
            copyImage(img, featureData, i * img.width() * img.height())
            labelData(i) = img.label()
            i += 1
          }

          if (labelTensor.nElement() != i) {
            featureTensor.set(Storage[Float](featureData),
              storageOffset = 1, sizes = Array(i, height, width))
            labelTensor.set(Storage[Float](labelData),
              storageOffset = 1, sizes = Array(i))
          }
          MiniBatch(featureTensor, labelTensor)
        } else {
          null
        }
      }
    }
  }
}
Example 19
Source File: BGRImgToImageVector.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.Transformer
import org.apache.log4j.Logger
import org.apache.spark.mllib.linalg.DenseVector

import scala.collection.Iterator

object BGRImgToImageVector {
  val logger = Logger.getLogger(getClass)

  def apply(): BGRImgToImageVector = {
    new BGRImgToImageVector()
  }
}

class BGRImgToImageVector() extends Transformer[LabeledBGRImage, DenseVector] {

  private var featureData: Array[Float] = null

  override def apply(prev: Iterator[LabeledBGRImage]): Iterator[DenseVector] = {
    prev.map(img => {
      if (null == featureData) {
        featureData = new Array[Float](3 * img.height() * img.width())
      }
      img.copyTo(featureData, 0, true)
      new DenseVector(featureData.map(_.toDouble))
    })
  }
}
Example 20
Source File: GreyImgNormalizer.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.DataSet
import com.intel.analytics.bigdl.dataset.Transformer

import scala.collection.Iterator

object GreyImgNormalizer {
  def apply(dataSet: DataSet[LabeledGreyImage], samples: Int = Int.MaxValue)
  : GreyImgNormalizer = {
    var sum: Double = 0
    var total: Int = 0
    dataSet.shuffle()
    var iter = dataSet.toLocal().data(train = false)
    var i = 0
    while (i < math.min(samples, dataSet.size())) {
      val img = iter.next()
      img.content.foreach(e => {
        sum += e
        total += 1
      })
      i += 1
    }

    val mean = sum / total

    sum = 0
    i = 0
    iter = dataSet.toLocal().data(train = false)
    while (i < math.min(samples, dataSet.size())) {
      val img = iter.next()
      img.content.foreach(e => {
        val diff = e - mean
        sum += diff * diff
      })
      i += 1
    }
    val std = math.sqrt(sum / total).toFloat
    new GreyImgNormalizer(mean, std)
  }

  def apply(mean: Double, std: Double): GreyImgNormalizer = {
    new GreyImgNormalizer(mean, std)
  }
}

class GreyImgNormalizer(mean: Double, std: Double)
  extends Transformer[LabeledGreyImage, LabeledGreyImage] {

  def getMean(): Double = mean

  def getStd(): Double = std

  override def apply(prev: Iterator[LabeledGreyImage]): Iterator[LabeledGreyImage] = {
    prev.map(img => {
      var i = 0
      val content = img.content
      while (i < content.length) {
        content(i) = ((content(i) - mean) / std).toFloat
        i += 1
      }
      img
    })
  }
}
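The first apply overload makes two passes over the data set to estimate the mean and standard deviation; when the statistics are already known, the second overload skips the scan. A hypothetical usage sketch with made-up statistics:

// Hypothetical usage sketch; 33.3 and 78.6 are placeholder mean/std values.
val normalizer = GreyImgNormalizer(33.3, 78.6)
val normalized = normalizer.apply(Iterator(new LabeledGreyImage(28, 28)))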
Example 21
Source File: HFlip.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.Transformer
import com.intel.analytics.bigdl.utils.RandomGenerator

import scala.collection.Iterator

object HFlip {
  def apply(threshold: Double = 0.0): HFlip = {
    new HFlip(threshold)
  }
}

class HFlip(threshold: Double) extends Transformer[LabeledBGRImage, LabeledBGRImage] {
  override def apply(prev: Iterator[LabeledBGRImage]): Iterator[LabeledBGRImage] = {
    prev.map(img => {
      if (RandomGenerator.RNG.uniform(0, 1) >= threshold) {
        img.hflip()
      } else {
        img
      }
    })
  }
}
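An image is flipped when the uniform draw in [0, 1) is at least the threshold, so 0.0 (the default) flips everything and values near 1.0 flip almost nothing. A hypothetical usage sketch:

// Hypothetical usage sketch: flip roughly half of the images in place.
// Iterators are lazy, so pull elements to trigger the flips.
val flip = HFlip(0.5)
val maybeFlipped = flip.apply(Iterator.fill(10)(new LabeledBGRImage(8, 8)))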
Example 22
Source File: LocalSeqFileToBytes.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.DataSet.SeqFileFolder
import com.intel.analytics.bigdl.dataset.{ByteRecord, LocalSeqFilePath, Transformer}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.SequenceFile.Reader
import org.apache.hadoop.io.{SequenceFile, Text}

import scala.collection.Iterator

object LocalSeqFileToBytes {
  def apply(): LocalSeqFileToBytes = new LocalSeqFileToBytes()
}

class LocalSeqFileToBytes extends Transformer[LocalSeqFilePath, ByteRecord] {

  import org.apache.hadoop.fs.{Path => hPath}

  @transient
  private var key: Text = null

  @transient
  private var value: Text = null

  @transient
  private var reader: SequenceFile.Reader = null

  @transient
  private var oneRecordBuffer: ByteRecord = null

  override def apply(prev: Iterator[LocalSeqFilePath]): Iterator[ByteRecord] = {
    new Iterator[ByteRecord] {
      override def next(): ByteRecord = {
        if (oneRecordBuffer != null) {
          val res = oneRecordBuffer
          oneRecordBuffer = null
          return res
        }

        if (key == null) {
          key = new Text()
        }
        if (value == null) {
          value = new Text
        }
        if (reader == null || !reader.next(key, value)) {
          if (reader != null) {
            reader.close()
          }

          reader = new SequenceFile.Reader(new Configuration,
            Reader.file(new hPath(prev.next().path.toAbsolutePath.toString)))
          reader.next(key, value)
        }

        ByteRecord(value.copyBytes(), SeqFileFolder.readLabel(key).toFloat)
      }

      override def hasNext: Boolean = {
        if (oneRecordBuffer != null) {
          true
        } else if (reader == null) {
          prev.hasNext
        } else {
          if (reader.next(key, value)) {
            oneRecordBuffer = ByteRecord(value.copyBytes(),
              SeqFileFolder.readLabel(key).toFloat)
            true
          } else {
            prev.hasNext
          }
        }
      }
    }
  }
}
Example 23
Source File: GreyImgCropper.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.image

import com.intel.analytics.bigdl.dataset.Transformer

import scala.collection.Iterator

object GreyImgCropper {
  def apply(cropWidth: Int, cropHeight: Int): GreyImgCropper = {
    new GreyImgCropper(cropWidth, cropHeight)
  }
}

class GreyImgCropper(cropWidth: Int, cropHeight: Int)
  extends Transformer[LabeledGreyImage, LabeledGreyImage] {

  import com.intel.analytics.bigdl.utils.RandomGenerator.RNG

  private val buffer = new LabeledGreyImage(cropWidth, cropHeight)

  override def apply(prev: Iterator[LabeledGreyImage]): Iterator[LabeledGreyImage] = {
    prev.map(img => {
      val width = img.width()
      val height = img.height()
      val startW = RNG.uniform(0, width - cropWidth).toInt
      val startH = RNG.uniform(0, height - cropHeight).toInt
      val startIndex = startW + startH * width
      val frameLength = cropWidth * cropHeight
      val source = img.content
      val target = buffer.content
      var i = 0
      while (i < frameLength) {
        target(i) = source(startIndex + (i / cropWidth) * width + (i % cropWidth))
        i += 1
      }
      buffer.setLabel(img.label())
    })
  }
}
Example 24
Source File: SentenceBiPadding.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.text

import com.intel.analytics.bigdl.dataset.Transformer
import com.intel.analytics.bigdl.dataset.text.utils.SentenceToken

import scala.collection.Iterator

class SentenceBiPadding(
  start: Option[String] = None,
  end: Option[String] = None)
  extends Transformer[String, String] {

  val sentenceStart = start.getOrElse(SentenceToken.start)
  val sentenceEnd = end.getOrElse(SentenceToken.end)

  override def apply(prev: Iterator[String]): Iterator[String] = {
    prev.map(x => {
      val sentence = sentenceStart + " " + x + " " + sentenceEnd
      sentence
    })
  }
}

object SentenceBiPadding {
  def apply(start: Option[String] = None,
            end: Option[String] = None): SentenceBiPadding =
    new SentenceBiPadding(start, end)
}
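The default boundary markers come from SentenceToken; custom ones can be passed as Options. A hypothetical usage sketch with explicit, made-up tokens:

// Hypothetical usage sketch with explicit boundary tokens.
val padder = SentenceBiPadding(start = Some("<s>"), end = Some("</s>"))
padder.apply(Iterator("the quick brown fox")).foreach(println)
// prints: <s> the quick brown fox </s>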
Example 25
Source File: SentenceSplitter.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.text

import java.io.FileInputStream
import java.net.{URI, URL}

import com.intel.analytics.bigdl.dataset.Transformer
import opennlp.tools.sentdetect.{SentenceDetector, SentenceDetectorME, SentenceModel}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

import scala.collection.Iterator

class SentenceSplitter(sentFile: Option[String] = None)
  extends Transformer[String, Array[String]] {

  var modelIn: FileInputStream = _
  var model: SentenceModel = _
  var sentenceDetector: SentenceDetector = _

  def this(sentFileURL: URL) {
    this(Some(sentFileURL.getPath))
  }

  def this(sentFile: String) {
    this(Some(sentFile))
  }

  def close(): Unit = {
    if (modelIn != null) {
      modelIn.close()
    }
  }

  override def apply(prev: Iterator[String]): Iterator[Array[String]] =
    prev.map(x => {
      if (!sentFile.isDefined) {
        x.split('.')
      } else {
        if (sentenceDetector == null) {
          val src: Path = new Path(sentFile.get)
          val fs = src.getFileSystem(new Configuration())
          val in = fs.open(src)
          model = new SentenceModel(in)
          sentenceDetector = new SentenceDetectorME(model)
        }
        sentenceDetector.sentDetect(x)
      }
    })
}

object SentenceSplitter {
  def apply(sentFile: Option[String] = None): SentenceSplitter =
    new SentenceSplitter(sentFile)

  def apply(sentFileURL: URL): SentenceSplitter =
    new SentenceSplitter(sentFileURL)

  def apply(sentFile: String): SentenceSplitter =
    new SentenceSplitter(sentFile)
}
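Without a model file the splitter falls back to a plain split on '.', which needs no OpenNLP model; passing a sentence-detector model path enables the ME detector instead. A hypothetical usage sketch of the fallback:

// Hypothetical usage sketch: no model, so sentences are split on '.'.
val splitter = SentenceSplitter()
splitter.apply(Iterator("First sentence. Second sentence."))
  .foreach(parts => println(parts.mkString(" | ")))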
Example 26
Source File: TextToLabeledSentence.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.text

import com.intel.analytics.bigdl.dataset.Transformer
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric

import scala.collection.Iterator
import scala.reflect.ClassTag

object TextToLabeledSentence {
  def apply[T: ClassTag](dictionary: Dictionary)
    (implicit ev: TensorNumeric[T])
  : TextToLabeledSentence[T] =
    new TextToLabeledSentence[T](dictionary)

  def apply[T: ClassTag](numSteps: Int)
    (implicit ev: TensorNumeric[T])
  : TextToSentenceWithSteps[T] =
    new TextToSentenceWithSteps[T](numSteps)
}

private[bigdl] class TextToSentenceWithSteps[T: ClassTag](numSteps: Int)
  (implicit ev: TensorNumeric[T])
  extends Transformer[Array[T], LabeledSentence[T]] {

  val xbuffer = new Array[T](numSteps)
  val ybuffer = new Array[T](numSteps)
  val buffer = new LabeledSentence[T]()

  override def apply(prev: Iterator[Array[T]]): Iterator[LabeledSentence[T]] = {
    prev.map(sentence => {
      require(sentence.length >= numSteps + 1,
        "input sentence length should be numSteps + 1, " +
          s"sentence.length = ${sentence.length}, numSteps = ${numSteps}")
      Array.copy(sentence, 0, xbuffer, 0, numSteps)
      Array.copy(sentence, 1, ybuffer, 0, numSteps)
      buffer.copy(xbuffer, ybuffer)
      buffer
    })
  }
}
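The numSteps variant shifts the word-index array by one position to produce (input, target) pairs for language-model training. TextToSentenceWithSteps is private[bigdl], so the hypothetical sketch below assumes it runs inside that package, and it assumes BigDL's NumericFloat implicit for TensorNumeric[Float]:

// Hypothetical usage sketch (inside the bigdl package).
import com.intel.analytics.bigdl.numeric.NumericFloat

val toLabeled = TextToLabeledSentence[Float](numSteps = 3)
val labeled = toLabeled.apply(Iterator(Array(0f, 1f, 2f, 3f)))
// for the input above: x = [0, 1, 2], y = [1, 2, 3]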
Example 27
Source File: LabeledSentenceToSample.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.text

import com.intel.analytics.bigdl.dataset.{Sample, Transformer}

import scala.collection.Iterator
import java.util

import com.intel.analytics.bigdl.tensor.{DoubleType, FloatType, Tensor}
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric

import scala.reflect.ClassTag

object LabeledSentenceToSample {
  def apply[T: ClassTag]
  (vocabLength: Int,
   fixDataLength: Option[Int] = None,
   fixLabelLength: Option[Int] = None)
  (implicit ev: TensorNumeric[T])
  : LabeledSentenceToSample[T] =
    new LabeledSentenceToSample[T](
      vocabLength,
      fixDataLength,
      fixLabelLength,
      true)

  def apply[T: ClassTag]
  (oneHot: Boolean,
   fixDataLength: Option[Int],
   fixLabelLength: Option[Int])
  (implicit ev: TensorNumeric[T])
  : LabeledSentenceToSample[T] =
    new LabeledSentenceToSample[T](
      vocabLength = 0,
      fixDataLength,
      fixLabelLength,
      oneHot)
}

// Excerpt: the LabeledSentenceToSample class definition is elided in the
// original listing; the fragment below comes from the body of its apply,
// filling one-hot feature buffers and shifted label buffers.

      val startTokenIndex = sentence.getData(0)
      val endTokenIndex =
        if (labelLength == 1) 0
        else ev.toType[Int](sentence.getLabel(sentence.labelLength - 1))

      var i = 0
      while (i < sentence.dataLength) {
        featureBuffer(i * vocabLength + ev.toType[Int](sentence.getData(i))) =
          ev.fromType[Float](1.0f)
        i += 1
      }
      while (i < dataLength) {
        featureBuffer(i * vocabLength + endTokenIndex) = ev.fromType[Float](1.0f)
        i += 1
      }

      i = 0
      while (i < sentence.labelLength) {
        labelBuffer(i) = ev.plus(sentence.label()(i), ev.fromType[Float](1.0f))
        i += 1
      }
      while (i < labelLength) {
        labelBuffer(i) = ev.plus(startTokenIndex, ev.fromType[Float](1.0f))
        i += 1
      }
    } else {
      feature.resize(dataLength).zero
      label.resize(labelLength).zero

      val featureBuffer = feature.storage().array()
      val labelBuffer = label.storage().array()

      Array.copy(sentence.data, 0, featureBuffer, 0, dataLength)
      Array.copy(sentence.label, 0, labelBuffer, 0, labelLength)
    }

    Sample[T](feature, label)
  })
}
}
Example 28
Source File: SentenceTokenizer.scala From BigDL with Apache License 2.0

package com.intel.analytics.bigdl.dataset.text

import java.io.FileInputStream
import java.net.{URI, URL}

import com.intel.analytics.bigdl.dataset.Transformer

import scala.collection.Iterator
import opennlp.tools.tokenize.{SimpleTokenizer, Tokenizer, TokenizerME, TokenizerModel}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

class SentenceTokenizer(tokenFile: Option[String] = None)
  extends Transformer[String, Array[String]] {

  var modelIn: FileInputStream = _
  var model: TokenizerModel = _
  var tokenizer: Tokenizer = _

  def this(tokenFile: URL) {
    this(Some(tokenFile.getPath))
  }

  def close(): Unit = {
    if (modelIn != null) {
      modelIn.close()
    }
  }

  override def apply(prev: Iterator[String]): Iterator[Array[String]] =
    prev.map(x => {
      if (tokenizer == null) {
        if (!tokenFile.isDefined) {
          tokenizer = SimpleTokenizer.INSTANCE
        } else {
          val src: Path = new Path(tokenFile.get)
          val fs = src.getFileSystem(new Configuration())
          val in = fs.open(src)
          model = new TokenizerModel(in)
          tokenizer = new TokenizerME(model)
        }
      }
      val words = tokenizer.tokenize(x)
      words
    })
}

object SentenceTokenizer {
  def apply(tokenFile: Option[String] = None): SentenceTokenizer =
    new SentenceTokenizer(tokenFile)

  def apply(tokenFile: URL): SentenceTokenizer =
    new SentenceTokenizer(tokenFile)
}