breeze.linalg.* Scala Examples

Source File: TransformerOperation.scala From BigDL with Apache License 2.0

6 votes

package com.intel.analytics.bigdl.nn

import breeze.linalg.*
import breeze.numerics.exp
import com.intel.analytics.bigdl._
import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, TensorModule}
import com.intel.analytics.bigdl.optim.Regularizer
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
import com.intel.analytics.bigdl.tensor.{Storage, Tensor}
import com.intel.analytics.bigdl.utils.{EngineType, T}

import scala.reflect.ClassTag

/**
 * Helper builders and tensor utilities used by the Transformer layers in this package.
 */
private[nn] object TransformerOperation {

  /**
   * Builds a `Linear` layer wrapped in `TimeDistributed`, optionally followed by an activation.
   *
   * The linear layer is initialised with Xavier weights and zero biases.
   *
   * @param inputSize    input size handed to `Linear`
   * @param outputSize   output size handed to `Linear`
   * @param bias         whether the linear layer carries a bias term
   * @param activation   module appended after the linear layer; skipped when `null`
   * @param wRegularizer weight regularizer handed to `Linear`
   * @param bRegularizer bias regularizer handed to `Linear`
   * @param name         name given to the linear layer; the empty string leaves it unnamed
   * @return a `Sequential` holding the time-distributed linear layer and the optional activation
   */
  def dense[T: ClassTag](
    inputSize: Int,
    outputSize: Int,
    bias: Boolean = true,
    activation: TensorModule[T] = null,
    wRegularizer: Regularizer[T] = null,
    bRegularizer: Regularizer[T] = null,
    name: String = "")(implicit ev: TensorNumeric[T]): Module[T] = {
    val linear = Linear[T](
      inputSize = inputSize,
      outputSize = outputSize,
      withBias = bias,
      wRegularizer = wRegularizer,
      bRegularizer = bRegularizer)
    linear.setInitMethod(weightInitMethod = Xavier, biasInitMethod = Zeros)
    if (name != "") linear.setName(name)

    val pipeline = new Sequential[T]()
    pipeline.add(TimeDistributed[T](linear))
    if (activation != null) pipeline.add(activation)
    pipeline
  }

  /**
   * Builds a module that swaps dimensions 2 and 4, applies `SoftMax`, and swaps them back.
   *
   * @return the three-step `Sequential`, viewed as a tensor-to-tensor module
   */
  def softMax[T: ClassTag]()(implicit ev: TensorNumeric[T]): Module[T] = {
    val model = Sequential[T]()
    model.add(Transpose[T](Array((2, 4))))
    model.add(SoftMax[T]())
    model.add(Transpose[T](Array((2, 4))))
    model.asInstanceOf[AbstractModule[Tensor[T], Tensor[T], T]]
  }

  /**
   * Fills the strictly-upper-triangular entries of a `length` x `length` grid with `maskValue`
   * and returns `output` resized to `(1, 1, length, length)`.
   *
   * Entries are written straight into the array behind `output.storage()` at index
   * `row * length + col` for every `col > row`; all other entries are left untouched.
   * NOTE(review): this assumes the backing array holds at least `length * length` elements
   * starting at index 0 -- confirm against callers. `maskValue` is defined outside the
   * visible part of this file.
   *
   * @param length side length of the square bias grid
   * @param output tensor whose storage is written in place
   * @return `output` after resizing
   */
  def attentionBiasLowerTriangle[T: ClassTag](
     length: Int, output: Tensor[T])(implicit ev: TensorNumeric[T]): Tensor[T] = {
    val data = output.storage().array()
    var row = 0
    while (row < length) {
      val rowStart = row * length
      // Walk the row from its last column back to the one right of the diagonal.
      var col = length - 1
      while (col > row) {
        // The masked positions receive maskValue (not 1).
        data(rowStart + col) = ev.fromType(maskValue)
        col -= 1
      }
      row += 1
    }
    output.resize(Array(1, 1, length, length))
  }
}

/**
 * Closed set of Transformer variants; the only members are the case objects declared below.
 */
sealed trait TransformerType

// NOTE(review): presumably selects the translation (encoder/decoder) setup -- confirm at use site.
case object Translation extends TransformerType
// NOTE(review): presumably selects the language-model setup -- confirm at use site.
case object LanguageModel extends TransformerType

Source File: Adam.scala From BigDL with Apache License 2.0

5 votes

package com.intel.analytics.bigdl.optim

import breeze.linalg.*
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
import com.intel.analytics.bigdl.utils.{T, Table}

import scala.math._
import scala.reflect.ClassTag


    _s.mul(ev.fromType[Double](beta1)).add(ev.fromType[Double](1-beta1), dfdx)
    // buffer = dfdx * dfdx
    buffer.resizeAs(dfdx).cmul(dfdx, dfdx)
    _r.mul(ev.fromType[Double](beta2)).add(ev.fromType[Double](1-beta2), buffer)
    _denom.sqrt(_r)

    // used as MKL.axpy: 1 * a + y = y, and fill buffer with one
    buffer.fill(ev.one)
    _denom.add(ev.fromType(eps), buffer)

    // efficiency improved upon by changing the order of computation, at expense of clarity
    val biasCorrection1 = 1 - pow(beta1, timestep)
    val biasCorrection2 = 1 - pow(beta2, timestep)
    val stepSize = clr * sqrt(biasCorrection2) / biasCorrection1
    parameter.addcdiv(ev.fromType[Double](-stepSize), _s, _denom)

    state("evalCounter") = timestep // A tmp tensor to hold the sqrt(v) + epsilon
    state("s") = _s // 1st moment variables
    state("r") = _r // 2nd moment variables
    state("denom") = _denom // 3nd moment variables

    (parameter, Array(fx))
  }

  /**
   * Overwrites the hyper-parameters with the `Double` values found in `config`.
   *
   * Recognised keys are "learningRate", "learningRateDecay", "beta1", "beta2" and "Epsilon";
   * a key that is absent leaves the corresponding field unchanged.
   *
   * @param config table holding the optional overrides
   * @return this optimizer, to allow chaining
   */
  override def loadFromTable(config: Table): this.type = {
    // Looks up an optional Double; `current` is only evaluated when the key is missing.
    def doubleOr(key: String, current: => Double): Double =
      config.get[Double](key).getOrElse(current)

    this.learningRate = doubleOr("learningRate", this.learningRate)
    this.learningRateDecay = doubleOr("learningRateDecay", this.learningRateDecay)
    this.beta1 = doubleOr("beta1", this.beta1)
    this.beta2 = doubleOr("beta2", this.beta2)
    this.Epsilon = doubleOr("Epsilon", this.Epsilon)
    this
  }

  /**
   * Removes the "s" and "r" entries from `state`.
   *
   * Other entries of `state` are left in place.
   */
  override def clearHistory(): Unit = {
    Seq("s", "r").foreach { key =>
      state.delete(key)
    }
  }

  /** Returns the value currently stored in `learningRate`. */
  override def getLearningRate(): Double = {
    learningRate
  }
}

Source File: ExecuteSQLStop.scala From piflow with BSD 2-Clause "Simplified" License

5 votes

package cn.piflow.bundle.common

import breeze.collection.mutable.ArrayMap
import breeze.linalg.*
import cn.piflow._
import cn.piflow.conf._
import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.{ImageUtil, MapUtil}
import cn.piflow.lib._
import cn.piflow.lib.io.{FileFormat, TextFile}
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.elasticsearch.common.collect.Tuple


/**
 * Stop that registers its input DataFrame as a temporary view and writes out the result of
 * running the configured SQL statement through the Spark session.
 */
class ExecuteSQLStop extends ConfigurableStop{

  val authorEmail: String = "[email protected]"
  val description: String = "Create temporary view table to execute sql"
  val inportList: List[String] = List(Port.DefaultPort)
  val outportList: List[String] = List(Port.DefaultPort)

  // SQL statement to execute; populated by setProperties.
  var sql: String = _
  // Name under which the input DataFrame is registered as a temp view; populated by setProperties.
  var ViewName: String = _

  /**
   * Reads the input, registers it as the temp view `ViewName`, runs `sql` and writes the result.
   *
   * @param in  stream the input DataFrame is read from
   * @param out stream the query result is written to
   * @param pec job context supplying the `SparkSession`
   */
  override def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
    val spark = pec.get[SparkSession]()
    val inDF = in.read()
    inDF.createOrReplaceTempView(ViewName)

    val frame: DataFrame = spark.sql(sql)
    out.write(frame)
  }

  /**
   * Loads `sql` and `ViewName` from the property map.
   *
   * The view name is published by `getPropertyDescriptor` under the key "viewName", so that key
   * is read first; the historical "ViewName" spelling is still honoured when "viewName" is absent.
   *
   * @param map property values keyed by property name
   */
  override def setProperties(map: Map[String, Any]): Unit = {
    sql = MapUtil.get(map,"sql").asInstanceOf[String]
    val viewNameKey = if (map.contains("viewName")) "viewName" else "ViewName"
    ViewName = MapUtil.get(map, viewNameKey).asInstanceOf[String]
  }

  /** No initialisation is required for this stop. */
  override def initialize(ctx: ProcessContext): Unit = {

  }

  /**
   * Describes the configurable properties.
   *
   * @return the "viewName" descriptor followed by the "sql" descriptor
   */
  override def getPropertyDescriptor(): List[PropertyDescriptor] = {
    val sqlDescriptor = new PropertyDescriptor().name("sql")
      .displayName("Sql")
      .description("Sql string")
      .defaultValue("")
      .required(true)
      .example("select * from temp")

    val viewNameDescriptor = new PropertyDescriptor()
      .name("viewName")
      .displayName("ViewName")
      .description("Name of the temporary view table")
      .defaultValue("temp")
      .required(true)
      .example("temp")

    List(viewNameDescriptor, sqlDescriptor)
  }

  /** Loads the icon image registered for this stop. */
  override def getIcon(): Array[Byte] = {
    ImageUtil.getImage("icon/common/ExecuteSqlStop.png")
  }

  /** This stop belongs to the common group only. */
  override def getGroup(): List[String] = {
    List(StopGroup.CommonGroup)
  }

}

Source File: HypoTest.scala From seqspark with Apache License 2.0

5 votes

package org.dizhang.seqspark.stat

import breeze.linalg.{DenseMatrix, DenseVector, inv, *}
import breeze.stats.{mean, variance}


                    ) extends NullModel {
      def dof = y.length - xs.cols + 1
      def residuals = y - estimates
      val invInfo = inv(xs.t * (xs(::, *) *:* b) * a)
    }

    /**
     * Builds a null model, choosing the overload by whether a covariate matrix is supplied.
     *
     * @param y      response vector
     * @param x      optional covariate matrix; `None` selects the covariate-free overload
     * @param fit    forwarded unchanged to the selected overload
     * @param binary forwarded unchanged to the selected overload
     * @return the null model produced by the selected overload
     */
    def apply(y: DenseVector[Double], x: Option[DenseMatrix[Double]], fit: Boolean, binary: Boolean): NullModel =
      x.fold(apply(y, fit, binary))(covariates => apply(y, covariates, fit, binary))

    /**
     * Builds a covariate-free null model.
     *
     * @param y      response vector
     * @param fit    when true the model comes from `Fit(y, binary)`, otherwise from `Simple(y, binary)`
     * @param binary forwarded unchanged to the chosen constructor
     * @return the resulting null model
     */
    def apply(y: DenseVector[Double], fit: Boolean, binary: Boolean): NullModel =
      if (fit) Fit(y, binary) else Simple(y, binary)

    /**
     * Wraps an already-computed regression in a `Fitted` null model.
     *
     * A `LogisticRegression` yields a binary model with the scalar argument fixed at 1.0 and
     * its `residualsVariance` passed as the vector argument; a `LinearRegression` yields a
     * non-binary model with its `residualsVariance` as the scalar argument and a vector of ones.
     * Only these two subtypes are matched.
     *
     * @param reg regression whose responses, estimates and design matrix are reused
     * @return the corresponding `Fitted` model
     */
    def apply(reg: Regression): NullModel = {
      val responses = reg.responses
      reg match {
        case logistic: LogisticRegression =>
          Fitted(responses, logistic.estimates, logistic.xs, 1.0, logistic.residualsVariance, binary = true)
        case linear: LinearRegression =>
          Fitted(
            responses,
            linear.estimates,
            linear.xs,
            linear.residualsVariance,
            DenseVector.ones[Double](responses.length),
            binary = false)
      }
    }

    /**
     * Builds a null model that includes covariates.
     *
     * Without fitting, the result is `Mutiple(y, x, binary)` (the spelling is the project's own).
     * With fitting, a logistic regression is run for binary responses and a linear regression
     * otherwise, and the outcome is wrapped in `Fitted`.
     *
     * @param y      response vector
     * @param x      covariate matrix
     * @param fit    whether to run a regression
     * @param binary whether the response is binary
     * @return the resulting null model
     */
    def apply(y: DenseVector[Double], x: DenseMatrix[Double], fit: Boolean, binary: Boolean): NullModel = {
      if (fit) {
        if (binary) {
          val logistic = LogisticRegression(y, x)
          Fitted(y, logistic.estimates, logistic.xs, 1.0, logistic.residualsVariance, binary)
        } else {
          val linear = LinearRegression(y, x)
          Fitted(y, linear.estimates, linear.xs, linear.residualsVariance, DenseVector.ones[Double](y.length), binary)
        }
      } else {
        Mutiple(y, x, binary)
      }
    }

    /**
     * Fits a null model with covariates and returns it typed as `Fitted`.
     *
     * Delegates to the four-argument `apply` with `fit = true` and casts the result.
     *
     * @param y      response vector
     * @param x      covariate matrix
     * @param binary whether the response is binary
     * @return the fitted null model
     */
    def Fit(y: DenseVector[Double], x: DenseMatrix[Double], binary: Boolean): Fitted = {
      val model = apply(y, x, fit = true, binary = binary)
      model.asInstanceOf[Fitted]
    }

    /**
     * Fits an intercept-only null model: every estimate is the mean of `y`.
     *
     * The design matrix is a single column of ones. For a binary response the scalar argument
     * is 1.0 and the vector argument is `m * (1 - m)` per observation, where `m` is the mean;
     * otherwise the scalar argument is the variance of the residuals and the vector is all ones.
     *
     * @param y      response vector
     * @param binary whether the response is binary
     * @return the fitted null model
     */
    def Fit(y: DenseVector[Double], binary: Boolean): Fitted = {
      // DenseVector.fill takes its element by name, so the mean must be computed up front;
      // passing mean(y) directly would recompute it once per element.
      val yMean = mean(y)
      val my = DenseVector.fill(y.length)(yMean)
      val residuals = y - my
      val xs = DenseMatrix.ones[Double](y.length, 1)
      val a = if (binary) 1.0 else variance(residuals)
      val b = if (binary) my.map(e => e * (1 - e)) else DenseVector.ones[Double](y.length)
      Fitted(y, my, xs, a, b, binary)
    }
  }
}

Source File: ScoreTest.scala From seqspark with Apache License 2.0

5 votes

package org.dizhang.seqspark.stat

import breeze.linalg.{*, CSCMatrix, DenseMatrix, DenseVector, SparseVector}
import org.dizhang.seqspark.stat.HypoTest.NullModel.{Fitted => SNM}
import org.dizhang.seqspark.util.General._


/**
 * Factories and implementations of [[ScoreTest]] for sparse, dense and mixed genotype inputs.
 *
 * Every implementation derives its score and variance from a fitted null model (`SNM`),
 * using its `residuals`, `xs`, `invInfo`, `a` and `b` members.
 */
object ScoreTest {

  /** Score test over a sparse matrix `x`. */
  def apply(nm: SNM, x: CSCMatrix[Double]): ScoreTest = {
    Sparse(nm, x)
  }

  /** Score test over a dense matrix `x`. */
  def apply(nm: SNM, x: DenseMatrix[Double]): ScoreTest = {
    Dense(nm, x)
  }

  /** Score test over a single dense vector, lifted to a matrix via `DenseVector.horzcat`. */
  def apply(nm: SNM, x: DenseVector[Double]): ScoreTest = {
    Dense(nm, DenseVector.horzcat(x))
  }

  /** Score test over a single sparse vector, lifted to a matrix via `SparseVector.horzcat`. */
  def apply(nm: SNM, x: SparseVector[Double]): ScoreTest = {
    Sparse(nm, SparseVector.horzcat(x))
  }

  /** Score test over a dense block `x1` and a sparse block `x2` evaluated together. */
  def apply(nm: SNM,
            x1: DenseMatrix[Double],
            x2: CSCMatrix[Double]): ScoreTest = {
    Mixed(nm, x1, x2)
  }

  /**
   * Score test for a sparse matrix.
   *
   * @param nm fitted null model
   * @param x  sparse matrix under test
   */
  case class Sparse(nm: SNM,
                    x: CSCMatrix[Double]) extends ScoreTest {
    val score = (nm.residuals.toDenseMatrix * x).toDenseVector / nm.a
    lazy val variance = {
      val c = nm.xs
      val IccInv = nm.invInfo * nm.a
      val Igg = (colMultiply(x, nm.b).t * x).toDense
      val Icg = (c(::, *) *:* nm.b).t * x
      val Igc = Icg.t
      (Igg - Igc * IccInv * Icg) / nm.a
    }
  }

  /**
   * Score test for a dense matrix.
   *
   * @param nm fitted null model
   * @param x  dense matrix under test
   */
  case class Dense(nm: SNM,
                   x: DenseMatrix[Double]) extends ScoreTest {
    val score = x.t * nm.residuals / nm.a
    lazy val variance = {
      val c = nm.xs
      val IccInv = nm.invInfo * nm.a
      val Igg = (x(::, *) *:* nm.b).t * x
      val Icg = (c(::, *) *:* nm.b).t * x
      val Igc = Icg.t
      (Igg - Igc * IccInv * Icg)/nm.a
    }
  }

  /**
   * Score test for a dense block and a sparse block evaluated jointly.
   *
   * The score is the dense score followed by the sparse score. The variance is the block matrix
   * `[[v1, v2], [v2.t, v4]]`, where `v1` and `v4` are the dense and sparse variances and `v2`
   * is the cross term between the two blocks.
   *
   * @param nm fitted null model
   * @param x1 dense block
   * @param x2 sparse block
   */
  case class Mixed(nm: SNM,
                   x1: DenseMatrix[Double],
                   x2: CSCMatrix[Double]) extends ScoreTest {
    private val dense = Dense(nm, x1)
    private val sparse = Sparse(nm, x2)
    val score = DenseVector.vertcat(dense.score, sparse.score)
    lazy val variance = {
      val v1 = dense.variance
      val v4 = sparse.variance
      val v2 = {
        val c = nm.xs
        val IccInv = nm.invInfo * nm.a
        // Covariates with each row scaled by the matching entry of b (rows = observations).
        val weightedC = c(::, *) *:* nm.b
        val Igg = (x1(::, *) *:* nm.b).t * x2
        val Icg = weightedC.t * x2
        // x1.t has one column per observation, so it must meet weightedC un-transposed;
        // transposing weightedC here would make the product dimensionally invalid.
        val Igc = x1.t * weightedC
        (Igg - Igc * IccInv * Icg) / nm.a
      }
      val v3 = v2.t
      val v12 = DenseMatrix.horzcat(v1, v2)
      val v34 = DenseMatrix.horzcat(v3, v4)
      DenseMatrix.vertcat(v12, v34)
    }
  }

  /** Score test carrying precomputed values, with no null model attached. */
  case class Mock(score: DenseVector[Double],
                  variance: DenseMatrix[Double]) extends ScoreTest
}

/**
 * A hypothesis test exposing a score vector and the matrix of its variance.
 *
 * The trait is sealed, so implementations must live in this source file.
 */
@SerialVersionUID(7778780001L)
sealed trait ScoreTest extends HypoTest {
  /** Score vector of the test. */
  def score: DenseVector[Double]
  /** Variance matrix associated with `score`. */
  def variance: DenseMatrix[Double]
}

Source File: OneHotEncoder.scala From doddle-model with Apache License 2.0

5 votes

package io.picnicml.doddlemodel.preprocessing

import breeze.linalg.{*, Axis, DenseMatrix, Vector, convert, max}
import cats.syntax.option._
import io.picnicml.doddlemodel.data.Feature.FeatureIndex
import io.picnicml.doddlemodel.data.Features
import io.picnicml.doddlemodel.syntax.OptionSyntax._
import io.picnicml.doddlemodel.typeclasses.Transformer



/**
 * State of a one-hot encoder; the constructor is private, so instances come from the companion.
 *
 * @param numBinaryColumns number of binary columns per categorical feature; `None` until fitted
 * @param featureIndex     feature index whose categorical columns are encoded
 */
case class OneHotEncoder private (private val numBinaryColumns: Option[Vector[Int]],
                                  private val featureIndex: FeatureIndex)

/**
 * Companion providing the unfitted constructor and the `Transformer` instance for
 * [[OneHotEncoder]].
 */
object OneHotEncoder {

  /** Creates an unfitted encoder for the categorical columns of `featureIndex`. */
  def apply(featureIndex: FeatureIndex): OneHotEncoder = OneHotEncoder(none, featureIndex)

  @SerialVersionUID(0L)
  implicit lazy val ev: Transformer[OneHotEncoder] = new Transformer[OneHotEncoder] {

    /** An encoder is fitted once the per-feature column counts are known. */
    @inline override def isFitted(model: OneHotEncoder): Boolean = model.numBinaryColumns.isDefined

    /**
     * Records, for every categorical column, its maximum value converted to `Int` plus one,
     * which is the number of binary columns that feature expands to.
     */
    override def fit(model: OneHotEncoder, x: Features): OneHotEncoder = {
      val numBinaryColumns = convert(max(x(::, model.featureIndex.categorical.columnIndices).apply(::, *)).t, Int) + 1
      model.copy(numBinaryColumns = numBinaryColumns.some)
    }

    /**
     * Appends the binary columns of every categorical feature to the right of `x`, then
     * removes the original categorical columns.
     */
    override protected def transformSafe(model: OneHotEncoder, x: Features): Features = {
      val xTransformed = model.featureIndex.categorical.columnIndices.zipWithIndex.foldLeft(x) {
        case (xTransformedCurrent, (colIndex, statisticIndex)) =>
          appendEncodedColumns(xTransformedCurrent, colIndex, model.numBinaryColumns.getOrBreak(statisticIndex))
      }
      xTransformed.delete(model.featureIndex.categorical.columnIndices, Axis._1)
    }

    /**
     * Returns `x` with `numEncodedColumns` extra columns holding the one-hot encoding of
     * column `columnIndex`.
     */
    private def appendEncodedColumns(x: Features, columnIndex: Int, numEncodedColumns: Int): Features = {
      val encoded = DenseMatrix.zeros[Float](x.rows, numEncodedColumns)
      convert(x(::, columnIndex), Int).iterator.foreach { case (rowIndex, colIndex) =>
        // A value outside [0, numEncodedColumns) is ignored, i.e. its row stays all zeros.
        // This covers values larger than the maximum seen during training as well as negative
        // values, which Breeze would otherwise treat as indices counted from the last column.
        if (colIndex >= 0 && colIndex < numEncodedColumns) encoded(rowIndex, colIndex) = 1.0f
      }
      DenseMatrix.horzcat(x, encoded)
    }
  }
}

Source File: StandardScaler.scala From doddle-model with Apache License 2.0

5 votes

package io.picnicml.doddlemodel.preprocessing

import breeze.linalg.{*, convert}
import breeze.stats.{mean, stddev}
import cats.syntax.option._
import io.picnicml.doddlemodel.data.Feature.FeatureIndex
import io.picnicml.doddlemodel.data.{Features, RealVector}
import io.picnicml.doddlemodel.syntax.OptionSyntax._
import io.picnicml.doddlemodel.typeclasses.Transformer


/**
 * State of a standard scaler; the constructor is private, so instances come from the companion.
 *
 * @param sampleMean   per-column means of the numerical features; `None` until fitted
 * @param sampleStdDev per-column standard deviations of the numerical features; `None` until fitted
 * @param featureIndex feature index whose numerical columns are scaled
 */
case class StandardScaler private (private val sampleMean: Option[RealVector],
                                   private val sampleStdDev: Option[RealVector],
                                   private val featureIndex: FeatureIndex)

/**
 * Companion providing the unfitted constructor and the `Transformer` instance for
 * [[StandardScaler]].
 */
object StandardScaler {

  /** Creates an unfitted scaler for the numerical columns of `featureIndex`. */
  def apply(featureIndex: FeatureIndex): StandardScaler = StandardScaler(none, none, featureIndex)

  @SerialVersionUID(0L)
  implicit lazy val ev: Transformer[StandardScaler] = new Transformer[StandardScaler] {

    /** A scaler is fitted once both the means and the standard deviations are present. */
    override def isFitted(model: StandardScaler): Boolean =
      model.sampleMean.isDefined && model.sampleStdDev.isDefined

    /**
     * Computes the column-wise mean and standard deviation of the numerical columns of `x`.
     * Standard deviations equal to zero are replaced by one.
     */
    override def fit(model: StandardScaler, x: Features): StandardScaler = {
      val numericalPart = x(::, model.featureIndex.numerical.columnIndices)
      val columnStdDevs = convert(stddev(numericalPart(::, *)).t.toDenseVector, Float)
      // Replace zero deviations so that transforming a constant column never divides by zero.
      columnStdDevs(columnStdDevs :== 0.0f) := 1.0f
      val columnMeans = mean(numericalPart(::, *)).t.toDenseVector
      model.copy(columnMeans.some, columnStdDevs.some)
    }

    /**
     * Returns a copy of `x` in which every numerical column has its fitted mean subtracted and
     * is divided by its fitted standard deviation; other columns are left as they are.
     */
    override protected def transformSafe(model: StandardScaler, x: Features): Features = {
      val scaled = x.copy
      val numericalColumns = model.featureIndex.numerical.columnIndices
      numericalColumns.zipWithIndex.foreach { case (colIndex, statisticIndex) =>
        for (rowIndex <- 0 until scaled.rows) {
          val centered = scaled(rowIndex, colIndex) - model.sampleMean.getOrBreak(statisticIndex)
          scaled(rowIndex, colIndex) = centered / model.sampleStdDev.getOrBreak(statisticIndex)
        }
      }
      scaled
    }
  }
}

Source File: StandardScalerTest.scala From doddle-model with Apache License 2.0

5 votes

package io.picnicml.doddlemodel.preprocessing

import breeze.linalg.{*, DenseMatrix, DenseVector, convert}
import breeze.stats.{mean, stddev}
import io.picnicml.doddlemodel.TestingUtils
import io.picnicml.doddlemodel.data.Feature.{CategoricalFeature, FeatureIndex, NumericalFeature}
import io.picnicml.doddlemodel.preprocessing.StandardScaler.ev
import org.scalactic.{Equality, TolerantNumerics}
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

/**
 * Behavioural tests for [[StandardScaler]]: scaling of numerical columns, constant columns,
 * and scaling restricted to a subset of the columns.
 */
class StandardScalerTest extends AnyFlatSpec with Matchers with TestingUtils {

  // Float comparisons throughout the suite allow an absolute error of 1e-4.
  implicit val tolerance: Equality[Float] = TolerantNumerics.tolerantFloatEquality(1e-4f)

  "Standard scaler" should "preprocess the numerical features" in {
    val x = DenseMatrix.rand[Float](10, 5, rand = randomUniform)
    val featureIndex = FeatureIndex(
      List(
        NumericalFeature,
        NumericalFeature,
        NumericalFeature,
        NumericalFeature,
        CategoricalFeature
      )
    )
    val scaler = StandardScaler(featureIndex)
    val trainedScaler = ev.fit(scaler, x)
    val xTransformed = ev.transform(trainedScaler, x)

    // Sanity check: the raw data is not already standardised.
    breezeEqual(mean(x(::, *)).t, DenseVector.zeros[Float](5)) shouldBe false
    breezeEqual(convert(stddev(x(::, *)).t, Float), DenseVector.ones[Float](5)) shouldBe false

    // The last (categorical) column must keep its original mean; the others become zero.
    val expectedMeans = DenseVector.zeros[Float](5)
    expectedMeans(-1) = mean(x(::, -1))
    breezeEqual(mean(xTransformed(::, *)).t, expectedMeans) shouldBe true

    // The last (categorical) column must keep its original deviation; the others become one.
    val expectedStdDevs = DenseVector.ones[Float](5)
    expectedStdDevs(-1) = stddev(x(::, -1)).toFloat
    breezeEqual(convert(stddev(xTransformed(::, *)).t, Float), expectedStdDevs) shouldBe true
  }

  it should "handle the zero variance case" in {
    val x = DenseMatrix.ones[Float](10, 5)
    val scaler = StandardScaler(FeatureIndex.numerical(5))
    val trainedScaler = ev.fit(scaler, x)
    val xTransformed = ev.transform(trainedScaler, x)

    // Every single entry must be a number; a check that merely "not all entries are NaN"
    // would still pass if some columns were divided by a zero standard deviation.
    xTransformed.forall(value => !value.isNaN) shouldBe true
  }

  it should "preprocess a subset of numerical features" in {
    val x = DenseMatrix.rand[Float](10, 5, rand = randomUniform)
    val scaler = StandardScaler(FeatureIndex.numerical(5).subset("f0", "f2", "f4"))
    val trainedScaler = ev.fit(scaler, x)
    val xTransformed = ev.transform(trainedScaler, x)

    // Sanity check: the raw data is not already standardised.
    breezeEqual(mean(x(::, *)).t, DenseVector.zeros[Float](5)) shouldBe false
    breezeEqual(convert(stddev(x(::, *)).t, Float), DenseVector.ones[Float](5)) shouldBe false

    // Columns 0, 2 and 4 are in the subset and must be standardised; columns 1 and 3 must not.
    assert(tolerance.areEqual(mean(xTransformed(::, 0)), 0.0f))
    assert(tolerance.areEqual(convert(stddev(xTransformed(::, 0)), Float), 1.0f))
    assert(!tolerance.areEqual(mean(xTransformed(::, 1)), 0.0f))
    assert(!tolerance.areEqual(convert(stddev(xTransformed(::, 1)), Float), 1.0f))
    assert(tolerance.areEqual(mean(xTransformed(::, 2)), 0.0f))
    assert(tolerance.areEqual(convert(stddev(xTransformed(::, 2)), Float), 1.0f))
    assert(!tolerance.areEqual(mean(xTransformed(::, 3)), 0.0f))
    assert(!tolerance.areEqual(convert(stddev(xTransformed(::, 3)), Float), 1.0f))
    assert(tolerance.areEqual(mean(xTransformed(::, 4)), 0.0f))
    assert(tolerance.areEqual(convert(stddev(xTransformed(::, 4)), Float), 1.0f))
  }
}

breeze.linalg.* Scala Examples