org.apache.spark.ml.feature.IDFModel Scala Examples

The following examples show how to use org.apache.spark.ml.feature.IDFModel. Each example is taken from an open-source project; the source file and project are listed above the code.
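
Before the project-specific examples, a minimal sketch of how an IDFModel is normally produced and applied with the standard Spark ML API (the sample corpus, column names, and numFeatures value below are illustrative, not taken from any of the projects):

import org.apache.spark.ml.feature.{HashingTF, IDF, IDFModel, Tokenizer}
import org.apache.spark.sql.SparkSession

object IDFModelUsage extends App {
  val spark = SparkSession.builder().master("local[*]").appName("idf-model-usage").getOrCreate()

  // A toy corpus; the column names are illustrative.
  val corpus = spark.createDataFrame(Seq(
    (0L, "spark ml idf model example"),
    (1L, "idf rescales raw term frequencies")
  )).toDF("id", "sentence")

  // Tokenize and hash the tokens into fixed-size term-frequency vectors.
  val words = new Tokenizer().setInputCol("sentence").setOutputCol("words").transform(corpus)
  val tf = new HashingTF().setInputCol("words").setOutputCol("rawFeatures").setNumFeatures(32).transform(words)

  // Fit the IDF estimator; the fitted IDFModel is what the examples below serialize or re-implement.
  val idfModel: IDFModel = new IDF().setInputCol("rawFeatures").setOutputCol("features").fit(tf)
  idfModel.transform(tf).select("id", "features").show(truncate = false)

  spark.stop()
}
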
Example 1
Source File: LocalIDF.scala    From spark-ml-serving   with Apache License 2.0
package io.hydrosphere.spark_ml_serving.preprocessors

import io.hydrosphere.spark_ml_serving.TypedTransformerConverter
import io.hydrosphere.spark_ml_serving.common.utils.DataUtils._
import io.hydrosphere.spark_ml_serving.common._
import io.hydrosphere.spark_ml_serving.common.utils.DataUtils
import org.apache.spark.ml.feature.IDFModel
import org.apache.spark.mllib.feature.{IDFModel => OldIDFModel}
import org.apache.spark.mllib.linalg.{Vector => OldVector, Vectors => OldVectors}

class LocalIDF(override val sparkTransformer: IDFModel) extends LocalTransformer[IDFModel] {
  override def transform(localData: LocalData): LocalData = {
    val idf = sparkTransformer.idf

    localData.column(sparkTransformer.getInputCol) match {
      case Some(column) =>
        // Scale each term-frequency value by the corresponding IDF weight.
        val newData = column.data.mapToMlLibVectors.map { vector =>
          val n         = vector.size
          val values    = vector.values
          val newValues = new Array[Double](n)
          var j         = 0
          while (j < n) {
            newValues(j) = values(j) * idf(j)
            j += 1
          }
          newValues.toList
        }
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, newData))

      case None => localData
    }
  }
}

object LocalIDF extends SimpleModelLoader[IDFModel] with TypedTransformerConverter[IDFModel] {

  override def build(metadata: Metadata, data: LocalData): IDFModel = {
    val idfParams = data
      .column("idf")
      .get
      .data
      .head
      .asInstanceOf[Map[String, Any]]

    val idfVector = OldVectors.fromML(DataUtils.constructVector(idfParams))

    // Both the mllib and ml IDFModel constructors are not part of the public
    // Spark API, so the model is reassembled through reflection.
    val oldIDFConstructor = classOf[OldIDFModel].getDeclaredConstructor(classOf[OldVector])
    oldIDFConstructor.setAccessible(true)

    val oldIDF = oldIDFConstructor.newInstance(idfVector)
    val const  = classOf[IDFModel].getDeclaredConstructor(classOf[String], classOf[OldIDFModel])
    val idf    = const.newInstance(metadata.uid, oldIDF)
    idf
      .setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
      .setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
      .set(idf.minDocFreq, metadata.paramMap("minDocFreq").asInstanceOf[Number].intValue())
  }

  override implicit def toLocal(transformer: IDFModel): LocalIDF =
    new LocalIDF(transformer)
} 
Example 2
Source File: IDFOp.scala    From mleap   with Apache License 2.0
package org.apache.spark.ml.bundle.ops.feature

import ml.combust.bundle.BundleContext
import ml.combust.bundle.dsl._
import ml.combust.bundle.op.{OpModel, OpNode}
import org.apache.spark.ml.bundle.{ParamSpec, SimpleParamSpec, SimpleSparkOp, SparkBundleContext}
import org.apache.spark.ml.feature.IDFModel
import org.apache.spark.ml.param.Param
import org.apache.spark.mllib.feature
import org.apache.spark.mllib.linalg.Vectors


class IDFOp extends SimpleSparkOp[IDFModel] {
  override val Model: OpModel[SparkBundleContext, IDFModel] = new OpModel[SparkBundleContext, IDFModel] {
    override val klazz: Class[IDFModel] = classOf[IDFModel]

    override def opName: String = Bundle.BuiltinOps.feature.idf

    override def store(model: Model, obj: IDFModel)
                      (implicit context: BundleContext[SparkBundleContext]): Model = {
      model.withValue("idf", Value.vector(obj.idf.toArray))
    }

    override def load(model: Model)
                     (implicit context: BundleContext[SparkBundleContext]): IDFModel = {
      // Rebuild the mllib IDFModel from the stored "idf" vector; the uid is not persisted in the bundle.
      val idfModel = new feature.IDFModel(Vectors.dense(model.value("idf").getTensor[Double].toArray))
      new IDFModel(uid = "", idfModel = idfModel)
    }
  }

  override def sparkLoad(uid: String, shape: NodeShape, model: IDFModel): IDFModel = {
    new IDFModel(uid = uid, idfModel = new feature.IDFModel(Vectors.dense(model.idf.toArray)))
  }

  override def sparkInputs(obj: IDFModel): Seq[ParamSpec] = {
    Seq("input" -> obj.inputCol)
  }

  override def sparkOutputs(obj: IDFModel): Seq[SimpleParamSpec] = {
    Seq("output" -> obj.outputCol)
  }
} 
Example 3
Source File: IDF.scala    From aardpfark   with Apache License 2.0
package com.ibm.aardpfark.spark.ml.feature

import com.ibm.aardpfark.pfa.document.{Cell, PFABuilder, PFADocument}
import com.ibm.aardpfark.pfa.expression._
import com.ibm.aardpfark.pfa.types.WithSchema
import com.ibm.aardpfark.spark.ml.PFAModel
import com.sksamuel.avro4s.{AvroNamespace, AvroSchema}
import org.apache.avro.{Schema, SchemaBuilder}

import org.apache.spark.ml.feature.IDFModel

@AvroNamespace("com.ibm.aardpfark.exec.spark.ml.feature")
case class IDFData(idf: Seq[Double]) extends WithSchema {
  override def schema: Schema = AvroSchema[this.type]
}

class IDF(override val sparkTransformer: IDFModel) extends PFAModel[IDFData] {

  // Input/output column references, the Avro input/output schemas, the "idf"
  // model cell (idfRef), and the element-wise multiply function (multFn) are
  // defined here; this excerpt elides them.

  override def action: PFAExpression = {
    NewRecord(outputSchema, Map(outputCol -> a.zipmap(inputExpr, idfRef, multFn.ref)))
  }

  override def pfa: PFADocument = {
    PFABuilder()
      .withName(sparkTransformer.uid)
      .withMetadata(getMetadata)
      .withInput(inputSchema)
      .withOutput(outputSchema)
      .withCell(modelCell)
      .withFunction(multFn)
      .withAction(action)
      .pfa
  }

}