org.apache.spark.ml.feature.IDFModel Scala Examples
The following examples show how to use org.apache.spark.ml.feature.IDFModel.
Each example is taken from an open-source project; the source file and project are noted above the code.
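Before the converter examples, a minimal sketch of where an IDFModel comes from: it is the fitted transformer returned by IDF.fit over a term-frequency column (the data set, column names, and feature size below are illustrative).

import org.apache.spark.ml.feature.{HashingTF, IDF, IDFModel, Tokenizer}
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("IDFModelExample").getOrCreate()

// Toy corpus; any DataFrame with a text column works the same way.
val sentences = spark.createDataFrame(Seq(
  (0, "Hi I heard about Spark"),
  (1, "Logistic regression models are neat")
)).toDF("id", "sentence")

val words = new Tokenizer().setInputCol("sentence").setOutputCol("words").transform(sentences)
val tf    = new HashingTF().setInputCol("words").setOutputCol("rawFeatures").setNumFeatures(32).transform(words)

// Fitting IDF yields the IDFModel that the converters below serialize and re-create.
val idfModel: IDFModel = new IDF().setInputCol("rawFeatures").setOutputCol("features").fit(tf)
idfModel.transform(tf).select("features").show(truncate = false)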
Example 1
Source File: LocalIDF.scala from spark-ml-serving (Apache License 2.0)
package io.hydrosphere.spark_ml_serving.preprocessors

import io.hydrosphere.spark_ml_serving.TypedTransformerConverter
import io.hydrosphere.spark_ml_serving.common.utils.DataUtils._
import io.hydrosphere.spark_ml_serving.common._
import io.hydrosphere.spark_ml_serving.common.utils.DataUtils
import org.apache.spark.ml.feature.IDFModel
import org.apache.spark.mllib.feature.{IDFModel => OldIDFModel}
import org.apache.spark.mllib.linalg.{Vector => OldVector, Vectors => OldVectors}

class LocalIDF(override val sparkTransformer: IDFModel) extends LocalTransformer[IDFModel] {

  override def transform(localData: LocalData): LocalData = {
    val idf = sparkTransformer.idf
    localData.column(sparkTransformer.getInputCol) match {
      case Some(column) =>
        val newData = column.data.mapToMlLibVectors.map { vector =>
          val n         = vector.size
          val values    = vector.values
          val newValues = new Array[Double](n)
          var j = 0
          while (j < n) {
            newValues(j) = values(j) * idf(j)
            j += 1
          }
          newValues.toList
        }
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, newData))
      case None => localData
    }
  }
}

object LocalIDF extends SimpleModelLoader[IDFModel] with TypedTransformerConverter[IDFModel] {

  override def build(metadata: Metadata, data: LocalData): IDFModel = {
    val idfParams = data
      .column("idf")
      .get
      .data
      .head
      .asInstanceOf[Map[String, Any]]
    val idfVector = OldVectors.fromML(DataUtils.constructVector(idfParams))

    val oldIDFconstructor = classOf[OldIDFModel].getDeclaredConstructor(classOf[OldVector])
    oldIDFconstructor.setAccessible(true)
    val oldIDF = oldIDFconstructor.newInstance(idfVector)

    val const = classOf[IDFModel].getDeclaredConstructor(classOf[String], classOf[OldIDFModel])
    val idf   = const.newInstance(metadata.uid, oldIDF)

    idf
      .setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
      .setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
      .set(idf.minDocFreq, metadata.paramMap("minDocFreq").asInstanceOf[Number].intValue())
  }

  override implicit def toLocal(transformer: IDFModel) = new LocalIDF(transformer)
}
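The heart of LocalIDF.transform is an element-wise product of each term-frequency vector with the fitted idf weights. The helper below restates that loop in isolation (applyIdf is an illustrative name, not part of spark-ml-serving):

import org.apache.spark.mllib.linalg.{Vector, Vectors}

// Multiply each term frequency by the corresponding inverse-document-frequency weight,
// exactly as the while loop in transform does.
def applyIdf(tf: Vector, idf: Vector): Array[Double] = {
  val values  = tf.toArray
  val weights = idf.toArray
  Array.tabulate(values.length)(j => values(j) * weights(j))
}

// applyIdf(Vectors.dense(1.0, 0.0, 2.0), Vectors.dense(0.3, 0.7, 0.0)) == Array(0.3, 0.0, 0.0)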
Example 2
Source File: IDFOp.scala from mleap (Apache License 2.0)
package org.apache.spark.ml.bundle.ops.feature

import ml.combust.bundle.BundleContext
import ml.combust.bundle.dsl._
import ml.combust.bundle.op.{OpModel, OpNode}
import org.apache.spark.ml.bundle.{ParamSpec, SimpleParamSpec, SimpleSparkOp, SparkBundleContext}
import org.apache.spark.ml.feature.IDFModel
import org.apache.spark.ml.param.Param
import org.apache.spark.mllib.feature
import org.apache.spark.mllib.linalg.Vectors

class IDFOp extends SimpleSparkOp[IDFModel] {
  override val Model: OpModel[SparkBundleContext, IDFModel] = new OpModel[SparkBundleContext, IDFModel] {
    override val klazz: Class[IDFModel] = classOf[IDFModel]

    override def opName: String = Bundle.BuiltinOps.feature.idf

    override def store(model: Model, obj: IDFModel)
                      (implicit context: BundleContext[SparkBundleContext]): Model = {
      model.withValue("idf", Value.vector(obj.idf.toArray))
    }

    override def load(model: Model)
                     (implicit context: BundleContext[SparkBundleContext]): IDFModel = {
      val idfModel = new feature.IDFModel(Vectors.dense(model.value("idf").getTensor[Double].toArray))
      new IDFModel(uid = "", idfModel = idfModel)
    }
  }

  override def sparkLoad(uid: String, shape: NodeShape, model: IDFModel): IDFModel = {
    new IDFModel(uid = uid, idfModel = new feature.IDFModel(Vectors.dense(model.idf.toArray)))
  }

  override def sparkInputs(obj: IDFModel): Seq[ParamSpec] = {
    Seq("input" -> obj.inputCol)
  }

  override def sparkOutputs(obj: IDFModel): Seq[SimpleParamSpec] = {
    Seq("output" -> obj.outputCol)
  }
}
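Unlike Example 1, this op is declared under the org.apache.spark.ml package, so it can call the Spark-private IDFModel constructor directly instead of going through reflection. A minimal sketch of the rebuild step shared by load and sparkLoad (IDFRebuild and its placement in the same package are illustrative):

package org.apache.spark.ml.bundle.ops.feature

import org.apache.spark.ml.feature.IDFModel
import org.apache.spark.mllib.feature
import org.apache.spark.mllib.linalg.Vectors

object IDFRebuild {
  // Wrap the persisted idf weights in the old mllib model, then in the ml wrapper;
  // both constructors are package-private to Spark, hence the package declaration above.
  def rebuild(uid: String, idfValues: Array[Double]): IDFModel =
    new IDFModel(uid = uid, idfModel = new feature.IDFModel(Vectors.dense(idfValues)))
}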
Example 3
package com.ibm.aardpfark.spark.ml.feature

import com.ibm.aardpfark.pfa.document.{Cell, PFABuilder, PFADocument}
import com.ibm.aardpfark.pfa.expression._
import com.ibm.aardpfark.pfa.types.WithSchema
import com.ibm.aardpfark.spark.ml.PFAModel
import com.sksamuel.avro4s.{AvroNamespace, AvroSchema}
import org.apache.avro.{Schema, SchemaBuilder}
import org.apache.spark.ml.feature.IDFModel

@AvroNamespace("com.ibm.aardpfark.exec.spark.ml.feature")
case class IDFData(idf: Seq[Double]) extends WithSchema {
  override def schema: Schema = AvroSchema[this.type]
}

// Excerpt of the IDF converter (a PFAModel over IDFData): the enclosing class declaration
// and its schema, cell, and function members are omitted from this snippet.

  override def action: PFAExpression = {
    NewRecord(outputSchema, Map(outputCol -> a.zipmap(inputExpr, idfRef, multFn.ref)))
  }

  override def pfa: PFADocument = {
    PFABuilder()
      .withName(sparkTransformer.uid)
      .withMetadata(getMetadata)
      .withInput(inputSchema)
      .withOutput(outputSchema)
      .withCell(modelCell)
      .withFunction(multFn)
      .withAction(action)
      .pfa
  }
}
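The generated PFA action applies the two-argument mult function pairwise over the input array and the stored idf cell via a.zipmap; in plain Scala terms the scoring step is equivalent to the following (pairwiseIdf is an illustrative name):

// Element-wise product of the input term frequencies with the idf cell,
// mirroring a.zipmap(inputExpr, idfRef, multFn.ref) in the PFA action above.
def pairwiseIdf(input: Seq[Double], idf: Seq[Double]): Seq[Double] =
  input.zip(idf).map { case (x, w) => x * w }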