org.apache.spark.ml.param.shared.HasFeaturesCol Scala Examples
The following examples show how to use org.apache.spark.ml.param.shared.HasFeaturesCol.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: DLEstimatorBase.scala From BigDL with Apache License 2.0 | 5 votes |
package org.apache.spark.ml

import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.param.shared.{HasFeaturesCol, HasLabelCol, HasPredictionCol}
import org.apache.spark.mllib.linalg.{Vector, VectorUDT}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types._
import org.apache.spark.sql.{DataFrame, Row}

/**
 * Common skeleton for estimators that produce a [[DLTransformerBase]] model.
 *
 * Concrete subclasses supply the actual training logic through `internalFit`;
 * this class wires it into the `Estimator` contract.
 *
 * @tparam Learner the concrete estimator type (self-type, returned by `copy`)
 * @tparam M       the model type produced by `fit`
 */
abstract class DLEstimatorBase[Learner <: DLEstimatorBase[Learner, M], M <: DLTransformerBase[M]]
  extends Estimator[M] with HasLabelCol {

  /**
   * Training hook implemented by subclasses.
   *
   * @param dataFrame the training data handed to `fit`
   * @return the fitted model
   */
  protected def internalFit(dataFrame: DataFrame): M

  /**
   * Runs `transformSchema` on the input schema (with logging enabled) and then
   * delegates to `internalFit`.
   *
   * @param dataFrame the training data
   * @return the model returned by `internalFit`
   */
  override def fit(dataFrame: DataFrame): M = {
    val inputSchema = dataFrame.schema
    // Called for its checks/logging only; the derived schema itself is discarded.
    transformSchema(inputSchema, logging = true)
    internalFit(dataFrame)
  }

  /** Copies this estimator via `defaultCopy`, applying the `extra` params. */
  override def copy(extra: ParamMap): Learner = defaultCopy[Learner](extra)
}
Example 2
Source File: MultinomialLabeler.scala From mleap with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.mleap.feature

import ml.combust.mleap.core.feature.MultinomialLabelerModel
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.ml.Transformer
import org.apache.spark.ml.linalg.{Vector, VectorUDT}
import org.apache.spark.ml.mleap.param.{HasLabelsCol, HasProbabilitiesCol}
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.param.shared.HasFeaturesCol
import org.apache.spark.ml.util.Identifiable
import org.apache.spark.sql.{DataFrame, Dataset}
import org.apache.spark.sql.types._
import org.apache.spark.sql.functions.{udf, col}
import ml.combust.mleap.core.util.VectorConverters._

/**
 * Transformer that appends two array columns derived from a vector features column
 * by applying a [[MultinomialLabelerModel]] to each row.
 *
 * NOTE(review): the default uid prefix "math_unary" looks copy-pasted from another
 * transformer. It is left untouched here because it is an observable constructor
 * default; confirm before renaming.
 *
 * @param uid   unique identifier of this pipeline stage
 * @param model the core model used to compute probabilities and labels
 */
class MultinomialLabeler(override val uid: String = Identifiable.randomUID("math_unary"),
                         val model: MultinomialLabelerModel) extends Transformer
  with HasFeaturesCol
  with HasProbabilitiesCol
  with HasLabelsCol {

  /** Sets the name of the input features column. */
  def setFeaturesCol(value: String): this.type = set(featuresCol, value)

  /** Sets the name of the output probabilities column. */
  def setProbabilitiesCol(value: String): this.type = set(probabilitiesCol, value)

  /** Sets the name of the output labels column. */
  def setLabelsCol(value: String): this.type = set(labelsCol, value)

  /**
   * Adds the probabilities and labels columns to `dataset`.
   *
   * The probabilities column holds the first component of each entry returned by
   * `model.top`; the labels column holds the result of `model.topLabels`.
   *
   * @param dataset input data containing the features column
   * @return `dataset` with the two output columns appended
   */
  @org.apache.spark.annotation.Since("2.0.0")
  override def transform(dataset: Dataset[_]): DataFrame = {
    val probabilitiesUdf = udf { (vector: Vector) => model.top(vector).map(_._1).toArray }
    val labelsUdf = udf { (vector: Vector) => model.topLabels(vector).toArray }

    dataset
      .withColumn($(probabilitiesCol), probabilitiesUdf(col($(featuresCol))))
      .withColumn($(labelsCol), labelsUdf(col($(featuresCol))))
  }

  /** Creates a new instance sharing `uid` and `model`, with param values copied over. */
  override def copy(extra: ParamMap): Transformer =
    copyValues(new MultinomialLabeler(uid, model), extra)

  /**
   * Validates the input schema and derives the output schema.
   *
   * @param schema input schema
   * @return `schema` extended with an `ArrayType(DoubleType)` probabilities field and an
   *         `ArrayType(StringType)` labels field
   * @throws IllegalArgumentException if the features column is not a `VectorUDT`, or if
   *                                  either output column already exists
   */
  @DeveloperApi
  override def transformSchema(schema: StructType): StructType = {
    val featuresType = schema($(featuresCol)).dataType
    // The check is against VectorUDT, so the message must name VectorUDT as well
    // (it previously claimed NumericType, which sent users looking for the wrong fix).
    require(featuresType.isInstanceOf[VectorUDT],
      s"Features column must be of type VectorUDT but got $featuresType")

    val inputFields = schema.fields
    require(!inputFields.exists(_.name == $(probabilitiesCol)),
      s"Output column ${$(probabilitiesCol)} already exists.")
    require(!inputFields.exists(_.name == $(labelsCol)),
      s"Output column ${$(labelsCol)} already exists.")

    val outputFields = Seq(
      StructField($(probabilitiesCol), ArrayType(DoubleType)),
      StructField($(labelsCol), ArrayType(StringType)))
    StructType(inputFields ++ outputFields)
  }
}
Example 3
Source File: XGBoostUtils.scala From pravda-ml with Apache License 2.0 | 5 votes |
package ml.dmlc.xgboost4j.scala.spark

import ml.dmlc.xgboost4j.scala.Booster
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.ml.param.{BooleanParam, Params}
import org.apache.spark.ml.param.shared.{HasFeaturesCol, HasPredictionCol}
import org.apache.spark.sql.{Dataset, functions}

/**
 * Accessors for the `_booster` member of XGBoost Spark models.
 *
 * NOTE(review): presumably this object lives in the xgboost4j package so that it can
 * reach a package-restricted `_booster` — confirm against the xgboost4j-spark sources.
 */
object XGBoostUtils {

  /** Returns the booster held by a classification model. */
  def getBooster(x: XGBoostClassificationModel): Booster = x._booster

  /** Returns the booster held by a regression model. */
  def getBooster(x: XGBoostRegressionModel): Booster = x._booster
}

/**
 * Extra boolean params layered on top of the XGBoost estimators, plus a helper that
 * optionally converts the features column to dense vectors.
 *
 * No defaults are set in this trait; `$(param)` on an unset param without a default
 * throws, so mixing classes are expected to provide defaults or set values explicitly.
 */
trait OkXGBoostParams extends HasFeaturesCol with HasPredictionCol {
  this: Params =>

  val densifyInput: BooleanParam = new BooleanParam(this, "densifyInput",
    "In order to fix the difference between spark and xgboost sparsity treatment")

  val predictAsDouble: BooleanParam = new BooleanParam(this, "predictAsDouble",
    "Whether to cast XGBoost prediction to double matching common behavior for other predictors.")

  val addRawTrees: BooleanParam = new BooleanParam(this, "addRawTrees",
    "Whether to add raw trees block to model summary.")

  val addSignificance: BooleanParam = new BooleanParam(this, "addSignificance",
    "Whether to add feature significance block to model summary.")

  /** Sets [[addSignificance]]. */
  def setAddSignificance(value: Boolean): this.type = set(addSignificance, value)

  /** Sets [[addRawTrees]]. */
  def setAddRawTrees(value: Boolean): this.type = set(addRawTrees, value)

  /** Sets [[densifyInput]]. */
  def setDensifyInput(value: Boolean): this.type = set(densifyInput, value)

  /** Sets [[predictAsDouble]]. */
  def setPredictAsDouble(value: Boolean): this.type = set(predictAsDouble, value)

  /**
   * Replaces the features column with its dense representation when [[densifyInput]]
   * is true; otherwise returns `dataset` unchanged.
   *
   * @param dataset input data containing the features column
   * @return the dataset with a densified features column, or the input itself
   */
  protected def densifyIfNeeded(dataset: Dataset[_]): Dataset[_] = {
    if ($(densifyInput)) {
      val densify = functions.udf((x: Vector) => x.toDense)
      val featuresName = getFeaturesCol
      // Re-attach the original column metadata, which the UDF result would otherwise drop.
      val metadata = dataset.schema(featuresName).metadata
      dataset.withColumn(featuresName, densify(dataset(featuresName)).as(featuresName, metadata))
    } else {
      dataset
    }
  }
}

/** XGBoost classifier params combined with [[OkXGBoostParams]]. */
trait OkXGBoostClassifierParams extends XGBoostClassifierParams with OkXGBoostParams

/** XGBoost regressor params combined with [[OkXGBoostParams]]. */
trait OkXGBoostRegressorParams extends XGBoostRegressorParams with OkXGBoostParams