scala.annotation.varargs Scala Examples
The following examples show how to use scala.annotation.varargs, the annotation that tells the Scala compiler to emit an additional Java-style varargs (T...) forwarder for a method declared with a repeated parameter (T*), so the method can be called naturally from Java.
Each snippet is taken from an open-source project; the source file, project, and license are noted above it.
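Before the project listings, here is a minimal, self-contained sketch (not taken from any of the projects below) of what the annotation does:

import scala.annotation.varargs

object Greeter {
  // Scala compiles `names: String*` to a method taking a Seq[String]; @varargs makes
  // the compiler also emit a bridge method with a Java-style `String...` signature,
  // so Java code can call it idiomatically.
  @varargs
  def greet(names: String*): String = names.map(n => s"Hello, $n").mkString("; ")
}

// Scala call site:
//   Greeter.greet("Ada", "Alan")   // "Hello, Ada; Hello, Alan"
// Java call site (possible because of @varargs):
//   Greeter.greet("Ada", "Alan");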
Example 1
Source File: Transformer.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.ml

import scala.annotation.varargs

import org.apache.spark.Logging
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._

// Excerpt from the UnaryTransformer abstract class defined in this file. The class
// header is restored here for context; its setInputCol/setOutputCol setters and the
// abstract createTransformFunc and outputDataType members are not shown.
abstract class UnaryTransformer[IN, OUT, T <: UnaryTransformer[IN, OUT, T]]
  extends Transformer with HasInputCol with HasOutputCol with Logging {

  protected def validateInputType(inputType: DataType): Unit = {}

  override def transformSchema(schema: StructType): StructType = {
    val inputType = schema($(inputCol)).dataType
    validateInputType(inputType)
    if (schema.fieldNames.contains($(outputCol))) {
      throw new IllegalArgumentException(s"Output column ${$(outputCol)} already exists.")
    }
    val outputFields = schema.fields :+
      StructField($(outputCol), outputDataType, nullable = false)
    StructType(outputFields)
  }

  override def transform(dataset: DataFrame): DataFrame = {
    transformSchema(dataset.schema, logging = true)
    dataset.withColumn($(outputCol),
      callUDF(this.createTransformFunc, outputDataType, dataset($(inputCol))))
  }

  override def copy(extra: ParamMap): T = defaultCopy(extra)
}
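The abstract members omitted from the excerpt above (createTransformFunc, outputDataType) are what a concrete subclass supplies. A minimal, hypothetical subclass sketch, assuming the standard Spark ML helpers (Identifiable.randomUID and the inherited setInputCol/setOutputCol setters):

import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.util.Identifiable
import org.apache.spark.sql.types.{DataType, StringType}

// Illustrative transformer: reverses the string found in the input column.
class ReverseTransformer(override val uid: String)
  extends UnaryTransformer[String, String, ReverseTransformer] {

  def this() = this(Identifiable.randomUID("reverse"))

  override protected def createTransformFunc: String => String = _.reverse

  override protected def outputDataType: DataType = StringType
}

// Usage with a hypothetical DataFrame `df` that has a string column "text":
//   new ReverseTransformer().setInputCol("text").setOutputCol("reversed").transform(df)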
Example 2
Source File: CompiledSemanticsCsvPlugin.scala From aloha with MIT License | 5 votes |
package com.eharmony.aloha.semantics.compiled.plugin.csv

import com.eharmony.aloha.semantics.compiled.{RequiredAccessorCode, OptionalAccessorCode, VariableAccessorCode, CompiledSemanticsPlugin}
import com.eharmony.aloha.reflect.RefInfo
import scala.annotation.varargs

// Excerpt: the declaration of the CompiledSemanticsCsvPlugin case class (which
// provides the colNamesToTypes map and inputTypeString used below) is not
// included in this snippet.

  def accessorFunctionCode(spec: String): Either[Seq[String], VariableAccessorCode] = {
    val code = colNamesToTypes.get(spec).map {
      case t if t.isRequired =>
        Right(RequiredAccessorCode(Seq(s"""(_:$inputTypeString).${t.toString()}("${escape(spec)}")""")))
      case t =>
        Right(OptionalAccessorCode(Seq(s"""(_:$inputTypeString).${t.toString()}("${escape(spec)}")""")))
    }.getOrElse {
      Left(Seq[String](s"Couldn't produce code for specification: '$spec'."))
    }
    code
  }

  private[this] def escape(s: String) = s.replace("\\", "\\\\").replace("\"", "\\\"")
}

object CompiledSemanticsCsvPlugin {
  // @varargs generates a Java-friendly overload of apply taking (String, CsvType) pairs.
  @varargs
  def apply(colNamesToTypes: (String, CsvTypes.CsvType)*): CompiledSemanticsCsvPlugin =
    CompiledSemanticsCsvPlugin(colNamesToTypes.toMap)
}
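For illustration, a hedged sketch of calling the @varargs apply from Scala; the CsvTypes member names used here are placeholders, not confirmed members of the real enumeration:

import com.eharmony.aloha.semantics.compiled.plugin.csv.{CompiledSemanticsCsvPlugin, CsvTypes}

// Hypothetical column specification; substitute the CsvTypes values your columns need.
val plugin = CompiledSemanticsCsvPlugin(
  "user.age"  -> CsvTypes.IntType,
  "user.name" -> CsvTypes.StringType
)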
Example 3
Source File: ParamGridBuilder.scala From sona with Apache License 2.0 | 5 votes |
package com.tencent.angel.sona.ml.tuning

import com.tencent.angel.sona.ml.param.{BooleanParam, DoubleParam, FloatParam, IntParam, LongParam, Param, ParamMap, ParamPair}
import scala.annotation.varargs
import scala.collection.mutable

/**
 * Builder for a param grid used in grid search-based model selection.
 */
class ParamGridBuilder {

  private val paramGrid = mutable.Map.empty[Param[_], Iterable[_]]

  /**
   * Sets the given parameters in this grid to fixed values.
   */
  def baseOn(paramMap: ParamMap): this.type = {
    baseOn(paramMap.toSeq: _*)
    this
  }

  /**
   * Sets the given parameters in this grid to fixed values.
   */
  @varargs
  def baseOn(paramPairs: ParamPair[_]*): this.type = {
    paramPairs.foreach { p =>
      addGrid(p.param.asInstanceOf[Param[Any]], Seq(p.value))
    }
    this
  }

  /**
   * Adds a param with multiple values (overwrites if the input param exists).
   */
  def addGrid[T](param: Param[T], values: Iterable[T]): this.type = {
    paramGrid.put(param, values)
    this
  }

  // specialized versions of addGrid for Java.

  /**
   * Adds a double param with multiple values.
   */
  def addGrid(param: DoubleParam, values: Array[Double]): this.type = {
    addGrid[Double](param, values)
  }

  /**
   * Adds an int param with multiple values.
   */
  def addGrid(param: IntParam, values: Array[Int]): this.type = {
    addGrid[Int](param, values)
  }

  /**
   * Adds a float param with multiple values.
   */
  def addGrid(param: FloatParam, values: Array[Float]): this.type = {
    addGrid[Float](param, values)
  }

  /**
   * Adds a long param with multiple values.
   */
  def addGrid(param: LongParam, values: Array[Long]): this.type = {
    addGrid[Long](param, values)
  }

  /**
   * Adds a boolean param with true and false.
   */
  def addGrid(param: BooleanParam): this.type = {
    addGrid[Boolean](param, Array(true, false))
  }

  /**
   * Builds and returns all combinations of parameters specified by the param grid.
   */
  def build(): Array[ParamMap] = {
    var paramMaps = Array(new ParamMap)
    paramGrid.foreach { case (param, values) =>
      val newParamMaps = values.flatMap { v =>
        paramMaps.map(_.copy.put(param.asInstanceOf[Param[Any]], v))
      }
      paramMaps = newParamMaps.toArray
    }
    paramMaps
  }
}
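A short usage sketch of the builder above; the params are constructed standalone for illustration, assuming the sona Param constructors mirror Spark ML's (parent, name, doc):

import com.tencent.angel.sona.ml.param.{DoubleParam, IntParam}
import com.tencent.angel.sona.ml.tuning.ParamGridBuilder

// Standalone params for illustration; in practice these are members of an Estimator.
val regParam = new DoubleParam("lr", "regParam", "regularization strength")
val maxIter  = new IntParam("lr", "maxIter", "maximum number of iterations")

// build() returns the Cartesian product of the value lists: 2 x 3 = 6 ParamMaps.
val grid = new ParamGridBuilder()
  .addGrid(regParam, Array(0.01, 0.1))
  .addGrid(maxIter, Array(10, 50, 100))
  .build()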
Example 4
Source File: Estimator.scala From sona with Apache License 2.0 | 5 votes |
package com.tencent.angel.sona.ml

import com.tencent.angel.sona.ml.param.{ParamMap, ParamPair}
import scala.annotation.varargs
import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.sql.Dataset

/**
 * :: DeveloperApi ::
 * Abstract class for estimators that fit models to data.
 */
@DeveloperApi
abstract class Estimator[M <: Model[M]] extends PipelineStage {

  /**
   * Fits a single model to the input data with optional parameters.
   *
   * @param dataset         input dataset
   * @param firstParamPair  the first param pair, overrides embedded params
   * @param otherParamPairs other param pairs. These values override any specified in this
   *                        Estimator's embedded ParamMap.
   * @return fitted model
   */
  @varargs
  def fit(dataset: Dataset[_], firstParamPair: ParamPair[_], otherParamPairs: ParamPair[_]*): M = {
    val map = new ParamMap()
      .put(firstParamPair)
      .put(otherParamPairs: _*)
    fit(dataset, map)
  }

  /**
   * Fits a single model to the input data with provided parameter map.
   *
   * @param dataset  input dataset
   * @param paramMap Parameter map.
   *                 These values override any specified in this Estimator's embedded ParamMap.
   * @return fitted model
   */
  def fit(dataset: Dataset[_], paramMap: ParamMap): M = {
    copy(paramMap).fit(dataset)
  }

  /**
   * Fits a model to the input data.
   */
  def fit(dataset: Dataset[_]): M

  /**
   * Fits multiple models to the input data with multiple sets of parameters.
   * The default implementation uses a for loop on each parameter map.
   * Subclasses could override this to optimize multi-model training.
   *
   * @param dataset   input dataset
   * @param paramMaps An array of parameter maps.
   *                  These values override any specified in this Estimator's embedded ParamMap.
   * @return fitted models, matching the input parameter maps
   */
  def fit(dataset: Dataset[_], paramMaps: Array[ParamMap]): Seq[M] = {
    paramMaps.map(fit(dataset, _))
  }

  override def copy(extra: ParamMap): Estimator[M]
}
Example 5
Source File: StreamletShape.scala From cloudflow with Apache License 2.0 | 5 votes |
package cloudflow.streamlets

import scala.collection.immutable
import scala.annotation.varargs

trait StreamletShape {
  def inlets: immutable.IndexedSeq[Inlet]
  def outlets: immutable.IndexedSeq[Outlet]

  @varargs
  def withInlets(inlet: Inlet, inlets: Inlet*): StreamletShape

  @varargs
  def withOutlets(outlet: Outlet, outlets: Outlet*): StreamletShape
}

private[streamlets] final case class StreamletShapeImpl(
    inlets: immutable.IndexedSeq[Inlet],
    outlets: immutable.IndexedSeq[Outlet]
) extends StreamletShape {

  @varargs
  def withInlets(inlet: Inlet, inlets: Inlet*) =
    copy(inlets = inlet +: inlets.toIndexedSeq)

  @varargs
  def withOutlets(outlet: Outlet, outlets: Outlet*) =
    copy(outlets = outlet +: outlets.toIndexedSeq)
}

object StreamletShape {
  def apply(inlet: Inlet): StreamletShape =
    StreamletShapeImpl(immutable.IndexedSeq(inlet), immutable.IndexedSeq())

  def apply(outlet: Outlet): StreamletShape =
    StreamletShapeImpl(immutable.IndexedSeq(), immutable.IndexedSeq(outlet))

  def apply(inlet: Inlet, outlet: Outlet): StreamletShape =
    StreamletShapeImpl(immutable.IndexedSeq(inlet), immutable.IndexedSeq(outlet))

  @varargs
  def withInlets(inlet: Inlet, inlets: Inlet*): StreamletShapeImpl =
    StreamletShapeImpl(inlet +: inlets.toIndexedSeq, immutable.IndexedSeq())

  @varargs
  def withOutlets(outlet: Outlet, outlets: Outlet*): StreamletShapeImpl =
    StreamletShapeImpl(immutable.IndexedSeq(), outlet +: outlets.toIndexedSeq)

  // Java API
  @varargs
  def createWithInlets(inlet: Inlet, inlets: Inlet*): StreamletShapeImpl =
    withInlets(inlet, inlets: _*)

  // Java API
  @varargs
  def createWithOutlets(outlet: Outlet, outlets: Outlet*): StreamletShapeImpl =
    withOutlets(outlet, outlets: _*)
}
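A hedged usage sketch of the builders above; the Inlet and Outlet values are placeholders (real streamlets use cloudflow's concrete port types, which are not shown here):

import cloudflow.streamlets.{Inlet, Outlet, StreamletShape}

// Placeholder ports for illustration only.
val in: Inlet    = ???
val out1: Outlet = ???
val out2: Outlet = ???

// One inlet and two outlets; withOutlets is the @varargs builder from the trait.
val shape: StreamletShape = StreamletShape(in).withOutlets(out1, out2)

// Java callers get the same shape through the createWith* forwarders:
//   StreamletShape.createWithInlets(in).withOutlets(out1, out2)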
Example 6
Source File: InfinispanJavaRDD.scala From infinispan-spark with Apache License 2.0 | 5 votes |
package org.infinispan.spark.rdd

import org.apache.spark.api.java.{JavaPairRDD, JavaSparkContext}
import org.infinispan.query.dsl.Query
import org.infinispan.spark._
import org.infinispan.spark.config.ConnectorConfiguration

import scala.annotation.varargs
import scala.reflect.ClassTag

object InfinispanJavaRDD {

  def createInfinispanRDD[K, V](jsc: JavaSparkContext, config: ConnectorConfiguration): InfinispanJavaRDD[K, V] = {
    createInfinispanRDD(jsc.sc, config, new PerServerSplitter)
  }

  def createInfinispanRDD[K, V](jsc: JavaSparkContext, config: ConnectorConfiguration, splitter: Splitter): InfinispanJavaRDD[K, V] = {
    val infinispanRDD = new InfinispanRDD[K, V](jsc.sc, config, splitter)
    implicit val keyClassTag = ClassTag.AnyRef.asInstanceOf[ClassTag[K]]
    implicit val valueClassTag = ClassTag.AnyRef.asInstanceOf[ClassTag[V]]
    new InfinispanJavaRDD[K, V](infinispanRDD)
  }

  def write[K, V](pairRDD: JavaPairRDD[K, V], config: ConnectorConfiguration) =
    pairRDD.rdd.writeToInfinispan(config)
}

class InfinispanJavaRDD[K, V](rdd: InfinispanRDD[K, V])
                             (implicit override val kClassTag: ClassTag[K],
                              implicit override val vClassTag: ClassTag[V])
  extends JavaPairRDD[K, V](rdd) with CacheManagementAware {

  def filterByQuery[R](q: Query): JavaPairRDD[K, R] = {
    val filteredRDD = rdd.filterByQuery[R](q)
    implicit val converted = ClassTag.AnyRef.asInstanceOf[ClassTag[R]]
    JavaPairRDD.fromRDD[K, R](filteredRDD)
  }

  def filterByQuery[R](q: String): JavaPairRDD[K, R] = {
    val filteredRDD = rdd.filterByQuery[R](q)
    implicit val converted = ClassTag.AnyRef.asInstanceOf[ClassTag[R]]
    JavaPairRDD.fromRDD[K, R](filteredRDD)
  }

  @varargs
  def filterByCustom[R](filterFactory: String, params: AnyRef*): JavaPairRDD[K, R] = {
    val filteredRDD = rdd.filterByCustom[R](filterFactory, params: _*)
    implicit val converted = ClassTag.AnyRef.asInstanceOf[ClassTag[R]]
    JavaPairRDD.fromRDD[K, R](filteredRDD)
  }

  override def count() = rdd.count()

  override def cacheAdmin(): CacheAdmin = rdd.cacheAdmin()
}
Example 7
Source File: Transformer.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.ml

import scala.annotation.varargs

import org.apache.spark.Logging
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._

// Excerpt from the UnaryTransformer abstract class defined in this file. The class
// header is restored here for context; its setInputCol/setOutputCol setters and the
// abstract createTransformFunc and outputDataType members are not shown.
abstract class UnaryTransformer[IN, OUT, T <: UnaryTransformer[IN, OUT, T]]
  extends Transformer with HasInputCol with HasOutputCol with Logging {

  protected def validateInputType(inputType: DataType): Unit = {}

  override def transformSchema(schema: StructType): StructType = {
    val inputType = schema($(inputCol)).dataType
    validateInputType(inputType)
    if (schema.fieldNames.contains($(outputCol))) {
      throw new IllegalArgumentException(s"Output column ${$(outputCol)} already exists.")
    }
    val outputFields = schema.fields :+
      StructField($(outputCol), outputDataType, nullable = false)
    StructType(outputFields)
  }

  override def transform(dataset: DataFrame): DataFrame = {
    transformSchema(dataset.schema, logging = true)
    dataset.withColumn($(outputCol),
      callUDF(this.createTransformFunc, outputDataType, dataset($(inputCol))))
  }

  override def copy(extra: ParamMap): T = defaultCopy(extra)
}
Example 8
Source File: ParamGridBuilder.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.tuning

import scala.annotation.varargs
import scala.collection.mutable

import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.param._

// Excerpt: only the build() method is shown in this snippet. The enclosing class and
// its paramGrid field are restored minimally here so the code reads in context; see
// Example 3 for a complete ParamGridBuilder.
class ParamGridBuilder {

  private val paramGrid = mutable.Map.empty[Param[_], Iterable[_]]

  @Since("1.2.0")
  def build(): Array[ParamMap] = {
    var paramMaps = Array(new ParamMap)
    paramGrid.foreach { case (param, values) =>
      val newParamMaps = values.flatMap { v =>
        paramMaps.map(_.copy.put(param.asInstanceOf[Param[Any]], v))
      }
      paramMaps = newParamMaps.toArray
    }
    paramMaps
  }
}
Example 9
Source File: Transformer.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.ml

import scala.annotation.varargs

import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.internal.Logging
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
import org.apache.spark.sql.{DataFrame, Dataset}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._

// Excerpt from the UnaryTransformer abstract class defined in this file. The class
// header is restored here for context; its setInputCol/setOutputCol setters and the
// abstract createTransformFunc and outputDataType members are not shown.
abstract class UnaryTransformer[IN, OUT, T <: UnaryTransformer[IN, OUT, T]]
  extends Transformer with HasInputCol with HasOutputCol with Logging {

  protected def validateInputType(inputType: DataType): Unit = {}

  override def transformSchema(schema: StructType): StructType = {
    val inputType = schema($(inputCol)).dataType
    validateInputType(inputType)
    if (schema.fieldNames.contains($(outputCol))) {
      throw new IllegalArgumentException(s"Output column ${$(outputCol)} already exists.")
    }
    val outputFields = schema.fields :+
      StructField($(outputCol), outputDataType, nullable = false)
    StructType(outputFields)
  }

  override def transform(dataset: Dataset[_]): DataFrame = {
    transformSchema(dataset.schema, logging = true)
    val transformUDF = udf(this.createTransformFunc, outputDataType)
    dataset.withColumn($(outputCol), transformUDF(dataset($(inputCol))))
  }

  override def copy(extra: ParamMap): T = defaultCopy(extra)
}
Example 10
Source File: ParamGridBuilder.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.tuning

import scala.annotation.varargs
import scala.collection.mutable

import org.apache.spark.annotation.Since
import org.apache.spark.ml.param._

// Excerpt: only the build() method is shown in this snippet. The enclosing class and
// its paramGrid field are restored minimally here so the code reads in context; see
// Example 3 for a complete ParamGridBuilder.
class ParamGridBuilder {

  private val paramGrid = mutable.Map.empty[Param[_], Iterable[_]]

  @Since("1.2.0")
  def build(): Array[ParamMap] = {
    var paramMaps = Array(new ParamMap)
    paramGrid.foreach { case (param, values) =>
      val newParamMaps = values.flatMap { v =>
        paramMaps.map(_.copy.put(param.asInstanceOf[Param[Any]], v))
      }
      paramMaps = newParamMaps.toArray
    }
    paramMaps
  }
}
Example 11
Source File: ServiceHandler.scala From akka-grpc with Apache License 2.0 | 5 votes |
package akka.grpc.javadsl

import java.util.concurrent.{ CompletableFuture, CompletionStage }

import akka.annotation.ApiMayChange
import akka.annotation.InternalApi
import akka.grpc.scaladsl.{ ServiceHandler => sServiceHandler }
import akka.http.javadsl.model.{ HttpRequest, HttpResponse, StatusCodes }
// using japi because bindAndHandleAsync expects that
import akka.japi.{ Function => JFunction }

import scala.annotation.varargs

@ApiMayChange
object ServiceHandler {

  // Note: the notFound and unsupportedMediaType defaults referenced below are defined
  // elsewhere in this object in the original file and are not part of this excerpt.

  @varargs
  def handler(handlers: JFunction[HttpRequest, CompletionStage[HttpResponse]]*)
      : JFunction[HttpRequest, CompletionStage[HttpResponse]] = {
    val servicesHandler = concat(handlers: _*)
    (req: HttpRequest) =>
      if (sServiceHandler.isGrpcRequest(req)) servicesHandler(req) else unsupportedMediaType
  }

  private[javadsl] def concat(handlers: JFunction[HttpRequest, CompletionStage[HttpResponse]]*)
      : JFunction[HttpRequest, CompletionStage[HttpResponse]] =
    (req: HttpRequest) =>
      handlers.foldLeft(notFound) { (comp, next) =>
        comp.thenCompose(res => if (res.status == StatusCodes.NOT_FOUND) next.apply(req) else comp)
      }
}
Example 12
Source File: ParamGridBuilder.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.tuning

import scala.annotation.varargs
import scala.collection.mutable

import org.apache.spark.annotation.Since
import org.apache.spark.ml.param._

// Excerpt: only the build() method is shown in this snippet. The enclosing class and
// its paramGrid field are restored minimally here so the code reads in context; see
// Example 3 for a complete ParamGridBuilder.
class ParamGridBuilder {

  private val paramGrid = mutable.Map.empty[Param[_], Iterable[_]]

  @Since("1.2.0")
  def build(): Array[ParamMap] = {
    var paramMaps = Array(new ParamMap)
    paramGrid.foreach { case (param, values) =>
      val newParamMaps = values.flatMap { v =>
        paramMaps.map(_.copy.put(param.asInstanceOf[Param[Any]], v))
      }
      paramMaps = newParamMaps.toArray
    }
    paramMaps
  }
}
Example 13
Source File: ParamGridBuilder.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.tuning

import scala.annotation.varargs
import scala.collection.mutable

import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.param._

// Excerpt: only the build() method is shown in this snippet. The enclosing class and
// its paramGrid field are restored minimally here so the code reads in context; see
// Example 3 for a complete ParamGridBuilder.
class ParamGridBuilder {

  private val paramGrid = mutable.Map.empty[Param[_], Iterable[_]]

  def build(): Array[ParamMap] = {
    var paramMaps = Array(new ParamMap)
    paramGrid.foreach { case (param, values) =>
      val newParamMaps = values.flatMap { v =>
        paramMaps.map(_.copy.put(param.asInstanceOf[Param[Any]], v))
      }
      paramMaps = newParamMaps.toArray
    }
    paramMaps
  }
}
Example 14
Source File: Transformer.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.ml

import scala.annotation.varargs

import org.apache.spark.Logging
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._

// Excerpt from the UnaryTransformer abstract class defined in this file. The class
// header is restored here for context; its setInputCol/setOutputCol setters and the
// abstract createTransformFunc and outputDataType members are not shown.
abstract class UnaryTransformer[IN, OUT, T <: UnaryTransformer[IN, OUT, T]]
  extends Transformer with HasInputCol with HasOutputCol with Logging {

  protected def validateInputType(inputType: DataType): Unit = {}

  override def transformSchema(schema: StructType): StructType = {
    val inputType = schema($(inputCol)).dataType
    validateInputType(inputType)
    if (schema.fieldNames.contains($(outputCol))) {
      throw new IllegalArgumentException(s"Output column ${$(outputCol)} already exists.")
    }
    val outputFields = schema.fields :+
      StructField($(outputCol), outputDataType, nullable = false)
    StructType(outputFields)
  }

  override def transform(dataset: DataFrame): DataFrame = {
    transformSchema(dataset.schema, logging = true)
    dataset.withColumn($(outputCol),
      callUDF(this.createTransformFunc, outputDataType, dataset($(inputCol))))
  }

  override def copy(extra: ParamMap): T = defaultCopy(extra)
}
Example 15
Source File: ParamGridBuilder.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.tuning

import scala.annotation.varargs
import scala.collection.mutable

import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.param._

// Excerpt: only the build() method is shown in this snippet. The enclosing class and
// its paramGrid field are restored minimally here so the code reads in context; see
// Example 3 for a complete ParamGridBuilder.
class ParamGridBuilder {

  private val paramGrid = mutable.Map.empty[Param[_], Iterable[_]]

  def build(): Array[ParamMap] = {
    var paramMaps = Array(new ParamMap)
    paramGrid.foreach { case (param, values) =>
      val newParamMaps = values.flatMap { v =>
        paramMaps.map(_.copy.put(param.asInstanceOf[Param[Any]], v))
      }
      paramMaps = newParamMaps.toArray
    }
    paramMaps
  }
}
Example 16
Source File: Transformer.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.ml

import scala.annotation.varargs

import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.internal.Logging
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
import org.apache.spark.sql.{DataFrame, Dataset}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._

// Excerpt from the UnaryTransformer abstract class defined in this file. The class
// header is restored here for context; its setInputCol/setOutputCol setters and the
// abstract createTransformFunc and outputDataType members are not shown.
abstract class UnaryTransformer[IN, OUT, T <: UnaryTransformer[IN, OUT, T]]
  extends Transformer with HasInputCol with HasOutputCol with Logging {

  protected def validateInputType(inputType: DataType): Unit = {}

  override def transformSchema(schema: StructType): StructType = {
    val inputType = schema($(inputCol)).dataType
    validateInputType(inputType)
    if (schema.fieldNames.contains($(outputCol))) {
      throw new IllegalArgumentException(s"Output column ${$(outputCol)} already exists.")
    }
    val outputFields = schema.fields :+
      StructField($(outputCol), outputDataType, nullable = false)
    StructType(outputFields)
  }

  override def transform(dataset: Dataset[_]): DataFrame = {
    transformSchema(dataset.schema, logging = true)
    val transformUDF = udf(this.createTransformFunc, outputDataType)
    dataset.withColumn($(outputCol), transformUDF(dataset($(inputCol))))
  }

  override def copy(extra: ParamMap): T = defaultCopy(extra)
}
Example 17
Source File: ParamGridBuilder.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.tuning

import scala.annotation.varargs
import scala.collection.mutable

import org.apache.spark.annotation.Since
import org.apache.spark.ml.param._

// Excerpt: only the build() method is shown in this snippet. The enclosing class and
// its paramGrid field are restored minimally here so the code reads in context; see
// Example 3 for a complete ParamGridBuilder.
class ParamGridBuilder {

  private val paramGrid = mutable.Map.empty[Param[_], Iterable[_]]

  @Since("1.2.0")
  def build(): Array[ParamMap] = {
    var paramMaps = Array(new ParamMap)
    paramGrid.foreach { case (param, values) =>
      val newParamMaps = values.flatMap { v =>
        paramMaps.map(_.copy.put(param.asInstanceOf[Param[Any]], v))
      }
      paramMaps = newParamMaps.toArray
    }
    paramMaps
  }
}
Example 18
Source File: Utils.scala From intellij-lsp with Apache License 2.0 | 5 votes |
package com.github.gtache.lsp.utils

import java.util
import java.util.ResourceBundle

import com.intellij.openapi.diagnostic.Logger

import scala.annotation.varargs

// Excerpt: the object name below is inferred from the file name (Utils.scala); other
// members of the original object (e.g. the lineSeparator constant used as a default
// below) are not shown.
object Utils {

  @varargs
  def concatenateArrays(arr: Array[Any]*): Array[Any] = {
    arr.flatten.toArray
  }

  def stringToList(str: String, sep: String = lineSeparator): util.List[String] = {
    import scala.collection.JavaConverters._
    str.split(sep).toIndexedSeq.asJava
  }

  def parseArgs(strArr: Array[String]): Array[String] = {
    val mutableBuffer: scala.collection.mutable.Buffer[String] = scala.collection.mutable.Buffer()
    var isSingleQuote = false
    var isDoubleQuote = false
    var wasEscaped = false
    val curStr = StringBuilder.newBuilder
    strArr.foreach(str => {
      for (i <- Range(0, str.length)) {
        str(i) match {
          case '\'' =>
            if (!wasEscaped) {
              isSingleQuote = !isSingleQuote
            }
            wasEscaped = false
            curStr.append('\'')
          case '\"' =>
            if (!wasEscaped) {
              isDoubleQuote = !isDoubleQuote
            }
            wasEscaped = false
            curStr.append('\"')
          case ' ' =>
            if (isSingleQuote || isDoubleQuote) {
              curStr.append(" ")
            } else {
              mutableBuffer.append(curStr.toString())
              curStr.clear()
            }
            wasEscaped = false
          case '\\' =>
            if (wasEscaped) {
              wasEscaped = false
            } else {
              wasEscaped = true
            }
            curStr.append('\\')
          case c =>
            curStr.append(c)
            wasEscaped = false
        }
      }
      if (curStr.nonEmpty) {
        mutableBuffer.append(curStr.toString())
        curStr.clear()
      }
    })
    mutableBuffer.toArray
  }
}
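A brief usage sketch of the two helpers above, assuming the Utils object name inferred in the excerpt:

// The varargs helper flattens any number of Array[Any] values into one array.
val merged: Array[Any] = Utils.concatenateArrays(Array[Any](1, 2), Array[Any]("a", "b"))
// merged contains 1, 2, "a", "b"

// parseArgs splits on spaces but keeps quoted segments together
// (quote characters are preserved in the resulting token).
val parsed: Array[String] = Utils.parseArgs(Array("""run "my file.txt" --verbose"""))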
Example 19
Source File: Transformer.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.ml

import scala.annotation.varargs

import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.internal.Logging
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
import org.apache.spark.sql.{DataFrame, Dataset}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._

// Excerpt from the UnaryTransformer abstract class defined in this file. The class
// header is restored here for context; its setInputCol/setOutputCol setters and the
// abstract createTransformFunc and outputDataType members are not shown.
abstract class UnaryTransformer[IN, OUT, T <: UnaryTransformer[IN, OUT, T]]
  extends Transformer with HasInputCol with HasOutputCol with Logging {

  protected def validateInputType(inputType: DataType): Unit = {}

  override def transformSchema(schema: StructType): StructType = {
    val inputType = schema($(inputCol)).dataType
    validateInputType(inputType)
    if (schema.fieldNames.contains($(outputCol))) {
      throw new IllegalArgumentException(s"Output column ${$(outputCol)} already exists.")
    }
    val outputFields = schema.fields :+
      StructField($(outputCol), outputDataType, nullable = false)
    StructType(outputFields)
  }

  override def transform(dataset: Dataset[_]): DataFrame = {
    transformSchema(dataset.schema, logging = true)
    val transformUDF = udf(this.createTransformFunc, outputDataType)
    dataset.withColumn($(outputCol), transformUDF(dataset($(inputCol))))
  }

  override def copy(extra: ParamMap): T = defaultCopy(extra)
}
Example 20
Source File: ParamGridBuilder.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.tuning

import scala.annotation.varargs
import scala.collection.mutable

import org.apache.spark.annotation.Since
import org.apache.spark.ml.param._

// Excerpt: only the build() method is shown in this snippet. The enclosing class and
// its paramGrid field are restored minimally here so the code reads in context; see
// Example 3 for a complete ParamGridBuilder.
class ParamGridBuilder {

  private val paramGrid = mutable.Map.empty[Param[_], Iterable[_]]

  @Since("1.2.0")
  def build(): Array[ParamMap] = {
    var paramMaps = Array(new ParamMap)
    paramGrid.foreach { case (param, values) =>
      val newParamMaps = values.flatMap { v =>
        paramMaps.map(_.copy.put(param.asInstanceOf[Param[Any]], v))
      }
      paramMaps = newParamMaps.toArray
    }
    paramMaps
  }
}
Example 21
Source File: Transformer.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.ml

import scala.annotation.varargs

import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.internal.Logging
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
import org.apache.spark.sql.{DataFrame, Dataset}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._

// Excerpt from the UnaryTransformer abstract class defined in this file. The class
// header is restored here for context; its setInputCol/setOutputCol setters and the
// abstract createTransformFunc and outputDataType members are not shown.
abstract class UnaryTransformer[IN, OUT, T <: UnaryTransformer[IN, OUT, T]]
  extends Transformer with HasInputCol with HasOutputCol with Logging {

  protected def validateInputType(inputType: DataType): Unit = {}

  override def transformSchema(schema: StructType): StructType = {
    val inputType = schema($(inputCol)).dataType
    validateInputType(inputType)
    if (schema.fieldNames.contains($(outputCol))) {
      throw new IllegalArgumentException(s"Output column ${$(outputCol)} already exists.")
    }
    val outputFields = schema.fields :+
      StructField($(outputCol), outputDataType, nullable = false)
    StructType(outputFields)
  }

  override def transform(dataset: Dataset[_]): DataFrame = {
    transformSchema(dataset.schema, logging = true)
    val transformUDF = udf(this.createTransformFunc, outputDataType)
    dataset.withColumn($(outputCol), transformUDF(dataset($(inputCol))))
  }

  override def copy(extra: ParamMap): T = defaultCopy(extra)
}