scala.annotation.varargs Scala Examples

The following examples show how to use scala.annotation.varargs. Annotating a Scala method that takes a repeated parameter with @varargs tells the compiler to also emit a Java-style varargs forwarder, so the method can be called naturally from Java code. Each example is an excerpt from an open-source project; the source file, project, and license are listed above it.
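For orientation, here is a minimal sketch of the effect of the annotation (the object and method names are hypothetical and not taken from any of the projects below):

import scala.annotation.varargs

object Greeter {
  // By default Java sees greet as greet(scala.collection.Seq<String>); @varargs makes the
  // compiler also emit a Java-friendly greet(String... names) forwarder.
  @varargs
  def greet(names: String*): String = names.mkString("Hello ", ", ", "!")
}

// Scala call site: Greeter.greet("Alice", "Bob")   // "Hello Alice, Bob!"
// Java call site:  Greeter.greet("Alice", "Bob");  // compiles thanks to @varargs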
Example 1
Source File: Transformer.scala    From spark1.52   with Apache License 2.0
package org.apache.spark.ml

import scala.annotation.varargs

import org.apache.spark.Logging
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._


// Excerpt: the UnaryTransformer abstract class from this file (setter methods and
// doc comments omitted).
@DeveloperApi
abstract class UnaryTransformer[IN, OUT, T <: UnaryTransformer[IN, OUT, T]]
  extends Transformer with HasInputCol with HasOutputCol with Logging {

  // Supplied by concrete subclasses: the row-level transform function and its output type.
  protected def createTransformFunc: IN => OUT

  protected def outputDataType: DataType

  protected def validateInputType(inputType: DataType): Unit = {}

  override def transformSchema(schema: StructType): StructType = {
    val inputType = schema($(inputCol)).dataType
    validateInputType(inputType)
    if (schema.fieldNames.contains($(outputCol))) {
      throw new IllegalArgumentException(s"Output column ${$(outputCol)} already exists.")
    }
    val outputFields = schema.fields :+
      StructField($(outputCol), outputDataType, nullable = false)
    StructType(outputFields)
  }

  override def transform(dataset: DataFrame): DataFrame = {
    transformSchema(dataset.schema, logging = true)
    dataset.withColumn($(outputCol),
      callUDF(this.createTransformFunc, outputDataType, dataset($(inputCol))))
  }

  override def copy(extra: ParamMap): T = defaultCopy(extra)
} 
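The scala.annotation.varargs import in this file belongs to a @varargs transform(dataset, firstParamPair, otherParamPairs*) overload on the Transformer class, which the excerpt above does not show. As a hedged sketch, a concrete UnaryTransformer subclass only has to provide the transform function and the output type (the class and names below are hypothetical):

import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.sql.types.{DataType, StringType}

// Hypothetical transformer that upper-cases a string input column.
class UpperCaser(override val uid: String)
  extends UnaryTransformer[String, String, UpperCaser] {

  override protected def createTransformFunc: String => String = _.toUpperCase
  override protected def outputDataType: DataType = StringType
}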
Example 2
Source File: CompiledSemanticsCsvPlugin.scala    From aloha   with MIT License
package com.eharmony.aloha.semantics.compiled.plugin.csv

import com.eharmony.aloha.semantics.compiled.{RequiredAccessorCode, OptionalAccessorCode, VariableAccessorCode, CompiledSemanticsPlugin}
import com.eharmony.aloha.reflect.RefInfo

import scala.annotation.varargs


    // Excerpt: the class declaration is omitted here; accessorFunctionCode is a member of
    // the CompiledSemanticsCsvPlugin case class that the companion object below constructs.
    def accessorFunctionCode(spec: String): Either[Seq[String], VariableAccessorCode] = {
        val code = colNamesToTypes.get(spec).map {
            case t if t.isRequired => Right(RequiredAccessorCode(Seq(s"""(_:$inputTypeString).${t.toString()}("${escape(spec)}")""")))
            case t => Right(OptionalAccessorCode(Seq(s"""(_:$inputTypeString).${t.toString()}("${escape(spec)}")""")))
        }.getOrElse {
            Left(Seq[String](s"Couldn't produce code for specification: '$spec'."))
        }

        code
    }

    private[this] def escape(s: String) = s.replace("\\", "\\\\").replace("\"", "\\\"")
}

object CompiledSemanticsCsvPlugin {
    @varargs def apply(colNamesToTypes: (String, CsvTypes.CsvType)*): CompiledSemanticsCsvPlugin =
        CompiledSemanticsCsvPlugin(colNamesToTypes.toMap)
} 
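A hedged usage sketch: the column names are made up, and csvType1 / csvType2 stand in for actual CsvTypes.CsvType values, whose concrete names are not shown in this excerpt:

import com.eharmony.aloha.semantics.compiled.plugin.csv.{CompiledSemanticsCsvPlugin, CsvTypes}

val csvType1: CsvTypes.CsvType = ???   // placeholder for a required column type
val csvType2: CsvTypes.CsvType = ???   // placeholder for an optional column type

// The @varargs apply lets Java callers pass the (name, type) pairs as a varargs list;
// it simply forwards to the Map-based case class constructor.
val plugin = CompiledSemanticsCsvPlugin("user.age" -> csvType1, "user.score" -> csvType2)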
Example 3
Source File: ParamGridBuilder.scala    From sona   with Apache License 2.0
package com.tencent.angel.sona.ml.tuning

import com.tencent.angel.sona.ml.param.{BooleanParam, DoubleParam, FloatParam, IntParam, LongParam, Param, ParamMap, ParamPair}

import scala.annotation.varargs
import scala.collection.mutable

/**
  * Builder for a param grid used in grid search-based model selection.
  */
class ParamGridBuilder {

  private val paramGrid = mutable.Map.empty[Param[_], Iterable[_]]

  /**
    * Sets the given parameters in this grid to fixed values.
    */
  def baseOn(paramMap: ParamMap): this.type = {
    baseOn(paramMap.toSeq: _*)
    this
  }

  /**
    * Sets the given parameters in this grid to fixed values.
    */
  @varargs
  def baseOn(paramPairs: ParamPair[_]*): this.type = {
    paramPairs.foreach { p =>
      addGrid(p.param.asInstanceOf[Param[Any]], Seq(p.value))
    }
    this
  }

  /**
    * Adds a param with multiple values (overwrites if the input param exists).
    */
  def addGrid[T](param: Param[T], values: Iterable[T]): this.type = {
    paramGrid.put(param, values)
    this
  }

  // specialized versions of addGrid for Java.

  /**
    * Adds a double param with multiple values.
    */
  def addGrid(param: DoubleParam, values: Array[Double]): this.type = {
    addGrid[Double](param, values)
  }

  /**
    * Adds an int param with multiple values.
    */
  def addGrid(param: IntParam, values: Array[Int]): this.type = {
    addGrid[Int](param, values)
  }

  /**
    * Adds a float param with multiple values.
    */
  def addGrid(param: FloatParam, values: Array[Float]): this.type = {
    addGrid[Float](param, values)
  }

  /**
    * Adds a long param with multiple values.
    */
  def addGrid(param: LongParam, values: Array[Long]): this.type = {
    addGrid[Long](param, values)
  }

  /**
    * Adds a boolean param with true and false.
    */
  def addGrid(param: BooleanParam): this.type = {
    addGrid[Boolean](param, Array(true, false))
  }

  /**
    * Builds and returns all combinations of parameters specified by the param grid.
    */
  def build(): Array[ParamMap] = {
    var paramMaps = Array(new ParamMap)
    paramGrid.foreach { case (param, values) =>
      val newParamMaps = values.flatMap { v =>
        paramMaps.map(_.copy.put(param.asInstanceOf[Param[Any]], v))
      }
      paramMaps = newParamMaps.toArray
    }
    paramMaps
  }
} 
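A hedged usage sketch, assuming the sona param classes keep the Spark ML constructor convention (parent, name, doc); the parent string and param names below are placeholders:

import com.tencent.angel.sona.ml.param.{DoubleParam, IntParam, ParamMap}
import com.tencent.angel.sona.ml.tuning.ParamGridBuilder

val maxIter  = new IntParam("estimator-uid", "maxIter", "maximum number of iterations")
val regParam = new DoubleParam("estimator-uid", "regParam", "regularization parameter")

// build() returns the cross product of all value lists: 2 x 2 = 4 ParamMaps here.
val grid: Array[ParamMap] = new ParamGridBuilder()
  .addGrid(maxIter, Array(10, 100))
  .addGrid(regParam, Array(0.01, 0.1))
  .build()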
Example 4
Source File: Estimator.scala    From sona   with Apache License 2.0
package com.tencent.angel.sona.ml

import com.tencent.angel.sona.ml.param.{ParamMap, ParamPair}
import scala.annotation.varargs
import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.sql.Dataset

/**
 * :: DeveloperApi ::
 * Abstract class for estimators that fit models to data.
 */
@DeveloperApi
abstract class Estimator[M <: Model[M]] extends PipelineStage {

  /**
   * Fits a single model to the input data with optional parameters.
   *
   * @param dataset input dataset
   * @param firstParamPair the first param pair, overrides embedded params
   * @param otherParamPairs other param pairs.  These values override any specified in this
   *                        Estimator's embedded ParamMap.
   * @return fitted model
   */
  @varargs
  def fit(dataset: Dataset[_], firstParamPair: ParamPair[_], otherParamPairs: ParamPair[_]*): M = {
    val map = new ParamMap()
      .put(firstParamPair)
      .put(otherParamPairs: _*)
    fit(dataset, map)
  }

  /**
   * Fits a single model to the input data with provided parameter map.
   *
   * @param dataset input dataset
   * @param paramMap Parameter map.
   *                 These values override any specified in this Estimator's embedded ParamMap.
   * @return fitted model
   */
  def fit(dataset: Dataset[_], paramMap: ParamMap): M = {
    copy(paramMap).fit(dataset)
  }

  /**
   * Fits a model to the input data.
   */
  def fit(dataset: Dataset[_]): M

  /**
   * Fits multiple models to the input data with multiple sets of parameters.
   * The default implementation uses a for loop on each parameter map.
   * Subclasses could override this to optimize multi-model training.
   *
   * @param dataset input dataset
   * @param paramMaps An array of parameter maps.
   *                  These values override any specified in this Estimator's embedded ParamMap.
   * @return fitted models, matching the input parameter maps
   */
  def fit(dataset: Dataset[_], paramMaps: Array[ParamMap]): Seq[M] = {
    paramMaps.map(fit(dataset, _))
  }

  override def copy(extra: ParamMap): Estimator[M]
} 
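A hedged usage sketch: est stands for a concrete Estimator, trainData for its input Dataset, and maxIter / regParam for params that the estimator defines (all placeholders):

import com.tencent.angel.sona.ml.param.ParamPair

// fit is annotated with @varargs, so Java callers can pass the extra pairs as a plain
// varargs list: est.fit(trainData, pair1, pair2).
val model = est.fit(trainData, ParamPair(maxIter, 50), ParamPair(regParam, 0.1))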
Example 5
Source File: StreamletShape.scala    From cloudflow   with Apache License 2.0
package cloudflow.streamlets

import scala.collection.immutable
import scala.annotation.varargs

trait StreamletShape {
  def inlets: immutable.IndexedSeq[Inlet]
  def outlets: immutable.IndexedSeq[Outlet]

  @varargs
  def withInlets(inlet: Inlet, inlets: Inlet*): StreamletShape

  @varargs
  def withOutlets(outlet: Outlet, outlets: Outlet*): StreamletShape
}

private[streamlets] final case class StreamletShapeImpl(
    inlets: immutable.IndexedSeq[Inlet],
    outlets: immutable.IndexedSeq[Outlet]
) extends StreamletShape {

  @varargs
  def withInlets(inlet: Inlet, inlets: Inlet*) = copy(inlets = inlet +: inlets.toIndexedSeq)

  @varargs
  def withOutlets(outlet: Outlet, outlets: Outlet*) = copy(outlets = outlet +: outlets.toIndexedSeq)
}

object StreamletShape {
  def apply(inlet: Inlet): StreamletShape =
    StreamletShapeImpl(immutable.IndexedSeq(inlet), immutable.IndexedSeq())
  def apply(outlet: Outlet): StreamletShape =
    StreamletShapeImpl(immutable.IndexedSeq(), immutable.IndexedSeq(outlet))
  def apply(inlet: Inlet, outlet: Outlet): StreamletShape =
    StreamletShapeImpl(immutable.IndexedSeq(inlet), immutable.IndexedSeq(outlet))

  @varargs
  def withInlets(inlet: Inlet, inlets: Inlet*): StreamletShapeImpl =
    StreamletShapeImpl(inlet +: inlets.toIndexedSeq, immutable.IndexedSeq())

  @varargs
  def withOutlets(outlet: Outlet, outlets: Outlet*): StreamletShapeImpl =
    StreamletShapeImpl(immutable.IndexedSeq(), outlet +: outlets.toIndexedSeq)

  // Java API
  @varargs
  def createWithInlets(inlet: Inlet, inlets: Inlet*): StreamletShapeImpl =
    withInlets(inlet, inlets: _*)

  // Java API
  @varargs
  def createWithOutlets(outlet: Outlet, outlets: Outlet*): StreamletShapeImpl =
    withOutlets(outlet, outlets: _*)
} 
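A minimal sketch against the API above; in1, in2, and out stand for already-defined Inlet and Outlet instances:

// Scala: start from one inlet, then add outlets through the varargs method.
val shape: StreamletShape = StreamletShape(in1).withOutlets(out)

// Java callers use the createWith* aliases, which @varargs exposes as true varargs methods:
//   StreamletShape.createWithInlets(in1, in2).withOutlets(out);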
Example 6
Source File: InfinispanJavaRDD.scala    From infinispan-spark   with Apache License 2.0
package org.infinispan.spark.rdd

import org.apache.spark.api.java.{JavaPairRDD, JavaSparkContext}
import org.infinispan.query.dsl.Query
import org.infinispan.spark._
import org.infinispan.spark.config.ConnectorConfiguration

import scala.annotation.varargs
import scala.reflect.ClassTag


object InfinispanJavaRDD {

   def createInfinispanRDD[K, V](jsc: JavaSparkContext, config: ConnectorConfiguration): InfinispanJavaRDD[K, V] = {
      createInfinispanRDD(jsc.sc, config, new PerServerSplitter)
   }

   def createInfinispanRDD[K, V](jsc: JavaSparkContext, config: ConnectorConfiguration, splitter: Splitter): InfinispanJavaRDD[K, V] = {
      val infinispanRDD = new InfinispanRDD[K, V](jsc.sc, config, splitter)
      implicit val keyClassTag = ClassTag.AnyRef.asInstanceOf[ClassTag[K]]
      implicit val valueClassTag = ClassTag.AnyRef.asInstanceOf[ClassTag[V]]
      new InfinispanJavaRDD[K, V](infinispanRDD)
   }

   def write[K, V](pairRDD: JavaPairRDD[K, V], config: ConnectorConfiguration) = pairRDD.rdd.writeToInfinispan(config)
}

class InfinispanJavaRDD[K, V](rdd: InfinispanRDD[K, V])
                             (implicit override val kClassTag: ClassTag[K], implicit override val vClassTag: ClassTag[V])
  extends JavaPairRDD[K, V](rdd) with CacheManagementAware {

   def filterByQuery[R](q: Query): JavaPairRDD[K, R] = {
     val filteredRDD = rdd.filterByQuery[R](q)
     implicit val converted = ClassTag.AnyRef.asInstanceOf[ClassTag[R]]
     JavaPairRDD.fromRDD[K, R](filteredRDD)
   }

   def filterByQuery[R](q: String): JavaPairRDD[K, R] = {
     val filteredRDD = rdd.filterByQuery[R](q)
     implicit val converted = ClassTag.AnyRef.asInstanceOf[ClassTag[R]]
     JavaPairRDD.fromRDD[K, R](filteredRDD)
   }

   @varargs def filterByCustom[R](filterFactory: String, params: AnyRef*): JavaPairRDD[K, R] = {
      val filteredRDD = rdd.filterByCustom[R](filterFactory, params: _*)
      implicit val converted = ClassTag.AnyRef.asInstanceOf[ClassTag[R]]
      JavaPairRDD.fromRDD[K, R](filteredRDD)
   }

   override def count() = rdd.count()

   override def cacheAdmin(): CacheAdmin = rdd.cacheAdmin()
} 
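A hedged sketch of the varargs entry point; the connector configuration, cache contents, and the filter factory name are placeholders, and the factory is assumed to be deployed on the Infinispan server:

import org.apache.spark.api.java.JavaSparkContext
import org.infinispan.spark.config.ConnectorConfiguration
import org.infinispan.spark.rdd.InfinispanJavaRDD

val jsc: JavaSparkContext = ???                  // assumed to exist
val config: ConnectorConfiguration = ???         // built with the connector's configuration setters

val rdd = InfinispanJavaRDD.createInfinispanRDD[String, AnyRef](jsc, config)

// @varargs lets Java callers pass the filter parameters as a plain varargs list.
val filtered = rdd.filterByCustom[AnyRef]("my-filter-factory", "param1", Integer.valueOf(2))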
Example 7
Source File: Transformer.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.ml

import scala.annotation.varargs

import org.apache.spark.Logging
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._


  // Same UnaryTransformer excerpt as Example 1 (class header, abstract members, and
  // setters omitted here).
  protected def validateInputType(inputType: DataType): Unit = {}

  override def transformSchema(schema: StructType): StructType = {
    val inputType = schema($(inputCol)).dataType
    validateInputType(inputType)
    if (schema.fieldNames.contains($(outputCol))) {
      throw new IllegalArgumentException(s"Output column ${$(outputCol)} already exists.")
    }
    val outputFields = schema.fields :+
      StructField($(outputCol), outputDataType, nullable = false)
    StructType(outputFields)
  }

  override def transform(dataset: DataFrame): DataFrame = {
    transformSchema(dataset.schema, logging = true)
    dataset.withColumn($(outputCol),
      callUDF(this.createTransformFunc, outputDataType, dataset($(inputCol))))
  }

  override def copy(extra: ParamMap): T = defaultCopy(extra)
} 
Example 8
Source File: ParamGridBuilder.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.ml.tuning

import scala.annotation.varargs
import scala.collection.mutable

import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.param._


  // Excerpt: only the build method is shown; the full ParamGridBuilder class, including
  // the paramGrid map that build reads from, appears in Example 3.
  @Since("1.2.0")
  def build(): Array[ParamMap] = {
    var paramMaps = Array(new ParamMap)
    paramGrid.foreach { case (param, values) =>
      val newParamMaps = values.flatMap { v =>
        paramMaps.map(_.copy.put(param.asInstanceOf[Param[Any]], v))
      }
      paramMaps = newParamMaps.toArray
    }
    paramMaps
  }
} 
Example 9
Source File: Transformer.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.ml

import scala.annotation.varargs

import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.internal.Logging
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
import org.apache.spark.sql.{DataFrame, Dataset}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._


// Excerpt: the UnaryTransformer abstract class from this file (setter methods and
// doc comments omitted).
@DeveloperApi
abstract class UnaryTransformer[IN, OUT, T <: UnaryTransformer[IN, OUT, T]]
  extends Transformer with HasInputCol with HasOutputCol with Logging {

  // Supplied by concrete subclasses: the row-level transform function and its output type.
  protected def createTransformFunc: IN => OUT

  protected def outputDataType: DataType

  protected def validateInputType(inputType: DataType): Unit = {}

  override def transformSchema(schema: StructType): StructType = {
    val inputType = schema($(inputCol)).dataType
    validateInputType(inputType)
    if (schema.fieldNames.contains($(outputCol))) {
      throw new IllegalArgumentException(s"Output column ${$(outputCol)} already exists.")
    }
    val outputFields = schema.fields :+
      StructField($(outputCol), outputDataType, nullable = false)
    StructType(outputFields)
  }

  override def transform(dataset: Dataset[_]): DataFrame = {
    transformSchema(dataset.schema, logging = true)
    val transformUDF = udf(this.createTransformFunc, outputDataType)
    dataset.withColumn($(outputCol), transformUDF(dataset($(inputCol))))
  }

  override def copy(extra: ParamMap): T = defaultCopy(extra)
} 
Example 10
Source File: ParamGridBuilder.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.ml.tuning

import scala.annotation.varargs
import scala.collection.mutable

import org.apache.spark.annotation.Since
import org.apache.spark.ml.param._


  // Excerpt: build method only; see Example 3 for the full ParamGridBuilder class.
  @Since("1.2.0")
  def build(): Array[ParamMap] = {
    var paramMaps = Array(new ParamMap)
    paramGrid.foreach { case (param, values) =>
      val newParamMaps = values.flatMap { v =>
        paramMaps.map(_.copy.put(param.asInstanceOf[Param[Any]], v))
      }
      paramMaps = newParamMaps.toArray
    }
    paramMaps
  }
} 
Example 11
Source File: ServiceHandler.scala    From akka-grpc   with Apache License 2.0
package akka.grpc.javadsl

import java.util.concurrent.{ CompletableFuture, CompletionStage }

import akka.annotation.ApiMayChange
import akka.annotation.InternalApi
import akka.grpc.scaladsl.{ ServiceHandler => sServiceHandler }
import akka.http.javadsl.model.{ HttpRequest, HttpResponse, StatusCodes }
// using japi because bindAndHandleAsync expects that
import akka.japi.{ Function => JFunction }

import scala.annotation.varargs

@ApiMayChange
object ServiceHandler {

  
  // notFound and unsupportedMediaType (both CompletionStage[HttpResponse]) are defined
  // elsewhere in this object and omitted from this excerpt.
  @varargs
  def handler(handlers: JFunction[HttpRequest, CompletionStage[HttpResponse]]*)
      : JFunction[HttpRequest, CompletionStage[HttpResponse]] = {
    val servicesHandler = concat(handlers: _*)
    (req: HttpRequest) => if (sServiceHandler.isGrpcRequest(req)) servicesHandler(req) else unsupportedMediaType
  }

  private[javadsl] def concat(handlers: JFunction[HttpRequest, CompletionStage[HttpResponse]]*)
      : JFunction[HttpRequest, CompletionStage[HttpResponse]] =
    (req: HttpRequest) =>
      handlers.foldLeft(notFound) { (comp, next) =>
        comp.thenCompose(res => if (res.status == StatusCodes.NOT_FOUND) next.apply(req) else comp)
      }

} 
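A hedged sketch: handler1 and handler2 stand for the partial handler functions produced by akka-grpc's generated service handler factories (placeholders here):

import java.util.concurrent.CompletionStage

import akka.grpc.javadsl.ServiceHandler
import akka.http.javadsl.model.{ HttpRequest, HttpResponse }
import akka.japi.{ Function => JFunction }

val handler1: JFunction[HttpRequest, CompletionStage[HttpResponse]] = ??? // placeholder
val handler2: JFunction[HttpRequest, CompletionStage[HttpResponse]] = ??? // placeholder

// @varargs means Java code can call this as ServiceHandler.handler(handler1, handler2).
// Non-gRPC requests get the unsupported-media-type response; unmatched requests end up 404.
val combined = ServiceHandler.handler(handler1, handler2)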
Example 12
Source File: ParamGridBuilder.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.ml.tuning

import scala.annotation.varargs
import scala.collection.mutable

import org.apache.spark.annotation.Since
import org.apache.spark.ml.param._


  // Excerpt: build method only; see Example 3 for the full ParamGridBuilder class.
  @Since("1.2.0")
  def build(): Array[ParamMap] = {
    var paramMaps = Array(new ParamMap)
    paramGrid.foreach { case (param, values) =>
      val newParamMaps = values.flatMap { v =>
        paramMaps.map(_.copy.put(param.asInstanceOf[Param[Any]], v))
      }
      paramMaps = newParamMaps.toArray
    }
    paramMaps
  }
} 
Example 13
Source File: ParamGridBuilder.scala    From spark1.52   with Apache License 2.0
package org.apache.spark.ml.tuning

import scala.annotation.varargs
import scala.collection.mutable

import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.param._


  // Excerpt: build method only; see Example 3 for the full ParamGridBuilder class.
  def build(): Array[ParamMap] = {
    var paramMaps = Array(new ParamMap)
    paramGrid.foreach { case (param, values) =>
      val newParamMaps = values.flatMap { v =>
        paramMaps.map(_.copy.put(param.asInstanceOf[Param[Any]], v))
      }
      paramMaps = newParamMaps.toArray
    }
    paramMaps
  }
} 
Example 14
Source File: Transformer.scala    From iolap   with Apache License 2.0
package org.apache.spark.ml

import scala.annotation.varargs

import org.apache.spark.Logging
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._


  // Same UnaryTransformer excerpt as Example 1 (class header, abstract members, and
  // setters omitted here).
  protected def validateInputType(inputType: DataType): Unit = {}

  override def transformSchema(schema: StructType): StructType = {
    val inputType = schema($(inputCol)).dataType
    validateInputType(inputType)
    if (schema.fieldNames.contains($(outputCol))) {
      throw new IllegalArgumentException(s"Output column ${$(outputCol)} already exists.")
    }
    val outputFields = schema.fields :+
      StructField($(outputCol), outputDataType, nullable = false)
    StructType(outputFields)
  }

  override def transform(dataset: DataFrame): DataFrame = {
    transformSchema(dataset.schema, logging = true)
    dataset.withColumn($(outputCol),
      callUDF(this.createTransformFunc, outputDataType, dataset($(inputCol))))
  }

  override def copy(extra: ParamMap): T = defaultCopy(extra)
} 
Example 15
Source File: ParamGridBuilder.scala    From iolap   with Apache License 2.0
package org.apache.spark.ml.tuning

import scala.annotation.varargs
import scala.collection.mutable

import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.param._


  // Excerpt: build method only; see Example 3 for the full ParamGridBuilder class.
  def build(): Array[ParamMap] = {
    var paramMaps = Array(new ParamMap)
    paramGrid.foreach { case (param, values) =>
      val newParamMaps = values.flatMap { v =>
        paramMaps.map(_.copy.put(param.asInstanceOf[Param[Any]], v))
      }
      paramMaps = newParamMaps.toArray
    }
    paramMaps
  }
} 
Example 16
Source File: Transformer.scala    From multi-tenancy-spark   with Apache License 2.0
package org.apache.spark.ml

import scala.annotation.varargs

import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.internal.Logging
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
import org.apache.spark.sql.{DataFrame, Dataset}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._


  // Same UnaryTransformer excerpt as Example 9 (class header, abstract members, and
  // setters omitted here).
  protected def validateInputType(inputType: DataType): Unit = {}

  override def transformSchema(schema: StructType): StructType = {
    val inputType = schema($(inputCol)).dataType
    validateInputType(inputType)
    if (schema.fieldNames.contains($(outputCol))) {
      throw new IllegalArgumentException(s"Output column ${$(outputCol)} already exists.")
    }
    val outputFields = schema.fields :+
      StructField($(outputCol), outputDataType, nullable = false)
    StructType(outputFields)
  }

  override def transform(dataset: Dataset[_]): DataFrame = {
    transformSchema(dataset.schema, logging = true)
    val transformUDF = udf(this.createTransformFunc, outputDataType)
    dataset.withColumn($(outputCol), transformUDF(dataset($(inputCol))))
  }

  override def copy(extra: ParamMap): T = defaultCopy(extra)
} 
Example 17
Source File: ParamGridBuilder.scala    From multi-tenancy-spark   with Apache License 2.0
package org.apache.spark.ml.tuning

import scala.annotation.varargs
import scala.collection.mutable

import org.apache.spark.annotation.Since
import org.apache.spark.ml.param._


  // Excerpt: build method only; see Example 3 for the full ParamGridBuilder class.
  @Since("1.2.0")
  def build(): Array[ParamMap] = {
    var paramMaps = Array(new ParamMap)
    paramGrid.foreach { case (param, values) =>
      val newParamMaps = values.flatMap { v =>
        paramMaps.map(_.copy.put(param.asInstanceOf[Param[Any]], v))
      }
      paramMaps = newParamMaps.toArray
    }
    paramMaps
  }
} 
Example 18
Source File: Utils.scala    From intellij-lsp   with Apache License 2.0
package com.github.gtache.lsp.utils

import java.util
import java.util.ResourceBundle

import com.intellij.openapi.diagnostic.Logger

import scala.annotation.varargs


  // Excerpt from the helper object in Utils.scala; other members (such as lineSeparator,
  // used as the default separator below) are omitted.
  @varargs def concatenateArrays(arr: Array[Any]*): Array[Any] = {
    arr.flatten.toArray
  }

  def stringToList(str: String, sep: String = lineSeparator): util.List[String] = {
    import scala.collection.JavaConverters._
    str.split(sep).toIndexedSeq.asJava
  }

  def parseArgs(strArr: Array[String]): Array[String] = {
    val mutableBuffer: scala.collection.mutable.Buffer[String] = scala.collection.mutable.Buffer()
    var isSingleQuote = false
    var isDoubleQuote = false
    var wasEscaped = false
    val curStr = StringBuilder.newBuilder
    strArr.foreach(str => {
      for (i <- Range(0, str.length)) {
        str(i) match {
          case '\'' =>
            if (!wasEscaped) {
              isSingleQuote = !isSingleQuote
            }
            wasEscaped = false
            curStr.append('\'')
          case '\"' =>
            if (!wasEscaped) {
              isDoubleQuote = !isDoubleQuote
            }
            wasEscaped = false
            curStr.append('\"')
          case ' ' =>
            if (isSingleQuote || isDoubleQuote) {
              curStr.append(" ")
            } else {
              mutableBuffer.append(curStr.toString())
              curStr.clear()
            }
            wasEscaped = false
          case '\\' =>
            if (wasEscaped) {
              wasEscaped = false
            } else {
              wasEscaped = true
            }
            curStr.append('\\')
          case c =>
            curStr.append(c)
            wasEscaped = false
        }
      }
      if (curStr.nonEmpty) {
        mutableBuffer.append(curStr.toString())
        curStr.clear()
      }
    })
    mutableBuffer.toArray
  }


} 
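A minimal usage sketch of the two helpers above, assuming the enclosing object is named Utils (as the file name suggests):

// concatenateArrays is @varargs, so Java callers can write concatenateArrays(arr1, arr2).
val merged: Array[Any] = Utils.concatenateArrays(Array[Any](1, 2), Array[Any]("a", "b"))
// merged == Array(1, 2, "a", "b")

// parseArgs splits on unquoted spaces and keeps quoted segments (quotes included) together.
val args = Utils.parseArgs(Array("--name \"John Doe\" --verbose"))
// args == Array("--name", "\"John Doe\"", "--verbose")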
Example 19
Source File: Transformer.scala    From sparkoscope   with Apache License 2.0
package org.apache.spark.ml

import scala.annotation.varargs

import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.internal.Logging
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
import org.apache.spark.sql.{DataFrame, Dataset}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._


  // Same UnaryTransformer excerpt as Example 9 (class header, abstract members, and
  // setters omitted here).
  protected def validateInputType(inputType: DataType): Unit = {}

  override def transformSchema(schema: StructType): StructType = {
    val inputType = schema($(inputCol)).dataType
    validateInputType(inputType)
    if (schema.fieldNames.contains($(outputCol))) {
      throw new IllegalArgumentException(s"Output column ${$(outputCol)} already exists.")
    }
    val outputFields = schema.fields :+
      StructField($(outputCol), outputDataType, nullable = false)
    StructType(outputFields)
  }

  override def transform(dataset: Dataset[_]): DataFrame = {
    transformSchema(dataset.schema, logging = true)
    val transformUDF = udf(this.createTransformFunc, outputDataType)
    dataset.withColumn($(outputCol), transformUDF(dataset($(inputCol))))
  }

  override def copy(extra: ParamMap): T = defaultCopy(extra)
} 
Example 20
Source File: ParamGridBuilder.scala    From sparkoscope   with Apache License 2.0
package org.apache.spark.ml.tuning

import scala.annotation.varargs
import scala.collection.mutable

import org.apache.spark.annotation.Since
import org.apache.spark.ml.param._


  // Excerpt: build method only; see Example 3 for the full ParamGridBuilder class.
  @Since("1.2.0")
  def build(): Array[ParamMap] = {
    var paramMaps = Array(new ParamMap)
    paramGrid.foreach { case (param, values) =>
      val newParamMaps = values.flatMap { v =>
        paramMaps.map(_.copy.put(param.asInstanceOf[Param[Any]], v))
      }
      paramMaps = newParamMaps.toArray
    }
    paramMaps
  }
} 
Example 21
Source File: Transformer.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.ml

import scala.annotation.varargs

import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.internal.Logging
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
import org.apache.spark.sql.{DataFrame, Dataset}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._


  // Same UnaryTransformer excerpt as Example 9 (class header, abstract members, and
  // setters omitted here).
  protected def validateInputType(inputType: DataType): Unit = {}

  override def transformSchema(schema: StructType): StructType = {
    val inputType = schema($(inputCol)).dataType
    validateInputType(inputType)
    if (schema.fieldNames.contains($(outputCol))) {
      throw new IllegalArgumentException(s"Output column ${$(outputCol)} already exists.")
    }
    val outputFields = schema.fields :+
      StructField($(outputCol), outputDataType, nullable = false)
    StructType(outputFields)
  }

  override def transform(dataset: Dataset[_]): DataFrame = {
    transformSchema(dataset.schema, logging = true)
    val transformUDF = udf(this.createTransformFunc, outputDataType)
    dataset.withColumn($(outputCol), transformUDF(dataset($(inputCol))))
  }

  override def copy(extra: ParamMap): T = defaultCopy(extra)
}