org.apache.spark.sql.catalyst.analysis.FunctionRegistry Scala Examples
The following examples show how to use org.apache.spark.sql.catalyst.analysis.FunctionRegistry.
Follow the links above each example to view the original project or source file.
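For orientation, here is a minimal sketch of the core API all of the examples below build on: a FunctionRegistry maps a SQL-visible function name to a builder that turns argument expressions into a Catalyst Expression. The object name, the function name "shout", and the reuse of Spark's built-in Upper expression are illustrative; the registerFunction overload shown is the FunctionIdentifier-based variant available since Spark 2.3.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.FunctionIdentifier
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo, Upper}

object FunctionRegistryQuickstart {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("registry-demo").getOrCreate()
    // Register Spark's built-in Upper expression under a custom SQL name.
    spark.sessionState.functionRegistry.registerFunction(
      FunctionIdentifier("shout"),
      new ExpressionInfo(classOf[Upper].getCanonicalName, "shout"),
      (children: Seq[Expression]) => Upper(children.head))
    spark.sql("SELECT shout('hello')").show()  // prints HELLO
    spark.stop()
  }
}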
Example 1
Source File: NativeFunctionRegistration.scala From spark-alchemy with Apache License 2.0
package com.swoop.alchemy.spark.expressions

import org.apache.spark.sql.EncapsulationViolator.createAnalysisException
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.FunctionIdentifier
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription, ExpressionInfo, RuntimeReplaceable}

import scala.reflect.ClassTag
import scala.util.{Failure, Success, Try}

// based on Spark's FunctionRegistry @ossSpark
trait NativeFunctionRegistration extends FunctionRegistration {

  type FunctionBuilder = Seq[Expression] => Expression

  def expressions: Map[String, (ExpressionInfo, FunctionBuilder)]

  def registerFunctions(fr: FunctionRegistry): Unit = {
    expressions.foreach { case (name, (info, builder)) =>
      fr.registerFunction(FunctionIdentifier(name), info, builder)
    }
  }

  def registerFunctions(spark: SparkSession): Unit = {
    registerFunctions(spark.sessionState.functionRegistry)
  }

  protected def expressionInfo[T <: Expression : ClassTag](name: String): ExpressionInfo = {
    val clazz = scala.reflect.classTag[T].runtimeClass
    val df = clazz.getAnnotation(classOf[ExpressionDescription])
    if (df != null) {
      new ExpressionInfo(clazz.getCanonicalName, null, name, df.usage(), df.extended())
    } else {
      new ExpressionInfo(clazz.getCanonicalName, name)
    }
  }

}
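A hedged sketch of how the trait above might be used; the object name, the SQL name my_strlen, and the reuse of Spark's built-in Length expression are illustrative and not part of spark-alchemy.

import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo, Length}

object MyFunctionRegistration extends NativeFunctionRegistration {
  // Reuse Spark's built-in Length expression under a custom SQL name.
  private val lengthBuilder: FunctionBuilder = (children: Seq[Expression]) => Length(children.head)

  override def expressions: Map[String, (ExpressionInfo, FunctionBuilder)] = Map(
    "my_strlen" -> ((expressionInfo[Length]("my_strlen"), lengthBuilder))
  )
}

// Usage (illustrative):
//   val spark = org.apache.spark.sql.SparkSession.builder().getOrCreate()
//   MyFunctionRegistration.registerFunctions(spark)
//   spark.sql("SELECT my_strlen('hello')")  // 5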
Example 2
Source File: PythonSQLUtils.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.api.python

import java.io.InputStream
import java.nio.channels.Channels

import org.apache.spark.api.java.JavaRDD
import org.apache.spark.api.python.PythonRDDServer
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.catalyst.expressions.ExpressionInfo
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.execution.arrow.ArrowConverters
import org.apache.spark.sql.types.DataType

private[sql] object PythonSQLUtils {

  def parseDataType(typeText: String): DataType = CatalystSqlParser.parseDataType(typeText)

  // This is needed when generating SQL documentation for built-in functions.
  def listBuiltinFunctionInfos(): Array[ExpressionInfo] = {
    FunctionRegistry.functionSet.flatMap(f => FunctionRegistry.builtin.lookupFunction(f)).toArray
  }
}

private[sql] class ArrowRDDServer(sqlContext: SQLContext) extends PythonRDDServer {

  override protected def streamToRDD(input: InputStream): RDD[Array[Byte]] = {
    // Create array to consume iterator so that we can safely close the inputStream
    val batches = ArrowConverters.getBatchesFromStream(Channels.newChannel(input)).toArray
    // Parallelize the record batches to create an RDD
    JavaRDD.fromRDD(sqlContext.sparkContext.parallelize(batches, batches.length))
  }
}
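Because the object is private[sql], callers must live under the org.apache.spark.sql package tree. A small illustrative sketch (the object name and file placement are hypothetical) that uses listBuiltinFunctionInfos to dump the built-in catalog, the same data the SQL documentation generator consumes:

package org.apache.spark.sql.api.python

// Hypothetical helper: print every built-in function name with its usage text.
object PrintBuiltinFunctions {
  def main(args: Array[String]): Unit = {
    PythonSQLUtils.listBuiltinFunctionInfos()
      .sortBy(_.getName)
      .foreach(info => println(s"${info.getName}: ${info.getUsage}"))
  }
}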
Example 3
Source File: SQLContextExtensionBase.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql.extension

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.{ParserDialect, TableIdentifier}
import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry, SimpleFunctionRegistry}
import org.apache.spark.sql.catalyst.errors.DialectException
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.datasources.DDLParser
import org.apache.spark.sql.extension.OptimizerFactory.ExtendableOptimizerBatch
import org.apache.spark.util.Utils

import scala.util.Try
import scala.util.control.NonFatal

// NOTE: this excerpt is truncated; the enclosing trait declaration and the members it
// references (e.g. dialectClassName, conf, sqlParser) are not shown.

  override protected def extendedParserDialect: ParserDialect = try {
    val clazz = Utils.classForName(dialectClassName)
    clazz.newInstance().asInstanceOf[ParserDialect]
  } catch {
    case NonFatal(e) =>
      // Since we didn't find the available SQL Dialect, it will fail even for SET command:
      // SET spark.sql.dialect=sql; Let's reset as default dialect automatically.
      val dialect = conf.dialect
      // reset the sql dialect
      conf.unsetConf(SQLConf.DIALECT)
      // throw out the exception, and the default sql dialect will take effect for next query.
      throw new DialectException(
        s"""
           |Instantiating dialect '$dialect' failed.
           |Reverting to default dialect '${conf.dialect}'""".stripMargin, e)
  }

  // (suggestion) make this implicit to FunctionRegistry.
  protected def registerBuiltins(registry: FunctionRegistry): Unit = {
    FunctionRegistry.expressions.foreach { case (name, (info, builder)) =>
      registry.registerFunction(name, builder)
    }
  }

  override protected def extendedDdlParser(parser: String => LogicalPlan): DDLParser =
    new DDLParser(sqlParser.parse(_))

  override protected def registerFunctions(registry: FunctionRegistry): Unit = { }

}
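The registerBuiltins hook above copies every built-in function from Spark's global FunctionRegistry object into a session-specific registry, so extension functions can be layered on top without losing the standard library. Below is a minimal sketch of that pattern in isolation, assuming the Spark 1.x-era FunctionRegistry API this project targets; the object and method names are illustrative.

import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, SimpleFunctionRegistry}

object RegistryBootstrap {
  // Build a fresh registry pre-populated with Spark's built-in functions,
  // ready to have extension-specific functions registered on top of them.
  def newRegistryWithBuiltins(): FunctionRegistry = {
    val registry = new SimpleFunctionRegistry
    FunctionRegistry.expressions.foreach { case (name, (_, builder)) =>
      registry.registerFunction(name, builder)
    }
    registry
  }
}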
Example 4
Source File: RegisterHierarchyFunctions.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql

import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.catalyst.expressions._
import FunctionBuilders._

private[sql] object RegisterHierarchyFunctions {

  def apply(functionRegistry: FunctionRegistry): Unit = {
    val r = (name: String, builder: ExpressionBuilder) =>
      functionRegistry.registerFunction(name, builder)
    r("level", unaryExpression[Level])
    r("post_rank", unaryExpression[PostRank])
    r("pre_rank", unaryExpression[PreRank])
    r("is_root", unaryExpression[IsRoot])
    r("is_leaf", unaryExpression[IsLeaf])
    r("name", unaryExpression[Name])
    r("is_descendant", binaryExpression[IsDescendant])
    r("is_descendant_or_self", binaryExpression[IsDescendantOrSelf])
    r("is_ancestor", reverse(binaryExpression[IsDescendant]))
    r("is_ancestor_or_self", reverse(binaryExpression[IsDescendantOrSelf]))
    r("is_parent", binaryExpression[IsParent])
    r("is_child", reverse(binaryExpression[IsParent]))
    r("is_sibling", binaryExpression[IsSibling])
    r("is_self", binaryExpression[IsSelf])
    r("is_sibling_or_self", binaryExpression[IsSiblingOrSelf])
    r("is_following", binaryExpression[IsFollowing])
    r("is_preceding", reverse(binaryExpression[IsFollowing]))
  }
}
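For readers unfamiliar with the FunctionBuilders helpers used above, here is a plausible sketch of their shape (not the project's actual implementation): unaryExpression and binaryExpression construct an expression reflectively from one or two children, and reverse swaps the argument order so that, for example, is_ancestor can reuse the IsDescendant expression.

import scala.reflect.ClassTag
import org.apache.spark.sql.catalyst.expressions.Expression

object FunctionBuildersSketch {
  type ExpressionBuilder = Seq[Expression] => Expression

  // Construct a one-argument expression of type T from its single child.
  def unaryExpression[T <: Expression](implicit ct: ClassTag[T]): ExpressionBuilder = {
    case Seq(child) =>
      ct.runtimeClass.getConstructor(classOf[Expression]).newInstance(child).asInstanceOf[T]
    case other =>
      throw new IllegalArgumentException(s"Expected 1 argument, got ${other.size}")
  }

  // Construct a two-argument expression of type T from its two children.
  def binaryExpression[T <: Expression](implicit ct: ClassTag[T]): ExpressionBuilder = {
    case Seq(left, right) =>
      ct.runtimeClass.getConstructor(classOf[Expression], classOf[Expression])
        .newInstance(left, right).asInstanceOf[T]
    case other =>
      throw new IllegalArgumentException(s"Expected 2 arguments, got ${other.size}")
  }

  // Swap argument order, e.g. is_ancestor(a, b) behaves as is_descendant(b, a).
  def reverse(builder: ExpressionBuilder): ExpressionBuilder =
    args => builder(args.reverse)
}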
Example 5
Source File: RegisterCustomFunctions.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql

import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.currency.CurrencyConversionFunction
import org.apache.spark.sql.types._

import scala.reflect.ClassTag

object RegisterCustomFunctions {

  // TODO move this to an implicit function in the registry.
  private[this] def registerExpression[T <: Expression](registry: FunctionRegistry, name: String)
                                                       (implicit tag: ClassTag[T]): Unit = {
    val (_, (_, builder)) = expression[T](name)
    registry.registerFunction(name, builder)
  }

  def apply(registry: FunctionRegistry): Unit = {
    registerExpression[Remainder](registry, "remainder")
    registerExpression[Remainder](registry, "mod")
    registerExpression[AddYears](registry, "add_years")
    registerExpression[AddSeconds](registry, "add_seconds")
    registerExpression[DateAdd](registry, "add_days")
    registerExpression[Replace](registry, "replace")
    registerExpression[Log](registry, "ln")
    registry.registerFunction("to_double", toDoubleBuilder)
    registry.registerFunction("to_integer", toIntegerBuilder)
    registry.registerFunction("to_varchar", toVarcharBuilder)
    registry.registerFunction("rand", randBuilder)
    registry.registerFunction("days_between", daysBetweenBuilder)
    // register all currency conversions
    CurrencyConversionFunction.functions.foreach {
      case (name, impl) => registry.registerFunction(name, impl.getExpression)
    }
  }

  private def toDoubleBuilder(expressions: Seq[Expression]): Expression =
    expressions match {
      case Seq(exp) => Cast(exp, DoubleType)
      case _ =>
        throw new AnalysisException("Input argument to TO_DOUBLE must be a single expression")
    }

  private def toIntegerBuilder(expressions: Seq[Expression]): Expression =
    expressions match {
      case Seq(exp) => Cast(exp, IntegerType)
      case _ =>
        throw new AnalysisException("Input argument to TO_INTEGER must be a single expression")
    }

  private def toVarcharBuilder(expressions: Seq[Expression]): Expression =
    expressions match {
      case Seq(exp) => Cast(exp, StringType)
      case _ =>
        throw new AnalysisException("Input argument to TO_VARCHAR must be a single expression")
    }

  private def randBuilder(expressions: Seq[Expression]): Expression =
    expressions match {
      case Nil => new Rand()
      case Seq(IntegerLiteral(n)) => new Rand(n)
      case _ =>
        throw new AnalysisException("Input argument to RAND must be an integer literal.")
    }

  private def daysBetweenBuilder(expressions: Seq[Expression]): Expression =
    expressions match {
      case Seq(exp1, exp2) => Abs(DateDiff(exp1, exp2))
      case _ =>
        throw new AnalysisException("Input argument to DAYS_BETWEEN must be two expressions.")
    }
}
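The three to_* builders above share the same shape: exactly one argument, cast to a target type. A possible consolidation, not present in the original source, is sketched below as a method that could sit inside the same object (it relies on the file's existing imports and on living in the org.apache.spark.sql package, where AnalysisException's constructor is accessible):

  // Hypothetical helper: one builder for all single-argument cast functions.
  private def castBuilder(functionName: String, target: DataType)(
      expressions: Seq[Expression]): Expression = expressions match {
    case Seq(exp) => Cast(exp, target)
    case _ =>
      throw new AnalysisException(s"Input argument to $functionName must be a single expression")
  }

  // e.g. registry.registerFunction("to_double", castBuilder("TO_DOUBLE", DoubleType) _)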
Example 6
Source File: PythonSQLUtils.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.api.python

import org.apache.spark.api.java.JavaRDD
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.catalyst.expressions.ExpressionInfo
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.execution.arrow.ArrowConverters
import org.apache.spark.sql.types.DataType

private[sql] object PythonSQLUtils {

  def parseDataType(typeText: String): DataType = CatalystSqlParser.parseDataType(typeText)

  // This is needed when generating SQL documentation for built-in functions.
  def listBuiltinFunctionInfos(): Array[ExpressionInfo] = {
    FunctionRegistry.functionSet.flatMap(f => FunctionRegistry.builtin.lookupFunction(f)).toArray
  }

  def arrowPayloadToDataFrame(
      payloadRDD: JavaRDD[Array[Byte]],
      schemaString: String,
      sqlContext: SQLContext): DataFrame = {
    ArrowConverters.toDataFrame(payloadRDD, schemaString, sqlContext)
  }
}