org.apache.spark.sql.catalyst.ScalaReflection Scala Examples
The following examples show how to use org.apache.spark.sql.catalyst.ScalaReflection.
You can go to the original project or source file by following the links above each example.
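Most of the examples below boil down to one call: ScalaReflection.schemaFor[T] derives a Catalyst schema from a Scala type. A minimal, self-contained sketch first (the Person case class is illustrative and not taken from any example below):

import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.types.StructType

// Illustrative case class, not taken from the examples below.
case class Person(name: String, age: Int)

object SchemaForSketch extends App {
  // schemaFor[T] returns a ScalaReflection.Schema(dataType, nullable).
  val schema = ScalaReflection.schemaFor[Person].dataType.asInstanceOf[StructType]
  schema.printTreeString()
  // root
  //  |-- name: string (nullable = true)
  //  |-- age: integer (nullable = false)
}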
Example 1
Source File: UserDefinedFunction.scala from XSQL with Apache License 2.0
package org.apache.spark.sql.expressions

import org.apache.spark.annotation.InterfaceStability
import org.apache.spark.sql.Column
import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.catalyst.expressions.ScalaUDF
import org.apache.spark.sql.types.DataType

// Excerpt: the surrounding UserDefinedFunction case class definition is elided in this example.
  def asNondeterministic(): UserDefinedFunction = {
    if (!_deterministic) {
      this
    } else {
      val udf = copyAll()
      udf._deterministic = false
      udf
    }
  }
}

// We have to use a name different than `UserDefinedFunction` here, to avoid breaking the binary
// compatibility of the auto-generated UserDefinedFunction object.
private[sql] object SparkUserDefinedFunction {

  def create(
      f: AnyRef,
      dataType: DataType,
      inputSchemas: Seq[Option[ScalaReflection.Schema]]): UserDefinedFunction = {
    val inputTypes = if (inputSchemas.contains(None)) {
      None
    } else {
      Some(inputSchemas.map(_.get.dataType))
    }
    val udf = new UserDefinedFunction(f, dataType, inputTypes)
    udf.nullableTypes = Some(inputSchemas.map(_.map(_.nullable).getOrElse(true)))
    udf
  }
}
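SparkUserDefinedFunction.create above expects one Option[ScalaReflection.Schema] per UDF argument. A hedged sketch of how a caller typically builds that argument, mirroring what Spark's functions.udf does by wrapping schemaFor in Try so unreflectable types fall back to None (the UDF body and types here are illustrative):

import scala.util.Try
import org.apache.spark.sql.catalyst.ScalaReflection

// Sketch only: the UDF body and argument types are illustrative.
object InputSchemasSketch {
  val f = (s: String, n: Int) => s * n

  // One Option[Schema] per input; a None entry means "type could not be reflected on",
  // in which case create() falls back to inputTypes = None and nullable = true.
  val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Seq(
    Try(ScalaReflection.schemaFor[String]).toOption,
    Try(ScalaReflection.schemaFor[Int]).toOption
  )
  val returnType = ScalaReflection.schemaFor[String].dataType

  // SparkUserDefinedFunction.create(f, returnType, inputSchemas) would then record both the
  // input DataTypes and their nullability (create is private[sql], so ordinary code reaches
  // it through org.apache.spark.sql.functions.udf instead).
}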
Example 2
Source File: StreamStaticJoiner.scala from structured-streaming-application with Apache License 2.0
package knolx.spark

import knolx.Config._
import knolx.KnolXLogger
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.functions.{col, from_json}
import org.apache.spark.sql.types.StructType

object StreamStaticJoiner extends App with KnolXLogger {
  info("Creating Spark Session")
  val spark = SparkSession.builder().master(sparkMaster).appName(sparkAppName).getOrCreate()
  spark.sparkContext.setLogLevel("WARN")

  info("Static Dataframe")
  val companiesDF = spark.read.option("header", "true").csv("src/main/resources/companies.csv")
  companiesDF.show(false)

  info("Original Streaming Dataframe")
  val schema = ScalaReflection.schemaFor[Stock].dataType.asInstanceOf[StructType]
  val stockStreamDF = spark
    .readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", bootstrapServer)
    .option("subscribe", topic)
    .load()
    .select(from_json(col("value").cast("string"), schema).as("value"))
    .select("value.*")

  stockStreamDF.printSchema()
  stockStreamDF.writeStream.format("console").start()

  info("Filtered Streaming Dataframe")
  val filteredStockStreamDF = stockStreamDF.join(companiesDF, "companyName")
  val filteredStockStreamingQuery = filteredStockStreamDF.writeStream.format("console").start()

  info("Waiting for the query to terminate...")
  filteredStockStreamingQuery.awaitTermination()
  filteredStockStreamingQuery.stop()
}
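The Stock case class fed to schemaFor above is defined elsewhere in the structured-streaming-application project. A hypothetical definition for illustration only (companyName is implied by the join; the other fields are assumed), and the schema it would yield:

// Hypothetical shape of Stock; the real definition lives elsewhere in the project.
// Only companyName is implied by the join with companiesDF; the other fields are assumed.
case class Stock(companyName: String, price: Double, quantity: Long)

// ScalaReflection.schemaFor[Stock].dataType would then be a StructType equivalent to:
//   companyName: string (nullable = true)
//   price: double (nullable = false)
//   quantity: long (nullable = false)
// which from_json uses to parse the Kafka value column into typed fields.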
Example 3
Source File: StreamStreamOuterJoiner.scala from structured-streaming-application with Apache License 2.0
package knolx.spark

import knolx.Config._
import knolx.KnolXLogger
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.functions.{col, expr, from_json}
import org.apache.spark.sql.types.StructType

object StreamStreamOuterJoiner extends App with KnolXLogger {
  info("Creating Spark Session")
  val spark = SparkSession.builder().master(sparkMaster).appName(sparkAppName).getOrCreate()
  spark.sparkContext.setLogLevel("WARN")

  info("Streaming companies Dataframe")
  val companiesDF = spark
    .readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", bootstrapServer)
    .option("subscribe", companiesTopic)
    .load()
    .select(col("value").cast("string").as("companyName"), col("timestamp").as("companyTradingTime"))
    .withWatermark("companyTradingTime", "10 seconds")

  companiesDF.writeStream.format("console").option("truncate", false).start()

  info("Original Streaming Dataframe")
  val schema = ScalaReflection.schemaFor[Stock].dataType.asInstanceOf[StructType]
  val stockStreamDF = spark
    .readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", bootstrapServer)
    .option("subscribe", stocksTopic)
    .load()
    .select(from_json(col("value").cast("string"), schema).as("value"), col("timestamp").as("stockInputTime"))
    .select("value.*", "stockInputTime")
    .withWatermark("stockInputTime", "10 seconds")

  info("Filtered Streaming Dataframe")
  val filteredStockStreamDF = stockStreamDF.join(companiesDF,
    expr("companyName = stockName AND stockInputTime >= companyTradingTime AND stockInputTime <= companyTradingTime + interval 20 seconds"),
    joinType = "leftOuter")

  val filteredStockStreamingQuery = filteredStockStreamDF.writeStream.format("console").option("truncate", false).start()

  info("Waiting for the query to terminate...")
  filteredStockStreamingQuery.awaitTermination()
  filteredStockStreamingQuery.stop()
}
Example 4
Source File: StreamStreamJoiner.scala from structured-streaming-application with Apache License 2.0
package knolx.spark

import knolx.Config._
import knolx.KnolXLogger
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.functions.{col, expr, from_json}
import org.apache.spark.sql.types.StructType

object StreamStreamJoiner extends App with KnolXLogger {
  info("Creating Spark Session")
  val spark = SparkSession.builder().master(sparkMaster).appName(sparkAppName).getOrCreate()
  spark.sparkContext.setLogLevel("WARN")

  info("Streaming companies Dataframe")
  val companiesDF = spark
    .readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", bootstrapServer)
    .option("subscribe", companiesTopic)
    .load()
    .select(col("value").cast("string").as("companyName"), col("timestamp").as("companyTradingTime"))

  companiesDF.writeStream.format("console").option("truncate", false).start()

  info("Original Streaming Dataframe")
  val schema = ScalaReflection.schemaFor[Stock].dataType.asInstanceOf[StructType]
  val stockStreamDF = spark
    .readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", bootstrapServer)
    .option("subscribe", stocksTopic)
    .load()
    .select(from_json(col("value").cast("string"), schema).as("value"), col("timestamp").as("stockInputTime"))
    .select("value.*", "stockInputTime")

  info("Filtered Streaming Dataframe")
  val filteredStockStreamDF = stockStreamDF.join(companiesDF,
    expr("companyName = stockName AND stockInputTime >= companyTradingTime AND stockInputTime <= companyTradingTime + interval 20 seconds"))

  val filteredStockStreamingQuery = filteredStockStreamDF.writeStream.format("console").option("truncate", false).start()

  info("Waiting for the query to terminate...")
  filteredStockStreamingQuery.awaitTermination()
  filteredStockStreamingQuery.stop()
}
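The join condition in the last two examples is written as a SQL string via expr. An equivalent formulation with Column operators, shown as a sketch assuming the same column names, makes the equality and the 20-second event-time window explicit; it would replace the expr("...") argument inside the App body above:

import org.apache.spark.sql.functions.{col, expr}

// Equivalent to the expr("...") string used above, written with Column operators.
val joinCondition =
  col("companyName") === col("stockName") &&
  col("stockInputTime") >= col("companyTradingTime") &&
  col("stockInputTime") <= col("companyTradingTime") + expr("interval 20 seconds")

// stockStreamDF.join(companiesDF, joinCondition) gives the inner join, while
// stockStreamDF.join(companiesDF, joinCondition, "leftOuter") gives the outer variant,
// which additionally requires the watermarks shown in the previous example.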
Example 5
Source File: SPLScalaReflection.scala from spark-druid-olap with Apache License 2.0
package org.apache.spark.sql.hive.sparklinedata

import org.apache.spark.SparkContext
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.ScalaReflection

object SPLScalaReflection {

  import ScalaReflection.universe
  import ScalaReflection.mirror

  def changeSessionStateClass : Unit = {
    val spkSessionCSymbol = mirror.classSymbol(classOf[SparkSession])
    val spkSessionModSymbol = spkSessionCSymbol.companion.asModule
    val spkSessionModClassMirror = mirror.reflectModule(spkSessionModSymbol)
    val spkSessionModule = spkSessionModClassMirror.instance
    val spkSessionModuleMirror = mirror.reflect(spkSessionModule)
    val spkSessionModuleTyp = spkSessionModuleMirror.symbol.selfType
    val termSessionState = spkSessionModuleTyp.decl(
      universe.TermName("HIVE_SESSION_STATE_CLASS_NAME")).asTerm.accessed.asTerm
    val sessionStateField = spkSessionModuleMirror.reflectField(termSessionState)
    sessionStateField.set("org.apache.spark.sql.hive.sparklinedata.SPLSessionState")
  }

//  def main(args : Array[String]) : Unit = {
//    changeSessionStateClass
//
//    println(new SparkSession(new SparkContext()).sharedState.getClass)
//  }
}
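SPLScalaReflection uses the mirror and universe exposed by ScalaReflection to rewrite a private val on the SparkSession companion object at runtime. The same reflection chain on a toy companion object, as a self-contained sketch (Config and its name field are invented for illustration; writing to a final field this way may be restricted on newer JVMs):

import org.apache.spark.sql.catalyst.ScalaReflection

// Toy class/companion pair, invented for this sketch.
class Config
object Config { val name: String = "default" }

object ReflectFieldSketch extends App {
  import ScalaReflection.universe
  import ScalaReflection.mirror

  // Same chain as changeSessionStateClass above: class symbol -> companion module ->
  // module instance -> field behind the accessor -> FieldMirror.
  val modSymbol = mirror.classSymbol(classOf[Config]).companion.asModule
  val instanceMirror = mirror.reflect(mirror.reflectModule(modSymbol).instance)
  val fieldSymbol = instanceMirror.symbol.selfType
    .decl(universe.TermName("name")).asTerm.accessed.asTerm
  val field = instanceMirror.reflectField(fieldSymbol)

  println(field.get)        // default
  field.set("overridden")   // same mechanics SPLScalaReflection relies on
  println(Config.name)      // overridden
}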
Example 6
Source File: package.scala from frameless with Apache License 2.0
package frameless

import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.catalyst.expressions.Literal

package object functions extends Udf with UnaryFunctions {
  object aggregate extends AggregateFunctions
  object nonAggregate extends NonAggregateFunctions

  def lit[A: TypedEncoder, T](value: A): TypedColumn[T, A] = {
    val encoder = TypedEncoder[A]

    if (ScalaReflection.isNativeType(encoder.jvmRepr) && encoder.catalystRepr == encoder.jvmRepr) {
      val expr = Literal(value, encoder.catalystRepr)
      new TypedColumn(expr)
    } else {
      val expr = FramelessLit(value, encoder)
      new TypedColumn(expr)
    }
  }
}
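The branch above hinges on ScalaReflection.isNativeType: when a type's JVM and Catalyst representations coincide, the value can be wrapped directly in a Catalyst Literal; otherwise frameless goes through FramelessLit and the encoder. A small sketch of what isNativeType reports (exact behaviour may vary slightly across Spark versions):

import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.types._

object NativeTypeSketch extends App {
  // Native types are those stored in Catalyst exactly as their JVM values
  // (primitives, binary, calendar intervals), so Literal(value, dt) is safe as-is.
  println(ScalaReflection.isNativeType(IntegerType))            // true
  println(ScalaReflection.isNativeType(DoubleType))             // true
  println(ScalaReflection.isNativeType(StringType))             // false: JVM String vs Catalyst UTF8String
  println(ScalaReflection.isNativeType(ArrayType(IntegerType))) // false: needs conversion/encoding
}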