org.apache.spark.sql.catalyst.ScalaReflection Scala Examples
The following examples show how to use org.apache.spark.sql.catalyst.ScalaReflection.
You can go to the original project or source file by following the links above each example.
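Most of the examples below boil down to one call: ScalaReflection.schemaFor[T] derives a Catalyst schema from a Scala type. A minimal, self-contained sketch first (the Person case class is illustrative and not taken from any example below):

import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.types.StructType

// Illustrative case class, not taken from the examples below.
case class Person(name: String, age: Int)

object SchemaForSketch extends App {
  // schemaFor[T] returns a ScalaReflection.Schema(dataType, nullable).
  val schema = ScalaReflection.schemaFor[Person].dataType.asInstanceOf[StructType]
  schema.printTreeString()
  // root
  //  |-- name: string (nullable = true)
  //  |-- age: integer (nullable = false)
}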
Example 1
Source File: UserDefinedFunction.scala from XSQL with Apache License 2.0
package org.apache.spark.sql.expressions

import org.apache.spark.annotation.InterfaceStability
import org.apache.spark.sql.Column
import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.catalyst.expressions.ScalaUDF
import org.apache.spark.sql.types.DataType

// Excerpt: the surrounding UserDefinedFunction case class definition is elided in this example.
  def asNondeterministic(): UserDefinedFunction = {
    if (!_deterministic) {
      this
    } else {
      val udf = copyAll()
      udf._deterministic = false
      udf
    }
  }
}

// We have to use a name different than `UserDefinedFunction` here, to avoid breaking the binary
// compatibility of the auto-generated UserDefinedFunction object.
private[sql] object SparkUserDefinedFunction {

  def create(
      f: AnyRef,
      dataType: DataType,
      inputSchemas: Seq[Option[ScalaReflection.Schema]]): UserDefinedFunction = {
    val inputTypes = if (inputSchemas.contains(None)) {
      None
    } else {
      Some(inputSchemas.map(_.get.dataType))
    }
    val udf = new UserDefinedFunction(f, dataType, inputTypes)
    udf.nullableTypes = Some(inputSchemas.map(_.map(_.nullable).getOrElse(true)))
    udf
  }
}
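SparkUserDefinedFunction.create above expects one Option[ScalaReflection.Schema] per UDF argument. A hedged sketch of how a caller typically builds that argument, mirroring what Spark's functions.udf does by wrapping schemaFor in Try so unreflectable types fall back to None (the UDF body and types here are illustrative):

import scala.util.Try
import org.apache.spark.sql.catalyst.ScalaReflection

// Sketch only: the UDF body and argument types are illustrative.
object InputSchemasSketch {
  val f = (s: String, n: Int) => s * n

  // One Option[Schema] per input; a None entry means "type could not be reflected on",
  // in which case create() falls back to inputTypes = None and nullable = true.
  val inputSchemas: Seq[Option[ScalaReflection.Schema]] = Seq(
    Try(ScalaReflection.schemaFor[String]).toOption,
    Try(ScalaReflection.schemaFor[Int]).toOption
  )
  val returnType = ScalaReflection.schemaFor[String].dataType

  // SparkUserDefinedFunction.create(f, returnType, inputSchemas) would then record both the
  // input DataTypes and their nullability (create is private[sql], so ordinary code reaches
  // it through org.apache.spark.sql.functions.udf instead).
}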
Example 2
Source File: StreamStaticJoiner.scala from structured-streaming-application with Apache License 2.0
package knolx.spark

import knolx.Config._
import knolx.KnolXLogger
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.functions.{col, from_json}
import org.apache.spark.sql.types.StructType

object StreamStaticJoiner extends App with KnolXLogger {
  info("Creating Spark Session")
  val spark = SparkSession.builder().master(sparkMaster).appName(sparkAppName).getOrCreate()
  spark.sparkContext.setLogLevel("WARN")

  info("Static Dataframe")
  val companiesDF = spark.read.option("header", "true").csv("src/main/resources/companies.csv")
  companiesDF.show(false)

  info("Original Streaming Dataframe")
  val schema = ScalaReflection.schemaFor[Stock].dataType.asInstanceOf[StructType]
  val stockStreamDF = spark
    .readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", bootstrapServer)
    .option("subscribe", topic)
    .load()
    .select(from_json(col("value").cast("string"), schema).as("value"))
    .select("value.*")

  stockStreamDF.printSchema()
  stockStreamDF.writeStream.format("console").start()

  info("Filtered Streaming Dataframe")
  val filteredStockStreamDF = stockStreamDF.join(companiesDF, "companyName")
  val filteredStockStreamingQuery = filteredStockStreamDF.writeStream.format("console").start()

  info("Waiting for the query to terminate...")
  filteredStockStreamingQuery.awaitTermination()
  filteredStockStreamingQuery.stop()
}
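The Stock case class fed to schemaFor above is defined elsewhere in the structured-streaming-application project. A hypothetical definition for illustration only (companyName is implied by the join; the other fields are assumed), and the schema it would yield:

// Hypothetical shape of Stock; the real definition lives elsewhere in the project.
// Only companyName is implied by the join with companiesDF; the other fields are assumed.
case class Stock(companyName: String, price: Double, quantity: Long)

// ScalaReflection.schemaFor[Stock].dataType would then be a StructType equivalent to:
//   companyName: string (nullable = true)
//   price: double (nullable = false)
//   quantity: long (nullable = false)
// which from_json uses to parse the Kafka value column into typed fields.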
Example 3
Source File: StreamStreamOuterJoiner.scala from structured-streaming-application with Apache License 2.0
package knolx.spark

import knolx.Config._
import knolx.KnolXLogger
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.functions.{col, expr, from_json}
import org.apache.spark.sql.types.StructType

object StreamStreamOuterJoiner extends App with KnolXLogger {
  info("Creating Spark Session")
  val spark = SparkSession.builder().master(sparkMaster).appName(sparkAppName).getOrCreate()
  spark.sparkContext.setLogLevel("WARN")

  info("Streaming companies Dataframe")
  val companiesDF = spark
    .readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", bootstrapServer)
    .option("subscribe", companiesTopic)
    .load()
    .select(col("value").cast("string").as("companyName"), col("timestamp").as("companyTradingTime"))
    .withWatermark("companyTradingTime", "10 seconds")

  companiesDF.writeStream.format("console").option("truncate", false).start()

  info("Original Streaming Dataframe")
  val schema = ScalaReflection.schemaFor[Stock].dataType.asInstanceOf[StructType]
  val stockStreamDF = spark
    .readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", bootstrapServer)
    .option("subscribe", stocksTopic)
    .load()
    .select(from_json(col("value").cast("string"), schema).as("value"), col("timestamp").as("stockInputTime"))
    .select("value.*", "stockInputTime")
    .withWatermark("stockInputTime", "10 seconds")

  info("Filtered Streaming Dataframe")
  val filteredStockStreamDF = stockStreamDF.join(companiesDF,
    expr("companyName = stockName AND stockInputTime >= companyTradingTime AND stockInputTime <= companyTradingTime + interval 20 seconds"),
    joinType = "leftOuter")

  val filteredStockStreamingQuery = filteredStockStreamDF.writeStream.format("console").option("truncate", false).start()

  info("Waiting for the query to terminate...")
  filteredStockStreamingQuery.awaitTermination()
  filteredStockStreamingQuery.stop()
}
Example 4
Source File: StreamStreamJoiner.scala from structured-streaming-application with Apache License 2.0
package knolx.spark

import knolx.Config._
import knolx.KnolXLogger
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.functions.{col, expr, from_json}
import org.apache.spark.sql.types.StructType

object StreamStreamJoiner extends App with KnolXLogger {
  info("Creating Spark Session")
  val spark = SparkSession.builder().master(sparkMaster).appName(sparkAppName).getOrCreate()
  spark.sparkContext.setLogLevel("WARN")

  info("Streaming companies Dataframe")
  val companiesDF = spark
    .readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", bootstrapServer)
    .option("subscribe", companiesTopic)
    .load()
    .select(col("value").cast("string").as("companyName"), col("timestamp").as("companyTradingTime"))

  companiesDF.writeStream.format("console").option("truncate", false).start()

  info("Original Streaming Dataframe")
  val schema = ScalaReflection.schemaFor[Stock].dataType.asInstanceOf[StructType]
  val stockStreamDF = spark
    .readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", bootstrapServer)
    .option("subscribe", stocksTopic)
    .load()
    .select(from_json(col("value").cast("string"), schema).as("value"), col("timestamp").as("stockInputTime"))
    .select("value.*", "stockInputTime")

  info("Filtered Streaming Dataframe")
  val filteredStockStreamDF = stockStreamDF.join(companiesDF,
    expr("companyName = stockName AND stockInputTime >= companyTradingTime AND stockInputTime <= companyTradingTime + interval 20 seconds"))

  val filteredStockStreamingQuery = filteredStockStreamDF.writeStream.format("console").option("truncate", false).start()

  info("Waiting for the query to terminate...")
  filteredStockStreamingQuery.awaitTermination()
  filteredStockStreamingQuery.stop()
}
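The join condition in the last two examples is written as a SQL string via expr. An equivalent formulation with Column operators, shown as a sketch assuming the same column names, makes the equality and the 20-second event-time window explicit; it would replace the expr("...") argument inside the App body above:

import org.apache.spark.sql.functions.{col, expr}

// Equivalent to the expr("...") string used above, written with Column operators.
val joinCondition =
  col("companyName") === col("stockName") &&
  col("stockInputTime") >= col("companyTradingTime") &&
  col("stockInputTime") <= col("companyTradingTime") + expr("interval 20 seconds")

// stockStreamDF.join(companiesDF, joinCondition) gives the inner join, while
// stockStreamDF.join(companiesDF, joinCondition, "leftOuter") gives the outer variant,
// which additionally requires the watermarks shown in the previous example.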
Example 5
Source File: SPLScalaReflection.scala from spark-druid-olap with Apache License 2.0
package org.apache.spark.sql.hive.sparklinedata

import org.apache.spark.SparkContext
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.ScalaReflection

object SPLScalaReflection {

  import ScalaReflection.universe
  import ScalaReflection.mirror

  def changeSessionStateClass : Unit = {
    val spkSessionCSymbol = mirror.classSymbol(classOf[SparkSession])
    val spkSessionModSymbol = spkSessionCSymbol.companion.asModule
    val spkSessionModClassMirror = mirror.reflectModule(spkSessionModSymbol)
    val spkSessionModule = spkSessionModClassMirror.instance
    val spkSessionModuleMirror = mirror.reflect(spkSessionModule)
    val spkSessionModuleTyp = spkSessionModuleMirror.symbol.selfType
    val termSessionState = spkSessionModuleTyp.decl(
      universe.TermName("HIVE_SESSION_STATE_CLASS_NAME")).asTerm.accessed.asTerm
    val sessionStateField = spkSessionModuleMirror.reflectField(termSessionState)
    sessionStateField.set("org.apache.spark.sql.hive.sparklinedata.SPLSessionState")
  }

//  def main(args : Array[String]) : Unit = {
//    changeSessionStateClass
//
//    println(new SparkSession(new SparkContext()).sharedState.getClass)
//  }
}
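SPLScalaReflection uses the mirror and universe exposed by ScalaReflection to rewrite a private val on the SparkSession companion object at runtime. The same reflection chain on a toy companion object, as a self-contained sketch (Config and its name field are invented for illustration; writing to a final field this way may be restricted on newer JVMs):

import org.apache.spark.sql.catalyst.ScalaReflection

// Toy class/companion pair, invented for this sketch.
class Config
object Config { val name: String = "default" }

object ReflectFieldSketch extends App {
  import ScalaReflection.universe
  import ScalaReflection.mirror

  // Same chain as changeSessionStateClass above: class symbol -> companion module ->
  // module instance -> field behind the accessor -> FieldMirror.
  val modSymbol = mirror.classSymbol(classOf[Config]).companion.asModule
  val instanceMirror = mirror.reflect(mirror.reflectModule(modSymbol).instance)
  val fieldSymbol = instanceMirror.symbol.selfType
    .decl(universe.TermName("name")).asTerm.accessed.asTerm
  val field = instanceMirror.reflectField(fieldSymbol)

  println(field.get)        // default
  field.set("overridden")   // same mechanics SPLScalaReflection relies on
  println(Config.name)      // overridden
}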
Example 6
Source File: package.scala from frameless with Apache License 2.0
package frameless

import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.catalyst.expressions.Literal

package object functions extends Udf with UnaryFunctions {
  object aggregate extends AggregateFunctions
  object nonAggregate extends NonAggregateFunctions

  def lit[A: TypedEncoder, T](value: A): TypedColumn[T, A] = {
    val encoder = TypedEncoder[A]

    if (ScalaReflection.isNativeType(encoder.jvmRepr) && encoder.catalystRepr == encoder.jvmRepr) {
      val expr = Literal(value, encoder.catalystRepr)
      new TypedColumn(expr)
    } else {
      val expr = FramelessLit(value, encoder)
      new TypedColumn(expr)
    }
  }
}
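The branch above hinges on ScalaReflection.isNativeType: when a type's JVM and Catalyst representations coincide, the value can be wrapped directly in a Catalyst Literal; otherwise frameless goes through FramelessLit and the encoder. A small sketch of what isNativeType reports (exact behaviour may vary slightly across Spark versions):

import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.types._

object NativeTypeSketch extends App {
  // Native types are those stored in Catalyst exactly as their JVM values
  // (primitives, binary, calendar intervals), so Literal(value, dt) is safe as-is.
  println(ScalaReflection.isNativeType(IntegerType))            // true
  println(ScalaReflection.isNativeType(DoubleType))             // true
  println(ScalaReflection.isNativeType(StringType))             // false: JVM String vs Catalyst UTF8String
  println(ScalaReflection.isNativeType(ArrayType(IntegerType))) // false: needs conversion/encoding
}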