scala.reflect.runtime.universe Scala Examples
The following examples show how to use scala.reflect.runtime.universe, the entry point to Scala's runtime-reflection API, as it is used in a range of open-source projects. Each example lists its source file, the project it comes from, and that project's license.
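Most of the examples below follow the same core pattern: obtain a runtime mirror from a class loader, resolve a singleton object by its fully qualified name with staticModule, and call reflectModule(...).instance to get the object itself, usually cast to a known interface. The minimal sketch below is not taken from any of the listed projects; it demonstrates the pattern against the standard library's scala.None object so it runs with no extra dependencies.

import scala.reflect.runtime.universe

object ReflectModuleSketch extends App {
  // A mirror backed by the class loader that can see the target object.
  val runtimeMirror = universe.runtimeMirror(getClass.getClassLoader)

  // Resolve the singleton by its fully qualified name (no trailing '$').
  val module = runtimeMirror.staticModule("scala.None")

  // Reflect the module symbol and fetch its one-and-only instance.
  val instance = runtimeMirror.reflectModule(module).instance

  println(instance == None) // true: reflection hands back the same singleton
}

In the projects below, the hard-coded name is replaced by a name built from configuration (for example s"controller.login.${providor}LoginController" in Example 1), and the instance is cast with asInstanceOf to the trait the caller expects.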
Example 1
Source File: LoginControllerFactory.scala From scuruto with MIT License
package controller

import skinny.SkinnyConfig

object LoginControllerFactory {
  private val DEFAULT_PROVIDOR = "App"

  val create: LoginController = {
    val providor = SkinnyConfig.stringConfigValue("login.providor").map { configValue =>
      configValue.capitalize
    } getOrElse DEFAULT_PROVIDOR

    import scala.reflect.runtime.universe
    val runtimeMirror = universe.runtimeMirror(getClass.getClassLoader)
    val module = runtimeMirror.staticModule(s"controller.login.${providor}LoginController")
    val obj = runtimeMirror.reflectModule(module)
    val controller = obj.instance
    controller.asInstanceOf[LoginController]
  }
}
Example 2
Source File: L8-35DataFrameExamplesRDD.scala From prosparkstreaming with Apache License 2.0
package org.apress.prospark

import scala.reflect.runtime.universe
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.types.DataType
import org.apache.spark.sql.types.StructType
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.json4s.DefaultFormats

object CdrDataframeExamplesRDDApp {

  case class Cdr(squareId: Int, timeInterval: Long, countryCode: Int, smsInActivity: Float,
    smsOutActivity: Float, callInActivity: Float, callOutActivity: Float, internetTrafficActivity: Float)

  def main(args: Array[String]) {
    if (args.length != 5) {
      System.err.println(
        "Usage: CdrDataframeExamplesRDDApp <appname> <batchInterval> <hostname> <port> <schemaPath>")
      System.exit(1)
    }
    val Seq(appName, batchInterval, hostname, port, schemaFile) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt))

    val sqlC = new SQLContext(ssc.sparkContext)
    import sqlC.implicits._
    implicit val formats = DefaultFormats

    val schemaJson = scala.io.Source.fromFile(schemaFile).mkString
    val schema = DataType.fromJson(schemaJson).asInstanceOf[StructType]

    val cdrStream = ssc.socketTextStream(hostname, port.toInt)
      .map(_.split("\\t", -1))
      .foreachRDD(rdd => {
        val cdrs = seqToCdr(rdd).toDF()

        val highInternet = sqlC.createDataFrame(cdrs.rdd.filter(r => r.getFloat(3) + r.getFloat(4) >= r.getFloat(5) + r.getFloat(6)), schema)
        val highOther = cdrs.except(highInternet)
        val highInternetGrid = highInternet.select("squareId", "countryCode").dropDuplicates()
        val highOtherGrid = highOther.select("squareId", "countryCode").dropDuplicates()
        highOtherGrid.except(highInternetGrid).show()
        highInternetGrid.except(highOtherGrid).show()
      })

    ssc.start()
    ssc.awaitTermination()
  }

  def seqToCdr(rdd: RDD[Array[String]]): RDD[Cdr] = {
    rdd.map(c => c.map(f => f match {
      case x if x.isEmpty() => "0"
      case x => x
    })).map(c => Cdr(c(0).toInt, c(1).toLong, c(2).toInt, c(3).toFloat,
      c(4).toFloat, c(5).toFloat, c(6).toFloat, c(7).toFloat))
  }
}
Example 3
Source File: L8-13HiveQL.scala From prosparkstreaming with Apache License 2.0
package org.apress.prospark import scala.reflect.runtime.universe import org.apache.spark.SparkConf import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.apache.spark.sql.hive.HiveContext import org.apache.spark.streaming.Seconds import org.apache.spark.streaming.StreamingContext object CdrHiveqlApp { case class Cdr(squareId: Int, timeInterval: Long, countryCode: Int, smsInActivity: Float, smsOutActivity: Float, callInActivity: Float, callOutActivity: Float, internetTrafficActivity: Float) def main(args: Array[String]) { if (args.length != 4) { System.err.println( "Usage: CdrHiveqlApp <appname> <batchInterval> <hostname> <port>") System.exit(1) } val Seq(appName, batchInterval, hostname, port) = args.toSeq val conf = new SparkConf() .setAppName(appName) .setJars(SparkContext.jarOfClass(this.getClass).toSeq) val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt)) val cl = Thread.currentThread().getContextClassLoader() val hiveC = new HiveContext(ssc.sparkContext) Thread.currentThread().setContextClassLoader(cl) import hiveC.implicits._ val cdrStream = ssc.socketTextStream(hostname, port.toInt) .map(_.split("\\t", -1)) .foreachRDD(rdd => { seqToCdr(rdd).toDF().registerTempTable("cdrs") hiveC.sql("SET DATE_FMT='yy-MM-dd|HH'") hiveC.sql("SELECT from_unixtime(timeInterval, ${hiveconf:DATE_FMT}) AS TS, SUM(smsInActivity + smsOutActivity + callInActivity + callOutActivity + internetTrafficActivity) AS Activity FROM cdrs GROUP BY from_unixtime(timeInterval, ${hiveconf:DATE_FMT}) ORDER BY Activity DESC").show() }) ssc.start() ssc.awaitTermination() } def seqToCdr(rdd: RDD[Array[String]]): RDD[Cdr] = { rdd.map(c => c.map(f => f match { case x if x.isEmpty() => "0" case x => x })).map(c => Cdr(c(0).toInt, c(1).toLong, c(2).toInt, c(3).toFloat, c(4).toFloat, c(5).toFloat, c(6).toFloat, c(7).toFloat)) } }
Example 4
Source File: L6-20CassandraConnector.scala From prosparkstreaming with Apache License 2.0
package org.apress.prospark import scala.reflect.runtime.universe import org.apache.spark.SparkConf import org.apache.spark.SparkContext import org.apache.spark.streaming.Seconds import org.apache.spark.streaming.StreamingContext import org.apache.spark.streaming.dstream.DStream.toPairDStreamFunctions import org.json4s.DefaultFormats import org.json4s.jvalue2extractable import org.json4s.jvalue2monadic import org.json4s.native.JsonMethods.parse import org.json4s.string2JsonInput import com.datastax.spark.connector.SomeColumns import com.datastax.spark.connector.cql.CassandraConnector import com.datastax.spark.connector.streaming.toDStreamFunctions import com.datastax.spark.connector.toNamedColumnRef object CassandraConnectorSinkApp { def main(args: Array[String]) { if (args.length != 6) { System.err.println( "Usage: CassandraConnectorSinkApp <appname> <cassandraHost> <cassandraPort> <keyspace> <tableName> <columnName>") System.exit(1) } val Seq(appName, cassandraHost, cassandraPort, keyspace, tableName, columnName) = args.toSeq val conf = new SparkConf() .setAppName(appName) .setJars(SparkContext.jarOfClass(this.getClass).toSeq) .set("spark.cassandra.connection.host", cassandraHost) .set("spark.cassandra.connection.port", cassandraPort) val batchInterval = 10 val windowSize = 20 val slideInterval = 10 val ssc = new StreamingContext(conf, Seconds(batchInterval)) CassandraConnector(conf).withSessionDo { session => session.execute(s"CREATE KEYSPACE IF NOT EXISTS %s WITH REPLICATION = {'class': 'SimpleStrategy', 'replication_factor': 1 }".format(keyspace)) session.execute(s"CREATE TABLE IF NOT EXISTS %s.%s (key TEXT PRIMARY KEY, %s FLOAT)".format(keyspace, tableName, columnName)) } HttpUtils.createStream(ssc, url = "https://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20yahoo.finance.quotes%20where%20symbol%20in%20(%22IBM,GOOG,MSFT,AAPL,FB,ORCL,YHOO,TWTR,LNKD,INTC%22)%0A%09%09&format=json&diagnostics=true&env=http%3A%2F%2Fdatatables.org%2Falltables.env", interval = batchInterval) .flatMap(rec => { implicit val formats = DefaultFormats val query = parse(rec) \ "query" ((query \ "results" \ "quote").children) .map(rec => ((rec \ "symbol").extract[String], (rec \ "LastTradePriceOnly").extract[String].toFloat)) }) .reduceByKeyAndWindow((x: Float, y: Float) => (x + y), Seconds(windowSize), Seconds(slideInterval)) .map(stock => (stock._1, stock._2 / (windowSize / batchInterval))) .saveToCassandra(keyspace, tableName) ssc.start() ssc.awaitTermination() } }
Example 5
Source File: L9-17MLCrossValidation.scala From prosparkstreaming with Apache License 2.0
package org.apress.prospark import scala.reflect.runtime.universe import org.apache.spark.SparkConf import org.apache.spark.SparkContext import org.apache.spark.ml.Pipeline import org.apache.spark.ml.evaluation.RegressionEvaluator import org.apache.spark.ml.feature.Normalizer import org.apache.spark.ml.feature.VectorAssembler import org.apache.spark.ml.regression.RandomForestRegressor import org.apache.spark.ml.tuning.CrossValidator import org.apache.spark.ml.tuning.ParamGridBuilder import org.apache.spark.sql.SQLContext import org.apache.spark.streaming.Seconds import org.apache.spark.streaming.StreamingContext object MLCrossValidationApp { case class Activity(label: Double, accelXHand: Double, accelYHand: Double, accelZHand: Double, accelXChest: Double, accelYChest: Double, accelZChest: Double, accelXAnkle: Double, accelYAnkle: Double, accelZAnkle: Double) def main(args: Array[String]) { if (args.length != 4) { System.err.println( "Usage: MLCrossValidationApp <appname> <batchInterval> <hostname> <port>") System.exit(1) } val Seq(appName, batchInterval, hostname, port) = args.toSeq val conf = new SparkConf() .setAppName(appName) .setJars(SparkContext.jarOfClass(this.getClass).toSeq) val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt)) val sqlC = new SQLContext(ssc.sparkContext) import sqlC.implicits._ val substream = ssc.socketTextStream(hostname, port.toInt) .filter(!_.contains("NaN")) .map(_.split(" ")) .filter(f => f(1) == "4" || f(1) == "5") .map(f => Array(f(1), f(4), f(5), f(6), f(20), f(21), f(22), f(36), f(37), f(38))) .map(f => f.map(v => v.toDouble)) .foreachRDD(rdd => { if (!rdd.isEmpty) { val accelerometer = rdd.map(x => Activity(x(0), x(1), x(2), x(3), x(4), x(5), x(6), x(7), x(8), x(9))).toDF() val split = accelerometer.randomSplit(Array(0.3, 0.7)) val test = split(0) val train = split(1) val assembler = new VectorAssembler() .setInputCols(Array( "accelXHand", "accelYHand", "accelZHand", "accelXChest", "accelYChest", "accelZChest", "accelXAnkle", "accelYAnkle", "accelZAnkle")) .setOutputCol("vectors") val normalizer = new Normalizer() .setInputCol(assembler.getOutputCol) .setOutputCol("features") val regressor = new RandomForestRegressor() val pipeline = new Pipeline() .setStages(Array(assembler, normalizer, regressor)) val validator = new CrossValidator() .setEstimator(pipeline) .setEvaluator(new RegressionEvaluator) val pGrid = new ParamGridBuilder() .addGrid(normalizer.p, Array(1.0, 5.0, 10.0)) .addGrid(regressor.numTrees, Array(10, 50, 100)) .build() validator.setEstimatorParamMaps(pGrid) validator.setNumFolds(5) val bestModel = validator.fit(train) val prediction = bestModel.transform(test) prediction.show() } }) ssc.start() ssc.awaitTermination() } }
Example 6
Source File: L9-15MLPipeline.scala From prosparkstreaming with Apache License 2.0
package org.apress.prospark import scala.reflect.runtime.universe import org.apache.spark.SparkConf import org.apache.spark.SparkContext import org.apache.spark.ml.Pipeline import org.apache.spark.ml.feature.Normalizer import org.apache.spark.ml.feature.VectorAssembler import org.apache.spark.ml.regression.RandomForestRegressor import org.apache.spark.sql.SQLContext import org.apache.spark.streaming.Seconds import org.apache.spark.streaming.StreamingContext import org.apache.spark.ml.param.ParamMap object MLPipelineApp { case class Activity(label: Double, accelXHand: Double, accelYHand: Double, accelZHand: Double, accelXChest: Double, accelYChest: Double, accelZChest: Double, accelXAnkle: Double, accelYAnkle: Double, accelZAnkle: Double) def main(args: Array[String]) { if (args.length != 4) { System.err.println( "Usage: MLPipelineApp <appname> <batchInterval> <hostname> <port>") System.exit(1) } val Seq(appName, batchInterval, hostname, port) = args.toSeq val conf = new SparkConf() .setAppName(appName) .setJars(SparkContext.jarOfClass(this.getClass).toSeq) val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt)) val sqlC = new SQLContext(ssc.sparkContext) import sqlC.implicits._ val substream = ssc.socketTextStream(hostname, port.toInt) .filter(!_.contains("NaN")) .map(_.split(" ")) .filter(f => f(1) == "4" || f(1) == "5") .map(f => Array(f(1), f(4), f(5), f(6), f(20), f(21), f(22), f(36), f(37), f(38))) .map(f => f.map(v => v.toDouble)) .foreachRDD(rdd => { if (!rdd.isEmpty) { val accelerometer = rdd.map(x => Activity(x(0), x(1), x(2), x(3), x(4), x(5), x(6), x(7), x(8), x(9))).toDF() val split = accelerometer.randomSplit(Array(0.3, 0.7)) val test = split(0) val train = split(1) val assembler = new VectorAssembler() .setInputCols(Array( "accelXHand", "accelYHand", "accelZHand", "accelXChest", "accelYChest", "accelZChest", "accelXAnkle", "accelYAnkle", "accelZAnkle")) .setOutputCol("vectors") val normalizer = new Normalizer() .setInputCol(assembler.getOutputCol) .setOutputCol("features") val regressor = new RandomForestRegressor() val pipeline = new Pipeline() .setStages(Array(assembler, normalizer, regressor)) val pMap = ParamMap(normalizer.p -> 1.0) val model = pipeline.fit(train, pMap) val prediction = model.transform(test) prediction.show() } }) ssc.start() ssc.awaitTermination() } }
Example 7
Source File: TestFlinkGenerator.scala From milan with Apache License 2.0
package com.amazon.milan.compiler.flink.generator

import com.amazon.milan.application.ApplicationConfiguration
import com.amazon.milan.application.sources.S3DataSource
import com.amazon.milan.dataformats.JsonDataInputFormat
import com.amazon.milan.compiler.flink.testing.{IntRecord, TestApplicationExecutor}
import com.amazon.milan.lang._
import com.amazon.milan.testing.applications._
import org.junit.Assert._
import org.junit.Test

import scala.reflect.runtime.universe
import scala.tools.reflect.ToolBox

@Test
class TestFlinkGenerator {
  private val generator = new FlinkGenerator(GeneratorConfig())

  @Test
  def test_FlinkGenerator_GenerateScala_WithListSourceAndMapOfOneRecord_GeneratesCodeThatCompilesAndOutputsMappedRecord(): Unit = {
    val input = Stream.of[IntRecord].withName("input")
    val output = input.map(r => IntRecord(r.i + 1)).withName("output")
    val graph = new StreamGraph(output)

    val config = new ApplicationConfiguration
    config.setListSource(input, IntRecord(1))

    val result = TestApplicationExecutor.executeApplication(graph, config, 10, output)

    val outputRecords = result.getRecords(output)
    assertEquals(List(IntRecord(2)), outputRecords)
  }

  @Test
  def test_FlinkGenerator_GenerateScala_WithS3DataSource_GeneratesCodeThatCompiles(): Unit = {
    val input = Stream.of[IntRecord].withName("input")
    val output = input.map(r => IntRecord(r.i + 1)).withName("output")
    val graph = new StreamGraph(output)

    val config = new ApplicationConfiguration
    config.setSource(input, new S3DataSource[IntRecord]("bucket", "prefix", new JsonDataInputFormat[IntRecord]()))

    val generatedCode = this.generator.generateScala(graph, config, "", "TestApp")

    this.eval(generatedCode)
  }

  private def eval(code: String): Any = {
    try {
      val tb = ToolBox(universe.runtimeMirror(this.getClass.getClassLoader)).mkToolBox()
      val tree = tb.parse(code)
      tb.eval(tree)
    } catch {
      case ex: Throwable =>
        Console.println(code)
        throw ex
    }
  }
}
Example 8
Source File: ApplicationFeature.scala From CMAK with Apache License 2.0
package features

import com.typesafe.config.Config
import grizzled.slf4j.Logging
import kafka.manager.features.KMFeature

import scala.util.{Success, Failure, Try}

sealed trait ApplicationFeature extends KMFeature

case object KMClusterManagerFeature extends ApplicationFeature
case object KMTopicManagerFeature extends ApplicationFeature
case object KMPreferredReplicaElectionFeature extends ApplicationFeature
case object KMScheduleLeaderElectionFeature extends ApplicationFeature
case object KMReassignPartitionsFeature extends ApplicationFeature
case object KMBootstrapClusterConfigFeature extends ApplicationFeature

object ApplicationFeature extends Logging {
  import scala.reflect.runtime.universe
  val runtimeMirror = universe.runtimeMirror(getClass.getClassLoader)

  def from(s: String) : Option[ApplicationFeature] = {
    Try {
      val clazz = s"features.$s"
      val module = runtimeMirror.staticModule(clazz)
      val obj = runtimeMirror.reflectModule(module)
      obj.instance match {
        case f: ApplicationFeature => f
        case _ =>
          throw new IllegalArgumentException(s"Unknown application feature $s")
      }
    } match {
      case Failure(t) =>
        error(s"Unknown application feature $s")
        None
      case Success(f) => Option(f)
    }
  }
}

case class ApplicationFeatures(features: Set[ApplicationFeature])

object ApplicationFeatures extends Logging {

  lazy val default : List[String] = List(
    KMClusterManagerFeature,
    KMTopicManagerFeature,
    KMPreferredReplicaElectionFeature,
    KMReassignPartitionsFeature).map(_.getClass.getSimpleName)

  def getApplicationFeatures(config: Config) : ApplicationFeatures = {
    import scala.collection.JavaConverters._
    val configFeatures: Option[List[String]] = Try(config.getStringList("application.features").asScala.toList).toOption

    if(configFeatures.isEmpty) {
      warn(s"application.features not found in conf file, using default values $default")
    }

    val f = configFeatures.getOrElse(default).map(ApplicationFeature.from).flatten
    ApplicationFeatures(f.toSet)
  }
}
Example 9
Source File: KMFeature.scala From CMAK with Apache License 2.0
package kafka.manager.features import grizzled.slf4j.Logging import kafka.manager.model.{Kafka_0_8_1_1, ClusterConfig} import scala.collection.mutable.ListBuffer import scala.util.{Success, Failure, Try} trait KMFeature sealed trait ClusterFeature extends KMFeature case object KMLogKafkaFeature extends ClusterFeature case object KMDeleteTopicFeature extends ClusterFeature case object KMJMXMetricsFeature extends ClusterFeature case object KMDisplaySizeFeature extends ClusterFeature case object KMPollConsumersFeature extends ClusterFeature object ClusterFeature extends Logging { import scala.reflect.runtime.universe val runtimeMirror = universe.runtimeMirror(getClass.getClassLoader) def from(s: String) : Option[ClusterFeature] = { Try { val clazz = s"features.$s" val module = runtimeMirror.staticModule(clazz) val obj = runtimeMirror.reflectModule(module) obj.instance match { case f: ClusterFeature => f case _ => throw new IllegalArgumentException(s"Unknown application feature $s") } } match { case Failure(t) => error(s"Unknown application feature $s") None case Success(f) => Option(f) } } } case class ClusterFeatures(features: Set[ClusterFeature]) object ClusterFeatures { val default = ClusterFeatures(Set()) def from(clusterConfig: ClusterConfig) : ClusterFeatures = { val buffer = new ListBuffer[ClusterFeature] if(clusterConfig.logkafkaEnabled) buffer+=KMLogKafkaFeature if(clusterConfig.jmxEnabled) buffer+=KMJMXMetricsFeature if(clusterConfig.displaySizeEnabled) buffer+=KMDisplaySizeFeature if(clusterConfig.version != Kafka_0_8_1_1) buffer+=KMDeleteTopicFeature if(clusterConfig.pollConsumers) buffer+=KMPollConsumersFeature ClusterFeatures(buffer.toSet) } }
Example 10
Source File: HBaseCredentialProvider.scala From sparkoscope with Apache License 2.0
package org.apache.spark.deploy.yarn.security import scala.reflect.runtime.universe import scala.util.control.NonFatal import org.apache.hadoop.conf.Configuration import org.apache.hadoop.security.Credentials import org.apache.hadoop.security.token.{Token, TokenIdentifier} import org.apache.spark.SparkConf import org.apache.spark.internal.Logging private[security] class HBaseCredentialProvider extends ServiceCredentialProvider with Logging { override def serviceName: String = "hbase" override def obtainCredentials( hadoopConf: Configuration, sparkConf: SparkConf, creds: Credentials): Option[Long] = { try { val mirror = universe.runtimeMirror(getClass.getClassLoader) val obtainToken = mirror.classLoader. loadClass("org.apache.hadoop.hbase.security.token.TokenUtil"). getMethod("obtainToken", classOf[Configuration]) logDebug("Attempting to fetch HBase security token.") val token = obtainToken.invoke(null, hbaseConf(hadoopConf)) .asInstanceOf[Token[_ <: TokenIdentifier]] logInfo(s"Get token from HBase: ${token.toString}") creds.addToken(token.getService, token) } catch { case NonFatal(e) => logDebug(s"Failed to get token from service $serviceName", e) } None } override def credentialsRequired(hadoopConf: Configuration): Boolean = { hbaseConf(hadoopConf).get("hbase.security.authentication") == "kerberos" } private def hbaseConf(conf: Configuration): Configuration = { try { val mirror = universe.runtimeMirror(getClass.getClassLoader) val confCreate = mirror.classLoader. loadClass("org.apache.hadoop.hbase.HBaseConfiguration"). getMethod("create", classOf[Configuration]) confCreate.invoke(null, conf).asInstanceOf[Configuration] } catch { case NonFatal(e) => logDebug("Fail to invoke HBaseConfiguration", e) conf } } }
Example 11
Source File: IntegrationsModule.scala From scuruto with MIT License
package module

import integration._
import skinny.SkinnyConfig

class IntegrationsModule extends scaldi.Module {
  private val DEFAULT_SERVICE = "Null"

  val service = SkinnyConfig.stringConfigValue("externalIntegration.service").map { configValue =>
    configValue.capitalize
  } getOrElse DEFAULT_SERVICE

  import scala.reflect.runtime.universe
  val runtimeMirror = universe.runtimeMirror(getClass.getClassLoader)
  val module = runtimeMirror.staticModule(s"integration.${service}Integration")
  val obj = runtimeMirror.reflectModule(module)
  val integration = obj.instance

  bind[ExternalServiceIntegration] to integration.asInstanceOf[ExternalServiceIntegration]
}
Example 12
Source File: UploadControllerFactory.scala From scuruto with MIT License
package controller

import skinny.SkinnyConfig

object UploadControllerFactory {
  private val DEFAULT_DESTINATION = "Local"

  val create: UploadController = {
    val destination = SkinnyConfig.stringConfigValue("upload.destination").map { configValue =>
      configValue.capitalize
    } getOrElse DEFAULT_DESTINATION

    import scala.reflect.runtime.universe
    val runtimeMirror = universe.runtimeMirror(getClass.getClassLoader)
    val module = runtimeMirror.staticModule(s"controller.upload.${destination}UploadController")
    val obj = runtimeMirror.reflectModule(module)
    val controller = obj.instance
    controller.asInstanceOf[UploadController]
  }
}
Example 13
Source File: L8-8Sql.scala From prosparkstreaming with Apache License 2.0
package org.apress.prospark import scala.reflect.runtime.universe import org.apache.spark.SparkConf import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.apache.spark.sql.SQLContext import org.apache.spark.streaming.Seconds import org.apache.spark.streaming.StreamingContext object CdrSqlApp { case class Cdr(squareId: Int, timeInterval: Long, countryCode: Int, smsInActivity: Float, smsOutActivity: Float, callInActivity: Float, callOutActivity: Float, internetTrafficActivity: Float) def main(args: Array[String]) { if (args.length != 4) { System.err.println( "Usage: CdrSqlApp <appname> <batchInterval> <hostname> <port>") System.exit(1) } val Seq(appName, batchInterval, hostname, port) = args.toSeq val conf = new SparkConf() .setAppName(appName) .setJars(SparkContext.jarOfClass(this.getClass).toSeq) val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt)) val sqlC = new SQLContext(ssc.sparkContext) import sqlC.implicits._ val cdrStream = ssc.socketTextStream(hostname, port.toInt) .map(_.split("\\t", -1)) .foreachRDD(rdd => { val cdrs = seqToCdr(rdd).toDF() cdrs.registerTempTable("cdrs") sqlC.sql("SELECT countryCode, COUNT(countryCode) AS cCount FROM cdrs GROUP BY countryCode ORDER BY cCount DESC LIMIT 5").show() sqlC.dropTempTable("cdrs") }) ssc.start() ssc.awaitTermination() } def seqToCdr(rdd: RDD[Array[String]]): RDD[Cdr] = { rdd.map(c => c.map(f => f match { case x if x.isEmpty() => "0" case x => x })).map(c => Cdr(c(0).toInt, c(1).toLong, c(2).toInt, c(3).toFloat, c(4).toFloat, c(5).toFloat, c(6).toFloat, c(7).toFloat)) } }
Example 14
Source File: ContractProxyModule.scala From fintrospect with Apache License 2.0
package io.fintrospect

import com.twitter.finagle.Service
import com.twitter.finagle.http.path.{Path, Root}
import com.twitter.finagle.http.{Request, Response}
import io.fintrospect.renderers.swagger2dot0.{ApiInfo, Swagger2dot0Json}

import scala.reflect.runtime.universe.TypeTag
import scala.reflect.runtime.{currentMirror, universe}

object ContractProxyModule {
  def apply[T <: Contract](name: String, service: Service[Request, Response], contract: T, rootPath: Path = Root, description: String = null)(implicit tag: TypeTag[T]): RouteModule[Request, Response] = {
    val descriptionOption = Option(description).getOrElse(s"Proxy services for $name API")
    val routes = universe.typeOf[T].members
      .filter(_.isModule)
      .map(_.asModule)
      .map(currentMirror.reflectModule(_).instance)
      .filter(_.isInstanceOf[ContractEndpoint])
      .map(_.asInstanceOf[ContractEndpoint].route)

    routes.foldLeft(RouteModule(rootPath, Swagger2dot0Json(ApiInfo(name, name, descriptionOption)))) {
      (spec, route) => spec.withRoute(route.bindToProxy(service))
    }
  }
}
Example 15
Source File: ScalaCleanCompilerPlugin.scala From ScalaClean with Apache License 2.0
package org.scalaclean.analysis

import org.scalaclean.analysis.plugin.{ExtensionPlugin, ExtensionPluginFactory, JunitPlugin, ModsPlugin}

import scala.tools.nsc.Global
import scala.tools.nsc.plugins.{Plugin, PluginComponent}

class ScalaCleanCompilerPlugin(override val global: Global) extends Plugin {

  override val name: String = "scalaclean-analysis-plugin"
  override val description: String = "ScalaClean analysis plugin"

  val component = new ScalaCompilerPluginComponent(global)

  //hardcoded for the moment
  component.extensions += ModsPlugin.create(component, "")
  component.extensions += JunitPlugin.create(component, "")

  override def processOptions(
      options: List[String],
      error: String => Unit): Unit = {
    import scala.reflect.runtime.universe
    val runtimeMirror = universe.runtimeMirror(getClass.getClassLoader)

    val realOptions = options.distinct
    component.options = realOptions
    for (option <- realOptions) {
      if (option == "debug:true") {
        component.debug = true
      } else if (option.startsWith("extension:")) {
        val end = {
          val end = option.indexOf(':', 10)
          if (end == -1) option.length else end
        }
        val fqn = option.substring(10, end)
        val module = runtimeMirror.staticModule(fqn)
        runtimeMirror.reflectModule(module).instance match {
          case valid: ExtensionPluginFactory => component.extensions += valid.create(component, option.substring(end))
          case null => throw new IllegalArgumentException("not a valid Extension FQN - expected the name of an object")
          case invalid => throw new IllegalArgumentException(s"not a valid Extension FQN - ${invalid.getClass.getName} is not a ${classOf[ExtensionDescriptor[_]].getName}")
        }
      } else if (option.startsWith("srcdirs:")) {
        component.sourceDirs = option.substring(8).split(java.io.File.pathSeparatorChar).toList
      } else error(s"Option not recognised: $option")
    }
  }

  override val optionsHelp: Option[String] = Some(
    //
    s"""-P:$name:debug:true Set debugging on the ScalaClean analysis plugin
       |-P:$name:srcdirs The path of sources, separated by ${java.io.File.pathSeparatorChar}
       |-P:$name:extension:<fqn> Add an extension dataset. FQN is the fully qualified name of the appropriate ExtensionDescriptor object
       |""".stripMargin)

  override val components: List[PluginComponent] = List(component)
}
Example 16
Source File: ChewerJob.scala From comet-data-pipeline with Apache License 2.0
package com.ebiznext.comet.job.ingest import com.ebiznext.comet.schema.handlers.StorageHandler import com.ebiznext.comet.schema.model.{Domain, Schema, Type} import com.ebiznext.comet.utils.{SparkJob, SparkJobResult} import org.apache.hadoop.fs.Path import scala.reflect.runtime.universe import scala.util.Try trait ChewerJob extends SparkJob { var domain: Domain var schema: Schema var types: List[Type] var path: List[Path] var storageHandler: StorageHandler def run( domain: Domain, schema: Schema, types: List[Type], path: List[Path], storageHandler: StorageHandler ): Try[SparkJobResult] = { this.domain = domain this.schema = schema this.types = types this.path = path this.storageHandler = storageHandler run() } } object ChewerJob { def run( objName: String, domain: Domain, schema: Schema, types: List[Type], path: List[Path], storageHandler: StorageHandler ): Try[SparkJobResult] = { val runtimeMirror = universe.runtimeMirror(getClass.getClassLoader) val module = runtimeMirror.staticModule(objName) val obj: universe.ModuleMirror = runtimeMirror.reflectModule(module) val chewer = obj.instance.asInstanceOf[ChewerJob] chewer.run(domain, schema, types, path, storageHandler) } }
Example 17
Source File: MorpheusGraphTest.scala From morpheus with Apache License 2.0
package org.opencypher.morpheus.impl import org.apache.spark.sql.Row import org.opencypher.morpheus.api.io.MorpheusElementTable import org.opencypher.morpheus.api.value.MorpheusElement._ import org.opencypher.morpheus.impl.table.SparkTable.DataFrameTable import org.opencypher.morpheus.testing.MorpheusTestSuite import org.opencypher.morpheus.testing.fixture.{GraphConstructionFixture, RecordsVerificationFixture, TeamDataFixture} import org.opencypher.okapi.api.types._ import org.opencypher.okapi.relational.api.planning.RelationalRuntimeContext import org.opencypher.okapi.relational.api.table.RelationalCypherRecords import org.opencypher.okapi.relational.impl.operators.Start import org.opencypher.okapi.testing.Bag import scala.reflect.runtime.universe abstract class MorpheusGraphTest extends MorpheusTestSuite with GraphConstructionFixture with RecordsVerificationFixture with TeamDataFixture { object MorpheusGraphTest { implicit class RecordOps(records: RelationalCypherRecords[DataFrameTable]) { def planStart: Start[DataFrameTable] = { implicit val tableTypeTag: universe.TypeTag[DataFrameTable] = morpheus.tableTypeTag implicit val context: RelationalRuntimeContext[DataFrameTable] = morpheus.basicRuntimeContext() Start.fromEmptyGraph(records) } } } it("should return only nodes with that exact label (single label)") { val graph = initGraph(dataFixtureWithoutArrays) val nodes = graph.nodes("n", CTNode("Person"), exactLabelMatch = true) val cols = Seq( n, nHasLabelPerson, nHasPropertyLuckyNumber, nHasPropertyName ) verify(nodes, cols, Bag(Row(4L.encodeAsMorpheusId.toList, true, 8L, "Donald"))) } it("should return only nodes with that exact label (multiple labels)") { val graph = initGraph(dataFixtureWithoutArrays) val nodes = graph.nodes("n", CTNode("Person", "German"), exactLabelMatch = true) val cols = Seq( n, nHasLabelGerman, nHasLabelPerson, nHasPropertyLuckyNumber, nHasPropertyName ) val data = Bag( Row(2L.encodeAsMorpheusId.toList, true, true, 1337L, "Martin"), Row(3L.encodeAsMorpheusId.toList, true, true, 8L, "Max"), Row(0L.encodeAsMorpheusId.toList, true, true, 42L, "Stefan") ) verify(nodes, cols, data) } it("should support the same node label from multiple node tables") { // this creates additional :Person nodes val personsPart2 = morpheus.sparkSession.createDataFrame( Seq( (5L, false, "Soeren", 23L), (6L, false, "Hannes", 42L)) ).toDF("ID", "IS_SWEDE", "NAME", "NUM") val personTable2 = MorpheusElementTable.create(personTable.mapping, personsPart2) val graph = morpheus.graphs.create(personTable, personTable2) graph.nodes("n").size shouldBe 6 } it("should support the same relationship type from multiple relationship tables") { // this creates additional :KNOWS relationships val knowsParts2 = morpheus.sparkSession.createDataFrame( Seq( (1L, 7L, 2L, 2017L), (1L, 8L, 3L, 2016L)) ).toDF("SRC", "ID", "DST", "SINCE") val knowsTable2 = MorpheusElementTable.create(knowsTable.mapping, knowsParts2) val graph = morpheus.graphs.create(personTable, knowsTable, knowsTable2) graph.relationships("r").size shouldBe 8 } it("should return an empty result for non-present types") { val graph = morpheus.graphs.create(personTable, knowsTable) graph.nodes("n", CTNode("BAR")).size shouldBe 0 graph.relationships("r", CTRelationship("FOO")).size shouldBe 0 } }
Example 18
Source File: HBaseCredentialProvider.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.deploy.yarn.security import scala.reflect.runtime.universe import scala.util.control.NonFatal import org.apache.hadoop.conf.Configuration import org.apache.hadoop.security.Credentials import org.apache.hadoop.security.token.{Token, TokenIdentifier} import org.apache.spark.SparkConf import org.apache.spark.internal.Logging private[security] class HBaseCredentialProvider extends ServiceCredentialProvider with Logging { override def serviceName: String = "hbase" override def obtainCredentials( hadoopConf: Configuration, sparkConf: SparkConf, creds: Credentials): Option[Long] = { try { val mirror = universe.runtimeMirror(getClass.getClassLoader) val obtainToken = mirror.classLoader. loadClass("org.apache.hadoop.hbase.security.token.TokenUtil"). getMethod("obtainToken", classOf[Configuration]) logDebug("Attempting to fetch HBase security token.") val token = obtainToken.invoke(null, hbaseConf(hadoopConf)) .asInstanceOf[Token[_ <: TokenIdentifier]] logInfo(s"Get token from HBase: ${token.toString}") creds.addToken(token.getService, token) } catch { case NonFatal(e) => logDebug(s"Failed to get token from service $serviceName", e) } None } override def credentialsRequired(hadoopConf: Configuration): Boolean = { hbaseConf(hadoopConf).get("hbase.security.authentication") == "kerberos" } private def hbaseConf(conf: Configuration): Configuration = { try { val mirror = universe.runtimeMirror(getClass.getClassLoader) val confCreate = mirror.classLoader. loadClass("org.apache.hadoop.hbase.HBaseConfiguration"). getMethod("create", classOf[Configuration]) confCreate.invoke(null, conf).asInstanceOf[Configuration] } catch { case NonFatal(e) => logDebug("Fail to invoke HBaseConfiguration", e) conf } } }
Example 19
Source File: TypeInfo.scala From avro4s with Apache License 2.0
package com.sksamuel.avro4s import magnolia.TypeName import scala.util.Try case class TypeInfo(owner: String, short: String, typeArguments: Seq[TypeInfo], nameAnnotation: Option[String], namespaceAnnotation: Option[String], erased: Boolean) { val full: String = s"$owner.$short" } object TypeInfo { import scala.reflect.runtime.universe def apply(typeName: TypeName, annos: Seq[Any]): TypeInfo = { val annotationExtractors = new AnnotationExtractors(annos) TypeInfo( typeName.owner, typeName.short, typeName.typeArguments.map(TypeInfo.fromTypeName), annotationExtractors.name, annotationExtractors.namespace, annotationExtractors.erased ) } def fromTypeName(typeName: TypeName): TypeInfo = { // try to populate from the class name, but this may fail if the class is not top level // if it does fail then we default back to using what magnolia provides val maybeType: Option[universe.Type] = Try { val mirror = universe.runtimeMirror(Thread.currentThread().getContextClassLoader) val classsym = mirror.staticClass(typeName.full) classsym.toType }.toOption TypeInfo( owner = typeName.owner, short = typeName.short, typeArguments = typeName.typeArguments.map(fromTypeName), nameAnnotation = maybeType.flatMap(nameAnnotation), namespaceAnnotation = maybeType.flatMap(namespaceAnnotation), erased = maybeType.exists(erased) ) } def fromClass[A](klass: Class[A]): TypeInfo = { import scala.reflect.runtime.universe val mirror = universe.runtimeMirror(Thread.currentThread().getContextClassLoader) val sym = mirror.classSymbol(klass) val tpe = sym.toType TypeInfo.fromType(tpe) } private def nameAnnotation(tpe: universe.Type): Option[String] = { import scala.reflect.runtime.universe._ tpe.typeSymbol.typeSignature.typeSymbol.annotations.collectFirst { case a if a.tree.tpe =:= typeOf[AvroName] => val annoValue = a.tree.children.tail.head.asInstanceOf[Literal].value.value annoValue.toString } } private def namespaceAnnotation(tpe: universe.Type): Option[String] = { import scala.reflect.runtime.universe._ tpe.typeSymbol.typeSignature.typeSymbol.annotations.collectFirst { case a if a.tree.tpe =:= typeOf[AvroNamespace] => val annoValue = a.tree.children.tail.head.asInstanceOf[Literal].value.value annoValue.toString } } private def erased(tpe: universe.Type): Boolean = { import scala.reflect.runtime.universe._ tpe.typeSymbol.typeSignature.typeSymbol.annotations.exists { case a if a.tree.tpe =:= typeOf[AvroErasedName] => true case _ => false } } def fromType(tpe: universe.Type): TypeInfo = { TypeInfo( tpe.typeSymbol.owner.fullName, tpe.typeSymbol.name.decodedName.toString, tpe.typeArgs.map(fromType), nameAnnotation(tpe), namespaceAnnotation(tpe), erased(tpe) ) } }
Example 20
Source File: HBaseDelegationTokenProvider.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.deploy.security import scala.reflect.runtime.universe import scala.util.control.NonFatal import org.apache.hadoop.conf.Configuration import org.apache.hadoop.security.Credentials import org.apache.hadoop.security.token.{Token, TokenIdentifier} import org.apache.spark.SparkConf import org.apache.spark.internal.Logging import org.apache.spark.util.Utils private[security] class HBaseDelegationTokenProvider extends HadoopDelegationTokenProvider with Logging { override def serviceName: String = "hbase" override def obtainDelegationTokens( hadoopConf: Configuration, sparkConf: SparkConf, creds: Credentials): Option[Long] = { try { val mirror = universe.runtimeMirror(Utils.getContextOrSparkClassLoader) val obtainToken = mirror.classLoader. loadClass("org.apache.hadoop.hbase.security.token.TokenUtil"). getMethod("obtainToken", classOf[Configuration]) logDebug("Attempting to fetch HBase security token.") val token = obtainToken.invoke(null, hbaseConf(hadoopConf)) .asInstanceOf[Token[_ <: TokenIdentifier]] logInfo(s"Get token from HBase: ${token.toString}") creds.addToken(token.getService, token) } catch { case NonFatal(e) => logDebug(s"Failed to get token from service $serviceName", e) } None } override def delegationTokensRequired( sparkConf: SparkConf, hadoopConf: Configuration): Boolean = { hbaseConf(hadoopConf).get("hbase.security.authentication") == "kerberos" } private def hbaseConf(conf: Configuration): Configuration = { try { val mirror = universe.runtimeMirror(Utils.getContextOrSparkClassLoader) val confCreate = mirror.classLoader. loadClass("org.apache.hadoop.hbase.HBaseConfiguration"). getMethod("create", classOf[Configuration]) confCreate.invoke(null, conf).asInstanceOf[Configuration] } catch { case NonFatal(e) => logDebug("Fail to invoke HBaseConfiguration", e) conf } } }
Example 21
Source File: Platform.scala From neo-sbt-scalafmt with Apache License 2.0
package com.lucidchart.sbt.scalafmt

import sbt.Keys._
import sbt._
import sbt.internal.inc.Analysis
import sbt.util.CacheImplicits._
import sbt.util.CacheStore

import scala.reflect.runtime.universe

object AnalysisPlatform {
  def counted(prefix: String, single: String, plural: String, count: Int) =
    Analysis.counted(prefix, single, plural, count)
}

object CachePlatform {
  private[this] val mirror = universe.runtimeMirror(getClass.getClassLoader)

  private[this] val fileHashModified = {
    val module = mirror.reflectModule(mirror.staticModule("sbt.util.FileHashModified"))
    mirror.reflect(module.instance).reflectMethod(module.symbol.info.decl(universe.TermName("apply")).asMethod)
  }

  def fileInfo(file: File, hash: List[Byte], lastModified: Long) =
    fileHashModified(file, hash, lastModified.asInstanceOf[AnyRef]).asInstanceOf[HashModifiedFileInfo]

  def readFileInfo(cache: File) = CacheStore(cache).read(Set.empty[HashModifiedFileInfo])

  def writeFileInfo(cache: File, value: Set[HashModifiedFileInfo]) = CacheStore(cache).write(value)
}

object CommandPlatform {
  val CommandStrings = sbt.internal.CommandStrings
}

object LibraryPlatform {
  def moduleInfo(useIvy: SettingKey[Boolean]) = scalaModuleInfo := {
    if (useIvy.value) {
      // otherwise scala-library conflicts
      scalaModuleInfo.value.map(_.withOverrideScalaVersion(false))
    } else {
      scalaModuleInfo.value
    }
  }
}
Example 22
Source File: SysProps.scala From scio with Apache License 2.0
package com.spotify.scio import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.reflect.ClassPath final case class SysProp(flag: String, description: String) { def value(default: => String): String = sys.props.getOrElse(flag, default) def value: String = sys.props(flag) def valueOption: Option[String] = sys.props.get(flag) def value_=(str: String): Unit = sys.props(flag) = str def show: String = s"-D$flag=<String>\n\t$description" } trait SysProps { def properties: List[SysProp] def show: String = { val props = properties.map(p => s" ${p.show}").mkString("\n") val name = this.getClass.getName.replace("$", "") s"$name:\n$props\n" } } object SysProps { import scala.jdk.CollectionConverters._ import scala.reflect.runtime.universe def properties: Iterable[SysProps] = { val classLoader = Thread.currentThread().getContextClassLoader val runtimeMirror = universe.runtimeMirror(classLoader) ClassPath .from(classLoader) .getAllClasses .asScala .filter(_.getName.endsWith("SysProps")) .flatMap { clsInfo => try { val cls = clsInfo.load() cls.getMethod("properties") val module = runtimeMirror.staticModule(cls.getName) val obj = runtimeMirror.reflectModule(module) Some(obj.instance.asInstanceOf[SysProps]) } catch { case _: Throwable => None } } } } @registerSysProps object CoreSysProps { val Project = SysProp("project", "") val Home = SysProp("java.home", "java home directory") val TmpDir = SysProp("java.io.tmpdir", "java temporary directory") val User = SysProp("user.name", "system username") val UserDir = SysProp("user.dir", "user dir") }
Example 23
Source File: WarpPropertyLike.scala From warp-core with MIT License
package com.workday.warp.common import org.pmw.tinylog.Logger import scala.reflect.runtime.universe import scala.reflect.runtime.universe.{Mirror, ModuleSymbol, MethodSymbolApi, Type} def values[T <: WarpPropertyLike](`class`: Class[T]): Seq[PropertyEntry] = { Logger.debug(s"getting property values for ${`class`.getCanonicalName}") val mirror: Mirror = universe.runtimeMirror(`class`.getClassLoader) // concrete type of the property holder class // TODO don't read this as a static module, this breaks nested config objects, eg those defined within a class or method. val module: ModuleSymbol = mirror.staticModule(`class`.getCanonicalName) // we reflected this as a module (singleton), so get the single instance, and obtain a mirror for that instance. val instanceMirror = mirror.reflect(mirror.reflectModule(module).instance) // the `MODULE$` field holds all the members we are really interested in. module.info.members.find(_.name == universe.TermName("MODULE$")) match { case Some(member) => member.info.members.toSeq.collect { // retain only public accessor methods with the correct return type. // recall that scala vals are private fields with generated accessor methods. case method: MethodSymbolApi if method.isPublic && method.isAccessor && method.returnType =:= entryType => instanceMirror.reflectMethod(method.asMethod)().asInstanceOf[PropertyEntry] } case None => throw new RuntimeException(s"it appears that ${`class`.getCanonicalName} is not a scala object (does not have a MODULE$$ field)") } } }
Example 24
Source File: LocalKMeansModel.scala From spark-ml-serving with Apache License 2.0
package io.hydrosphere.spark_ml_serving.clustering import io.hydrosphere.spark_ml_serving.TypedTransformerConverter import io.hydrosphere.spark_ml_serving.common._ import io.hydrosphere.spark_ml_serving.common.utils.DataUtils import org.apache.spark.ml.clustering.KMeansModel import org.apache.spark.mllib.clustering.{KMeansModel => OldKMeansModel} import org.apache.spark.mllib.linalg.{Vector => MLlibVec} import scala.reflect.runtime.universe class LocalKMeansModel(override val sparkTransformer: KMeansModel) extends LocalTransformer[KMeansModel] { lazy val parent: OldKMeansModel = { val mirror = universe.runtimeMirror(sparkTransformer.getClass.getClassLoader) val parentTerm = universe.typeOf[KMeansModel].decl(universe.TermName("parentModel")).asTerm mirror.reflect(sparkTransformer).reflectField(parentTerm).get.asInstanceOf[OldKMeansModel] } override def transform(localData: LocalData): LocalData = { import io.hydrosphere.spark_ml_serving.common.utils.DataUtils._ localData.column(sparkTransformer.getFeaturesCol) match { case Some(column) => val newColumn = LocalDataColumn( sparkTransformer.getPredictionCol, column.data.mapToMlLibVectors.map(x => parent.predict(x)) ) localData.withColumn(newColumn) case None => localData } } } object LocalKMeansModel extends SimpleModelLoader[KMeansModel] with TypedTransformerConverter[KMeansModel] { override def build(metadata: Metadata, data: LocalData): KMeansModel = { val mapRows = data.toMapList val centers = mapRows map { row => val vec = DataUtils.constructVector(row("clusterCenter").asInstanceOf[Map[String, Any]]) org.apache.spark.mllib.linalg.Vectors.fromML(vec) } val parentConstructor = classOf[OldKMeansModel].getDeclaredConstructor(classOf[Array[MLlibVec]]) parentConstructor.setAccessible(true) val mlk = parentConstructor.newInstance(centers.toArray) val constructor = classOf[KMeansModel].getDeclaredConstructor(classOf[String], classOf[OldKMeansModel]) constructor.setAccessible(true) var inst = constructor .newInstance(metadata.uid, mlk) .setFeaturesCol(metadata.paramMap("featuresCol").asInstanceOf[String]) .setPredictionCol(metadata.paramMap("predictionCol").asInstanceOf[String]) inst = inst.set(inst.k, metadata.paramMap("k").asInstanceOf[Number].intValue()) inst = inst.set(inst.initMode, metadata.paramMap("initMode").asInstanceOf[String]) inst = inst.set(inst.maxIter, metadata.paramMap("maxIter").asInstanceOf[Number].intValue()) inst = inst.set(inst.initSteps, metadata.paramMap("initSteps").asInstanceOf[Number].intValue()) inst = inst.set(inst.seed, metadata.paramMap("seed").toString.toLong) inst = inst.set(inst.tol, metadata.paramMap("tol").asInstanceOf[Double]) inst } override implicit def toLocal(transformer: KMeansModel) = new LocalKMeansModel(transformer) }
Example 25
Source File: ShapeConverter.scala From BigDL with Apache License 2.0
package com.intel.analytics.bigdl.utils.serializer.converters import com.intel.analytics.bigdl.tensor.TensorNumericMath import com.intel.analytics.bigdl.utils.serializer.{DeserializeContext, SerializeContext} import com.intel.analytics.bigdl.utils.{MultiShape, SingleShape, Shape => BigDLShape} import com.intel.analytics.bigdl.serialization.Bigdl import com.intel.analytics.bigdl.serialization.Bigdl.Shape.ShapeType import com.intel.analytics.bigdl.serialization.Bigdl.{AttrValue, BigDLModule, DataType, Shape} import scala.collection.JavaConverters._ import scala.reflect.ClassTag import scala.reflect.runtime.universe object ShapeConverter extends DataConverter { override def getAttributeValue[T: ClassTag] (context: DeserializeContext, attribute: Bigdl.AttrValue) (implicit ev: TensorNumericMath.TensorNumeric[T]): AnyRef = { val shape = attribute.getShape toBigDLShape(shape) } private def toBigDLShape(shape : Shape): BigDLShape = { if (shape.getSsize == 0) { // null is mapped to empty shape on the serialization stage. return null } if (shape.getShapeType == ShapeType.SINGLE) { val shapeValues = shape.getShapeValueList.asScala.toList.map(_.intValue) SingleShape(shapeValues) } else if (shape.getShapeType == ShapeType.MULTI) { val shapes = shape.getShapeList.asScala.toList.map(toBigDLShape(_)) MultiShape(shapes) } else { throw new RuntimeException(s"${shape.getShapeType} not supported for now") } } def shapeToBigDL[T: ClassTag](context: DeserializeContext, model: BigDLModule, name: String) (implicit ev: TensorNumericMath.TensorNumeric[T]): BigDLShape = { val attrbute = AttrValue.newBuilder attrbute.setShape( name match { case "input" => model.getInputShape case "output" => model.getOutputShape }) ShapeConverter.getAttributeValue(context, attrbute.build).asInstanceOf[BigDLShape] } def shapeToProto[T: ClassTag](context: SerializeContext[T], shape: BigDLShape) (implicit ev: TensorNumericMath.TensorNumeric[T]): Shape = { val attribute = AttrValue.newBuilder ShapeConverter.setAttributeValue(context, attribute, shape, universe.typeOf[BigDLShape]) attribute.getShape } override def setAttributeValue[T: ClassTag] (context: SerializeContext[T], attributeBuilder: AttrValue.Builder, value: Any, valueType: universe.Type)(implicit ev: TensorNumericMath.TensorNumeric[T]): Unit = { attributeBuilder.setDataType(DataType.SHAPE) if (value != null) { val shape = value.asInstanceOf[BigDLShape] val shapeBuilder = Shape.newBuilder setShape(shape, shapeBuilder) attributeBuilder.setShape(shapeBuilder.build) } } private def setShape(bigdlShape : BigDLShape, shapeBuilder : Shape.Builder): Unit = { if (bigdlShape.isInstanceOf[SingleShape]) { shapeBuilder.setShapeType(ShapeType.SINGLE) val shapes = bigdlShape.toSingle shapeBuilder.setSsize(shapes.size) shapes.foreach(shape => { shapeBuilder.addShapeValue(shape) }) } else if (bigdlShape.isInstanceOf[MultiShape]) { shapeBuilder.setShapeType(ShapeType.MULTI) val shapes = bigdlShape.toMulti shapeBuilder.setSsize(shapes.size) shapes.foreach(shape => { val subShapeBuilder = Shape.newBuilder setShape(shape, subShapeBuilder) shapeBuilder.addShape(subShapeBuilder.build) }) } else { throw new RuntimeException(s"${bigdlShape} type not supported !") } } }
Example 26
Source File: DataFormatConverter.scala From BigDL with Apache License 2.0
package com.intel.analytics.bigdl.utils.serializer.converters import com.intel.analytics.bigdl.nn.abstractnn.DataFormat import com.intel.analytics.bigdl.nn.abstractnn.DataFormat.{NCHW, NHWC} import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.utils.serializer.{DeserializeContext, SerializeContext} import com.intel.analytics.bigdl.serialization.Bigdl.{AttrValue, DataType, InputDataFormat} import scala.reflect.ClassTag import scala.reflect.runtime.universe object DataFormatConverter extends DataConverter { override def getAttributeValue[T: ClassTag](context: DeserializeContext, attribute: AttrValue) (implicit ev: TensorNumeric[T]): AnyRef = { val dataFormat = attribute.getDataFormatValue dataFormat match { case InputDataFormat.NCHW => NCHW case InputDataFormat.NHWC => NHWC } } override def setAttributeValue[T: ClassTag] (context: SerializeContext[T], attributeBuilder: AttrValue.Builder, value: Any, valueType: universe.Type) (implicit ev: TensorNumeric[T]): Unit = { attributeBuilder.setDataType(DataType.DATA_FORMAT) if (value != null) { val dataFormat = value.asInstanceOf[DataFormat] val inputFormat = dataFormat match { case NCHW => InputDataFormat.NCHW case NHWC => InputDataFormat.NHWC } attributeBuilder.setDataFormatValue(inputFormat) } } }
Example 27
Source File: VariableFormatConverter.scala From BigDL with Apache License 2.0
package com.intel.analytics.bigdl.utils.serializer.converters import com.intel.analytics.bigdl.nn.VariableFormat import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.utils.serializer.{DeserializeContext, SerializeContext} import com.intel.analytics.bigdl.serialization.Bigdl.{AttrValue, DataType, VarFormat} import scala.reflect.ClassTag import scala.reflect.runtime.universe object VariableFormatConverter extends DataConverter { override def getAttributeValue[T: ClassTag](context: DeserializeContext, attribute: AttrValue) (implicit ev: TensorNumeric[T]): AnyRef = { val format = attribute.getVariableFormatValue format match { case VarFormat.DEFAULT => VariableFormat.Default case VarFormat.ONE_D => VariableFormat.ONE_D case VarFormat.IN_OUT => VariableFormat.IN_OUT case VarFormat.OUT_IN => VariableFormat.OUT_IN case VarFormat.IN_OUT_KW_KH => VariableFormat.IN_OUT_KW_KH case VarFormat.OUT_IN_KW_KH => VariableFormat.OUT_IN_KW_KH case VarFormat.GP_OUT_IN_KW_KH => VariableFormat.GP_OUT_IN_KW_KH case VarFormat.GP_IN_OUT_KW_KH => VariableFormat.GP_IN_OUT_KW_KH case VarFormat.OUT_IN_KT_KH_KW => VariableFormat.OUT_IN_KT_KH_KW case VarFormat.EMPTY_FORMAT => null } } override def setAttributeValue[T: ClassTag]( context: SerializeContext[T], attributeBuilder: AttrValue.Builder, value: Any, valueType: universe.Type = null)(implicit ev: TensorNumeric[T]): Unit = { attributeBuilder.setDataType(DataType.VARIABLE_FORMAT) if (value != null) { val format = value.asInstanceOf[VariableFormat] val formatValue = format match { case VariableFormat.Default => VarFormat.DEFAULT case VariableFormat.ONE_D => VarFormat.ONE_D case VariableFormat.IN_OUT => VarFormat.IN_OUT case VariableFormat.OUT_IN => VarFormat.OUT_IN case VariableFormat.IN_OUT_KW_KH => VarFormat.IN_OUT_KW_KH case VariableFormat.OUT_IN_KW_KH => VarFormat.OUT_IN_KW_KH case VariableFormat.GP_OUT_IN_KW_KH => VarFormat.GP_OUT_IN_KW_KH case VariableFormat.GP_IN_OUT_KW_KH => VarFormat.GP_IN_OUT_KW_KH case VariableFormat.OUT_IN_KT_KH_KW => VarFormat.OUT_IN_KT_KH_KW } attributeBuilder.setVariableFormatValue(formatValue) } else { attributeBuilder.setVariableFormatValue(VarFormat.EMPTY_FORMAT) } } }
Example 28
Source File: ModuleConverter.scala From BigDL with Apache License 2.0
package com.intel.analytics.bigdl.utils.serializer.converters import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity} import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.utils.serializer.{DeserializeContext, ModuleData, ModuleSerializer, SerializeContext} import com.intel.analytics.bigdl.serialization.Bigdl.{AttrValue, DataType} import scala.reflect.ClassTag import scala.reflect.runtime.universe object ModuleConverter extends DataConverter { override def getAttributeValue[T: ClassTag](context: DeserializeContext, attribute: AttrValue) (implicit ev: TensorNumeric[T]): AnyRef = { val serializedModule = attribute.getBigDLModuleValue if (serializedModule.getModuleType != null && serializedModule.getModuleType != "") { ModuleSerializer.load(DeserializeContext(serializedModule, context.storages, context.storageType)).module } else { null } } override def setAttributeValue[T: ClassTag](context: SerializeContext[T], attributeBuilder: AttrValue.Builder, value: Any, valueType: universe.Type = null)(implicit ev: TensorNumeric[T]): Unit = { attributeBuilder.setDataType(DataType.MODULE) if (value != null) { val module = value.asInstanceOf[AbstractModule[Activity, Activity, T]] val serializableModule = ModuleSerializer. serialize(SerializeContext(ModuleData(module, Seq[String](), Seq[String]()), context.storages, context.storageType)).bigDLModule attributeBuilder.setBigDLModuleValue(serializableModule) } } }
Example 29
Source File: InitMethodConverter.scala From BigDL with Apache License 2.0
package com.intel.analytics.bigdl.utils.serializer.converters import com.intel.analytics.bigdl.nn._ import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.utils.serializer.{DeserializeContext, SerializeContext} import com.intel.analytics.bigdl.serialization.Bigdl.{AttrValue, DataType, InitMethod, InitMethodType} import scala.reflect.ClassTag import scala.reflect.runtime.universe object InitMethodConverter extends DataConverter { override def getAttributeValue[T: ClassTag](context: DeserializeContext, attribute: AttrValue) (implicit ev: TensorNumeric[T]): AnyRef = { val initMemethod = attribute.getInitMethodValue val initType = initMemethod.getMethodType val methodData = initMemethod.getDataList initType match { case InitMethodType.RANDOM_UNIFORM => RandomUniform case InitMethodType.RANDOM_UNIFORM_PARAM => RandomUniform(methodData.get(0), methodData.get(1)) case InitMethodType.RANDOM_NORMAL => RandomNormal(methodData.get(0), methodData.get(1)) case InitMethodType.ZEROS => Zeros case InitMethodType.ONES => Ones case InitMethodType.CONST => ConstInitMethod(methodData.get(0)) case InitMethodType.XAVIER => Xavier case InitMethodType.BILINEARFILLER => BilinearFiller case InitMethodType.EMPTY_INITIALIZATION => null } } override def setAttributeValue[T: ClassTag]( context: SerializeContext[T], attributeBuilder: AttrValue.Builder, value: Any, valueType: universe.Type = null)(implicit ev: TensorNumeric[T]): Unit = { attributeBuilder.setDataType(DataType.INITMETHOD) val initMethodBuilder = InitMethod.newBuilder if (value != null) { val initMethod = value.asInstanceOf[InitializationMethod] initMethod match { case RandomUniform => initMethodBuilder.setMethodType(InitMethodType.RANDOM_UNIFORM) case ru: RandomUniform => initMethodBuilder.setMethodType(InitMethodType.RANDOM_UNIFORM_PARAM) initMethodBuilder.addData(ru.lower) initMethodBuilder.addData(ru.upper) case rm: RandomNormal => initMethodBuilder.setMethodType(InitMethodType.RANDOM_NORMAL) initMethodBuilder.addData(rm.mean) initMethodBuilder.addData(rm.stdv) case Zeros => initMethodBuilder.setMethodType(InitMethodType.ZEROS) case Ones => initMethodBuilder.setMethodType(InitMethodType.ONES) case const: ConstInitMethod => initMethodBuilder.setMethodType(InitMethodType.CONST) initMethodBuilder.addData(const.value) case Xavier => initMethodBuilder.setMethodType(InitMethodType.XAVIER) case BilinearFiller => initMethodBuilder.setMethodType(InitMethodType.BILINEARFILLER) } attributeBuilder.setInitMethodValue(initMethodBuilder.build) } else { initMethodBuilder.setMethodType(InitMethodType.EMPTY_INITIALIZATION) attributeBuilder.setInitMethodValue(initMethodBuilder.build) } } }
Example 30
Source File: RegularizerConverter.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.utils.serializer.converters

import com.intel.analytics.bigdl.optim.{L1L2Regularizer, L1Regularizer, L2Regularizer}
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
import com.intel.analytics.bigdl.utils.serializer.{DeserializeContext, SerializeContext}
import com.intel.analytics.bigdl.serialization.Bigdl.{AttrValue, DataType, RegularizerType, Regularizer => SerializeRegularizer}

import scala.reflect.ClassTag
import scala.reflect.runtime.universe

object RegularizerConverter extends DataConverter {

  override def getAttributeValue[T: ClassTag](context: DeserializeContext, attribute: AttrValue)
    (implicit ev: TensorNumeric[T]): AnyRef = {
    val regularizer = attribute.getRegularizerValue
    val regularizerType = regularizer.getRegularizerType
    if (regularizer.getRegularDataCount == 0) {
      return null
    }
    regularizerType match {
      case RegularizerType.L1Regularizer =>
        val l1 = regularizer.getRegularDataList.get(0)
        L1Regularizer[T](l1)
      case RegularizerType.L2Regularizer =>
        val l2 = regularizer.getRegularDataList.get(1)
        L2Regularizer[T](l2)
      case RegularizerType.L1L2Regularizer =>
        val l1 = regularizer.getRegularDataList.get(0)
        val l2 = regularizer.getRegularDataList.get(1)
        L1L2Regularizer[T](l1, l2)
    }
  }

  override def setAttributeValue[T: ClassTag]
    (context: SerializeContext[T], attributeBuilder: AttrValue.Builder, value: Any,
     valueType: universe.Type = null)
    (implicit ev: TensorNumeric[T]): Unit = {
    attributeBuilder.setDataType(DataType.REGULARIZER)
    if (value != null) {
      val regularizerBuilder = SerializeRegularizer.newBuilder
      val regularizer = value.asInstanceOf[L1L2Regularizer[T]]
      val l1 = regularizer.l1
      val l2 = regularizer.l2
      regularizerBuilder.addRegularData(l1)
      regularizerBuilder.addRegularData(l2)
      val regularizerType = regularizer match {
        case l1: L1Regularizer[_] => RegularizerType.L1Regularizer
        case l2: L2Regularizer[_] => RegularizerType.L2Regularizer
        case l1l2: L1L2Regularizer[_] => RegularizerType.L1L2Regularizer
      }
      regularizerBuilder.setRegularizerType(regularizerType)
      attributeBuilder.setRegularizerValue(regularizerBuilder.build)
    }
  }
}
Example 31
Source File: SpatialDilatedConvolution.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.nn.quantized

import com.intel.analytics.bigdl.nn.abstractnn.DataFormat
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
import com.intel.analytics.bigdl.tensor.{FloatType, QuantizedTensor, Tensor}
import com.intel.analytics.bigdl.utils.serializer.converters.DataConverter
import com.intel.analytics.bigdl.utils.serializer.{DeserializeContext, ModuleData, SerializeContext}
import com.intel.analytics.bigdl.serialization.Bigdl.{AttrValue, BigDLModule}

import scala.reflect.ClassTag
import scala.reflect.runtime.universe

@SerialVersionUID(-8572055756810843156L)
private[bigdl] class SpatialDilatedConvolution[T: ClassTag](
  nInputPlane: Int, // The number of expected input planes in the image given into forward()
  nOutputPlane: Int, // The number of output planes the convolution layer will produce.
  kernelW: Int, // The kernel width of the convolution
  kernelH: Int, // The kernel height of the convolution
  strideW: Int = 1, // The step of the convolution in the width dimension.
  strideH: Int = 1, // The step of the convolution in the height dimension.
  padW: Int = 0, // The additional zeros added per width to the input planes.
  padH: Int = 0, // The additional zeros added per height to the input planes.
  val dilationW: Int = 1,
  val dilationH: Int = 1,
  format: DataFormat = DataFormat.NCHW
)(implicit ev: TensorNumeric[T]) extends SpatialConvolution[T](
  nInputPlane, nOutputPlane, kernelW, kernelH, strideW, strideH, padW, padH, format = format
) {

  override val dilationWidth: Int = dilationW
  override val dilationHeight: Int = dilationH

  override def toString(): String = {
    s"quantized.SpatialDilatedConvolution($nInputPlane -> $nOutputPlane, $kernelW x" +
      s" $kernelH, $strideW, $strideH, $padW, $padH, $dilationW, $dilationH)"
  }
}

object SpatialDilatedConvolution extends QuantSerializer {
  def apply[T: ClassTag](
    nInputPlane: Int,
    nOutputPlane: Int,
    kW: Int,
    kH: Int,
    dW: Int = 1,
    dH: Int = 1,
    padW: Int = 0,
    padH: Int = 0,
    dilationW: Int = 1,
    dilationH: Int = 1,
    initWeight: Tensor[T] = null,
    initBias: Tensor[T] = null,
    format: DataFormat = DataFormat.NCHW
  )(implicit ev: TensorNumeric[T]): SpatialDilatedConvolution[T] = {
    val conv = new SpatialDilatedConvolution[T](nInputPlane, nOutputPlane, kW, kH, dW, dH,
      padW, padH, dilationW, dilationH, format = format)
    conv.initWeightAndBias(initWeight, initBias)
  }

  override def serializeWeight[T: ClassTag](context: SerializeContext[T],
    modelBuilder: BigDLModule.Builder)(implicit ev: TensorNumeric[T]): Unit = {
    val module = context.moduleData.module
    val conv = module.asInstanceOf[SpatialDilatedConvolution[T]]
    val weightBuilder = AttrValue.newBuilder
    ev.getType() match {
      case FloatType =>
        DataConverter.setAttributeValue(context, weightBuilder, conv.weight,
          universe.typeOf[Array[Tensor[Float]]])
      case _ => throw new UnsupportedOperationException(s"Only support Float for quantized model")
    }
    modelBuilder.putAttr("weights", weightBuilder.build)
  }

  override def loadWeight[T: ClassTag](context: DeserializeContext,
    moduleData: ModuleData[T])(implicit ev: TensorNumeric[T]): Unit = {
    val conv = moduleData.module.asInstanceOf[SpatialDilatedConvolution[T]]
    val attrMap = context.bigdlModule.getAttrMap
    val weights = DataConverter.getAttributeValue(context, attrMap.get("weights"))
      .asInstanceOf[Array[Tensor[T]]]
    for (i <- 0 until conv.weight.length) {
      conv.weight(i).asInstanceOf[QuantizedTensor[T]].release()
      conv.weight(i).set(weights(i))
    }
  }
}
Example 32
Source File: RandomUniform.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.nn.ops

import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity}
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
import com.intel.analytics.bigdl.tensor._
import com.intel.analytics.bigdl.utils.RandomGenerator
import com.intel.analytics.bigdl.utils.serializer.converters.DataConverter
import com.intel.analytics.bigdl.utils.serializer.{DeserializeContext, ModuleSerializable, SerializeContext}
import com.intel.analytics.bigdl.serialization.Bigdl.{AttrValue, BigDLModule}

import scala.reflect.ClassTag
import scala.reflect.runtime.universe

private[bigdl] trait RandomNode

class RandomUniform[T: ClassTag, D: ClassTag](
  val minVal: Double,
  val maxVal: Double,
  val seed: Option[Int] = None
)(implicit ev: TensorNumeric[T], ev2: TensorNumeric[D])
  extends Operation[Tensor[Int], Tensor[D], T] with RandomNode {

  if (seed.isDefined) {
    RandomGenerator.RNG.setSeed(seed.get)
  }

  output = Activity.allocate[Tensor[D], D]()

  override def updateOutput(input: Tensor[Int]): Tensor[D] = {
    require(input.nDimension() == 1, "the shape should be a one-dimensional tensor.")
    val shape = input.storage().toArray
    output.resize(shape).rand(minVal, maxVal)
    output
  }

  override def getClassTagNumerics(): (Array[ClassTag[_]], Array[TensorNumeric[_]]) = {
    (Array[ClassTag[_]](scala.reflect.classTag[T], scala.reflect.classTag[D]),
      Array[TensorNumeric[_]](ev, ev2))
  }
}

object RandomUniform extends ModuleSerializable {
  def apply[T: ClassTag, D: ClassTag](
    minVal: Double,
    maxVal: Double,
    seed: Option[Int] = None)
    (implicit ev: TensorNumeric[T], ev2: TensorNumeric[D]): Operation[Activity, Activity, T] =
    ModuleToOperation[T](new RandomUniform[T, D](minVal, maxVal, seed))

  override def doSerializeModule[T: ClassTag](context: SerializeContext[T],
    bigDLModelBuilder: BigDLModule.Builder)(implicit ev: TensorNumeric[T]): Unit = {
    val randomUniform = context.moduleData.module.asInstanceOf[RandomUniform[T, _]]

    val minValBuilder = AttrValue.newBuilder
    DataConverter.setAttributeValue(context, minValBuilder, randomUniform.minVal,
      universe.typeOf[Double])
    bigDLModelBuilder.putAttr("minVal", minValBuilder.build)

    val maxValBuilder = AttrValue.newBuilder
    DataConverter.setAttributeValue(context, maxValBuilder, randomUniform.maxVal,
      universe.typeOf[Double])
    bigDLModelBuilder.putAttr("maxVal", maxValBuilder.build)

    if (randomUniform.seed.isDefined) {
      val seedBuilder = AttrValue.newBuilder
      DataConverter.setAttributeValue(context, seedBuilder, randomUniform.seed.get,
        universe.typeOf[Int])
      bigDLModelBuilder.putAttr("seed", seedBuilder.build)
    }
  }

  override def doLoadModule[T: ClassTag](context: DeserializeContext)
    (implicit ev: TensorNumeric[T]): AbstractModule[Activity, Activity, T] = {
    val attrMap = context.bigdlModule.getAttrMap
    val minVal = attrMap.get("minVal").getDoubleValue
    val maxVal = attrMap.get("maxVal").getDoubleValue
    var seed: Option[Int] = None
    if (attrMap.containsKey("seed")) {
      seed = Option[Int](attrMap.get("seed").getInt32Value)
    }
    RandomUniform(minVal, maxVal, seed)
  }
}
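Note: the BigDL converters above pass an explicit universe.Type (for example universe.typeOf[Double]) so the serializer knows which attribute encoding to use. The following is a minimal, standalone sketch of how such Type values are obtained and compared at runtime; the object name TypeTagDemo is illustrative and nothing here depends on BigDL.

import scala.reflect.runtime.universe

object TypeTagDemo {
  def main(args: Array[String]): Unit = {
    // typeOf requires a statically known type and yields a universe.Type value
    val doubleType: universe.Type = universe.typeOf[Double]
    val listType: universe.Type = universe.typeOf[List[Int]]

    // Types are compared structurally with =:= (equality) and <:< (subtyping)
    println(doubleType =:= universe.typeOf[Double]) // true
    println(listType <:< universe.typeOf[Seq[Int]]) // true

    // typeTag[T].tpe is an equivalent way to obtain the same Type
    println(universe.typeTag[Double].tpe =:= doubleType) // true
  }
}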
Example 33
Source File: SimpleScalaRiakDataframesExample.scala From spark-riak-connector with Apache License 2.0 | 5 votes |
package com.basho.riak.spark.examples.dataframes

import com.basho.riak.client.core.query.indexes.LongIntIndex
import com.basho.riak.client.core.query.Namespace
import com.basho.riak.spark._
import com.basho.riak.spark.util.RiakObjectConversionUtil
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

import scala.reflect.runtime.universe
import scala.concurrent.Future
import scala.concurrent.ExecutionContext.Implicits.global
import scala.util.{Failure, Success}
import com.basho.riak.client.core.query.RiakObject
import com.basho.riak.client.api.RiakClient
import com.basho.riak.client.core.query.Location
import com.basho.riak.spark.rdd.RiakFunctions

object SimpleScalaRiakDataframesExample {
  private val bucketName = "users"

  case class UserData(user_id: String, name: String, age: Int, category: String)

  val testData = Seq(
    UserData("u1", "Ben", 23, "CategoryA"),
    UserData("u2", "Clair", 19, "CategoryB"),
    UserData("u3", "John", 21, null),
    UserData("u4", "Chris", 50, "Categoryc"),
    UserData("u5", "Mary", 15, "CategoryB"),
    UserData("u6", "George", 31, "CategoryC")
  )

  def main(args: Array[String]) {
    val sparkConf = new SparkConf().setAppName("Riak Spark Dataframes Example")
    setSparkOpt(sparkConf, "spark.master", "local")
    setSparkOpt(sparkConf, "spark.riak.connection.host", "127.0.0.1:8087")

    val sc = new SparkContext(sparkConf)

    // Work with a clear bucket
    clearBucket(sparkConf)

    val sqlContext = new org.apache.spark.sql.SQLContext(sc)

    // To enable toDF()
    import sqlContext.implicits._

    println(s"Saving data to Riak:\n$testData")

    // Save the test data to the Riak bucket
    sc.parallelize(testData).map { line =>
      val obj = RiakObjectConversionUtil.to(line)
      // RiakObjectConversionUtil.to() sets content type to text/plain if String is passed
      // Overwriting content type to application/json will allow automatic conversion to
      // User defined type when reading from Riak
      obj.setContentType("application/json")
      obj
    }.saveToRiak(bucketName)

    // Read from Riak with UDT to enable schema inference using reflection
    val df = sc.riakBucket[UserData](bucketName).queryAll.toDF

    println("Dataframe from Riak query:")
    df.show()

    df.registerTempTable("users")

    println("count by category")
    df.groupBy("category").count.show

    println("sort by num of letters")
    // Register user defined function
    sqlContext.udf.register("stringLength", (s: String) => s.length)
    sqlContext.sql("select user_id, name, stringLength(name) nameLength from users order by nameLength").show

    println("filter age >= 21")
    sqlContext.sql("select * from users where age >= 21").show
  }

  private def clearBucket(sparkConf: SparkConf): Unit = {
    val rf = RiakFunctions(sparkConf)
    rf.withRiakDo(session => {
      rf.resetAndEmptyBucketByName(bucketName)
    })
  }

  private def setSparkOpt(sparkConf: SparkConf, option: String, defaultOptVal: String): SparkConf = {
    val optval = sparkConf.getOption(option).getOrElse(defaultOptVal)
    sparkConf.set(option, optval)
  }
}
Example 34
Source File: SparkDataframesTest.scala From spark-riak-connector with Apache License 2.0 | 5 votes |
package com.basho.riak.spark.rdd

import scala.reflect.runtime.universe
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.SQLContext
import org.junit.Assert._
import org.junit.{Before, Test}
import com.basho.riak.spark.toSparkContextFunctions
import org.junit.experimental.categories.Category

case class TestData(id: String, name: String, age: Int, category: String)

@Category(Array(classOf[RiakTSTests]))
class SparkDataframesTest extends AbstractRiakSparkTest {
  private val indexName = "creationNo"

  protected override val jsonData = Some(
    """[
      | {key: 'key1', value: {id: 'u1', name: 'Ben', age: 20, category: 'CategoryA'}},
      | {key: 'key2', value: {id: 'u2', name: 'Clair', age: 30, category: 'CategoryB'}},
      | {key: 'key3', value: {id: 'u3', name: 'John', age: 70}},
      | {key: 'key4', value: {id: 'u4', name: 'Chris', age: 10, category: 'CategoryC'}},
      | {key: 'key5', value: {id: 'u5', name: 'Mary', age: 40, category: 'CategoryB'}},
      | {key: 'key6', value: {id: 'u6', name: 'George', age: 50, category: 'CategoryC'}}
      |]""".stripMargin)

  protected override def initSparkConf() = super.initSparkConf().setAppName("Dataframes Test")

  var sqlContextHolder: SQLContext = _
  var df: DataFrame = _

  @Before
  def initializeDF(): Unit = {
    val sqlContext = new org.apache.spark.sql.SQLContext(sc)
    import sqlContext.implicits._
    sqlContextHolder = sqlContext
    df = sc.riakBucket[TestData](DEFAULT_NAMESPACE.getBucketNameAsString)
      .queryAll().toDF
    df.registerTempTable("test")
  }

  @Test
  def schemaTest(): Unit = {
    df.printSchema()
    val schema = df.schema.map(_.name).toList
    val fields = universe.typeOf[TestData].members.withFilter(!_.isMethod).map(_.name.toString.trim).toList
    assertEquals(schema.sorted, fields.sorted)
  }

  @Test
  def sqlQueryTest(): Unit = {
    val sqlResult = sqlContextHolder.sql("select * from test where category >= 'CategoryC'").toJSON.collect
    val expected =
      """ [
        | {id:'u4',name:'Chris',age:10,category:'CategoryC'},
        | {id:'u6',name:'George',age:50,category:'CategoryC'}
        | ]""".stripMargin
    assertEqualsUsingJSONIgnoreOrder(expected, stringify(sqlResult))
  }

  @Test
  def udfTest(): Unit = {
    sqlContextHolder.udf.register("stringLength", (s: String) => s.length)
    val udf = sqlContextHolder.sql("select name, stringLength(name) strLgth from test order by strLgth, name").toJSON.collect
    val expected =
      """ [
        | {name:'Ben',strLgth:3},
        | {name:'John',strLgth:4},
        | {name:'Mary',strLgth:4},
        | {name:'Chris',strLgth:5},
        | {name:'Clair',strLgth:5},
        | {name:'George',strLgth:6}
        | ]""".stripMargin
    assertEqualsUsingJSON(expected, stringify(udf))
  }

  @Test
  def grouppingTest(): Unit = {
    val groupped = df.groupBy("category").count.toJSON.collect
    val expected =
      """ [
        | {category:'CategoryA',count:1},
        | {category:'CategoryB',count:2},
        | {category:'CategoryC',count:2},
        | {count:1}
        | ]""".stripMargin
    assertEqualsUsingJSONIgnoreOrder(expected, stringify(groupped))
  }

  @Test
  def sqlVsFilterTest(): Unit = {
    val sql = sqlContextHolder.sql("select id, name from test where age >= 50").toJSON.collect
    val filtered = df.where(df("age") >= 50).select("id", "name").toJSON.collect
    assertEqualsUsingJSONIgnoreOrder(stringify(sql), stringify(filtered))
  }
}
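Note: schemaTest above uses runtime reflection to enumerate the constructor fields of a case class and compare them with the DataFrame schema. A small self-contained sketch of that idiom, with no Spark or Riak required; the object name CaseClassFields is illustrative.

import scala.reflect.runtime.universe

object CaseClassFields {
  case class TestData(id: String, name: String, age: Int, category: String)

  def main(args: Array[String]): Unit = {
    // Non-method term members of a case class type are its backing fields;
    // their symbol names carry a trailing space, hence the trim.
    val fields = universe.typeOf[TestData].members
      .withFilter(!_.isMethod)
      .map(_.name.toString.trim)
      .toList

    println(fields.sorted) // List(age, category, id, name)
  }
}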
Example 35
Source File: LocalLDAModel.scala From spark-ml-serving with Apache License 2.0 | 5 votes |
package io.hydrosphere.spark_ml_serving.clustering

import io.hydrosphere.spark_ml_serving.TypedTransformerConverter
import io.hydrosphere.spark_ml_serving.common._
import io.hydrosphere.spark_ml_serving.common.utils.{DataUtils, ParamUtils}
import org.apache.spark.ml.clustering.{LocalLDAModel => SparkLocalLDA}
import org.apache.spark.mllib.clustering.{LocalLDAModel => OldSparkLocalLDA}
import org.apache.spark.mllib.linalg.{Matrices, Matrix, Vector, Vectors}
import org.apache.spark.sql.SparkSession
import DataUtils._

import scala.reflect.runtime.universe

class LocalLDAModel(override val sparkTransformer: SparkLocalLDA)
  extends LocalTransformer[SparkLocalLDA] {

  lazy val oldModel: OldSparkLocalLDA = {
    val mirror = universe.runtimeMirror(sparkTransformer.getClass.getClassLoader)
    val parentTerm = universe.typeOf[SparkLocalLDA].decl(universe.TermName("oldLocalModel")).asTerm
    mirror.reflect(sparkTransformer).reflectField(parentTerm).get.asInstanceOf[OldSparkLocalLDA]
  }

  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getFeaturesCol) match {
      case Some(column) =>
        val newData = column.data.mapToMlLibVectors.map(oldModel.topicDistribution(_).toList)
        localData.withColumn(
          LocalDataColumn(
            sparkTransformer.getTopicDistributionCol,
            newData
          )
        )
      case None => localData
    }
  }
}

object LocalLDAModel
  extends SimpleModelLoader[SparkLocalLDA]
  with TypedTransformerConverter[SparkLocalLDA] {

  override def build(metadata: Metadata, data: LocalData): SparkLocalLDA = {
    val topics = DataUtils.constructMatrix(
      data.column("topicsMatrix").get.data.head.asInstanceOf[Map[String, Any]]
    )
    val gammaShape = data.column("gammaShape").get.data.head.asInstanceOf[java.lang.Double]
    val topicConcentration =
      data.column("topicConcentration").get.data.head.asInstanceOf[java.lang.Double]
    val docConcentration = DataUtils.constructVector(
      data.column("docConcentration").get.data.head.asInstanceOf[Map[String, Any]]
    )
    val vocabSize = data.column("vocabSize").get.data.head.asInstanceOf[java.lang.Integer]

    val oldLdaCtor = classOf[OldSparkLocalLDA].getDeclaredConstructor(
      classOf[Matrix],
      classOf[Vector],
      classOf[Double],
      classOf[Double]
    )
    val oldLDA = oldLdaCtor.newInstance(
      Matrices.fromML(topics),
      Vectors.fromML(docConcentration),
      topicConcentration,
      gammaShape
    )

    val ldaCtor = classOf[SparkLocalLDA].getDeclaredConstructor(
      classOf[String],
      classOf[Int],
      classOf[OldSparkLocalLDA],
      classOf[SparkSession]
    )
    val lda = ldaCtor.newInstance(metadata.uid, vocabSize, oldLDA, null)

    ParamUtils.set(lda, lda.optimizer, metadata)
    ParamUtils.set(lda, lda.keepLastCheckpoint, metadata)
    ParamUtils.set(lda, lda.seed, metadata)
    ParamUtils.set(lda, lda.featuresCol, metadata)
    ParamUtils.set(lda, lda.learningDecay, metadata)
    ParamUtils.set(lda, lda.checkpointInterval, metadata)
    ParamUtils.set(lda, lda.learningOffset, metadata)
    ParamUtils.set(lda, lda.maxIter, metadata)
    ParamUtils.set(lda, lda.k, metadata)
    lda
  }

  override implicit def toLocal(sparkTransformer: SparkLocalLDA): LocalTransformer[SparkLocalLDA] =
    new LocalLDAModel(sparkTransformer)
}
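Note: the oldModel accessor above uses a runtime mirror to read a field that Spark does not expose publicly. A small self-contained sketch of the same field-reflection idiom on a plain class; Holder, secret, and FieldReflectionDemo are illustrative names, not part of any library.

import scala.reflect.runtime.universe

object FieldReflectionDemo {
  // A stand-in for a class whose internals we want to read reflectively
  class Holder { private val secret: Int = 42 }

  def main(args: Array[String]): Unit = {
    val instance = new Holder

    // A mirror rooted at the classloader that defined the instance
    val mirror = universe.runtimeMirror(instance.getClass.getClassLoader)

    // Locate the member symbol on the type, then reflect it on the instance
    val term = universe.typeOf[Holder].decl(universe.TermName("secret")).asTerm
    val value = mirror.reflect(instance).reflectField(term).get.asInstanceOf[Int]

    println(value) // 42
  }
}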
Example 36
Source File: HBaseCredentialProvider.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.yarn.security

import scala.reflect.runtime.universe
import scala.util.control.NonFatal

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.security.Credentials
import org.apache.hadoop.security.token.{Token, TokenIdentifier}

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging

private[security] class HBaseCredentialProvider extends ServiceCredentialProvider with Logging {

  override def serviceName: String = "hbase"

  override def obtainCredentials(
      hadoopConf: Configuration,
      sparkConf: SparkConf,
      creds: Credentials): Option[Long] = {
    try {
      val mirror = universe.runtimeMirror(getClass.getClassLoader)
      val obtainToken = mirror.classLoader.
        loadClass("org.apache.hadoop.hbase.security.token.TokenUtil").
        getMethod("obtainToken", classOf[Configuration])

      logDebug("Attempting to fetch HBase security token.")
      val token = obtainToken.invoke(null, hbaseConf(hadoopConf))
        .asInstanceOf[Token[_ <: TokenIdentifier]]
      logInfo(s"Get token from HBase: ${token.toString}")
      creds.addToken(token.getService, token)
    } catch {
      case NonFatal(e) =>
        logDebug(s"Failed to get token from service $serviceName", e)
    }

    None
  }

  override def credentialsRequired(hadoopConf: Configuration): Boolean = {
    hbaseConf(hadoopConf).get("hbase.security.authentication") == "kerberos"
  }

  private def hbaseConf(conf: Configuration): Configuration = {
    try {
      val mirror = universe.runtimeMirror(getClass.getClassLoader)
      val confCreate = mirror.classLoader.
        loadClass("org.apache.hadoop.hbase.HBaseConfiguration").
        getMethod("create", classOf[Configuration])
      confCreate.invoke(null, conf).asInstanceOf[Configuration]
    } catch {
      case NonFatal(e) =>
        logDebug("Fail to invoke HBaseConfiguration", e)
        conf
    }
  }
}
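Note: HBaseCredentialProvider uses the runtime mirror only to reach a classloader, then falls back to plain Java reflection so Spark can build without a compile-time HBase dependency. Below is a minimal sketch of the same reflective static-method call, run here against a JDK class so it works anywhere; ReflectiveStaticCall is an illustrative name, and the real provider targets the HBase classes shown above.

import scala.reflect.runtime.universe

object ReflectiveStaticCall {
  def main(args: Array[String]): Unit = {
    // The mirror supplies a classloader without hard-coding one
    val mirror = universe.runtimeMirror(getClass.getClassLoader)

    // Load a class by name and look up a static method, as the provider does for TokenUtil
    val getProperty = mirror.classLoader
      .loadClass("java.lang.System")
      .getMethod("getProperty", classOf[String])

    // Static methods are invoked with a null receiver
    val javaVersion = getProperty.invoke(null, "java.version").asInstanceOf[String]
    println(javaVersion)
  }
}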
Example 37
Source File: PumpedClass.scala From spark-ml-serving with Apache License 2.0 | 5 votes |
package io.hydrosphere.spark_ml_serving.common.utils

import org.apache.spark.ml.Transformer

import scala.reflect.runtime.universe

class PumpedClass(classz: Class[_]) {
  def companion: Any = {
    val companionClassName = classz.getName + "$"
    val companionClass = Class.forName(companionClassName)
    val moduleField = companionClass.getField("MODULE$")
    moduleField.get(null)
  }
}

object PumpedClass {
  def companionFromClassName(className: String): Any = {
    val runtimeMirror = universe.runtimeMirror(this.getClass.getClassLoader)
    val module = runtimeMirror.staticModule(className + "$")
    val obj = runtimeMirror.reflectModule(module)
    obj.instance
  }
}
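Note: companionFromClassName resolves a Scala object purely by name at runtime. A self-contained sketch of the same staticModule/reflectModule lookup against a standard-library object, so it can be run without the serving library on the classpath; ModuleLookupDemo is an illustrative name.

import scala.reflect.runtime.universe

object ModuleLookupDemo {
  def main(args: Array[String]): Unit = {
    val runtimeMirror = universe.runtimeMirror(getClass.getClassLoader)

    // staticModule resolves an object by its fully qualified Scala name
    val module = runtimeMirror.staticModule("scala.collection.immutable.Nil")

    // reflectModule exposes the singleton instance behind the object
    val instance = runtimeMirror.reflectModule(module).instance

    println(instance == Nil) // true
  }
}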
Example 38
Source File: L8-1DataFrameAPI.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark

import scala.reflect.runtime.universe

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.functions.desc
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext

object CdrDataframeApp {

  case class Cdr(squareId: Int, timeInterval: Long, countryCode: Int,
    smsInActivity: Float, smsOutActivity: Float, callInActivity: Float,
    callOutActivity: Float, internetTrafficActivity: Float)

  def main(args: Array[String]) {
    if (args.length != 4) {
      System.err.println(
        "Usage: CdrDataframeApp <appname> <batchInterval> <hostname> <port>")
      System.exit(1)
    }
    val Seq(appName, batchInterval, hostname, port) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt))

    val sqlC = new SQLContext(ssc.sparkContext)
    import sqlC.implicits._

    val cdrStream = ssc.socketTextStream(hostname, port.toInt)
      .map(_.split("\\t", -1))
      .foreachRDD(rdd => {
        val cdrs = seqToCdr(rdd).toDF()
        cdrs.groupBy("countryCode").count().orderBy(desc("count")).show(5)
      })

    ssc.start()
    ssc.awaitTermination()
  }

  def seqToCdr(rdd: RDD[Array[String]]): RDD[Cdr] = {
    rdd.map(c => c.map(f => f match {
      case x if x.isEmpty() => "0"
      case x => x
    })).map(c => Cdr(c(0).toInt, c(1).toLong, c(2).toInt, c(3).toFloat,
      c(4).toFloat, c(5).toFloat, c(6).toFloat, c(7).toFloat))
  }
}
Example 39
Source File: L8-3-6-7DataFrameCreation.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark

import scala.reflect.runtime.universe

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.functions.desc
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.json4s.native.Serialization.write
import org.json4s.DefaultFormats

object DataframeCreationApp {

  case class Cdr(squareId: Int, timeInterval: Long, countryCode: Int,
    smsInActivity: Float, smsOutActivity: Float, callInActivity: Float,
    callOutActivity: Float, internetTrafficActivity: Float)

  def main(args: Array[String]) {
    if (args.length != 4) {
      System.err.println(
        "Usage: DataframeCreationApp <appname> <batchInterval> <hostname> <port>")
      System.exit(1)
    }
    val Seq(appName, batchInterval, hostname, port) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt))

    val sqlC = new SQLContext(ssc.sparkContext)
    import sqlC.implicits._

    val cdrStream = ssc.socketTextStream(hostname, port.toInt)
      .map(_.split("\\t", -1))
      .foreachRDD(rdd => {
        //val cdrs = sqlC.createDataFrame(seqToCdr(rdd))
        //val cdrs = sqlC.createDataFrame(seqToCdr(rdd).collect())
        //val cdrs = seqToCdr(rdd).toDF()
        val cdrsJson = seqToCdr(rdd).map(r => {
          implicit val formats = DefaultFormats
          write(r)
        })
        val cdrs = sqlC.read.json(cdrsJson)

        cdrs.groupBy("countryCode").count().orderBy(desc("count")).show(5)
      })

    ssc.start()
    ssc.awaitTermination()
  }

  def seqToCdr(rdd: RDD[Array[String]]): RDD[Cdr] = {
    rdd.map(c => c.map(f => f match {
      case x if x.isEmpty() => "0"
      case x => x
    })).map(c => Cdr(c(0).toInt, c(1).toLong, c(2).toInt, c(3).toFloat,
      c(4).toFloat, c(5).toFloat, c(6).toFloat, c(7).toFloat))
  }
}
Example 40
Source File: L8-29DataFrameExamplesJoin.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark

import scala.reflect.runtime.universe

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.json4s.DefaultFormats
import org.json4s.JDouble
import org.json4s.JObject
import org.json4s.jvalue2extractable
import org.json4s.jvalue2monadic
import org.json4s.native.JsonMethods.compact
import org.json4s.native.JsonMethods.parse
import org.json4s.native.JsonMethods.render
import org.json4s.string2JsonInput

object CdrDataframeExamples3App {

  case class Cdr(squareId: Int, timeInterval: Long, countryCode: Int,
    smsInActivity: Float, smsOutActivity: Float, callInActivity: Float,
    callOutActivity: Float, internetTrafficActivity: Float)

  def main(args: Array[String]) {
    if (args.length != 5) {
      System.err.println(
        "Usage: CdrDataframeExamples3App <appname> <batchInterval> <hostname> <port> <gridJsonPath>")
      System.exit(1)
    }
    val Seq(appName, batchInterval, hostname, port, gridJsonPath) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt))

    val sqlC = new SQLContext(ssc.sparkContext)
    import sqlC.implicits._
    implicit val formats = DefaultFormats

    val gridFile = scala.io.Source.fromFile(gridJsonPath).mkString
    val gridGeo = (parse(gridFile) \ "features")
    val gridStr = gridGeo.children.map(r => {
      val c = (r \ "geometry" \ "coordinates").extract[List[List[List[Float]]]]
        .flatten.flatten.map(r => JDouble(r))
      val l = List(("id", r \ "id"), ("x1", c(0)), ("y1", c(1)), ("x2", c(2)), ("y2", c(3)),
        ("x3", c(4)), ("y3", c(5)), ("x4", c(6)), ("y4", c(7)))
      compact(render(JObject(l)))
    })

    val gridDF = sqlC.read.json(ssc.sparkContext.makeRDD(gridStr))

    val cdrStream = ssc.socketTextStream(hostname, port.toInt)
      .map(_.split("\\t", -1))
      .foreachRDD(rdd => {
        val cdrs = seqToCdr(rdd).toDF()
        cdrs.join(gridDF, $"squareId" === $"id").show()
      })

    ssc.start()
    ssc.awaitTermination()
  }

  def seqToCdr(rdd: RDD[Array[String]]): RDD[Cdr] = {
    rdd.map(c => c.map(f => f match {
      case x if x.isEmpty() => "0"
      case x => x
    })).map(c => Cdr(c(0).toInt, c(1).toLong, c(2).toInt, c(3).toFloat,
      c(4).toFloat, c(5).toFloat, c(6).toFloat, c(7).toFloat))
  }
}
Example 41
Source File: L8-38SparkR.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark

import scala.reflect.runtime.universe

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import java.nio.file.Paths
import org.apache.spark.SparkFiles

object CdrStreamingSparkRApp {

  case class Cdr(squareId: Int, timeInterval: Long, countryCode: Int,
    smsInActivity: Float, smsOutActivity: Float, callInActivity: Float,
    callOutActivity: Float, internetTrafficActivity: Float)

  def main(args: Array[String]) {
    if (args.length != 7) {
      System.err.println(
        "Usage: CdrStreamingSparkRApp <appname> <batchInterval> <hostname> <port> <tableName> <RScriptPath> <RScriptLogsPath>")
      System.exit(1)
    }
    val Seq(appName, batchInterval, hostname, port, tableName, rScriptPath, logsPath) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt))

    val cl = Thread.currentThread().getContextClassLoader()
    val hiveC = new HiveContext(ssc.sparkContext)
    Thread.currentThread().setContextClassLoader(cl)
    import hiveC.implicits._

    ssc.sparkContext.addFile(rScriptPath)
    val rScriptName = SparkFiles.get(Paths.get(rScriptPath).getFileName.toString)
    val master = hiveC.sparkContext.getConf.get("spark.master")

    val cdrStream = ssc.socketTextStream(hostname, port.toInt)
      .map(_.split("\\t", -1))
      .foreachRDD((rdd, time) => {
        val iTableName = tableName + time.milliseconds
        seqToCdr(rdd).toDF().write.saveAsTable(iTableName)
        hiveC.sparkContext.parallelize(Array(iTableName))
          .pipe("%s %s".format(rScriptName, master))
          .saveAsTextFile(Paths.get(logsPath, iTableName).toString)
      })

    ssc.start()
    ssc.awaitTermination()
  }

  def seqToCdr(rdd: RDD[Array[String]]): RDD[Cdr] = {
    rdd.map(c => c.map(f => f match {
      case x if x.isEmpty() => "0"
      case x => x
    })).map(c => Cdr(c(0).toInt, c(1).toLong, c(2).toInt, c(3).toFloat,
      c(4).toFloat, c(5).toFloat, c(6).toFloat, c(7).toFloat))
  }
}
Example 42
Source File: T8-5-L8-30-34DataFrameExamplesActions.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark

import scala.reflect.runtime.universe

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.functions.desc
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.apress.prospark.CdrDataframeExamplesActionsApp.Cdr
import org.json4s.DefaultFormats

object CdrDataframeExamplesActionsApp {

  case class Cdr(squareId: Int, timeInterval: Long, countryCode: Int,
    smsInActivity: Float, smsOutActivity: Float, callInActivity: Float,
    callOutActivity: Float, internetTrafficActivity: Float)

  def main(args: Array[String]) {
    if (args.length != 4) {
      System.err.println(
        "Usage: CdrDataframeExamplesActionsApp <appname> <batchInterval> <hostname> <port>")
      System.exit(1)
    }
    val Seq(appName, batchInterval, hostname, port) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt))

    val cl = Thread.currentThread().getContextClassLoader()
    val hiveC = new HiveContext(ssc.sparkContext)
    Thread.currentThread().setContextClassLoader(cl)
    import hiveC.implicits._
    implicit val formats = DefaultFormats

    val cdrStream = ssc.socketTextStream(hostname, port.toInt)
      .map(_.split("\\t", -1))
      .foreachRDD(rdd => {
        val cdrs = seqToCdr(rdd).toDF()
        val counts = cdrs.groupBy("countryCode").count().orderBy(desc("count"))
        counts.show(5)
        counts.show()
        println("head(5): " + counts.head(5))
        println("take(5): " + counts.take(5))
        println("head(): " + counts.head())
        println("first(): " + counts.first())
        println("count(): " + counts.count())
        println("collect(): " + counts.collect())
        println("collectAsList(): " + counts.collectAsList())
        println("describe(): " + cdrs.describe("smsInActivity", "smsOutActivity",
          "callInActivity", "callOutActivity", "internetTrafficActivity").show())
        counts.write.format("parquet").save("/tmp/parquent" + rdd.id)
        counts.write.format("json").save("/tmp/json" + rdd.id)
        counts.write.parquet("/tmp/parquent2" + rdd.id)
        counts.write.json("/tmp/json2" + rdd.id)
        counts.write.saveAsTable("count_table")
        cdrs.groupBy("countryCode").count().orderBy(desc("count"))
          .write.mode(SaveMode.Append).save("/tmp/counts")
        val prop: java.util.Properties = new java.util.Properties()
        counts.write.jdbc("jdbc:mysql://hostname:port/cdrsdb", "count_table", prop)
      })

    ssc.start()
    ssc.awaitTermination()
  }

  def seqToCdr(rdd: RDD[Array[String]]): RDD[Cdr] = {
    rdd.map(c => c.map(f => f match {
      case x if x.isEmpty() => "0"
      case x => x
    })).map(c => Cdr(c(0).toInt, c(1).toLong, c(2).toInt, c(3).toFloat,
      c(4).toFloat, c(5).toFloat, c(6).toFloat, c(7).toFloat))
  }
}
Example 43
Source File: L8-10-11UDF.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark

import scala.io.Source
import scala.reflect.runtime.universe

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.json4s.jackson.JsonMethods.parse
import org.json4s.jvalue2extractable
import org.json4s.string2JsonInput

object CdrUDFApp {

  case class Cdr(squareId: Int, timeInterval: Long, countryCode: Int,
    smsInActivity: Float, smsOutActivity: Float, callInActivity: Float,
    callOutActivity: Float, internetTrafficActivity: Float)

  def main(args: Array[String]) {
    if (args.length != 4) {
      System.err.println(
        "Usage: CdrUDFApp <appname> <batchInterval> <hostname> <port>")
      System.exit(1)
    }
    val Seq(appName, batchInterval, hostname, port) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt))

    val sqlC = new SQLContext(ssc.sparkContext)
    import sqlC.implicits._

    def getCountryCodeMapping() = {
      implicit val formats = org.json4s.DefaultFormats
      parse(Source.fromURL("http://country.io/phone.json").mkString)
        .extract[Map[String, String]].map(_.swap)
    }

    def getCountryNameMapping() = {
      implicit val formats = org.json4s.DefaultFormats
      parse(Source.fromURL("http://country.io/names.json").mkString)
        .extract[Map[String, String]]
    }

    def getCountryName(mappingPhone: Map[String, String], mappingName: Map[String, String], code: Int) = {
      mappingName.getOrElse(mappingPhone.getOrElse(code.toString, "NotFound"), "NotFound")
    }

    val getCountryNamePartial = getCountryName(getCountryCodeMapping(), getCountryNameMapping(), _: Int)

    sqlC.udf.register("getCountryNamePartial", getCountryNamePartial)

    val cdrStream = ssc.socketTextStream(hostname, port.toInt)
      .map(_.split("\\t", -1))
      .foreachRDD(rdd => {
        val cdrs = seqToCdr(rdd).toDF()
        cdrs.registerTempTable("cdrs")

        sqlC.sql("SELECT getCountryNamePartial(countryCode) AS countryName, COUNT(countryCode) AS cCount FROM cdrs GROUP BY countryCode ORDER BY cCount DESC LIMIT 5").show()
      })

    ssc.start()
    ssc.awaitTermination()
  }

  def seqToCdr(rdd: RDD[Array[String]]): RDD[Cdr] = {
    rdd.map(c => c.map(f => f match {
      case x if x.isEmpty() => "0"
      case x => x
    })).map(c => Cdr(c(0).toInt, c(1).toLong, c(2).toInt, c(3).toFloat,
      c(4).toFloat, c(5).toFloat, c(6).toFloat, c(7).toFloat))
  }
}
Example 44
Source File: L8-14-27DataFrameExamples.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark

import scala.reflect.runtime.universe

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.functions._
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext

object CdrDataframeExamplesApp {

  case class Cdr(squareId: Int, timeInterval: Long, countryCode: Int,
    smsInActivity: Float, smsOutActivity: Float, callInActivity: Float,
    callOutActivity: Float, internetTrafficActivity: Float)

  def main(args: Array[String]) {
    if (args.length != 4) {
      System.err.println(
        "Usage: CdrDataframeExamplesApp <appname> <batchInterval> <hostname> <port>")
      System.exit(1)
    }
    val Seq(appName, batchInterval, hostname, port) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt))

    val sqlC = new SQLContext(ssc.sparkContext)
    import sqlC.implicits._

    val cdrStream = ssc.socketTextStream(hostname, port.toInt)
      .map(_.split("\\t", -1))
      .foreachRDD(rdd => {
        val cdrs = seqToCdr(rdd).toDF()

        cdrs.select("squareId", "timeInterval", "countryCode").show()
        cdrs.select($"squareId", $"timeInterval", $"countryCode").show()
        cdrs.filter("squareId = 5").show()
        cdrs.drop("countryCode").show()
        cdrs.select($"squareId", $"timeInterval", $"countryCode").where($"squareId" === 5).show()
        cdrs.limit(5).show()
        cdrs.groupBy("squareId").count().show()
        cdrs.groupBy("countryCode").avg("internetTrafficActivity").show()
        cdrs.groupBy("countryCode").max("callOutActivity").show()
        cdrs.groupBy("countryCode").min("callOutActivity").show()
        cdrs.groupBy("squareId").sum("internetTrafficActivity").show()
        cdrs.groupBy("squareId").agg(sum("callOutActivity"), sum("callInActivity"),
          sum("smsOutActivity"), sum("smsInActivity"), sum("internetTrafficActivity")).show()
        cdrs.groupBy("countryCode").sum("internetTrafficActivity")
          .orderBy(desc("SUM(internetTrafficActivity)")).show()
        cdrs.agg(sum("callOutActivity"), sum("callInActivity"), sum("smsOutActivity"),
          sum("smsInActivity"), sum("internetTrafficActivity")).show()
        cdrs.rollup("squareId", "countryCode").count()
          .orderBy(desc("squareId"), desc("countryCode")).rdd.saveAsTextFile("/tmp/rollup" + rdd.hashCode())
        cdrs.cube("squareId", "countryCode").count()
          .orderBy(desc("squareId"), desc("countryCode")).rdd.saveAsTextFile("/tmp/cube" + rdd.hashCode())
        cdrs.dropDuplicates(Array("callOutActivity", "callInActivity")).show()
        cdrs.select("squareId", "countryCode", "internetTrafficActivity").distinct.show()
        cdrs.withColumn("endTime", cdrs("timeInterval") + 600000).show()
        cdrs.sample(true, 0.01).show()
      })

    ssc.start()
    ssc.awaitTermination()
  }

  def seqToCdr(rdd: RDD[Array[String]]): RDD[Cdr] = {
    rdd.map(c => c.map(f => f match {
      case x if x.isEmpty() => "0"
      case x => x
    })).map(c => Cdr(c(0).toInt, c(1).toLong, c(2).toInt, c(3).toFloat,
      c(4).toFloat, c(5).toFloat, c(6).toFloat, c(7).toFloat))
  }
}
Example 45
Source File: L8-28DataFrameExamplesOps.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark

import scala.reflect.runtime.universe

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.SQLContext
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext

object CdrDataframeExamples2App {

  case class Cdr(squareId: Int, timeInterval: Long, countryCode: Int,
    smsInActivity: Float, smsOutActivity: Float, callInActivity: Float,
    callOutActivity: Float, internetTrafficActivity: Float)

  def main(args: Array[String]) {
    if (args.length != 4) {
      System.err.println(
        "Usage: CdrDataframeExamples2App <appname> <batchInterval> <hostname> <port>")
      System.exit(1)
    }
    val Seq(appName, batchInterval, hostname, port) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt))

    val sqlC = new SQLContext(ssc.sparkContext)
    import sqlC.implicits._

    var previousCdrs: Option[DataFrame] = None

    val cdrStream = ssc.socketTextStream(hostname, port.toInt)
      .map(_.split("\\t", -1))
      .foreachRDD(rdd => {
        val cdrs = seqToCdr(rdd).toDF().select("squareId", "countryCode").dropDuplicates()
        previousCdrs match {
          case Some(prevCdrs) => cdrs.unionAll(prevCdrs).show()
          //case Some(prevCdrs) => cdrs.intersect(prevCdrs).show()
          //case Some(prevCdrs) => cdrs.except(prevCdrs).show()
          case None => Unit
        }
        previousCdrs = Some(cdrs)
      })

    ssc.start()
    ssc.awaitTermination()
  }

  def seqToCdr(rdd: RDD[Array[String]]): RDD[Cdr] = {
    rdd.map(c => c.map(f => f match {
      case x if x.isEmpty() => "0"
      case x => x
    })).map(c => Cdr(c(0).toInt, c(1).toLong, c(2).toInt, c(3).toFloat,
      c(4).toFloat, c(5).toFloat, c(6).toFloat, c(7).toFloat))
  }
}
Example 46
Source File: T8-3DataFrameExamplesNA.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark

import scala.reflect.runtime.universe

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.json4s.DefaultFormats
import org.json4s.JDouble
import org.json4s.JObject
import org.json4s.jvalue2extractable
import org.json4s.jvalue2monadic
import org.json4s.native.JsonMethods.compact
import org.json4s.native.JsonMethods.parse
import org.json4s.native.JsonMethods.render
import org.json4s.string2JsonInput

object CdrDataframeExamplesNAApp {

  case class Cdr(squareId: Int, timeInterval: Long, countryCode: Int,
    smsInActivity: Float, smsOutActivity: Float, callInActivity: Float,
    callOutActivity: Float, internetTrafficActivity: Float)

  def main(args: Array[String]) {
    if (args.length != 4) {
      System.err.println(
        "Usage: CdrDataframeExamplesNAApp <appname> <batchInterval> <hostname> <port>")
      System.exit(1)
    }
    val Seq(appName, batchInterval, hostname, port) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt))

    val sqlC = new SQLContext(ssc.sparkContext)
    import sqlC.implicits._
    implicit val formats = DefaultFormats

    val cdrStream = ssc.socketTextStream(hostname, port.toInt)
      .map(_.split("\\t", -1))
      .foreachRDD(rdd => {
        val cdrs = seqToCdr(rdd).toDF()

        cdrs.na.drop("any").show()
        cdrs.na.fill(0, Array("squareId")).show()
        cdrs.na.replace("squareId", Map(0 -> 1)).show()
        println("Correlation: " + cdrs.stat.corr("smsOutActivity", "callOutActivity"))
        println("Covariance: " + cdrs.stat.cov("smsInActivity", "callInActivity"))
        cdrs.stat.crosstab("squareId", "countryCode").show()
        cdrs.stat.freqItems(Array("squareId", "countryCode"), 0.1).show()
        cdrs.stat.crosstab("callOutActivity", "callInActivity").show()
      })

    ssc.start()
    ssc.awaitTermination()
  }

  def seqToCdr(rdd: RDD[Array[String]]): RDD[Cdr] = {
    rdd.map(c => c.map(f => f match {
      case x if x.isEmpty() => "0"
      case x => x
    })).map(c => Cdr(c(0).toInt, c(1).toLong, c(2).toInt, c(3).toFloat,
      c(4).toFloat, c(5).toFloat, c(6).toFloat, c(7).toFloat))
  }
}