org.apache.log4j.PropertyConfigurator Scala Examples

The following examples show how to use org.apache.log4j.PropertyConfigurator. The source project and license for each example are noted above it where available.
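Before the project examples, here is a minimal sketch of what PropertyConfigurator does: it replaces log4j's default configuration with the settings from a properties file or a java.util.Properties object. The object name and the log4j.properties location on the classpath below are assumptions for illustration only.

import java.util.Properties
import org.apache.log4j.{Logger, PropertyConfigurator}

object PropertyConfiguratorSketch {
  def main(args: Array[String]): Unit = {
    // Load a log4j.properties file from the classpath and apply it (assumed to exist there).
    val props = new Properties()
    props.load(getClass.getResourceAsStream("/log4j.properties"))
    PropertyConfigurator.configure(props)

    // Alternatively, configure directly from a file path:
    // PropertyConfigurator.configure("conf/log4j.properties")

    Logger.getLogger(getClass).warn("log4j configured via PropertyConfigurator")
  }
}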
Example 1
package com.chapter14.Serilazition

import org.apache.log4j.{ Level, LogManager, PropertyConfigurator }
import org.apache.spark._
import org.apache.spark.rdd.RDD

class MyMapper2(n: Int) extends Serializable {
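  // Loggers are not serializable; @transient lazy val re-creates the logger on each executor
  // instead of shipping it with the closure.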
  @transient lazy val log = org.apache.log4j.LogManager.getLogger("myLogger")
  def MyMapperDosomething(rdd: RDD[Int]): RDD[String] =
    rdd.map { i =>
      log.warn("mapping: " + i)
      (i + n).toString
    }
}

//Companion object
object MyMapper2 {
  def apply(n: Int): MyMapper2 = new MyMapper2(n)
}

//Main object
object KyroRegistrationDemo {
  def main(args: Array[String]) {
    val log = LogManager.getRootLogger
    log.setLevel(Level.WARN)
    val conf = new SparkConf()
      .setAppName("My App")
      .setMaster("local[*]")
    conf.registerKryoClasses(Array(classOf[MyMapper2]))
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    val sc = new SparkContext(conf)

    log.warn("Started")
    val data = sc.parallelize(1 to 100000)
    val mapper = MyMapper2(10)
    val other = mapper.MyMapperDosomething(data)
    other.collect()
    log.warn("Finished")
  }
} 
Example 2
Source File: MyLogCompleteDemo.scala    From Scala-and-Spark-for-Big-Data-Analytics   with MIT License
package com.chapter14.Serilazition

import org.apache.log4j.{Level, LogManager, PropertyConfigurator}
import org.apache.spark._
import org.apache.spark.rdd.RDD

class MyMapper(n: Int) extends Serializable {
  @transient lazy val log = org.apache.log4j.LogManager.getLogger("myLogger")
  def MyMapperDosomething(rdd: RDD[Int]): RDD[String] =
    rdd.map { i =>
      log.warn("mapping: " + i)
      (i + n).toString
    }
}

//Companion object
object MyMapper {
  def apply(n: Int): MyMapper = new MyMapper(n)
}

//Main object
object MyLog {
  def main(args: Array[String]) {
    val log = LogManager.getRootLogger
    log.setLevel(Level.WARN)
    val conf = new SparkConf()
      .setAppName("My App")
      .setMaster("local[*]")
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    val sc = new SparkContext(conf)

    log.warn("Started")
    val data = sc.parallelize(1 to 100000)
    val mapper = MyMapper(1)
    val other = mapper.MyMapperDosomething(data)
    other.collect()
    log.warn("Finished")
  }
}
Example 3
Source File: SparkPredictionTrainer.scala    From smart-meter   with MIT License
package com.logimethods.nats.connector.spark.app

import java.util.Properties;
import java.io.File
import java.io.Serializable

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming._

import io.nats.client.ConnectionFactory._
import java.nio.ByteBuffer

import org.apache.log4j.{Level, LogManager, PropertyConfigurator}

import com.logimethods.connector.nats.to_spark._
import com.logimethods.scala.connector.spark.to_nats._

import org.apache.spark.ml.classification.MultilayerPerceptronClassifier
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator

import java.util.function._

import java.time.{LocalDateTime, ZoneOffset}
import java.time.DayOfWeek._

import org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel

object SparkPredictionTrainer extends App with SparkPredictionProcessor {
  log.setLevel(Level.WARN)

  val (properties, targets, logLevel, sc, inputNatsStreaming, inputSubject, outputSubject, clusterId, outputNatsStreaming, natsUrl) = setup(args)

  val streamingDuration = scala.util.Properties.envOrElse("STREAMING_DURATION", "2000").toInt
  println("STREAMING_DURATION = " + streamingDuration)

  new Thread(new Runnable {
    def run() {
      while (true) {
        try {
          val data = SparkPredictionProcessor.getData(sc, THRESHOLD)
          val model = trainer.fit(data)
          model.write.overwrite.save(PREDICTION_MODEL_PATH)
          println("New model of size " + data.count() + " trained: " + model.uid)
          Thread.sleep(streamingDuration)
        } catch {
          case e: Throwable => log.error(e)
        }
      }
    }
  }).start()
} 
Example 4
Source File: SparkProcessor.scala    From smart-meter   with MIT License
package com.logimethods.nats.connector.spark.app

import java.util.Properties;
import java.io.File
import java.io.Serializable

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming._

import io.nats.client.Nats._
import io.nats.client.ConnectionFactory._
import java.nio.ByteBuffer

import org.apache.log4j.{Level, LogManager, PropertyConfigurator}

import com.logimethods.connector.nats.to_spark._
import com.logimethods.scala.connector.spark.to_nats._

import java.util.function._

import java.time.{LocalDateTime, ZoneOffset}

trait SparkProcessor {
  def setup(args: Array[String]) = {
    val inputSubject = args(0)
//    val inputNatsStreaming = inputSubject.toUpperCase.contains("STREAMING")
    val outputSubject = args(1)
//    val outputNatsStreaming = outputSubject.toUpperCase.contains("STREAMING")
    println("Will process messages from '" + inputSubject + "' to '" + outputSubject + "'")

    val logLevel = scala.util.Properties.envOrElse("LOG_LEVEL", "INFO")
    println("LOG_LEVEL = " + logLevel)

    val targets = scala.util.Properties.envOrElse("TARGETS", "ALL")
    println("TARGETS = " + targets)

    val cassandraUrl = System.getenv("CASSANDRA_URL")
    println("CASSANDRA_URL = " + cassandraUrl)

    val sparkMasterUrl = System.getenv("SPARK_MASTER_URL")
    println("SPARK_MASTER_URL = " + sparkMasterUrl)

    val sparkCoresMax = System.getenv("SPARK_CORES_MAX")
    println("SPARK_CORES_MAX = " + sparkCoresMax)

    val conf = new SparkConf()
                  .setAppName(args(2))
                  .setMaster(sparkMasterUrl)
                  .set("spark.cores.max", sparkCoresMax)
                  .set("spark.cassandra.connection.host", cassandraUrl);
    val sc = new SparkContext(conf);

//    val streamingDuration = scala.util.Properties.envOrElse("STREAMING_DURATION", "2000").toInt
//    val ssc = new StreamingContext(sc, new Duration(streamingDuration));
///    ssc.checkpoint("/spark/storage")

    val properties = new Properties();
    val natsUrl = System.getenv("NATS_URI")
    println("NATS_URI = " + natsUrl)
    properties.put("servers", natsUrl)
    properties.put(PROP_URL, natsUrl)

    val clusterId = System.getenv("NATS_CLUSTER_ID")

    val inputNatsStreaming = inputSubject.toUpperCase.contains("STREAMING")
    val outputNatsStreaming = outputSubject.toUpperCase.contains("STREAMING")

    (properties, targets, logLevel, sc, inputNatsStreaming, inputSubject, outputSubject, clusterId, outputNatsStreaming, natsUrl)
  }

  def dataDecoder: Array[Byte] => Tuple2[Long,Float] = bytes => {
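        // Each message is expected to be an 8-byte epoch (Long) followed by a 4-byte value (Float)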
        val buffer = ByteBuffer.wrap(bytes);
        val epoch = buffer.getLong()
        val value = buffer.getFloat()
        (epoch, value)
      }
}


trait SparkStreamingProcessor extends SparkProcessor {
  def setupStreaming(args: Array[String]) = {
    val (properties, target, logLevel, sc, inputNatsStreaming, inputSubject, outputSubject, clusterId, outputNatsStreaming, natsUrl) = setup(args)

    val streamingDuration = scala.util.Properties.envOrElse("STREAMING_DURATION", "2000").toInt
    println("STREAMING_DURATION = " + streamingDuration)

    val ssc = new StreamingContext(sc, new Duration(streamingDuration));
//    ssc.checkpoint("/spark/storage")

    (properties, target, logLevel, sc, ssc, inputNatsStreaming, inputSubject, outputSubject, clusterId, outputNatsStreaming, natsUrl, streamingDuration)
  }
} 
Example 5
Source File: SparkTemperatureProcessor.scala    From smart-meter   with MIT License
package com.logimethods.nats.connector.spark.app

import java.util.Properties;
import java.io.File
import java.io.Serializable

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming._
import com.datastax.spark.connector.streaming._
import com.datastax.spark.connector.SomeColumns

import io.nats.client.ConnectionFactory._
import java.nio.ByteBuffer

import org.apache.log4j.{Level, LogManager, PropertyConfigurator}

import com.logimethods.connector.nats.to_spark._
import com.logimethods.scala.connector.spark.to_nats._

import java.util.function._

import java.time.{LocalDateTime, ZoneOffset}

object SparkTemperatureProcessor extends App with SparkStreamingProcessor {
  val log = LogManager.getRootLogger
  log.setLevel(Level.WARN)

  val (properties, target, logLevel, sc, ssc, inputNatsStreaming, inputSubject, outputSubject, clusterId, outputNatsStreaming, natsUrl, streamingDuration) =
    setupStreaming(args)

  // Temperatures //
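  // Receive (epoch, value) tuples from NATS Streaming or plain NATS, depending on the input subject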

  val temperatures =
    if (inputNatsStreaming) {
      NatsToSparkConnector
        .receiveFromNatsStreaming(classOf[Tuple2[Long,Float]], StorageLevel.MEMORY_ONLY, clusterId)
        .withNatsURL(natsUrl)
        .withSubjects(inputSubject)
        .withDataDecoder(dataDecoder)
        .asStreamOf(ssc)
    } else {
      NatsToSparkConnector
        .receiveFromNats(classOf[Tuple2[Long,Float]], StorageLevel.MEMORY_ONLY)
        .withProperties(properties)
        .withSubjects(inputSubject)
        .withDataDecoder(dataDecoder)
        .asStreamOf(ssc)
    }

  // Ideally, should be the AVG
  val singleTemperature = temperatures.reduceByKey(Math.max(_,_))

  if (logLevel.contains("TEMPERATURE")) {
    singleTemperature.print()
  }

  singleTemperature.saveToCassandra("smartmeter", "temperature")

  val temperatureReport = singleTemperature.map({case (epoch, temperature) => (s"""{"epoch": $epoch, "temperature": $temperature}""") })
  SparkToNatsConnectorPool.newPool()
    .withProperties(properties)
    .withSubjects(outputSubject) // "smartmeter.extract.temperature"
    .publishToNats(temperatureReport)

  // Start //
  ssc.start();

  ssc.awaitTermination()
} 
Example 6
Source File: SparkBatch.scala    From smart-meter   with MIT License
package com.logimethods.nats.connector.spark.app

import java.util.Properties;
import java.io.File
import java.io.Serializable

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

import org.apache.log4j.{Level, LogManager, PropertyConfigurator}
import org.apache.log4j.Logger

import org.apache.spark.sql.SparkSession

//import com.datastax.spark.connector._
//import com.datastax.spark.connector.cql.CassandraConnector

// @see http://stackoverflow.com/questions/39423131/how-to-use-cassandra-context-in-spark-2-0
// @see https://databricks.com/blog/2016/08/15/how-to-use-sparksession-in-apache-spark-2-0.html
// @see https://dzone.com/articles/cassandra-with-spark-20-building-rest-api
object SparkBatch extends App {
  val logLevel = System.getenv("APP_BATCH_LOG_LEVEL")
  println("APP_BATCH_LOG_LEVEL = " + logLevel)
  if ("DEBUG" != logLevel) {
  	Logger.getLogger("org").setLevel(Level.OFF)
  }
  
  val cassandraUrl = System.getenv("CASSANDRA_URL")
  println("CASSANDRA_URL = " + cassandraUrl)
  
  val sparkMasterUrl = System.getenv("SPARK_MASTER_URL")
  println("SPARK_MASTER_URL = " + sparkMasterUrl)
  
  val spark = SparkSession
    .builder()
    .master(sparkMasterUrl)
    .appName("Smartmeter Batch")
    .config("spark.cassandra.connection.host", cassandraUrl)
    //   .config("spark.sql.warehouse.dir", warehouseLocation)
    //.enableHiveSupport()
    .getOrCreate()
  
  spark
    .read
    .format("org.apache.spark.sql.cassandra")
    .options(Map("keyspace" -> "smartmeter", "table" -> "raw_data"))
    .load
    .createOrReplaceTempView("raw_data")
  
  val rawVoltageData = spark.sql("select * from raw_data")
  rawVoltageData.show(10)
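  // The aggregation assigned to avgByTransformer is not included in this excerpt; a minimal
  // sketch, assuming raw_data has "transformer" and "voltage" columns (adjust to the real schema):
  val avgByTransformer = spark.sql(
    "select transformer, avg(voltage) as avg_voltage from raw_data group by transformer")
  avgByTransformer.show(10)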
  
  
  // @see http://stackoverflow.com/questions/40324153/what-is-the-best-way-to-insert-update-rows-in-cassandra-table-via-java-spark
  //Save data to Cassandra
  import org.apache.spark.sql.SaveMode
  avgByTransformer.write
    .format("org.apache.spark.sql.cassandra")
    .options(Map("keyspace" -> "smartmeter", "table" -> "avg_voltage_by_transformer"))
    .mode(SaveMode.Overwrite)
    .save()
} 
Example 7
Source File: SparkLocalContext.scala    From cosine-lsh-join-spark   with MIT License
package com.soundcloud.lsh

import java.util.Properties

import org.scalatest.{BeforeAndAfterAll, Suite}
import org.apache.log4j.PropertyConfigurator
import org.apache.spark.{SparkConf, SparkContext}

trait SparkLocalContext extends BeforeAndAfterAll {
  self: Suite =>
  var sc: SparkContext = _

  override def beforeAll() {
    loadTestLog4jConfig()

    val conf = new SparkConf().
      setAppName("test").
      setMaster("local")
    sc = new SparkContext(conf)

    super.beforeAll()
  }

  override def afterAll() {
    if (sc != null) sc.stop()
    super.afterAll()
  }

  private def loadTestLog4jConfig(): Unit = {
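    // Read /log4j.properties from the test classpath and apply it via PropertyConfigurator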
    val props = new Properties
    props.load(getClass.getResourceAsStream("/log4j.properties"))
    PropertyConfigurator.configure(props)
  }
}