java.util.Properties Scala Examples
The following examples show how to use java.util.Properties.
The original project and source file for each example are noted in the header above it.
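Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below; the property keys and values are purely illustrative) of the basic java.util.Properties operations that recur throughout them: creating an instance, setting and reading keys, and converting to and from Scala collections.

import java.util.Properties
import scala.collection.JavaConverters._

object PropertiesBasics extends App {
  // Create an instance and set individual string keys/values.
  val props = new Properties()
  props.setProperty("bootstrap.servers", "localhost:9092") // illustrative key/value
  props.setProperty("acks", "1")

  // Bulk-load entries from a Scala Map via the Java converters.
  props.putAll(Map("client.id" -> "example-client").asJava)

  // Read a value back, supplying a default for missing keys.
  println(props.getProperty("acks", "all"))

  // Iterate over all keys as a Scala collection.
  props.stringPropertyNames().asScala.foreach { k =>
    println(s"$k = ${props.getProperty(k)}")
  }
}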
Example 1
Source File: HasDex.scala From matcher with MIT License | 8 votes |
package com.wavesplatform.dex.it.dex

import java.util.Properties
import java.util.concurrent.ThreadLocalRandom

import cats.Functor
import com.typesafe.config.{Config, ConfigFactory}
import com.wavesplatform.dex.it.api.BaseContainersKit
import com.wavesplatform.dex.it.docker.DexContainer
import com.wavesplatform.dex.it.fp.CanExtract
import mouse.any._
import org.apache.kafka.clients.admin.{AdminClient, NewTopic}

import scala.collection.JavaConverters._

trait HasDex { self: BaseContainersKit =>
  private val defaultTag = Option(System.getenv("DEX_TAG")).getOrElse("latest")

  protected implicit def toDexExplicitGetOps[F[_]: CanExtract: Functor](self: DexApi[F]): DexApiOps.ExplicitGetDexApiOps[F] = {
    new DexApiOps.ExplicitGetDexApiOps[F](self)
  }

  protected def dexInitialSuiteConfig: Config = ConfigFactory.empty()

  protected lazy val dexRunConfig: Config = dexQueueConfig(ThreadLocalRandom.current.nextInt(0, Int.MaxValue))

  protected def kafkaServer: Option[String] = Option { System.getenv("KAFKA_SERVER") }

  protected def dexQueueConfig(queueId: Int): Config = {
    kafkaServer.fold { ConfigFactory.empty() } { kafkaServer =>
      ConfigFactory.parseString(s"""waves.dex.events-queue {
                                   |  type = kafka
                                   |  kafka {
                                   |    servers = "$kafkaServer"
                                   |    topic = "dex-$queueId"
                                   |  }
                                   |}""".stripMargin)
    }
  }

  protected def createDex(name: String,
                          runConfig: Config = dexRunConfig,
                          suiteInitialConfig: Config = dexInitialSuiteConfig,
                          tag: String = defaultTag): DexContainer =
    DexContainer(name, networkName, network, getIp(name), runConfig, suiteInitialConfig, localLogsDir, tag) unsafeTap addKnownContainer

  lazy val dex1: DexContainer = createDex("dex-1")

  protected def createKafkaTopic(name: String): Unit = kafkaServer.foreach { server =>
    val properties = new Properties()
    properties.putAll(
      Map(
        "bootstrap.servers"  -> server,
        "group.id"           -> s"create-$name",
        "key.deserializer"   -> "org.apache.kafka.common.serialization.StringDeserializer",
        "value.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer"
      ).asJava
    )

    val adminClient = AdminClient.create(properties)
    try {
      val newTopic = new NewTopic(name, 1, 1.toShort)
      adminClient.createTopics(java.util.Collections.singletonList(newTopic))
    } finally {
      adminClient.close()
    }
  }
}
Example 2
Source File: DNSstat.scala From jdbcsink with Apache License 2.0 | 6 votes |
import org.apache.spark.sql.SparkSession
import java.util.Properties
import org.apache.spark.sql.types._
import org.apache.spark.sql.functions.{from_json, window}
import java.sql.{Connection, Statement, DriverManager}
import org.apache.spark.sql.ForeachWriter
import org.apache.spark.sql.Row

class JDBCSink() extends ForeachWriter[Row] {
  val driver = "com.mysql.jdbc.Driver"
  var connection: Connection = _
  var statement: Statement = _

  def open(partitionId: Long, version: Long): Boolean = {
    Class.forName(driver)
    connection = DriverManager.getConnection("jdbc:mysql://10.88.1.102:3306/aptwebservice", "root", "mysqladmin")
    statement = connection.createStatement
    true
  }

  def process(value: Row): Unit = {
    statement.executeUpdate("replace into DNSStat(ip,domain,time,count) values("
      + "'" + value.getString(0) + "'" + ","    // ip
      + "'" + value.getString(1) + "'" + ","    // domain
      + "'" + value.getTimestamp(2) + "'" + "," // time
      + value.getLong(3)                        // count
      + ")")
  }

  def close(errorOrNull: Throwable): Unit = {
    connection.close
  }
}

object DNSstatJob {

  val schema: StructType = StructType(
    Seq(StructField("Vendor", StringType, true),
      StructField("Id", IntegerType, true),
      StructField("Time", LongType, true),
      StructField("Conn", StructType(Seq(
        StructField("Proto", IntegerType, true),
        StructField("Sport", IntegerType, true),
        StructField("Dport", IntegerType, true),
        StructField("Sip", StringType, true),
        StructField("Dip", StringType, true)
      )), true),
      StructField("Dns", StructType(Seq(
        StructField("Domain", StringType, true),
        StructField("IpCount", IntegerType, true),
        StructField("Ip", StringType, true)
      )), true)))

  def main(args: Array[String]) {
    val spark = SparkSession
      .builder
      .appName("DNSJob")
      .config("spark.some.config.option", "some-value")
      .getOrCreate()
    import spark.implicits._

    val connectionProperties = new Properties()
    connectionProperties.put("user", "root")
    connectionProperties.put("password", "mysqladmin")

    val bruteForceTab = spark.read
      .jdbc("jdbc:mysql://10.88.1.102:3306/aptwebservice", "DNSTab", connectionProperties)
    bruteForceTab.registerTempTable("DNSTab")

    val lines = spark
      .readStream
      .format("kafka")
      .option("kafka.bootstrap.servers", "10.94.1.110:9092")
      .option("subscribe", "xdr")
      //.option("startingOffsets","earliest")
      .option("startingOffsets", "latest")
      .load()
      .select(from_json($"value".cast(StringType), schema).as("jsonData"))
    lines.registerTempTable("xdr")

    val filterDNS = spark.sql("select CAST(from_unixtime(xdr.jsonData.Time DIV 1000000) as timestamp) as time,xdr.jsonData.Conn.Sip as sip, xdr.jsonData.Dns.Domain from xdr inner join DNSTab on xdr.jsonData.Dns.domain = DNSTab.domain")

    val windowedCounts = filterDNS
      .withWatermark("time", "5 minutes")
      .groupBy(window($"time", "1 minutes", "1 minutes"), $"sip", $"domain")
      .count()
      .select($"sip", $"domain", $"window.start", $"count")

    val writer = new JDBCSink()
    val query = windowedCounts
      .writeStream
      .foreach(writer)
      .outputMode("update")
      .option("checkpointLocation", "/checkpoint/")
      .start()
    query.awaitTermination()
  }
}
Example 3
Source File: ExampleExternalStateSpec.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.example

import java.util.Properties

import com.typesafe.config.ConfigFactory
import io.amient.affinity.core.cluster.Node
import io.amient.affinity.core.util.AffinityTestBase
import io.amient.affinity.kafka.EmbeddedKafka
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.scalatest.concurrent.TimeLimitedTests
import org.scalatest.time.{Millis, Span}
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers}

import scala.collection.JavaConverters._

class ExampleExternalStateSpec extends FlatSpec
  with AffinityTestBase
  with EmbeddedKafka
  with Matchers
  with BeforeAndAfterAll
  with TimeLimitedTests {

  override def numPartitions = 2

  val config = configure(ConfigFactory.load("example-external-state"))

  val topic = config.getString("affinity.keyspace.external.state.news.storage.kafka.topic")

  val node = new Node(configure(config, Some(zkConnect), Some(kafkaBootstrap)))

  override def beforeAll: Unit = try {
    createTopic(topic)
    val externalProducer = createKafkaAvroProducer[String, String]()
    try {
      externalProducer.send(new ProducerRecord(topic, "10:30", "the universe is expanding"))
      externalProducer.send(new ProducerRecord(topic, "11:00", "the universe is still expanding"))
      externalProducer.send(new ProducerRecord(topic, "11:30", "the universe briefly contracted but is expanding again"))
      externalProducer.flush()
    } finally {
      externalProducer.close()
    }
    //the external fixture is produced and the externalProducer is flushed() before the node is started
    node.start()
    node.awaitClusterReady()
    //at this point all stores have loaded everything available in the external topic so the test will be deterministic
  } finally {
    super.beforeAll()
  }

  override def afterAll: Unit = try {
    node.shutdown()
  } finally {
    super.afterAll()
  }

  behavior of "External State"

  val timeLimit = Span(5000, Millis) //it should be much faster but sometimes many tests are run at the same time

  it should "start automatically tailing state partitions on startup even when master" in {
    //we don't need an arbitrary sleep to ensure the tailing state catches up with the writes above
    //before we fetch the latest news because the watermark is built into the request to make the test fast and deterministic
    val response = node.get_text(node.http_get(s"/news/latest"))
    response should include("10:30\tthe universe is expanding")
    response should include("11:00\tthe universe is still expanding")
    response should include("11:30\tthe universe briefly contracted but is expanding again")
  }

  private def createKafkaAvroProducer[K, V]() = new KafkaProducer[K, V](new Properties {
    put("bootstrap.servers", kafkaBootstrap)
    put("acks", "1")
    put("key.serializer", "io.amient.affinity.kafka.KafkaAvroSerializer")
    put("value.serializer", "io.amient.affinity.kafka.KafkaAvroSerializer")
    //this simply adds all configs required by KafkaAvroSerializer
    config.getConfig("affinity.avro").entrySet().asScala.foreach { case (entry) =>
      put(entry.getKey, entry.getValue.unwrapped())
    }
  })
}
Example 4
Source File: AvroMessageFormatterMain.scala From affinity with Apache License 2.0 | 5 votes |
import java.time.temporal.ChronoUnit
import java.util.Properties

import io.amient.affinity.kafka.AvroMessageFormatter
import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.common.serialization.ByteArrayDeserializer

import scala.collection.JavaConverters._

object AvroMessageFormatterMain extends App {

  val consumerProps = new Properties()
  consumerProps.put("bootstrap.servers", args(0))
  consumerProps.put("security.protocol", "SASL_SSL")
  consumerProps.put("sasl.mechanism", "PLAIN")
  consumerProps.put("client.id", "console")
  consumerProps.put("group.id", "console")
  consumerProps.put("key.deserializer", classOf[ByteArrayDeserializer])
  consumerProps.put("value.deserializer", classOf[ByteArrayDeserializer])
  consumerProps.put("sasl.jaas.config", "org.apache.kafka.common.security.plain.PlainLoginModule required username='" + args(1) + "' password='" + args(2) + "';")
  consumerProps.put("schema.registry.url", args(3))

  val c = new KafkaConsumer[Array[Byte], Array[Byte]](consumerProps)
  c.subscribe(List(args(4)).asJava)

  val formatter = new AvroMessageFormatter()
  formatter.init(consumerProps)

  while (true) {
    c.poll(java.time.Duration.of(1, ChronoUnit.SECONDS)).asScala.foreach { record =>
      formatter.writeTo(record, System.out)
    }
  }
}
Example 5
Source File: EmbeddedConfluentRegistry.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.kafka

import java.nio.channels.ServerSocketChannel
import java.util.Properties

import io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient
import io.confluent.kafka.schemaregistry.rest.{SchemaRegistryConfig, SchemaRegistryRestApplication}
import org.scalatest.{BeforeAndAfterAll, Suite}
import org.slf4j.LoggerFactory

trait EmbeddedConfluentRegistry extends EmbeddedKafka with BeforeAndAfterAll {

  self: Suite =>

  private val log = LoggerFactory.getLogger(classOf[EmbeddedConfluentRegistry])

  private val registryConfig: SchemaRegistryConfig = new SchemaRegistryConfig(new Properties() {
    put("listeners", s"http://127.0.0.1:0")
    put("kafkastore.connection.url", zkConnect)
    put("avro.compatibility.level", "full")
    put("kafkastore.init.timeout.ms", "5000")
    put("kafkastore.topic", "_schemas")
    put("debug", "true")
  })
  private val app = new SchemaRegistryRestApplication(registryConfig)
  private val registry = app.createServer
  registry.start()

  val registryUrl = s"http://127.0.0.1:" + registry.getConnectors.head.getTransport.asInstanceOf[ServerSocketChannel].socket.getLocalPort
  log.info("Confluent schema registry listening at: " + registryUrl)

  val registryClient = new CachedSchemaRegistryClient(registryUrl, 20)

  abstract override def afterAll() = try {
    registry.stop()
  } finally {
    super.afterAll()
  }
}
Example 6
Source File: EmbeddedKafkaServer.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.kafka

import java.io.File
import java.util.Properties
import java.util.concurrent.TimeUnit

import kafka.server.{KafkaConfig, KafkaServerStartable}
import kafka.zk.BrokerIdZNode
import org.I0Itec.zkclient.ZkClient
import org.I0Itec.zkclient.serialize.ZkSerializer
import org.apache.kafka.clients.admin.{AdminClient, NewTopic}
import org.apache.kafka.common.network.ListenerName
import org.apache.kafka.common.security.auth.SecurityProtocol
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters._
import scala.collection.mutable

trait EmbeddedKafkaServer extends EmbeddedService with EmbeddedZookeperServer {

  private val log = LoggerFactory.getLogger(classOf[EmbeddedKafka])

  def numPartitions: Int

  private val embeddedKafkaPath = new File(testDir, "local-kafka-logs")
  private val kafkaConfig = new KafkaConfig(new Properties {
    put("broker.id", "1")
    put("host.name", "localhost")
    put("port", "0")
    put("log.dir", embeddedKafkaPath.toString)
    put("num.partitions", numPartitions.toString)
    put("auto.create.topics.enable", "false")
    put("delete.topic.enable", "true")
    put("zookeeper.connect", zkConnect)
    put("offsets.topic.replication.factor", "1")
  })

  private val kafka = new KafkaServerStartable(kafkaConfig)
  kafka.startup()

  lazy val admin = AdminClient.create(Map[String, AnyRef]("bootstrap.servers" -> kafkaBootstrap).asJava)

  def createTopic(name: String): Unit = {
    admin.createTopics(List(new NewTopic(name, numPartitions, 1)).asJava).all().get(30, TimeUnit.SECONDS)
  }

  def listTopics: mutable.Set[String] = {
    admin.listTopics().names().get(1, TimeUnit.SECONDS).asScala
  }

  val tmpZkClient = new ZkClient(zkConnect, 5000, 6000, new ZkSerializer {
    def serialize(o: Object): Array[Byte] = o.toString.getBytes

    override def deserialize(bytes: Array[Byte]): Object = new String(bytes)
  })

  val broker = BrokerIdZNode.decode(1, tmpZkClient.readData[String]("/brokers/ids/1").getBytes("utf-8")).broker
  val kafkaBootstrap = broker.brokerEndPoint(ListenerName.forSecurityProtocol(SecurityProtocol.PLAINTEXT)).connectionString()
  tmpZkClient.close
  log.info(s"Embedded Kafka $kafkaBootstrap, data dir: $testDir")

  abstract override def close(): Unit = try kafka.shutdown finally super.close
}
Example 7
Source File: TransactionalProducer.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.kafka

import java.util.Properties

import akka.actor.Actor
import akka.actor.Status.{Failure, Success}
import akka.event.Logging
import com.typesafe.config.Config
import io.amient.affinity.Conf
import io.amient.affinity.core.actor.{TransactionAbort, TransactionBegin, TransactionCommit, TransactionalRecord}
import io.amient.affinity.core.config.CfgStruct
import io.amient.affinity.core.storage.StorageConf
import io.amient.affinity.kafka.KafkaStorage.{KafkaConsumerConf, KafkaProducerConf}
import org.apache.kafka.clients.producer.{Callback, KafkaProducer, ProducerRecord, RecordMetadata}
import org.apache.kafka.common.serialization.ByteArraySerializer

import scala.collection.JavaConverters._

object KafkaConf extends KafkaConf {
  override def apply(config: Config): KafkaConf = new KafkaConf().apply(config)
}

class KafkaConf extends CfgStruct[KafkaConf](classOf[StorageConf]) {
  val BootstrapServers = string("kafka.bootstrap.servers", true).doc("kafka connection string used for consumer and/or producer")
  val Producer = struct("kafka.producer", new KafkaProducerConf, false).doc("any settings that the underlying version of kafka producer client supports")
  val Consumer = struct("kafka.consumer", new KafkaConsumerConf, false).doc("any settings that the underlying version of kafka consumer client supports")
}

class TransactionalProducer extends Actor {

  val logger = Logging.getLogger(context.system, this)

  private[this] var producer: KafkaProducer[Array[Byte], Array[Byte]] = null

  val kafkaConf = KafkaConf(Conf(context.system.settings.config).Affi.Storage)

  val producerConfig = new Properties() {
    if (kafkaConf.Producer.isDefined) {
      val producerConfig = kafkaConf.Producer.toMap()
      if (producerConfig.containsKey("bootstrap.servers")) throw new IllegalArgumentException("bootstrap.servers cannot be overridden for KafkaStorage producer")
      if (producerConfig.containsKey("key.serializer")) throw new IllegalArgumentException("Binary kafka stream cannot use custom key.serializer")
      if (producerConfig.containsKey("value.serializer")) throw new IllegalArgumentException("Binary kafka stream cannot use custom value.serializer")
      producerConfig.entrySet.asScala.filter(_.getValue.isDefined).foreach { case (entry) =>
        put(entry.getKey, entry.getValue.apply.toString)
      }
    }
    put("bootstrap.servers", kafkaConf.BootstrapServers())
    put("value.serializer", classOf[ByteArraySerializer].getName)
    put("key.serializer", classOf[ByteArraySerializer].getName)
  }

  override def receive: Receive = {
    case req@TransactionBegin(transactionalId) => req(sender) ! {
      if (producer == null) {
        producerConfig.put("transactional.id", transactionalId)
        producer = new KafkaProducer[Array[Byte], Array[Byte]](producerConfig)
        logger.debug(s"Transactions.Init(transactional.id = $transactionalId)")
        producer.initTransactions()
      }
      logger.debug("Transactions.Begin()")
      producer.beginTransaction()
    }

    case TransactionalRecord(topic, key, value, timestamp, partition) =>
      val replyto = sender
      val producerRecord = new ProducerRecord(
        topic,
        partition.map(new Integer(_)).getOrElse(null),
        timestamp.map(new java.lang.Long(_)).getOrElse(null),
        key,
        value)
      logger.debug(s"Transactions.Append(topic=$topic)")
      producer.send(producerRecord, new Callback {
        override def onCompletion(metadata: RecordMetadata, exception: Exception): Unit = {
          if (exception != null) {
            replyto ! Failure(exception)
          } else {
            replyto ! Success(metadata.offset())
          }
        }
      })

    case req@TransactionCommit() => req(sender) ! {
      logger.debug("Transactions.Commit()")
      producer.commitTransaction()
    }

    case req@TransactionAbort() => req(sender) ! {
      logger.debug("Transactions.Abort()")
      producer.abortTransaction()
    }
  }
}
Example 8
Source File: KafkaAvroConsumer.scala From sqs-kafka-connect with Apache License 2.0 | 5 votes |
package com.hivehome.kafka.connect.sqs

import java.util.{Properties, UUID}

import io.confluent.kafka.serializers.KafkaAvroDeserializer
import org.apache.kafka.clients.consumer.{ConsumerConfig => ConsumerConfigConst, KafkaConsumer}
import org.apache.kafka.common.serialization.ByteArrayDeserializer
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters._
import scala.concurrent.duration._
import scala.language.postfixOps

// Note: the enclosing class declaration (KafkaAvroConsumer[K, V], which provides `cons`,
// `logger` and `PollingInterval`) is elided in this excerpt; only the poll method is shown.
  def poll(numberOfMessagesExpected: Int,
           timeout: FiniteDuration = 30 seconds,
           accept: V => Boolean = _ => true): Vector[V] = {
    val deadline = timeout.fromNow
    var messages = Vector.empty[V]
    while (deadline.hasTimeLeft && messages.size < numberOfMessagesExpected) {
      val records = cons.poll(PollingInterval)
      // convert to Seq so that we have all the messages once we have
      // exhausted the iterator
      val msgsSeq = records.iterator().asScala.toSeq
      messages = messages ++ msgsSeq.map(_.value()).filter(accept).toVector
    }

    logger.debug("Number of messages received {}", messages.size)
    if (messages.size < numberOfMessagesExpected) {
      throw new AssertionError(s"Expected $numberOfMessagesExpected messages within $timeout, but only received ${messages.size}. $messages")
    }
    // Possibly throw exception if too many messages?
    messages
  }
}

object KafkaAvroConsumer {
  val logger = LoggerFactory.getLogger(getClass)

  def apply[K, V](kafkaProps: Map[String, String], topicName: String): KafkaAvroConsumer[K, V] = {
    val props = new Properties()
    props.putAll(kafkaProps.asJava)
    props.put(ConsumerConfigConst.GROUP_ID_CONFIG, "test" + UUID.randomUUID().toString.substring(0, 10))
    props.put(ConsumerConfigConst.KEY_DESERIALIZER_CLASS_CONFIG, classOf[ByteArrayDeserializer])
    props.put(ConsumerConfigConst.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[KafkaAvroDeserializer])
    logger.info(s"Consuming from $topicName with properties $props")
    val cons = new KafkaConsumer[K, V](props)
    cons.subscribe(Seq(topicName).asJava)
    new KafkaAvroConsumer(cons)
  }
}
Example 9
Source File: package.scala From kyuubi with Apache License 2.0 | 5 votes |
package yaooqinn

import java.util.Properties

import scala.util.Try

import yaooqinn.kyuubi.service.ServiceException

package object kyuubi {

  private object BuildInfo {
    private val buildFile = "kyuubi-version-info.properties"
    private val buildFileStream =
      Thread.currentThread().getContextClassLoader.getResourceAsStream(buildFile)
    private val unknown = "<unknown>"
    private val props = new Properties()

    try {
      props.load(buildFileStream)
    } catch {
      case e: Exception => throw new ServiceException(e)
    } finally {
      Try(buildFileStream.close())
    }

    val version: String = props.getProperty("kyuubi_version", unknown)
    val sparkVersion: String = props.getProperty("spark_version", unknown)
    val branch: String = props.getProperty("branch", unknown)
    val jar: String = props.getProperty("jar", unknown)
    val revision: String = props.getProperty("revision", unknown)
    val user: String = props.getProperty("user", unknown)
    val repoUrl: String = props.getProperty("url", unknown)
    val buildDate: String = props.getProperty("date", unknown)
  }

  val KYUUBI_VERSION: String = BuildInfo.version
  val SPARK_COMPILE_VERSION: String = BuildInfo.sparkVersion
  val BRANCH: String = BuildInfo.branch
  val KYUUBI_JAR_NAME: String = BuildInfo.jar
  val REVISION: String = BuildInfo.revision
  val BUILD_USER: String = BuildInfo.user
  val REPO_URL: String = BuildInfo.repoUrl
  val BUILD_DATE: String = BuildInfo.buildDate
}
Example 10
Source File: packageSuite.scala From kyuubi with Apache License 2.0 | 5 votes |
package yaooqinn.kyuubi

import java.util.Properties

import org.apache.spark.SparkFunSuite

class packageSuite extends SparkFunSuite {

  test("build info") {
    val buildFile = "kyuubi-version-info.properties"
    val str = Thread.currentThread().getContextClassLoader.getResourceAsStream(buildFile)
    val props = new Properties()
    assert(str !== null)
    props.load(str)
    str.close()
    assert(props.getProperty("kyuubi_version") === KYUUBI_VERSION)
    assert(props.getProperty("spark_version") === SPARK_COMPILE_VERSION)
    assert(props.getProperty("branch") === BRANCH)
    assert(props.getProperty("jar") === KYUUBI_JAR_NAME)
    assert(props.getProperty("revision") === REVISION)
    assert(props.getProperty("user") === BUILD_USER)
    assert(props.getProperty("url") === REPO_URL)
  }
}
Example 11
Source File: Utils.scala From kyuubi with Apache License 2.0 | 5 votes |
package org.apache.kyuubi

import java.io.{File, InputStreamReader, IOException}
import java.net.{URI, URISyntaxException}
import java.nio.charset.StandardCharsets
import java.util.{Properties, UUID}

import scala.collection.JavaConverters._
import scala.util.{Success, Try}

private[kyuubi] object Utils extends Logging {

  import org.apache.kyuubi.config.KyuubiConf._

  def strToSeq(s: String): Seq[String] = {
    require(s != null)
    s.split(",").map(_.trim).filter(_.nonEmpty)
  }

  def getSystemProperties: Map[String, String] = {
    sys.props.toMap
  }

  def getDefaultPropertiesFile(env: Map[String, String] = sys.env): Option[File] = {
    env.get(KYUUBI_CONF_DIR)
      .orElse(env.get(KYUUBI_HOME).map(_ + File.separator + "/conf"))
      .map(d => new File(d + File.separator + KYUUBI_CONF_FILE_NAME))
      .filter(f => f.exists() && f.isFile)
  }

  def getPropertiesFromFile(file: Option[File]): Map[String, String] = {
    file.map { f =>
      info(s"Loading Kyuubi properties from ${f.getAbsolutePath}")
      val reader = new InputStreamReader(f.toURI.toURL.openStream(), StandardCharsets.UTF_8)
      try {
        val properties = new Properties()
        properties.load(reader)
        properties.stringPropertyNames().asScala.map { k =>
          (k, properties.getProperty(k).trim)
        }.toMap
      } catch {
        case e: IOException =>
          throw new KyuubiException(
            s"Failed when loading Kyuubi properties from ${f.getAbsolutePath}", e)
      } finally {
        reader.close()
      }
    }.getOrElse(Map.empty)
  }

  def createTempDir(
      root: String = System.getProperty("java.io.tmpdir"),
      namePrefix: String = "kyuubi"): File = {
    val dir = createDirectory(root, namePrefix)
    dir.deleteOnExit()
    dir
  }
}
Example 12
Source File: package.scala From kyuubi with Apache License 2.0 | 5 votes |
package org.apache

import java.util.Properties

import scala.util.Try

package object kyuubi {

  private object BuildInfo {
    private val buildFile = "kyuubi-version-info.properties"
    private val buildFileStream =
      Thread.currentThread().getContextClassLoader.getResourceAsStream(buildFile)
    private val unknown = "<unknown>"
    private val props = new Properties()

    try {
      props.load(buildFileStream)
    } catch {
      case e: Exception => throw new KyuubiException(e)
    } finally {
      Try(buildFileStream.close())
    }

    val version: String = props.getProperty("kyuubi_version", unknown)
    val sparkVersion: String = props.getProperty("spark_version", unknown)
    val branch: String = props.getProperty("branch", unknown)
    val jar: String = props.getProperty("jar", unknown)
    val revision: String = props.getProperty("revision", unknown)
    val user: String = props.getProperty("user", unknown)
    val repoUrl: String = props.getProperty("url", unknown)
    val buildDate: String = props.getProperty("date", unknown)
  }

  val KYUUBI_VERSION: String = BuildInfo.version
  val SPARK_COMPILE_VERSION: String = BuildInfo.sparkVersion
  val BRANCH: String = BuildInfo.branch
  val KYUUBI_JAR_NAME: String = BuildInfo.jar
  val REVISION: String = BuildInfo.revision
  val BUILD_USER: String = BuildInfo.user
  val REPO_URL: String = BuildInfo.repoUrl
  val BUILD_DATE: String = BuildInfo.buildDate
}
Example 13
Source File: MetadataIteratorSpec.scala From jgit-spark-connector with Apache License 2.0 | 5 votes |
package tech.sourced.engine.iterator

import java.nio.file.Paths
import java.util.{Properties, UUID}

import org.apache.commons.io.FileUtils
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.types.{Metadata, StringType, StructType}
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers}
import tech.sourced.engine.{BaseSparkSpec, Schema}

class JDBCQueryIteratorSpec
  extends FlatSpec with Matchers with BeforeAndAfterAll with BaseSparkSpec {

  private val tmpPath = Paths.get(
    System.getProperty("java.io.tmpdir"),
    UUID.randomUUID.toString
  )
  private val dbPath = tmpPath.resolve("test.db")

  override def beforeAll(): Unit = {
    super.beforeAll()
    tmpPath.toFile.mkdir()
    val rdd = ss.sparkContext.parallelize(Seq(
      Row("id1"),
      Row("id2"),
      Row("id3")
    ))

    val properties = new Properties()
    properties.put("driver", "org.sqlite.JDBC")
    val df = ss.createDataFrame(rdd, StructType(Seq(Schema.repositories.head)))
    df.write.jdbc(s"jdbc:sqlite:${dbPath.toString}", "repositories", properties)
  }

  override def afterAll(): Unit = {
    super.afterAll()
    FileUtils.deleteQuietly(tmpPath.toFile)
  }

  "JDBCQueryIterator" should "return all rows for the query" in {
    val iter = new JDBCQueryIterator(
      Seq(attr("id")),
      dbPath.toString,
      "SELECT id FROM repositories ORDER BY id"
    )
    // calling hasNext more than one time does not cause rows to be lost
    iter.hasNext
    iter.hasNext
    val rows = (for (row <- iter) yield row).toArray
    rows.length should be(3)
    rows(0).length should be(1)
    rows(0)(0).toString should be("id1")
    rows(1)(0).toString should be("id2")
    rows(2)(0).toString should be("id3")
  }

  private def attr(name: String): Attribute = AttributeReference(
    name, StringType, nullable = false, Metadata.empty
  )()
}
Example 14
Source File: EmailNotifierConfig.scala From vinyldns with Apache License 2.0 | 5 votes |
package vinyldns.api.notifier.email

import scala.collection.JavaConverters._
import javax.mail.Address
import javax.mail.internet.InternetAddress
import pureconfig.ConfigReader
import scala.util.Try
import pureconfig.error.CannotConvert
import java.util.Properties
import com.typesafe.config.{ConfigObject, ConfigValue}
import com.typesafe.config.ConfigValueType

object EmailNotifierConfig {

  implicit val smtpPropertiesReader: ConfigReader[Properties] = {
    ConfigReader[ConfigObject].map { config =>
      val props = new Properties()

      def convertToProperties(baseKey: String, config: ConfigObject): Unit =
        config.keySet().asScala.foreach {
          case key =>
            config.get(key) match {
              case value: ConfigObject => convertToProperties(s"${baseKey}.${key}", value)
              case value: ConfigValue if value.valueType != ConfigValueType.NULL =>
                props.put(s"${baseKey}.${key}", value.unwrapped())
              case _ =>
            }
        }

      convertToProperties("mail.smtp", config)
      props
    }
  }

  implicit val addressReader: ConfigReader[Address] = ConfigReader[String].emap { s =>
    Try(new InternetAddress(s)).toEither.left.map { exc =>
      CannotConvert(s, "InternetAddress", exc.getMessage)
    }
  }
}

case class EmailNotifierConfig(from: Address, smtp: Properties = new Properties())
Example 15
Source File: UserBehaviorMsgProducer.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.examples.streaming.IBMKafkaStream

import scala.util.Random
import java.util.Properties
import kafka.producer.KeyedMessage
import kafka.producer.ProducerConfig
import kafka.producer.Producer

class UserBehaviorMsgProducer(brokers: String, topic: String) extends Runnable {
  private val brokerList = brokers
  private val targetTopic = topic
  private val props = new Properties()
  props.put("metadata.broker.list", this.brokerList)
  props.put("serializer.class", "kafka.serializer.StringEncoder")
  props.put("producer.type", "async")
  private val config = new ProducerConfig(this.props)
  private val producer = new Producer[String, String](this.config)

  private val PAGE_NUM = 100
  private val MAX_MSG_NUM = 3
  private val MAX_CLICK_TIME = 5
  private val MAX_STAY_TIME = 10
  // Like 1; Dislike -1; No Feeling 0
  private val LIKE_OR_NOT = Array[Int](1, 0, -1)

  def run(): Unit = {
    val rand = new Random()
    while (true) {
      // how many user behavior messages will be produced
      val msgNum = rand.nextInt(MAX_MSG_NUM) + 1
      try {
        // generate the message with format like page1|2|7.123|1
        // page ID | click count | stay time (minutes) | liked or not
        // (page001.html, 1, 0.5, 1)
        // The first field is the page ID, the second is the number of clicks on that page between
        // entering and leaving the site, the third is the stay time in minutes, and the fourth
        // indicates a like: 1 for like, -1 for dislike, 0 for neutral.
        for (i <- 0 to msgNum) {
          var msg = new StringBuilder()
          msg.append("page" + (rand.nextInt(PAGE_NUM) + 1))
          msg.append("|")
          msg.append(rand.nextInt(MAX_CLICK_TIME) + 1)
          msg.append("|")
          msg.append(rand.nextInt(MAX_CLICK_TIME) + rand.nextFloat())
          msg.append("|")
          msg.append(LIKE_OR_NOT(rand.nextInt(3)))
          println(msg.toString())
          // send the generated message to broker
          sendMessage(msg.toString())
        }
        println("%d user behavior messages produced.".format(msgNum + 1))
      } catch {
        case e: Exception => println(e)
      }
      try {
        // sleep for 5 seconds after sending a micro batch of messages
        // every 5 seconds a random batch of behavior messages is pushed to the user-behavior-topic topic
        Thread.sleep(5000)
      } catch {
        case e: Exception => println(e)
      }
    }
  }

  def sendMessage(message: String) = {
    try {
      val data = new KeyedMessage[String, String](this.topic, message)
      producer.send(data)
    } catch {
      case e: Exception => println(e)
    }
  }
}

object UserBehaviorMsgProducerClient {
  def main(args: Array[String]) {
    // start the message producer thread
    val Array(zkServers, processingInterval) = Array("192.168.200.80:9092", "topic") // args
    new Thread(new UserBehaviorMsgProducer(zkServers, processingInterval)).start()
  }
}
Example 16
Source File: JDBCRelation.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.jdbc

import java.util.Properties

import scala.collection.mutable.ArrayBuffer

import org.apache.spark.Partition
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.sources._
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{DataFrame, Row, SQLContext, SaveMode}

// Note: the opening of the enclosing JDBCRelation companion object is elided in this excerpt.
  def columnPartition(partitioning: JDBCPartitioningInfo): Array[Partition] = {
    if (partitioning == null) return Array[Partition](JDBCPartition(null, 0))

    val numPartitions = partitioning.numPartitions
    val column = partitioning.column
    if (numPartitions == 1) return Array[Partition](JDBCPartition(null, 0))
    // Overflow and silliness can happen if you subtract then divide.
    // Here we get a little roundoff, but that's (hopefully) OK.
    val stride: Long = (partitioning.upperBound / numPartitions
      - partitioning.lowerBound / numPartitions)
    var i: Int = 0
    var currentValue: Long = partitioning.lowerBound
    var ans = new ArrayBuffer[Partition]()
    while (i < numPartitions) {
      val lowerBound = if (i != 0) s"$column >= $currentValue" else null
      currentValue += stride
      val upperBound = if (i != numPartitions - 1) s"$column < $currentValue" else null
      val whereClause =
        if (upperBound == null) {
          lowerBound
        } else if (lowerBound == null) {
          upperBound
        } else {
          s"$lowerBound AND $upperBound"
        }
      ans += JDBCPartition(whereClause, i)
      i = i + 1
    }
    ans.toArray
  }
}

private[sql] case class JDBCRelation(
    url: String,
    table: String,
    parts: Array[Partition],
    properties: Properties = new Properties())(@transient val sqlContext: SQLContext)
  extends BaseRelation
  with PrunedFilteredScan
  with InsertableRelation {

  override val needConversion: Boolean = false

  override val schema: StructType = JDBCRDD.resolveTable(url, table, properties)

  override def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = {
    val driver: String = DriverRegistry.getDriverClassName(url)
    // Rely on a type erasure hack to pass RDD[InternalRow] back as RDD[Row]
    JDBCRDD.scanTable(
      sqlContext.sparkContext,
      schema,
      driver,
      url,
      properties,
      table,
      requiredColumns,
      filters,
      parts).asInstanceOf[RDD[Row]]
  }

  override def insert(data: DataFrame, overwrite: Boolean): Unit = {
    data.write
      .mode(if (overwrite) SaveMode.Overwrite else SaveMode.Append)
      .jdbc(url, table, properties)
  }
}
Example 17
Source File: DefaultSource.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.jdbc

import java.util.Properties

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.sources.{BaseRelation, RelationProvider, DataSourceRegister}

class DefaultSource extends RelationProvider with DataSourceRegister {

  override def shortName(): String = "jdbc"

  override def createRelation(
      sqlContext: SQLContext,
      parameters: Map[String, String]): BaseRelation = {
    val url = parameters.getOrElse("url", sys.error("Option 'url' not specified"))
    val driver = parameters.getOrElse("driver", null)
    val table = parameters.getOrElse("dbtable", sys.error("Option 'dbtable' not specified"))
    val partitionColumn = parameters.getOrElse("partitionColumn", null)
    val lowerBound = parameters.getOrElse("lowerBound", null)
    val upperBound = parameters.getOrElse("upperBound", null)
    val numPartitions = parameters.getOrElse("numPartitions", null)

    if (driver != null) DriverRegistry.register(driver)

    if (partitionColumn != null
      && (lowerBound == null || upperBound == null || numPartitions == null)) {
      sys.error("Partitioning incompletely specified")
    }

    val partitionInfo = if (partitionColumn == null) {
      null
    } else {
      JDBCPartitioningInfo(
        partitionColumn,
        lowerBound.toLong,
        upperBound.toLong,
        numPartitions.toInt)
    }
    val parts = JDBCRelation.columnPartition(partitionInfo)
    // Additional properties that we will pass to getConnection
    val properties = new Properties()
    parameters.foreach(kv => properties.setProperty(kv._1, kv._2))
    JDBCRelation(url, table, parts, properties)(sqlContext)
  }
}
Example 18
Source File: DefaultSource.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources

import java.util.Properties

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.execution.datasources.jdbc.{JDBCRelation, JDBCPartitioningInfo, DriverRegistry}
import org.apache.spark.sql.sources.{BaseRelation, DataSourceRegister, RelationProvider}

class DefaultSource extends RelationProvider with DataSourceRegister {

  override def shortName(): String = "jdbc"

  override def createRelation(
      sqlContext: SQLContext,
      parameters: Map[String, String]): BaseRelation = {
    val url = parameters.getOrElse("url", sys.error("Option 'url' not specified"))
    val driver = parameters.getOrElse("driver", null)
    val table = parameters.getOrElse("dbtable", sys.error("Option 'dbtable' not specified"))
    val partitionColumn = parameters.getOrElse("partitionColumn", null)
    val lowerBound = parameters.getOrElse("lowerBound", null)
    val upperBound = parameters.getOrElse("upperBound", null)
    val numPartitions = parameters.getOrElse("numPartitions", null)

    if (driver != null) DriverRegistry.register(driver)

    if (partitionColumn != null
      && (lowerBound == null || upperBound == null || numPartitions == null)) {
      sys.error("Partitioning incompletely specified")
    }

    val partitionInfo = if (partitionColumn == null) {
      null
    } else {
      JDBCPartitioningInfo(
        partitionColumn,
        lowerBound.toLong,
        upperBound.toLong,
        numPartitions.toInt)
    }
    val parts = JDBCRelation.columnPartition(partitionInfo)
    // Additional properties that we will pass to getConnection
    val properties = new Properties()
    parameters.foreach(kv => properties.setProperty(kv._1, kv._2))
    JDBCRelation(url, table, parts, properties)(sqlContext)
  }
}
Example 19
Source File: SQLExecutionSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution

import java.util.Properties

import scala.collection.parallel.CompositeThrowable

import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite}
import org.apache.spark.sql.SQLContext

// SQL execution test suite
class SQLExecutionSuite extends SparkFunSuite {

  // concurrent query execution
  test("concurrent query execution (SPARK-10548)") {
    // Try to reproduce the issue with the old SparkContext
    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("test")
    val badSparkContext = new BadSparkContext(conf)
    try {
      testConcurrentQueryExecution(badSparkContext)
      fail("unable to reproduce SPARK-10548")
    } catch {
      case e: IllegalArgumentException =>
        assert(e.getMessage.contains(SQLExecution.EXECUTION_ID_KEY))
    } finally {
      badSparkContext.stop()
    }

    // Verify that the issue is fixed with the latest SparkContext
    val goodSparkContext = new SparkContext(conf)
    try {
      testConcurrentQueryExecution(goodSparkContext)
    } finally {
      goodSparkContext.stop()
    }
  }

  private class BadSparkContext(conf: SparkConf) extends SparkContext(conf) {
    protected[spark] override val localProperties = new InheritableThreadLocal[Properties] {
      override protected def childValue(parent: Properties): Properties = new Properties(parent)
      override protected def initialValue(): Properties = new Properties()
    }
  }
  // Note: testConcurrentQueryExecution and the remainder of the suite are elided in this excerpt.
Example 20
Source File: GangliaSink.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink

import java.util.Properties
import java.util.concurrent.TimeUnit

import com.codahale.metrics.MetricRegistry
import com.codahale.metrics.ganglia.GangliaReporter
import info.ganglia.gmetric4j.gmetric.GMetric
import info.ganglia.gmetric4j.gmetric.GMetric.UDPAddressingMode

import org.apache.spark.SecurityManager
import org.apache.spark.metrics.MetricsSystem

class GangliaSink(val property: Properties, val registry: MetricRegistry,
    securityMgr: SecurityManager) extends Sink {
  val GANGLIA_KEY_PERIOD = "period"
  val GANGLIA_DEFAULT_PERIOD = 10

  val GANGLIA_KEY_UNIT = "unit"
  val GANGLIA_DEFAULT_UNIT: TimeUnit = TimeUnit.SECONDS

  val GANGLIA_KEY_MODE = "mode"
  val GANGLIA_DEFAULT_MODE: UDPAddressingMode = GMetric.UDPAddressingMode.MULTICAST

  // TTL for multicast messages. If listeners are X hops away in network, must be at least X.
  val GANGLIA_KEY_TTL = "ttl"
  val GANGLIA_DEFAULT_TTL = 1

  val GANGLIA_KEY_HOST = "host"
  val GANGLIA_KEY_PORT = "port"

  def propertyToOption(prop: String): Option[String] = Option(property.getProperty(prop))

  if (!propertyToOption(GANGLIA_KEY_HOST).isDefined) {
    throw new Exception("Ganglia sink requires 'host' property.")
  }

  if (!propertyToOption(GANGLIA_KEY_PORT).isDefined) {
    throw new Exception("Ganglia sink requires 'port' property.")
  }

  val host = propertyToOption(GANGLIA_KEY_HOST).get
  val port = propertyToOption(GANGLIA_KEY_PORT).get.toInt
  val ttl = propertyToOption(GANGLIA_KEY_TTL).map(_.toInt).getOrElse(GANGLIA_DEFAULT_TTL)
  val mode: UDPAddressingMode = propertyToOption(GANGLIA_KEY_MODE)
    .map(u => GMetric.UDPAddressingMode.valueOf(u.toUpperCase)).getOrElse(GANGLIA_DEFAULT_MODE)
  val pollPeriod = propertyToOption(GANGLIA_KEY_PERIOD).map(_.toInt)
    .getOrElse(GANGLIA_DEFAULT_PERIOD)
  val pollUnit: TimeUnit = propertyToOption(GANGLIA_KEY_UNIT)
    .map(u => TimeUnit.valueOf(u.toUpperCase))
    .getOrElse(GANGLIA_DEFAULT_UNIT)

  MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod)

  val ganglia = new GMetric(host, port, mode, ttl)
  val reporter: GangliaReporter = GangliaReporter.forRegistry(registry)
      .convertDurationsTo(TimeUnit.MILLISECONDS)
      .convertRatesTo(TimeUnit.SECONDS)
      .build(ganglia)

  override def start() {
    reporter.start(pollPeriod, pollUnit)
  }

  override def stop() {
    reporter.stop()
  }

  override def report() {
    reporter.report()
  }
}
Example 21
Source File: MetricsConfig.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics

import java.io.{FileInputStream, InputStream}
import java.util.Properties

import scala.collection.mutable
import scala.util.matching.Regex

import org.apache.spark.util.Utils
import org.apache.spark.{Logging, SparkConf}

private[spark] class MetricsConfig(conf: SparkConf) extends Logging {

  private val DEFAULT_PREFIX = "*"
  private val INSTANCE_REGEX = "^(\\*|[a-zA-Z]+)\\.(.+)".r
  private val DEFAULT_METRICS_CONF_FILENAME = "metrics.properties"

  private[metrics] val properties = new Properties()
  private[metrics] var propertyCategories: mutable.HashMap[String, Properties] = null

  private def setDefaultProperties(prop: Properties) {
    prop.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet")
    prop.setProperty("*.sink.servlet.path", "/metrics/json")
    prop.setProperty("master.sink.servlet.path", "/metrics/master/json")
    prop.setProperty("applications.sink.servlet.path", "/metrics/applications/json")
  }

  def initialize() {
    // Add default properties in case there's no properties file
    setDefaultProperties(properties)

    loadPropertiesFromFile(conf.getOption("spark.metrics.conf"))

    // Also look for the properties in provided Spark configuration
    val prefix = "spark.metrics.conf."
    conf.getAll.foreach {
      case (k, v) if k.startsWith(prefix) =>
        properties.setProperty(k.substring(prefix.length()), v)
      case _ =>
    }

    propertyCategories = subProperties(properties, INSTANCE_REGEX)
    if (propertyCategories.contains(DEFAULT_PREFIX)) {
      import scala.collection.JavaConversions._

      val defaultProperty = propertyCategories(DEFAULT_PREFIX)
      for {
        (inst, prop) <- propertyCategories
        if (inst != DEFAULT_PREFIX)
        (k, v) <- defaultProperty
        if (prop.getProperty(k) == null)
      } {
        prop.setProperty(k, v)
      }
    }
  }

  // Match property keys against the instance regex and group the matching entries into a
  // HashMap keyed by the instance prefix.
  def subProperties(prop: Properties, regex: Regex): mutable.HashMap[String, Properties] = {
    val subProperties = new mutable.HashMap[String, Properties]
    import scala.collection.JavaConversions._
    prop.foreach { kv =>
      if (regex.findPrefixOf(kv._1).isDefined) {
        val regex(prefix, suffix) = kv._1
        subProperties.getOrElseUpdate(prefix, new Properties).setProperty(suffix, kv._2)
      }
    }
    subProperties
  }

  def getInstance(inst: String): Properties = {
    propertyCategories.get(inst) match {
      case Some(s) => s
      case None => propertyCategories.getOrElse(DEFAULT_PREFIX, new Properties)
    }
  }

  private[this] def loadPropertiesFromFile(path: Option[String]): Unit = {
    var is: InputStream = null
    try {
      is = path match {
        case Some(f) => new FileInputStream(f)
        case None => Utils.getSparkClassLoader.getResourceAsStream(DEFAULT_METRICS_CONF_FILENAME)
      }

      if (is != null) {
        properties.load(is)
      }
    } catch {
      case e: Exception =>
        val file = path.getOrElse(DEFAULT_METRICS_CONF_FILENAME)
        logError(s"Error loading configuration file $file", e)
    } finally {
      if (is != null) {
        is.close()
      }
    }
  }
}
Example 22
Source File: CsvSink.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink

import java.io.File
import java.util.{Locale, Properties}
import java.util.concurrent.TimeUnit

import com.codahale.metrics.{CsvReporter, MetricRegistry}

import org.apache.spark.SecurityManager
import org.apache.spark.metrics.MetricsSystem

private[spark] class CsvSink(val property: Properties, val registry: MetricRegistry,
    securityMgr: SecurityManager) extends Sink {
  val CSV_KEY_PERIOD = "period"
  val CSV_KEY_UNIT = "unit"
  val CSV_KEY_DIR = "directory"

  val CSV_DEFAULT_PERIOD = 10
  val CSV_DEFAULT_UNIT = "SECONDS"
  val CSV_DEFAULT_DIR = "/tmp/"

  val pollPeriod = Option(property.getProperty(CSV_KEY_PERIOD)) match {
    case Some(s) => s.toInt
    case None => CSV_DEFAULT_PERIOD
  }

  val pollUnit: TimeUnit = Option(property.getProperty(CSV_KEY_UNIT)) match {
    case Some(s) => TimeUnit.valueOf(s.toUpperCase())
    case None => TimeUnit.valueOf(CSV_DEFAULT_UNIT)
  }

  MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod)

  val pollDir = Option(property.getProperty(CSV_KEY_DIR)) match {
    case Some(s) => s
    case None => CSV_DEFAULT_DIR
  }

  val reporter: CsvReporter = CsvReporter.forRegistry(registry)
      .formatFor(Locale.US)
      .convertDurationsTo(TimeUnit.MILLISECONDS)
      .convertRatesTo(TimeUnit.SECONDS)
      .build(new File(pollDir))

  override def start() {
    reporter.start(pollPeriod, pollUnit)
  }

  override def stop() {
    reporter.stop()
  }

  override def report() {
    reporter.report()
  }
}
Example 23
Source File: MetricsServlet.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink

import java.util.Properties
import java.util.concurrent.TimeUnit
import javax.servlet.http.HttpServletRequest

import com.codahale.metrics.MetricRegistry
import com.codahale.metrics.json.MetricsModule
import com.fasterxml.jackson.databind.ObjectMapper
import org.eclipse.jetty.servlet.ServletContextHandler

import org.apache.spark.SecurityManager
import org.apache.spark.ui.JettyUtils._

private[spark] class MetricsServlet(
    val property: Properties,
    val registry: MetricRegistry,
    securityMgr: SecurityManager)
  extends Sink {

  val SERVLET_KEY_PATH = "path"
  val SERVLET_KEY_SAMPLE = "sample"

  val SERVLET_DEFAULT_SAMPLE = false

  val servletPath = property.getProperty(SERVLET_KEY_PATH)

  val servletShowSample = Option(property.getProperty(SERVLET_KEY_SAMPLE)).map(_.toBoolean)
    .getOrElse(SERVLET_DEFAULT_SAMPLE)

  val mapper = new ObjectMapper().registerModule(
    new MetricsModule(TimeUnit.SECONDS, TimeUnit.MILLISECONDS, servletShowSample))

  // Builds the ServletContextHandler that serves /metrics/json requests; the actual request
  // handling is done by the getMetricsSnapshot method, which serializes the registry to JSON.
  def getHandlers: Array[ServletContextHandler] = {
    Array[ServletContextHandler](
      createServletHandler(servletPath,
        new ServletParams(request => getMetricsSnapshot(request), "text/json"), securityMgr)
    )
  }

  def getMetricsSnapshot(request: HttpServletRequest): String = {
    mapper.writeValueAsString(registry)
  }

  override def start() { }

  override def stop() { }

  override def report() { }
}
Example 24
Source File: Slf4jSink.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink

import java.util.Properties
import java.util.concurrent.TimeUnit

import com.codahale.metrics.{Slf4jReporter, MetricRegistry}

import org.apache.spark.SecurityManager
import org.apache.spark.metrics.MetricsSystem

private[spark] class Slf4jSink(
    val property: Properties,
    val registry: MetricRegistry,
    securityMgr: SecurityManager)
  extends Sink {
  val SLF4J_DEFAULT_PERIOD = 10
  val SLF4J_DEFAULT_UNIT = "SECONDS"

  val SLF4J_KEY_PERIOD = "period"
  val SLF4J_KEY_UNIT = "unit"

  val pollPeriod = Option(property.getProperty(SLF4J_KEY_PERIOD)) match {
    case Some(s) => s.toInt
    case None => SLF4J_DEFAULT_PERIOD
  }

  val pollUnit: TimeUnit = Option(property.getProperty(SLF4J_KEY_UNIT)) match {
    case Some(s) => TimeUnit.valueOf(s.toUpperCase())
    case None => TimeUnit.valueOf(SLF4J_DEFAULT_UNIT)
  }

  MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod)

  val reporter: Slf4jReporter = Slf4jReporter.forRegistry(registry)
    .convertDurationsTo(TimeUnit.MILLISECONDS)
    .convertRatesTo(TimeUnit.SECONDS)
    .build()

  override def start() {
    reporter.start(pollPeriod, pollUnit)
  }

  override def stop() {
    reporter.stop()
  }

  override def report() {
    reporter.report()
  }
}
Example 25
Source File: ConsoleSink.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink

import java.util.Properties
import java.util.concurrent.TimeUnit

import com.codahale.metrics.{ConsoleReporter, MetricRegistry}

import org.apache.spark.SecurityManager
import org.apache.spark.metrics.MetricsSystem

private[spark] class ConsoleSink(val property: Properties, val registry: MetricRegistry,
    securityMgr: SecurityManager) extends Sink {
  val CONSOLE_DEFAULT_PERIOD = 10
  val CONSOLE_DEFAULT_UNIT = "SECONDS"

  val CONSOLE_KEY_PERIOD = "period"
  val CONSOLE_KEY_UNIT = "unit"

  val pollPeriod = Option(property.getProperty(CONSOLE_KEY_PERIOD)) match {
    case Some(s) => s.toInt
    case None => CONSOLE_DEFAULT_PERIOD
  }

  val pollUnit: TimeUnit = Option(property.getProperty(CONSOLE_KEY_UNIT)) match {
    case Some(s) => TimeUnit.valueOf(s.toUpperCase())
    case None => TimeUnit.valueOf(CONSOLE_DEFAULT_UNIT)
  }

  MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod)

  val reporter: ConsoleReporter = ConsoleReporter.forRegistry(registry)
      .convertDurationsTo(TimeUnit.MILLISECONDS)
      .convertRatesTo(TimeUnit.SECONDS)
      .build()

  override def start() {
    reporter.start(pollPeriod, pollUnit)
  }

  override def stop() {
    reporter.stop()
  }

  override def report() {
    reporter.report()
  }
}
Example 26
Source File: GraphiteSink.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink

import java.net.InetSocketAddress
import java.util.Properties
import java.util.concurrent.TimeUnit

import com.codahale.metrics.MetricRegistry
import com.codahale.metrics.graphite.{GraphiteUDP, Graphite, GraphiteReporter}

import org.apache.spark.SecurityManager
import org.apache.spark.metrics.MetricsSystem

private[spark] class GraphiteSink(val property: Properties, val registry: MetricRegistry,
    securityMgr: SecurityManager) extends Sink {
  val GRAPHITE_DEFAULT_PERIOD = 10
  val GRAPHITE_DEFAULT_UNIT = "SECONDS"
  val GRAPHITE_DEFAULT_PREFIX = ""

  val GRAPHITE_KEY_HOST = "host"
  val GRAPHITE_KEY_PORT = "port"
  val GRAPHITE_KEY_PERIOD = "period"
  val GRAPHITE_KEY_UNIT = "unit"
  val GRAPHITE_KEY_PREFIX = "prefix"
  val GRAPHITE_KEY_PROTOCOL = "protocol"

  def propertyToOption(prop: String): Option[String] = Option(property.getProperty(prop))

  if (!propertyToOption(GRAPHITE_KEY_HOST).isDefined) {
    throw new Exception("Graphite sink requires 'host' property.")
  }

  if (!propertyToOption(GRAPHITE_KEY_PORT).isDefined) {
    throw new Exception("Graphite sink requires 'port' property.")
  }

  val host = propertyToOption(GRAPHITE_KEY_HOST).get
  val port = propertyToOption(GRAPHITE_KEY_PORT).get.toInt

  val pollPeriod = propertyToOption(GRAPHITE_KEY_PERIOD) match {
    case Some(s) => s.toInt
    case None => GRAPHITE_DEFAULT_PERIOD
  }

  val pollUnit: TimeUnit = propertyToOption(GRAPHITE_KEY_UNIT) match {
    case Some(s) => TimeUnit.valueOf(s.toUpperCase())
    case None => TimeUnit.valueOf(GRAPHITE_DEFAULT_UNIT)
  }

  val prefix = propertyToOption(GRAPHITE_KEY_PREFIX).getOrElse(GRAPHITE_DEFAULT_PREFIX)

  MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod)

  val graphite = propertyToOption(GRAPHITE_KEY_PROTOCOL).map(_.toLowerCase) match {
    case Some("udp") => new GraphiteUDP(new InetSocketAddress(host, port))
    case Some("tcp") | None => new Graphite(new InetSocketAddress(host, port))
    case Some(p) => throw new Exception(s"Invalid Graphite protocol: $p")
  }

  val reporter: GraphiteReporter = GraphiteReporter.forRegistry(registry)
      .convertDurationsTo(TimeUnit.MILLISECONDS)
      .convertRatesTo(TimeUnit.SECONDS)
      .prefixedWith(prefix)
      .build(graphite)

  override def start() {
    reporter.start(pollPeriod, pollUnit)
  }

  override def stop() {
    reporter.stop()
  }

  override def report() {
    reporter.report()
  }
}
Example 27
Source File: ActiveJob.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler

import java.util.Properties

import org.apache.spark.TaskContext
import org.apache.spark.util.CallSite

private[spark] class ActiveJob(
    val jobId: Int,                            // each job is assigned a unique ID
    val finalStage: ResultStage,               // the final stage
    val func: (TaskContext, Iterator[_]) => _, // the function applied to the last stage
    val partitions: Array[Int],                // the partition list, i.e. how many partitions data is read from and processed
    val callSite: CallSite,
    val listener: JobListener,                 // job listener
    val properties: Properties) {

  // number of partitions for this job
  val numPartitions = partitions.length

  // marks whether the task for each partition has finished
  val finished = Array.fill[Boolean](numPartitions)(false)

  // number of tasks that have already finished
  var numFinished = 0
}
Example 28
Source File: ResetSystemProperties.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.util

import java.util.Properties

import org.apache.commons.lang3.SerializationUtils
import org.scalatest.{BeforeAndAfterEach, Suite}

import org.apache.spark.SparkFunSuite

private[spark] trait ResetSystemProperties extends BeforeAndAfterEach { this: Suite =>
  var oldProperties: Properties = null

  override def beforeEach(): Unit = {
    // we need SerializationUtils.clone instead of `new Properties(System.getProperties()` because
    // the later way of creating a copy does not copy the properties but it initializes a new
    // Properties object with the given properties as defaults. They are not recognized at all
    // by standard Scala wrapper over Java Properties then.
    oldProperties = SerializationUtils.clone(System.getProperties)
    super.beforeEach()
  }

  override def afterEach(): Unit = {
    try {
      super.afterEach()
    } finally {
      System.setProperties(oldProperties)
      oldProperties = null
    }
  }
}
Example 29
Source File: KafkaWriter.scala From haystack-traces with Apache License 2.0 | 5 votes |
package com.expedia.www.haystack.trace.indexer.writers.kafka

import java.util.Properties

import com.codahale.metrics.Meter
import com.expedia.open.tracing.buffer.SpanBuffer
import com.expedia.www.haystack.commons.metrics.MetricsSupport
import com.expedia.www.haystack.trace.commons.packer.PackedMessage
import com.expedia.www.haystack.trace.indexer.metrics.AppMetricNames
import com.expedia.www.haystack.trace.indexer.writers.TraceWriter
import org.apache.kafka.clients.producer._
import org.slf4j.LoggerFactory

import scala.util.Try

object KafkaWriter extends MetricsSupport {
  protected val kafkaProducerFailures: Meter = metricRegistry.meter(AppMetricNames.KAFKA_PRODUCE_FAILURES)
}

class KafkaWriter(producerConfig: Properties, topic: String) extends TraceWriter {
  private val LOGGER = LoggerFactory.getLogger(classOf[KafkaWriter])

  private val producer = new KafkaProducer[String, Array[Byte]](producerConfig)

  override def writeAsync(traceId: String, packedSpanBuffer: PackedMessage[SpanBuffer], isLastSpanBuffer: Boolean): Unit = {
    val record = new ProducerRecord[String, Array[Byte]](topic, traceId, packedSpanBuffer.packedDataBytes)

    producer.send(record, (_: RecordMetadata, exception: Exception) => {
      if (exception != null) {
        LOGGER.error(s"Fail to write the span buffer record to kafka topic=$topic", exception)
        KafkaWriter.kafkaProducerFailures.mark()
      }
    })
  }

  override def close(): Unit = Try(producer.close())
}
Example 30
Source File: KafkaConfiguration.scala From haystack-traces with Apache License 2.0 | 5 votes |
package com.expedia.www.haystack.trace.indexer.config.entities

import java.util.Properties

case class KafkaConfiguration(numStreamThreads: Int,
                              pollTimeoutMs: Long,
                              consumerProps: Properties,
                              producerProps: Properties,
                              produceTopic: String,
                              consumeTopic: String,
                              consumerCloseTimeoutInMillis: Int,
                              commitOffsetRetries: Int,
                              commitBackoffInMillis: Long,
                              maxWakeups: Int,
                              wakeupTimeoutInMillis: Int)
Example 31
Source File: KafkaTestClient.scala From haystack-traces with Apache License 2.0 | 5 votes |
package com.expedia.www.haystack.trace.indexer.integration.clients

import java.util.Properties

import com.expedia.www.haystack.trace.indexer.config.entities.KafkaConfiguration
import com.expedia.www.haystack.trace.indexer.integration.serdes.{SnappyCompressedSpanBufferProtoDeserializer, SpanProtoSerializer}
import com.expedia.www.haystack.trace.indexer.serde.SpanDeserializer
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig
import org.apache.kafka.common.serialization.{ByteArraySerializer, StringDeserializer, StringSerializer}
import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster

object KafkaTestClient {
  val KAFKA_CLUSTER = new EmbeddedKafkaCluster(1)
  KAFKA_CLUSTER.start()
}

class KafkaTestClient {
  import KafkaTestClient._

  val INPUT_TOPIC = "spans"
  val OUTPUT_TOPIC = "span-buffer"

  val APP_PRODUCER_CONFIG: Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    props.put(ProducerConfig.ACKS_CONFIG, "1")
    props.put(ProducerConfig.BATCH_SIZE_CONFIG, "20")
    props.put(ProducerConfig.RETRIES_CONFIG, "0")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[ByteArraySerializer])
    props
  }

  val APP_CONSUMER_CONFIG: Properties = new Properties()

  val TEST_PRODUCER_CONFIG: Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    props.put(ProducerConfig.ACKS_CONFIG, "1")
    props.put(ProducerConfig.BATCH_SIZE_CONFIG, "20")
    props.put(ProducerConfig.RETRIES_CONFIG, "0")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[SpanProtoSerializer])
    props
  }

  val RESULT_CONSUMER_CONFIG = new Properties()

  def buildConfig = KafkaConfiguration(numStreamThreads = 1,
    pollTimeoutMs = 100,
    APP_CONSUMER_CONFIG,
    APP_PRODUCER_CONFIG,
    OUTPUT_TOPIC,
    INPUT_TOPIC,
    consumerCloseTimeoutInMillis = 3000,
    commitOffsetRetries = 3,
    commitBackoffInMillis = 250,
    maxWakeups = 5,
    wakeupTimeoutInMillis = 3000)

  def prepare(appId: String): Unit = {
    APP_CONSUMER_CONFIG.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    APP_CONSUMER_CONFIG.put(ConsumerConfig.GROUP_ID_CONFIG, appId + "-app-consumer")
    APP_CONSUMER_CONFIG.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
    APP_CONSUMER_CONFIG.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
    APP_CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[SpanDeserializer])
    APP_CONSUMER_CONFIG.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false")

    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.GROUP_ID_CONFIG, appId + "-result-consumer")
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[SnappyCompressedSpanBufferProtoDeserializer])

    deleteTopics(INPUT_TOPIC, OUTPUT_TOPIC)
    KAFKA_CLUSTER.createTopic(INPUT_TOPIC, 2, 1)
    KAFKA_CLUSTER.createTopic(OUTPUT_TOPIC)
  }

  private def deleteTopics(topics: String*): Unit = KAFKA_CLUSTER.deleteTopicsAndWait(topics: _*)
}
Example 32
Source File: DistributedProcessing.scala From aecor with MIT License | 5 votes |
package aecor.kafkadistributedprocessing import java.util.Properties import aecor.kafkadistributedprocessing.internal.Kafka import aecor.kafkadistributedprocessing.internal.Kafka._ import cats.effect.{ ConcurrentEffect, ContextShift, Timer } import cats.implicits._ import cats.effect.implicits._ import fs2.Stream import org.apache.kafka.clients.consumer.ConsumerConfig import scala.concurrent.duration._ final class DistributedProcessing(settings: DistributedProcessingSettings) { private def assignRange(size: Int, partitionCount: Int, partition: Int): Option[(Int, Int)] = { val even = size / partitionCount val reminder = size % partitionCount if (partition >= partitionCount) { none } else { if (partition < reminder) { (partition * (even + 1), even + 1).some } else if (even > 0) { (reminder + partition * even, even).some } else none } } def start[F[_]: ConcurrentEffect: Timer: ContextShift](name: String, processes: List[F[Unit]]): F[Unit] = Kafka .assignPartitions( settings.asProperties(name), settings.topicName, settings.pollingInterval, settings.pollTimeout ) .parEvalMapUnordered(Int.MaxValue) { case AssignedPartition(partition, partitionCount, watchRevocation, release) => assignRange(processes.size, partitionCount, partition).fold(release) { case (offset, processCount) => Stream .range[F](offset, offset + processCount) .parEvalMapUnordered(processCount)(processes) .compile .drain .race(watchRevocation) .flatMap { case Left(_) => release case Right(callback) => callback } } } .compile .drain } object DistributedProcessing { def apply(settings: DistributedProcessingSettings): DistributedProcessing = new DistributedProcessing(settings) } final case class DistributedProcessingSettings(brokers: Set[String], topicName: String, pollingInterval: FiniteDuration = 500.millis, pollTimeout: FiniteDuration = 50.millis, consumerSettings: Map[String, String] = Map.empty) { def withClientId(clientId: String): DistributedProcessingSettings = withConsumerSetting(ConsumerConfig.CLIENT_ID_CONFIG, clientId) def clientId: Option[String] = consumerSettings.get(ConsumerConfig.CLIENT_ID_CONFIG) def withConsumerSetting(key: String, value: String): DistributedProcessingSettings = copy(consumerSettings = consumerSettings.updated(key, value)) def asProperties(groupId: String): Properties = { val properties = new Properties() consumerSettings.foreach { case (key, value) => properties.setProperty(key, value) } properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers.mkString(",")) properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId) properties } }
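A sketch of how the settings and start method above might be driven from a cats-effect IOApp; the broker address, topic name, application name, and process bodies are all placeholders:

import aecor.kafkadistributedprocessing.{DistributedProcessing, DistributedProcessingSettings}
import cats.effect.{ExitCode, IO, IOApp}

object ProcessingApp extends IOApp {
  // Placeholder settings: the partition count of "processing-lock" bounds how many nodes can share the work.
  private val settings = DistributedProcessingSettings(
    brokers = Set("localhost:9092"),
    topicName = "processing-lock")

  // Eight dummy never-ending processes; the Kafka partitions assigned to this node decide which of them run here.
  private val processes: List[IO[Unit]] =
    List.tabulate(8)(i => IO(println(s"process $i started")).flatMap(_ => IO.never))

  override def run(args: List[String]): IO[ExitCode] =
    DistributedProcessing(settings).start("example-processing", processes)
      .map(_ => ExitCode.Success)
}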
Example 33
Source File: KafkaConsumer.scala From aecor with MIT License | 5 votes |
package aecor.kafkadistributedprocessing.internal import java.time.Duration import java.util.Properties import java.util.concurrent.Executors import cats.effect.{ Async, ContextShift, Resource } import cats.~> import org.apache.kafka.clients.consumer.{ Consumer, ConsumerRebalanceListener, ConsumerRecords } import org.apache.kafka.common.PartitionInfo import org.apache.kafka.common.serialization.Deserializer import scala.collection.JavaConverters._ import scala.concurrent.ExecutionContext import scala.concurrent.duration.FiniteDuration private[kafkadistributedprocessing] final class KafkaConsumer[F[_], K, V]( withConsumer: (Consumer[K, V] => *) ~> F ) { def subscribe(topics: Set[String], listener: ConsumerRebalanceListener): F[Unit] = withConsumer(_.subscribe(topics.asJava, listener)) def subscribe(topics: Set[String]): F[Unit] = withConsumer(_.subscribe(topics.asJava)) val unsubscribe: F[Unit] = withConsumer(_.unsubscribe()) def partitionsFor(topic: String): F[Set[PartitionInfo]] = withConsumer(_.partitionsFor(topic).asScala.toSet) def close: F[Unit] = withConsumer(_.close()) def poll(timeout: FiniteDuration): F[ConsumerRecords[K, V]] = withConsumer(_.poll(Duration.ofNanos(timeout.toNanos))) } private[kafkadistributedprocessing] object KafkaConsumer { final class Create[F[_]] { def apply[K, V]( config: Properties, keyDeserializer: Deserializer[K], valueDeserializer: Deserializer[V] )(implicit F: Async[F], contextShift: ContextShift[F]): Resource[F, KafkaConsumer[F, K, V]] = { val create = F.suspend { val executor = Executors.newSingleThreadExecutor() def eval[A](a: => A): F[A] = contextShift.evalOn(ExecutionContext.fromExecutor(executor)) { F.async[A] { cb => executor.execute(new Runnable { override def run(): Unit = cb { try Right(a) catch { case e: Throwable => Left(e) } } }) } } eval { val original = Thread.currentThread.getContextClassLoader Thread.currentThread.setContextClassLoader(null) val consumer = new org.apache.kafka.clients.consumer.KafkaConsumer[K, V]( config, keyDeserializer, valueDeserializer ) Thread.currentThread.setContextClassLoader(original) val withConsumer = new ((Consumer[K, V] => *) ~> F) { def apply[A](f: Consumer[K, V] => A): F[A] = eval(f(consumer)) } new KafkaConsumer[F, K, V](withConsumer) } } Resource.make(create)(_.close) } } def create[F[_]]: Create[F] = new Create[F] }
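Because the class is package-private, the sketch below only makes sense from code inside the same project; it shows how the Properties-based factory is meant to be used (broker, group id, and topic are placeholders):

import java.util.Properties
import cats.effect.{ContextShift, IO}
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer
import scala.concurrent.ExecutionContext

implicit val contextShift: ContextShift[IO] = IO.contextShift(ExecutionContext.global)

val config = new Properties()
config.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")
config.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "partition-watcher")

// The Resource guarantees consumer.close() runs on the same single-thread executor used for polling.
val partitionCount: IO[Int] =
  KafkaConsumer.create[IO](config, new StringDeserializer, new StringDeserializer)
    .use(_.partitionsFor("my-topic").map(_.size))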
Example 34
Source File: KafkaTest.scala From aecor with MIT License | 5 votes |
package aecor.kafkadistributedprocessing import java.util.Properties import aecor.kafkadistributedprocessing.internal.Kafka.UnitDeserializer import aecor.kafkadistributedprocessing.internal.RebalanceEvents.RebalanceEvent import aecor.kafkadistributedprocessing.internal.RebalanceEvents.RebalanceEvent.{ PartitionsAssigned, PartitionsRevoked } import aecor.kafkadistributedprocessing.internal.{ Kafka, KafkaConsumer } import cats.effect.IO import cats.implicits._ import fs2.Stream import fs2.concurrent.Queue import org.apache.kafka.clients.consumer.ConsumerConfig import org.scalatest.funsuite.AnyFunSuite import scala.concurrent.duration._ class KafkaTest extends AnyFunSuite with IOSupport with KafkaSupport { val topic = "test" val partitionCount = 4 createCustomTopic(topic, partitions = partitionCount) val createConsumerAccess = { val properties = new Properties() properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers.mkString(",")) properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "test") KafkaConsumer.create[IO](properties, new UnitDeserializer, new UnitDeserializer) } val watchRebalanceEvents = Stream .resource(createConsumerAccess) .flatMap(Kafka.watchRebalanceEvents(_, topic, 500.millis, 50.millis)) test("Rebalance event stream") { val program = for { queue <- Queue.unbounded[IO, (Int, RebalanceEvent)] run = (n: Int) => watchRebalanceEvents .evalMap { x => val e = n -> x.value queue.enqueue1(e) >> x.commit } .compile .drain .start p1 <- run(1) l1 <- queue.dequeue.take(2).compile.toList p2 <- run(2) l2 <- queue.dequeue.take(4).compile.toList _ <- p1.cancel l3 <- queue.dequeue.take(2).compile.toList _ <- p2.cancel } yield (l1, l2, l3) val (l1, l2, l3) = program.unsafeRunTimed(40.seconds).get def fold(list: List[(Int, RebalanceEvent)]): Map[Int, Set[Int]] = list.foldLeft(Map.empty[Int, Set[Int]]) { case (s, (c, e)) => e match { case PartitionsRevoked(partitions) => s.updated(c, s.getOrElse(c, Set.empty[Int]) -- partitions.map(_.partition())) case PartitionsAssigned(partitions) => s.updated(c, s.getOrElse(c, Set.empty[Int]) ++ partitions.map(_.partition())) } } assert(fold(l1) == Map(1 -> Set(1, 0, 3, 2))) assert(fold(l2) == Map(1 -> Set(1, 0), 2 -> Set(2, 3))) assert(fold(l3) == Map(2 -> Set(1, 0, 3, 2))) } test("Topic partitions query works before subscription") { val program = createConsumerAccess.use(_.partitionsFor(topic)) val result = program.unsafeRunTimed(2.seconds).get assert(result.size == partitionCount) } }
Example 35
Source File: EventTimeHeartBeatExample.scala From flink_training with Apache License 2.0 | 5 votes |
package com.tmalaska.flinktraining.example.eventtime import java.util.Properties import java.util.concurrent.TimeUnit import com.tmalaska.flinktraining.example.session.HeartBeat import net.liftweb.json.DefaultFormats import net.liftweb.json.Serialization.read import org.apache.flink.api.scala._ import org.apache.flink.api.common.serialization.SimpleStringSchema import org.apache.flink.streaming.api.TimeCharacteristic import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks import org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} import org.apache.flink.streaming.api.watermark.Watermark import org.apache.flink.streaming.api.windowing.time.Time import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010 object EventTimeHeartBeatExample { def main(args: Array[String]) { val kafkaServerURL = args(0) val kafkaServerPort = args(1) val kafkaTopic = args(2) val groupId = args(3) val typeOfWindow = args(4) val env = StreamExecutionEnvironment.getExecutionEnvironment env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime) // create a stream using socket val properties = new Properties properties.setProperty("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort) properties.setProperty("zookeeper.connect", "localhost:2181") properties.setProperty("group.id", groupId) println("kafkaTopic:" + kafkaTopic) val heartbeatStream:DataStream[HeartBeat] = env.addSource( new FlinkKafkaConsumer010(kafkaTopic, new SimpleStringSchema(), properties)) .map(json => { implicit val formats = DefaultFormats read[HeartBeat](json) }) .assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks[HeartBeat]() { override def getCurrentWatermark: Watermark = { new Watermark(System.currentTimeMillis() - 10000) } override def extractTimestamp(element: HeartBeat, previousElementTimestamp: Long): Long = { element.eventTime } }) // implement word count val entityCount = heartbeatStream .map(heartBeat => (heartBeat.entityId, 1)) val keyValuePair = entityCount.keyBy(0) val countPair = if (typeOfWindow.equals("slidingCount")) { //Slide by count. Have a sliding window of 5 messages and trigger or slide 2 messages keyValuePair.countWindow(5, 2).sum(1) } else if (typeOfWindow.equals("tumbleTime")) { //Tumble by time. Trigger and Slide by 5 seconds keyValuePair.timeWindow(Time.of(5, TimeUnit.SECONDS)).sum(1) } else if (typeOfWindow.equals("slidingTime")) { //Slide by time. Have a sliding window of 5 seconds that tiggers every 2 seconds keyValuePair.timeWindow(Time.of(5, TimeUnit.SECONDS), Time.of(2, TimeUnit.SECONDS)).sum(1) } else { //Tumble by time. Trigger every 5 seconds keyValuePair.countWindow(5).sum(1) } // print the results countPair.print() // execute the program env.execute("Scala WordCount Example") } } class MessageTimestamp extends AssignerWithPeriodicWatermarks[HeartBeat] { override def getCurrentWatermark: Watermark = { //TODO null } override def extractTimestamp(t: HeartBeat, l: Long): Long = { //TODO -1 } }
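The MessageTimestamp assigner at the end of this example is left as a pair of TODOs. One possible completion, assuming HeartBeat.eventTime is an epoch timestamp in milliseconds and tolerating 10 seconds of out-of-orderness, is sketched below (renamed so it does not clash with the stub above):

import com.tmalaska.flinktraining.example.session.HeartBeat
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks
import org.apache.flink.streaming.api.watermark.Watermark

class BoundedHeartBeatTimestamps extends AssignerWithPeriodicWatermarks[HeartBeat] {
  private val maxOutOfOrdernessMs = 10000L
  private var currentMaxTimestamp = Long.MinValue + maxOutOfOrdernessMs

  // The watermark trails the highest event time seen so far by the allowed lateness.
  override def getCurrentWatermark: Watermark =
    new Watermark(currentMaxTimestamp - maxOutOfOrdernessMs)

  override def extractTimestamp(element: HeartBeat, previousElementTimestamp: Long): Long = {
    currentMaxTimestamp = math.max(currentMaxTimestamp, element.eventTime)
    element.eventTime
  }
}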
Example 36
Source File: SessionKafkaProducer.scala From flink_training with Apache License 2.0 | 5 votes |
package com.tmalaska.flinktraining.example.session import java.util.{Properties, Random} import net.liftweb.json.DefaultFormats import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import net.liftweb.json.Serialization.write object SessionKafkaProducer { def main(args:Array[String]): Unit = { implicit val formats = DefaultFormats val kafkaServerURL = args(0) val kafkaServerPort = args(1) val topic = args(2) val numberOfEntities = args(3).toInt val numberOfMessagesPerEntity = args(4).toInt val waitTimeBetweenMessageBatch = args(5).toInt val chancesOfMissing = args(6).toInt val props = new Properties() props.put("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort) props.put("acks", "all") props.put("retries", "0") props.put("batch.size", "16384") props.put("linger.ms", "1") props.put("buffer.memory", "33554432") props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) val r = new Random() var sentCount = 0 println("About to send to " + topic) for (j <- 0 to numberOfMessagesPerEntity) { for (i <- 0 to numberOfEntities) { if (r.nextInt(chancesOfMissing) != 0) { val message = write(HeartBeat(i.toString, System.currentTimeMillis())) val producerRecord = new ProducerRecord[String,String](topic, message) producer.send(producerRecord) sentCount += 1 } } println("Sent Count:" + sentCount) Thread.sleep(waitTimeBetweenMessageBatch) } producer.close() } }
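The producer properties above use bare string keys; an equivalent setup with the ProducerConfig constants avoids silent typos (broker address is a placeholder):

import java.util.Properties
import org.apache.kafka.clients.producer.ProducerConfig
import org.apache.kafka.common.serialization.StringSerializer

val props = new Properties()
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")
props.put(ProducerConfig.ACKS_CONFIG, "all")
props.put(ProducerConfig.RETRIES_CONFIG, "0")
props.put(ProducerConfig.BATCH_SIZE_CONFIG, "16384")
props.put(ProducerConfig.LINGER_MS_CONFIG, "1")
props.put(ProducerConfig.BUFFER_MEMORY_CONFIG, "33554432")
props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)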
Example 37
Source File: StreamingSessionExample.scala From flink_training with Apache License 2.0 | 5 votes |
package com.tmalaska.flinktraining.example.session import java.util.Properties import net.liftweb.json.DefaultFormats import net.liftweb.json.Serialization.read import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor} import org.apache.flink.configuration.Configuration import org.apache.flink.streaming.api.functions.ProcessFunction import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} import org.apache.flink.util.Collector import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010 import org.apache.flink.api.common.serialization.SimpleStringSchema import org.apache.flink.api.scala._ object StreamingSessionExample { def main(args:Array[String]): Unit = { val kafkaServerURL = args(0) val kafkaServerPort = args(1) val kafkaTopic = args(2) val groupId = args(3) val sessionTimeOut = args(4).toInt val env = StreamExecutionEnvironment.getExecutionEnvironment //val socketStream = env.socketTextStream("localhost",9999, '\n') val properties = new Properties properties.setProperty("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort) properties.setProperty("zookeeper.connect", "localhost:2181") properties.setProperty("group.id", groupId) println("kafkaTopic:" + kafkaTopic) val messageStream:DataStream[String] = env.addSource( new FlinkKafkaConsumer010(kafkaTopic, new SimpleStringSchema(), properties)) val heartBeatStream = messageStream .map(str => { implicit val formats = DefaultFormats println("str:" + str) val hb = read[HeartBeat](str) (hb.entityId, hb.eventTime) }).keyBy(0).process(new MyProcessFunction(sessionTimeOut)) heartBeatStream.map(session => { println("session:" + session) session }) heartBeatStream.print() env.execute() } } class MyProcessFunction(sessionTimeOut:Int) extends ProcessFunction[(String,Long), SessionObj] { private var state:ValueState[SessionObj] = null override def open(parameters: Configuration): Unit = { state = getRuntimeContext.getState(new ValueStateDescriptor[SessionObj]("myState", classOf[SessionObj])) } override def processElement(value: (String, Long), ctx: ProcessFunction[(String, Long), SessionObj]#Context, out: Collector[SessionObj]): Unit = { val currentSession = state.value() var outBoundSessionRecord:SessionObj = null if (currentSession == null) { outBoundSessionRecord = SessionObj(value._2, value._2, 1) } else { outBoundSessionRecord = SessionObj(currentSession.startTime, value._2, currentSession.heartbeatCount + 1) } state.update(outBoundSessionRecord) out.collect(outBoundSessionRecord) ctx.timerService.registerEventTimeTimer(System.currentTimeMillis() + sessionTimeOut) } override def onTimer(timestamp: Long, ctx: ProcessFunction[(String, Long), SessionObj]#OnTimerContext, out: Collector[SessionObj]): Unit = { val result = state.value if (result != null && result.latestEndTime + sessionTimeOut < System.currentTimeMillis()) { // emit the state on timeout state.clear() } } } case class SessionObj(startTime:Long, latestEndTime:Long, heartbeatCount:Int)
Example 38
Source File: StreamingSQL.scala From flink_training with Apache License 2.0 | 5 votes |
package com.tmalaska.flinktraining.example.wordcount import java.util.Properties import org.apache.flink.api.common.serialization.SimpleStringSchema import org.apache.flink.api.scala._ import org.apache.flink.streaming.api.functions.sink.SinkFunction import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010 import org.apache.flink.table.api.scala._ import org.apache.flink.table.api.{Table, TableEnvironment} import org.apache.flink.types.Row object StreamingSQL { def main(args:Array[String]): Unit = { val kafkaServerURL = args(0) val kafkaServerPort = args(1) val kafkaTopic = args(2) val groupId = args(3) val env = StreamExecutionEnvironment.getExecutionEnvironment val tableEnv = TableEnvironment.getTableEnvironment(env) val properties = new Properties properties.setProperty("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort) properties.setProperty("zookeeper.connect", "localhost:2181") properties.setProperty("group.id", groupId) println("kafkaTopic:" + kafkaTopic) val entityCountStream:DataStream[(String, Int)] = env.addSource( new FlinkKafkaConsumer010(kafkaTopic, new SimpleStringSchema(), properties)) .flatMap(line => line.toUpperCase.split(' ')) .map(word => (word, 1)) tableEnv.registerDataStream("myTable2", entityCountStream, 'word, 'frequency) val roleUp = tableEnv.sqlQuery("SELECT word, SUM(frequency) FROM myTable2 GROUP BY word") val typeInfo = createTypeInformation[(String, Int)] val outStream = roleUp.toRetractStream(typeInfo) outStream.print() env.execute("Scala SQL Example") } } class CustomSinkFunction() extends SinkFunction[Row] { @throws[Exception] def invoke(value: Row): Unit = { //Do something println("-" + value) } }
Example 39
Source File: SimpleWordCount.scala From flink_training with Apache License 2.0 | 5 votes |
package com.tmalaska.flinktraining.example.wordcount import java.util.Properties import java.util.concurrent.TimeUnit import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010 import org.apache.flink.api.common.serialization.SimpleStringSchema import org.apache.flink.api.scala._ import org.apache.flink.streaming.api.windowing.time.Time object SimpleWordCount { def main(args: Array[String]) { val kafkaServerURL = args(0) val kafkaServerPort = args(1) val kafkaTopic = args(2) val groupId = args(3) val typeOfWindow = args(4) val env = StreamExecutionEnvironment.getExecutionEnvironment // create a stream using socket val properties = new Properties properties.setProperty("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort) properties.setProperty("zookeeper.connect", "localhost:2181") properties.setProperty("group.id", groupId) println("kafkaTopic:" + kafkaTopic) val wordCountStream:DataStream[String] = env.addSource( new FlinkKafkaConsumer010(kafkaTopic, new SimpleStringSchema(), properties)) // implement word count val wordsStream = wordCountStream .flatMap(line => line.toUpperCase.split(' ')) .map(word => (word, 1)) //.flatMap{_.toUpperCase.split(' ')} //.map{ (_,1) } val keyValuePair = wordsStream.keyBy(0) val countPair = if (typeOfWindow.equals("slidingCount")) { //Slide by count. Have a sliding window of 5 messages and trigger or slide 2 messages keyValuePair.countWindow(5, 2).sum(1) } else if (typeOfWindow.equals("tumbleTime")) { //Tumble by time. Trigger and Slide by 5 seconds keyValuePair.timeWindow(Time.of(5, TimeUnit.SECONDS)).sum(1) } else if (typeOfWindow.equals("slidingTime")) { //Slide by time. Have a sliding window of 5 seconds that tiggers every 2 seconds keyValuePair.timeWindow(Time.of(5, TimeUnit.SECONDS), Time.of(2, TimeUnit.SECONDS)).sum(1) } else { //Tumble by time. Trigger every 5 seconds keyValuePair.countWindow(5).sum(1) } // print the results countPair.print() // execute the program env.execute("Scala WordCount Example") } }
Example 40
Source File: DriverInitContainerBootstrapStep.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.k8s.submit.steps import java.io.StringWriter import java.util.Properties import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder, ContainerBuilder, HasMetadata} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.KubernetesUtils import org.apache.spark.deploy.k8s.submit.KubernetesDriverSpec import org.apache.spark.deploy.k8s.submit.steps.initcontainer.{InitContainerConfigurationStep, InitContainerSpec} private[spark] class DriverInitContainerBootstrapStep( steps: Seq[InitContainerConfigurationStep], configMapName: String, configMapKey: String) extends DriverConfigurationStep { override def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { var initContainerSpec = InitContainerSpec( properties = Map.empty[String, String], driverSparkConf = Map.empty[String, String], initContainer = new ContainerBuilder().build(), driverContainer = driverSpec.driverContainer, driverPod = driverSpec.driverPod, dependentResources = Seq.empty[HasMetadata]) for (nextStep <- steps) { initContainerSpec = nextStep.configureInitContainer(initContainerSpec) } val configMap = buildConfigMap( configMapName, configMapKey, initContainerSpec.properties) val resolvedDriverSparkConf = driverSpec.driverSparkConf .clone() .set(INIT_CONTAINER_CONFIG_MAP_NAME, configMapName) .set(INIT_CONTAINER_CONFIG_MAP_KEY_CONF, configMapKey) .setAll(initContainerSpec.driverSparkConf) val resolvedDriverPod = KubernetesUtils.appendInitContainer( initContainerSpec.driverPod, initContainerSpec.initContainer) driverSpec.copy( driverPod = resolvedDriverPod, driverContainer = initContainerSpec.driverContainer, driverSparkConf = resolvedDriverSparkConf, otherKubernetesResources = driverSpec.otherKubernetesResources ++ initContainerSpec.dependentResources ++ Seq(configMap)) } private def buildConfigMap( configMapName: String, configMapKey: String, config: Map[String, String]): ConfigMap = { val properties = new Properties() config.foreach { entry => properties.setProperty(entry._1, entry._2) } val propertiesWriter = new StringWriter() properties.store(propertiesWriter, s"Java properties built from Kubernetes config map with name: $configMapName " + s"and config map key: $configMapKey") new ConfigMapBuilder() .withNewMetadata() .withName(configMapName) .endMetadata() .addToData(configMapKey, propertiesWriter.toString) .build() } }
Example 41
Source File: GangliaSink.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.MetricRegistry import com.codahale.metrics.ganglia.GangliaReporter import info.ganglia.gmetric4j.gmetric.GMetric import info.ganglia.gmetric4j.gmetric.GMetric.UDPAddressingMode import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem class GangliaSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val GANGLIA_KEY_PERIOD = "period" val GANGLIA_DEFAULT_PERIOD = 10 val GANGLIA_KEY_UNIT = "unit" val GANGLIA_DEFAULT_UNIT: TimeUnit = TimeUnit.SECONDS val GANGLIA_KEY_MODE = "mode" val GANGLIA_DEFAULT_MODE: UDPAddressingMode = GMetric.UDPAddressingMode.MULTICAST // TTL for multicast messages. If listeners are X hops away in network, must be at least X. val GANGLIA_KEY_TTL = "ttl" val GANGLIA_DEFAULT_TTL = 1 val GANGLIA_KEY_HOST = "host" val GANGLIA_KEY_PORT = "port" val GANGLIA_KEY_DMAX = "dmax" val GANGLIA_DEFAULT_DMAX = 0 def propertyToOption(prop: String): Option[String] = Option(property.getProperty(prop)) if (!propertyToOption(GANGLIA_KEY_HOST).isDefined) { throw new Exception("Ganglia sink requires 'host' property.") } if (!propertyToOption(GANGLIA_KEY_PORT).isDefined) { throw new Exception("Ganglia sink requires 'port' property.") } val host = propertyToOption(GANGLIA_KEY_HOST).get val port = propertyToOption(GANGLIA_KEY_PORT).get.toInt val ttl = propertyToOption(GANGLIA_KEY_TTL).map(_.toInt).getOrElse(GANGLIA_DEFAULT_TTL) val dmax = propertyToOption(GANGLIA_KEY_DMAX).map(_.toInt).getOrElse(GANGLIA_DEFAULT_DMAX) val mode: UDPAddressingMode = propertyToOption(GANGLIA_KEY_MODE) .map(u => GMetric.UDPAddressingMode.valueOf(u.toUpperCase)).getOrElse(GANGLIA_DEFAULT_MODE) val pollPeriod = propertyToOption(GANGLIA_KEY_PERIOD).map(_.toInt) .getOrElse(GANGLIA_DEFAULT_PERIOD) val pollUnit: TimeUnit = propertyToOption(GANGLIA_KEY_UNIT) .map(u => TimeUnit.valueOf(u.toUpperCase)) .getOrElse(GANGLIA_DEFAULT_UNIT) MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val ganglia = new GMetric(host, port, mode, ttl) val reporter: GangliaReporter = GangliaReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .withDMax(dmax) .build(ganglia) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
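The sink only sees a flat Properties object; in a Spark deployment these keys normally come from metrics.properties entries of the form *.sink.ganglia.<key>=<value>. A sketch of the equivalent Properties, with a placeholder multicast group:

import java.util.Properties

val gangliaProps = new Properties()
gangliaProps.setProperty("host", "239.2.11.71")   // placeholder multicast address
gangliaProps.setProperty("port", "8649")
gangliaProps.setProperty("period", "10")
gangliaProps.setProperty("unit", "seconds")       // upper-cased before TimeUnit.valueOf above
gangliaProps.setProperty("mode", "multicast")
gangliaProps.setProperty("ttl", "1")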
Example 42
Source File: KafkaContinuousSourceSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.kafka010 import java.util.Properties import java.util.concurrent.atomic.AtomicInteger import org.scalatest.time.SpanSugar._ import scala.collection.mutable import scala.util.Random import org.apache.spark.SparkContext import org.apache.spark.sql.{DataFrame, Dataset, ForeachWriter, Row} import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.execution.streaming.StreamExecution import org.apache.spark.sql.execution.streaming.continuous.ContinuousExecution import org.apache.spark.sql.streaming.{StreamTest, Trigger} import org.apache.spark.sql.test.{SharedSQLContext, TestSparkSession} // Run tests in KafkaSourceSuiteBase in continuous execution mode. class KafkaContinuousSourceSuite extends KafkaSourceSuiteBase with KafkaContinuousTest class KafkaContinuousSourceTopicDeletionSuite extends KafkaContinuousTest { import testImplicits._ override val brokerProps = Map("auto.create.topics.enable" -> "false") test("subscribing topic by pattern with topic deletions") { val topicPrefix = newTopic() val topic = topicPrefix + "-seems" val topic2 = topicPrefix + "-bad" testUtils.createTopic(topic, partitions = 5) testUtils.sendMessages(topic, Array("-1")) require(testUtils.getLatestOffsets(Set(topic)).size === 5) val reader = spark .readStream .format("kafka") .option("kafka.bootstrap.servers", testUtils.brokerAddress) .option("kafka.metadata.max.age.ms", "1") .option("subscribePattern", s"$topicPrefix-.*") .option("failOnDataLoss", "false") val kafka = reader.load() .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") .as[(String, String)] val mapped = kafka.map(kv => kv._2.toInt + 1) testStream(mapped)( makeSureGetOffsetCalled, AddKafkaData(Set(topic), 1, 2, 3), CheckAnswer(2, 3, 4), Execute { query => testUtils.deleteTopic(topic) testUtils.createTopic(topic2, partitions = 5) eventually(timeout(streamingTimeout)) { assert( query.lastExecution.logical.collectFirst { case DataSourceV2Relation(_, r: KafkaContinuousReader) => r }.exists { r => // Ensure the new topic is present and the old topic is gone. r.knownPartitions.exists(_.topic == topic2) }, s"query never reconfigured to new topic $topic2") } }, AddKafkaData(Set(topic2), 4, 5, 6), CheckAnswer(2, 3, 4, 5, 6, 7) ) } } class KafkaContinuousSourceStressForDontFailOnDataLossSuite extends KafkaSourceStressForDontFailOnDataLossSuite { override protected def startStream(ds: Dataset[Int]) = { ds.writeStream .format("memory") .queryName("memory") .trigger(Trigger.Continuous("1 second")) .start() } }
Example 43
Source File: RowDataSourceStrategySuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources import java.sql.DriverManager import java.util.Properties import org.scalatest.BeforeAndAfter import org.apache.spark.SparkFunSuite import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.sources._ import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ import org.apache.spark.util.Utils class RowDataSourceStrategySuite extends SparkFunSuite with BeforeAndAfter with SharedSQLContext { import testImplicits._ val url = "jdbc:h2:mem:testdb0" val urlWithUserAndPass = "jdbc:h2:mem:testdb0;user=testUser;password=testPass" var conn: java.sql.Connection = null before { Utils.classForName("org.h2.Driver") // Extra properties that will be specified for our database. We need these to test // usage of parameters from OPTIONS clause in queries. val properties = new Properties() properties.setProperty("user", "testUser") properties.setProperty("password", "testPass") properties.setProperty("rowId", "false") conn = DriverManager.getConnection(url, properties) conn.prepareStatement("create schema test").executeUpdate() conn.prepareStatement("create table test.inttypes (a INT, b INT, c INT)").executeUpdate() conn.prepareStatement("insert into test.inttypes values (1, 2, 3)").executeUpdate() conn.commit() sql( s""" |CREATE OR REPLACE TEMPORARY VIEW inttypes |USING org.apache.spark.sql.jdbc |OPTIONS (url '$url', dbtable 'TEST.INTTYPES', user 'testUser', password 'testPass') """.stripMargin.replaceAll("\n", " ")) } after { conn.close() } test("SPARK-17673: Exchange reuse respects differences in output schema") { val df = sql("SELECT * FROM inttypes") val df1 = df.groupBy("a").agg("b" -> "min") val df2 = df.groupBy("a").agg("c" -> "min") val res = df1.union(df2) assert(res.distinct().count() == 2) // would be 1 if the exchange was incorrectly reused } }
Example 44
Source File: StatsdSink.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.MetricRegistry import org.apache.spark.SecurityManager import org.apache.spark.internal.Logging import org.apache.spark.metrics.MetricsSystem private[spark] object StatsdSink { val STATSD_KEY_HOST = "host" val STATSD_KEY_PORT = "port" val STATSD_KEY_PERIOD = "period" val STATSD_KEY_UNIT = "unit" val STATSD_KEY_PREFIX = "prefix" val STATSD_DEFAULT_HOST = "127.0.0.1" val STATSD_DEFAULT_PORT = "8125" val STATSD_DEFAULT_PERIOD = "10" val STATSD_DEFAULT_UNIT = "SECONDS" val STATSD_DEFAULT_PREFIX = "" } private[spark] class StatsdSink( val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink with Logging { import StatsdSink._ val host = property.getProperty(STATSD_KEY_HOST, STATSD_DEFAULT_HOST) val port = property.getProperty(STATSD_KEY_PORT, STATSD_DEFAULT_PORT).toInt val pollPeriod = property.getProperty(STATSD_KEY_PERIOD, STATSD_DEFAULT_PERIOD).toInt val pollUnit = TimeUnit.valueOf(property.getProperty(STATSD_KEY_UNIT, STATSD_DEFAULT_UNIT).toUpperCase) val prefix = property.getProperty(STATSD_KEY_PREFIX, STATSD_DEFAULT_PREFIX) MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val reporter = new StatsdReporter(registry, host, port, prefix) override def start(): Unit = { reporter.start(pollPeriod, pollUnit) logInfo(s"StatsdSink started with prefix: '$prefix'") } override def stop(): Unit = { reporter.stop() logInfo("StatsdSink stopped.") } override def report(): Unit = reporter.report() }
Example 45
Source File: CsvSink.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.io.File import java.util.{Locale, Properties} import java.util.concurrent.TimeUnit import com.codahale.metrics.{CsvReporter, MetricRegistry} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class CsvSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val CSV_KEY_PERIOD = "period" val CSV_KEY_UNIT = "unit" val CSV_KEY_DIR = "directory" val CSV_DEFAULT_PERIOD = 10 val CSV_DEFAULT_UNIT = "SECONDS" val CSV_DEFAULT_DIR = "/tmp/" val pollPeriod = Option(property.getProperty(CSV_KEY_PERIOD)) match { case Some(s) => s.toInt case None => CSV_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(CSV_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase(Locale.ROOT)) case None => TimeUnit.valueOf(CSV_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val pollDir = Option(property.getProperty(CSV_KEY_DIR)) match { case Some(s) => s case None => CSV_DEFAULT_DIR } val reporter: CsvReporter = CsvReporter.forRegistry(registry) .formatFor(Locale.US) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build(new File(pollDir)) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 46
Source File: MetricsServlet.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import javax.servlet.http.HttpServletRequest import com.codahale.metrics.MetricRegistry import com.codahale.metrics.json.MetricsModule import com.fasterxml.jackson.databind.ObjectMapper import org.eclipse.jetty.servlet.ServletContextHandler import org.apache.spark.{SecurityManager, SparkConf} import org.apache.spark.ui.JettyUtils._ private[spark] class MetricsServlet( val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val SERVLET_KEY_PATH = "path" val SERVLET_KEY_SAMPLE = "sample" val SERVLET_DEFAULT_SAMPLE = false val servletPath = property.getProperty(SERVLET_KEY_PATH) val servletShowSample = Option(property.getProperty(SERVLET_KEY_SAMPLE)).map(_.toBoolean) .getOrElse(SERVLET_DEFAULT_SAMPLE) val mapper = new ObjectMapper().registerModule( new MetricsModule(TimeUnit.SECONDS, TimeUnit.MILLISECONDS, servletShowSample)) def getHandlers(conf: SparkConf): Array[ServletContextHandler] = { Array[ServletContextHandler]( createServletHandler(servletPath, new ServletParams(request => getMetricsSnapshot(request), "text/json"), securityMgr, conf) ) } def getMetricsSnapshot(request: HttpServletRequest): String = { mapper.writeValueAsString(registry) } override def start() { } override def stop() { } override def report() { } }
Example 47
Source File: Slf4jSink.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.{Locale, Properties} import java.util.concurrent.TimeUnit import com.codahale.metrics.{MetricRegistry, Slf4jReporter} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class Slf4jSink( val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val SLF4J_DEFAULT_PERIOD = 10 val SLF4J_DEFAULT_UNIT = "SECONDS" val SLF4J_KEY_PERIOD = "period" val SLF4J_KEY_UNIT = "unit" val pollPeriod = Option(property.getProperty(SLF4J_KEY_PERIOD)) match { case Some(s) => s.toInt case None => SLF4J_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(SLF4J_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase(Locale.ROOT)) case None => TimeUnit.valueOf(SLF4J_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val reporter: Slf4jReporter = Slf4jReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build() override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 48
Source File: ConsoleSink.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.{Locale, Properties} import java.util.concurrent.TimeUnit import com.codahale.metrics.{ConsoleReporter, MetricRegistry} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class ConsoleSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val CONSOLE_DEFAULT_PERIOD = 10 val CONSOLE_DEFAULT_UNIT = "SECONDS" val CONSOLE_KEY_PERIOD = "period" val CONSOLE_KEY_UNIT = "unit" val pollPeriod = Option(property.getProperty(CONSOLE_KEY_PERIOD)) match { case Some(s) => s.toInt case None => CONSOLE_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(CONSOLE_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase(Locale.ROOT)) case None => TimeUnit.valueOf(CONSOLE_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val reporter: ConsoleReporter = ConsoleReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build() override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 49
Source File: GraphiteSink.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.net.InetSocketAddress import java.util.{Locale, Properties} import java.util.concurrent.TimeUnit import com.codahale.metrics.MetricRegistry import com.codahale.metrics.graphite.{Graphite, GraphiteReporter, GraphiteUDP} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class GraphiteSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val GRAPHITE_DEFAULT_PERIOD = 10 val GRAPHITE_DEFAULT_UNIT = "SECONDS" val GRAPHITE_DEFAULT_PREFIX = "" val GRAPHITE_KEY_HOST = "host" val GRAPHITE_KEY_PORT = "port" val GRAPHITE_KEY_PERIOD = "period" val GRAPHITE_KEY_UNIT = "unit" val GRAPHITE_KEY_PREFIX = "prefix" val GRAPHITE_KEY_PROTOCOL = "protocol" def propertyToOption(prop: String): Option[String] = Option(property.getProperty(prop)) if (!propertyToOption(GRAPHITE_KEY_HOST).isDefined) { throw new Exception("Graphite sink requires 'host' property.") } if (!propertyToOption(GRAPHITE_KEY_PORT).isDefined) { throw new Exception("Graphite sink requires 'port' property.") } val host = propertyToOption(GRAPHITE_KEY_HOST).get val port = propertyToOption(GRAPHITE_KEY_PORT).get.toInt val pollPeriod = propertyToOption(GRAPHITE_KEY_PERIOD) match { case Some(s) => s.toInt case None => GRAPHITE_DEFAULT_PERIOD } val pollUnit: TimeUnit = propertyToOption(GRAPHITE_KEY_UNIT) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase(Locale.ROOT)) case None => TimeUnit.valueOf(GRAPHITE_DEFAULT_UNIT) } val prefix = propertyToOption(GRAPHITE_KEY_PREFIX).getOrElse(GRAPHITE_DEFAULT_PREFIX) MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val graphite = propertyToOption(GRAPHITE_KEY_PROTOCOL).map(_.toLowerCase(Locale.ROOT)) match { case Some("udp") => new GraphiteUDP(host, port) case Some("tcp") | None => new Graphite(host, port) case Some(p) => throw new Exception(s"Invalid Graphite protocol: $p") } val reporter: GraphiteReporter = GraphiteReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .prefixedWith(prefix) .build(graphite) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 50
Source File: ResultTask.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.io._ import java.lang.management.ManagementFactory import java.nio.ByteBuffer import java.util.Properties import org.apache.spark._ import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.RDD private[spark] class ResultTask[T, U]( stageId: Int, stageAttemptId: Int, taskBinary: Broadcast[Array[Byte]], partition: Partition, locs: Seq[TaskLocation], val outputId: Int, localProperties: Properties, serializedTaskMetrics: Array[Byte], jobId: Option[Int] = None, appId: Option[String] = None, appAttemptId: Option[String] = None) extends Task[U](stageId, stageAttemptId, partition.index, localProperties, serializedTaskMetrics, jobId, appId, appAttemptId) with Serializable { @transient private[this] val preferredLocs: Seq[TaskLocation] = { if (locs == null) Nil else locs.toSet.toSeq } override def runTask(context: TaskContext): U = { // Deserialize the RDD and the func using the broadcast variables. val threadMXBean = ManagementFactory.getThreadMXBean val deserializeStartTime = System.currentTimeMillis() val deserializeStartCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) { threadMXBean.getCurrentThreadCpuTime } else 0L val ser = SparkEnv.get.closureSerializer.newInstance() val (rdd, func) = ser.deserialize[(RDD[T], (TaskContext, Iterator[T]) => U)]( ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader) _executorDeserializeTime = System.currentTimeMillis() - deserializeStartTime _executorDeserializeCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) { threadMXBean.getCurrentThreadCpuTime - deserializeStartCpuTime } else 0L func(context, rdd.iterator(partition, context)) } // This is only callable on the driver side. override def preferredLocations: Seq[TaskLocation] = preferredLocs override def toString: String = "ResultTask(" + stageId + ", " + partitionId + ")" }
Example 51
Source File: DAGSchedulerEvent.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.util.Properties import scala.language.existentials import org.apache.spark._ import org.apache.spark.rdd.RDD import org.apache.spark.util.{AccumulatorV2, CallSite} private[scheduler] case class MapStageSubmitted( jobId: Int, dependency: ShuffleDependency[_, _, _], callSite: CallSite, listener: JobListener, properties: Properties = null) extends DAGSchedulerEvent private[scheduler] case class StageCancelled( stageId: Int, reason: Option[String]) extends DAGSchedulerEvent private[scheduler] case class JobCancelled( jobId: Int, reason: Option[String]) extends DAGSchedulerEvent private[scheduler] case class JobGroupCancelled(groupId: String) extends DAGSchedulerEvent private[scheduler] case object AllJobsCancelled extends DAGSchedulerEvent private[scheduler] case class BeginEvent(task: Task[_], taskInfo: TaskInfo) extends DAGSchedulerEvent private[scheduler] case class GettingResultEvent(taskInfo: TaskInfo) extends DAGSchedulerEvent private[scheduler] case class CompletionEvent( task: Task[_], reason: TaskEndReason, result: Any, accumUpdates: Seq[AccumulatorV2[_, _]], taskInfo: TaskInfo) extends DAGSchedulerEvent private[scheduler] case class ExecutorAdded(execId: String, host: String) extends DAGSchedulerEvent private[scheduler] case class ExecutorLost(execId: String, reason: ExecutorLossReason) extends DAGSchedulerEvent private[scheduler] case class WorkerRemoved(workerId: String, host: String, message: String) extends DAGSchedulerEvent private[scheduler] case class TaskSetFailed(taskSet: TaskSet, reason: String, exception: Option[Throwable]) extends DAGSchedulerEvent private[scheduler] case object ResubmitFailedStages extends DAGSchedulerEvent private[scheduler] case class SpeculativeTaskSubmitted(task: Task[_]) extends DAGSchedulerEvent
Example 52
Source File: package.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache import java.util.Properties package object spark { private object SparkBuildInfo { val ( spark_version: String, spark_branch: String, spark_revision: String, spark_build_user: String, spark_repo_url: String, spark_build_date: String) = { val resourceStream = Thread.currentThread().getContextClassLoader. getResourceAsStream("spark-version-info.properties") if (resourceStream == null) { throw new SparkException("Could not find spark-version-info.properties") } try { val unknownProp = "<unknown>" val props = new Properties() props.load(resourceStream) ( props.getProperty("version", unknownProp), props.getProperty("branch", unknownProp), props.getProperty("revision", unknownProp), props.getProperty("user", unknownProp), props.getProperty("url", unknownProp), props.getProperty("date", unknownProp) ) } catch { case e: Exception => throw new SparkException("Error loading properties from spark-version-info.properties", e) } finally { if (resourceStream != null) { try { resourceStream.close() } catch { case e: Exception => throw new SparkException("Error closing spark build info resource stream", e) } } } } } val SPARK_VERSION = SparkBuildInfo.spark_version val SPARK_BRANCH = SparkBuildInfo.spark_branch val SPARK_REVISION = SparkBuildInfo.spark_revision val SPARK_BUILD_USER = SparkBuildInfo.spark_build_user val SPARK_REPO_URL = SparkBuildInfo.spark_repo_url val SPARK_BUILD_DATE = SparkBuildInfo.spark_build_date }
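The pattern above, Properties.load over a classpath resource with a <unknown> fallback, can be exercised in isolation; the sketch below replaces the resource with an in-memory string shaped like spark-version-info.properties (all values are made up):

import java.io.StringReader
import java.util.Properties

val text =
  """version=2.3.1
    |branch=branch-2.3
    |revision=0123456789abcdef
    |user=jenkins
    |url=https://github.com/apache/spark
    |date=2018-06-01
    |""".stripMargin

val props = new Properties()
props.load(new StringReader(text))
assert(props.getProperty("version", "<unknown>") == "2.3.1")
assert(props.getProperty("missing-key", "<unknown>") == "<unknown>")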
Example 53
Source File: MemoryTestingUtils.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.memory import java.util.Properties import org.apache.spark.{SparkEnv, TaskContext, TaskContextImpl} object MemoryTestingUtils { def fakeTaskContext(env: SparkEnv): TaskContext = { val taskMemoryManager = new TaskMemoryManager(env.memoryManager, 0) new TaskContextImpl( stageId = 0, stageAttemptNumber = 0, partitionId = 0, taskAttemptId = 0, attemptNumber = 0, taskMemoryManager = taskMemoryManager, localProperties = new Properties, metricsSystem = env.metricsSystem) } }
Example 54
Source File: FakeTask.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.util.Properties import org.apache.spark.{Partition, SparkEnv, TaskContext} import org.apache.spark.executor.TaskMetrics class FakeTask( stageId: Int, partitionId: Int, prefLocs: Seq[TaskLocation] = Nil, serializedTaskMetrics: Array[Byte] = SparkEnv.get.closureSerializer.newInstance().serialize(TaskMetrics.registered).array()) extends Task[Int](stageId, 0, partitionId, new Properties, serializedTaskMetrics) { override def runTask(context: TaskContext): Int = 0 override def preferredLocations: Seq[TaskLocation] = prefLocs } object FakeTask { def createTaskSet(numTasks: Int, prefLocs: Seq[TaskLocation]*): TaskSet = { createTaskSet(numTasks, stageAttemptId = 0, prefLocs: _*) } def createTaskSet(numTasks: Int, stageAttemptId: Int, prefLocs: Seq[TaskLocation]*): TaskSet = { createTaskSet(numTasks, stageId = 0, stageAttemptId, prefLocs: _*) } def createTaskSet(numTasks: Int, stageId: Int, stageAttemptId: Int, prefLocs: Seq[TaskLocation]*): TaskSet = { if (prefLocs.size != 0 && prefLocs.size != numTasks) { throw new IllegalArgumentException("Wrong number of task locations") } val tasks = Array.tabulate[Task[_]](numTasks) { i => new FakeTask(stageId, i, if (prefLocs.size != 0) prefLocs(i) else Nil) } new TaskSet(tasks, stageId, stageAttemptId, priority = 0, null) } def createShuffleMapTaskSet( numTasks: Int, stageId: Int, stageAttemptId: Int, prefLocs: Seq[TaskLocation]*): TaskSet = { if (prefLocs.size != 0 && prefLocs.size != numTasks) { throw new IllegalArgumentException("Wrong number of task locations") } val tasks = Array.tabulate[Task[_]](numTasks) { i => new ShuffleMapTask(stageId, stageAttemptId, null, new Partition { override def index: Int = i }, prefLocs(i), new Properties, SparkEnv.get.closureSerializer.newInstance().serialize(TaskMetrics.registered).array()) } new TaskSet(tasks, stageId, stageAttemptId, priority = 0, null) } }
Example 55
Source File: TaskDescriptionSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.io.{ByteArrayOutputStream, DataOutputStream, UTFDataFormatException} import java.nio.ByteBuffer import java.util.Properties import scala.collection.mutable.HashMap import org.apache.spark.SparkFunSuite class TaskDescriptionSuite extends SparkFunSuite { test("encoding and then decoding a TaskDescription results in the same TaskDescription") { val originalFiles = new HashMap[String, Long]() originalFiles.put("fileUrl1", 1824) originalFiles.put("fileUrl2", 2) val originalJars = new HashMap[String, Long]() originalJars.put("jar1", 3) val originalProperties = new Properties() originalProperties.put("property1", "18") originalProperties.put("property2", "test value") // SPARK-19796 -- large property values (like a large job description for a long sql query) // can cause problems for DataOutputStream, make sure we handle correctly val sb = new StringBuilder() (0 to 10000).foreach(_ => sb.append("1234567890")) val largeString = sb.toString() originalProperties.put("property3", largeString) // make sure we've got a good test case intercept[UTFDataFormatException] { val out = new DataOutputStream(new ByteArrayOutputStream()) try { out.writeUTF(largeString) } finally { out.close() } } // Create a dummy byte buffer for the task. val taskBuffer = ByteBuffer.wrap(Array[Byte](1, 2, 3, 4)) val originalTaskDescription = new TaskDescription( taskId = 1520589, attemptNumber = 2, executorId = "testExecutor", name = "task for test", index = 19, originalFiles, originalJars, originalProperties, taskBuffer ) val serializedTaskDescription = TaskDescription.encode(originalTaskDescription) val decodedTaskDescription = TaskDescription.decode(serializedTaskDescription) // Make sure that all of the fields in the decoded task description match the original. assert(decodedTaskDescription.taskId === originalTaskDescription.taskId) assert(decodedTaskDescription.attemptNumber === originalTaskDescription.attemptNumber) assert(decodedTaskDescription.executorId === originalTaskDescription.executorId) assert(decodedTaskDescription.name === originalTaskDescription.name) assert(decodedTaskDescription.index === originalTaskDescription.index) assert(decodedTaskDescription.addedFiles.equals(originalFiles)) assert(decodedTaskDescription.addedJars.equals(originalJars)) assert(decodedTaskDescription.properties.equals(originalTaskDescription.properties)) assert(decodedTaskDescription.serializedTask.equals(taskBuffer)) } }
Example 56
Source File: ResetSystemProperties.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.util.Properties import org.apache.commons.lang3.SerializationUtils import org.scalatest.{BeforeAndAfterEach, Suite} private[spark] trait ResetSystemProperties extends BeforeAndAfterEach { this: Suite => var oldProperties: Properties = null override def beforeEach(): Unit = { // we need SerializationUtils.clone instead of `new Properties(System.getProperties())` because // the latter way of creating a copy does not copy the properties but initializes a new // Properties object with the given properties as defaults, which the standard Scala wrapper // over Java Properties does not recognize at all. oldProperties = SerializationUtils.clone(System.getProperties) super.beforeEach() } override def afterEach(): Unit = { try { super.afterEach() } finally { System.setProperties(oldProperties) oldProperties = null } } }
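The comment in beforeEach is worth a concrete illustration; a small sketch of why a defensive copy is needed (using Hashtable.clone here purely for demonstration):

import java.util.Properties
import scala.collection.JavaConverters._

val original = new Properties()
original.setProperty("spark.test.key", "value")

val viaConstructor = new Properties(original)              // sets defaults, does not copy entries
val viaClone = original.clone().asInstanceOf[Properties]   // copies the actual entries

assert(viaConstructor.getProperty("spark.test.key") == "value")   // defaults are resolved here
assert(viaConstructor.asScala.get("spark.test.key").isEmpty)      // but invisible to the Scala wrapper
assert(viaClone.asScala.get("spark.test.key").contains("value"))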
Example 57
Source File: VelocityRenderer.scala From shellbase with Apache License 2.0 | 5 votes |
package com.sumologic.shellbase import java.io._ import java.util.Properties import org.apache.commons.io.IOUtils import org.apache.velocity.VelocityContext import org.apache.velocity.app.{Velocity, VelocityEngine} import scala.collection.JavaConversions._ object VelocityRenderer { val props = new Properties() props.setProperty("runtime.references.strict", "true") props.setProperty("velocimacro.arguments.strict", "true") props.setProperty("velocimacro.permissions.allow.inline.local.scope", "true") props.setProperty("directive.foreach.skip.invalid", "false") props.setProperty("runtime.log.logsystem.log4j.logger", "org.apache.velocity") props.setProperty("resource.loader", "class,file") props.setProperty("class.resource.loader.description", "Velocity Classpath Resource Loader") props.setProperty("class.resource.loader.class", "org.apache.velocity.runtime.resource.loader.ClasspathResourceLoader") Velocity.init(props) def render(map: Iterable[(String, AnyRef)], reader: Reader, writer: Writer): Unit = { val context = new VelocityContext() map.foreach { pair => context.put(pair._1, pair._2) } Velocity.evaluate(context, writer, "ops util velocity renderer", reader) } def render(templateVars: Iterable[(String, AnyRef)], templatePath: String, outputPath: String): Unit = { val templateReader = new InputStreamReader(getClass.getClassLoader.getResourceAsStream(templatePath)) val outputWriter = new OutputStreamWriter(new FileOutputStream(outputPath)) try { render(templateVars, templateReader, outputWriter) } finally { IOUtils.closeQuietly(templateReader) IOUtils.closeQuietly(outputWriter) } } def createScriptFromTemplate(scriptResource: String, variables: Map[AnyRef, AnyRef] = Map[AnyRef, AnyRef]()): File = { val engine = new VelocityEngine() engine.setProperty("resource.loader", "class") engine.setProperty("class.resource.loader.description", "Velocity Classpath Resource Loader") engine.setProperty("class.resource.loader.class", "org.apache.velocity.runtime.resource.loader.ClasspathResourceLoader") engine.init() val template = engine.getTemplate(scriptResource) val tempFile = File.createTempFile(".tmp", ".sh") val modifiableVariables = new java.util.HashMap[AnyRef, AnyRef]() modifiableVariables ++= variables val writer = new FileWriter(tempFile) try { template.merge(new VelocityContext(modifiableVariables), writer) } finally { IOUtils.closeQuietly(writer) } tempFile } }
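A quick rendering sketch against the object above, using in-memory reader and writer; the template text and variable names are made up:

import java.io.{StringReader, StringWriter}

val out = new StringWriter()
VelocityRenderer.render(
  Map[String, AnyRef]("cluster" -> "alpha", "port" -> Integer.valueOf(9042)),
  new StringReader("host=${cluster}.example.com:${port}"),
  out)
println(out.toString)   // host=alpha.example.com:9042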
Example 58
Source File: StreamingDemo.scala From flink-demos with Apache License 2.0 | 5 votes |
package com.dataartisans.flink.example.eventpattern import java.text.SimpleDateFormat import java.util import java.util.{Calendar, Properties, UUID} import com.dataartisans.flink.example.eventpattern.kafka.EventDeSerializer import org.apache.flink.api.common.functions.{RuntimeContext, RichFlatMapFunction} import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor} import org.apache.flink.configuration.Configuration import org.apache.flink.streaming.api.scala._ import org.apache.flink.streaming.connectors.elasticsearch.{IndexRequestBuilder, ElasticsearchSink} import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer08 import org.apache.flink.util.Collector import org.elasticsearch.action.index.IndexRequest import org.elasticsearch.client.Requests class StateMachineMapper extends RichFlatMapFunction[Event, Alert] { private[this] var currentState: ValueState[State] = _ override def open(config: Configuration): Unit = { currentState = getRuntimeContext.getState( new ValueStateDescriptor("state", classOf[State], InitialState)) } override def flatMap(t: Event, out: Collector[Alert]): Unit = { val state = currentState.value() val nextState = state.transition(t.event) nextState match { case InvalidTransition => out.collect(Alert(t.sourceAddress, state, t.event)) case x if x.terminal => currentState.clear() case x => currentState.update(nextState) } } }
Example 59
Source File: JDBCRelation.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.jdbc import java.util.Properties import scala.collection.mutable.ArrayBuffer import org.apache.spark.Partition import org.apache.spark.rdd.RDD import org.apache.spark.sql.sources._ import org.apache.spark.sql.types.StructType import org.apache.spark.sql.{DataFrame, Row, SQLContext, SaveMode} def columnPartition(partitioning: JDBCPartitioningInfo): Array[Partition] = { if (partitioning == null) return Array[Partition](JDBCPartition(null, 0)) val numPartitions = partitioning.numPartitions val column = partitioning.column if (numPartitions == 1) return Array[Partition](JDBCPartition(null, 0)) // Overflow and silliness can happen if you subtract then divide. // Here we get a little roundoff, but that's (hopefully) OK. val stride: Long = (partitioning.upperBound / numPartitions - partitioning.lowerBound / numPartitions) var i: Int = 0 var currentValue: Long = partitioning.lowerBound var ans = new ArrayBuffer[Partition]() while (i < numPartitions) { val lowerBound = if (i != 0) s"$column >= $currentValue" else null currentValue += stride val upperBound = if (i != numPartitions - 1) s"$column < $currentValue" else null val whereClause = if (upperBound == null) { lowerBound } else if (lowerBound == null) { upperBound } else { s"$lowerBound AND $upperBound" } ans += JDBCPartition(whereClause, i) i = i + 1 } ans.toArray } } private[sql] case class JDBCRelation( url: String, table: String, parts: Array[Partition], properties: Properties = new Properties())(@transient val sqlContext: SQLContext) extends BaseRelation with PrunedFilteredScan with InsertableRelation { override val needConversion: Boolean = false override val schema: StructType = JDBCRDD.resolveTable(url, table, properties) override def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = { // Rely on a type erasure hack to pass RDD[InternalRow] back as RDD[Row] JDBCRDD.scanTable( sqlContext.sparkContext, schema, url, properties, table, requiredColumns, filters, parts).asInstanceOf[RDD[Row]] } override def insert(data: DataFrame, overwrite: Boolean): Unit = { data.write .mode(if (overwrite) SaveMode.Overwrite else SaveMode.Append) .jdbc(url, table, properties) } }
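The partitioning logic above is normally reached through the DataFrameReader; a sketch of a partitioned read, assuming a SQLContext named sqlContext is in scope (URL, table, column, and bounds are placeholders):

import java.util.Properties

val connectionProperties = new Properties()
connectionProperties.setProperty("user", "testUser")
connectionProperties.setProperty("password", "testPass")

// columnPartition turns these bounds into 8 non-overlapping WHERE clauses on the id column.
val df = sqlContext.read.jdbc(
  url = "jdbc:postgresql://db-host:5432/mydb",
  table = "events",
  columnName = "id",
  lowerBound = 0L,
  upperBound = 1000000L,
  numPartitions = 8,
  connectionProperties = connectionProperties)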
Example 60
Source File: DefaultSource.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.jdbc import java.util.Properties import org.apache.spark.sql.SQLContext import org.apache.spark.sql.sources.{BaseRelation, RelationProvider, DataSourceRegister} class DefaultSource extends RelationProvider with DataSourceRegister { override def shortName(): String = "jdbc" override def createRelation( sqlContext: SQLContext, parameters: Map[String, String]): BaseRelation = { val url = parameters.getOrElse("url", sys.error("Option 'url' not specified")) val table = parameters.getOrElse("dbtable", sys.error("Option 'dbtable' not specified")) val partitionColumn = parameters.getOrElse("partitionColumn", null) val lowerBound = parameters.getOrElse("lowerBound", null) val upperBound = parameters.getOrElse("upperBound", null) val numPartitions = parameters.getOrElse("numPartitions", null) if (partitionColumn != null && (lowerBound == null || upperBound == null || numPartitions == null)) { sys.error("Partitioning incompletely specified") } val partitionInfo = if (partitionColumn == null) { null } else { JDBCPartitioningInfo( partitionColumn, lowerBound.toLong, upperBound.toLong, numPartitions.toInt) } val parts = JDBCRelation.columnPartition(partitionInfo) val properties = new Properties() // Additional properties that we will pass to getConnection parameters.foreach(kv => properties.setProperty(kv._1, kv._2)) JDBCRelation(url, table, parts, properties)(sqlContext) } }
Example 61
Source File: SQLExecutionSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution import java.util.Properties import scala.collection.parallel.CompositeThrowable import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} import org.apache.spark.sql.SQLContext class SQLExecutionSuite extends SparkFunSuite { test("concurrent query execution (SPARK-10548)") { // Try to reproduce the issue with the old SparkContext val conf = new SparkConf() .setMaster("local[*]") .setAppName("test") val badSparkContext = new BadSparkContext(conf) try { testConcurrentQueryExecution(badSparkContext) fail("unable to reproduce SPARK-10548") } catch { case e: IllegalArgumentException => assert(e.getMessage.contains(SQLExecution.EXECUTION_ID_KEY)) } finally { badSparkContext.stop() } // Verify that the issue is fixed with the latest SparkContext val goodSparkContext = new SparkContext(conf) try { testConcurrentQueryExecution(goodSparkContext) } finally { goodSparkContext.stop() } } private class BadSparkContext(conf: SparkConf) extends SparkContext(conf) { protected[spark] override val localProperties = new InheritableThreadLocal[Properties] { override protected def childValue(parent: Properties): Properties = new Properties(parent) override protected def initialValue(): Properties = new Properties() } }
Example 62
Source File: PostgresIntegrationSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.jdbc import java.sql.Connection import java.util.Properties import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.{Literal, If} import org.apache.spark.tags.DockerTest @DockerTest class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { override val db = new DatabaseOnDocker { override val imageName = "postgres:9.4.5" override val env = Map( "POSTGRES_PASSWORD" -> "rootpass" ) override val jdbcPort = 5432 override def getJdbcUrl(ip: String, port: Int): String = s"jdbc:postgresql://$ip:$port/postgres?user=postgres&password=rootpass" } override def dataPreparation(conn: Connection): Unit = { conn.prepareStatement("CREATE DATABASE foo").executeUpdate() conn.setCatalog("foo") conn.prepareStatement("CREATE TABLE bar (c0 text, c1 integer, c2 double precision, c3 bigint, " + "c4 bit(1), c5 bit(10), c6 bytea, c7 boolean, c8 inet, c9 cidr, " + "c10 integer[], c11 text[], c12 real[])").executeUpdate() conn.prepareStatement("INSERT INTO bar VALUES ('hello', 42, 1.25, 123456789012345, B'0', " + "B'1000100101', E'\\\\xDEADBEEF', true, '172.16.0.42', '192.168.0.0/16', " + """'{1, 2}', '{"a", null, "b"}', '{0.11, 0.22}')""").executeUpdate() } test("Type mapping for various types") { val df = sqlContext.read.jdbc(jdbcUrl, "bar", new Properties) val rows = df.collect() assert(rows.length == 1) val types = rows(0).toSeq.map(x => x.getClass) assert(types.length == 13) assert(classOf[String].isAssignableFrom(types(0))) assert(classOf[java.lang.Integer].isAssignableFrom(types(1))) assert(classOf[java.lang.Double].isAssignableFrom(types(2))) assert(classOf[java.lang.Long].isAssignableFrom(types(3))) assert(classOf[java.lang.Boolean].isAssignableFrom(types(4))) assert(classOf[Array[Byte]].isAssignableFrom(types(5))) assert(classOf[Array[Byte]].isAssignableFrom(types(6))) assert(classOf[java.lang.Boolean].isAssignableFrom(types(7))) assert(classOf[String].isAssignableFrom(types(8))) assert(classOf[String].isAssignableFrom(types(9))) assert(classOf[Seq[Int]].isAssignableFrom(types(10))) assert(classOf[Seq[String]].isAssignableFrom(types(11))) assert(classOf[Seq[Double]].isAssignableFrom(types(12))) assert(rows(0).getString(0).equals("hello")) assert(rows(0).getInt(1) == 42) assert(rows(0).getDouble(2) == 1.25) assert(rows(0).getLong(3) == 123456789012345L) assert(rows(0).getBoolean(4) == false) // BIT(10)'s come back as ASCII strings of ten ASCII 0's and 1's... assert(java.util.Arrays.equals(rows(0).getAs[Array[Byte]](5), Array[Byte](49, 48, 48, 48, 49, 48, 48, 49, 48, 49))) assert(java.util.Arrays.equals(rows(0).getAs[Array[Byte]](6), Array[Byte](0xDE.toByte, 0xAD.toByte, 0xBE.toByte, 0xEF.toByte))) assert(rows(0).getBoolean(7) == true) assert(rows(0).getString(8) == "172.16.0.42") assert(rows(0).getString(9) == "192.168.0.0/16") assert(rows(0).getSeq(10) == Seq(1, 2)) assert(rows(0).getSeq(11) == Seq("a", null, "b")) assert(rows(0).getSeq(12).toSeq == Seq(0.11f, 0.22f)) } test("Basic write test") { val df = sqlContext.read.jdbc(jdbcUrl, "bar", new Properties) // Test only that it doesn't crash. df.write.jdbc(jdbcUrl, "public.barcopy", new Properties) // Test write null values. df.select(df.queryExecution.analyzed.output.map { a => Column(Literal.create(null, a.dataType)).as(a.name) }: _*).write.jdbc(jdbcUrl, "public.barcopy2", new Properties) } }
Example 63
Source File: GangliaSink.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.MetricRegistry import com.codahale.metrics.ganglia.GangliaReporter import info.ganglia.gmetric4j.gmetric.GMetric import info.ganglia.gmetric4j.gmetric.GMetric.UDPAddressingMode import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem class GangliaSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val GANGLIA_KEY_PERIOD = "period" val GANGLIA_DEFAULT_PERIOD = 10 val GANGLIA_KEY_UNIT = "unit" val GANGLIA_DEFAULT_UNIT: TimeUnit = TimeUnit.SECONDS val GANGLIA_KEY_MODE = "mode" val GANGLIA_DEFAULT_MODE: UDPAddressingMode = GMetric.UDPAddressingMode.MULTICAST // TTL for multicast messages. If listeners are X hops away in network, must be at least X. val GANGLIA_KEY_TTL = "ttl" val GANGLIA_DEFAULT_TTL = 1 val GANGLIA_KEY_HOST = "host" val GANGLIA_KEY_PORT = "port" def propertyToOption(prop: String): Option[String] = Option(property.getProperty(prop)) if (!propertyToOption(GANGLIA_KEY_HOST).isDefined) { throw new Exception("Ganglia sink requires 'host' property.") } if (!propertyToOption(GANGLIA_KEY_PORT).isDefined) { throw new Exception("Ganglia sink requires 'port' property.") } val host = propertyToOption(GANGLIA_KEY_HOST).get val port = propertyToOption(GANGLIA_KEY_PORT).get.toInt val ttl = propertyToOption(GANGLIA_KEY_TTL).map(_.toInt).getOrElse(GANGLIA_DEFAULT_TTL) val mode: UDPAddressingMode = propertyToOption(GANGLIA_KEY_MODE) .map(u => GMetric.UDPAddressingMode.valueOf(u.toUpperCase)).getOrElse(GANGLIA_DEFAULT_MODE) val pollPeriod = propertyToOption(GANGLIA_KEY_PERIOD).map(_.toInt) .getOrElse(GANGLIA_DEFAULT_PERIOD) val pollUnit: TimeUnit = propertyToOption(GANGLIA_KEY_UNIT) .map(u => TimeUnit.valueOf(u.toUpperCase)) .getOrElse(GANGLIA_DEFAULT_UNIT) MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val ganglia = new GMetric(host, port, mode, ttl) val reporter: GangliaReporter = GangliaReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build(ganglia) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 64
Source File: MetricsConfig.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics import java.io.{FileInputStream, InputStream} import java.util.Properties import scala.collection.JavaConverters._ import scala.collection.mutable import scala.util.matching.Regex import org.apache.spark.util.Utils import org.apache.spark.{Logging, SparkConf} private[spark] class MetricsConfig(conf: SparkConf) extends Logging { private val DEFAULT_PREFIX = "*" private val INSTANCE_REGEX = "^(\\*|[a-zA-Z]+)\\.(.+)".r private val DEFAULT_METRICS_CONF_FILENAME = "metrics.properties" private[metrics] val properties = new Properties() private[metrics] var propertyCategories: mutable.HashMap[String, Properties] = null private def setDefaultProperties(prop: Properties) { prop.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet") prop.setProperty("*.sink.servlet.path", "/metrics/json") prop.setProperty("master.sink.servlet.path", "/metrics/master/json") prop.setProperty("applications.sink.servlet.path", "/metrics/applications/json") } def initialize() { // Add default properties in case there's no properties file setDefaultProperties(properties) loadPropertiesFromFile(conf.getOption("spark.metrics.conf")) // Also look for the properties in provided Spark configuration val prefix = "spark.metrics.conf." conf.getAll.foreach { case (k, v) if k.startsWith(prefix) => properties.setProperty(k.substring(prefix.length()), v) case _ => } propertyCategories = subProperties(properties, INSTANCE_REGEX) if (propertyCategories.contains(DEFAULT_PREFIX)) { val defaultProperty = propertyCategories(DEFAULT_PREFIX).asScala for((inst, prop) <- propertyCategories if (inst != DEFAULT_PREFIX); (k, v) <- defaultProperty if (prop.get(k) == null)) { prop.put(k, v) } } } def subProperties(prop: Properties, regex: Regex): mutable.HashMap[String, Properties] = { val subProperties = new mutable.HashMap[String, Properties] prop.asScala.foreach { kv => if (regex.findPrefixOf(kv._1.toString).isDefined) { val regex(prefix, suffix) = kv._1.toString subProperties.getOrElseUpdate(prefix, new Properties).setProperty(suffix, kv._2.toString) } } subProperties } def getInstance(inst: String): Properties = { propertyCategories.get(inst) match { case Some(s) => s case None => propertyCategories.getOrElse(DEFAULT_PREFIX, new Properties) } } private[this] def loadPropertiesFromFile(path: Option[String]): Unit = { var is: InputStream = null try { is = path match { case Some(f) => new FileInputStream(f) case None => Utils.getSparkClassLoader.getResourceAsStream(DEFAULT_METRICS_CONF_FILENAME) } if (is != null) { properties.load(is) } } catch { case e: Exception => val file = path.getOrElse(DEFAULT_METRICS_CONF_FILENAME) logError(s"Error loading configuration file $file", e) } finally { if (is != null) { is.close() } } } }
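The subProperties helper above splits keys of the form instance.rest.of.key into one Properties object per instance. A self-contained sketch that mirrors that split with the same regex (it does not call the package-private MetricsConfig class):

import java.util.Properties

import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.util.matching.Regex

object SubPropertiesSketch extends App {
  val InstanceRegex: Regex = "^(\\*|[a-zA-Z]+)\\.(.+)".r

  val props = new Properties()
  props.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet")
  props.setProperty("driver.sink.console.period", "10")

  val grouped = new mutable.HashMap[String, Properties]
  props.asScala.foreach { case (key, value) =>
    key match {
      case InstanceRegex(instance, suffix) =>
        grouped.getOrElseUpdate(instance, new Properties).setProperty(suffix, value)
      case _ => // keys without an instance prefix are skipped, as in MetricsConfig
    }
  }
  println(grouped) // Map(* -> {sink.servlet.class=...}, driver -> {sink.console.period=10})
}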
Example 65
Source File: CsvSink.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.io.File import java.util.{Locale, Properties} import java.util.concurrent.TimeUnit import com.codahale.metrics.{CsvReporter, MetricRegistry} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class CsvSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val CSV_KEY_PERIOD = "period" val CSV_KEY_UNIT = "unit" val CSV_KEY_DIR = "directory" val CSV_DEFAULT_PERIOD = 10 val CSV_DEFAULT_UNIT = "SECONDS" val CSV_DEFAULT_DIR = "/tmp/" val pollPeriod = Option(property.getProperty(CSV_KEY_PERIOD)) match { case Some(s) => s.toInt case None => CSV_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(CSV_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(CSV_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val pollDir = Option(property.getProperty(CSV_KEY_DIR)) match { case Some(s) => s case None => CSV_DEFAULT_DIR } val reporter: CsvReporter = CsvReporter.forRegistry(registry) .formatFor(Locale.US) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build(new File(pollDir)) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 66
Source File: MetricsServlet.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import javax.servlet.http.HttpServletRequest import com.codahale.metrics.MetricRegistry import com.codahale.metrics.json.MetricsModule import com.fasterxml.jackson.databind.ObjectMapper import org.eclipse.jetty.servlet.ServletContextHandler import org.apache.spark.{SparkConf, SecurityManager} import org.apache.spark.ui.JettyUtils._ private[spark] class MetricsServlet( val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val SERVLET_KEY_PATH = "path" val SERVLET_KEY_SAMPLE = "sample" val SERVLET_DEFAULT_SAMPLE = false val servletPath = property.getProperty(SERVLET_KEY_PATH) val servletShowSample = Option(property.getProperty(SERVLET_KEY_SAMPLE)).map(_.toBoolean) .getOrElse(SERVLET_DEFAULT_SAMPLE) val mapper = new ObjectMapper().registerModule( new MetricsModule(TimeUnit.SECONDS, TimeUnit.MILLISECONDS, servletShowSample)) def getHandlers(conf: SparkConf): Array[ServletContextHandler] = { Array[ServletContextHandler]( createServletHandler(servletPath, new ServletParams(request => getMetricsSnapshot(request), "text/json"), securityMgr, conf) ) } def getMetricsSnapshot(request: HttpServletRequest): String = { mapper.writeValueAsString(registry) } override def start() { } override def stop() { } override def report() { } }
Example 67
Source File: Slf4jSink.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.{Slf4jReporter, MetricRegistry} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class Slf4jSink( val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val SLF4J_DEFAULT_PERIOD = 10 val SLF4J_DEFAULT_UNIT = "SECONDS" val SLF4J_KEY_PERIOD = "period" val SLF4J_KEY_UNIT = "unit" val pollPeriod = Option(property.getProperty(SLF4J_KEY_PERIOD)) match { case Some(s) => s.toInt case None => SLF4J_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(SLF4J_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(SLF4J_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val reporter: Slf4jReporter = Slf4jReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build() override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 68
Source File: ConsoleSink.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.{ConsoleReporter, MetricRegistry} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class ConsoleSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val CONSOLE_DEFAULT_PERIOD = 10 val CONSOLE_DEFAULT_UNIT = "SECONDS" val CONSOLE_KEY_PERIOD = "period" val CONSOLE_KEY_UNIT = "unit" val pollPeriod = Option(property.getProperty(CONSOLE_KEY_PERIOD)) match { case Some(s) => s.toInt case None => CONSOLE_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(CONSOLE_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(CONSOLE_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val reporter: ConsoleReporter = ConsoleReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build() override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 69
Source File: GraphiteSink.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.net.InetSocketAddress import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.MetricRegistry import com.codahale.metrics.graphite.{GraphiteUDP, Graphite, GraphiteReporter} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class GraphiteSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val GRAPHITE_DEFAULT_PERIOD = 10 val GRAPHITE_DEFAULT_UNIT = "SECONDS" val GRAPHITE_DEFAULT_PREFIX = "" val GRAPHITE_KEY_HOST = "host" val GRAPHITE_KEY_PORT = "port" val GRAPHITE_KEY_PERIOD = "period" val GRAPHITE_KEY_UNIT = "unit" val GRAPHITE_KEY_PREFIX = "prefix" val GRAPHITE_KEY_PROTOCOL = "protocol" def propertyToOption(prop: String): Option[String] = Option(property.getProperty(prop)) if (!propertyToOption(GRAPHITE_KEY_HOST).isDefined) { throw new Exception("Graphite sink requires 'host' property.") } if (!propertyToOption(GRAPHITE_KEY_PORT).isDefined) { throw new Exception("Graphite sink requires 'port' property.") } val host = propertyToOption(GRAPHITE_KEY_HOST).get val port = propertyToOption(GRAPHITE_KEY_PORT).get.toInt val pollPeriod = propertyToOption(GRAPHITE_KEY_PERIOD) match { case Some(s) => s.toInt case None => GRAPHITE_DEFAULT_PERIOD } val pollUnit: TimeUnit = propertyToOption(GRAPHITE_KEY_UNIT) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(GRAPHITE_DEFAULT_UNIT) } val prefix = propertyToOption(GRAPHITE_KEY_PREFIX).getOrElse(GRAPHITE_DEFAULT_PREFIX) MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val graphite = propertyToOption(GRAPHITE_KEY_PROTOCOL).map(_.toLowerCase) match { case Some("udp") => new GraphiteUDP(new InetSocketAddress(host, port)) case Some("tcp") | None => new Graphite(new InetSocketAddress(host, port)) case Some(p) => throw new Exception(s"Invalid Graphite protocol: $p") } val reporter: GraphiteReporter = GraphiteReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .prefixedWith(prefix) .build(graphite) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 70
Source File: ResetSystemProperties.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.util.Properties import org.apache.commons.lang3.SerializationUtils import org.scalatest.{BeforeAndAfterEach, Suite} import org.apache.spark.SparkFunSuite private[spark] trait ResetSystemProperties extends BeforeAndAfterEach { this: Suite => var oldProperties: Properties = null override def beforeEach(): Unit = { // we need SerializationUtils.clone instead of `new Properties(System.getProperties()` because // the later way of creating a copy does not copy the properties but it initializes a new // Properties object with the given properties as defaults. They are not recognized at all // by standard Scala wrapper over Java Properties then. oldProperties = SerializationUtils.clone(System.getProperties) super.beforeEach() } override def afterEach(): Unit = { try { super.afterEach() } finally { System.setProperties(oldProperties) oldProperties = null } } }
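A minimal sketch of how a suite would mix this trait in; since both ResetSystemProperties and SparkFunSuite are private[spark], the sketch lives in an org.apache.spark package, and the test body is illustrative only.

package org.apache.spark.util

import org.apache.spark.SparkFunSuite

class PropertiesRollbackSuite extends SparkFunSuite with ResetSystemProperties {
  test("system properties set inside a test are rolled back afterwards") {
    System.setProperty("spark.test.sketch.flag", "on")
    assert(System.getProperty("spark.test.sketch.flag") === "on")
    // afterEach() restores the snapshot taken in beforeEach(), so the property
    // is gone again before the next test runs.
  }
}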
Example 71
Source File: PropertiesConversions.scala From asura with MIT License | 5 votes |
package asura.common.config import java.util.Properties import com.typesafe.config.Config object PropertiesConversions { def toProperties(configs: Config*): Properties = { val properties = new Properties() configs.foreach(config => { config.entrySet().forEach(entry => { properties.put(entry.getKey, config.getString(entry.getKey)) }) }) properties } }
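A short usage sketch, assuming Typesafe Config objects built inline with ConfigFactory.parseString; when two configs share a key, the one passed later wins because it is copied into the Properties last.

import com.typesafe.config.ConfigFactory

import asura.common.config.PropertiesConversions

object ToPropertiesSketch extends App {
  val kafkaConf = ConfigFactory.parseString("""bootstrap.servers = "localhost:9092" """)
  val groupConf = ConfigFactory.parseString("""group.id = "asura-demo" """)

  val props = PropertiesConversions.toProperties(kafkaConf, groupConf)
  println(props.getProperty("bootstrap.servers")) // localhost:9092
  println(props.getProperty("group.id"))          // asura-demo
}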
Example 72
Source File: PackageTest.scala From lighthouse with Apache License 2.0 | 5 votes |
package be.dataminded.lighthouse.datalake import java.util.Properties import org.scalatest.funspec.AnyFunSpec import org.scalatest.matchers.should.Matchers class PackageTest extends AnyFunSpec with Matchers { describe("asProperties") { it("should convert a Scala to Java Properties implicitly") { val properties: Properties = Map("test1" -> "1", "test2" -> "2") properties.getProperty("test1") should equal("1") properties.getProperty("test2") should equal("2") } it("does only convert maps of type Map[String, String]") { assertDoesNotCompile("val properties: Properties = Map(\"test1\" -> 1, \"test2\" -> 2)") } } }
Example 73
Source File: BuildProperties.scala From cosmos with Apache License 2.0 | 5 votes |
package com.mesosphere.cosmos import java.util.Properties private[cosmos] class BuildProperties private[cosmos](resourceName: String) { private val props = { val props = new Properties() Option(this.getClass.getResourceAsStream(resourceName)) match { case Some(is) => props.load(is) is.close() props case _ => throw new IllegalStateException(s"Unable to load classpath resources: $resourceName") } } } object BuildProperties { private[this] val loaded = new BuildProperties("/build.properties") def apply(): BuildProperties = loaded implicit class BuildPropertiesOps(val bp: BuildProperties) extends AnyVal { def cosmosVersion: String = Option(bp.props.getProperty("cosmos.version")).getOrElse("unknown-version") } }
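A small usage sketch; because the class is private[cosmos], the caller has to sit inside the com.mesosphere.cosmos package, and the version string falls back to "unknown-version" when the key is absent from /build.properties.

package com.mesosphere.cosmos

import com.mesosphere.cosmos.BuildProperties.BuildPropertiesOps

object VersionSketch extends App {
  // Reuses the cached instance backed by /build.properties on the classpath.
  println(BuildProperties().cosmosVersion)
}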
Example 74
Source File: KafkaBatchProducer.scala From gimel with Apache License 2.0 | 5 votes |
package com.paypal.gimel.kafka2.writer import java.util.Properties import scala.collection.JavaConverters._ import scala.collection.immutable.Map import scala.language.implicitConversions import scala.reflect.runtime.universe._ import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.spark.rdd.RDD import org.apache.spark.sql.DataFrame import com.paypal.gimel.kafka2.conf.{KafkaClientConfiguration, KafkaConstants} import com.paypal.gimel.kafka2.utilities.{KafkaOptionsLoaderUtils, KafkaUtilitiesException} def produceToKafka(conf: KafkaClientConfiguration, dataFrame: DataFrame): Unit = { def MethodName: String = new Exception().getStackTrace().apply(1).getMethodName() logger.info(" @Begin --> " + MethodName) val kafkaProps: Properties = conf.kafkaProducerProps logger.info(s"Kafka Props for Producer -> ${kafkaProps.asScala.mkString("\n")}") val kafkaTopic = conf.kafkaTopics val kafkaTopicsOptionsMap : Map[String, Map[String, String]] = KafkaOptionsLoaderUtils.getAllKafkaTopicsOptions(conf) logger.info("Kafka options loaded -> " + kafkaTopicsOptionsMap) val eachKafkaTopicToOptionsMap = KafkaOptionsLoaderUtils.getEachKafkaTopicToOptionsMap(kafkaTopicsOptionsMap) logger.info("Begin Publishing to Kafka....") try { val kafkaTopicOptions = eachKafkaTopicToOptionsMap.get(kafkaTopic) kafkaTopicOptions match { case None => throw new IllegalStateException(s"""Could not load options for the kafka topic -> $kafkaTopic""") case Some(kafkaOptions) => dataFrame .write .format(KafkaConstants.KAFKA_FORMAT) .option(KafkaConstants.KAFKA_TOPIC, kafkaTopic) .options(kafkaOptions) .save() } } catch { case ex: Throwable => { ex.printStackTrace() val msg = s""" |kafkaTopic -> ${kafkaTopic} |kafkaParams --> ${kafkaProps.asScala.mkString("\n")}} """.stripMargin throw new KafkaUtilitiesException(s"Failed While Pushing Data Into Kafka \n ${msg}") } } logger.info("Publish to Kafka - Completed !") } }
Example 75
Source File: KafkaStreamProducer.scala From gimel with Apache License 2.0 | 5 votes |
package com.paypal.gimel.kafka2.writer import java.util.Properties import scala.collection.JavaConverters._ import scala.collection.immutable.Map import scala.language.implicitConversions import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.streaming.DataStreamWriter import com.paypal.gimel.common.conf.GimelConstants import com.paypal.gimel.kafka2.conf.{KafkaClientConfiguration, KafkaConstants} import com.paypal.gimel.kafka2.utilities.{KafkaOptionsLoaderUtils, KafkaUtilitiesException} object KafkaStreamProducer { val logger = com.paypal.gimel.logger.Logger() def produceStreamToKafka(conf: KafkaClientConfiguration, dataFrame: DataFrame): DataStreamWriter[Row] = { def MethodName: String = new Exception().getStackTrace().apply(1).getMethodName() logger.info(" @Begin --> " + MethodName) val kafkaProps: Properties = conf.kafkaProducerProps logger.info(s"Kafka Props for Producer -> ${kafkaProps.asScala.mkString("\n")}") logger.info("Begin Publishing to Kafka....") // Retrieve kafka options from OptionsLoader if specified val kafkaTopicsOptionsMap : Map[String, Map[String, String]] = KafkaOptionsLoaderUtils.getAllKafkaTopicsOptions(conf) logger.info("kafkaTopicsOptionsMap -> " + kafkaTopicsOptionsMap) try { val eachKafkaTopicToOptionsMap = KafkaOptionsLoaderUtils.getEachKafkaTopicToOptionsMap(kafkaTopicsOptionsMap) val kafkaTopicOptions = eachKafkaTopicToOptionsMap.get(conf.kafkaTopics) kafkaTopicOptions match { case None => throw new IllegalStateException(s"""Could not load options for the kafka topic -> ${conf.kafkaTopics}""") case Some(kafkaOptions) => dataFrame .writeStream .format(KafkaConstants.KAFKA_FORMAT) .option(KafkaConstants.KAFKA_TOPIC, conf.kafkaTopics) .option(GimelConstants.STREAMING_CHECKPOINT_LOCATION, conf.streamingCheckpointLocation) .outputMode(conf.streamingOutputMode) .options(kafkaOptions) } } catch { case ex: Throwable => { ex.printStackTrace() val msg = s""" |kafkaTopic -> ${conf.kafkaTopics} |kafkaParams --> ${kafkaProps.asScala.mkString("\n")}} """.stripMargin throw new KafkaUtilitiesException(s"Failed While Pushing Data Into Kafka \n ${msg}") } } } }
Example 76
Source File: GimelServicesProperties.scala From gimel with Apache License 2.0 | 5 votes |
package com.paypal.gimel.common.gimelservices import java.util.Properties import scala.collection.JavaConverters._ import com.paypal.gimel.common.conf.{GimelConstants, PCatalogUrlConfigs} import com.paypal.gimel.common.utilities.GenericUtils.{getConfigValue, withResources} import com.paypal.gimel.logger.Logger class GimelServicesProperties(userProps: Map[String, String] = Map[String, String]()) { // Get Logger val logger = Logger() // Get Properties val props: Map[String, String] = getProps // Rest Services Method, Host & Port val restMethod: String = getConfigValue(PCatalogUrlConfigs.API_PROTOCOL, userProps, props) val restHost: String = getConfigValue(PCatalogUrlConfigs.API_HOST, userProps, props) val restPort: String = getConfigValue(PCatalogUrlConfigs.API_PORT, userProps, props) // Rest APIs val apiClusterByName: String = userProps.getOrElse(PCatalogUrlConfigs.CLUSTER_BY_NAME, props(PCatalogUrlConfigs.CLUSTER_BY_NAME)) val apiClusterById: String = userProps.getOrElse(PCatalogUrlConfigs.CLUSTER_BY_ID, props(PCatalogUrlConfigs.CLUSTER_BY_ID)) val apiClusters: String = userProps.getOrElse(PCatalogUrlConfigs.API_CLUSTERS, props(PCatalogUrlConfigs.API_CLUSTERS)) val apiObjectSchema: String = userProps.getOrElse(PCatalogUrlConfigs.API_OBJECT_SCHEMA, props(PCatalogUrlConfigs.API_OBJECT_SCHEMA)) val apiDeactivateObjectSchema: String = userProps.getOrElse(PCatalogUrlConfigs.DEACTIVATE_OBJECT_SCHEMA, props(PCatalogUrlConfigs.DEACTIVATE_OBJECT_SCHEMA)) val apiObjectSchemaByStorageSystemId: String = userProps.getOrElse(PCatalogUrlConfigs.OBJECT_SCHEMA_BY_SYSTEM_ID, props(PCatalogUrlConfigs.OBJECT_SCHEMA_BY_SYSTEM_ID)) val apiPagedObjectSchemaByStorageSystemId: String = userProps.getOrElse(PCatalogUrlConfigs.PAGED_OBJECT_SCHEMA_BY_SYSTEM_ID, props(PCatalogUrlConfigs.PAGED_OBJECT_SCHEMA_BY_SYSTEM_ID)) val apiUnregisteredObjectSchemaByStorageSystemId: String = userProps.getOrElse(PCatalogUrlConfigs.UNREGISTERED_OBJECT_SCHEMA_BY_SYSTEM_ID, props(PCatalogUrlConfigs.UNREGISTERED_OBJECT_SCHEMA_BY_SYSTEM_ID)) val apiObjectSchemaByStorageSystemIdContainerObject: String = userProps.getOrElse(PCatalogUrlConfigs.OBJECT_SCHEMA_BY_SYSTEM_CONTAINER_OBJECT, props(PCatalogUrlConfigs.OBJECT_SCHEMA_BY_SYSTEM_CONTAINER_OBJECT)) val apiDataSetRegister: String = userProps.getOrElse(PCatalogUrlConfigs.REGISTER_DATASET, props(PCatalogUrlConfigs.REGISTER_DATASET)) val apiDataSetChangeLog: String = userProps.getOrElse(PCatalogUrlConfigs.CHANGE_LOG_DATASET, props(PCatalogUrlConfigs.CHANGE_LOG_DATASET)) val apiDataSetDeploymentStatus: String = userProps.getOrElse(PCatalogUrlConfigs.DATASET_DEPLOYMENT_STATUS_FOR_SUCESS, props(PCatalogUrlConfigs.DATASET_DEPLOYMENT_STATUS_FOR_SUCESS)) val apiDataSetFailureDeploymentStatus: String = userProps.getOrElse(PCatalogUrlConfigs.DATASET_DEPLOYMENT_STATUS_FOR_FAILURE, props(PCatalogUrlConfigs.DATASET_DEPLOYMENT_STATUS_FOR_FAILURE)) val apiStorageSystems: String = userProps.getOrElse(PCatalogUrlConfigs.API_STORAGE_SYSTEMS, props(PCatalogUrlConfigs.API_STORAGE_SYSTEMS)) val apiStorageSystemById: String = userProps.getOrElse(PCatalogUrlConfigs.SYSTEM_BY_ID, props(PCatalogUrlConfigs.SYSTEM_BY_ID)) val apiStorageSystemAttributesByName: String = userProps.getOrElse(PCatalogUrlConfigs.SYSTEM_ATTRIBUTES_BY_NAME, props(PCatalogUrlConfigs.SYSTEM_ATTRIBUTES_BY_NAME)) val apiStorageTypeById: String = userProps.getOrElse(PCatalogUrlConfigs.TYPE_BY_ID, props(PCatalogUrlConfigs.TYPE_BY_ID)) val apiObjectSchemaContainers: String = userProps.getOrElse(PCatalogUrlConfigs.CONTAINERS_BY_OBJECT_SCHEMA,
props(PCatalogUrlConfigs.CONTAINERS_BY_OBJECT_SCHEMA)) val apiStorageTypeAttributeKeys: String = userProps.getOrElse(PCatalogUrlConfigs.ATTRIBUTE_KEYS_BY_TYPE_ID, props(PCatalogUrlConfigs.ATTRIBUTE_KEYS_BY_TYPE_ID)) val apiUserByName: String = userProps.getOrElse(PCatalogUrlConfigs.USER_BY_NAME, props(PCatalogUrlConfigs.USER_BY_NAME)) val apiDatasetPost: String = userProps.getOrElse(PCatalogUrlConfigs.REGISTER_DATASET, props(PCatalogUrlConfigs.REGISTER_DATASET)) val apiStorageSystemContainers: String = userProps.getOrElse(PCatalogUrlConfigs.STORAGE_SYSTEM_CONTAINERS, props(PCatalogUrlConfigs.STORAGE_SYSTEM_CONTAINERS)) val apiDatasetByName: String = userProps.getOrElse(PCatalogUrlConfigs.DATASET_BY_NAME, props(PCatalogUrlConfigs.DATASET_BY_NAME)) val apiRangerPoliciesByLocation: String = userProps.getOrElse(PCatalogUrlConfigs.RANGER_POLICIES_BY_LOCATION, props(PCatalogUrlConfigs.RANGER_POLICIES_BY_LOCATION)) val apiStorageTypes = userProps.getOrElse(PCatalogUrlConfigs.STORAGE_TYPES, props(PCatalogUrlConfigs.STORAGE_TYPES)) val apiZoneByName = userProps.getOrElse(PCatalogUrlConfigs.ZONE_BY_NAME, props(PCatalogUrlConfigs.ZONE_BY_NAME)) val apiPostStorageSystem = userProps.getOrElse(PCatalogUrlConfigs.POST_STORAGE_SYSTEM, props(PCatalogUrlConfigs.POST_STORAGE_SYSTEM)) val appName = userProps.getOrElse(GimelConstants.APP_NAME, userProps.getOrElse(GimelConstants.SPARK_APP_NAME, "Unknown")) val appId = userProps.getOrElse(GimelConstants.APP_ID, userProps.getOrElse(GimelConstants.SPARK_APP_ID, "Unknown")) val appTag: String = userProps.getOrElse(GimelConstants.APP_TAG, "Unknown").toString // Rest URLs val baseUrl = s"$restMethod://$restHost:$restPort" val urlClusterByName = s"$baseUrl$apiClusterByName" val urlClusterById = s"$baseUrl$apiClusterById" val urlClusters = s"$baseUrl$apiClusters" val urlObjectSchema = s"$baseUrl$apiObjectSchema" val urlDeactivateObject = s"$baseUrl$apiDeactivateObjectSchema" val urlObjectSchemaByStorageSystemId = s"$baseUrl$apiObjectSchemaByStorageSystemId" val urlPagedObjectSchemaByStorageSystemId = s"$baseUrl$apiPagedObjectSchemaByStorageSystemId" val urlUnregisteredObjectSchemaByStorageSystemId = s"$baseUrl$apiUnregisteredObjectSchemaByStorageSystemId" val urlObjectSchemaBySystemContainerObject = s"$baseUrl$apiObjectSchemaByStorageSystemIdContainerObject" val urlDataSetRegister = s"$baseUrl$apiDataSetRegister" val urlDataSetChangeLog = s"$baseUrl$apiDataSetChangeLog" val urlDataSetDeploymentStatus = s"$baseUrl$apiDataSetDeploymentStatus" val urlDataSetFailureDeploymentStatus = s"$baseUrl$apiDataSetFailureDeploymentStatus" val urlStorageSystems = s"$baseUrl$apiStorageSystems" val urlStorageSystemById = s"$baseUrl$apiStorageSystemById" val urlStorageSystemAttributesByName = s"$baseUrl$apiStorageSystemAttributesByName" val urlStorageTypeById = s"$baseUrl$apiStorageTypeById" val urlObjectSchemaContainers = s"$baseUrl$apiObjectSchemaContainers" val urlStorageTypeAttributeKeys = s"$baseUrl$apiStorageTypeAttributeKeys" val urlUserByName = s"$baseUrl$apiUserByName" val urlDataSetPost = s"$baseUrl$apiDatasetPost" val urlStorageSystemContainers = s"$baseUrl$apiStorageSystemContainers" val urlRangerPoliciesByLocation = s"$baseUrl$apiRangerPoliciesByLocation" val urlDataSetByName = s"$baseUrl$apiDatasetByName" val urlStorageTypes = s"$baseUrl$apiStorageTypes" val urlByZoneName = s"$baseUrl$apiZoneByName" val urlStorageSystemPost = s"$baseUrl$apiPostStorageSystem" // Druid URLs val restDruidMethod: String = userProps.getOrElse(PCatalogUrlConfigs.REST_DRUID_PROTOCOL,
props(PCatalogUrlConfigs.REST_DRUID_PROTOCOL)) val restDruidHost: String = userProps.getOrElse(PCatalogUrlConfigs.REST_DRUID_HOST, props(PCatalogUrlConfigs.REST_DRUID_HOST)) val restDruidPort: String = userProps.getOrElse(PCatalogUrlConfigs.REST_DRUID_PORT, props(PCatalogUrlConfigs.REST_DRUID_PORT)) val baseDruidUrl = s"$restDruidMethod://$restDruidHost:$restDruidPort" val apiDruidDataSource: String = userProps.getOrElse(PCatalogUrlConfigs.REST_DRUID_DATASOURCES, props(PCatalogUrlConfigs.REST_DRUID_DATASOURCES)) val urlDruidDataSource = s"$baseDruidUrl$apiDruidDataSource" val apiDruidFull: String = userProps.getOrElse(PCatalogUrlConfigs.REST_DRUID_FULL, props(PCatalogUrlConfigs.REST_DRUID_FULL)) def apply(params: Map[String, String]): GimelServicesProperties = new GimelServicesProperties(params) }
Example 77
Source File: KafkaAdminUtils.scala From gimel with Apache License 2.0 | 5 votes |
package com.paypal.gimel.common.storageadmin import java.util.Properties import kafka.admin._ import kafka.server.ConfigType import kafka.utils.ZkUtils import org.I0Itec.zkclient.{ZkClient, ZkConnection} import org.I0Itec.zkclient.exception.ZkMarshallingError import org.I0Itec.zkclient.serialize.ZkSerializer import org.apache.kafka.common.security.JaasUtils import com.paypal.gimel.logger.Logger object KafkaAdminUtils { val logger = Logger() val isSecurityEnabled = JaasUtils.isZkSecurityEnabled() val sessionTimeOutInMs: Int = 10 * 1000 val connectionTimeOutInMs: Int = 10 * 1000 val zkClient: (String) => ZkClient = new ZkClient(_: String, sessionTimeOutInMs, connectionTimeOutInMs, GimelZKStringSerializer) val zkConnection: (String) => ZkConnection = new ZkConnection(_: String, sessionTimeOutInMs) def isTopicExists(zookKeeperHostAndPort: String, kafkaTopicName: String): Boolean = { def MethodName: String = new Exception().getStackTrace().apply(1).getMethodName() logger.info(" @Begin --> " + MethodName) val client = zkClient(zookKeeperHostAndPort) val connect = zkConnection(zookKeeperHostAndPort) val zkUtil: ZkUtils = new ZkUtils(client, connect, isSecurityEnabled) val result = AdminUtils.topicExists(zkUtil, kafkaTopicName) connect.close() result } } object GimelZKStringSerializer extends ZkSerializer { @throws(classOf[ZkMarshallingError]) def serialize(data: Object): Array[Byte] = { data.asInstanceOf[String].getBytes("UTF-8") } @throws(classOf[ZkMarshallingError]) def deserialize(bytes: Array[Byte]): Object = { if (bytes == null) { null } else { new String(bytes, "UTF-8") } } }
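A usage sketch for the helper above; the ZooKeeper address and topic name are placeholders.

import com.paypal.gimel.common.storageadmin.KafkaAdminUtils

object TopicCheckSketch extends App {
  val exists = KafkaAdminUtils.isTopicExists("localhost:2181", "gimel.demo.topic")
  println(s"topic exists: $exists")
}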
Example 78
Source File: GimelProperties.scala From gimel with Apache License 2.0 | 5 votes |
package com.paypal.gimel.common.conf import java.io.{File, FileInputStream} import java.util.{Calendar, Properties} import scala.collection.JavaConverters._ import scala.collection.mutable import com.paypal.gimel.logger.Logger class GimelProperties(userProps: Map[String, String] = Map[String, String]()) { // Get Logger val logger = Logger() logger.info(s"Initiating --> ${this.getClass.getName}") // Get Properties val props: mutable.Map[String, String] = getProps val runTagUUID: String = java.util.UUID.randomUUID.toString val startTimeMS: String = Calendar.getInstance().getTimeInMillis.toString val tagToAdd: String = s"_$startTimeMS" private def getConf(key: String): String = { userProps.getOrElse(key, props(key)) } // Kafka Properties val kafkaBroker: String = getConf(GimelConstants.KAFKA_BROKER_LIST) val kafkaConsumerCheckPointRoot: String = getConf(GimelConstants.KAFKA_CONSUMER_CHECKPOINT_PATH) val kafkaAvroSchemaKey: String = getConf(GimelConstants.KAFKA_CDH_SCHEMA) val confluentSchemaURL: String = getConf(GimelConstants.CONFLUENT_SCHEMA_URL) val hbaseNameSpace: String = getConf(GimelConstants.HBASE_NAMESPACE) val zkHostAndPort: String = getConf(GimelConstants.ZOOKEEPER_LIST) val zkPrefix: String = getConf(GimelConstants.ZOOKEEPER_STATE) val esHost: String = getConf(GimelConstants.ES_NODE) val esPort: String = getConf(GimelConstants.ES_PORT) // Kerberos val keytab: String = getConf(GimelConstants.KEY_TAB) val principal: String = getConf(GimelConstants.KEY_TAB_PRINCIPAL) val cluster: String = getConf(GimelConstants.CLUSTER) val dataSetDeploymentClusters: String = getConf(GimelConstants.DEPLOYMENT_CLUSTERS) val defaultESCluster: String = props(GimelConstants.ES_POLLING_STORAGES) def hiveURL(cluster: String): String = { userProps.getOrElse(s"gimel.hive.$cluster.url", props(s"gimel.hive.$cluster.url")) } def esURL(escluster: String): String = { val alternateConfig = props(s"gimel.es.${defaultESCluster}.url") userProps.getOrElse(GimelConstants.ES_URL_WITH_PORT, alternateConfig) } def apply(params: Map[String, String]): GimelProperties = new GimelProperties(params) }
Example 79
Source File: MavenMutantRunner.scala From stryker4s with Apache License 2.0 | 5 votes |
package stryker4s.maven.runner import java.nio.file.Path import java.util.Properties import better.files._ import org.apache.maven.project.MavenProject import org.apache.maven.shared.invoker.{DefaultInvocationRequest, InvocationRequest, Invoker} import stryker4s.config.Config import stryker4s.model.{Killed, MavenRunnerContext, Mutant, MutantRunResult, Survived} import stryker4s.mutants.findmutants.SourceCollector import stryker4s.report.Reporter import stryker4s.run.MutantRunner import scala.collection.JavaConverters._ import stryker4s.config.TestFilter class MavenMutantRunner(project: MavenProject, invoker: Invoker, sourceCollector: SourceCollector, reporter: Reporter)( implicit config: Config ) extends MutantRunner(sourceCollector, reporter) { type Context = MavenRunnerContext def initializeTestContext(tmpDir: File): Context = { val goals = List("test") val properties = new Properties(project.getProperties) setTestProperties(properties) invoker.setWorkingDirectory(tmpDir.toJava) MavenRunnerContext(properties, goals, tmpDir) } override def runInitialTest(context: Context): Boolean = { val request = createRequest(context) val result = invoker.execute(request) result.getExitCode == 0 } override def runMutant(mutant: Mutant, context: Context): Path => MutantRunResult = { val request = createRequestWithMutation(mutant, context) val result = invoker.execute(request) result.getExitCode match { case 0 => Survived(mutant, _) case _ => Killed(mutant, _) } } private def createRequest(context: Context): InvocationRequest = new DefaultInvocationRequest() .setGoals(context.goals.asJava) .setOutputHandler(debug(_)) .setBatchMode(true) .setProperties(context.properties) private def createRequestWithMutation(mutant: Mutant, context: Context): InvocationRequest = createRequest(context) .addShellEnvironment("ACTIVE_MUTATION", String.valueOf(mutant.id)) private def setTestProperties(properties: Properties): Unit = { // Stop after first failure. Only works with surefire plugin, not scalatest properties.setProperty( "surefire.skipAfterFailureCount", 1.toString ) // https://maven.apache.org/surefire/maven-surefire-plugin/examples/single-test.html val surefireFilter = "test" // https://www.scalatest.org/user_guide/using_the_scalatest_maven_plugin val scalatestFilter = "wildcardSuites" if (config.testFilter.nonEmpty) { if (properties.getProperty(surefireFilter) != null) { val newTestProperty = properties.getProperty(surefireFilter) +: config.testFilter properties.setProperty(surefireFilter, newTestProperty.mkString(", ")) } else if (properties.getProperty(scalatestFilter) != null) { val newTestProperty = properties.getProperty(scalatestFilter) +: config.testFilter properties.setProperty(scalatestFilter, newTestProperty.mkString(",")) } else { properties.setProperty(surefireFilter, config.testFilter.mkString(", ")) properties.setProperty(scalatestFilter, config.testFilter.mkString(",")) } } } }
Example 80
Source File: EmailSender.scala From diffy with GNU Affero General Public License v3.0 | 5 votes |
package ai.diffy.util import com.twitter.logging.Logger import com.twitter.util.{FuturePool, Future} import javax.mail._ import javax.mail.internet.{InternetAddress, MimeMessage} import java.util.Properties case class SimpleMessage( from: String, to: String, bcc: String, subject: String, body: String) class EmailSender(log: Logger, send: MimeMessage => Unit = Transport.send) { private[this] val props = new Properties props.put("mail.smtp.host", "localhost") props.put("mail.smtp.auth", "false") props.put("mail.smtp.port", "25") private[this] val session = Session.getDefaultInstance(props, null) def apply(msg: SimpleMessage): Future[Unit] = FuturePool.unboundedPool { val message = new MimeMessage(session) message.setFrom(new InternetAddress(msg.from)) message.setRecipients( Message.RecipientType.TO, InternetAddress.parse(msg.to) map { _.asInstanceOf[Address]} ) message.addRecipients( Message.RecipientType.BCC, InternetAddress.parse(msg.bcc) map { _.asInstanceOf[Address]} ) message.setSubject(msg.subject) message.setContent(msg.body, "text/html; charset=utf-8") try { send(message) } catch { case e: Exception => log.error(e, "Failed to send email report. Ensure Diffy can access port 25.") } } }
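A usage sketch for the sender above, with placeholder addresses; the sender is hard-wired to the local SMTP relay on port 25, and the returned Twitter Future is awaited so the program does not exit before the message is handed off.

import com.twitter.logging.Logger
import com.twitter.util.Await

import ai.diffy.util.{EmailSender, SimpleMessage}

object SendReportSketch extends App {
  val sender = new EmailSender(Logger.get("email"))
  Await.result(sender(SimpleMessage(
    from = "diffy@example.com",
    to = "team@example.com",
    bcc = "archive@example.com",
    subject = "Diffy report",
    body = "<h1>No regressions detected</h1>")))
}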
Example 81
Source File: StanfordBaseWrapperPlugin.scala From recogito2 with Apache License 2.0 | 5 votes |
package org.pelagios.recogito.plugins.ner.stanford import java.util.{ArrayList, Properties} import edu.stanford.nlp.ling.CoreAnnotations import edu.stanford.nlp.pipeline.{CoreDocument, StanfordCoreNLP} import edu.stanford.nlp.util.StringUtils import org.pelagios.recogito.sdk.PluginEnvironment import org.pelagios.recogito.sdk.ner._ import scala.collection.JavaConverters._ import org.slf4j.LoggerFactory case class StanfordEntity(chars: String, entityTag: String, charOffset: Int) abstract class StanfordBaseWrapperPlugin( lang: String, config: String, description: String ) extends NERPlugin { private val logger = LoggerFactory.getLogger(this.getClass) private lazy val pipeline = { logger.info("Initializing NER pipeline") val pipeline = new StanfordCoreNLP(props) logger.info("Pipeline initialized") pipeline } private def toEntityType(entityTag: String) = entityTag match { case "LOCATION" | "CITY" | "COUNTRY" | "STATE_OR_PROVINCE" | "NATIONALITY" => Some(EntityType.LOCATION) case "PERSON" => Some(EntityType.PERSON) case "DATE" => Some(EntityType.DATE) case _ => None } private lazy val props = { val props = StringUtils.argsToProperties(Seq("-props", config):_*) props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner") props } override val getName = "Stanford CoreNLP" override val getDescription = description override val getOrganization = "Stanford NLP Group" override val getVersion = "3.9.1" override val getSupportedLanguages = Seq(lang).asJava override def parse(text: String, env: PluginEnvironment) = { val document = new CoreDocument(text) pipeline.annotate(document) val entities = document.tokens().asScala.foldLeft(Seq.empty[StanfordEntity]) { (result, token) => val entityTag = token.get(classOf[CoreAnnotations.NamedEntityTagAnnotation]) val chars = token.get(classOf[CoreAnnotations.TextAnnotation]) val charOffset = token.beginPosition result.headOption match { case Some(previousEntity) if previousEntity.entityTag == entityTag => // Append to previous phrase if entity tag is the same StanfordEntity(previousEntity.chars + " " + chars, entityTag, previousEntity.charOffset) +: result.tail case _ => // Either this is the first token (result.headOption == None), or a new phrase StanfordEntity(chars, entityTag, charOffset) +: result } } // StanfordCoreNLP.clearAnnotatorPool entities.withFilter(_.entityTag != "O") .flatMap(e => toEntityType(e.entityTag).map(etype => new Entity(e.chars, etype, e.charOffset))).asJava } }
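The base class only needs a language code, a CoreNLP properties resource and a description, so a concrete plugin is a one-liner. The resource name below is an assumption for illustration, not the plugin's real configuration.

package org.pelagios.recogito.plugins.ner.stanford

class StanfordEnglishNERPlugin extends StanfordBaseWrapperPlugin(
  lang = "en",
  config = "english.properties", // assumed resource name, for illustration only
  description = "English NER based on Stanford CoreNLP"
)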
Example 82
Source File: RocksDB.scala From unicorn with Apache License 2.0 | 5 votes |
package unicorn.bigtable.rocksdb import java.io.File import java.util.Properties import org.rocksdb.{ColumnFamilyDescriptor, Options} import unicorn.bigtable.Database def create(path: String): RocksDB = { val dir = new java.io.File(path) require(!dir.exists, s"Directory $path exists") dir.mkdir new RocksDB(path) } def apply(path: String): RocksDB = { new RocksDB(path) } }
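A usage sketch, assuming the create helper shown above sits on the RocksDB companion object; the path is a placeholder, and create refuses to reuse an existing directory.

import unicorn.bigtable.rocksdb.RocksDB

object RocksDBSketch extends App {
  val db = RocksDB.create("/tmp/unicorn-rocksdb-sketch") // fails if the directory already exists
  // ... use the returned instance through the Database interface it extends ...
}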
Example 83
Source File: Cassandra.scala From unicorn with Apache License 2.0 | 5 votes |
package unicorn.bigtable.cassandra import java.util.Properties import java.net.{InetAddress, UnknownHostException} import scala.collection.JavaConversions._ import org.apache.cassandra.locator.SimpleSnitch import org.apache.cassandra.thrift.Cassandra.Client import org.apache.cassandra.thrift.{ConsistencyLevel, KsDef, CfDef} import org.apache.thrift.transport.TFramedTransport import org.apache.thrift.transport.TSocket import org.apache.thrift.protocol.TBinaryProtocol import unicorn.bigtable._ import unicorn.util.Logging override def compactTable(name: String): Unit = { // fail silently log.warn("Cassandra client API doesn't support compaction") } } object Cassandra { def apply(host: String, port: Int): Cassandra = { // For ultra-wide row, we set the maxLength to 16MB. // Note that we also need to set the server side configuration // thrift_framed_transport_size_in_mb in cassandra.yaml // In case of ultra-wide row, it is better to use intra row scan. val transport = new TFramedTransport(new TSocket(host, port), 16 * 1024 * 1024) transport.open new Cassandra(transport) } }
Example 84
Source File: NatsOutputMonitor.scala From smart-meter with MIT License | 5 votes |
package com.logimethods.nats.connector.spark.monitor import java.util.Properties import org.nats._ import java.nio.ByteBuffer; // @see https://github.com/tyagihas/scala_nats object NatsOutputMonitor extends App { Thread.sleep(2000) val inputSubject = args(0) println("Will be listening to messages from " + inputSubject) val natsUrl = System.getenv("NATS_URI") if (inputSubject.toUpperCase().contains("STREAMING")) { val clusterID = System.getenv("NATS_CLUSTER_ID"); System.out.println("NATS_CLUSTER_ID = " + clusterID); NatsStreamingOutputMonitor.main(inputSubject, natsUrl, clusterID) } else { val properties = new Properties() //@see https://github.com/tyagihas/java_nats/blob/master/src/main/java/org/nats/Connection.java properties.put("servers", natsUrl) val conn = Conn.connect(properties) if (args.length > 1) {// TEST mode // TODO Consider Integers & Floats val espectedValue = args(1).toInt println("Is especting a value equals to " + espectedValue) var iterations = 3 conn.subscribe(inputSubject, (msg: Msg) => { println("Received message: " + msg.body) iterations -= 1 if (iterations <= 0) { val receivedValue = msg.body.toInt if (receivedValue == espectedValue) { // "Tests passed!" System.exit(0) } else { // "Tests failed!" System.exit(1) } } }) } else { // REGULAR mode conn.subscribe(inputSubject, (msg: MsgB) => { import java.time._ val f = ByteBuffer.wrap(msg.body) if (msg.subject.contains("max")) { println(s"Received message: (${msg.subject}, ${LocalDateTime.ofEpochSecond(f.getLong(), 0, ZoneOffset.MIN)}, ${f.getFloat()})") } else if (msg.subject.contains("alert")) { println(s"Received message: (${msg.subject}, ${LocalDateTime.ofEpochSecond(f.getLong(), 0, ZoneOffset.MIN)}, ${f.getInt()})") } else { println(s"Received message: (${msg.subject}, ${f})") } }) } } }
Example 85
Source File: NatsInjection.scala From smart-meter with MIT License | 5 votes |
package com.logimethods.smartmeter.inject import akka.actor.{ActorRef, Props} import io.gatling.core.Predef._ import io.gatling.core.action.builder.ActionBuilder import com.logimethods.connector.gatling.to_nats._ import scala.concurrent.duration._ import java.util.Properties import io.nats.client.Nats.PROP_URL import com.logimethods.smartmeter.generate._ class NatsInjection extends Simulation { // println("System properties: " + System.getenv()) try { val properties = new Properties() val natsUrl = System.getenv("NATS_URI") properties.setProperty(PROP_URL, natsUrl) val subject = System.getenv("GATLING_TO_NATS_SUBJECT") if (subject == null) { println("No Subject has been defined through the 'GATLING_TO_NATS_SUBJECT' Environment Variable!!!") } else { println("Will emit messages to " + subject) val natsProtocol = NatsProtocol(properties, subject) val usersPerSec = System.getenv("GATLING_USERS_PER_SEC").toDouble val duration = System.getenv("GATLING_DURATION").toInt val streamingDuration = System.getenv("STREAMING_DURATION").toInt val slot = System.getenv("TASK_SLOT").toInt val randomness = System.getenv("RANDOMNESS").toFloat val predictionLength = System.getenv("PREDICTION_LENGTH").toInt val timeRoot = System.getenv("TIME_ROOT").toInt TimeProvider.config = Some(timeRoot) val natsScn = scenario("smartmeter_"+slot).exec( NatsBuilder(new ConsumerInterpolatedVoltageProvider(slot, usersPerSec, streamingDuration, randomness, predictionLength))) setUp( natsScn.inject(constantUsersPerSec(usersPerSec) during (duration minute)) ).protocols(natsProtocol) } } catch { case e: Exception => { println(e.toString()) e.printStackTrace() } } }
Example 86
Source File: NatsStreamingInjection.scala From smart-meter with MIT License | 5 votes |
package com.logimethods.smartmeter.inject import akka.actor.{ActorRef, Props} import io.gatling.core.Predef._ import io.gatling.core.action.builder.ActionBuilder import com.logimethods.connector.gatling.to_nats._ import scala.concurrent.duration._ import java.util.Properties import com.logimethods.smartmeter.generate._ class NatsStreamingInjection extends Simulation { // println("System properties: " + System.getenv()) try { val natsUrl = System.getenv("NATS_URI") val clusterID = System.getenv("NATS_CLUSTER_ID") var subject = System.getenv("GATLING_TO_NATS_SUBJECT") if (subject == null) { println("No Subject has been defined through the 'GATLING_TO_NATS_SUBJECT' Environment Variable!!!") } else { println("Will emit messages to " + subject) val natsProtocol = NatsStreamingProtocol(natsUrl, clusterID, subject) val usersPerSec = System.getenv("GATLING_USERS_PER_SEC").toDouble val duration = System.getenv("GATLING_DURATION").toInt val streamingDuration = System.getenv("STREAMING_DURATION").toInt val slot = System.getenv("TASK_SLOT").toInt val randomness = System.getenv("RANDOMNESS").toFloat val predictionLength = System.getenv("PREDICTION_LENGTH").toInt val timeRoot = System.getenv("TIME_ROOT").toInt TimeProvider.config = Some(timeRoot) val natsScn = scenario("smartmeter_"+slot).exec( NatsStreamingBuilder(new ConsumerInterpolatedVoltageProvider(slot, usersPerSec, streamingDuration, randomness, predictionLength))) setUp( natsScn.inject(constantUsersPerSec(usersPerSec) during (duration minute)) ).protocols(natsProtocol) } } catch { case e: Exception => { println(e.toString()) e.printStackTrace() } } }
Example 87
Source File: Credentials.scala From sbt-coursier with Apache License 2.0 | 5 votes |
package coursier import java.io.{File, FileInputStream} import java.util.Properties import lmcoursier.definitions.Authentication // actually deprecated (all public ways of creating that are) sealed abstract class Credentials extends Product with Serializable { def user: String def password: String def authentication: Authentication = Authentication(user, password) } object Credentials { private final case class Direct(user: String, password: String) extends Credentials { override def toString = s"Direct($user, ******)" } private final case class FromFile(file: File) extends Credentials { private lazy val props = { val p = new Properties() p.load(new FileInputStream(file)) p } private def findKey(keys: Seq[String]) = keys .iterator .map(props.getProperty) .filter(_ != null) .toStream .headOption .getOrElse { throw new NoSuchElementException(s"${keys.head} key in $file") } lazy val user: String = findKey(FromFile.fileUserKeys) lazy val password: String = findKey(FromFile.filePasswordKeys) } private object FromFile { // from sbt.Credentials private val fileUserKeys = Seq("user", "user.name", "username") private val filePasswordKeys = Seq("password", "pwd", "pass", "passwd") } @deprecated("Use coursierExtraCredentials rather than coursierCredentials", "1.1.0-M14") def apply(user: String, password: String): Credentials = Direct(user, password) @deprecated("Use coursierExtraCredentials rather than coursierCredentials", "1.1.0-M14") def apply(file: File): Credentials = FromFile(file) }
Example 88
Source File: Application.scala From kafka-serde-scala with Apache License 2.0 | 5 votes |
package io.github.azhur.kafkaserdescala.example import java.util.Properties import io.github.azhur.kafkaserdecirce.CirceSupport import org.apache.kafka.clients.consumer.ConsumerConfig import org.apache.kafka.streams.{ KafkaStreams, StreamsConfig, Topology } import org.apache.kafka.streams.scala.StreamsBuilder object Application extends App with CirceSupport { import io.circe.generic.auto._ import org.apache.kafka.streams.scala.Serdes._ import org.apache.kafka.streams.scala.ImplicitConversions._ case class User(id: Long, name: String, age: Int) val topology = buildTopology("input_users", "output_users") val streamingApp = new KafkaStreams(topology, streamProperties()) streamingApp.start() sys.addShutdownHook({ streamingApp.close() }) def buildTopology(inputTopic: String, outputTopic: String): Topology = { val streamsBuilder = new StreamsBuilder() streamsBuilder .stream[String, User](inputTopic) .filter((_, user) => user.age > 18) .to(outputTopic) streamsBuilder.build() } def streamProperties(): Properties = { val streamsConfiguration = new Properties streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "test-app") streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092") streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, Topology.AutoOffsetReset.EARLIEST.toString.toLowerCase) streamsConfiguration } }
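A companion sketch that feeds the streams app above: it publishes one JSON-encoded User to input_users with the plain Kafka producer, which matches the JSON shape the circe serde expects on the streams side (the broker address is a placeholder).

import java.util.Properties

import io.circe.generic.auto._
import io.circe.syntax._
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

object UserProducerSketch extends App {
  case class User(id: Long, name: String, age: Int)

  val props = new Properties()
  props.put("bootstrap.servers", "localhost:9092")
  props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
  props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

  val producer = new KafkaProducer[String, String](props)
  // The streams app filters on age > 18, so this record should reach output_users.
  producer.send(new ProducerRecord("input_users", "42", User(42L, "Jane", 30).asJson.noSpaces))
  producer.close()
}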
Example 89
Source File: SQLConnectorParser.scala From fusion-data with Apache License 2.0 | 5 votes |
package mass.connector.sql import java.util.Properties import helloscala.common.Configuration import mass.connector.{ ConnectorParser, ConnectorSetting } import mass.core.XmlUtils import scala.xml.Node class SQLConnectorParser extends ConnectorParser { import mass.core.XmlUtils.XmlRich override val `type` = "jdbc" def parseSettingFromXML(node: Node): ConnectorSetting = { val props = new Properties() val id = node.attr("name") props.put("poolName", id) (node \\ "props" \\ "prop").foreach { prop => val key = (prop \\ "@key").text val value = getText(prop) props.put(key, value) } ConnectorSetting(Configuration.load(props)) } override def parseFromXML(node: Node): SQLConnector = { val setting = parseSettingFromXML(node) SQLConnector(node.attr("name"), setting) } @inline private def getText(prop: Node): String = prop.getAttr("value").getOrElse(XmlUtils.text(prop \ "value")) }
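A sketch of the XML shape the parser appears to expect, inferred from the code above (a name attribute plus key/value pairs under props); the element names and property keys are illustrative assumptions.

import mass.connector.sql.SQLConnectorParser

object ParseConnectorSketch extends App {
  val connectorXml =
    <connector name="report-db">
      <props>
        <prop key="jdbcUrl" value="jdbc:postgresql://localhost:5432/report"/>
        <prop key="username" value="report"/>
        <prop key="password" value="secret"/>
      </props>
    </connector>

  val connector = new SQLConnectorParser().parseFromXML(connectorXml)
  println(connector) // an SQLConnector named "report-db" configured with the three props
}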
Example 90
Source File: Producer.scala From fusion-data with Apache License 2.0 | 5 votes |
package kafkasample.demo import java.util.Properties import java.util.concurrent.TimeUnit import org.apache.kafka.clients.producer.{ KafkaProducer, ProducerRecord, RecordMetadata } object Producer { def main(args: Array[String]): Unit = { val props = new Properties() props.put("bootstrap.servers", "localhost:9092") props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) try { run(producer) } finally { TimeUnit.SECONDS.sleep(5) producer.close() } } private def run[K, V](producer: KafkaProducer[String, String]) { val record = new ProducerRecord[String, String]("customerCountries", "羊八井222") producer.send(record, (metadata: RecordMetadata, e: Exception) => { if (e ne null) { e.printStackTrace() } println(s"metadata: $metadata") }) } }
Example 91
Source File: Consumer.scala From fusion-data with Apache License 2.0 | 5 votes |
package kafkasample.demo import java.util.{ Collections, Properties } import java.util.concurrent.TimeUnit import org.apache.kafka.clients.consumer.KafkaConsumer object Consumer { @volatile private var isStop = false def main(args: Array[String]): Unit = { val props = new Properties() props.put("bootstrap.servers", "localhost:9092") props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer") props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer") props.put("group.id", "CountryCounter") val consumer = new KafkaConsumer[String, String](props) val thread = new Thread() { override def run(): Unit = Consumer.run(consumer) } try { thread.start() } finally { TimeUnit.SECONDS.sleep(50) isStop = true thread.join() consumer.close() } } private def run(consumer: KafkaConsumer[String, String]): Unit = { consumer.subscribe(Collections.singleton("customerCountries")) while (!isStop && !Thread.currentThread().isInterrupted) { val records = consumer.poll(java.time.Duration.ofMillis(100)) records.forEach { record => println(s"topic = ${record.topic()}, partition = ${record.partition()}, offset = ${record .offset()}, key: ${record.key()}, value = ${record.value()}") } consumer.commitAsync() } } }
Example 92
Source File: HazelCastConnection.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hazelcast import java.io.{File, FileNotFoundException} import java.net.URI import java.util.{Properties, UUID} import com.datamountaineer.streamreactor.connect.hazelcast.config.{HazelCastConnectionConfig, HazelCastSocketConfig} import com.hazelcast.cache.HazelcastCachingProvider import com.hazelcast.client.HazelcastClient import com.hazelcast.client.config.{ClientConfig, ClientNetworkConfig, SocketOptions} import com.hazelcast.config.{GroupConfig, SSLConfig} import com.hazelcast.core.HazelcastInstance import javax.cache.{CacheManager, Caching} import scala.collection.JavaConverters._ object HazelCastConnection { def buildClient(config: HazelCastConnectionConfig): HazelcastInstance = { val clientConfig = new ClientConfig val networkConfig = clientConfig.getNetworkConfig if (config.sslEnabled) { setSSLOptions(config) networkConfig.setSSLConfig(new SSLConfig().setEnabled(true)) } networkConfig.setAddresses(config.members.toList.asJava) val groupConfig = new GroupConfig(config.group, config.pass) clientConfig.setGroupConfig(groupConfig) buildSocketOptions(networkConfig, config.socketConfig) clientConfig.setInstanceName(config.group + "-kafka-connect-" + UUID.randomUUID().toString) HazelcastClient.newHazelcastClient(clientConfig) } private def buildSocketOptions(clientNetworkConfig: ClientNetworkConfig, socketConfig: HazelCastSocketConfig): SocketOptions = { val socketOptions = clientNetworkConfig.getSocketOptions socketOptions.setKeepAlive(socketConfig.keepAlive) socketOptions.setTcpNoDelay(socketConfig.tcpNoDelay) socketOptions.setReuseAddress(socketConfig.reuseAddress) socketOptions.setLingerSeconds(socketConfig.lingerSeconds) socketOptions.setBufferSize(socketConfig.bufferSize) socketOptions } def getCacheManager(client: HazelcastInstance, name: String) : CacheManager = { val instanceName = client.getName() val cachingProvider = Caching.getCachingProvider() // Create Properties instance pointing to a named HazelcastInstance val properties = new Properties() properties.setProperty(HazelcastCachingProvider.HAZELCAST_INSTANCE_NAME, instanceName) val cacheManagerName = new URI(name ) val cacheManager = cachingProvider.getCacheManager(cacheManagerName, null, properties ) cacheManager } def setSSLOptions(config: HazelCastConnectionConfig) = { config.keyStoreLocation match { case Some(path) => if (!new File(path).exists) { throw new FileNotFoundException(s"Keystore not found in: $path") } System.setProperty("javax.net.ssl.keyStorePassword", config.keyStorePassword.getOrElse("")) System.setProperty("javax.net.ssl.keyStore", path) System.setProperty("javax.net.ssl.keyStoreType", config.keyStoreType.getOrElse("jks")) case None => } config.trustStoreLocation match { case Some(path) => if (!new File(path).exists) { throw new FileNotFoundException(s"Truststore not found in: $path") } System.setProperty("javax.net.ssl.trustStorePassword", config.trustStorePassword.getOrElse("")) System.setProperty("javax.net.ssl.trustStore", path) System.setProperty("javax.net.ssl.trustStoreType", config.trustStoreType.getOrElse("jks")) case None => } } }
Example 93
Source File: GitRepositoryState.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.ftp.source import java.util.Properties object GitRepositoryState { val props = { val p = new Properties() p.load(getClass.getClassLoader.getResourceAsStream("git.properties")) p } def describe: String = props.getProperty("git.commit.id.describe") def build: String = props.getProperty("git.build.time") def commitDate: String = props.getProperty("git.commit.time") def commitId: String = props.getProperty("git.commit.id") def summary:String = s"$describe ($commitId), committed at $commitDate, built at $build" }
Example 94
Source File: MySqlDemo.scala From Hands-On-Deep-Learning-with-Apache-Spark with MIT License | 5 votes |
package org.googlielmo.sparkdatabase import java.sql.DriverManager import java.util.Properties import org.apache.spark.sql.SparkSession object MySqlDemo { @throws[Exception] def main(args: Array[String]): Unit = { var jdbcUsername = "myslus" var jdbcPassword = "your_password" Class.forName("com.mysql.jdbc.Driver") val jdbcHostname = "your_db_hostname_or_ip" val jdbcPort = 3306 val jdbcDatabase ="sparkdb" // Create the JDBC URL without passing in the user and password parameters. val jdbcUrl = s"jdbc:mysql://${jdbcHostname}:${jdbcPort}/${jdbcDatabase}" // Create a Properties() object to hold the parameters. val connectionProperties = new Properties() connectionProperties.put("user", s"${jdbcUsername}") connectionProperties.put("password", s"${jdbcPassword}") val connection = DriverManager.getConnection(jdbcUrl, jdbcUsername, jdbcPassword) connection.isClosed() val spark = SparkSession .builder() .master("local[*]") .appName("Spark MySQL basic example") .getOrCreate() import spark.implicits._ val jdbcDF = spark.read .format("jdbc") .option("url", jdbcUrl) .option("dbtable", s"${jdbcDatabase}.sparkexample") .option("user", jdbcUsername) .option("password", jdbcPassword) .load() jdbcDF.printSchema() println("Record count = " + jdbcDF.count()) val filteredJDBC = jdbcDF.select("MerchantCountryCode", "TransactionAmountUSD") .groupBy("MerchantCountryCode") .avg("TransactionAmountUSD") filteredJDBC.collect.foreach { println } spark.close() } }
Example 95
Source File: functions.scala From Hands-On-Deep-Learning-with-Apache-Spark with MIT License | 5 votes |
package com.databricks.spark.corenlp import java.util.Properties import scala.collection.JavaConverters._ import edu.stanford.nlp.ling.CoreAnnotations import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations import edu.stanford.nlp.pipeline.{Annotation, CleanXmlAnnotator, StanfordCoreNLP, TokenizerAnnotator} import edu.stanford.nlp.pipeline.CoreNLPProtos.Sentiment import edu.stanford.nlp.sentiment.SentimentCoreAnnotations import edu.stanford.nlp.simple.{Document, Sentence} import edu.stanford.nlp.util.Quadruple import org.apache.spark.sql.functions.udf object functions { @transient private var sentimentPipeline: StanfordCoreNLP = _ private def getOrCreateSentimentPipeline(): StanfordCoreNLP = { if (sentimentPipeline == null) { val props = new Properties() props.setProperty("annotators", "tokenize, ssplit, parse, sentiment") sentimentPipeline = new StanfordCoreNLP(props) } sentimentPipeline } private case class OpenIE(subject: String, relation: String, target: String, confidence: Double) { def this(quadruple: Quadruple[String, String, String, java.lang.Double]) = this(quadruple.first, quadruple.second, quadruple.third, quadruple.fourth) } private case class CorefMention(sentNum: Int, startIndex: Int, mention: String) private case class CorefChain(representative: String, mentions: Seq[CorefMention]) private case class SemanticGraphEdge( source: String, sourceIndex: Int, relation: String, target: String, targetIndex: Int, weight: Double) def sentiment = udf { sentence: String => val pipeline = getOrCreateSentimentPipeline() val annotation = pipeline.process(sentence) val tree = annotation.get(classOf[CoreAnnotations.SentencesAnnotation]) .asScala .head .get(classOf[SentimentCoreAnnotations.SentimentAnnotatedTree]) RNNCoreAnnotations.getPredictedClass(tree) } }
Example 96
Source File: DBFunctions.scala From albedo with MIT License | 5 votes |
package ws.vinta.albedo.closures import java.sql.DriverManager import java.util.Properties import scala.collection.mutable.ArrayBuffer object DBFunctions { def selectUserStarredRepos(userId: Int, limit: Int, offset: Int): Array[Int] = { val dbUrl = "jdbc:mysql://127.0.0.1:3306/albedo?verifyServerCertificate=false&useSSL=false&rewriteBatchedStatements=true" val props = new Properties() props.setProperty("driver", "com.mysql.jdbc.Driver") props.setProperty("user", "root") props.setProperty("password", "123") val connection = DriverManager.getConnection(dbUrl, props) val statement = connection.createStatement() val resultSet = statement.executeQuery(s""" SELECT repo_id FROM app_repostarring WHERE user_id = $userId ORDER BY starred_at DESC LIMIT $limit OFFSET $offset; """.stripMargin(' ')) val repoIds = ArrayBuffer.empty[Int] while (resultSet.next()) { val repoId = resultSet.getInt("repo_id") repoIds += repoId } connection.close() repoIds.toArray } }
Example 97
Source File: Producer.scala From awesome-recommendation-engine with Apache License 2.0 | 5 votes |
package example.producer import java.util.Properties import example.utils.KafkaConfig import kafka.producer.{KeyedMessage, ProducerConfig, Producer => KafkaProducer} case class Producer[A](topic: String) { protected val config = new ProducerConfig(KafkaConfig()) private lazy val producer = new KafkaProducer[A, A](config) def send(message: A) = sendMessage(producer, keyedMessage(topic, message)) def sendStream(stream: Stream[A]) = { val iter = stream.iterator while(iter.hasNext) { send(iter.next()) } } private def keyedMessage(topic: String, message: A): KeyedMessage[A, A] = new KeyedMessage[A, A](topic, message) private def sendMessage(producer: KafkaProducer[A, A], message: KeyedMessage[A, A]) = producer.send(message) } object Producer { def apply[T](topic: String, props: Properties) = new Producer[T](topic) { override val config = new ProducerConfig(props) } }
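A short usage sketch; the topic name and broker settings are placeholders, and the Properties variant goes through the companion apply shown above:

import java.util.Properties

import example.producer.Producer

object ProducerSketch extends App {
  // Default configuration is read through KafkaConfig()
  val producer = Producer[String]("page_views")
  producer.send("hello")

  // Or supply the old-style producer settings explicitly
  val props = new Properties()
  props.put("metadata.broker.list", "localhost:9092")
  props.put("serializer.class", "kafka.serializer.StringEncoder")
  val custom = Producer[String]("page_views", props)
  custom.sendStream(Stream("a", "b", "c"))
}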
Example 98
Source File: KafkaConfig.scala From awesome-recommendation-engine with Apache License 2.0 | 5 votes |
package example.utils import java.util.Properties import com.typesafe.config.ConfigFactory import KafkaConfig._ trait KafkaConfig extends Properties { private val consumerPrefixWithDot = consumerPrefix + "." private val producerPrefixWithDot = producerPrefix + "." private val allKeys = Seq(groupId, zookeeperConnect, brokers, serializer, partitioner, requiredAcks) lazy val typesafeConfig = ConfigFactory.load() allKeys.map { key => if (typesafeConfig.hasPath(key)) put(key.replace(consumerPrefixWithDot, "").replace(producerPrefixWithDot, ""), typesafeConfig.getString(key)) } def getCustomString(key: String) = typesafeConfig.getString(key) def getCustomInt(key: String) = typesafeConfig.getInt(key) } object KafkaConfig { val consumerPrefix = "consumer" val producerPrefix = "producer" //Consumer keys val groupId = s"$consumerPrefix.group.id" val zookeeperConnect = s"$consumerPrefix.zookeeper.connect" //example.producer.Producer keys val brokers = s"$producerPrefix.metadata.broker.list" val serializer = s"$producerPrefix.serializer.class" val partitioner = s"$producerPrefix.partitioner.class" val requiredAcks = s"$producerPrefix.request.required.acks" def apply() = new KafkaConfig {} }
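For context, a sketch of the application.conf keys the trait copies into itself and how they are read back; the values are placeholders:

// application.conf on the classpath:
//   consumer.group.id             = "demo-group"
//   consumer.zookeeper.connect    = "localhost:2181"
//   producer.metadata.broker.list = "localhost:9092"
//   producer.serializer.class     = "kafka.serializer.StringEncoder"

import example.utils.KafkaConfig

object KafkaConfigSketch extends App {
  val config = KafkaConfig() // backed by ConfigFactory.load()
  // The consumer./producer. prefixes are stripped before the values land in Properties
  println(config.getProperty("group.id"))             // demo-group
  println(config.getProperty("metadata.broker.list")) // localhost:9092
  println(config.getCustomString("producer.serializer.class"))
}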
Example 99
Source File: ConnectionUtils.scala From azure-sqldb-spark with MIT License | 5 votes |
package com.microsoft.azure.sqldb.spark.connect import java.sql.{Connection, DriverManager, SQLException} import java.util.Properties import com.microsoft.azure.sqldb.spark.config.{Config, SqlDBConfig} /** * Helper and utility methods used for setting up or using a connection */ private[spark] object ConnectionUtils { /** * Retrieves all connection properties in the Config object * and returns them as a [[Properties]] object. * * @param config the Config object with specified connection properties. * @return A connection [[Properties]] object. */ def createConnectionProperties(config: Config): Properties = { val connectionProperties = new Properties() for (key <- config.getAllKeys) { connectionProperties.put(key.toString, config.get[String](key.toString).get) } connectionProperties } /** * Adds the "jdbc:sqlserver://" suffix to a general server url * * @param url the string url without the JDBC prefix * @return the url with the added JDBC prefix */ def createJDBCUrl(url: String): String = SqlDBConfig.JDBCUrlPrefix + url /** * Gets a JDBC connection based on Config properties * * @param config any read or write Config * @return a JDBC Connection */ def getConnection(config: Config): Connection = { Class.forName(SqlDBConfig.SQLjdbcDriver) DriverManager.getConnection( createJDBCUrl(config.get[String](SqlDBConfig.URL).get), createConnectionProperties(config)) } /** * Retrieves the DBTable or QueryCustom specified in the config. * NOTE: only one property can exist within config. * * @param config the Config object with specified properties. * @return The specified DBTable or QueryCustom */ def getTableOrQuery(config: Config): String = { config.get[String](SqlDBConfig.DBTable).getOrElse( getQueryCustom(config.get[String](SqlDBConfig.QueryCustom).get) ) } /** * The JDBC driver requires parentheses and a temp variable around any custom queries. * This adds the required syntax so users only need to specify the query. * * @param query the default query * @return the syntactically correct query to be executed by the JDBC driver. */ def getQueryCustom(query: String): String = s"($query) QueryCustom" }
Example 100
Source File: ConnectionUtilsSpec.scala From azure-sqldb-spark with MIT License | 5 votes |
package com.microsoft.azure.sqldb.spark.connect import java.util.Properties import com.microsoft.azure.sqldb.spark.SqlDBSpark import com.microsoft.azure.sqldb.spark.config.Config class ConnectionUtilsSpec extends SqlDBSpark { "createConnectionProperties" should "return all properties in configuration in a Properties object" in { val url = "mssql.database.windows.net" val database = "MyDatabase" val user = "admin" val password = "password" val dbTable = "dbo.Customers" val config = Config(Map( "url" -> url, "databaseName" -> database, "user" -> user, "password" -> password, "dbTable" -> dbTable )) val controlProperties = new Properties controlProperties.put("url", url.toLowerCase) controlProperties.put("databasename", database.toLowerCase) controlProperties.put("user", user.toLowerCase) controlProperties.put("password", password.toLowerCase) controlProperties.put("dbtable", dbTable.toLowerCase) val testProperties = ConnectionUtils.createConnectionProperties(config) Seq(testProperties.keySet()) should contain theSameElementsAs Seq(controlProperties.keySet()) } "createJDBCUrl" should "return the server url with jdbc prefix" in { val url = "mssql.database.windows.net" ConnectionUtils.createJDBCUrl(url) should be ("jdbc:sqlserver://" + url) } "getQueryCustom" should "return original query in parenthesis" in { val query = "SELECT * FROM MYTABLE" ConnectionUtils.getQueryCustom(query) should be ("(" + query + ") QueryCustom") } "getTableOrQuery" should "return appropriate table or query from a config object" in { val dbTable = "dbo.Customers" val tableConfig = Config(Map( "url" -> "mssql.database.windows.net", "databaseName" -> "MyDatabase", "user" -> "admin", "password" -> "password", "dbTable" -> dbTable )) ConnectionUtils.getTableOrQuery(tableConfig) should be (dbTable) val queryCustom = "SELECT * FROM dbo.Customers" val queryConfig = Config(Map( "url" -> "mssql.database.windows.net", "databaseName" -> "MyDatabase", "user" -> "admin", "password" -> "password", "QueryCustom" -> queryCustom )) ConnectionUtils.getTableOrQuery(queryConfig) should be (ConnectionUtils.getQueryCustom(queryCustom)) } }
Example 101
Source File: WriteSample.scala From azure-sqldb-spark with MIT License | 5 votes |
// Import libraries
import com.microsoft.azure.sqldb.spark.config.Config
import com.microsoft.azure.sqldb.spark.connect._
// SaveMode is referenced below, so it needs to be imported as well
import org.apache.spark.sql.SaveMode

val url = "[Enter your url here]"
val databaseName = "[Enter your database name here]"
val dbTable = "[Enter your database table here]"
val user = "[Enter your username here]"
val password = "[Enter your password here]"

// Acquire data to be written.
// df could be acquired in any way.
val localTable = "[Enter your local persisted table here]"
val df = spark.sql(s"SELECT * FROM $localTable")

// WRITE FROM CONFIG
val writeConfig = Config(Map(
  "url"            -> url,
  "databaseName"   -> databaseName,
  "dbTable"        -> dbTable,
  "user"           -> user,
  "password"       -> password,
  "connectTimeout" -> "5",
  "queryTimeout"   -> "5"
))

df.write.mode(SaveMode.Append).sqlDB(writeConfig)
Example 102
Source File: ReadSample.scala From azure-sqldb-spark with MIT License | 5 votes |
// Import libraries
import com.microsoft.azure.sqldb.spark.bulkcopy.BulkCopyMetadata
import com.microsoft.azure.sqldb.spark.config.Config
import com.microsoft.azure.sqldb.spark.connect._

val url = "[Enter your url here]"
val databaseName = "[Enter your database name here]"
val dbTable = "[Enter your database table here]"
val user = "[Enter your username here]"
val password = "[Enter your password here]"

// READ FROM CONFIG
val readConfig = Config(Map(
  "url"            -> url,
  "databaseName"   -> databaseName,
  "user"           -> user,
  "password"       -> password,
  "connectTimeout" -> "5",
  "queryTimeout"   -> "5",
  "dbTable"        -> dbTable
))

val df = sqlContext.read.sqlDB(readConfig)
println("Total rows: " + df.count)
df.show()

// TRADITIONAL SYNTAX
import java.util.Properties

val properties = new Properties()
properties.put("databaseName", databaseName)
properties.put("user", user)
properties.put("password", password)
properties.put("connectTimeout", "5")
properties.put("queryTimeout", "5")

// Renamed from df so both variants can live in the same script
val dfFromProps = sqlContext.read.sqlDB(url, dbTable, properties)
println("Total rows: " + dfFromProps.count)
dfFromProps.show()
Example 103
Source File: SchemaRegistryService.scala From kafka-testing with Apache License 2.0 | 5 votes |
package com.landoop.kafka.testing import java.net.{Socket, SocketException} import java.util.Properties import com.typesafe.scalalogging.StrictLogging import io.confluent.kafka.schemaregistry.avro.AvroCompatibilityLevel import io.confluent.kafka.schemaregistry.client.rest.RestService import io.confluent.kafka.schemaregistry.rest.{SchemaRegistryConfig, SchemaRegistryRestApplication} import io.confluent.kafka.schemaregistry.storage.{SchemaRegistry, SchemaRegistryIdentity} import org.eclipse.jetty.server.Server class SchemaRegistryService(val port: Int, val zookeeperConnection: String, val kafkaTopic: String, val avroCompatibilityLevel: AvroCompatibilityLevel, val masterEligibility: Boolean) extends StrictLogging { private val app = new SchemaRegistryRestApplication({ val prop = new Properties prop.setProperty("port", port.asInstanceOf[Integer].toString) prop.setProperty(SchemaRegistryConfig.KAFKASTORE_CONNECTION_URL_CONFIG, zookeeperConnection) prop.put(SchemaRegistryConfig.KAFKASTORE_TOPIC_CONFIG, kafkaTopic) prop.put(SchemaRegistryConfig.COMPATIBILITY_CONFIG, avroCompatibilityLevel.toString) prop.put(SchemaRegistryConfig.MASTER_ELIGIBILITY, masterEligibility.asInstanceOf[AnyRef]) prop }) val restServer = startServer(port) var Endpoint: String = getEndpoint(restServer) val restClient = new RestService(Endpoint) def startServer(port: Int, retries: Int = 5): Option[Server] = { var retry = retries > 0 var restServer: Option[Server] = None if (retry) { if (isPortInUse(port)) { logger.info(s"Schema Registry Port $port is already in use") Thread.sleep(2000) startServer(port, retries - 1) } else { restServer = Some(app.createServer) restServer.get.start() } } restServer } def getEndpoint(restServer: Option[Server]): String = { if (restServer.isDefined) { val uri = restServer.get.getURI.toString if (uri.endsWith("/")) { uri.substring(0, uri.length - 1) } else { uri } } else "" } private def isPortInUse(port: Integer): Boolean = try { new Socket("127.0.0.1", port).close() true } catch { case e: SocketException => false } def close() { if (restServer.isDefined) { restServer.get.stop() restServer.get.join() } } def isMaster: Boolean = app.schemaRegistry.isMaster def setMaster(schemaRegistryIdentity: SchemaRegistryIdentity): Unit = app.schemaRegistry.setMaster(schemaRegistryIdentity) def myIdentity: SchemaRegistryIdentity = app.schemaRegistry.myIdentity def masterIdentity: SchemaRegistryIdentity = app.schemaRegistry.masterIdentity def schemaRegistry: SchemaRegistry = app.schemaRegistry }
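A sketch of standing the service up from a test, assuming ZooKeeper and Kafka are already running locally; the port and topic are placeholders:

import com.landoop.kafka.testing.SchemaRegistryService
import io.confluent.kafka.schemaregistry.avro.AvroCompatibilityLevel

object SchemaRegistrySketch extends App {
  val registry = new SchemaRegistryService(
    port = 8081,
    zookeeperConnection = "localhost:2181",
    kafkaTopic = "_schemas",
    avroCompatibilityLevel = AvroCompatibilityLevel.BACKWARD,
    masterEligibility = true)

  println(s"Schema Registry listening on ${registry.Endpoint}")
  println(registry.restClient.getAllSubjects) // REST client already points at the embedded instance
  registry.close()
}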
Example 104
Source File: ConnectSamples.scala From kafka-testing with Apache License 2.0 | 5 votes |
package com.landoop.kafka.testing import java.util import java.util.Properties import org.apache.kafka.connect.runtime.distributed.DistributedConfig import org.apache.kafka.connect.runtime.{ConnectorConfig, WorkerConfig} import scala.collection.JavaConverters._ object ConnectSamples { def workerConfig(bootstapServers: String, schemaRegistryUrl: String): util.Map[String, AnyRef] = Map( DistributedConfig.GROUP_ID_CONFIG -> "testing-group-id", WorkerConfig.BOOTSTRAP_SERVERS_CONFIG -> bootstapServers, WorkerConfig.KEY_CONVERTER_CLASS_CONFIG -> "org.apache.kafka.connect.json.JsonConverter", WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG -> "org.apache.kafka.connect.json.JsonConverter", WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG -> "com.qubole.streamx.ByteArrayConverter", DistributedConfig.OFFSET_STORAGE_TOPIC_CONFIG -> "connect-offsets", DistributedConfig.CONFIG_TOPIC_CONFIG -> "connect-configs", DistributedConfig.STATUS_STORAGE_TOPIC_CONFIG -> "connect-status", WorkerConfig.INTERNAL_KEY_CONVERTER_CLASS_CONFIG -> "org.apache.kafka.connect.json.JsonConverter", WorkerConfig.INTERNAL_VALUE_CONVERTER_CLASS_CONFIG -> "org.apache.kafka.connect.json.JsonConverter", "schema.registry.url" -> schemaRegistryUrl ).asInstanceOf[Map[String, AnyRef]].asJava val sourceConfig: util.Map[String, AnyRef] = Map( ConnectorConfig.NAME_CONFIG -> "file-source-connector", ConnectorConfig.CONNECTOR_CLASS_CONFIG -> "org.apache.kafka.connect.file.FileStreamSourceConnector", ConnectorConfig.TASKS_MAX_CONFIG -> "1", "topic" -> "file-topic", "file" -> "/var/log/*" ).asInstanceOf[Map[String, AnyRef]].asJava def workerProperties(bootstapServers: String, schemaRegistryUrl: String): Properties = { val props = new Properties() props.putAll(workerConfig(bootstapServers, schemaRegistryUrl)) props } val sourceProperties: Properties = { val props = new Properties() props.putAll(sourceConfig) props } }
Example 105
Source File: Config.scala From zipkin-mesos-framework with Apache License 2.0 | 5 votes |
package net.elodina.mesos.zipkin import java.io.{File, FileInputStream} import java.net.URI import java.util.Properties import net.elodina.mesos.zipkin.utils.{BindAddress, Period} object Config { val DEFAULT_FILE = new File("zipkin-mesos.properties") var debug: Boolean = false var genTraces: Boolean = false var storage: String = "file:zipkin-mesos.json" var master: Option[String] = None var principal: Option[String] = None var secret: Option[String] = None var user: Option[String] = None var frameworkName: String = "zipkin" var frameworkRole: String = "*" var frameworkTimeout: Period = new Period("30d") var log: Option[File] = None var api: Option[String] = None var bindAddress: Option[BindAddress] = None def apiPort: Int = { val port = new URI(getApi).getPort if (port == -1) 80 else port } def replaceApiPort(port: Int): Unit = { val prev: URI = new URI(getApi) api = Some("" + new URI( prev.getScheme, prev.getUserInfo, prev.getHost, port, prev.getPath, prev.getQuery, prev.getFragment )) } def getApi: String = { api.getOrElse(throw new Error("api not initialized")) } def getMaster: String = { master.getOrElse(throw new Error("master not initialized")) } def getZk: String = { master.getOrElse(throw new Error("zookeeper not initialized")) } private[zipkin] def loadFromFile(file: File): Unit = { val props: Properties = new Properties() val stream: FileInputStream = new FileInputStream(file) props.load(stream) stream.close() if (props.containsKey("debug")) debug = java.lang.Boolean.valueOf(props.getProperty("debug")) if (props.containsKey("genTraces")) genTraces = java.lang.Boolean.valueOf(props.getProperty("genTraces")) if (props.containsKey("storage")) storage = props.getProperty("storage") if (props.containsKey("master")) master = Some(props.getProperty("master")) if (props.containsKey("user")) user = Some(props.getProperty("user")) if (props.containsKey("principal")) principal = Some(props.getProperty("principal")) if (props.containsKey("secret")) secret = Some(props.getProperty("secret")) if (props.containsKey("framework-name")) frameworkName = props.getProperty("framework-name") if (props.containsKey("framework-role")) frameworkRole = props.getProperty("framework-role") if (props.containsKey("framework-timeout")) frameworkTimeout = new Period(props.getProperty("framework-timeout")) if (props.containsKey("log")) log = Some(new File(props.getProperty("log"))) if (props.containsKey("api")) api = Some(props.getProperty("api")) if (props.containsKey("bind-address")) bindAddress = Some(new BindAddress(props.getProperty("bind-address"))) } override def toString: String = { s""" |debug: $debug, storage: $storage |mesos: master=$master, user=${if (user.isEmpty || user.get.isEmpty) "<default>" else user} |principal=${principal.getOrElse("<none>")}, secret=${if (secret.isDefined) "*****" else "<none>"} |framework: name=$frameworkName, role=$frameworkRole, timeout=$frameworkTimeout |api: $api, bind-address: ${bindAddress.getOrElse("<all>")}, genTraces: $genTraces """.stripMargin.trim } }
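A sketch of driving this from a properties file; the keys mirror the ones loadFromFile checks and the values are placeholders (the example object sits in the same package because loadFromFile is private[zipkin]):

package net.elodina.mesos.zipkin

// zipkin-mesos.properties next to the jar, for example:
//   master         = zk://localhost:2181/mesos
//   api            = http://localhost:7000
//   framework-name = zipkin
//   debug          = true
object ConfigSketch extends App {
  if (Config.DEFAULT_FILE.exists()) Config.loadFromFile(Config.DEFAULT_FILE)
  println(Config)        // summarises master, framework and api settings
  println(Config.getApi) // throws an Error if `api` was never set
}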
Example 106
Source File: GenZipkinTraces.scala From zipkin-mesos-framework with Apache License 2.0 | 5 votes |
package net.elodina.mesos.zipkin import java.util.Properties import com.github.kristofa.brave.TraceInfo import org.junit.Test import com.github.kristofa.brave.KafkaZipkinTracing._ class GenZipkinTraces { lazy val KAFKA_BROKER = Option(System.getenv("KAFKA_BROKER")).getOrElse("localhost:9092") lazy val KAFKA_TOPIC = Option(System.getenv("KAFKA_TOPIC")).getOrElse("zipkin") @Test def generate(): Unit = { //Setting producer type to sync in this test, as we don't bother about throughput here, just want all messages sent val kafkaProps = new Properties() kafkaProps.setProperty("producer.type", "sync") initTracing(KAFKA_BROKER, "serviceFoo", Some(KAFKA_TOPIC), kafkaProps = kafkaProps) var traceInfo: Option[TraceInfo] = None // Logging fire and forget request withClientTracer { ct => Option(ct.startNewSpan("someRequest")).foreach { traceIds => // The below line emulates adding trace info to RPC call traceInfo = Some(TraceInfo(Some(traceIds.getTraceId), Some(traceIds.getSpanId), sampled = true)) ct.setClientSent() submitClientTracer() } } // Emulate RPC call try { Thread.sleep(1500) } catch { case e: InterruptedException => //ignore } // Creating new tracing for server side initTracing(KAFKA_BROKER, "serviceBar", Some(KAFKA_TOPIC), kafkaProps = kafkaProps) // Here actual servers should parse TraceInfo instances from an incoming request initServerFromTraceInfo(traceInfo) withServerTracer { st => st.setServerReceived() } // Now receiving service is making request of his own withClientTracer { ct => Option(ct.startNewSpan("otherRequest")).foreach { traceIds => ct.setClientSent() submitClientTracer() } } withServerTracer { st => st.submitAnnotation("Completed Processing") } submitServerTracer() } }
Example 107
Source File: MailAPI.scala From SqlShift with MIT License | 5 votes |
package com.goibibo.sqlshift.alerting import java.util.Properties import javax.mail.Message.RecipientType import javax.mail.internet.{InternetAddress, MimeMessage, _} import javax.mail.{Authenticator, PasswordAuthentication, Session, Transport} import com.goibibo.sqlshift.models.Configurations.AppConfiguration import com.goibibo.sqlshift.models.Params.MailParams import org.slf4j.{Logger, LoggerFactory} class MailAPI(mailParams: MailParams) { private val logger: Logger = LoggerFactory.getLogger(classOf[MailAPI]) private val prop = new Properties() { put("mail.smtp.host", mailParams.host) put("mail.smtp.port", mailParams.port.toString) } private val session: Session = mailParams.password match { case Some(password) => prop.setProperty("mail.smtp.auth", "true") Session.getDefaultInstance(prop, new Authenticator { override def getPasswordAuthentication: PasswordAuthentication = { new PasswordAuthentication(mailParams.username, password) } }) case None => Session.getDefaultInstance(prop) } def send(appConfs: List[AppConfiguration]): Unit = { val from = "[email protected]" logger.info("Mail from: {}", from) var subject = "SQLShift:" var text = "<html>" + "<body>" + "<table border='1' style='width:100%' bgcolor='#F5F5F5'>" + "<tr> <th size=6>Mysql schema</th>" + "<th size=6>Mysql table_name</th>" + "<th size=6>Redshift schema</th>" + "<th size=6>Status</th>" + "<th size=6>Migration Time(sec)</th>" + "<th size=6>Error</th></tr>" logger.info(s"Mail to: '${mailParams.to}' and cc: '${mailParams.cc}'") val tos: List[String] = mailParams.to.split(",").toList var ccs: List[String] = List() if (mailParams.cc != "") ccs = mailParams.cc.split(",").toList var errorCnt = 0 var successCnt = 0 for (appConf <- appConfs) { text += "<tr>" + "<td bgcolor='#FFE4C4'>" + appConf.mysqlConf.db + "</td>" + "<td bgcolor='#E0FFFF'>" + appConf.mysqlConf.tableName + "</td>" + "<td bgcolor='#F5F5DC'>" + appConf.redshiftConf.schema + "</td>" + "<td bgcolor='#E0FFFF'>" + appConf.status.get.isSuccessful + "</td>" + "<td bgcolor='#E0FFFF'>" + appConf.migrationTime.get + "</td>" if (appConf.status.get.isSuccessful) { successCnt += 1 } else { text += "<td bgcolor='#F0FFFF'>%s\n%s</td></tr>" .format(appConf.status.get.e.getMessage, appConf.status.get.e.getStackTrace.mkString("\n")) errorCnt += 1 } } subject += " Failed " + errorCnt.toString + " Success " + successCnt.toString + mailParams.subject text += "</table></body></html>" logger.info("Subject: {}", subject) val message = new MimeMessage(session) message.setFrom(new InternetAddress(from)) for (to <- tos) message.addRecipient(RecipientType.TO, new InternetAddress(to)) for (cc <- ccs) message.addRecipient(RecipientType.CC, new InternetAddress(cc)) message.setSubject(subject) message.setText(text) val mimeBdyPart = new MimeBodyPart() mimeBdyPart.setContent(text, "text/html; charset=utf-8") val multiPart = new MimeMultipart() logger.info("Sending message...") multiPart.addBodyPart(mimeBdyPart) message.setContent(multiPart) Transport.send(message) } }
Example 108
Source File: MailUtil.scala From SqlShift with MIT License | 5 votes |
package com.goibibo.sqlshift.alerting import java.util.Properties import com.goibibo.sqlshift.models.Params.MailParams import scala.collection.JavaConverters._ object MailUtil { def getMailParams(prop: Properties): MailParams = { val scalaProp = prop.asScala var mailParams: MailParams = MailParams(host = scalaProp("alert.host"), username = null, to = scalaProp.getOrElse("alert.to", ""), cc = scalaProp.getOrElse("alert.cc", ""), subject = scalaProp.getOrElse("alert.subject", "") ) mailParams = scalaProp.get("alert.port") match { case Some(port) => mailParams.copy(port = port.toInt) case None => mailParams } mailParams = scalaProp.get("alert.username") match { case Some(username) => mailParams.copy(username = username) case None => mailParams } mailParams = scalaProp.get("alert.password") match { case Some(pass) => mailParams.copy(password = Some(pass)) case None => mailParams } mailParams } }
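A short sketch of the alert.* keys getMailParams expects; host, addresses and credentials are placeholders:

import java.util.Properties

import com.goibibo.sqlshift.alerting.MailUtil

object MailUtilSketch extends App {
  val props = new Properties()
  props.setProperty("alert.host", "smtp.example.com") // required
  props.setProperty("alert.port", "587")              // optional
  props.setProperty("alert.username", "alerts")       // optional
  props.setProperty("alert.password", "changeit")     // optional
  props.setProperty("alert.to", "oncall@example.com")
  props.setProperty("alert.subject", " nightly migration")

  val mailParams = MailUtil.getMailParams(props)
  println(mailParams.host) // smtp.example.com
}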
Example 109
Source File: MySQLUtil.scala From SqlShift with MIT License | 5 votes |
package com.goibibo.sqlshift import java.net.URL import java.sql.{Connection, DriverManager} import java.util.Properties import com.typesafe.config.Config import org.slf4j.{Logger, LoggerFactory} import scala.io.Source object MySQLUtil { private val logger: Logger = LoggerFactory.getLogger(this.getClass) private def getMySQLConnection(config: Config): Connection = { val mysql = config.getConfig("mysql") val connectionProps = new Properties() connectionProps.put("user", mysql.getString("username")) connectionProps.put("password", mysql.getString("password")) val jdbcUrl = s"jdbc:mysql://${mysql.getString("hostname")}:${mysql.getInt("portno")}/${mysql.getString("db")}?createDatabaseIfNotExist=true&useSSL=false" Class.forName("com.mysql.jdbc.Driver") DriverManager.getConnection(jdbcUrl, connectionProps) } def createTableAndInsertRecords(config: Config, tableName: String, psvFile: URL): Unit = { logger.info("Inserting records in table: {}", tableName) val records = Source.fromFile(psvFile.toURI).getLines().toList.drop(1) // removing header val conn = getMySQLConnection(config) val statement = conn.createStatement() try { val tableCreateQuery = config.getString("table.tableCreateQuery").replace("${tableName}", tableName) logger.info("Running query: {}", tableCreateQuery) statement.executeUpdate(tableCreateQuery) val insertIntoQuery = config.getString("table.insertIntoQuery").replace("${tableName}", tableName) logger.info("Running query: {}", insertIntoQuery) records.foreach { record: String => val columns = record.split("\\|") val query = insertIntoQuery.format(columns: _*) statement.executeUpdate(query) } } finally { statement.close() conn.close() } } }
Example 110
Source File: SparkNRedshiftUtil.scala From SqlShift with MIT License | 5 votes |
package com.goibibo.sqlshift import java.sql.{Connection, DriverManager} import java.util.Properties import com.databricks.spark.redshift.RedshiftReaderM import com.typesafe.config.Config import org.apache.spark.sql.{DataFrame, SQLContext} import org.apache.spark.{SparkConf, SparkContext} import org.scalatest.{BeforeAndAfterAll, Suite} import org.slf4j.{Logger, LoggerFactory} trait SparkNRedshiftUtil extends BeforeAndAfterAll { self: Suite => private val logger: Logger = LoggerFactory.getLogger(this.getClass) @transient private var _sc: SparkContext = _ @transient private var _sqlContext: SQLContext = _ def sc: SparkContext = _sc def sqlContext: SQLContext = _sqlContext private def getRedshiftConnection(config: Config): Connection = { val mysql = config.getConfig("redshift") val connectionProps = new Properties() connectionProps.put("user", mysql.getString("username")) connectionProps.put("password", mysql.getString("password")) val jdbcUrl = s"jdbc:redshift://${mysql.getString("hostname")}:${mysql.getInt("portno")}/${mysql.getString("database")}?useSSL=false" Class.forName("com.amazon.redshift.jdbc4.Driver") DriverManager.getConnection(jdbcUrl, connectionProps) } val getSparkContext: (SparkContext, SQLContext) = { val sparkConf: SparkConf = new SparkConf().setAppName("Full Dump Testing").setMaster("local") val sc: SparkContext = new SparkContext(sparkConf) val sqlContext: SQLContext = new SQLContext(sc) System.setProperty("com.amazonaws.services.s3.enableV4", "true") sc.hadoopConfiguration.set("fs.s3a.endpoint", "s3.ap-south-1.amazonaws.com") sc.hadoopConfiguration.set("fs.s3a.fast.upload", "true") (sc, sqlContext) } def readTableFromRedshift(config: Config, tableName: String): DataFrame = { val redshift: Config = config.getConfig("redshift") val options = Map("dbtable" -> tableName, "user" -> redshift.getString("username"), "password" -> redshift.getString("password"), "url" -> s"jdbc:redshift://${redshift.getString("hostname")}:${redshift.getInt("portno")}/${redshift.getString("database")}", "tempdir" -> config.getString("s3.location"), "aws_iam_role" -> config.getString("redshift.iamRole") ) RedshiftReaderM.getDataFrameForConfig(options, sc, sqlContext) } def dropTableRedshift(config: Config, tables: String*): Unit = { logger.info("Droping table: {}", tables) val conn = getRedshiftConnection(config) val statement = conn.createStatement() try { val dropTableQuery = s"""DROP TABLE ${tables.mkString(",")}""" logger.info("Running query: {}", dropTableQuery) statement.executeUpdate(dropTableQuery) } finally { statement.close() conn.close() } } override protected def beforeAll(): Unit = { super.beforeAll() val (sc, sqlContext) = getSparkContext _sc = sc _sqlContext = sqlContext } override protected def afterAll(): Unit = { super.afterAll() _sc.stop() } }
Example 111
Source File: SimpleProducer.scala From kafka-scala-api with Apache License 2.0 | 5 votes |
package com.example.producer import java.util.{Properties} import org.apache.kafka.clients.producer.{ProducerRecord, KafkaProducer} object SimpleProducer extends App{ val topic = "sample_topic" private val props = new Properties() props.put("bootstrap.servers", "localhost:9092") props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String,String](props) try { for(i <- 0 to 10) { producer.send(new ProducerRecord[String, String](topic, "title "+i.toString,"data from topic")) println(s"Sent: $i") } println("Message sent successfully") producer.close() } catch { case ex: Exception => ex.printStackTrace() } }
Example 112
Source File: package.scala From kafka-scala-api with Apache License 2.0 | 5 votes |
package com.example import java.util.Properties import org.apache.kafka.common.serialization.StringSerializer package object writer { val topic = "first_topic" val numbersProducerConfig = Map( "bootstrap.servers" -> "127.0.0.1:9092", "key.serializer" -> classOf[StringSerializer].getName, "value.serializer" -> classOf[StringSerializer].getName ) implicit def buildPropertiesFromMap(properties: Map[String, String]): Properties = (new Properties /: properties) { case (a, (k, v)) => a.put(k,v) a } def setupLogging(): Unit = { import org.apache.log4j.{Level, Logger} val rootLogger = Logger.getRootLogger rootLogger.setLevel(Level.ERROR) } }
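The implicit Map-to-Properties conversion above is what lets the plain config map be handed straight to the Kafka client; a small sketch of it in use:

package com.example

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

object WriterSketch extends App {
  import writer._

  setupLogging()

  // numbersProducerConfig is a Map[String, String]; buildPropertiesFromMap converts it
  // to the java.util.Properties the KafkaProducer constructor expects
  val producer = new KafkaProducer[String, String](numbersProducerConfig)
  producer.send(new ProducerRecord[String, String](topic, "42"))
  producer.close()
}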
Example 113
Source File: KafkaMessageSender.scala From kafka-with-akka-streams-kafka-streams-tutorial with Apache License 2.0 | 5 votes |
package com.lightbend.scala.kafka

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata}
import org.apache.kafka.common.serialization.ByteArraySerializer

// The class header, imports and producer setup were lost in extraction; the wrapper below is a
// minimal reconstruction (the class name mirrors the sibling model-serving example and the
// producer settings are assumptions).
class MessageSender(val brokers: String) {

  private val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)

  private val producer =
    new KafkaProducer[Array[Byte], Array[Byte]](props, new ByteArraySerializer, new ByteArraySerializer)

  def batchWriteValue(topic: String, batch: Seq[Array[Byte]]): Seq[RecordMetadata] = {
    val result = batch.map(value =>
      producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, value)).get)
    producer.flush()
    result
  }

  def close(): Unit = {
    producer.close()
  }
}
Example 114
Source File: PropertiesApi.scala From iep-apps with Apache License 2.0 | 5 votes |
package com.netflix.iep.archaius import java.io.StringWriter import java.util.Properties import akka.actor.ActorRefFactory import akka.http.scaladsl.model.HttpCharsets import akka.http.scaladsl.server.Directives._ import akka.http.scaladsl.model.HttpEntity import akka.http.scaladsl.model.HttpRequest import akka.http.scaladsl.model.HttpResponse import akka.http.scaladsl.model.MediaTypes import akka.http.scaladsl.model.StatusCodes import akka.http.scaladsl.server.Route import com.netflix.atlas.akka.CustomDirectives._ import com.netflix.atlas.akka.WebApi import com.netflix.atlas.json.Json import com.netflix.frigga.Names class PropertiesApi( val propContext: PropertiesContext, implicit val actorRefFactory: ActorRefFactory ) extends WebApi { def routes: Route = { endpointPath("api" / "v1" / "property") { get { parameter("asg") { asg => extractRequest { request => val cluster = Names.parseName(asg).getCluster if (propContext.initialized) { val props = propContext.getClusterProps(cluster) complete(encode(request, props)) } else { complete(HttpResponse(StatusCodes.ServiceUnavailable)) } } } } } } private def encode(request: HttpRequest, props: List[PropertiesApi.Property]): HttpResponse = { val useJson = request.headers.exists(h => h.is("accept") && h.value == "application/json") if (useJson) { HttpResponse( StatusCodes.OK, entity = HttpEntity(MediaTypes.`application/json`, Json.encode(props)) ) } else { val ps = new Properties props.foreach { p => ps.setProperty(p.key, p.value) } val writer = new StringWriter() ps.store(writer, s"count: ${ps.size}") writer.close() val entity = HttpEntity(MediaTypes.`text/plain`.toContentType(HttpCharsets.`UTF-8`), writer.toString) HttpResponse(StatusCodes.OK, entity = entity) } } } object PropertiesApi { case class Property(id: String, cluster: String, key: String, value: String, timestamp: Long) }
Example 115
Source File: PropertiesApiSuite.scala From iep-apps with Apache License 2.0 | 5 votes |
package com.netflix.iep.archaius import java.io.StringReader import java.util.Properties import akka.http.scaladsl.model.HttpResponse import akka.http.scaladsl.model.MediaTypes import akka.http.scaladsl.model.StatusCode import akka.http.scaladsl.model.StatusCodes import akka.http.scaladsl.model.headers._ import akka.http.scaladsl.testkit.RouteTestTimeout import akka.http.scaladsl.testkit.ScalatestRouteTest import com.netflix.atlas.akka.RequestHandler import com.netflix.atlas.json.Json import com.netflix.spectator.api.DefaultRegistry import com.netflix.spectator.api.ManualClock import org.scalatest.funsuite.AnyFunSuite class PropertiesApiSuite extends AnyFunSuite with ScalatestRouteTest { import scala.concurrent.duration._ implicit val routeTestTimeout = RouteTestTimeout(5.second) val clock = new ManualClock() val registry = new DefaultRegistry(clock) val propContext = new PropertiesContext(registry) val endpoint = new PropertiesApi(propContext, system) val routes = RequestHandler.standardOptions(endpoint.routes) private def assertJsonContentType(response: HttpResponse): Unit = { assert(response.entity.contentType.mediaType === MediaTypes.`application/json`) } private def assertResponse(response: HttpResponse, expected: StatusCode): Unit = { assert(response.status === expected) assertJsonContentType(response) } test("no asg") { Get("/api/v1/property") ~> routes ~> check { assert(response.status === StatusCodes.BadRequest) } } test("empty") { propContext.update(Nil) Get("/api/v1/property?asg=foo-main-v001") ~> addHeader(Accept(MediaTypes.`application/json`)) ~> routes ~> check { assertResponse(response, StatusCodes.OK) assert(responseAs[String] === "[]") } } test("properties response") { propContext.update( List( PropertiesApi.Property("foo-main::a", "foo-main", "a", "b", 12345L), PropertiesApi.Property("foo-main::1", "foo-main", "1", "2", 12345L), PropertiesApi.Property("bar-main::c", "bar-main", "c", "d", 12345L) ) ) Get("/api/v1/property?asg=foo-main-v001") ~> routes ~> check { assert(response.status === StatusCodes.OK) val props = new Properties props.load(new StringReader(responseAs[String])) assert(props.size === 2) assert(props.getProperty("a") === "b") assert(props.getProperty("1") === "2") } } test("json response") { propContext.update( List( PropertiesApi.Property("foo-main::a", "foo-main", "a", "b", 12345L) ) ) Get("/api/v1/property?asg=foo-main-v001") ~> addHeader(Accept(MediaTypes.`application/json`)) ~> routes ~> check { assertResponse(response, StatusCodes.OK) val props = Json.decode[List[PropertiesApi.Property]](responseAs[String]) assert(props === List(PropertiesApi.Property("foo-main::a", "foo-main", "a", "b", 12345L))) } } }
Example 116
Source File: SchemaRegistryOps.scala From embedded-kafka-schema-registry with MIT License | 5 votes |
package net.manub.embeddedkafka.schemaregistry.ops

import java.net.{ServerSocket, URI}
import java.util.Properties

import io.confluent.kafka.schemaregistry.rest.{SchemaRegistryConfig, SchemaRegistryRestApplication}
import io.confluent.rest.RestConfig
import net.manub.embeddedkafka.EmbeddedServer
import net.manub.embeddedkafka.ops.RunningServersOps
import net.manub.embeddedkafka.schemaregistry.{EmbeddedKafkaConfig, EmbeddedSR}

import scala.jdk.CollectionConverters._

// The enclosing trait declaration (and its Schema Registry startup members) was lost in extraction;
// only the surviving members are shown. The self-type on RunningServersOps, which supplies
// `runningServers`, is an assumption.
trait SchemaRegistryOps {
  this: RunningServersOps =>

  def stopSchemaRegistry(): Unit = runningServers.stopAndRemove(isEmbeddedSR)

  private[embeddedkafka] def isEmbeddedSR(server: EmbeddedServer): Boolean =
    server.isInstanceOf[EmbeddedSR]

  private[embeddedkafka] def schemaRegistryPort(
      restApp: SchemaRegistryRestApplication
  ): Int = {
    val listeners = restApp.getConfiguration.originalProperties
      .getProperty(RestConfig.LISTENERS_CONFIG)
    URI.create(listeners).getPort
  }
}
Example 117
Source File: package.scala From aloha with MIT License | 5 votes |
package com.eharmony import java.util.Properties import org.apache.commons.io.IOUtils import org.apache.commons.vfs2.VFS package object aloha { def pkgName = getClass.getPackage.getName def version: String = _version private[this] lazy val _version: String = { val is = VFS.getManager.resolveFile("res:" + pkgName.replaceAll("\\.", "/") + "/version.properties").getContent.getInputStream try { val p = new Properties() p.load(is) p.getProperty("aloha.version") } finally { IOUtils closeQuietly is } } }
Example 118
Source File: SparkImplicits.scala From apache-spark-test with Apache License 2.0 | 5 votes |
package com.github.dnvriend.spark.datasources import java.util.Properties import akka.NotUsed import akka.stream.Materializer import akka.stream.scaladsl.{ Sink, Source } import org.apache.spark.sql._ import org.apache.spark.sql.streaming.DataStreamReader import scala.collection.immutable._ import scala.concurrent.duration.{ FiniteDuration, _ } import scala.concurrent.{ Await, Future } import scala.reflect.runtime.universe._ import slick.driver.PostgresDriver.api._ object SparkImplicits { implicit class DataSourceOps(dfr: DataFrameReader) { def helloworld(path: String): DataFrame = dfr.format("helloworld").load(path) def person(path: String): DataFrame = dfr.format("person").load(path) def jdbc(table: String)(implicit jdbcOptions: Map[String, String]): DataFrame = dfr.format("jdbc").options(jdbcOptions ++ Map("dbtable" -> table)).load() } implicit class DataStreamReaderOps(dsr: DataStreamReader) { def currentPersistenceIds(path: String = "jdbc-read-journal"): DataFrame = dsr.format("current-persistence-id").load(path) def eventsByPersistenceId(path: String = "jdbc-read-journal"): DataFrame = dsr.format("current-events-by-persistence-id").load(path) } implicit class DataFrameWriterOps[T](dfw: DataFrameWriter[T]) { def ignore = dfw.mode(SaveMode.Ignore) def jdbc(table: String)(implicit jdbcOptions: Map[String, String]) = { val properties = jdbcOptions.foldLeft(new Properties) { case (prop, (k, v)) => prop.put(k, v); prop } dfw.jdbc(jdbcOptions("url"), table, properties) // does not (yet) work see: https://issues.apache.org/jira/browse/SPARK-7646 // dfw.format("jdbc").mode(SaveMode.Overwrite).options(jdbcOptions ++ Map("dbtable" -> table)) } } trait DataFrameQueryGenerator[A] { def upsert: String } implicit class DatasetOps(df: DataFrame) { def withSession[A](db: Database)(f: Session => A): A = { val session = db.createSession() try f(session) finally session.close() } def withStatement[A](db: Database)(f: java.sql.Statement => A): A = withSession(db)(session ⇒ session.withStatement()(f)) def upsert[A](table: String)(implicit db: Database, dfq: DataFrameQueryGenerator[A]): DataFrame = withStatement(db) { stmt => stmt.executeUpdate(dfq.upsert) df } } implicit class SparkSessionOps(spark: SparkSession) { def fromFuture[A <: Product: TypeTag](data: Future[Seq[A]])(implicit _timeout: FiniteDuration = null): DataFrame = spark.createDataFrame(Await.result(data, Option(_timeout).getOrElse(15.minutes))) def fromSource[A <: Product: TypeTag](data: Source[A, NotUsed])(implicit _timeout: FiniteDuration = null, mat: Materializer): DataFrame = fromFuture(data.runWith(Sink.seq)) } }
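A sketch of the reader and writer extensions in use, assuming a reachable Postgres instance; every connection value below is a placeholder:

import org.apache.spark.sql.SparkSession

import com.github.dnvriend.spark.datasources.SparkImplicits._

object SparkImplicitsSketch extends App {
  val spark = SparkSession.builder().master("local[*]").appName("jdbc-sketch").getOrCreate()

  // One options map serves both the DataFrameReader and DataFrameWriter extensions
  implicit val jdbcOptions: Map[String, String] = Map(
    "url"      -> "jdbc:postgresql://localhost:5432/docker",
    "user"     -> "postgres",
    "password" -> "postgres",
    "driver"   -> "org.postgresql.Driver"
  )

  val people = spark.read.jdbc("people")  // DataSourceOps
  people.write.ignore.jdbc("people_copy") // DataFrameWriterOps
  spark.stop()
}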
Example 119
Source File: MessageListener.scala From model-serving-tutorial with Apache License 2.0 | 5 votes |
package com.lightbend.modelserving.client import java.time.Duration import java.util.Properties import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer} import org.apache.kafka.common.serialization.ByteArrayDeserializer class MessageListener[K, V]( brokers: String, topic: String, group: String, keyDeserealizer: String, valueDeserealizer: String, processor: RecordProcessorTrait[K, V]) extends Runnable { import MessageListener._ import scala.collection.JavaConverters._ val consumer = new KafkaConsumer[K, V](consumerProperties(brokers, group, keyDeserealizer, valueDeserealizer)) consumer.subscribe(Seq(topic).asJava) var completed = false def complete(): Unit = { completed = true } override def run(): Unit = { while (!completed) { val records = consumer.poll(Duration.ofMillis(100)).asScala for (record <- records) { processor.processRecord(record) } } consumer.close() System.out.println("Listener completes") } def start(): Unit = { val t = new Thread(this) t.start() } }
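The companion object that assembles the consumer Properties did not survive in this listing; a minimal sketch of what it could look like, reusing the listing's imports (all settings beyond the constructor arguments are assumptions):

object MessageListener {
  def consumerProperties(brokers: String, group: String,
                         keyDeserealizer: String, valueDeserealizer: String): Properties = {
    val props = new Properties()
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ConsumerConfig.GROUP_ID_CONFIG, group)
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, keyDeserealizer)
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, valueDeserealizer)
    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") // assumption
    props
  }

  def apply[K, V](brokers: String, topic: String, group: String,
                  keyDeserealizer: String, valueDeserealizer: String,
                  processor: RecordProcessorTrait[K, V]): MessageListener[K, V] =
    new MessageListener[K, V](brokers, topic, group, keyDeserealizer, valueDeserealizer, processor)
}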
Example 120
Source File: KafkaMessageSender.scala From model-serving-tutorial with Apache License 2.0 | 5 votes |
package com.lightbend.modelserving.client import java.util.Properties import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata} import org.apache.kafka.common.serialization.ByteArraySerializer class MessageSender(val brokers: String) { import MessageSender._ val producer = new KafkaProducer[Array[Byte], Array[Byte]]( providerProperties(brokers, classOf[ByteArraySerializer].getName, classOf[ByteArraySerializer].getName)) def writeKeyValue(topic: String, key: Array[Byte], value: Array[Byte]): Unit = { val result = producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, key, value)).get producer.flush() } def writeValue(topic: String, value: Array[Byte]): Unit = { val result = producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, null.asInstanceOf[Array[Byte]], value)).get producer.flush() } def batchWriteValue(topic: String, batch: Seq[Array[Byte]]): Seq[RecordMetadata] = { val result = batch.map(value => producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, null.asInstanceOf[Array[Byte]], value)).get) producer.flush() result } def close(): Unit = { producer.close() } }
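Likewise, the MessageSender companion with providerProperties is missing from this listing; a minimal sketch, reusing the listing's imports (any extra settings would be assumptions):

object MessageSender {
  def providerProperties(brokers: String, keySerializer: String, valueSerializer: String): Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, keySerializer)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, valueSerializer)
    props
  }

  def apply(brokers: String): MessageSender = new MessageSender(brokers)
}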
Example 121
Source File: KafkaReadWrite.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.examples.kafka.dsl import java.util.Properties import org.apache.gearpump.cluster.client.ClientContext import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption} import org.apache.gearpump.streaming.dsl.scalaapi.StreamApp import org.apache.gearpump.streaming.kafka.KafkaStoreFactory import org.apache.gearpump.streaming.kafka.dsl.KafkaDSL import org.apache.gearpump.streaming.kafka.dsl.KafkaDSL._ import org.apache.gearpump.streaming.kafka.util.KafkaConfig import org.apache.gearpump.util.AkkaApp object KafkaReadWrite extends AkkaApp with ArgumentsParser { override val options: Array[(String, CLIOption[Any])] = Array( "source" -> CLIOption[Int]("<hom many kafka producer tasks>", required = false, defaultValue = Some(1)), "sink" -> CLIOption[Int]("<hom many kafka processor tasks>", required = false, defaultValue = Some(1)), "zookeeperConnect" -> CLIOption[String]("<zookeeper connect string>", required = false, defaultValue = Some("localhost:2181")), "brokerList" -> CLIOption[String]("<broker server list string>", required = false, defaultValue = Some("localhost:9092")), "sourceTopic" -> CLIOption[String]("<kafka source topic>", required = false, defaultValue = Some("topic1")), "sinkTopic" -> CLIOption[String]("<kafka sink topic>", required = false, defaultValue = Some("topic2")), "atLeastOnce" -> CLIOption[Boolean]("<turn on at least once source>", required = false, defaultValue = Some(true)) ) override def main(akkaConf: Config, args: Array[String]): Unit = { val config = parse(args) val sourceNum = config.getInt("source") val sinkNum = config.getInt("sink") val zookeeperConnect = config.getString("zookeeperConnect") val brokerList = config.getString("brokerList") val sourceTopic = config.getString("sourceTopic") val sinkTopic = config.getString("sinkTopic") val atLeastOnce = config.getBoolean("atLeastOnce") val props = new Properties val appName = "KafkaDSL" props.put(KafkaConfig.ZOOKEEPER_CONNECT_CONFIG, zookeeperConnect) props.put(KafkaConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList) props.put(KafkaConfig.CHECKPOINT_STORE_NAME_PREFIX_CONFIG, appName) val context = ClientContext(akkaConf) val app = StreamApp(appName, context) if (atLeastOnce) { val checkpointStoreFactory = new KafkaStoreFactory(props) KafkaDSL.createAtLeastOnceStream(app, sourceTopic, checkpointStoreFactory, props, sourceNum) .writeToKafka(sinkTopic, props, sinkNum) } else { KafkaDSL.createAtMostOnceStream(app, sourceTopic, props, sourceNum) .writeToKafka(sinkTopic, props, sinkNum) } context.submit(app) context.close() } }
Example 122
Source File: KafkaReadWrite.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.examples.kafka import java.util.Properties import akka.actor.ActorSystem import org.apache.gearpump.streaming.kafka.util.KafkaConfig import org.slf4j.Logger import org.apache.gearpump.cluster.UserConfig import org.apache.gearpump.cluster.client.ClientContext import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult} import org.apache.gearpump.streaming.partitioner.ShufflePartitioner import org.apache.gearpump.streaming.StreamApplication import org.apache.gearpump.streaming.kafka._ import org.apache.gearpump.streaming.sink.DataSinkProcessor import org.apache.gearpump.streaming.source.DataSourceProcessor import org.apache.gearpump.util.Graph._ import org.apache.gearpump.util.{AkkaApp, Graph, LogUtil} object KafkaReadWrite extends AkkaApp with ArgumentsParser { private val LOG: Logger = LogUtil.getLogger(getClass) override val options: Array[(String, CLIOption[Any])] = Array( "source" -> CLIOption[Int]("<hom many kafka producer tasks>", required = false, defaultValue = Some(1)), "sink" -> CLIOption[Int]("<hom many kafka processor tasks>", required = false, defaultValue = Some(1)), "zookeeperConnect" -> CLIOption[String]("<zookeeper connect string>", required = false, defaultValue = Some("localhost:2181")), "brokerList" -> CLIOption[String]("<broker server list string>", required = false, defaultValue = Some("localhost:9092")), "sourceTopic" -> CLIOption[String]("<kafka source topic>", required = false, defaultValue = Some("topic1")), "sinkTopic" -> CLIOption[String]("<kafka sink topic>", required = false, defaultValue = Some("topic2")) ) def application(config: ParseResult, system: ActorSystem): StreamApplication = { implicit val actorSystem = system val appName = "KafkaReadWrite" val sourceNum = config.getInt("source") val sinkNum = config.getInt("sink") val zookeeperConnect = config.getString("zookeeperConnect") val brokerList = config.getString("brokerList") val sourceTopic = config.getString("sourceTopic") val sinkTopic = config.getString("sinkTopic") val appConfig = UserConfig.empty val props = new Properties props.put(KafkaConfig.ZOOKEEPER_CONNECT_CONFIG, zookeeperConnect) props.put(KafkaConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList) props.put(KafkaConfig.CHECKPOINT_STORE_NAME_PREFIX_CONFIG, appName) val source = new KafkaSource(sourceTopic, props) val checkpointStoreFactory = new KafkaStoreFactory(props) source.setCheckpointStore(checkpointStoreFactory) val sourceProcessor = DataSourceProcessor(source, sourceNum) val sink = new KafkaSink(sinkTopic, props) val sinkProcessor = DataSinkProcessor(sink, sinkNum) val partitioner = new ShufflePartitioner val computation = sourceProcessor ~ partitioner ~> sinkProcessor val app = StreamApplication(appName, Graph(computation), appConfig) app } override def main(akkaConf: Config, args: Array[String]): Unit = { val config = parse(args) val context = ClientContext(akkaConf) val appId = context.submit(application(config, context.system)) context.close() } }
Example 123
Source File: KafkaWordCount.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.examples.kafka.wordcount import java.util.Properties import akka.actor.ActorSystem import kafka.api.OffsetRequest import org.apache.gearpump.streaming.kafka.util.KafkaConfig import org.slf4j.Logger import org.apache.gearpump.cluster.UserConfig import org.apache.gearpump.cluster.client.ClientContext import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult} import org.apache.gearpump.streaming.partitioner.HashPartitioner import org.apache.gearpump.streaming.kafka._ import org.apache.gearpump.streaming.sink.DataSinkProcessor import org.apache.gearpump.streaming.source.DataSourceProcessor import org.apache.gearpump.streaming.{Processor, StreamApplication} import org.apache.gearpump.util.Graph._ import org.apache.gearpump.util.{AkkaApp, Graph, LogUtil} object KafkaWordCount extends AkkaApp with ArgumentsParser { private val LOG: Logger = LogUtil.getLogger(getClass) override val options: Array[(String, CLIOption[Any])] = Array( "source" -> CLIOption[Int]("<how many kafka source tasks>", required = false, defaultValue = Some(1)), "split" -> CLIOption[Int]("<how many split tasks>", required = false, defaultValue = Some(1)), "sum" -> CLIOption[Int]("<how many sum tasks>", required = false, defaultValue = Some(1)), "sink" -> CLIOption[Int]("<how many kafka sink tasks>", required = false, defaultValue = Some(1)) ) def application(config: ParseResult, system: ActorSystem): StreamApplication = { implicit val actorSystem = system val appName = "KafkaWordCount" val sourceNum = config.getInt("source") val splitNum = config.getInt("split") val sumNum = config.getInt("sum") val sinkNum = config.getInt("sink") val appConfig = UserConfig.empty val props = new Properties props.put(KafkaConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181") props.put(KafkaConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092") props.put(KafkaConfig.CONSUMER_START_OFFSET_CONFIG, new java.lang.Long(OffsetRequest.LatestTime)) props.put(KafkaConfig.CHECKPOINT_STORE_NAME_PREFIX_CONFIG, appName) val sourceTopic = "topic1" val source = new KafkaSource(sourceTopic, props) val checkpointStoreFactory = new KafkaStoreFactory(props) source.setCheckpointStore(checkpointStoreFactory) val sourceProcessor = DataSourceProcessor(source, sourceNum) val split = Processor[Split](splitNum) val sum = Processor[Sum](sumNum) val sink = new KafkaSink("topic2", props) val sinkProcessor = DataSinkProcessor(sink, sinkNum) val partitioner = new HashPartitioner val computation = sourceProcessor ~ partitioner ~> split ~ partitioner ~> sum ~ partitioner ~> sinkProcessor val app = StreamApplication(appName, Graph(computation), appConfig) app } override def main(akkaConf: Config, args: Array[String]): Unit = { val config = parse(args) val context = ClientContext(akkaConf) val appId = context.submit(application(config, context.system)) context.close() } }
Example 124
Source File: NumericalDataProducer.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.integrationtest.kafka import java.util.Properties import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.serialization.ByteArraySerializer import org.apache.log4j.Logger import org.apache.gearpump.streaming.serializer.ChillSerializer class NumericalDataProducer(topic: String, bootstrapServers: String) { private val LOG = Logger.getLogger(getClass) private val producer = createProducer private val WRITE_SLEEP_NANOS = 10 private val serializer = new ChillSerializer[Int] var lastWriteNum = 0 def start(): Unit = { produceThread.start() } def stop(): Unit = { if (produceThread.isAlive) { produceThread.interrupt() produceThread.join() } producer.close() } def producedNumbers: Range = { Range(1, lastWriteNum + 1) } private def createProducer: KafkaProducer[Array[Byte], Array[Byte]] = { val properties = new Properties() properties.setProperty("bootstrap.servers", bootstrapServers) new KafkaProducer[Array[Byte], Array[Byte]](properties, new ByteArraySerializer, new ByteArraySerializer) } private val produceThread = new Thread(new Runnable { override def run(): Unit = { try { while (!Thread.currentThread.isInterrupted) { lastWriteNum += 1 val msg = serializer.serialize(lastWriteNum) val record = new ProducerRecord[Array[Byte], Array[Byte]](topic, msg) producer.send(record) Thread.sleep(0, WRITE_SLEEP_NANOS) } } catch { case ex: InterruptedException => LOG.error("message producing is stopped by an interrupt") } } }) }
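The producer above is configured purely through a Properties object plus explicitly supplied serializers. A hedged, stand-alone sketch of the same construction follows; the broker address and the demo-topic name are placeholders rather than values from the original project, and a reachable broker is assumed at run time.

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.ByteArraySerializer

object ProducerPropsSketch {
  def main(args: Array[String]): Unit = {
    // "bootstrap.servers" is the only mandatory setting when the serializers are
    // passed to the constructor, as in the example above.
    val props = new Properties()
    props.setProperty("bootstrap.servers", "localhost:9092")

    val producer = new KafkaProducer[Array[Byte], Array[Byte]](
      props, new ByteArraySerializer, new ByteArraySerializer)
    try {
      // "demo-topic" is a placeholder topic name.
      producer.send(new ProducerRecord[Array[Byte], Array[Byte]]("demo-topic", "hello".getBytes("UTF-8")))
    } finally {
      producer.close()
    }
  }
}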
Example 125
Source File: KafkaDSL.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.kafka.dsl

import java.util.Properties

import org.apache.gearpump.cluster.UserConfig
import org.apache.gearpump.streaming.dsl.scalaapi.{Stream, StreamApp}
import org.apache.gearpump.streaming.kafka.{KafkaSink, KafkaSource}
import org.apache.gearpump.streaming.transaction.api.CheckpointStoreFactory

object KafkaDSL {

  // writeToKafka is an extension method on Stream[T]; the enclosing implicit class
  // elided from the published snippet is restored here so the code compiles.
  implicit class KafkaStream[T](val stream: Stream[T]) {
    def writeToKafka(
        topic: String,
        properties: Properties,
        parallelism: Int = 1,
        userConfig: UserConfig = UserConfig.empty,
        description: String = "KafkaSink"): Stream[T] = {
      stream.sink(new KafkaSink(topic, properties), parallelism, userConfig, description)
    }
  }
}
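Assuming the implicit-class reading of the snippet above, usage boils down to preparing a Properties object for the sink and calling writeToKafka on an existing Stream. The sketch below only builds the Properties; the commented call site, the wordCounts stream and the output-topic name are hypothetical.

import java.util.Properties

object KafkaDslUsageSketch {
  def main(args: Array[String]): Unit = {
    // Sink settings handed to KafkaSink through the extension above.
    val props = new Properties()
    props.put("bootstrap.servers", "localhost:9092")

    // Hypothetical call site (requires a running Gearpump StreamApp and a Stream[T] value):
    // wordCounts.writeToKafka("output-topic", props, parallelism = 1)
    println(props)
  }
}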
Example 126
Source File: AbstractKafkaSink.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.kafka.lib.sink import java.util.Properties import org.apache.gearpump.Message import org.apache.gearpump.streaming.kafka.lib.sink.AbstractKafkaSink.KafkaProducerFactory import org.apache.gearpump.streaming.kafka.util.KafkaConfig import org.apache.gearpump.streaming.kafka.util.KafkaConfig.KafkaConfigFactory import org.apache.gearpump.streaming.sink.DataSink import org.apache.gearpump.streaming.task.TaskContext import org.apache.gearpump.util.LogUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.serialization.ByteArraySerializer object AbstractKafkaSink { private val LOG = LogUtil.getLogger(classOf[AbstractKafkaSink]) val producerFactory = new KafkaProducerFactory { override def getKafkaProducer(config: KafkaConfig): KafkaProducer[Array[Byte], Array[Byte]] = { new KafkaProducer[Array[Byte], Array[Byte]](config.getProducerConfig, new ByteArraySerializer, new ByteArraySerializer) } } trait KafkaProducerFactory extends java.io.Serializable { def getKafkaProducer(config: KafkaConfig): KafkaProducer[Array[Byte], Array[Byte]] } } abstract class AbstractKafkaSink private[kafka]( topic: String, props: Properties, kafkaConfigFactory: KafkaConfigFactory, factory: KafkaProducerFactory) extends DataSink { import org.apache.gearpump.streaming.kafka.lib.sink.AbstractKafkaSink._ def this(topic: String, props: Properties) = { this(topic, props, new KafkaConfigFactory, AbstractKafkaSink.producerFactory) } private lazy val config = kafkaConfigFactory.getKafkaConfig(props) // Lazily construct producer since KafkaProducer is not serializable private lazy val producer = factory.getKafkaProducer(config) override def open(context: TaskContext): Unit = { LOG.info("KafkaSink opened") } override def write(message: Message): Unit = { message.value match { case (k: Array[Byte], v: Array[Byte]) => val record = new ProducerRecord[Array[Byte], Array[Byte]](topic, k, v) producer.send(record) LOG.debug("KafkaSink sent record {} to Kafka", record) case v: Array[Byte] => val record = new ProducerRecord[Array[Byte], Array[Byte]](topic, v) producer.send(record) LOG.debug("KafkaSink sent record {} to Kafka", record) case m => val errorMsg = s"unexpected message type ${m.getClass}; " + s"Array[Byte] or (Array[Byte], Array[Byte]) required" LOG.error(errorMsg) } } override def close(): Unit = { producer.close() LOG.info("KafkaSink closed") } }
Example 127
Source File: KafkaStore.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.kafka.lib.store import java.util.Properties import com.twitter.bijection.Injection import kafka.api.OffsetRequest import org.apache.gearpump.Time.MilliSeconds import org.apache.gearpump.streaming.kafka.lib.source.consumer.KafkaConsumer import org.apache.gearpump.streaming.kafka.util.KafkaConfig import org.apache.gearpump.streaming.kafka.util.KafkaConfig.KafkaConfigFactory import org.apache.gearpump.streaming.transaction.api.{CheckpointStore, CheckpointStoreFactory} import org.apache.gearpump.util.LogUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.serialization.ByteArraySerializer class KafkaStore private[kafka]( val topic: String, val producer: KafkaProducer[Array[Byte], Array[Byte]], val optConsumer: Option[KafkaConsumer]) extends CheckpointStore { import org.apache.gearpump.streaming.kafka.lib.store.KafkaStore._ private var maxTime: MilliSeconds = 0L override def persist(time: MilliSeconds, checkpoint: Array[Byte]): Unit = { // make sure checkpointed timestamp is monotonically increasing // hence (1, 1), (3, 2), (2, 3) is checkpointed as (1, 1), (3, 2), (3, 3) if (time > maxTime) { maxTime = time } val key = maxTime val value = checkpoint val message = new ProducerRecord[Array[Byte], Array[Byte]]( topic, 0, Injection[Long, Array[Byte]](key), value) producer.send(message) LOG.debug("KafkaStore persisted state ({}, {})", key, value) } override def recover(time: MilliSeconds): Option[Array[Byte]] = { var checkpoint: Option[Array[Byte]] = None optConsumer.foreach { consumer => while (consumer.hasNext && checkpoint.isEmpty) { val kafkaMsg = consumer.next() checkpoint = for { k <- kafkaMsg.key t <- Injection.invert[MilliSeconds, Array[Byte]](k).toOption c = kafkaMsg.msg if t >= time } yield c } consumer.close() } checkpoint match { case Some(c) => LOG.info(s"KafkaStore recovered checkpoint ($time, $c)") case None => LOG.info(s"no checkpoint existing for $time") } checkpoint } override def close(): Unit = { producer.close() LOG.info("KafkaStore closed") } }
Example 128
Source File: KafkaSinkSpec.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.kafka import java.util.Properties import com.twitter.bijection.Injection import org.apache.gearpump.streaming.kafka.lib.sink.AbstractKafkaSink.KafkaProducerFactory import org.apache.gearpump.streaming.kafka.util.KafkaConfig import org.apache.gearpump.streaming.kafka.util.KafkaConfig.KafkaConfigFactory import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.mockito.Mockito._ import org.scalacheck.Gen import org.scalatest.mock.MockitoSugar import org.scalatest.prop.PropertyChecks import org.scalatest.{Matchers, PropSpec} import org.apache.gearpump.Message import org.apache.gearpump.streaming.MockUtil class KafkaSinkSpec extends PropSpec with PropertyChecks with Matchers with MockitoSugar { val dataGen = for { topic <- Gen.alphaStr key <- Gen.alphaStr msg <- Gen.alphaStr } yield (topic, Injection[String, Array[Byte]](key), Injection[String, Array[Byte]](msg)) property("KafkaSink write should send producer record") { forAll(dataGen) { (data: (String, Array[Byte], Array[Byte])) => val props = mock[Properties] val producer = mock[KafkaProducer[Array[Byte], Array[Byte]]] val producerFactory = mock[KafkaProducerFactory] val configFactory = mock[KafkaConfigFactory] val config = mock[KafkaConfig] when(configFactory.getKafkaConfig(props)).thenReturn(config) when(producerFactory.getKafkaProducer(config)).thenReturn(producer) val (topic, key, msg) = data val kafkaSink = new KafkaSink(topic, props, configFactory, producerFactory) kafkaSink.write(Message((key, msg))) verify(producer).send(MockUtil.argMatch[ProducerRecord[Array[Byte], Array[Byte]]]( r => r.topic == topic && (r.key sameElements key) && (r.value sameElements msg))) kafkaSink.write(Message(msg)) verify(producer).send(MockUtil.argMatch[ProducerRecord[Array[Byte], Array[Byte]]]( r => r.topic() == topic && (r.key == null) && (r.value() sameElements msg) )) kafkaSink.close() } } property("KafkaSink close should close kafka producer") { val props = mock[Properties] val producer = mock[KafkaProducer[Array[Byte], Array[Byte]]] val producerFactory = mock[KafkaProducerFactory] val configFactory = mock[KafkaConfigFactory] val config = mock[KafkaConfig] when(configFactory.getKafkaConfig(props)).thenReturn(config) when(producerFactory.getKafkaProducer(config)).thenReturn(producer) val kafkaSink = new KafkaSink("topic", props, configFactory, producerFactory) kafkaSink.close() verify(producer).close() } }
Example 129
Source File: KafkaServerHarness.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.kafka.util import java.util.Properties import kafka.admin.AdminUtils import kafka.common.KafkaException import kafka.server.{KafkaConfig => KafkaServerConfig, KafkaServer} import kafka.utils.{TestUtils, Utils} trait KafkaServerHarness extends ZookeeperHarness { val configs: List[KafkaServerConfig] private var servers: List[KafkaServer] = null private var brokerList: String = null def getServers: List[KafkaServer] = servers def getBrokerList: String = brokerList override def setUp() { super.setUp() if (configs.size <= 0) { throw new KafkaException("Must supply at least one server config.") } brokerList = TestUtils.getBrokerListStrFromConfigs(configs) servers = configs.map(TestUtils.createServer(_)) } override def tearDown() { servers.foreach(_.shutdown()) servers.foreach(_.config.logDirs.foreach(Utils.rm)) super.tearDown() } def createTopicUntilLeaderIsElected(topic: String, partitions: Int, replicas: Int, timeout: Long = 10000): Map[Int, Option[Int]] = { val zkClient = getZkClient try { // Creates topic AdminUtils.createTopic(zkClient, topic, partitions, replicas, new Properties) // Waits until the update metadata request for new topic reaches all servers (0 until partitions).map { case i => TestUtils.waitUntilMetadataIsPropagated(servers, topic, i, timeout) i -> TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, i, timeout) }.toMap } catch { case e: Exception => throw e } } }
Example 130
Source File: HadoopRunner.scala From ETL-Starter-Kit with MIT License | 5 votes |
package com.etl.utils import java.util.Properties import com.twitter.scalding.{RichXHandler, Tool} import org.apache.hadoop class HadoopRunner(job: String, prop: Properties) { def run() { val args = prop.get("main.args").asInstanceOf[String] require(args != null, "unable to get main.args") try { hadoop.util.ToolRunner.run(new hadoop.conf.Configuration, new Tool, args.split(" ")) } catch { case t: Throwable => { //create the exception URL link in GitHub wiki val gitHubLink = RichXHandler.createXUrl(t) val extraInfo = (if (RichXHandler().handlers.exists(h => h(t))) { RichXHandler.mapping(t.getClass) + "\n" } else { "" }) + "If you know what exactly caused this error, please consider contributing to GitHub via following link.\n" + gitHubLink //re-throw the exception with extra info throw new Throwable(extraInfo, t) } } } }
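The runner above relies on a required entry (main.args) being present in the Properties it receives. A small stand-alone sketch of that required-property check; the key and the sample argument string are illustrative.

import java.util.Properties

object RequiredProps {
  // Fetch a property that must be present, mirroring the require(args != null, ...) check above.
  def required(props: Properties, key: String): String =
    Option(props.getProperty(key))
      .getOrElse(throw new IllegalArgumentException(s"unable to get $key"))

  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.setProperty("main.args", "--input in.tsv --output out.tsv") // illustrative value
    println(required(props, "main.args"))
  }
}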
Example 131
Source File: LogAnalyticsSinkConfiguration.scala From spark-monitoring with MIT License | 5 votes |
package org.apache.spark.metrics.sink.loganalytics import java.util.Properties import java.util.concurrent.TimeUnit import com.microsoft.pnp.LogAnalyticsEnvironment import org.apache.spark.LogAnalyticsConfiguration private[spark] object LogAnalyticsSinkConfiguration { private[spark] val LOGANALYTICS_KEY_WORKSPACEID = "workspaceId" private[spark] val LOGANALYTICS_KEY_SECRET = "secret" private[spark] val LOGANALYTICS_KEY_LOGTYPE = "logType" private[spark] val LOGANALYTICS_KEY_TIMESTAMPFIELD = "timestampField" private[spark] val LOGANALYTICS_KEY_PERIOD = "period" private[spark] val LOGANALYTICS_KEY_UNIT = "unit" private[spark] val LOGANALYTICS_DEFAULT_LOGTYPE = "SparkMetrics" private[spark] val LOGANALYTICS_DEFAULT_PERIOD = "10" private[spark] val LOGANALYTICS_DEFAULT_UNIT = "SECONDS" } private[spark] class LogAnalyticsSinkConfiguration(properties: Properties) extends LogAnalyticsConfiguration { import LogAnalyticsSinkConfiguration._ override def getWorkspaceId: Option[String] = { Option(properties.getProperty(LOGANALYTICS_KEY_WORKSPACEID, LogAnalyticsEnvironment.getWorkspaceId)) } override def getSecret: Option[String] = { Option(properties.getProperty(LOGANALYTICS_KEY_SECRET, LogAnalyticsEnvironment.getWorkspaceKey)) } override protected def getLogType: String = properties.getProperty(LOGANALYTICS_KEY_LOGTYPE, LOGANALYTICS_DEFAULT_LOGTYPE) override protected def getTimestampFieldName: Option[String] = Option(properties.getProperty(LOGANALYTICS_KEY_TIMESTAMPFIELD, null)) val pollPeriod: Int = { val value = properties.getProperty(LOGANALYTICS_KEY_PERIOD, LOGANALYTICS_DEFAULT_PERIOD).toInt logInfo(s"Setting polling period to $value") value } val pollUnit: TimeUnit = { val value = TimeUnit.valueOf( properties.getProperty(LOGANALYTICS_KEY_UNIT, LOGANALYTICS_DEFAULT_UNIT).toUpperCase) logInfo(s"Setting polling unit to $value") value } }
Example 132
Source File: LogAnalyticsMetricsSink.scala From spark-monitoring with MIT License | 5 votes |
package org.apache.spark.metrics.sink.loganalytics import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.MetricRegistry import org.apache.spark.internal.Logging import org.apache.spark.metrics.sink.Sink import org.apache.spark.{SecurityManager, SparkException} private class LogAnalyticsMetricsSink( val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink with Logging { private val config = new LogAnalyticsSinkConfiguration(property) org.apache.spark.metrics.MetricsSystem.checkMinimalPollingPeriod(config.pollUnit, config.pollPeriod) var reporter = LogAnalyticsReporter.forRegistry(registry) .withWorkspaceId(config.workspaceId) .withWorkspaceKey(config.secret) .withLogType(config.logType) .build() override def start(): Unit = { reporter.start(config.pollPeriod, config.pollUnit) logInfo(s"LogAnalyticsMetricsSink started") } override def stop(): Unit = { reporter.stop() logInfo("LogAnalyticsMetricsSink stopped.") } override def report(): Unit = { reporter.report() } }
Example 133
Source File: MiniClusterUtils.scala From incubator-livy with Apache License 2.0 | 5 votes |
package org.apache.livy.test.framework import java.io._ import java.nio.charset.StandardCharsets.UTF_8 import java.util.Properties import scala.collection.JavaConverters._ import org.apache.hadoop.conf.Configuration trait MiniClusterUtils { protected def saveProperties(props: Map[String, String], dest: File): Unit = { val jprops = new Properties() props.foreach { case (k, v) => jprops.put(k, v) } val tempFile = new File(dest.getAbsolutePath() + ".tmp") val out = new OutputStreamWriter(new FileOutputStream(tempFile), UTF_8) try { jprops.store(out, "Configuration") } finally { out.close() } tempFile.renameTo(dest) } protected def loadProperties(file: File): Map[String, String] = { val in = new InputStreamReader(new FileInputStream(file), UTF_8) val props = new Properties() try { props.load(in) } finally { in.close() } props.asScala.toMap } protected def saveConfig(conf: Configuration, dest: File): Unit = { val redacted = new Configuration(conf) // This setting references a test class that is not available when using a real Spark // installation, so remove it from client configs. redacted.unset("net.topology.node.switch.mapping.impl") val out = new FileOutputStream(dest) try { redacted.writeXml(out) } finally { out.close() } } }
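The helpers above store and load Properties through explicit UTF-8 readers and writers. A self-contained round-trip sketch of the same idea, using a temporary file and an illustrative livy.server.port entry.

import java.io.{File, FileInputStream, FileOutputStream, InputStreamReader, OutputStreamWriter}
import java.nio.charset.StandardCharsets.UTF_8
import java.util.Properties

import scala.collection.JavaConverters._

object PropertiesRoundTrip {
  def main(args: Array[String]): Unit = {
    val file = File.createTempFile("round-trip", ".properties")

    // Write the properties with an explicit UTF-8 writer.
    val props = new Properties()
    props.setProperty("livy.server.port", "8998") // illustrative key/value
    val out = new OutputStreamWriter(new FileOutputStream(file), UTF_8)
    try props.store(out, "Configuration") finally out.close()

    // Read them back and expose them as an immutable Scala Map.
    val in = new InputStreamReader(new FileInputStream(file), UTF_8)
    val loaded = new Properties()
    try loaded.load(in) finally in.close()

    val asMap: Map[String, String] = loaded.asScala.toMap
    println(asMap) // Map(livy.server.port -> 8998)
  }
}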
Example 134
Source File: package.scala From incubator-livy with Apache License 2.0 | 5 votes |
package org.apache import java.util.Properties import scala.util.control.NonFatal package object livy { private object LivyBuildInfo { val ( livyVersion: String, livyBuildUser: String, livyRevision: String, livyBranch: String, livyBuildDate: String, livyRepo: String ) = { val unknown = "<unknown>" val defaultValue = (unknown, unknown, unknown, unknown, unknown, unknown) val resource = Option(Thread.currentThread().getContextClassLoader .getResourceAsStream("livy-version-info.properties")) try { resource.map { r => val properties = new Properties() properties.load(r) ( properties.getProperty("version", unknown), properties.getProperty("user", unknown), properties.getProperty("revision", unknown), properties.getProperty("branch", unknown), properties.getProperty("date", unknown), properties.getProperty("url", unknown) ) }.getOrElse(defaultValue) } catch { case NonFatal(e) => // swallow the exception defaultValue } finally { try { resource.foreach(_.close()) } catch { case NonFatal(e) => // swallow the exception in closing the stream } } } } val LIVY_VERSION = LivyBuildInfo.livyVersion val LIVY_BUILD_USER = LivyBuildInfo.livyBuildUser val LIVY_REVISION = LivyBuildInfo.livyRevision val LIVY_BRANCH = LivyBuildInfo.livyBranch val LIVY_BUILD_DATE = LivyBuildInfo.livyBuildDate val LIVY_REPO_URL = LivyBuildInfo.livyRepo }
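The package object above reads build metadata from a classpath resource and falls back to "<unknown>" per key. A minimal sketch of that resource-plus-default pattern; version-info.properties and the key names are placeholders.

import java.util.Properties

object BuildInfoSketch {
  def main(args: Array[String]): Unit = {
    val unknown = "<unknown>"
    // Placeholder resource name; in the example above it is livy-version-info.properties.
    val resource = Option(
      Thread.currentThread().getContextClassLoader
        .getResourceAsStream("version-info.properties"))

    val props = new Properties()
    try {
      resource.foreach(is => props.load(is))
    } finally {
      resource.foreach(_.close())
    }

    // getProperty's two-argument form supplies a per-key fallback.
    println(props.getProperty("version", unknown))
    println(props.getProperty("branch", unknown))
  }
}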
Example 135
Source File: SessionSpec.scala From incubator-livy with Apache License 2.0 | 5 votes |
package org.apache.livy.repl import java.util.Properties import java.util.concurrent.{ConcurrentLinkedQueue, CountDownLatch, TimeUnit} import org.apache.spark.SparkConf import org.scalatest.{BeforeAndAfter, FunSpec} import org.scalatest.Matchers._ import org.scalatest.concurrent.Eventually import org.scalatest.time._ import org.apache.livy.LivyBaseUnitTestSuite import org.apache.livy.repl.Interpreter.ExecuteResponse import org.apache.livy.rsc.RSCConf import org.apache.livy.sessions._ class SessionSpec extends FunSpec with Eventually with LivyBaseUnitTestSuite with BeforeAndAfter { override implicit val patienceConfig = PatienceConfig(timeout = scaled(Span(30, Seconds)), interval = scaled(Span(100, Millis))) private val rscConf = new RSCConf(new Properties()).set(RSCConf.Entry.SESSION_KIND, "spark") describe("Session") { var session: Session = null after { if (session != null) { session.close() session = null } } it("should call state changed callbacks in happy path") { val expectedStateTransitions = Array("not_started", "starting", "idle", "busy", "idle", "busy", "idle") val actualStateTransitions = new ConcurrentLinkedQueue[String]() session = new Session(rscConf, new SparkConf(), None, { s => actualStateTransitions.add(s.toString) }) session.start() session.execute("") eventually { actualStateTransitions.toArray shouldBe expectedStateTransitions } } it("should not transit to idle if there're any pending statements.") { val expectedStateTransitions = Array("not_started", "starting", "idle", "busy", "busy", "busy", "idle", "busy", "idle") val actualStateTransitions = new ConcurrentLinkedQueue[String]() val blockFirstExecuteCall = new CountDownLatch(1) val interpreter = new SparkInterpreter(new SparkConf()) { override def execute(code: String): ExecuteResponse = { blockFirstExecuteCall.await(10, TimeUnit.SECONDS) super.execute(code) } } session = new Session(rscConf, new SparkConf(), Some(interpreter), { s => actualStateTransitions.add(s.toString) }) session.start() for (_ <- 1 to 2) { session.execute("") } blockFirstExecuteCall.countDown() eventually { actualStateTransitions.toArray shouldBe expectedStateTransitions } } it("should remove old statements when reaching threshold") { rscConf.set(RSCConf.Entry.RETAINED_STATEMENTS, 2) session = new Session(rscConf, new SparkConf()) session.start() session.statements.size should be (0) session.execute("") session.statements.size should be (1) session.statements.map(_._1).toSet should be (Set(0)) session.execute("") session.statements.size should be (2) session.statements.map(_._1).toSet should be (Set(0, 1)) session.execute("") eventually { session.statements.size should be (2) session.statements.map(_._1).toSet should be (Set(1, 2)) } // Continue submitting statements, total statements in memory should be 2. session.execute("") eventually { session.statements.size should be (2) session.statements.map(_._1).toSet should be (Set(2, 3)) } } } }
Example 136
Source File: BaseSessionSpec.scala From incubator-livy with Apache License 2.0 | 5 votes |
package org.apache.livy.repl import java.util.Properties import java.util.concurrent.atomic.AtomicInteger import scala.concurrent.Await import scala.concurrent.duration._ import scala.language.postfixOps import org.apache.spark.SparkConf import org.json4s._ import org.scalatest.{FlatSpec, Matchers} import org.scalatest.concurrent.Eventually._ import org.apache.livy.LivyBaseUnitTestSuite import org.apache.livy.rsc.RSCConf import org.apache.livy.rsc.driver.{Statement, StatementState} import org.apache.livy.sessions._ abstract class BaseSessionSpec(kind: Kind) extends FlatSpec with Matchers with LivyBaseUnitTestSuite { implicit val formats = DefaultFormats private val rscConf = new RSCConf(new Properties()).set(RSCConf.Entry.SESSION_KIND, kind.toString) private val sparkConf = new SparkConf() protected def execute(session: Session)(code: String): Statement = { val id = session.execute(code) eventually(timeout(30 seconds), interval(100 millis)) { val s = session.statements(id) s.state.get() shouldBe StatementState.Available s } } protected def withSession(testCode: Session => Any): Unit = { val stateChangedCalled = new AtomicInteger() val session = new Session(rscConf, sparkConf, None, { _ => stateChangedCalled.incrementAndGet() }) try { // Session's constructor should fire an initial state change event. stateChangedCalled.intValue() shouldBe 1 Await.ready(session.start(), 30 seconds) assert(session.state === SessionState.Idle) // There should be at least 1 state change event fired when session transits to idle. stateChangedCalled.intValue() should (be > 1) testCode(session) } finally { session.close() } } it should "start in the starting or idle state" in { val session = new Session(rscConf, sparkConf) val future = session.start() try { Await.ready(future, 60 seconds) session.state should (equal (SessionState.Starting) or equal (SessionState.Idle)) } finally { session.close() } } it should "eventually become the idle state" in withSession { session => session.state should equal (SessionState.Idle) } }
Example 137
Source File: Utils.scala From incubator-livy with Apache License 2.0 | 5 votes |
package org.apache.livy import java.io.{Closeable, File, InputStreamReader} import java.net.URL import java.nio.charset.StandardCharsets.UTF_8 import java.security.SecureRandom import java.util.Properties import scala.annotation.tailrec import scala.collection.JavaConverters._ import scala.concurrent.TimeoutException import scala.concurrent.duration.Duration import org.apache.commons.codec.binary.Base64 object Utils { def getPropertiesFromFile(file: File): Map[String, String] = { loadProperties(file.toURI().toURL()) } def loadProperties(url: URL): Map[String, String] = { val inReader = new InputStreamReader(url.openStream(), UTF_8) try { val properties = new Properties() properties.load(inReader) properties.stringPropertyNames().asScala.map { k => (k, properties.getProperty(k).trim()) }.toMap } finally { inReader.close() } } def isProcessAlive(process: Process): Boolean = { try { process.exitValue() false } catch { case _: IllegalThreadStateException => true } } def startDaemonThread(name: String)(f: => Unit): Thread = { val thread = new Thread(name) { override def run(): Unit = f } thread.setDaemon(true) thread.start() thread } def usingResource[A <: Closeable, B](resource: A)(f: A => B): B = { try { f(resource) } finally { resource.close() } } def createSecret(secretBitLength: Int): String = { val rnd = new SecureRandom() val secretBytes = new Array[Byte](secretBitLength / java.lang.Byte.SIZE) rnd.nextBytes(secretBytes) Base64.encodeBase64String(secretBytes) } }
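loadProperties above converts a Properties object into an immutable Map. The reverse direction is occasionally needed as well; a small sketch, with an illustrative livy.ui.enabled key.

import java.util.Properties

object MapToProperties {
  // Reverse of the loadProperties helper above: turn an immutable Map back into Properties.
  def toProperties(map: Map[String, String]): Properties = {
    val props = new Properties()
    map.foreach { case (k, v) => props.setProperty(k, v) }
    props
  }

  def main(args: Array[String]): Unit = {
    val props = toProperties(Map("livy.ui.enabled" -> "true")) // illustrative key
    println(props.getProperty("livy.ui.enabled"))
  }
}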
Example 138
Source File: stringMap.scala From case-classy with Apache License 2.0 | 5 votes |
package classy import java.io.InputStream //#=jvm import java.util.Properties package object stringMap { type StringMap = scala.collection.Map[String, String] type JavaStringMap = java.util.Map[String, String] type StringMapDecoder[A] = Decoder[StringMap, A] object StringMapDecoder { def apply[A](implicit ev: StringMapDecoder[A]): StringMapDecoder[A] = ev def instance[A](f: StringMap => Either[DecodeError, A]): StringMapDecoder[A] = Decoder.instance(f) } implicit val stringMapReadString: Read[StringMap, String] = Read.instance(decoders.stringMapToString) implicit val stringMapReadNested: Read[StringMap, StringMap] = Read.instance(decoders.stringMapToStringMap) implicit val stringMapReadListString: Read[StringMap, List[String]] = Read.instance(decoders.stringMapToListString) implicit val stringMapReadListStringMap: Read[StringMap, List[StringMap]] = Read.instance(decoders.stringMapToListStringMap) val readStringMap: Read.From[StringMap] = Read.from[StringMap] implicit class StringMapDecoderOps[A]( private val decoder: Decoder[StringMap, A] ) extends AnyVal { import scala.collection.convert.{ Wrappers => wrap } def fromProperties: Decoder[Properties, A] = decoder.mapInput(properties => wrap.JPropertiesWrapper(properties)) } //#+jvm implicit class PropertiesDecoderOps[A]( private val decoder: Decoder[Properties, A] ) extends AnyVal { def fromInputStream: Decoder[InputStream, A] = decoder <<< decoders.inputStreamToProperties } //#-jvm }
Example 139
Source File: KafkaTestUtilsTest.scala From spark-testing-base with Apache License 2.0 | 5 votes |
package com.holdenkarau.spark.testing.kafka import java.util.Properties import scala.collection.JavaConversions._ import kafka.consumer.ConsumerConfig import org.apache.spark.streaming.kafka.KafkaTestUtils import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import org.scalatest.{BeforeAndAfterAll, FunSuite} @RunWith(classOf[JUnitRunner]) class KafkaTestUtilsTest extends FunSuite with BeforeAndAfterAll { private var kafkaTestUtils: KafkaTestUtils = _ override def beforeAll(): Unit = { kafkaTestUtils = new KafkaTestUtils kafkaTestUtils.setup() } override def afterAll(): Unit = if (kafkaTestUtils != null) { kafkaTestUtils.teardown() kafkaTestUtils = null } test("Kafka send and receive message") { val topic = "test-topic" val message = "HelloWorld!" kafkaTestUtils.createTopic(topic) kafkaTestUtils.sendMessages(topic, message.getBytes) val consumerProps = new Properties() consumerProps.put("zookeeper.connect", kafkaTestUtils.zkAddress) consumerProps.put("group.id", "test-group") consumerProps.put("flow-topic", topic) consumerProps.put("auto.offset.reset", "smallest") consumerProps.put("zookeeper.session.timeout.ms", "2000") consumerProps.put("zookeeper.connection.timeout.ms", "6000") consumerProps.put("zookeeper.sync.time.ms", "2000") consumerProps.put("auto.commit.interval.ms", "2000") val consumer = kafka.consumer.Consumer.createJavaConsumerConnector(new ConsumerConfig(consumerProps)) try { val topicCountMap = Map(topic -> new Integer(1)) val consumerMap = consumer.createMessageStreams(topicCountMap) val stream = consumerMap.get(topic).get(0) val it = stream.iterator() val mess = it.next assert(new String(mess.message().map(_.toChar)) === message) } finally { consumer.shutdown() } } }
Example 140
Source File: LagomDevModePropertiesLoader.scala From lagom with Apache License 2.0 | 5 votes |
package com.lightbend.lagom.devmode.internal.util import java.io.File import java.io.FileInputStream import java.util.Properties object PropertiesLoader { def from(file: String): Properties = { val properties = new Properties() // First check if the file is on the classpath val is = { getClass.getResourceAsStream(file) match { case null => // Try and load it as a file val f = new File(file) if (f.isFile) { new FileInputStream(f) } else { throw new IllegalArgumentException(s"File $file not found as classpath resource or on the filesystem") } case found => found } } try { properties.load(is) properties } finally { is.close() } } }
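PropertiesLoader above prefers the classpath and falls back to the filesystem. A stand-alone sketch of that lookup order, writing an illustrative playVersion entry to a temporary file so the example runs on its own.

import java.io.{File, FileInputStream, InputStream}
import java.util.Properties

object ClasspathOrFile {
  // Same lookup order as PropertiesLoader above: classpath resource first, then the filesystem.
  def open(name: String): InputStream =
    Option(getClass.getResourceAsStream(name)).getOrElse {
      val f = new File(name)
      if (f.isFile) new FileInputStream(f)
      else throw new IllegalArgumentException(
        s"File $name not found as classpath resource or on the filesystem")
    }

  def main(args: Array[String]): Unit = {
    val file = File.createTempFile("dev-mode", ".properties")
    java.nio.file.Files.write(file.toPath, "playVersion=2.6.0".getBytes("UTF-8")) // illustrative content

    val props = new Properties()
    val is = open(file.getAbsolutePath)
    try props.load(is) finally is.close()
    println(props.getProperty("playVersion"))
  }
}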
Example 141
Source File: GangliaSink.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.MetricRegistry import com.codahale.metrics.ganglia.GangliaReporter import info.ganglia.gmetric4j.gmetric.GMetric import info.ganglia.gmetric4j.gmetric.GMetric.UDPAddressingMode import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem class GangliaSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val GANGLIA_KEY_PERIOD = "period" val GANGLIA_DEFAULT_PERIOD = 10 val GANGLIA_KEY_UNIT = "unit" val GANGLIA_DEFAULT_UNIT: TimeUnit = TimeUnit.SECONDS val GANGLIA_KEY_MODE = "mode" val GANGLIA_DEFAULT_MODE: UDPAddressingMode = GMetric.UDPAddressingMode.MULTICAST // TTL for multicast messages. If listeners are X hops away in network, must be at least X. val GANGLIA_KEY_TTL = "ttl" val GANGLIA_DEFAULT_TTL = 1 val GANGLIA_KEY_HOST = "host" val GANGLIA_KEY_PORT = "port" val GANGLIA_KEY_DMAX = "dmax" val GANGLIA_DEFAULT_DMAX = 0 def propertyToOption(prop: String): Option[String] = Option(property.getProperty(prop)) if (!propertyToOption(GANGLIA_KEY_HOST).isDefined) { throw new Exception("Ganglia sink requires 'host' property.") } if (!propertyToOption(GANGLIA_KEY_PORT).isDefined) { throw new Exception("Ganglia sink requires 'port' property.") } val host = propertyToOption(GANGLIA_KEY_HOST).get val port = propertyToOption(GANGLIA_KEY_PORT).get.toInt val ttl = propertyToOption(GANGLIA_KEY_TTL).map(_.toInt).getOrElse(GANGLIA_DEFAULT_TTL) val dmax = propertyToOption(GANGLIA_KEY_DMAX).map(_.toInt).getOrElse(GANGLIA_DEFAULT_DMAX) val mode: UDPAddressingMode = propertyToOption(GANGLIA_KEY_MODE) .map(u => GMetric.UDPAddressingMode.valueOf(u.toUpperCase)).getOrElse(GANGLIA_DEFAULT_MODE) val pollPeriod = propertyToOption(GANGLIA_KEY_PERIOD).map(_.toInt) .getOrElse(GANGLIA_DEFAULT_PERIOD) val pollUnit: TimeUnit = propertyToOption(GANGLIA_KEY_UNIT) .map(u => TimeUnit.valueOf(u.toUpperCase)) .getOrElse(GANGLIA_DEFAULT_UNIT) MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val ganglia = new GMetric(host, port, mode, ttl) val reporter: GangliaReporter = GangliaReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .withDMax(dmax) .build(ganglia) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 142
Source File: SQLExecutionSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution import java.util.Properties import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} import org.apache.spark.sql.SparkSession class SQLExecutionSuite extends SparkFunSuite { test("concurrent query execution (SPARK-10548)") { // Try to reproduce the issue with the old SparkContext val conf = new SparkConf() .setMaster("local[*]") .setAppName("test") val badSparkContext = new BadSparkContext(conf) try { testConcurrentQueryExecution(badSparkContext) fail("unable to reproduce SPARK-10548") } catch { case e: IllegalArgumentException => assert(e.getMessage.contains(SQLExecution.EXECUTION_ID_KEY)) } finally { badSparkContext.stop() } // Verify that the issue is fixed with the latest SparkContext val goodSparkContext = new SparkContext(conf) try { testConcurrentQueryExecution(goodSparkContext) } finally { goodSparkContext.stop() } } test("concurrent query execution with fork-join pool (SPARK-13747)") { val spark = SparkSession.builder .master("local[*]") .appName("test") .getOrCreate() import spark.implicits._ try { // Should not throw IllegalArgumentException (1 to 100).par.foreach { _ => spark.sparkContext.parallelize(1 to 5).map { i => (i, i) }.toDF("a", "b").count() } } finally { spark.sparkContext.stop() } } private class BadSparkContext(conf: SparkConf) extends SparkContext(conf) { protected[spark] override val localProperties = new InheritableThreadLocal[Properties] { override protected def childValue(parent: Properties): Properties = new Properties(parent) override protected def initialValue(): Properties = new Properties() } }
Example 143
Source File: RowDataSourceStrategySuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources import java.sql.DriverManager import java.util.Properties import org.scalatest.BeforeAndAfter import org.apache.spark.SparkFunSuite import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.sources._ import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ import org.apache.spark.util.Utils class RowDataSourceStrategySuite extends SparkFunSuite with BeforeAndAfter with SharedSQLContext { import testImplicits._ val url = "jdbc:h2:mem:testdb0" val urlWithUserAndPass = "jdbc:h2:mem:testdb0;user=testUser;password=testPass" var conn: java.sql.Connection = null before { Utils.classForName("org.h2.Driver") // Extra properties that will be specified for our database. We need these to test // usage of parameters from OPTIONS clause in queries. val properties = new Properties() properties.setProperty("user", "testUser") properties.setProperty("password", "testPass") properties.setProperty("rowId", "false") conn = DriverManager.getConnection(url, properties) conn.prepareStatement("create schema test").executeUpdate() conn.prepareStatement("create table test.inttypes (a INT, b INT, c INT)").executeUpdate() conn.prepareStatement("insert into test.inttypes values (1, 2, 3)").executeUpdate() conn.commit() sql( s""" |CREATE TEMPORARY TABLE inttypes |USING org.apache.spark.sql.jdbc |OPTIONS (url '$url', dbtable 'TEST.INTTYPES', user 'testUser', password 'testPass') """.stripMargin.replaceAll("\n", " ")) } after { conn.close() } test("SPARK-17673: Exchange reuse respects differences in output schema") { val df = sql("SELECT * FROM inttypes") val df1 = df.groupBy("a").agg("b" -> "min") val df2 = df.groupBy("a").agg("c" -> "min") val res = df1.union(df2) assert(res.distinct().count() == 2) // would be 1 if the exchange was incorrectly reused } }
Example 144
Source File: CryptoStreamUtils.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.security

import java.io.{InputStream, OutputStream}
import java.util.Properties
import javax.crypto.KeyGenerator
import javax.crypto.spec.{IvParameterSpec, SecretKeySpec}

import org.apache.commons.crypto.random._
import org.apache.commons.crypto.stream._

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config._

// The enclosing object and the IV length constant are restored from Spark's
// CryptoStreamUtils; the published snippet shows only createInitializationVector.
private[spark] object CryptoStreamUtils extends Logging {

  // Length in bytes of the initialization vector used by the crypto streams.
  val IV_LENGTH_IN_BYTES = 16

  private[this] def createInitializationVector(properties: Properties): Array[Byte] = {
    val iv = new Array[Byte](IV_LENGTH_IN_BYTES)
    val initialIVStart = System.currentTimeMillis()
    CryptoRandomFactory.getCryptoRandom(properties).nextBytes(iv)
    val initialIVFinish = System.currentTimeMillis()
    val initialIVTime = initialIVFinish - initialIVStart
    if (initialIVTime > 2000) {
      logWarning(s"It costs ${initialIVTime} milliseconds to create the Initialization Vector " +
        s"used by CryptoStream")
    }
    iv
  }
}
Example 145
Source File: CsvSink.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.io.File import java.util.{Locale, Properties} import java.util.concurrent.TimeUnit import com.codahale.metrics.{CsvReporter, MetricRegistry} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class CsvSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val CSV_KEY_PERIOD = "period" val CSV_KEY_UNIT = "unit" val CSV_KEY_DIR = "directory" val CSV_DEFAULT_PERIOD = 10 val CSV_DEFAULT_UNIT = "SECONDS" val CSV_DEFAULT_DIR = "/tmp/" val pollPeriod = Option(property.getProperty(CSV_KEY_PERIOD)) match { case Some(s) => s.toInt case None => CSV_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(CSV_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(CSV_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val pollDir = Option(property.getProperty(CSV_KEY_DIR)) match { case Some(s) => s case None => CSV_DEFAULT_DIR } val reporter: CsvReporter = CsvReporter.forRegistry(registry) .formatFor(Locale.US) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build(new File(pollDir)) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
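Sinks like the one above never see metrics.properties directly; they receive an already-filtered Properties object. A sketch of roughly what such an object looks like for a CSV sink; the period, unit and directory values are illustrative.

import java.util.Properties

object CsvSinkProps {
  def main(args: Array[String]): Unit = {
    // Roughly the keys left over from a "*.sink.csv.*" section of metrics.properties
    // once the "*.sink.csv." prefix has been stripped off.
    val property = new Properties()
    property.setProperty("period", "20")
    property.setProperty("unit", "seconds")
    property.setProperty("directory", "/tmp/spark-metrics") // illustrative path

    // The sink falls back to its defaults for any key that is absent.
    println(Option(property.getProperty("period")).map(_.toInt).getOrElse(10))
  }
}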
Example 146
Source File: HDFSSink.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.MetricRegistry import org.apache.spark.{HDFSReporter, SecurityManager} import org.apache.spark.metrics.MetricsSystem private[spark] class HDFSSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val HDFS_KEY_PERIOD = "period" val HDFS_KEY_UNIT = "unit" val HDFS_KEY_DIR = "dir" val HDFS_DEFAULT_PERIOD = 10 val HDFS_DEFAULT_UNIT = "SECONDS" val HDFS_DEFAULT_DIR = "hdfs://localhost:9000/custom-metrics/" val pollPeriod = Option(property.getProperty(HDFS_KEY_PERIOD)) match { case Some(s) => s.toInt case None => HDFS_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(HDFS_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(HDFS_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val pollDir = Option(property.getProperty(HDFS_KEY_DIR)) match { case Some(s) => s case None => HDFS_DEFAULT_DIR } val reporter: HDFSReporter = HDFSReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build(pollDir) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 147
Source File: MetricsServlet.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import javax.servlet.http.HttpServletRequest import com.codahale.metrics.MetricRegistry import com.codahale.metrics.json.MetricsModule import com.fasterxml.jackson.databind.ObjectMapper import org.eclipse.jetty.servlet.ServletContextHandler import org.apache.spark.{SecurityManager, SparkConf} import org.apache.spark.ui.JettyUtils._ private[spark] class MetricsServlet( val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val SERVLET_KEY_PATH = "path" val SERVLET_KEY_SAMPLE = "sample" val SERVLET_DEFAULT_SAMPLE = false val servletPath = property.getProperty(SERVLET_KEY_PATH) val servletShowSample = Option(property.getProperty(SERVLET_KEY_SAMPLE)).map(_.toBoolean) .getOrElse(SERVLET_DEFAULT_SAMPLE) val mapper = new ObjectMapper().registerModule( new MetricsModule(TimeUnit.SECONDS, TimeUnit.MILLISECONDS, servletShowSample)) def getHandlers(conf: SparkConf): Array[ServletContextHandler] = { Array[ServletContextHandler]( createServletHandler(servletPath, new ServletParams(request => getMetricsSnapshot(request), "text/json"), securityMgr, conf) ) } def getMetricsSnapshot(request: HttpServletRequest): String = { mapper.writeValueAsString(registry) } override def start() { } override def stop() { } override def report() { } }
Example 148
Source File: MQTTSink.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.MetricRegistry import org.apache.spark.{MQTTReporter, SecurityManager} import org.apache.spark.metrics.MetricsSystem private[spark] class MQTTSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val MQTT_KEY_PERIOD = "pollPeriod" val MQTT_KEY_UNIT = "unit" val MQTT_KEY_HOST = "host" val MQTT_KEY_PORT = "port" val MQTT_DEFAULT_PERIOD = 10 val MQTT_DEFAULT_UNIT = "SECONDS" val MQTT_DEFAULT_HOST = "localhost" val MQTT_DEFAULT_PORT = 1883 val pollPeriod = Option(property.getProperty(MQTT_KEY_PERIOD)) match { case Some(s) => s.toInt case None => MQTT_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(MQTT_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(MQTT_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val masterHost = Option(property.getProperty(MQTT_KEY_HOST)) match { case Some(s) => s case None => MQTT_DEFAULT_HOST } val masterPort = Option(property.getProperty(MQTT_KEY_PORT)) match { case Some(s) => s.toInt case None => MQTT_DEFAULT_PORT } val reporter: MQTTReporter = MQTTReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build(masterHost, masterPort) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 149
Source File: Slf4jSink.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.{MetricRegistry, Slf4jReporter} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class Slf4jSink( val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val SLF4J_DEFAULT_PERIOD = 10 val SLF4J_DEFAULT_UNIT = "SECONDS" val SLF4J_KEY_PERIOD = "period" val SLF4J_KEY_UNIT = "unit" val pollPeriod = Option(property.getProperty(SLF4J_KEY_PERIOD)) match { case Some(s) => s.toInt case None => SLF4J_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(SLF4J_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(SLF4J_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val reporter: Slf4jReporter = Slf4jReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build() override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 150
Source File: ConsoleSink.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.{ConsoleReporter, MetricRegistry} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class ConsoleSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val CONSOLE_DEFAULT_PERIOD = 10 val CONSOLE_DEFAULT_UNIT = "SECONDS" val CONSOLE_KEY_PERIOD = "period" val CONSOLE_KEY_UNIT = "unit" val pollPeriod = Option(property.getProperty(CONSOLE_KEY_PERIOD)) match { case Some(s) => s.toInt case None => CONSOLE_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(CONSOLE_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(CONSOLE_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val reporter: ConsoleReporter = ConsoleReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build() override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 151
Source File: GraphiteSink.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.net.InetSocketAddress import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.MetricRegistry import com.codahale.metrics.graphite.{Graphite, GraphiteReporter, GraphiteUDP} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class GraphiteSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val GRAPHITE_DEFAULT_PERIOD = 10 val GRAPHITE_DEFAULT_UNIT = "SECONDS" val GRAPHITE_DEFAULT_PREFIX = "" val GRAPHITE_KEY_HOST = "host" val GRAPHITE_KEY_PORT = "port" val GRAPHITE_KEY_PERIOD = "period" val GRAPHITE_KEY_UNIT = "unit" val GRAPHITE_KEY_PREFIX = "prefix" val GRAPHITE_KEY_PROTOCOL = "protocol" def propertyToOption(prop: String): Option[String] = Option(property.getProperty(prop)) if (!propertyToOption(GRAPHITE_KEY_HOST).isDefined) { throw new Exception("Graphite sink requires 'host' property.") } if (!propertyToOption(GRAPHITE_KEY_PORT).isDefined) { throw new Exception("Graphite sink requires 'port' property.") } val host = propertyToOption(GRAPHITE_KEY_HOST).get val port = propertyToOption(GRAPHITE_KEY_PORT).get.toInt val pollPeriod = propertyToOption(GRAPHITE_KEY_PERIOD) match { case Some(s) => s.toInt case None => GRAPHITE_DEFAULT_PERIOD } val pollUnit: TimeUnit = propertyToOption(GRAPHITE_KEY_UNIT) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(GRAPHITE_DEFAULT_UNIT) } val prefix = propertyToOption(GRAPHITE_KEY_PREFIX).getOrElse(GRAPHITE_DEFAULT_PREFIX) MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val graphite = propertyToOption(GRAPHITE_KEY_PROTOCOL).map(_.toLowerCase) match { case Some("udp") => new GraphiteUDP(new InetSocketAddress(host, port)) case Some("tcp") | None => new Graphite(new InetSocketAddress(host, port)) case Some(p) => throw new Exception(s"Invalid Graphite protocol: $p") } val reporter: GraphiteReporter = GraphiteReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .prefixedWith(prefix) .build(graphite) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
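The propertyToOption pattern above turns getProperty's nullable result into an Option, which makes required and optional settings easy to distinguish. A compact stand-alone sketch; the host and port values are placeholders.

import java.util.Properties

object PropertyToOption {
  // The propertyToOption helper used by the sinks above.
  def propertyToOption(props: Properties, key: String): Option[String] =
    Option(props.getProperty(key))

  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.setProperty("host", "graphite.example.org") // illustrative host
    props.setProperty("port", "2003")

    val host = propertyToOption(props, "host")
      .getOrElse(throw new Exception("Graphite sink requires 'host' property."))
    val port = propertyToOption(props, "port").map(_.toInt)
      .getOrElse(throw new Exception("Graphite sink requires 'port' property."))
    println(s"$host:$port")
  }
}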
Example 152
Source File: ResultTask.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.io._ import java.lang.management.ManagementFactory import java.nio.ByteBuffer import java.util.Properties import org.apache.spark._ import org.apache.spark.broadcast.Broadcast import org.apache.spark.executor.TaskMetrics import org.apache.spark.rdd.RDD private[spark] class ResultTask[T, U]( stageId: Int, stageAttemptId: Int, taskBinary: Broadcast[Array[Byte]], partition: Partition, locs: Seq[TaskLocation], val outputId: Int, localProperties: Properties, metrics: TaskMetrics, jobId: Option[Int] = None, appId: Option[String] = None, appAttemptId: Option[String] = None) extends Task[U](stageId, stageAttemptId, partition.index, metrics, localProperties, jobId, appId, appAttemptId) with Serializable { @transient private[this] val preferredLocs: Seq[TaskLocation] = { if (locs == null) Nil else locs.toSet.toSeq } override def runTask(context: TaskContext): U = { // Deserialize the RDD and the func using the broadcast variables. val threadMXBean = ManagementFactory.getThreadMXBean val deserializeStartTime = System.currentTimeMillis() val deserializeStartCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) { threadMXBean.getCurrentThreadCpuTime } else 0L val ser = SparkEnv.get.closureSerializer.newInstance() val (rdd, func) = ser.deserialize[(RDD[T], (TaskContext, Iterator[T]) => U)]( ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader) _executorDeserializeTime = System.currentTimeMillis() - deserializeStartTime _executorDeserializeCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) { threadMXBean.getCurrentThreadCpuTime - deserializeStartCpuTime } else 0L func(context, rdd.iterator(partition, context)) } // This is only callable on the driver side. override def preferredLocations: Seq[TaskLocation] = preferredLocs override def toString: String = "ResultTask(" + stageId + ", " + partitionId + ")" }
Example 153
Source File: package.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache import java.util.Properties package object spark { private object SparkBuildInfo { val ( spark_version: String, spark_branch: String, spark_revision: String, spark_build_user: String, spark_repo_url: String, spark_build_date: String) = { val resourceStream = Thread.currentThread().getContextClassLoader. getResourceAsStream("spark-version-info.properties") try { val unknownProp = "<unknown>" val props = new Properties() props.load(resourceStream) ( props.getProperty("version", unknownProp), props.getProperty("branch", unknownProp), props.getProperty("revision", unknownProp), props.getProperty("user", unknownProp), props.getProperty("url", unknownProp), props.getProperty("date", unknownProp) ) } catch { case npe: NullPointerException => throw new SparkException("Error while locating file spark-version-info.properties", npe) case e: Exception => throw new SparkException("Error loading properties from spark-version-info.properties", e) } finally { if (resourceStream != null) { try { resourceStream.close() } catch { case e: Exception => throw new SparkException("Error closing spark build info resource stream", e) } } } } } val SPARK_VERSION = SparkBuildInfo.spark_version val SPARK_BRANCH = SparkBuildInfo.spark_branch val SPARK_REVISION = SparkBuildInfo.spark_revision val SPARK_BUILD_USER = SparkBuildInfo.spark_build_user val SPARK_REPO_URL = SparkBuildInfo.spark_repo_url val SPARK_BUILD_DATE = SparkBuildInfo.spark_build_date }
Example 154
Source File: TaskContextImpl.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark import java.util.Properties import scala.collection.mutable.ArrayBuffer import org.apache.spark.executor.TaskMetrics import org.apache.spark.internal.Logging import org.apache.spark.memory.TaskMemoryManager import org.apache.spark.metrics.MetricsSystem import org.apache.spark.metrics.source.Source import org.apache.spark.util._ private[spark] class TaskContextImpl( val stageId: Int, val partitionId: Int, override val taskAttemptId: Long, override val attemptNumber: Int, override val taskMemoryManager: TaskMemoryManager, localProperties: Properties, @transient private val metricsSystem: MetricsSystem, // The default value is only used in tests. override val taskMetrics: TaskMetrics = TaskMetrics.empty) extends TaskContext with Logging { private[spark] def markInterrupted(): Unit = { interrupted = true } override def isCompleted(): Boolean = completed override def isRunningLocally(): Boolean = false override def isInterrupted(): Boolean = interrupted override def getLocalProperty(key: String): String = localProperties.getProperty(key) override def getMetricsSources(sourceName: String): Seq[Source] = metricsSystem.getSourcesByName(sourceName) private[spark] override def registerAccumulator(a: AccumulatorV2[_, _]): Unit = { taskMetrics.registerAccumulator(a) } }
Example 155
Source File: MemoryTestingUtils.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.memory import java.util.Properties import org.apache.spark.{SparkEnv, TaskContext, TaskContextImpl} object MemoryTestingUtils { def fakeTaskContext(env: SparkEnv): TaskContext = { val taskMemoryManager = new TaskMemoryManager(env.memoryManager, 0) new TaskContextImpl( stageId = 0, partitionId = 0, taskAttemptId = 0, attemptNumber = 0, taskMemoryManager = taskMemoryManager, localProperties = new Properties, metricsSystem = env.metricsSystem) } }
Example 156
Source File: ResetSystemProperties.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.util.Properties import org.apache.commons.lang3.SerializationUtils import org.scalatest.{BeforeAndAfterEach, Suite} private[spark] trait ResetSystemProperties extends BeforeAndAfterEach { this: Suite => var oldProperties: Properties = null override def beforeEach(): Unit = { // we need SerializationUtils.clone instead of `new Properties(System.getProperties())` because // the later way of creating a copy does not copy the properties but it initializes a new // Properties object with the given properties as defaults. They are not recognized at all // by standard Scala wrapper over Java Properties then. oldProperties = SerializationUtils.clone(System.getProperties) super.beforeEach() } override def afterEach(): Unit = { try { super.afterEach() } finally { System.setProperties(oldProperties) oldProperties = null } } }
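The trait above snapshots System.getProperties before each test and restores it afterwards. A sketch of the same snapshot-and-restore idea as a loan-style helper; spark.test.flag is an illustrative key, and SerializationUtils comes from the same commons-lang3 dependency used above.

import java.util.Properties

import org.apache.commons.lang3.SerializationUtils

object WithSystemProperty {
  // Run a block with one system property overridden, then restore the previous state.
  def withSystemProperty[T](key: String, value: String)(body: => T): T = {
    // Clone rather than wrap, so the snapshot is a real copy of the current properties.
    val old: Properties = SerializationUtils.clone(System.getProperties)
    try {
      System.setProperty(key, value)
      body
    } finally {
      System.setProperties(old)
    }
  }

  def main(args: Array[String]): Unit = {
    withSystemProperty("spark.test.flag", "true") { // illustrative key
      println(System.getProperty("spark.test.flag")) // true
    }
    println(System.getProperty("spark.test.flag")) // restored to its previous value
  }
}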
Example 157
Source File: WmUserApp.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.wmexchanger.utils import java.util.Properties class WmUserApp(val args: Array[String], val resourcePropertiesName: String, val filePropertiesKey: String = WmUserApp.properties) { def getPropertiesBuilder: PropertiesBuilder = { val resourcePropertiesBuilder = PropertiesBuilder.fromResource(resourcePropertiesName) val commandPropertiesBuilder = { val propertiesBuilder = PropertiesBuilder() args.foreach { arg => val key = StringUtils.beforeFirst(arg, '=', all = false) val value = StringUtils.afterFirst(arg, '=', all = false) if (key.nonEmpty && value.nonEmpty) propertiesBuilder.put(key, value) } propertiesBuilder } val filePropertiesBuilder = { val fileNameOpt: Option[String] = commandPropertiesBuilder .getProperty(WmUserApp.app, filePropertiesKey) .orElse(resourcePropertiesBuilder.getProperty(WmUserApp.app, filePropertiesKey)) .orElse(None) fileNameOpt .map(PropertiesBuilder.fromFile) .getOrElse(PropertiesBuilder()) } PropertiesBuilder() .putAll(resourcePropertiesBuilder) .putAll(filePropertiesBuilder) .putAll(commandPropertiesBuilder) } val propertiesBuilder: PropertiesBuilder = getPropertiesBuilder val appProperties: Properties = propertiesBuilder.filter("app").get val kafkaProperties: Properties = propertiesBuilder.filter("kafka").get val interactive: Boolean = appProperties.getProperty("interactive").toBoolean } object WmUserApp { val topic = "topic" val app = "app" val properties = "app.properties" }
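WmUserApp above layers resource, file and command-line Properties by copying them into one builder. An alternative sketch using Properties' built-in defaults chain, where getProperty walks from the most specific layer down; the keys and values are illustrative.

import java.util.Properties

object LayeredProperties {
  def main(args: Array[String]): Unit = {
    // Lowest precedence: values bundled with the application.
    val resourceDefaults = new Properties()
    resourceDefaults.setProperty("app.interactive", "false")
    resourceDefaults.setProperty("app.topic", "documents") // illustrative values

    // Higher precedence: an external file, then command-line key=value pairs.
    val fileProps = new Properties(resourceDefaults)
    fileProps.setProperty("app.topic", "documents.reprocessed")

    val commandProps = new Properties(fileProps)
    commandProps.setProperty("app.interactive", "true")

    // getProperty consults the chain of defaults, so the most specific layer wins.
    println(commandProps.getProperty("app.topic"))       // documents.reprocessed
    println(commandProps.getProperty("app.interactive")) // true
  }
}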
Example 158
Source File: KafkaConsumer.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.wmexchanger.wmconsumer import java.io.File import java.time.Duration import java.util.Collections import java.util.ConcurrentModificationException import java.util.Properties import org.apache.kafka.clients.consumer.{KafkaConsumer => ApacheKafkaConsumer} import org.clulab.wm.wmexchanger.utils.Closer.AutoCloser import org.clulab.wm.wmexchanger.utils.FileUtils import org.clulab.wm.wmexchanger.utils.FileEditor import org.json4s._ import org.slf4j.Logger import org.slf4j.LoggerFactory class KafkaConsumer(properties: Properties, closeDuration: Int, topic: String, outputDir: String) { import KafkaConsumer._ implicit val formats: DefaultFormats.type = org.json4s.DefaultFormats logger.info("Opening consumer...") protected val consumer: ApacheKafkaConsumer[String, String] = { val consumer = new ApacheKafkaConsumer[String, String](properties) consumer.subscribe(Collections.singletonList(topic)) consumer } def poll(duration: Int): Unit = { val records = consumer.poll(Duration.ofSeconds(duration)) logger.info(s"Polling ${records.count} records...") records.forEach { record => val key = record.key val value = record.value // Imply an extension on the file so that it can be replaced. val file = FileEditor(new File(key + ".")).setDir(outputDir).setExt("json").get logger.info("Consuming " + file.getName) FileUtils.printWriterFromFile(file).autoClose { printWriter => printWriter.print(value) } } } def close(): Unit = { logger.info("Closing consumer...") try { consumer.close(Duration.ofSeconds(closeDuration)) } catch { case _: ConcurrentModificationException => // KafkaConsumer is not safe for multi-threaded access } } } object KafkaConsumer { val logger: Logger = LoggerFactory.getLogger(this.getClass) }
Example 159
Source File: KafkaConsumerApp.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.wmexchanger.wmconsumer import java.util.Properties import org.clulab.wm.wmexchanger.utils.PropertiesBuilder import org.clulab.wm.wmexchanger.utils.WmUserApp import org.clulab.wm.wmexchanger.utils.SafeThread import org.slf4j.Logger import org.slf4j.LoggerFactory class KafkaConsumerApp(args: Array[String]) extends WmUserApp(args, "/kafkaconsumer.properties") { val localKafkaProperties: Properties = { // This allows the login to be contained in a file external to the project. val loginProperty = appProperties.getProperty("login") val loginPropertiesBuilder = PropertiesBuilder.fromFile(loginProperty) PropertiesBuilder(kafkaProperties).putAll(loginPropertiesBuilder).get } val topic: String = appProperties.getProperty("topic") val outputDir: String = appProperties.getProperty("outputDir") val pollDuration: Int = appProperties.getProperty("poll.duration").toInt val waitDuration: Long = appProperties.getProperty("wait.duration").toLong val closeDuration: Int = appProperties.getProperty("close.duration").toInt val thread: SafeThread = new SafeThread(KafkaConsumerApp.logger) { override def runSafely(): Unit = { val consumer = new KafkaConsumer(localKafkaProperties, closeDuration, topic, outputDir) // autoClose isn't executed if the thread is shot down, so this hook is used instead. sys.ShutdownHookThread { consumer.close() } while (!isInterrupted) consumer.poll(pollDuration) } } if (interactive) thread.waitSafely(waitDuration) } object KafkaConsumerApp extends App { val logger: Logger = LoggerFactory.getLogger(this.getClass) new KafkaConsumerApp(args) }
Example 160
Source File: MetricsConfig.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics import java.io.{FileInputStream, InputStream} import java.util.Properties import scala.collection.mutable import scala.util.matching.Regex import org.apache.spark.Logging import org.apache.spark.util.Utils private[spark] class MetricsConfig(val configFile: Option[String]) extends Logging { private val DEFAULT_PREFIX = "*" private val INSTANCE_REGEX = "^(\\*|[a-zA-Z]+)\\.(.+)".r private val DEFAULT_METRICS_CONF_FILENAME = "metrics.properties" private[metrics] val properties = new Properties() private[metrics] var propertyCategories: mutable.HashMap[String, Properties] = null private def setDefaultProperties(prop: Properties) { prop.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet") prop.setProperty("*.sink.servlet.path", "/metrics/json") prop.setProperty("master.sink.servlet.path", "/metrics/master/json") prop.setProperty("applications.sink.servlet.path", "/metrics/applications/json") } def initialize() { // Add default properties in case there's no properties file setDefaultProperties(properties) // If spark.metrics.conf is not set, try to get file in class path val isOpt: Option[InputStream] = configFile.map(new FileInputStream(_)).orElse { try { Option(Utils.getSparkClassLoader.getResourceAsStream(DEFAULT_METRICS_CONF_FILENAME)) } catch { case e: Exception => logError("Error loading default configuration file", e) None } } isOpt.foreach { is => try { properties.load(is) } finally { is.close() } } propertyCategories = subProperties(properties, INSTANCE_REGEX) if (propertyCategories.contains(DEFAULT_PREFIX)) { import scala.collection.JavaConversions._ val defaultProperty = propertyCategories(DEFAULT_PREFIX) for { (inst, prop) <- propertyCategories if (inst != DEFAULT_PREFIX) (k, v) <- defaultProperty if (prop.getProperty(k) == null) } { prop.setProperty(k, v) } } } def subProperties(prop: Properties, regex: Regex): mutable.HashMap[String, Properties] = { val subProperties = new mutable.HashMap[String, Properties] import scala.collection.JavaConversions._ prop.foreach { kv => if (regex.findPrefixOf(kv._1).isDefined) { val regex(prefix, suffix) = kv._1 subProperties.getOrElseUpdate(prefix, new Properties).setProperty(suffix, kv._2) } } subProperties } def getInstance(inst: String): Properties = { propertyCategories.get(inst) match { case Some(s) => s case None => propertyCategories.getOrElse(DEFAULT_PREFIX, new Properties) } } }
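The core of MetricsConfig is subProperties, which splits one flat Properties object into per-instance Properties keyed by the prefix before the first dot. A standalone sketch of that grouping, simplified from the version above; the keys are illustrative.

import java.util.Properties
import scala.collection.mutable
import scala.jdk.CollectionConverters._

val props = new Properties()
props.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet")
props.setProperty("master.sink.servlet.path", "/metrics/master/json")

val InstanceRegex = "^(\\*|[a-zA-Z]+)\\.(.+)".r
val perInstance = new mutable.HashMap[String, Properties]
props.stringPropertyNames().asScala.foreach {
  case key @ InstanceRegex(prefix, suffix) =>
    // Group by instance prefix ("*" or "master"), keep the remainder as the key.
    perInstance.getOrElseUpdate(prefix, new Properties).setProperty(suffix, props.getProperty(key))
  case _ => // ignore keys that do not match the instance pattern
}
// perInstance("master") now holds sink.servlet.path=/metrics/master/json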
Example 161
Source File: CsvSink.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.io.File import java.util.{Locale, Properties} import java.util.concurrent.TimeUnit import com.codahale.metrics.{CsvReporter, MetricRegistry} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class CsvSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val CSV_KEY_PERIOD = "period" val CSV_KEY_UNIT = "unit" val CSV_KEY_DIR = "directory" val CSV_DEFAULT_PERIOD = 10 val CSV_DEFAULT_UNIT = "SECONDS" val CSV_DEFAULT_DIR = "/tmp/" val pollPeriod = Option(property.getProperty(CSV_KEY_PERIOD)) match { case Some(s) => s.toInt case None => CSV_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(CSV_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(CSV_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val pollDir = Option(property.getProperty(CSV_KEY_DIR)) match { case Some(s) => s case None => CSV_DEFAULT_DIR } val reporter: CsvReporter = CsvReporter.forRegistry(registry) .formatFor(Locale.US) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build(new File(pollDir)) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 162
Source File: ConsoleSink.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.{ConsoleReporter, MetricRegistry} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class ConsoleSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val CONSOLE_DEFAULT_PERIOD = 10 val CONSOLE_DEFAULT_UNIT = "SECONDS" val CONSOLE_KEY_PERIOD = "period" val CONSOLE_KEY_UNIT = "unit" val pollPeriod = Option(property.getProperty(CONSOLE_KEY_PERIOD)) match { case Some(s) => s.toInt case None => CONSOLE_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(CONSOLE_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(CONSOLE_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val reporter: ConsoleReporter = ConsoleReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build() override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 163
Source File: GraphiteSink.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.net.InetSocketAddress import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.MetricRegistry import com.codahale.metrics.graphite.{GraphiteUDP, Graphite, GraphiteReporter} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class GraphiteSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val GRAPHITE_DEFAULT_PERIOD = 10 val GRAPHITE_DEFAULT_UNIT = "SECONDS" val GRAPHITE_DEFAULT_PREFIX = "" val GRAPHITE_KEY_HOST = "host" val GRAPHITE_KEY_PORT = "port" val GRAPHITE_KEY_PERIOD = "period" val GRAPHITE_KEY_UNIT = "unit" val GRAPHITE_KEY_PREFIX = "prefix" val GRAPHITE_KEY_PROTOCOL = "protocol" def propertyToOption(prop: String): Option[String] = Option(property.getProperty(prop)) if (!propertyToOption(GRAPHITE_KEY_HOST).isDefined) { throw new Exception("Graphite sink requires 'host' property.") } if (!propertyToOption(GRAPHITE_KEY_PORT).isDefined) { throw new Exception("Graphite sink requires 'port' property.") } val host = propertyToOption(GRAPHITE_KEY_HOST).get val port = propertyToOption(GRAPHITE_KEY_PORT).get.toInt val pollPeriod = propertyToOption(GRAPHITE_KEY_PERIOD) match { case Some(s) => s.toInt case None => GRAPHITE_DEFAULT_PERIOD } val pollUnit: TimeUnit = propertyToOption(GRAPHITE_KEY_UNIT) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(GRAPHITE_DEFAULT_UNIT) } val prefix = propertyToOption(GRAPHITE_KEY_PREFIX).getOrElse(GRAPHITE_DEFAULT_PREFIX) MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val graphite = propertyToOption(GRAPHITE_KEY_PROTOCOL).map(_.toLowerCase) match { case Some("udp") => new GraphiteUDP(new InetSocketAddress(host, port)) case Some("tcp") | None => new Graphite(new InetSocketAddress(host, port)) case Some(p) => throw new Exception(s"Invalid Graphite protocol: $p") } val reporter: GraphiteReporter = GraphiteReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .prefixedWith(prefix) .build(graphite) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 164
Source File: ActiveJob.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.util.Properties import org.apache.spark.TaskContext import org.apache.spark.util.CallSite private[spark] class ActiveJob( val jobId: Int, val finalStage: Stage, val func: (TaskContext, Iterator[_]) => _, val partitions: Array[Int], val callSite: CallSite, val listener: JobListener, val properties: Properties) { val numPartitions = partitions.length val finished = Array.fill[Boolean](numPartitions)(false) var numFinished = 0 }
Example 165
Source File: ResetSystemProperties.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.util.Properties import org.apache.commons.lang3.SerializationUtils import org.scalatest.{BeforeAndAfterEach, Suite} private[spark] trait ResetSystemProperties extends BeforeAndAfterEach { this: Suite => var oldProperties: Properties = null override def beforeEach(): Unit = { // we need SerializationUtils.clone instead of `new Properties(System.getProperties())` because // the latter way of creating a copy does not copy the properties but initializes a new // Properties object with the given properties as defaults. They are not recognized at all // by the standard Scala wrapper over Java Properties then. oldProperties = SerializationUtils.clone(System.getProperties) super.beforeEach() } override def afterEach(): Unit = { try { super.afterEach() } finally { System.setProperties(oldProperties) oldProperties = null } } }
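The beforeEach comment is worth demonstrating: new Properties(defaults) only references the original as fall-back defaults and copies nothing, which is why the deep clone is needed. A small sketch, assuming commons-lang3 is on the classpath.

import java.util.Properties
import org.apache.commons.lang3.SerializationUtils

val original = new Properties()
original.setProperty("spark.test.key", "value")

val viaDefaults = new Properties(original)    // entries become fall-back defaults only
viaDefaults.getProperty("spark.test.key")     // "value" (resolved through the defaults)
viaDefaults.size                              // 0 -- nothing was actually copied

val cloned = SerializationUtils.clone(original) // deep copy with real entries
cloned.size                                     // 1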
Example 166
Source File: SimpleConsumer.scala From embedded-kafka with Apache License 2.0 | 5 votes |
package com.tuplejump.embedded.kafka import java.util.Properties import java.util.concurrent.{CountDownLatch, Executors} import scala.util.Try import kafka.serializer.StringDecoder import kafka.consumer.{ Consumer, ConsumerConfig } class SimpleConsumer( val latch: CountDownLatch, consumerConfig: Map[String, String], topic: String, groupId: String, partitions: Int, numThreads: Int) { val connector = Consumer.create(createConsumerConfig) val streams = connector .createMessageStreams(Map(topic -> partitions), new StringDecoder(), new StringDecoder()) .get(topic) val executor = Executors.newFixedThreadPool(numThreads) for (stream <- streams) { executor.submit(new Runnable() { def run(): Unit = { for (s <- stream) { while (s.iterator.hasNext) { latch.countDown() } } } }) } private def createConsumerConfig: ConsumerConfig = { import scala.collection.JavaConverters._ val props = new Properties() props.putAll(consumerConfig.asJava) new ConsumerConfig(props) } def shutdown(): Unit = Try { connector.shutdown() executor.shutdown() } }
Example 167
Source File: TableLoader.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.util.Properties import scala.collection.{immutable, mutable} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.command.management.CarbonLoadDataCommand import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.datastore.impl.FileFactory import org.apache.carbondata.core.util.CarbonProperties // scalastyle:off object TableLoader { def extractOptions(propertiesFile: String): immutable.Map[String, String] = { val props = new Properties val path = new Path(propertiesFile) val fs = path.getFileSystem(FileFactory.getConfiguration) props.load(fs.open(path)) val elments = props.entrySet().iterator() val map = new mutable.HashMap[String, String]() System.out.println("properties file:") while (elments.hasNext) { val elment = elments.next() System.out.println(s"${elment.getKey}=${elment.getValue}") map.put(elment.getKey.asInstanceOf[String], elment.getValue.asInstanceOf[String]) } immutable.Map(map.toSeq: _*) } def extractStorePath(map: immutable.Map[String, String]): String = { map.get(CarbonCommonConstants.STORE_LOCATION) match { case Some(path) => path case None => throw new Exception(s"${CarbonCommonConstants.STORE_LOCATION} can't be empty") } } def loadTable(spark: SparkSession, dbName: Option[String], tableName: String, inputPaths: String, options: scala.collection.immutable.Map[String, String]): Unit = { CarbonLoadDataCommand(dbName, tableName, inputPaths, Nil, options, false).run(spark) } def main(args: Array[String]): Unit = { if (args.length < 3) { System.err.println("Usage: TableLoader <properties file> <table name> <input files>") System.exit(1) } System.out.println("parameter list:") args.foreach(System.out.println) val map = extractOptions(TableAPIUtil.escape(args(0))) val storePath = extractStorePath(map) System.out.println(s"${CarbonCommonConstants.STORE_LOCATION}:$storePath") val (dbName, tableName) = TableAPIUtil.parseSchemaName(TableAPIUtil.escape(args(1))) System.out.println(s"table name: $dbName.$tableName") val inputPaths = TableAPIUtil.escape(args(2)) val spark = TableAPIUtil.spark(storePath, s"TableLoader: $dbName.$tableName") loadTable(spark, Option(dbName), tableName, inputPaths, map) } }
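extractOptions reads the properties file through a Hadoop FileSystem; for a quick local check the same shape can be reproduced with a plain java.io stream. A sketch; the file name is illustrative.

import java.io.FileInputStream
import java.util.Properties
import scala.collection.immutable
import scala.jdk.CollectionConverters._

def loadLocalOptions(path: String): immutable.Map[String, String] = {
  val props = new Properties()
  val in = new FileInputStream(path)           // would be fs.open(new Path(path)) on HDFS
  try props.load(in) finally in.close()
  props.stringPropertyNames().asScala.map(k => k -> props.getProperty(k)).toMap
}

// loadLocalOptions("carbon-load.properties")   // illustrative file name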
Example 168
Source File: KfProducer.scala From Adenium with Apache License 2.0 | 5 votes |
package com.adenium.externals.kafka import java.util.Properties import java.util.concurrent.Future import com.adenium.utils.May._ import org.apache.kafka.clients.producer._ object KfProducer { def apply( broker: String, props: Option[Properties] = None): KfProducer = { val prop = props.getOrElse { val p = new Properties() p.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, broker) p.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") p.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") p.put(ProducerConfig.ACKS_CONFIG, "0") p.put(ProducerConfig.RETRIES_CONFIG, "3") p.put(ProducerConfig.LINGER_MS_CONFIG, "0") //props.put(ProducerConfig.BATCH_SIZE_CONFIG, "1") p } val prod = new KafkaProducer[String, String](prop) new KfProducer( broker, prod ) } }
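The Properties assembled in apply are consumed by the underlying Kafka producer. A hedged usage sketch with an equivalently configured org.apache.kafka.clients.producer.KafkaProducer; the broker and topic names are placeholders.

import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}

val props = new Properties()
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")  // placeholder broker
props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer")
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer")

val producer = new KafkaProducer[String, String](props)
producer.send(new ProducerRecord[String, String]("normalized-events", "key-1", "hello")) // placeholder topic
producer.flush()
producer.close()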
Example 169
Source File: PrometheusSink.scala From spark-metrics with Apache License 2.0 | 5 votes |
package org.apache.spark.banzaicloud.metrics.sink import java.net.URL import java.util.Properties import com.banzaicloud.spark.metrics.sink.PrometheusSink.SinkConfig import com.codahale.metrics.MetricRegistry import io.prometheus.client.exporter.PushGateway import org.apache.spark.banzaicloud.metrics.sink.PrometheusSink.SinkConfigProxy import org.apache.spark.internal.config import org.apache.spark.metrics.sink.Sink import org.apache.spark.{SecurityManager, SparkConf, SparkEnv} object PrometheusSink { class SinkConfigProxy extends SinkConfig { // SparkEnv may become available only after metrics sink creation thus retrieving // SparkConf from spark env here and not during the creation/initialisation of PrometheusSink. @transient private lazy val sparkConfig = Option(SparkEnv.get).map(_.conf).getOrElse(new SparkConf(true)) // Don't use sparkConf.getOption("spark.metrics.namespace") as the underlying string won't be substituted. def metricsNamespace: Option[String] = sparkConfig.get(config.METRICS_NAMESPACE) def sparkAppId: Option[String] = sparkConfig.getOption("spark.app.id") def sparkAppName: Option[String] = sparkConfig.getOption("spark.app.name") def executorId: Option[String] = sparkConfig.getOption("spark.executor.id") } } class PrometheusSink(property: Properties, registry: MetricRegistry, securityMgr: SecurityManager, sinkConfig: SinkConfig, pushGatewayBuilder: URL => PushGateway) extends com.banzaicloud.spark.metrics.sink.PrometheusSink(property, registry, sinkConfig, pushGatewayBuilder) with Sink { // Constructor required by MetricsSystem::registerSinks() def this(property: Properties, registry: MetricRegistry, securityMgr: SecurityManager) = { this( property, registry, securityMgr, new SinkConfigProxy, new PushGateway(_) ) } }
Example 170
Source File: PropertiesConfig.scala From DynaML with Apache License 2.0 | 5 votes |
package io.github.mandar2812.dynaml.utils.sumac import collection._ import java.util.Properties import java.io.{FileOutputStream, File, FileInputStream, BufferedInputStream} import collection.JavaConverters._ trait PropertiesConfig extends ExternalConfig { self: Args => var propertyFile: File = _ abstract override def readArgs(originalArgs: Map[String,String]): Map[String,String] = { parse(originalArgs, false) val props = new Properties() if (propertyFile != null) { val in = new BufferedInputStream(new FileInputStream(propertyFile)) props.load(in) in.close() } //append args we read from the property file to the args from the command line, and pass to next trait super.readArgs(ExternalConfigUtil.mapWithDefaults(originalArgs,props.asScala)) } abstract override def saveConfig() { PropertiesConfig.saveConfig(this, propertyFile) super.saveConfig() } } object PropertiesConfig { def saveConfig(args: Args, propertyFile: File) { val props = new Properties() args.getStringValues.foreach{case(k,v) => props.put(k,v)} val out = new FileOutputStream(propertyFile) props.store(out, "") out.close() } }
Example 171
Source File: PropertiesConfigTest.scala From DynaML with Apache License 2.0 | 5 votes |
package io.github.mandar2812.dynaml.utils.sumac import org.scalatest.FunSuite import java.io.{PrintWriter, File} import java.util.Properties import org.scalatest.Matchers class PropertiesConfigTest extends FunSuite with Matchers { val testOutDir = new File("test_output/" + getClass.getSimpleName) testOutDir.mkdirs() test("load properties") { val propFile = new File(testOutDir, "load_properties_test.properties") val p = new Properties() p.put("x", "98") p.put("blah", "ooga booga") val out = new PrintWriter(propFile) p.store(out,null) out.close() val args = new PropertyArgs() args.parse(Array("--propertyFile", propFile.getAbsolutePath)) args.x should be (98) args.blah should be ("ooga booga") } test("roundtrip properties") { val propFile = new File(testOutDir, "roundtrip_properties_test.properties") val args = new PropertyArgs() args.x = 5 args.wakka = 93.4f args.propertyFile = propFile args.saveConfig() val args2 = new PropertyArgs() args2.propertyFile = propFile args2.parse(Map[String,String]()) args2.x should be (5) args2.wakka should be (93.4f) args2.blah should be (null) } class PropertyArgs extends FieldArgs with PropertiesConfig { var x: Int = _ var blah: String = _ var wakka: Float = _ } }
Example 172
Source File: H2Sandbox.scala From redshift-fake-driver with Apache License 2.0 | 5 votes |
package jp.ne.opt.redshiftfake import java.sql.{DriverManager, Connection} import java.util.Properties import jp.ne.opt.redshiftfake.util.Loan.using import org.scalatest.{Outcome, fixture} trait H2Sandbox { self: fixture.TestSuite => type FixtureParam = Connection override def withFixture(test: OneArgTest): Outcome = { val url = "jdbc:h2redshift:mem:redshift;MODE=PostgreSQL;DATABASE_TO_UPPER=false" val prop = new Properties() prop.setProperty("driver", "org.h2.jdbc.FakeH2Driver") prop.setProperty("user", "sa") Class.forName("org.h2.jdbc.FakeH2Driver") using(DriverManager.getConnection(url, prop))(test) } }
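DriverManager.getConnection accepts connection settings as a Properties object, which is how the fixture passes the user. A minimal standalone sketch against an in-memory H2 database, using the stock org.h2.Driver rather than the project's FakeH2Driver.

import java.sql.DriverManager
import java.util.Properties

val props = new Properties()
props.setProperty("user", "sa")
props.setProperty("password", "")

Class.forName("org.h2.Driver")
val conn = DriverManager.getConnection("jdbc:h2:mem:test;MODE=PostgreSQL", props)
try {
  val rs = conn.createStatement().executeQuery("SELECT 1")
  rs.next()
  println(rs.getInt(1)) // 1
} finally conn.close()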
Example 173
Source File: VelocityUtils.scala From InteractiveGraph-neo4j with BSD 2-Clause "Simplified" License | 5 votes |
package org.grapheco.server.util import java.io.{File, FileOutputStream, StringWriter} import java.util.Properties import cn.pidb.blob.Blob import cn.pidb.engine.blob.{BlobIO, InlineBlob, RemoteBlob} import org.apache.velocity.app.VelocityEngine import org.apache.velocity.tools.ToolManager import org.apache.velocity.tools.config.DefaultKey import org.neo4j.values.storable.{BlobValue, ValueWriter} import org.springframework.util.ClassUtils import scala.collection.JavaConversions import java.io.IOException object VelocityUtils { val pro = new Properties(); val toolManager = new ToolManager(); toolManager.configure("tools.xml"); pro.setProperty("input.encoding", "UTF-8"); pro.setProperty("output.encoding", "UTF-8"); val ve = new VelocityEngine(pro); val props = new Properties() props.put("runtime.log.logsystem.class", "org.apache.velocity.runtime.log.SimpleLog4JLogSystem") props.put("runtime.log.logsystem.log4j.category", "velocity") props.put("runtime.log.logsystem.log4j.logger", "velocity") ve.init(props) def parse(expr: String, context: Map[String, Any]): Any = { val vc = toolManager.createContext(); val writer = new StringWriter(); context.foreach(kv => vc.put(kv._1, // is this a Scala Map? if (kv._2.isInstanceOf[Map[_, _]]) { JavaConversions.mapAsJavaMap(kv._2.asInstanceOf[Map[_, _]]) } else { kv._2 })); try { if (expr.startsWith("=")) { val expr1 = expr.substring(1); ve.evaluate(vc, writer, "", s"#set($$__VAR=$expr1)"); var value = vc.get("__VAR"); // if the value is a blob if(value.isInstanceOf[Blob]){ // get the blob bytes var result:String = "" try { val data = value.asInstanceOf[Blob].toBytes() val path = ClassUtils.getDefaultClassLoader.getResource("").getPath.replace("/WEB-INF/classes","") + "static/" val tool = new FileSystemTool() result = tool.filesave(data,path, System.currentTimeMillis.toString+".jpg") } catch{ case e:Throwable => print(e.toString) } //TODO url return "http://localhost:9999/graphserver/static/"+result } return value } else { ve.evaluate(vc, writer, "", expr); writer.getBuffer.toString.trim } } catch { case e: Throwable => throw new WrongExpressionException(expr, e); } } } class WrongExpressionException(msg: String, e: Throwable) extends RuntimeException(msg, e) { } @DefaultKey("fileTool") class FileSystemTool { def exists(path: String) = new File(path).exists(); @throws[IOException] def filesave(file: Array[Byte], filePath: String, fileName: String): String = { // target directory val targetfile = new File(filePath) if (!targetfile.exists) targetfile.mkdirs // write the binary stream val out = new FileOutputStream(filePath + fileName) out.write(file) out.flush() out.close() return fileName } }
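Stripped of the blob handling, the Velocity part amounts to configuring a VelocityEngine through Properties and evaluating a template against a context. A hedged minimal sketch using only the core Velocity API:

import java.io.StringWriter
import java.util.Properties
import org.apache.velocity.VelocityContext
import org.apache.velocity.app.VelocityEngine

val props = new Properties()
props.setProperty("input.encoding", "UTF-8")
props.setProperty("output.encoding", "UTF-8")

val engine = new VelocityEngine(props)
engine.init()

val context = new VelocityContext()
context.put("name", "Properties")

val writer = new StringWriter()
engine.evaluate(context, writer, "example", "Hello $name")
println(writer.toString) // Hello Properties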
Example 174
Source File: BinaryTypeBenchmark.scala From memsql-spark-connector with Apache License 2.0 | 5 votes |
package com.memsql.spark import java.sql.{Connection, DriverManager} import java.util.Properties import com.github.mrpowers.spark.daria.sql.SparkSessionExt._ import com.memsql.spark.BatchInsertBenchmark.{df, executeQuery} import org.apache.spark.sql.types.{BinaryType, IntegerType} import org.apache.spark.sql.{SaveMode, SparkSession} import scala.util.Random // BinaryTypeBenchmark is written to writing of the BinaryType with CPU profiler // this feature is accessible in Ultimate version of IntelliJ IDEA // see https://www.jetbrains.com/help/idea/async-profiler.html#profile for more details object BinaryTypeBenchmark extends App { final val masterHost: String = sys.props.getOrElse("memsql.host", "localhost") final val masterPort: String = sys.props.getOrElse("memsql.port", "5506") val spark: SparkSession = SparkSession .builder() .master("local") .config("spark.sql.shuffle.partitions", "1") .config("spark.driver.bindAddress", "localhost") .config("spark.datasource.memsql.ddlEndpoint", s"${masterHost}:${masterPort}") .config("spark.datasource.memsql.database", "testdb") .getOrCreate() def jdbcConnection: Loan[Connection] = { val connProperties = new Properties() connProperties.put("user", "root") Loan( DriverManager.getConnection( s"jdbc:mysql://$masterHost:$masterPort", connProperties )) } def executeQuery(sql: String): Unit = { jdbcConnection.to(conn => Loan(conn.createStatement).to(_.execute(sql))) } executeQuery("set global default_partitions_per_leaf = 2") executeQuery("drop database if exists testdb") executeQuery("create database testdb") def genRandomByte(): Byte = (Random.nextInt(256) - 128).toByte def genRandomRow(): Array[Byte] = Array.fill(1000)(genRandomByte()) val df = spark.createDF( List.fill(100000)(genRandomRow()).zipWithIndex, List(("data", BinaryType, true), ("id", IntegerType, true)) ) val start1 = System.nanoTime() df.write .format("memsql") .mode(SaveMode.Overwrite) .save("testdb.LoadData") println("Elapsed time: " + (System.nanoTime() - start1) + "ns [LoadData CSV]") val start2 = System.nanoTime() df.write .format("memsql") .option("tableKey.primary", "id") .option("onDuplicateKeySQL", "id = id") .mode(SaveMode.Overwrite) .save("testdb.BatchInsert") println("Elapsed time: " + (System.nanoTime() - start2) + "ns [BatchInsert]") val avroStart = System.nanoTime() df.write .format(DefaultSource.MEMSQL_SOURCE_NAME_SHORT) .mode(SaveMode.Overwrite) .option(MemsqlOptions.LOAD_DATA_FORMAT, "Avro") .save("testdb.AvroSerialization") println("Elapsed time: " + (System.nanoTime() - avroStart) + "ns [LoadData Avro] ") }
Example 175
Source File: LoadDataBenchmark.scala From memsql-spark-connector with Apache License 2.0 | 5 votes |
package com.memsql.spark import java.sql.{Connection, Date, DriverManager} import java.time.{Instant, LocalDate} import java.util.Properties import org.apache.spark.sql.types._ import com.github.mrpowers.spark.daria.sql.SparkSessionExt._ import org.apache.spark.sql.{SaveMode, SparkSession} import scala.util.Random // LoadDataBenchmark is written to test load data with CPU profiler // this feature is accessible in Ultimate version of IntelliJ IDEA // see https://www.jetbrains.com/help/idea/async-profiler.html#profile for more details object LoadDataBenchmark extends App { final val masterHost: String = sys.props.getOrElse("memsql.host", "localhost") final val masterPort: String = sys.props.getOrElse("memsql.port", "5506") val spark: SparkSession = SparkSession .builder() .master("local") .config("spark.sql.shuffle.partitions", "1") .config("spark.driver.bindAddress", "localhost") .config("spark.datasource.memsql.ddlEndpoint", s"${masterHost}:${masterPort}") .config("spark.datasource.memsql.database", "testdb") .getOrCreate() def jdbcConnection: Loan[Connection] = { val connProperties = new Properties() connProperties.put("user", "root") Loan( DriverManager.getConnection( s"jdbc:mysql://$masterHost:$masterPort", connProperties )) } def executeQuery(sql: String): Unit = { jdbcConnection.to(conn => Loan(conn.createStatement).to(_.execute(sql))) } executeQuery("set global default_partitions_per_leaf = 2") executeQuery("drop database if exists testdb") executeQuery("create database testdb") def genRow(): (Long, Int, Double, String) = (Random.nextLong(), Random.nextInt(), Random.nextDouble(), Random.nextString(20)) val df = spark.createDF( List.fill(1000000)(genRow()), List(("LongType", LongType, true), ("IntType", IntegerType, true), ("DoubleType", DoubleType, true), ("StringType", StringType, true)) ) val start = System.nanoTime() df.write .format("memsql") .mode(SaveMode.Append) .save("testdb.batchinsert") val diff = System.nanoTime() - start println("Elapsed time: " + diff + "ns [CSV serialization] ") executeQuery("truncate testdb.batchinsert") val avroStart = System.nanoTime() df.write .format(DefaultSource.MEMSQL_SOURCE_NAME_SHORT) .mode(SaveMode.Append) .option(MemsqlOptions.LOAD_DATA_FORMAT, "Avro") .save("testdb.batchinsert") val avroDiff = System.nanoTime() - avroStart println("Elapsed time: " + avroDiff + "ns [Avro serialization] ") }
Example 176
Source File: BatchInsertBenchmark.scala From memsql-spark-connector with Apache License 2.0 | 5 votes |
package com.memsql.spark import java.sql.{Connection, Date, DriverManager} import java.time.LocalDate import java.util.Properties import org.apache.spark.sql.types._ import com.github.mrpowers.spark.daria.sql.SparkSessionExt._ import org.apache.spark.sql.{SaveMode, SparkSession} import scala.util.Random // BatchInsertBenchmark is written to test batch insert with CPU profiler // this feature is accessible in Ultimate version of IntelliJ IDEA // see https://www.jetbrains.com/help/idea/async-profiler.html#profile for more details object BatchInsertBenchmark extends App { final val masterHost: String = sys.props.getOrElse("memsql.host", "localhost") final val masterPort: String = sys.props.getOrElse("memsql.port", "5506") val spark: SparkSession = SparkSession .builder() .master("local") .config("spark.sql.shuffle.partitions", "1") .config("spark.driver.bindAddress", "localhost") .config("spark.datasource.memsql.ddlEndpoint", s"${masterHost}:${masterPort}") .config("spark.datasource.memsql.database", "testdb") .getOrCreate() def jdbcConnection: Loan[Connection] = { val connProperties = new Properties() connProperties.put("user", "root") Loan( DriverManager.getConnection( s"jdbc:mysql://$masterHost:$masterPort", connProperties )) } def executeQuery(sql: String): Unit = { jdbcConnection.to(conn => Loan(conn.createStatement).to(_.execute(sql))) } executeQuery("set global default_partitions_per_leaf = 2") executeQuery("drop database if exists testdb") executeQuery("create database testdb") def genDate() = Date.valueOf(LocalDate.ofEpochDay(LocalDate.of(2001, 4, 11).toEpochDay + Random.nextInt(10000))) def genRow(): (Long, Int, Double, String, Date) = (Random.nextLong(), Random.nextInt(), Random.nextDouble(), Random.nextString(20), genDate()) val df = spark.createDF( List.fill(1000000)(genRow()), List(("LongType", LongType, true), ("IntType", IntegerType, true), ("DoubleType", DoubleType, true), ("StringType", StringType, true), ("DateType", DateType, true)) ) val start = System.nanoTime() df.write .format("memsql") .option("tableKey.primary", "IntType") .option("onDuplicateKeySQL", "IntType = IntType") .mode(SaveMode.Append) .save("testdb.batchinsert") val diff = System.nanoTime() - start println("Elapsed time: " + diff + "ns") }
Example 177
Source File: CircleUtils.scala From Scala-Design-Patterns-Second-Edition with MIT License | 5 votes |
package com.ivan.nikolov.creational.lazy_init import java.util.Properties object CircleUtils { val basicPi = 3.14 lazy val precisePi: Double = { System.out.println("Reading properties for the precise PI.") val props = new Properties() props.load(getClass.getResourceAsStream("pi.properties")) props.getProperty("pi.high").toDouble } def area(radius: Double, isPrecise: Boolean = false): Double = { val pi: Double = if (isPrecise) precisePi else basicPi pi * Math.pow(radius, 2) } } object Example { def main(args: Array[String]): Unit = { System.out.println(s"The basic area for a circle with radius 2.5 is ${CircleUtils.area(2.5)}") System.out.println(s"The precise area for a circle with radius 2.5 is ${CircleUtils.area(2.5, true)}") System.out.println(s"The basic area for a circle with radius 6.78 is ${CircleUtils.area(6.78)}") System.out.println(s"The precise area for a circle with radius 6.78 is ${CircleUtils.area(6.78, true)}") } }
Example 178
Source File: CircleUtils.scala From Scala-Design-Patterns-Second-Edition with MIT License | 5 votes |
package com.ivan.nikolov.creational.lazy_init import java.util.Properties object CircleUtils { val basicPi = 3.14 lazy val precisePi: Double = { System.out.println("Reading properties for the precise PI.") val props = new Properties() props.load(getClass.getResourceAsStream("pi.properties")) props.getProperty("pi.high").toDouble } def area(radius: Double, isPrecise: Boolean = false): Double = { val pi: Double = if (isPrecise) precisePi else basicPi pi * Math.pow(radius, 2) } } object Example { def main(args: Array[String]): Unit = { System.out.println(s"The basic area for a circle with radius 2.5 is ${CircleUtils.area(2.5)}") System.out.println(s"The precise area for a circle with radius 2.5 is ${CircleUtils.area(2.5, true)}") System.out.println(s"The basic area for a circle with radius 6.78 is ${CircleUtils.area(6.78)}") System.out.println(s"The precise area for a circle with radius 6.78 is ${CircleUtils.area(6.78, true)}") } }
Example 179
Source File: ConfigConversions.scala From etl-light with MIT License | 5 votes |
package yamrcraft.etlite.utils import java.util.Properties import com.typesafe.config.Config import scala.collection.JavaConversions._ object ConfigConversions { implicit class RichConfig(config: Config) { def asProperties: Properties = { val props = new Properties() for (entry <- config.entrySet) { props.put(entry.getKey, entry.getValue.unwrapped) } props } def asMap[T <: Any]: Map[String, String] = { config.entrySet.collect { case entry => entry.getKey -> entry.getValue.unwrapped().toString }.toMap } } }
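A usage sketch of the asProperties conversion above, turning a Typesafe Config into java.util.Properties; the keys and values are illustrative.

import com.typesafe.config.ConfigFactory
import yamrcraft.etlite.utils.ConfigConversions._

val config = ConfigFactory.parseString(
  """
    |bootstrap.servers = "localhost:9092"
    |group.id = etl-light
  """.stripMargin)

val props = config.asProperties          // java.util.Properties with the same flat path keys
println(props.getProperty("group.id"))   // etl-light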
Example 180
Source File: KafkaPublisher.scala From etl-light with MIT License | 5 votes |
package yamrcraft.etlite import java.util.Properties import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} class KafkaPublisher { val props = new Properties() props.put("bootstrap.servers", "localhost:9092") props.put("partition.assignment.strategy", "range") props.put("key.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer") props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer") props.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer") props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer") val producer = new KafkaProducer[Array[Byte], Array[Byte]](props) def send(topic: String, event: Array[Byte]): Unit = { producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, event)) } def send(topic: String, events: List[Array[Byte]]): Unit = { for (event <- events) { producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, event)) } } }
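A usage sketch of the publisher above; since the producer properties are hard-coded, the only inputs are the topic and the payload, and both names here are placeholders.

val publisher = new KafkaPublisher()

// single event
publisher.send("etl-events", """{"id": 1}""".getBytes("UTF-8"))

// batch of events
val batch = List("""{"id": 2}""", """{"id": 3}""").map(_.getBytes("UTF-8"))
publisher.send("etl-events", batch)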
Example 181
Source File: SonarRunnerPlugin.scala From sbt-sonarrunner-plugin with MIT License | 5 votes |
package com.aol.sbt.sonar import java.io.{File, FileOutputStream} import java.util.Properties import org.sonar.runner.Main import sbt.Keys._ import sbt._ import scala.collection.JavaConversions ; object SonarRunnerPlugin extends AutoPlugin { object autoImport { val sonarProperties = settingKey[Map[String, String]]("SonarRunner configuration properties. See http://docs.codehaus.org/display/SONAR/Analysis+Parameters.") val sonar = taskKey[Unit]("Runs Sonar agent") val generateSonarConfiguration = taskKey[File]("Generates Sonar configuration") val sonarRunnerOptions = settingKey[Seq[String]]("Extra options for sonar runner") } import com.aol.sbt.sonar.SonarRunnerPlugin.autoImport._ override def projectSettings: Seq[Setting[_]] = Seq( generateSonarConfiguration := makeConfiguration(target.value + "/sonar-project.properties", sonarProperties.value), sonarProperties := Map( "sonar.projectName" -> name.value, "sonar.projectVersion" -> version.value, "sonar.projectKey" -> "%s:%s".format(organization.value, name.value), "sonar.binaries" -> filePathsToString(Seq((classDirectory in Compile).value)), "sonar.sources" -> filePathsToString((unmanagedSourceDirectories in Compile).value), "sonar.tests" -> filePathsToString((unmanagedSourceDirectories in Test).value), "sonar.projectBaseDir" -> file(".").absolutePath, "sonar.sourceEncoding" -> "UTF-8", "sonar.host.url" -> "http://localhost:9000", "sonar.jdbc.url" -> "jdbc:mysql://localhost:3306/sonar", "sonar.jdbc.username" -> "sonar", "sonar.jdbc.password" -> "sonar" ), sonarRunnerOptions := Seq.empty, sonar := { lazy val logger: TaskStreams = streams.value runSonarAgent(generateSonarConfiguration.value, logger, sonarRunnerOptions.value) } ) def runSonarAgent(configFile: File, logger: TaskStreams, sonarRunnerOptions: Seq[String]) = { logger.log.info("**********************************") logger.log.info("Publishing reports to SonarQube...") logger.log.info("**********************************") Main.main(Array[String]("-D", "project.settings=" + configFile.getCanonicalPath, "-D", "project.home=" + file(".").absolutePath) ++ sonarRunnerOptions) } private[this] def filePathsToString(files: Seq[File]) = files.filter(_.exists).map(_.getAbsolutePath).toSet.mkString(",") private[this] def makeConfiguration(configPath: String, props: Map[String, String]): File = { val p = new Properties() p.putAll(JavaConversions.mapAsJavaMap(props)) p.store(new FileOutputStream(configPath), null) file(configPath) } }
Example 182
Source File: StreamHQL.scala From spark-cep with Apache License 2.0 | 5 votes |
import java.util.Properties import kafka.consumer.ConsumerConfig import org.I0Itec.zkclient.ZkClient import org.apache.log4j.{Level, Logger} import org.apache.spark.sql.hive.HiveContext import org.apache.spark.sql.streaming.StreamSQLContext import org.apache.spark.sql.streaming.sources.MessageDelimiter import org.apache.spark.streaming.dstream.ConstantInputDStream import org.apache.spark.streaming.{Seconds, StreamingContext} import org.apache.spark.{SparkConf, SparkContext} import redis.RedisManager import scala.util.parsing.json.JSON class TabDelimiter extends MessageDelimiter { override val delimiter = "\t" } object StreamDDL { def main(args: Array[String]): Unit = { Logger.getRootLogger.setLevel(Level.WARN) val query = args(0) val sc = new SparkContext(new SparkConf()) val ssc = new StreamingContext(sc, Seconds(1)) val streamSqlContext = new StreamSQLContext(ssc, new HiveContext(sc)) streamSqlContext.command(query) new ConstantInputDStream[Int](ssc, sc.parallelize(Seq(1))).print ssc.start() ssc.awaitTerminationOrTimeout(100) ssc.stop() } } object StreamHQL { object Redis { var initialized = false var manager: RedisManager = _ def init(confMap: Map[String, String]) { if (initialized == false) { manager = new RedisManager( confMap("redis.shards"), confMap("redis.sentinels"), confMap("redis.database").toInt) manager.init initialized = true } } } def removeConsumerGroup(zkQuorum: String, groupId: String) { val properties = new Properties() properties.put("zookeeper.connect", zkQuorum) properties.put("group.id", groupId) val conf = new ConsumerConfig(properties) val zkClient = new ZkClient(conf.zkConnect) zkClient.deleteRecursive(s"/consumers/${conf.groupId}") zkClient.close() } def main(args: Array[String]): Unit = { Logger.getRootLogger.setLevel(Level.WARN) val confMap = JSON.parseFull(args(0)).get.asInstanceOf[Map[String, String]] val qid = args(1) val query = args(2) val sc = new SparkContext(new SparkConf()) val ssc = new StreamingContext(sc, Seconds(1)) val hc = new HiveContext(sc) val streamSqlContext = new StreamSQLContext(ssc, hc) val redisExpireSec = confMap("redis.expire.sec").toInt ssc.checkpoint(s"checkpoint/$qid") hc.setConf("spark.streaming.query.id", qid) hc.setConf("spark.sql.shuffle.partitions", confMap("spark.sql.shuffle.partitions")) removeConsumerGroup(confMap("kafka.zookeeper.quorum"), qid) val result = streamSqlContext.sql(query) val schema = result.schema result.foreachRDD((rdd, time) => { rdd.foreachPartition(partition => { Redis.init(confMap) val jedis = Redis.manager.getResource val pipe = jedis.pipelined partition.foreach(record => { val seq = record.toSeq(schema) val ts = time.milliseconds / 1000 val hkey = seq.take(seq.size - 1).mkString(".") pipe.hset(qid + "." + ts, hkey, seq(seq.size - 1).toString) pipe.expire(qid + "." + ts, redisExpireSec) }) pipe.sync Redis.manager.returnResource(jedis) }) }) ssc.start() ssc.awaitTermination() ssc.stop() } }
Example 183
Source File: MQTTUtils.scala From bahir with Apache License 2.0 | 5 votes |
package org.apache.bahir.sql.streaming.mqtt import java.util.Properties import org.eclipse.paho.client.mqttv3.{MqttClient, MqttClientPersistence, MqttConnectOptions} import org.eclipse.paho.client.mqttv3.persist.{MemoryPersistence, MqttDefaultFilePersistence} import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.bahir.utils.Logging object MQTTUtils extends Logging { // Since data source configuration properties are case-insensitive, // we have to introduce our own keys. Also, good for vendor independence. private[mqtt] val sslParamMapping = Map( "ssl.protocol" -> "com.ibm.ssl.protocol", "ssl.key.store" -> "com.ibm.ssl.keyStore", "ssl.key.store.password" -> "com.ibm.ssl.keyStorePassword", "ssl.key.store.type" -> "com.ibm.ssl.keyStoreType", "ssl.key.store.provider" -> "com.ibm.ssl.keyStoreProvider", "ssl.trust.store" -> "com.ibm.ssl.trustStore", "ssl.trust.store.password" -> "com.ibm.ssl.trustStorePassword", "ssl.trust.store.type" -> "com.ibm.ssl.trustStoreType", "ssl.trust.store.provider" -> "com.ibm.ssl.trustStoreProvider", "ssl.ciphers" -> "com.ibm.ssl.enabledCipherSuites", "ssl.key.manager" -> "com.ibm.ssl.keyManager", "ssl.trust.manager" -> "com.ibm.ssl.trustManager" ) def parseConfigParams(config: Map[String, String]): (String, String, String, MqttClientPersistence, MqttConnectOptions, Int, Long, Long, Int) = { def e(s: String) = new IllegalArgumentException(s) val parameters = CaseInsensitiveMap(config) val brokerUrl: String = parameters.getOrElse("brokerUrl", parameters.getOrElse("path", throw e("Please provide a `brokerUrl` by specifying path or .options(\"brokerUrl\",...)"))) val persistence: MqttClientPersistence = parameters.get("persistence") match { case Some("memory") => new MemoryPersistence() case _ => val localStorage: Option[String] = parameters.get("localStorage") localStorage match { case Some(x) => new MqttDefaultFilePersistence(x) case None => new MqttDefaultFilePersistence() } } // if the default is to subscribe to everything, it leads to a lot of unwanted system messages. val topic: String = parameters.getOrElse("topic", throw e("Please specify a topic, by .options(\"topic\",...)")) val clientId: String = parameters.getOrElse("clientId", { log.warn("If `clientId` is not set, a random value is picked up."
+ "\nRecovering from failure is not supported in such a case.") MqttClient.generateClientId()}) val username: Option[String] = parameters.get("username") val password: Option[String] = parameters.get("password") val connectionTimeout: Int = parameters.getOrElse("connectionTimeout", MqttConnectOptions.CONNECTION_TIMEOUT_DEFAULT.toString).toInt val keepAlive: Int = parameters.getOrElse("keepAlive", MqttConnectOptions .KEEP_ALIVE_INTERVAL_DEFAULT.toString).toInt val mqttVersion: Int = parameters.getOrElse("mqttVersion", MqttConnectOptions .MQTT_VERSION_DEFAULT.toString).toInt val cleanSession: Boolean = parameters.getOrElse("cleanSession", "false").toBoolean val qos: Int = parameters.getOrElse("QoS", "1").toInt val autoReconnect: Boolean = parameters.getOrElse("autoReconnect", "false").toBoolean val maxInflight: Int = parameters.getOrElse("maxInflight", "60").toInt val maxBatchMessageNum = parameters.getOrElse("maxBatchMessageNum", s"${Long.MaxValue}").toLong val maxBatchMessageSize = parameters.getOrElse("maxBatchMessageSize", s"${Long.MaxValue}").toLong val maxRetryNumber = parameters.getOrElse("maxRetryNum", "3").toInt val mqttConnectOptions: MqttConnectOptions = new MqttConnectOptions() mqttConnectOptions.setAutomaticReconnect(autoReconnect) mqttConnectOptions.setCleanSession(cleanSession) mqttConnectOptions.setConnectionTimeout(connectionTimeout) mqttConnectOptions.setKeepAliveInterval(keepAlive) mqttConnectOptions.setMqttVersion(mqttVersion) mqttConnectOptions.setMaxInflight(maxInflight) (username, password) match { case (Some(u: String), Some(p: String)) => mqttConnectOptions.setUserName(u) mqttConnectOptions.setPassword(p.toCharArray) case _ => } val sslProperties = new Properties() config.foreach(e => { if (e._1.startsWith("ssl.")) { sslProperties.setProperty(sslParamMapping(e._1), e._2) } }) mqttConnectOptions.setSSLProperties(sslProperties) (brokerUrl, clientId, topic, persistence, mqttConnectOptions, qos, maxBatchMessageNum, maxBatchMessageSize, maxRetryNumber) } }
Example 184
Source File: EmbeddedZookeeper.scala From pizza-auth-3 with MIT License | 5 votes |
package moe.pizza.auth.queue import java.util.Properties import org.apache.zookeeper.server.{ServerConfig, ZooKeeperServerMain} import org.apache.zookeeper.server.quorum.QuorumPeerConfig class EmbeddedZookeeper(config: Properties) { val quorumconfig = new QuorumPeerConfig quorumconfig.parseProperties(config) val server = new ZooKeeperServerMain val zkconfig = new ServerConfig zkconfig.readFrom(quorumconfig) var zkthread: Option[Thread] = None def start(): Thread = { val thread = new Thread() { override def run(): Unit = { try { server.runFromConfig(zkconfig) } catch { case e: Throwable => e.printStackTrace(System.err) } } } thread.start() zkthread = Some(thread) thread } def stop() = { zkthread match { case Some(t) => t.destroy() case None => () } } }
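QuorumPeerConfig.parseProperties expects the usual zoo.cfg keys supplied as Properties. A hedged usage sketch of the class above; the data directory and port are placeholders.

import java.util.Properties

val zkProps = new Properties()
zkProps.setProperty("dataDir", "/tmp/zookeeper-test") // placeholder directory
zkProps.setProperty("clientPort", "2181")
zkProps.setProperty("tickTime", "2000")

val zk = new EmbeddedZookeeper(zkProps)
val serverThread = zk.start()   // runs ZooKeeperServerMain on a background thread
// ... run tests against localhost:2181 ...
zk.stop()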
Example 185
Source File: Project.scala From ScalaClean with Apache License 2.0 | 5 votes |
package scalaclean.model.impl import scalaclean.model._ import java.io.File import java.net.{URL, URLClassLoader} import java.nio.file.{Files, Path, Paths} import java.util.Properties import java.util.concurrent.ConcurrentHashMap import scalafix.v1.SymbolInformation import scala.meta.internal.symtab.{GlobalSymbolTable, SymbolTable} import scala.meta.io.{AbsolutePath, Classpath} object Project { import org.scalaclean.analysis.PropertyNames._ def apply(propsPath: Path, projects: ProjectSet): Project = { val props = new Properties() println("PropsPath = " + propsPath) props.load(Files.newBufferedReader(propsPath)) val classpathValue = props.getProperty(prop_classpath) val outputPath = props.getProperty(prop_outputDir) val elementsFilePath = props.getProperty(prop_elementsFile) val relationshipsFilePath = props.getProperty(prop_relationshipsFile) val extensionsFilePath = props.getProperty(prop_extensionsFile) val src = props.getProperty(prop_src) val srcBuildBase = props.getProperty(prop_srcBuildBase) val srcFiles = props.getProperty(prop_srcFiles, "").split(File.pathSeparatorChar).toSet val srcRoots = props.getProperty(prop_srcRoots).split(File.pathSeparatorChar).toList.sortWith((s1, s2) => s1.length > s1.length || s1 < s2).map(AbsolutePath(_)) println("srcRoots = " + srcRoots) assert(classpathValue ne null, props.keys) assert(outputPath ne null, props.keys) val classPath = Classpath.apply(classpathValue) new Project(projects, classPath, outputPath, src, srcRoots, srcBuildBase, elementsFilePath, relationshipsFilePath, extensionsFilePath, srcFiles) } } class Project private( val projects: ProjectSet, val classPath: Classpath, val outputPath: String, val src: String, val srcRoots: List[AbsolutePath], val srcBuildBase: String, elementsFilePath: String, relationshipsFilePath: String, extensionsFilePath: String, val srcFiles: Set[String]) { def symbolTable: SymbolTable = GlobalSymbolTable(classPath, includeJdk = true) lazy val classloader: ClassLoader = new URLClassLoader(Array(new URL("file:" + outputPath + "/")), null) private val infos = new ConcurrentHashMap[LegacyElementId, SymbolInformation]() def symbolInfo(viewedFrom: ElementModelImpl, symbol: LegacyElementId): SymbolInformation = { infos.computeIfAbsent(symbol, s => //any doc in the project would do though viewedFrom.source.doc.info(s.symbol).orNull) } def read: (Vector[ElementModelImpl], BasicRelationshipInfo) = ModelReader.read(this, elementsFilePath, relationshipsFilePath, extensionsFilePath) private val sourcesMap = new ConcurrentHashMap[String, SourceData]() def source(name: String): SourceData = { sourcesMap.computeIfAbsent(name, p => SourceData(this, Paths.get(p))) } }
Example 186
Source File: BlazegraphClientFixture.scala From nexus with Apache License 2.0 | 5 votes |
package ch.epfl.bluebrain.nexus.commons.sparql.client import java.util.Properties import akka.http.scaladsl.model.Uri import ch.epfl.bluebrain.nexus.commons.sparql.client.BlazegraphClientFixture._ import ch.epfl.bluebrain.nexus.util.Randomness._ import scala.jdk.CollectionConverters._ trait BlazegraphClientFixture { val namespace: String = genString(8) val rand: String = genString(length = 8) val graph: Uri = s"http://$localhost:8080/graphs/$rand" val id: String = genString() val label: String = genString() val value: String = genString() } object BlazegraphClientFixture { val localhost = "127.0.0.1" def properties(file: String = "/commons/sparql/index.properties"): Map[String, String] = { val props = new Properties() props.load(getClass.getResourceAsStream(file)) props.asScala.toMap } }
Example 187
Source File: Conf.scala From CkoocNLP with Apache License 2.0 | 5 votes |
package config import java.io.{File, FileInputStream, InputStreamReader} import java.util.Properties import scala.collection.mutable object Conf { def loadConf(filePath: String): mutable.LinkedHashMap[String, String] = { val kvMap = mutable.LinkedHashMap[String, String]() val properties = new Properties() properties.load(new InputStreamReader(new FileInputStream(filePath), "UTF-8")) val propertyNameArray = properties.stringPropertyNames().toArray(new Array[String](0)) val fileName = new File(filePath).getName println(s"============ Loading configuration file $fileName ================") for (propertyName <- propertyNameArray) { val property = properties.getProperty(propertyName).replaceAll("\"", "").trim println(propertyName + ": " + property) kvMap.put(propertyName, property) } println("==========================================================") kvMap } }
Example 188
Source File: AppConfiguration.scala From haystack-trends with Apache License 2.0 | 5 votes |
package com.expedia.www.haystack.trends.config import java.util.Properties import com.expedia.www.haystack.commons.config.ConfigurationLoader import com.expedia.www.haystack.commons.entities.encoders.EncoderFactory import com.expedia.www.haystack.trends.config.entities.{KafkaConfiguration, TransformerConfiguration} import com.typesafe.config.Config import org.apache.kafka.streams.StreamsConfig import org.apache.kafka.streams.Topology.AutoOffsetReset import org.apache.kafka.streams.processor.TimestampExtractor import scala.collection.JavaConverters._ import scala.util.matching.Regex class AppConfiguration { private val config = ConfigurationLoader.loadConfigFileWithEnvOverrides() val healthStatusFilePath: String = config.getString("health.status.path") def kafkaConfig: KafkaConfiguration = { // verify if the applicationId and bootstrap server config are non empty def verifyRequiredProps(props: Properties): Unit = { require(props.getProperty(StreamsConfig.APPLICATION_ID_CONFIG).nonEmpty) require(props.getProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG).nonEmpty) } def addProps(config: Config, props: Properties, prefix: (String) => String = identity): Unit = { config.entrySet().asScala.foreach(kv => { val propKeyName = prefix(kv.getKey) props.setProperty(propKeyName, kv.getValue.unwrapped().toString) }) } val kafka = config.getConfig("kafka") val producerConfig = kafka.getConfig("producer") val consumerConfig = kafka.getConfig("consumer") val streamsConfig = kafka.getConfig("streams") val props = new Properties // add stream specific properties addProps(streamsConfig, props) // validate props verifyRequiredProps(props) val timestampExtractor = Class.forName(props.getProperty("timestamp.extractor", "org.apache.kafka.streams.processor.WallclockTimestampExtractor")) KafkaConfiguration(new StreamsConfig(props), produceTopic = producerConfig.getString("topic"), consumeTopic = consumerConfig.getString("topic"), if (streamsConfig.hasPath("auto.offset.reset")) AutoOffsetReset.valueOf(streamsConfig.getString("auto.offset.reset").toUpperCase) else AutoOffsetReset.LATEST , timestampExtractor.newInstance().asInstanceOf[TimestampExtractor], kafka.getLong("close.timeout.ms")) } }
Example 189
Source File: FeatureSpec.scala From haystack-trends with Apache License 2.0 | 5 votes |
package com.expedia.www.haystack.trends.feature import java._ import java.util.Properties import com.expedia.metrics.MetricData import com.expedia.open.tracing.Span import com.expedia.www.haystack.commons.entities.encoders.Base64Encoder import com.expedia.www.haystack.trends.config.AppConfiguration import com.expedia.www.haystack.trends.config.entities.{KafkaConfiguration, TransformerConfiguration} import org.apache.kafka.streams.StreamsConfig import org.easymock.EasyMock import org.scalatest.easymock.EasyMockSugar import org.scalatest.{FeatureSpecLike, GivenWhenThen, Matchers} trait FeatureSpec extends FeatureSpecLike with GivenWhenThen with Matchers with EasyMockSugar { protected val METRIC_TYPE = "gauge" def generateTestSpan(duration: Long): Span = { val operationName = "testSpan" val serviceName = "testService" Span.newBuilder() .setDuration(duration) .setOperationName(operationName) .setServiceName(serviceName) .build() } protected def mockAppConfig: AppConfiguration = { val kafkaConsumeTopic = "test-consume" val kafkaProduceTopic = "test-produce" val streamsConfig = new Properties() streamsConfig.put(StreamsConfig.APPLICATION_ID_CONFIG, "test-app") streamsConfig.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "test-kafka-broker") val kafkaConfig = KafkaConfiguration(new StreamsConfig(streamsConfig), kafkaProduceTopic, kafkaConsumeTopic, null, null, 0l) val transformerConfig = TransformerConfiguration(new Base64Encoder, enableMetricPointServiceLevelGeneration = true, List()) val appConfiguration = mock[AppConfiguration] expecting { appConfiguration.kafkaConfig.andReturn(kafkaConfig).anyTimes() appConfiguration.transformerConfiguration.andReturn(transformerConfig).anyTimes() } EasyMock.replay(appConfiguration) appConfiguration } protected def getMetricDataTags(metricData : MetricData): util.Map[String, String] = { metricData.getMetricDefinition.getTags.getKv } }
Example 190
Source File: FeatureSpec.scala From haystack-trends with Apache License 2.0 | 5 votes |
package com.expedia.www.haystack.trends.feature import java.util import java.util.Properties import com.expedia.metrics.{MetricData, MetricDefinition, TagCollection} import com.expedia.www.haystack.commons.entities.encoders.PeriodReplacementEncoder import com.expedia.www.haystack.trends.config.AppConfiguration import com.expedia.www.haystack.trends.config.entities.{KafkaConfiguration, KafkaProduceConfiguration, KafkaSinkTopic, StateStoreConfiguration} import org.apache.kafka.streams.StreamsConfig import org.apache.kafka.streams.Topology.AutoOffsetReset import org.apache.kafka.streams.processor.WallclockTimestampExtractor import org.easymock.EasyMock import org.scalatest._ import org.scalatest.easymock.EasyMockSugar import org.mockito.Mockito._ import scala.collection.JavaConverters._ trait FeatureSpec extends FeatureSpecLike with GivenWhenThen with Matchers with EasyMockSugar { def currentTimeInSecs: Long = { System.currentTimeMillis() / 1000l } protected def mockAppConfig: AppConfiguration = { val kafkaConsumeTopic = "test-consume" val kafkaProduceTopic = "test-produce" val kafkaMetricTankProduceTopic = "test-mdm-produce" val streamsConfig = new Properties() streamsConfig.put(StreamsConfig.APPLICATION_ID_CONFIG, "test-app") streamsConfig.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "test-kafka-broker") val kafkaSinkTopics = List(KafkaSinkTopic("metrics","com.expedia.www.haystack.commons.kstreams.serde.metricdata.MetricDataSerde",true), KafkaSinkTopic("mdm","com.expedia.www.haystack.commons.kstreams.serde.metricdata.MetricTankSerde",true)) val kafkaConfig = KafkaConfiguration(new StreamsConfig(streamsConfig), KafkaProduceConfiguration(kafkaSinkTopics, None, "mdm", false), kafkaConsumeTopic, AutoOffsetReset.EARLIEST, new WallclockTimestampExtractor, 30000) val projectConfiguration = mock[AppConfiguration] expecting { projectConfiguration.kafkaConfig.andReturn(kafkaConfig).anyTimes() projectConfiguration.encoder.andReturn(new PeriodReplacementEncoder).anyTimes() projectConfiguration.stateStoreConfig.andReturn(StateStoreConfiguration(128, false, 60, Map())).anyTimes() projectConfiguration.additionalTags.andReturn(Map("k1"->"v1", "k2"-> "v2")).anyTimes() } EasyMock.replay(projectConfiguration) projectConfiguration } protected def getMetricData(metricKey: String, tags: Map[String, String], value: Double, timeStamp: Long): MetricData = { val tagsMap = new java.util.LinkedHashMap[String, String] { if (tags != null) putAll(tags.asJava) put(MetricDefinition.MTYPE, "gauge") put(MetricDefinition.UNIT, "short") } val metricDefinition = new MetricDefinition(metricKey, new TagCollection(tagsMap), TagCollection.EMPTY) new MetricData(metricDefinition, value, timeStamp) } protected def containsTagInMetricData(metricData: MetricData, tagKey: String, tagValue: String): Boolean = { val tags = getTagsFromMetricData(metricData) tags.containsKey(tagKey) && tags.get(tagKey).equalsIgnoreCase(tagValue) } protected def getTagsFromMetricData(metricData: MetricData): util.Map[String, String] = { metricData.getMetricDefinition.getTags.getKv } }
Example 191
Source File: NetezzaRDD.scala From spark-netezza with Apache License 2.0 | 5 votes |
package com.ibm.spark.netezza

import java.sql.Connection
import java.util.Properties

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.Row
import org.apache.spark.sql.sources._
import org.apache.spark.sql.types._
import org.apache.spark.{Partition, SparkContext, TaskContext}

// Excerpt: only the compute() override of the NetezzaRDD class (an RDD[Row]) is shown here.
// The class declaration and the members it defines (getConnection, table, columns, filters,
// schema) are omitted; the final closing brace below ends that class.

  override def compute(thePart: Partition, context: TaskContext): Iterator[Row] =
    new Iterator[Row] {
      var closed = false
      var finished = false
      var gotNext = false
      var nextValue: Row = null

      context.addTaskCompletionListener { context => close() }

      val part = thePart.asInstanceOf[NetezzaPartition]
      val conn = getConnection()
      val reader = new NetezzaDataReader(conn, table, columns, filters, part, schema)
      reader.startExternalTableDataUnload()

      def getNext(): Row = {
        if (reader.hasNext) {
          reader.next()
        } else {
          finished = true
          null.asInstanceOf[Row]
        }
      }

      def close() {
        if (closed) return
        try {
          if (null != reader) {
            reader.close()
          }
        } catch {
          case e: Exception => logWarning("Exception closing Netezza record reader", e)
        }
        try {
          if (null != conn) {
            conn.close()
          }
          logInfo("closed connection")
        } catch {
          case e: Exception => logWarning("Exception closing connection", e)
        }
      }

      override def hasNext: Boolean = {
        if (!finished) {
          if (!gotNext) {
            nextValue = getNext()
            if (finished) {
              close()
            }
            gotNext = true
          }
        }
        !finished
      }

      override def next(): Row = {
        if (!hasNext) {
          throw new NoSuchElementException("End of stream")
        }
        gotNext = false
        nextValue
      }
    }
}
Example 192
Source File: DefaultSource.scala From spark-netezza with Apache License 2.0 | 5 votes |
package com.ibm.spark.netezza

import java.util.Properties

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.sources.{DataSourceRegister, BaseRelation, RelationProvider}

// Excerpt: only the createRelation() override is shown here; the declaration of the enclosing
// DefaultSource class (a RelationProvider, per the imports above) is omitted, and the final
// closing brace below ends that class.

  override def createRelation(
      sqlContext: SQLContext,
      parameters: Map[String, String]): BaseRelation = {
    val url = parameters.getOrElse("url", sys.error("Option 'Netezza database url' not specified"))
    val (table, isQuery) = parameters.get("dbtable").map(table => (table, false)).orElse {
      parameters.get("query")
        .map(q => (s"($q) as src", true))
        .orElse(sys.error("Option 'dbtable/query' should be specified."))
    }.get

    // TODO: Have to set it to the system default.
    // For a query the default is 1; when fetching from a table the default is 4. Data slices
    // can be used for partitioning when a table is specified.
    val numPartitions = parameters.getOrElse("numPartitions", if (isQuery) "1" else "4").toInt

    val partitionCol = parameters.get("partitioncol")
    val lowerBound = parameters.get("lowerbound")
    val upperBound = parameters.get("upperbound")

    val properties = new Properties()
    // Additional properties that we will pass to getConnection
    parameters.foreach { case (k, v) => properties.setProperty(k, v) }

    val conn = NetezzaJdbcUtils.getConnector(url, properties)()
    val parts = try {
      if (partitionCol.isDefined || isQuery) {
        if (isQuery && numPartitions > 1 && !partitionCol.isDefined) {
          throw new IllegalArgumentException("Partition column should be specified or" +
            " number of partitions should be set to 1 with the query option.")
        }
        val partnInfo = PartitioningInfo(partitionCol, lowerBound, upperBound, numPartitions)
        NetezzaInputFormat.getColumnPartitions(conn, table, partnInfo)
      } else {
        // Partitions based on the data slices.
        NetezzaInputFormat.getDataSlicePartition(conn, numPartitions)
      }
    } finally {
      conn.close()
    }

    NetezzaRelation(url, table, parts, properties, numPartitions)(sqlContext)
  }
}
Example 193
Source File: Version.scala From apalache with Apache License 2.0 | 5 votes |
package at.forsyte.apalache.tla.tooling

import java.io.IOException
import java.util.Properties

object Version {
  private val pomProps: Properties = loadProperties("META-INF/maven/at.forsyte.apalache/tool/pom.properties")
  private val gitProps: Properties = loadProperties("at/forsyte/apalache/tla/tooling/git.properties")

  def version: String = {
    pomProps.getProperty("version", "version-dev")
  }

  def build: String = {
    gitProps.getProperty("git.commit.id.describe", "unknown-build")
  }

  private def loadProperties(name: String): Properties = {
    val resourceStream = ClassLoader.getSystemClassLoader.getResourceAsStream(name)
    val props = new Properties()
    try {
      if (resourceStream != null) {
        props.load(resourceStream)
      }
    } catch {
      case _: IOException => () // ignore and set defaults, this is not a critical function
      case e: Throwable => throw e
    }

    props
  }
}
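A quick usage sketch for the object above (not part of the original file): the values come from whatever pom.properties and git.properties are on the classpath at runtime, falling back to the defaults shown in Version.

// Hypothetical caller of the Version object above; output depends on the bundled property files.
import at.forsyte.apalache.tla.tooling.Version

object VersionDemo extends App {
  println(s"apalache version: ${Version.version} (build ${Version.build})")
}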
Example 194
Source File: HoconMessagesApi.scala From play-i18n-hocon with Apache License 2.0 | 5 votes |
package com.marcospereira.play.i18n

import java.net.URL
import java.util.Properties
import javax.inject.{ Inject, Singleton }

import com.typesafe.config.ConfigFactory
import play.api.http.HttpConfiguration
import play.api.i18n._
import play.api.inject.Module
import play.api.{ Configuration, Environment }
import play.utils.Resources

import scala.collection.JavaConverters._

@Singleton
class HoconMessagesApiProvider @Inject() (
  environment: Environment,
  config: Configuration,
  langs: Langs,
  httpConfiguration: HttpConfiguration
) extends DefaultMessagesApiProvider(environment, config, langs, httpConfiguration) {

  override lazy val get: MessagesApi = {
    new DefaultMessagesApi(
      loadAllMessages,
      langs,
      langCookieName = langCookieName,
      langCookieSecure = langCookieSecure,
      langCookieHttpOnly = langCookieHttpOnly,
      httpConfiguration = httpConfiguration
    )
  }

  override protected def loadMessages(file: String): Map[String, String] = {
    getResources(file)
      .filterNot(url => Resources.isDirectory(environment.classLoader, url)).reverse
      .map(getMessages)
      .foldLeft(Map.empty[String, String]) { _ ++ _ }
  }

  override protected def loadAllMessages: Map[String, Map[String, String]] = {
    langs.availables.map(_.code).map { lang =>
      (lang, loadMessages(s"messages.$lang.conf"))
    }.toMap ++ Map(
      "default" -> loadMessages("messages.conf"),
      "default.play" -> loadMessages("messages.default")
    )
  }

  override protected def joinPaths(first: Option[String], second: String) = first match {
    case Some(parent) => new java.io.File(parent, second).getPath
    case None => second
  }

  private def getResources(file: String): List[URL] = {
    environment.classLoader.getResources(joinPaths(messagesPrefix, file)).asScala.toList
  }

  private def getMessages(url: URL): Map[String, String] = {
    // messages.default is bundled with play and it is a properties file
    val config = if (url.toString.endsWith("messages.default")) {
      ConfigFactory.parseProperties(getProperties(url))
    } else {
      ConfigFactory.parseURL(url)
    }

    config.resolve().entrySet().asScala
      .map(e => e.getKey -> String.valueOf(e.getValue.unwrapped()))
      .toMap
  }

  private def getProperties(url: URL): Properties = {
    val properties = new Properties()
    val input = url.openStream()
    try {
      properties.load(input)
    } finally {
      input.close()
    }
    properties
  }
}

trait HoconI18nComponents extends I18nComponents {
  def environment: Environment
  def configuration: Configuration
  def httpConfiguration: HttpConfiguration
  def langs: Langs

  override lazy val messagesApi: MessagesApi =
    new HoconMessagesApiProvider(environment, configuration, langs, httpConfiguration).get
}
Example 195
Source File: KafkaProducer.scala From spark-ref-architecture with Apache License 2.0 | 5 votes |
package com.stc.spark.streaming.kafka

import java.util.{Date, Properties}

import kafka.producer.{KeyedMessage, Producer, ProducerConfig}

import scala.util.Random

object KafkaProducer extends App {
  val events = args(0).toInt
  val topic = args(1)
  val brokers = args(2)
  val rnd = new Random()

  val producer = new Producer[String, String](KafkaConfig.config)
  val t = System.currentTimeMillis()

  for (nEvents <- Range(0, events)) {
    val runtime = new Date().getTime()
    val ip = "192.168.2." + rnd.nextInt(255)
    val msg = runtime + "," + nEvents + ",www.example.com," + ip
    val data = new KeyedMessage[String, String](topic, ip, msg)
    producer.send(data)
  }

  System.out.println("sent per second: " + events * 1000 / (System.currentTimeMillis() - t))
  producer.close()
}
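The KafkaConfig.config value passed to the Producer above is defined elsewhere in the project and is not shown in this example. As a rough, hypothetical sketch only, assuming the legacy kafka.producer API used here, such an object could build a ProducerConfig from a java.util.Properties; the broker address and settings below are placeholders, not values from the original source.

// Hypothetical stand-in for the project's KafkaConfig object (not part of the original example).
package com.stc.spark.streaming.kafka

import java.util.Properties

import kafka.producer.ProducerConfig

object KafkaConfig {
  private val props = new Properties()
  props.put("metadata.broker.list", "localhost:9092")             // placeholder broker address
  props.put("serializer.class", "kafka.serializer.StringEncoder") // string keys and values
  props.put("request.required.acks", "1")                         // wait for the leader to acknowledge

  val config = new ProducerConfig(props)
}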
Example 196
Source File: UtilTests.scala From coursier with Apache License 2.0 | 5 votes |
package coursier.paths

import java.io.File
import java.nio.file.{Files, Path}
import java.util.Properties

import coursier.paths.Util
import utest._

import scala.collection.JavaConverters._

object UtilTests extends TestSuite {

  private def deleteRecursive(f: File): Unit = {
    if (f.isDirectory)
      f.listFiles().foreach(deleteRecursive)
    if (f.exists())
      f.delete()
  }

  val tests = Tests {
    "createDirectories fine with sym links" - {
      var tmpDir: Path = null
      try {
        tmpDir = Files.createTempDirectory("coursier-paths-tests")
        val dir = Files.createDirectories(tmpDir.resolve("dir"))
        val link = Files.createSymbolicLink(tmpDir.resolve("link"), dir)
        Util.createDirectories(link) // should not throw
      } finally {
        deleteRecursive(tmpDir.toFile)
      }
    }

    "property expansion" - {
      "simple" - {
        val map = Map("something" -> "value", "other" -> "a")
        val sysProps = new Properties
        sysProps.setProperty("foo", "FOO")
        val toSet = Util.expandProperties(sysProps, map.asJava)
          .asScala
          .toVector
          .sorted
        val expected = map.toVector.sorted
        assert(toSet == expected)
      }

      "substitution" - {
        val map = Map("something" -> "value ${foo}", "other" -> "a")
        val sysProps = new Properties
        sysProps.setProperty("foo", "FOO")
        val toSet = Util.expandProperties(sysProps, map.asJava)
          .asScala
          .toVector
          .sorted
        val expected = Seq("something" -> "value FOO", "other" -> map("other")).sorted
        assert(toSet == expected)
      }

      "optional value" - {
        val map = Map("something" -> "value", "foo?" -> "A")
        val sysProps = new Properties
        sysProps.setProperty("foo", "FOO")
        val toSet = Util.expandProperties(sysProps, map.asJava)
          .asScala
          .toVector
          .sorted
        val expected = Seq("something" -> "value")
        assert(toSet == expected)
      }
    }
  }
}
Example 197
Source File: FileCredentials.scala From coursier with Apache License 2.0 | 5 votes |
package coursier.credentials

import java.io.{File, FileInputStream, StringReader}
import java.nio.charset.Charset
import java.nio.file.{Files, Paths}
import java.util.Properties

import dataclass.data

import scala.collection.JavaConverters._

@data class FileCredentials(
  path: String,
  optional: Boolean = true
) extends Credentials {

  def get(): Seq[DirectCredentials] = {
    val f = Paths.get(path)

    if (Files.isRegularFile(f)) {
      val content = new String(Files.readAllBytes(f), Charset.defaultCharset())
      FileCredentials.parse(content, path)
    } else if (optional)
      Nil
    else
      throw new Exception(s"Credential file $path not found")
  }
}

object FileCredentials {

  def parse(content: String, origin: String): Seq[DirectCredentials] = {

    val props = new Properties
    props.load(new StringReader(content))

    val userProps = props
      .propertyNames()
      .asScala
      .map(_.asInstanceOf[String])
      .filter(_.endsWith(".username"))
      .toVector

    userProps.map { userProp =>
      val prefix = userProp.stripSuffix(".username")

      val user = props.getProperty(userProp)
      val password = Option(props.getProperty(s"$prefix.password")).getOrElse {
        throw new Exception(s"Property $prefix.password not found in $origin")
      }

      val host = Option(props.getProperty(s"$prefix.host")).getOrElse {
        throw new Exception(s"Property $prefix.host not found in $origin")
      }

      val realmOpt = Option(props.getProperty(s"$prefix.realm")) // filter if empty?

      val matchHost = Option(props.getProperty(s"$prefix.auto")).fold(DirectCredentials.defaultMatchHost)(_.toBoolean)
      val httpsOnly = Option(props.getProperty(s"$prefix.https-only")).fold(DirectCredentials.defaultHttpsOnly)(_.toBoolean)
      val passOnRedirect = Option(props.getProperty(s"$prefix.pass-on-redirect")).fold(false)(_.toBoolean)

      DirectCredentials(host, user, password)
        .withRealm(realmOpt)
        .withMatchHost(matchHost)
        .withHttpsOnly(httpsOnly)
        .withPassOnRedirect(passOnRedirect)
    }
  }
}
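As the parse method above shows, each credential entry in the file is a group of properties sharing a prefix, with prefix.username, prefix.password and prefix.host required and the remaining keys optional. A minimal sketch of calling the parser directly, assuming the classes above are on the classpath; the prefix, host and credential values are made-up placeholders.

// Minimal usage sketch for FileCredentials.parse (placeholder values, not from the original project).
object FileCredentialsExample extends App {
  val content =
    """example.username=alice
      |example.password=secret
      |example.host=repo.example.com
      |example.https-only=true
      |""".stripMargin

  // One DirectCredentials entry is produced per "<prefix>.username" key found in the content.
  val credentials = coursier.credentials.FileCredentials.parse(content, origin = "inline-example")
  credentials.foreach(println)
}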
Example 198
Source File: Bench.scala From akka-nbench with Apache License 2.0 | 5 votes |
package bench

import akka.actor._
import akka.pattern.ask
import akka.util.Timeout

import scala.concurrent.duration._
import scala.reflect.runtime.universe._
import scala.concurrent.Await

import com.typesafe.config._
import net.ceedubs.ficus.Ficus._

import java.util.Properties
import java.nio.file._
import java.util.Date
import java.text.SimpleDateFormat

import Tapper._

object Bench extends App {

  def prepareOutputDirs(): String = {
    val csvDateTimeDir = FileSystems.getDefault().getPath(
      "tests/" + new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date()))
    Files.createDirectories(csvDateTimeDir)
    val csvSymlink = FileSystems.getDefault().getPath("tests/current")
    if (Files.isSymbolicLink(csvSymlink)) {
      Files.delete(csvSymlink)
    } else if (Files.exists(csvSymlink)) {
      throw new NotASymbolicLinkException(s"test/current is not a symbolic link. Path: $csvSymlink")
    }
    Files.createSymbolicLink(csvSymlink, csvDateTimeDir.toAbsolutePath)
    csvDateTimeDir.toAbsolutePath.toString
  }

  def parseOptions(): String = {
    val usage = """
      Usage: activator -mem 4096 "run-main bench.Bench scenario_name"
    """
    if (args.length != 1) println(usage)
    return args(0)
  }

  val scenario = parseOptions
  val config = ConfigFactory.load().getConfig(scenario)
  val duration = config.getInt("duration")
  val concurrent = config.getInt("concurrent")

  val csvDateTimeDir = prepareOutputDirs

  val system = ActorSystem("bench")
  val actorProps = Props(classOf[StatsCollector], csvDateTimeDir, config)
  val statsCollector = system.actorOf(actorProps, name = "statscollector")

  val operationsWithRatio: Map[String, Int] = config.as[Map[String, Int]]("operations")
  val numer = operationsWithRatio.values.sum
  if (concurrent < numer) {
    val msg = s"concurrent($concurrent) must greater than sum of operations ratio($numer)"
    System.err.println(msg)
    throw new ApplicationConfigException(msg)
  }
  val operations = for ((key, value) <- operationsWithRatio) yield {
    List.range(0, concurrent * operationsWithRatio(key) / numer).map(_ => key)
  }

  implicit val timeout = Timeout(duration * 2, SECONDS)

  var driverClz = Class.forName(config.getString("driver"))

  val drivers = operations.flatten.zipWithIndex.map { case (operation, i) =>
    system.actorOf(Props(driverClz, operation, statsCollector, config).withDispatcher("my-dispatcher"), name = s"driver_$i")
  }

  drivers.par.map(actor => actor ? Ready()).foreach { f =>
    Await.result(f, timeout.duration).asInstanceOf[OK]
  }

  val startAt = new Date()
  val doUntil = new Date(startAt.getTime + duration * 1000)
  drivers.par.map(actor => actor ? Go(doUntil)).foreach { f =>
    Await.result(f, timeout.duration).asInstanceOf[OK]
  }

  (statsCollector ? TearDown()).tap { f =>
    Await.result(f, timeout.duration).asInstanceOf[OK]
  }

  drivers.par.map(actor => actor ? TearDown()).foreach { f =>
    Await.result(f, timeout.duration).asInstanceOf[OK]
  }

  (drivers.head ? TearDown()).tap { f =>
    Await.result(f, timeout.duration).asInstanceOf[OK]
  }

  system.awaitTermination()
}
Example 199
Source File: WordCount.scala From kafka-streams with Apache License 2.0 | 5 votes |
import java.time.Duration
import java.util.Properties

import org.apache.kafka.streams.kstream.Materialized
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala._
import org.apache.kafka.streams.scala.kstream._
import org.apache.kafka.streams.{KafkaStreams, StreamsConfig}

object WordCount extends App {

  import Serdes._

  val props: Properties = {
    val p = new Properties()
    p.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-modified")
    p.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")
    p
  }

  val builder: StreamsBuilder = new StreamsBuilder
  val textLines: KStream[String, String] = builder.stream[String, String]("text_lines")
  val wordCounts: KTable[String, Long] = textLines
    .flatMapValues(textLine => textLine.toLowerCase.split("\\W+"))
    .groupBy((_, word) => word)
    .count()
  wordCounts.toStream.to("word_count_results")

  val streams: KafkaStreams = new KafkaStreams(builder.build(), props)
  streams.start()

  sys.ShutdownHookThread {
    streams.close(Duration.ofSeconds(10))
  }
}
Example 200
Source File: WordCountTestable.scala From kafka-streams with Apache License 2.0 | 5 votes |
package com.supergloo

import java.time.Duration
import java.util.Properties

import org.apache.kafka.streams.kstream.Materialized
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.{KafkaStreams, StreamsConfig, Topology}
import org.apache.kafka.streams.scala.{Serdes, StreamsBuilder}
import org.apache.kafka.streams.scala.kstream.{KStream, KTable}

class WordCountTestable {

  import Serdes._

  val props: Properties = {
    val p = new Properties()
    p.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-modified")
    p.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")
    p
  }

  def countNumberOfWords(inputTopic: String, outputTopic: String, storeName: String): Topology = {
    val builder: StreamsBuilder = new StreamsBuilder
    val textLines: KStream[String, String] = builder.stream[String, String](inputTopic)
    val wordCounts: KTable[String, Long] = textLines
      .flatMapValues(textLine => textLine.toLowerCase.split("\\W+"))
      .groupBy((_, word) => word)
      .count()(Materialized.as("counts-store"))
    wordCounts.toStream.to(outputTopic)
    builder.build()
  }

  def toLowerCaseStream(inputTopic: String, outputTopic: String): Topology = {
    val builder: StreamsBuilder = new StreamsBuilder()
    val textLines: KStream[String, String] = builder.stream(inputTopic)
    val wordCounts: KStream[String, String] = textLines
      .mapValues(textLine => textLine.toLowerCase)
    wordCounts.to(outputTopic)
    builder.build()
  }
}

object WordCountTestable extends WordCountTestable {

  def main(args: Array[String]): Unit = {
    val builder: Topology = countNumberOfWords("input-topic", "output-topic", "counts-store")
    val streams: KafkaStreams = new KafkaStreams(builder, props)
    streams.start()

    sys.ShutdownHookThread {
      streams.close(Duration.ofSeconds(10))
    }
  }
}