java.util.Properties Scala Examples
The following examples show how to use java.util.Properties.
The original project and source file for each example are noted in the header above it.
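Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below; the property keys and values are purely illustrative) of the basic java.util.Properties operations that recur throughout them: creating an instance, setting and reading keys, and converting to and from Scala collections.

import java.util.Properties
import scala.collection.JavaConverters._

object PropertiesBasics extends App {
  // Create an instance and set individual string keys/values.
  val props = new Properties()
  props.setProperty("bootstrap.servers", "localhost:9092") // illustrative key/value
  props.setProperty("acks", "1")

  // Bulk-load entries from a Scala Map via the Java converters.
  props.putAll(Map("client.id" -> "example-client").asJava)

  // Read a value back, supplying a default for missing keys.
  println(props.getProperty("acks", "all"))

  // Iterate over all keys as a Scala collection.
  props.stringPropertyNames().asScala.foreach { k =>
    println(s"$k = ${props.getProperty(k)}")
  }
}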
Example 1
Source File: HasDex.scala From matcher with MIT License | 8 votes |
package com.wavesplatform.dex.it.dex

import java.util.Properties
import java.util.concurrent.ThreadLocalRandom

import cats.Functor
import com.typesafe.config.{Config, ConfigFactory}
import com.wavesplatform.dex.it.api.BaseContainersKit
import com.wavesplatform.dex.it.docker.DexContainer
import com.wavesplatform.dex.it.fp.CanExtract
import mouse.any._
import org.apache.kafka.clients.admin.{AdminClient, NewTopic}

import scala.collection.JavaConverters._

trait HasDex { self: BaseContainersKit =>
  private val defaultTag = Option(System.getenv("DEX_TAG")).getOrElse("latest")

  protected implicit def toDexExplicitGetOps[F[_]: CanExtract: Functor](self: DexApi[F]): DexApiOps.ExplicitGetDexApiOps[F] = {
    new DexApiOps.ExplicitGetDexApiOps[F](self)
  }

  protected def dexInitialSuiteConfig: Config = ConfigFactory.empty()

  protected lazy val dexRunConfig: Config = dexQueueConfig(ThreadLocalRandom.current.nextInt(0, Int.MaxValue))

  protected def kafkaServer: Option[String] = Option { System.getenv("KAFKA_SERVER") }

  protected def dexQueueConfig(queueId: Int): Config = {
    kafkaServer.fold { ConfigFactory.empty() } { kafkaServer =>
      ConfigFactory.parseString(s"""waves.dex.events-queue {
                                   |  type = kafka
                                   |  kafka {
                                   |    servers = "$kafkaServer"
                                   |    topic = "dex-$queueId"
                                   |  }
                                   |}""".stripMargin)
    }
  }

  protected def createDex(name: String,
                          runConfig: Config = dexRunConfig,
                          suiteInitialConfig: Config = dexInitialSuiteConfig,
                          tag: String = defaultTag): DexContainer =
    DexContainer(name, networkName, network, getIp(name), runConfig, suiteInitialConfig, localLogsDir, tag) unsafeTap addKnownContainer

  lazy val dex1: DexContainer = createDex("dex-1")

  protected def createKafkaTopic(name: String): Unit = kafkaServer.foreach { server =>
    val properties = new Properties()
    properties.putAll(
      Map(
        "bootstrap.servers"  -> server,
        "group.id"           -> s"create-$name",
        "key.deserializer"   -> "org.apache.kafka.common.serialization.StringDeserializer",
        "value.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer"
      ).asJava
    )

    val adminClient = AdminClient.create(properties)
    try {
      val newTopic = new NewTopic(name, 1, 1.toShort)
      adminClient.createTopics(java.util.Collections.singletonList(newTopic))
    } finally {
      adminClient.close()
    }
  }
}
Example 2
Source File: DNSstat.scala From jdbcsink with Apache License 2.0 | 6 votes |
import org.apache.spark.sql.SparkSession
import java.util.Properties
import org.apache.spark.sql.types._
import org.apache.spark.sql.functions.{from_json, window}
import java.sql.{Connection, Statement, DriverManager}
import org.apache.spark.sql.ForeachWriter
import org.apache.spark.sql.Row

class JDBCSink() extends ForeachWriter[Row] {
  val driver = "com.mysql.jdbc.Driver"
  var connection: Connection = _
  var statement: Statement = _

  def open(partitionId: Long, version: Long): Boolean = {
    Class.forName(driver)
    connection = DriverManager.getConnection("jdbc:mysql://10.88.1.102:3306/aptwebservice", "root", "mysqladmin")
    statement = connection.createStatement
    true
  }

  def process(value: Row): Unit = {
    statement.executeUpdate("replace into DNSStat(ip,domain,time,count) values("
      + "'" + value.getString(0) + "'" + ","    // ip
      + "'" + value.getString(1) + "'" + ","    // domain
      + "'" + value.getTimestamp(2) + "'" + "," // time
      + value.getLong(3)                        // count
      + ")")
  }

  def close(errorOrNull: Throwable): Unit = {
    connection.close
  }
}

object DNSstatJob {

  val schema: StructType = StructType(
    Seq(StructField("Vendor", StringType, true),
      StructField("Id", IntegerType, true),
      StructField("Time", LongType, true),
      StructField("Conn", StructType(Seq(
        StructField("Proto", IntegerType, true),
        StructField("Sport", IntegerType, true),
        StructField("Dport", IntegerType, true),
        StructField("Sip", StringType, true),
        StructField("Dip", StringType, true)
      )), true),
      StructField("Dns", StructType(Seq(
        StructField("Domain", StringType, true),
        StructField("IpCount", IntegerType, true),
        StructField("Ip", StringType, true)
      )), true)))

  def main(args: Array[String]) {
    val spark = SparkSession
      .builder
      .appName("DNSJob")
      .config("spark.some.config.option", "some-value")
      .getOrCreate()
    import spark.implicits._

    val connectionProperties = new Properties()
    connectionProperties.put("user", "root")
    connectionProperties.put("password", "mysqladmin")

    val bruteForceTab = spark.read
      .jdbc("jdbc:mysql://10.88.1.102:3306/aptwebservice", "DNSTab", connectionProperties)
    bruteForceTab.registerTempTable("DNSTab")

    val lines = spark
      .readStream
      .format("kafka")
      .option("kafka.bootstrap.servers", "10.94.1.110:9092")
      .option("subscribe", "xdr")
      //.option("startingOffsets","earliest")
      .option("startingOffsets", "latest")
      .load()
      .select(from_json($"value".cast(StringType), schema).as("jsonData"))
    lines.registerTempTable("xdr")

    val filterDNS = spark.sql("select CAST(from_unixtime(xdr.jsonData.Time DIV 1000000) as timestamp) as time,xdr.jsonData.Conn.Sip as sip, xdr.jsonData.Dns.Domain from xdr inner join DNSTab on xdr.jsonData.Dns.domain = DNSTab.domain")

    val windowedCounts = filterDNS
      .withWatermark("time", "5 minutes")
      .groupBy(window($"time", "1 minutes", "1 minutes"), $"sip", $"domain")
      .count()
      .select($"sip", $"domain", $"window.start", $"count")

    val writer = new JDBCSink()
    val query = windowedCounts
      .writeStream
      .foreach(writer)
      .outputMode("update")
      .option("checkpointLocation", "/checkpoint/")
      .start()
    query.awaitTermination()
  }
}
Example 3
Source File: ExampleExternalStateSpec.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.example

import java.util.Properties

import com.typesafe.config.ConfigFactory
import io.amient.affinity.core.cluster.Node
import io.amient.affinity.core.util.AffinityTestBase
import io.amient.affinity.kafka.EmbeddedKafka
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.scalatest.concurrent.TimeLimitedTests
import org.scalatest.time.{Millis, Span}
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers}

import scala.collection.JavaConverters._

class ExampleExternalStateSpec extends FlatSpec
  with AffinityTestBase
  with EmbeddedKafka
  with Matchers
  with BeforeAndAfterAll
  with TimeLimitedTests {

  override def numPartitions = 2

  val config = configure(ConfigFactory.load("example-external-state"))

  val topic = config.getString("affinity.keyspace.external.state.news.storage.kafka.topic")

  val node = new Node(configure(config, Some(zkConnect), Some(kafkaBootstrap)))

  override def beforeAll: Unit = try {
    createTopic(topic)
    val externalProducer = createKafkaAvroProducer[String, String]()
    try {
      externalProducer.send(new ProducerRecord(topic, "10:30", "the universe is expanding"))
      externalProducer.send(new ProducerRecord(topic, "11:00", "the universe is still expanding"))
      externalProducer.send(new ProducerRecord(topic, "11:30", "the universe briefly contracted but is expanding again"))
      externalProducer.flush()
    } finally {
      externalProducer.close()
    }
    //the external fixture is produced and the externalProducer is flushed() before the node is started
    node.start()
    node.awaitClusterReady()
    //at this point all stores have loaded everything available in the external topic so the test will be deterministic
  } finally {
    super.beforeAll()
  }

  override def afterAll: Unit = try {
    node.shutdown()
  } finally {
    super.afterAll()
  }

  behavior of "External State"

  val timeLimit = Span(5000, Millis) //it should be much faster but sometimes many tests are run at the same time

  it should "start automatically tailing state partitions on startup even when master" in {
    //we don't need an arbitrary sleep to ensure the tailing state catches up with the writes above
    //before we fetch the latest news because the watermark is built into the request to make the test fast and deterministic
    val response = node.get_text(node.http_get(s"/news/latest"))
    response should include("10:30\tthe universe is expanding")
    response should include("11:00\tthe universe is still expanding")
    response should include("11:30\tthe universe briefly contracted but is expanding again")
  }

  private def createKafkaAvroProducer[K, V]() = new KafkaProducer[K, V](new Properties {
    put("bootstrap.servers", kafkaBootstrap)
    put("acks", "1")
    put("key.serializer", "io.amient.affinity.kafka.KafkaAvroSerializer")
    put("value.serializer", "io.amient.affinity.kafka.KafkaAvroSerializer")
    //this simply adds all configs required by KafkaAvroSerializer
    config.getConfig("affinity.avro").entrySet().asScala.foreach { case (entry) =>
      put(entry.getKey, entry.getValue.unwrapped())
    }
  })
}
Example 4
Source File: AvroMessageFormatterMain.scala From affinity with Apache License 2.0 | 5 votes |
import java.time.temporal.ChronoUnit
import java.util.Properties

import io.amient.affinity.kafka.AvroMessageFormatter
import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.common.serialization.ByteArrayDeserializer

import scala.collection.JavaConverters._

object AvroMessageFormatterMain extends App {

  val consumerProps = new Properties()
  consumerProps.put("bootstrap.servers", args(0))
  consumerProps.put("security.protocol", "SASL_SSL")
  consumerProps.put("sasl.mechanism", "PLAIN")
  consumerProps.put("client.id", "console")
  consumerProps.put("group.id", "console")
  consumerProps.put("key.deserializer", classOf[ByteArrayDeserializer])
  consumerProps.put("value.deserializer", classOf[ByteArrayDeserializer])
  consumerProps.put("sasl.jaas.config", "org.apache.kafka.common.security.plain.PlainLoginModule required username='" + args(1) + "' password='" + args(2) + "';")
  consumerProps.put("schema.registry.url", args(3))

  val c = new KafkaConsumer[Array[Byte], Array[Byte]](consumerProps)
  c.subscribe(List(args(4)).asJava)

  val formatter = new AvroMessageFormatter()
  formatter.init(consumerProps)

  while (true) {
    c.poll(java.time.Duration.of(1, ChronoUnit.SECONDS)).asScala.foreach { record =>
      formatter.writeTo(record, System.out)
    }
  }
}
Example 5
Source File: EmbeddedConfluentRegistry.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.kafka

import java.nio.channels.ServerSocketChannel
import java.util.Properties

import io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient
import io.confluent.kafka.schemaregistry.rest.{SchemaRegistryConfig, SchemaRegistryRestApplication}
import org.scalatest.{BeforeAndAfterAll, Suite}
import org.slf4j.LoggerFactory

trait EmbeddedConfluentRegistry extends EmbeddedKafka with BeforeAndAfterAll {

  self: Suite =>

  private val log = LoggerFactory.getLogger(classOf[EmbeddedConfluentRegistry])

  private val registryConfig: SchemaRegistryConfig = new SchemaRegistryConfig(new Properties() {
    put("listeners", s"http://127.0.0.1:0")
    put("kafkastore.connection.url", zkConnect)
    put("avro.compatibility.level", "full")
    put("kafkastore.init.timeout.ms", "5000")
    put("kafkastore.topic", "_schemas")
    put("debug", "true")
  })
  private val app = new SchemaRegistryRestApplication(registryConfig)
  private val registry = app.createServer
  registry.start()

  val registryUrl = s"http://127.0.0.1:" + registry.getConnectors.head.getTransport.asInstanceOf[ServerSocketChannel].socket.getLocalPort
  log.info("Confluent schema registry listening at: " + registryUrl)

  val registryClient = new CachedSchemaRegistryClient(registryUrl, 20)

  abstract override def afterAll() = try {
    registry.stop()
  } finally {
    super.afterAll()
  }
}
Example 6
Source File: EmbeddedKafkaServer.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.kafka

import java.io.File
import java.util.Properties
import java.util.concurrent.TimeUnit

import kafka.server.{KafkaConfig, KafkaServerStartable}
import kafka.zk.BrokerIdZNode
import org.I0Itec.zkclient.ZkClient
import org.I0Itec.zkclient.serialize.ZkSerializer
import org.apache.kafka.clients.admin.{AdminClient, NewTopic}
import org.apache.kafka.common.network.ListenerName
import org.apache.kafka.common.security.auth.SecurityProtocol
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters._
import scala.collection.mutable

trait EmbeddedKafkaServer extends EmbeddedService with EmbeddedZookeperServer {

  private val log = LoggerFactory.getLogger(classOf[EmbeddedKafka])

  def numPartitions: Int

  private val embeddedKafkaPath = new File(testDir, "local-kafka-logs")
  private val kafkaConfig = new KafkaConfig(new Properties {
    put("broker.id", "1")
    put("host.name", "localhost")
    put("port", "0")
    put("log.dir", embeddedKafkaPath.toString)
    put("num.partitions", numPartitions.toString)
    put("auto.create.topics.enable", "false")
    put("delete.topic.enable", "true")
    put("zookeeper.connect", zkConnect)
    put("offsets.topic.replication.factor", "1")
  })

  private val kafka = new KafkaServerStartable(kafkaConfig)
  kafka.startup()

  lazy val admin = AdminClient.create(Map[String, AnyRef]("bootstrap.servers" -> kafkaBootstrap).asJava)

  def createTopic(name: String): Unit = {
    admin.createTopics(List(new NewTopic(name, numPartitions, 1)).asJava).all().get(30, TimeUnit.SECONDS)
  }

  def listTopics: mutable.Set[String] = {
    admin.listTopics().names().get(1, TimeUnit.SECONDS).asScala
  }

  val tmpZkClient = new ZkClient(zkConnect, 5000, 6000, new ZkSerializer {
    def serialize(o: Object): Array[Byte] = o.toString.getBytes

    override def deserialize(bytes: Array[Byte]): Object = new String(bytes)
  })

  val broker = BrokerIdZNode.decode(1, tmpZkClient.readData[String]("/brokers/ids/1").getBytes("utf-8")).broker
  val kafkaBootstrap = broker.brokerEndPoint(ListenerName.forSecurityProtocol(SecurityProtocol.PLAINTEXT)).connectionString()
  tmpZkClient.close
  log.info(s"Embedded Kafka $kafkaBootstrap, data dir: $testDir")

  abstract override def close(): Unit = try kafka.shutdown finally super.close
}
Example 7
Source File: TransactionalProducer.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.kafka

import java.util.Properties

import akka.actor.Actor
import akka.actor.Status.{Failure, Success}
import akka.event.Logging
import com.typesafe.config.Config
import io.amient.affinity.Conf
import io.amient.affinity.core.actor.{TransactionAbort, TransactionBegin, TransactionCommit, TransactionalRecord}
import io.amient.affinity.core.config.CfgStruct
import io.amient.affinity.core.storage.StorageConf
import io.amient.affinity.kafka.KafkaStorage.{KafkaConsumerConf, KafkaProducerConf}
import org.apache.kafka.clients.producer.{Callback, KafkaProducer, ProducerRecord, RecordMetadata}
import org.apache.kafka.common.serialization.ByteArraySerializer

import scala.collection.JavaConverters._

object KafkaConf extends KafkaConf {
  override def apply(config: Config): KafkaConf = new KafkaConf().apply(config)
}

class KafkaConf extends CfgStruct[KafkaConf](classOf[StorageConf]) {
  val BootstrapServers = string("kafka.bootstrap.servers", true).doc("kafka connection string used for consumer and/or producer")
  val Producer = struct("kafka.producer", new KafkaProducerConf, false).doc("any settings that the underlying version of kafka producer client supports")
  val Consumer = struct("kafka.consumer", new KafkaConsumerConf, false).doc("any settings that the underlying version of kafka consumer client supports")
}

class TransactionalProducer extends Actor {

  val logger = Logging.getLogger(context.system, this)

  private[this] var producer: KafkaProducer[Array[Byte], Array[Byte]] = null

  val kafkaConf = KafkaConf(Conf(context.system.settings.config).Affi.Storage)

  val producerConfig = new Properties() {
    if (kafkaConf.Producer.isDefined) {
      val producerConfig = kafkaConf.Producer.toMap()
      if (producerConfig.containsKey("bootstrap.servers")) throw new IllegalArgumentException("bootstrap.servers cannot be overridden for KafkaStorage producer")
      if (producerConfig.containsKey("key.serializer")) throw new IllegalArgumentException("Binary kafka stream cannot use custom key.serializer")
      if (producerConfig.containsKey("value.serializer")) throw new IllegalArgumentException("Binary kafka stream cannot use custom value.serializer")
      producerConfig.entrySet.asScala.filter(_.getValue.isDefined).foreach { case (entry) =>
        put(entry.getKey, entry.getValue.apply.toString)
      }
    }
    put("bootstrap.servers", kafkaConf.BootstrapServers())
    put("value.serializer", classOf[ByteArraySerializer].getName)
    put("key.serializer", classOf[ByteArraySerializer].getName)
  }

  override def receive: Receive = {
    case req@TransactionBegin(transactionalId) => req(sender) ! {
      if (producer == null) {
        producerConfig.put("transactional.id", transactionalId)
        producer = new KafkaProducer[Array[Byte], Array[Byte]](producerConfig)
        logger.debug(s"Transactions.Init(transactional.id = $transactionalId)")
        producer.initTransactions()
      }
      logger.debug("Transactions.Begin()")
      producer.beginTransaction()
    }

    case TransactionalRecord(topic, key, value, timestamp, partition) =>
      val replyto = sender
      val producerRecord = new ProducerRecord(
        topic,
        partition.map(new Integer(_)).getOrElse(null),
        timestamp.map(new java.lang.Long(_)).getOrElse(null),
        key,
        value)
      logger.debug(s"Transactions.Append(topic=$topic)")
      producer.send(producerRecord, new Callback {
        override def onCompletion(metadata: RecordMetadata, exception: Exception): Unit = {
          if (exception != null) {
            replyto ! Failure(exception)
          } else {
            replyto ! Success(metadata.offset())
          }
        }
      })

    case req@TransactionCommit() => req(sender) ! {
      logger.debug("Transactions.Commit()")
      producer.commitTransaction()
    }

    case req@TransactionAbort() => req(sender) ! {
      logger.debug("Transactions.Abort()")
      producer.abortTransaction()
    }
  }
}
Example 8
Source File: KafkaAvroConsumer.scala From sqs-kafka-connect with Apache License 2.0 | 5 votes |
package com.hivehome.kafka.connect.sqs

import java.util.{Properties, UUID}

import io.confluent.kafka.serializers.KafkaAvroDeserializer
import org.apache.kafka.clients.consumer.{ConsumerConfig => ConsumerConfigConst, KafkaConsumer}
import org.apache.kafka.common.serialization.ByteArrayDeserializer
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters._
import scala.concurrent.duration._
import scala.language.postfixOps

// Note: the enclosing class declaration (KafkaAvroConsumer[K, V], which provides `cons`,
// `logger` and `PollingInterval`) is elided in this excerpt; only the poll method is shown.
  def poll(numberOfMessagesExpected: Int,
           timeout: FiniteDuration = 30 seconds,
           accept: V => Boolean = _ => true): Vector[V] = {
    val deadline = timeout.fromNow
    var messages = Vector.empty[V]
    while (deadline.hasTimeLeft && messages.size < numberOfMessagesExpected) {
      val records = cons.poll(PollingInterval)
      // convert to Seq so that we have all the messages once we have
      // exhausted the iterator
      val msgsSeq = records.iterator().asScala.toSeq
      messages = messages ++ msgsSeq.map(_.value()).filter(accept).toVector
    }

    logger.debug("Number of messages received {}", messages.size)
    if (messages.size < numberOfMessagesExpected) {
      throw new AssertionError(s"Expected $numberOfMessagesExpected messages within $timeout, but only received ${messages.size}. $messages")
    }
    // Possibly throw exception if too many messages?
    messages
  }
}

object KafkaAvroConsumer {
  val logger = LoggerFactory.getLogger(getClass)

  def apply[K, V](kafkaProps: Map[String, String], topicName: String): KafkaAvroConsumer[K, V] = {
    val props = new Properties()
    props.putAll(kafkaProps.asJava)
    props.put(ConsumerConfigConst.GROUP_ID_CONFIG, "test" + UUID.randomUUID().toString.substring(0, 10))
    props.put(ConsumerConfigConst.KEY_DESERIALIZER_CLASS_CONFIG, classOf[ByteArrayDeserializer])
    props.put(ConsumerConfigConst.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[KafkaAvroDeserializer])
    logger.info(s"Consuming from $topicName with properties $props")
    val cons = new KafkaConsumer[K, V](props)
    cons.subscribe(Seq(topicName).asJava)
    new KafkaAvroConsumer(cons)
  }
}
Example 9
Source File: package.scala From kyuubi with Apache License 2.0 | 5 votes |
package yaooqinn

import java.util.Properties

import scala.util.Try

import yaooqinn.kyuubi.service.ServiceException

package object kyuubi {

  private object BuildInfo {
    private val buildFile = "kyuubi-version-info.properties"
    private val buildFileStream =
      Thread.currentThread().getContextClassLoader.getResourceAsStream(buildFile)
    private val unknown = "<unknown>"
    private val props = new Properties()

    try {
      props.load(buildFileStream)
    } catch {
      case e: Exception => throw new ServiceException(e)
    } finally {
      Try(buildFileStream.close())
    }

    val version: String = props.getProperty("kyuubi_version", unknown)
    val sparkVersion: String = props.getProperty("spark_version", unknown)
    val branch: String = props.getProperty("branch", unknown)
    val jar: String = props.getProperty("jar", unknown)
    val revision: String = props.getProperty("revision", unknown)
    val user: String = props.getProperty("user", unknown)
    val repoUrl: String = props.getProperty("url", unknown)
    val buildDate: String = props.getProperty("date", unknown)
  }

  val KYUUBI_VERSION: String = BuildInfo.version
  val SPARK_COMPILE_VERSION: String = BuildInfo.sparkVersion
  val BRANCH: String = BuildInfo.branch
  val KYUUBI_JAR_NAME: String = BuildInfo.jar
  val REVISION: String = BuildInfo.revision
  val BUILD_USER: String = BuildInfo.user
  val REPO_URL: String = BuildInfo.repoUrl
  val BUILD_DATE: String = BuildInfo.buildDate
}
Example 10
Source File: packageSuite.scala From kyuubi with Apache License 2.0 | 5 votes |
package yaooqinn.kyuubi

import java.util.Properties

import org.apache.spark.SparkFunSuite

class packageSuite extends SparkFunSuite {

  test("build info") {
    val buildFile = "kyuubi-version-info.properties"
    val str = Thread.currentThread().getContextClassLoader.getResourceAsStream(buildFile)
    val props = new Properties()
    assert(str !== null)
    props.load(str)
    str.close()
    assert(props.getProperty("kyuubi_version") === KYUUBI_VERSION)
    assert(props.getProperty("spark_version") === SPARK_COMPILE_VERSION)
    assert(props.getProperty("branch") === BRANCH)
    assert(props.getProperty("jar") === KYUUBI_JAR_NAME)
    assert(props.getProperty("revision") === REVISION)
    assert(props.getProperty("user") === BUILD_USER)
    assert(props.getProperty("url") === REPO_URL)
  }
}
Example 11
Source File: Utils.scala From kyuubi with Apache License 2.0 | 5 votes |
package org.apache.kyuubi

import java.io.{File, InputStreamReader, IOException}
import java.net.{URI, URISyntaxException}
import java.nio.charset.StandardCharsets
import java.util.{Properties, UUID}

import scala.collection.JavaConverters._
import scala.util.{Success, Try}

private[kyuubi] object Utils extends Logging {

  import org.apache.kyuubi.config.KyuubiConf._

  def strToSeq(s: String): Seq[String] = {
    require(s != null)
    s.split(",").map(_.trim).filter(_.nonEmpty)
  }

  def getSystemProperties: Map[String, String] = {
    sys.props.toMap
  }

  def getDefaultPropertiesFile(env: Map[String, String] = sys.env): Option[File] = {
    env.get(KYUUBI_CONF_DIR)
      .orElse(env.get(KYUUBI_HOME).map(_ + File.separator + "/conf"))
      .map(d => new File(d + File.separator + KYUUBI_CONF_FILE_NAME))
      .filter(f => f.exists() && f.isFile)
  }

  def getPropertiesFromFile(file: Option[File]): Map[String, String] = {
    file.map { f =>
      info(s"Loading Kyuubi properties from ${f.getAbsolutePath}")
      val reader = new InputStreamReader(f.toURI.toURL.openStream(), StandardCharsets.UTF_8)
      try {
        val properties = new Properties()
        properties.load(reader)
        properties.stringPropertyNames().asScala.map { k =>
          (k, properties.getProperty(k).trim)
        }.toMap
      } catch {
        case e: IOException =>
          throw new KyuubiException(
            s"Failed when loading Kyuubi properties from ${f.getAbsolutePath}", e)
      } finally {
        reader.close()
      }
    }.getOrElse(Map.empty)
  }

  def createTempDir(
      root: String = System.getProperty("java.io.tmpdir"),
      namePrefix: String = "kyuubi"): File = {
    val dir = createDirectory(root, namePrefix)
    dir.deleteOnExit()
    dir
  }
}
Example 12
Source File: package.scala From kyuubi with Apache License 2.0 | 5 votes |
package org.apache

import java.util.Properties

import scala.util.Try

package object kyuubi {

  private object BuildInfo {
    private val buildFile = "kyuubi-version-info.properties"
    private val buildFileStream =
      Thread.currentThread().getContextClassLoader.getResourceAsStream(buildFile)
    private val unknown = "<unknown>"
    private val props = new Properties()

    try {
      props.load(buildFileStream)
    } catch {
      case e: Exception => throw new KyuubiException(e)
    } finally {
      Try(buildFileStream.close())
    }

    val version: String = props.getProperty("kyuubi_version", unknown)
    val sparkVersion: String = props.getProperty("spark_version", unknown)
    val branch: String = props.getProperty("branch", unknown)
    val jar: String = props.getProperty("jar", unknown)
    val revision: String = props.getProperty("revision", unknown)
    val user: String = props.getProperty("user", unknown)
    val repoUrl: String = props.getProperty("url", unknown)
    val buildDate: String = props.getProperty("date", unknown)
  }

  val KYUUBI_VERSION: String = BuildInfo.version
  val SPARK_COMPILE_VERSION: String = BuildInfo.sparkVersion
  val BRANCH: String = BuildInfo.branch
  val KYUUBI_JAR_NAME: String = BuildInfo.jar
  val REVISION: String = BuildInfo.revision
  val BUILD_USER: String = BuildInfo.user
  val REPO_URL: String = BuildInfo.repoUrl
  val BUILD_DATE: String = BuildInfo.buildDate
}
Example 13
Source File: MetadataIteratorSpec.scala From jgit-spark-connector with Apache License 2.0 | 5 votes |
package tech.sourced.engine.iterator

import java.nio.file.Paths
import java.util.{Properties, UUID}

import org.apache.commons.io.FileUtils
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.types.{Metadata, StringType, StructType}
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers}
import tech.sourced.engine.{BaseSparkSpec, Schema}

class JDBCQueryIteratorSpec
  extends FlatSpec with Matchers with BeforeAndAfterAll with BaseSparkSpec {

  private val tmpPath = Paths.get(
    System.getProperty("java.io.tmpdir"),
    UUID.randomUUID.toString
  )
  private val dbPath = tmpPath.resolve("test.db")

  override def beforeAll(): Unit = {
    super.beforeAll()
    tmpPath.toFile.mkdir()
    val rdd = ss.sparkContext.parallelize(Seq(
      Row("id1"),
      Row("id2"),
      Row("id3")
    ))

    val properties = new Properties()
    properties.put("driver", "org.sqlite.JDBC")
    val df = ss.createDataFrame(rdd, StructType(Seq(Schema.repositories.head)))
    df.write.jdbc(s"jdbc:sqlite:${dbPath.toString}", "repositories", properties)
  }

  override def afterAll(): Unit = {
    super.afterAll()
    FileUtils.deleteQuietly(tmpPath.toFile)
  }

  "JDBCQueryIterator" should "return all rows for the query" in {
    val iter = new JDBCQueryIterator(
      Seq(attr("id")),
      dbPath.toString,
      "SELECT id FROM repositories ORDER BY id"
    )
    // calling hasNext more than one time does not cause rows to be lost
    iter.hasNext
    iter.hasNext
    val rows = (for (row <- iter) yield row).toArray
    rows.length should be(3)
    rows(0).length should be(1)
    rows(0)(0).toString should be("id1")
    rows(1)(0).toString should be("id2")
    rows(2)(0).toString should be("id3")
  }

  private def attr(name: String): Attribute = AttributeReference(
    name, StringType, nullable = false, Metadata.empty
  )()
}
Example 14
Source File: EmailNotifierConfig.scala From vinyldns with Apache License 2.0 | 5 votes |
package vinyldns.api.notifier.email

import scala.collection.JavaConverters._
import javax.mail.Address
import javax.mail.internet.InternetAddress
import pureconfig.ConfigReader
import scala.util.Try
import pureconfig.error.CannotConvert
import java.util.Properties
import com.typesafe.config.{ConfigObject, ConfigValue}
import com.typesafe.config.ConfigValueType

object EmailNotifierConfig {

  implicit val smtpPropertiesReader: ConfigReader[Properties] = {
    ConfigReader[ConfigObject].map { config =>
      val props = new Properties()

      def convertToProperties(baseKey: String, config: ConfigObject): Unit =
        config.keySet().asScala.foreach {
          case key =>
            config.get(key) match {
              case value: ConfigObject => convertToProperties(s"${baseKey}.${key}", value)
              case value: ConfigValue if value.valueType != ConfigValueType.NULL =>
                props.put(s"${baseKey}.${key}", value.unwrapped())
              case _ =>
            }
        }

      convertToProperties("mail.smtp", config)
      props
    }
  }

  implicit val addressReader: ConfigReader[Address] = ConfigReader[String].emap { s =>
    Try(new InternetAddress(s)).toEither.left.map { exc =>
      CannotConvert(s, "InternetAddress", exc.getMessage)
    }
  }
}

case class EmailNotifierConfig(from: Address, smtp: Properties = new Properties())
Example 15
Source File: UserBehaviorMsgProducer.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.examples.streaming.IBMKafkaStream

import scala.util.Random
import java.util.Properties
import kafka.producer.KeyedMessage
import kafka.producer.ProducerConfig
import kafka.producer.Producer

class UserBehaviorMsgProducer(brokers: String, topic: String) extends Runnable {
  private val brokerList = brokers
  private val targetTopic = topic
  private val props = new Properties()
  props.put("metadata.broker.list", this.brokerList)
  props.put("serializer.class", "kafka.serializer.StringEncoder")
  props.put("producer.type", "async")
  private val config = new ProducerConfig(this.props)
  private val producer = new Producer[String, String](this.config)

  private val PAGE_NUM = 100
  private val MAX_MSG_NUM = 3
  private val MAX_CLICK_TIME = 5
  private val MAX_STAY_TIME = 10
  // Like 1; Dislike -1; No Feeling 0
  private val LIKE_OR_NOT = Array[Int](1, 0, -1)

  def run(): Unit = {
    val rand = new Random()
    while (true) {
      // how many user behavior messages will be produced
      val msgNum = rand.nextInt(MAX_MSG_NUM) + 1
      try {
        // generate the message with format like page1|2|7.123|1
        // page ID | click count | stay time (minutes) | liked or not
        // (page001.html, 1, 0.5, 1)
        // The first field is the page ID, the second is the number of clicks on that page between
        // entering and leaving the site, the third is the stay time in minutes, and the fourth
        // indicates a like: 1 for like, -1 for dislike, 0 for neutral.
        for (i <- 0 to msgNum) {
          var msg = new StringBuilder()
          msg.append("page" + (rand.nextInt(PAGE_NUM) + 1))
          msg.append("|")
          msg.append(rand.nextInt(MAX_CLICK_TIME) + 1)
          msg.append("|")
          msg.append(rand.nextInt(MAX_CLICK_TIME) + rand.nextFloat())
          msg.append("|")
          msg.append(LIKE_OR_NOT(rand.nextInt(3)))
          println(msg.toString())
          // send the generated message to broker
          sendMessage(msg.toString())
        }
        println("%d user behavior messages produced.".format(msgNum + 1))
      } catch {
        case e: Exception => println(e)
      }
      try {
        // sleep for 5 seconds after sending a micro batch of messages
        // every 5 seconds a random batch of behavior messages is pushed to the user-behavior-topic topic
        Thread.sleep(5000)
      } catch {
        case e: Exception => println(e)
      }
    }
  }

  def sendMessage(message: String) = {
    try {
      val data = new KeyedMessage[String, String](this.topic, message)
      producer.send(data)
    } catch {
      case e: Exception => println(e)
    }
  }
}

object UserBehaviorMsgProducerClient {
  def main(args: Array[String]) {
    // start the message producer thread
    val Array(zkServers, processingInterval) = Array("192.168.200.80:9092", "topic") // args
    new Thread(new UserBehaviorMsgProducer(zkServers, processingInterval)).start()
  }
}
Example 16
Source File: JDBCRelation.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.jdbc

import java.util.Properties

import scala.collection.mutable.ArrayBuffer

import org.apache.spark.Partition
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.sources._
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{DataFrame, Row, SQLContext, SaveMode}

// Note: the opening of the enclosing JDBCRelation companion object is elided in this excerpt.
  def columnPartition(partitioning: JDBCPartitioningInfo): Array[Partition] = {
    if (partitioning == null) return Array[Partition](JDBCPartition(null, 0))

    val numPartitions = partitioning.numPartitions
    val column = partitioning.column
    if (numPartitions == 1) return Array[Partition](JDBCPartition(null, 0))
    // Overflow and silliness can happen if you subtract then divide.
    // Here we get a little roundoff, but that's (hopefully) OK.
    val stride: Long = (partitioning.upperBound / numPartitions
      - partitioning.lowerBound / numPartitions)
    var i: Int = 0
    var currentValue: Long = partitioning.lowerBound
    var ans = new ArrayBuffer[Partition]()
    while (i < numPartitions) {
      val lowerBound = if (i != 0) s"$column >= $currentValue" else null
      currentValue += stride
      val upperBound = if (i != numPartitions - 1) s"$column < $currentValue" else null
      val whereClause =
        if (upperBound == null) {
          lowerBound
        } else if (lowerBound == null) {
          upperBound
        } else {
          s"$lowerBound AND $upperBound"
        }
      ans += JDBCPartition(whereClause, i)
      i = i + 1
    }
    ans.toArray
  }
}

private[sql] case class JDBCRelation(
    url: String,
    table: String,
    parts: Array[Partition],
    properties: Properties = new Properties())(@transient val sqlContext: SQLContext)
  extends BaseRelation
  with PrunedFilteredScan
  with InsertableRelation {

  override val needConversion: Boolean = false

  override val schema: StructType = JDBCRDD.resolveTable(url, table, properties)

  override def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = {
    val driver: String = DriverRegistry.getDriverClassName(url)
    // Rely on a type erasure hack to pass RDD[InternalRow] back as RDD[Row]
    JDBCRDD.scanTable(
      sqlContext.sparkContext,
      schema,
      driver,
      url,
      properties,
      table,
      requiredColumns,
      filters,
      parts).asInstanceOf[RDD[Row]]
  }

  override def insert(data: DataFrame, overwrite: Boolean): Unit = {
    data.write
      .mode(if (overwrite) SaveMode.Overwrite else SaveMode.Append)
      .jdbc(url, table, properties)
  }
}
Example 17
Source File: DefaultSource.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.jdbc

import java.util.Properties

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.sources.{BaseRelation, RelationProvider, DataSourceRegister}

class DefaultSource extends RelationProvider with DataSourceRegister {

  override def shortName(): String = "jdbc"

  override def createRelation(
      sqlContext: SQLContext,
      parameters: Map[String, String]): BaseRelation = {
    val url = parameters.getOrElse("url", sys.error("Option 'url' not specified"))
    val driver = parameters.getOrElse("driver", null)
    val table = parameters.getOrElse("dbtable", sys.error("Option 'dbtable' not specified"))
    val partitionColumn = parameters.getOrElse("partitionColumn", null)
    val lowerBound = parameters.getOrElse("lowerBound", null)
    val upperBound = parameters.getOrElse("upperBound", null)
    val numPartitions = parameters.getOrElse("numPartitions", null)

    if (driver != null) DriverRegistry.register(driver)

    if (partitionColumn != null
      && (lowerBound == null || upperBound == null || numPartitions == null)) {
      sys.error("Partitioning incompletely specified")
    }

    val partitionInfo = if (partitionColumn == null) {
      null
    } else {
      JDBCPartitioningInfo(
        partitionColumn,
        lowerBound.toLong,
        upperBound.toLong,
        numPartitions.toInt)
    }
    val parts = JDBCRelation.columnPartition(partitionInfo)
    // Additional properties that we will pass to getConnection
    val properties = new Properties()
    parameters.foreach(kv => properties.setProperty(kv._1, kv._2))
    JDBCRelation(url, table, parts, properties)(sqlContext)
  }
}
Example 18
Source File: DefaultSource.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources

import java.util.Properties

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.execution.datasources.jdbc.{JDBCRelation, JDBCPartitioningInfo, DriverRegistry}
import org.apache.spark.sql.sources.{BaseRelation, DataSourceRegister, RelationProvider}

class DefaultSource extends RelationProvider with DataSourceRegister {

  override def shortName(): String = "jdbc"

  override def createRelation(
      sqlContext: SQLContext,
      parameters: Map[String, String]): BaseRelation = {
    val url = parameters.getOrElse("url", sys.error("Option 'url' not specified"))
    val driver = parameters.getOrElse("driver", null)
    val table = parameters.getOrElse("dbtable", sys.error("Option 'dbtable' not specified"))
    val partitionColumn = parameters.getOrElse("partitionColumn", null)
    val lowerBound = parameters.getOrElse("lowerBound", null)
    val upperBound = parameters.getOrElse("upperBound", null)
    val numPartitions = parameters.getOrElse("numPartitions", null)

    if (driver != null) DriverRegistry.register(driver)

    if (partitionColumn != null
      && (lowerBound == null || upperBound == null || numPartitions == null)) {
      sys.error("Partitioning incompletely specified")
    }

    val partitionInfo = if (partitionColumn == null) {
      null
    } else {
      JDBCPartitioningInfo(
        partitionColumn,
        lowerBound.toLong,
        upperBound.toLong,
        numPartitions.toInt)
    }
    val parts = JDBCRelation.columnPartition(partitionInfo)
    // Additional properties that we will pass to getConnection
    val properties = new Properties()
    parameters.foreach(kv => properties.setProperty(kv._1, kv._2))
    JDBCRelation(url, table, parts, properties)(sqlContext)
  }
}
Example 19
Source File: SQLExecutionSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution

import java.util.Properties

import scala.collection.parallel.CompositeThrowable

import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite}
import org.apache.spark.sql.SQLContext

// SQL execution test suite
class SQLExecutionSuite extends SparkFunSuite {

  // concurrent query execution
  test("concurrent query execution (SPARK-10548)") {
    // Try to reproduce the issue with the old SparkContext
    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("test")
    val badSparkContext = new BadSparkContext(conf)
    try {
      testConcurrentQueryExecution(badSparkContext)
      fail("unable to reproduce SPARK-10548")
    } catch {
      case e: IllegalArgumentException =>
        assert(e.getMessage.contains(SQLExecution.EXECUTION_ID_KEY))
    } finally {
      badSparkContext.stop()
    }

    // Verify that the issue is fixed with the latest SparkContext
    val goodSparkContext = new SparkContext(conf)
    try {
      testConcurrentQueryExecution(goodSparkContext)
    } finally {
      goodSparkContext.stop()
    }
  }

  private class BadSparkContext(conf: SparkConf) extends SparkContext(conf) {
    protected[spark] override val localProperties = new InheritableThreadLocal[Properties] {
      override protected def childValue(parent: Properties): Properties = new Properties(parent)
      override protected def initialValue(): Properties = new Properties()
    }
  }
  // Note: testConcurrentQueryExecution and the remainder of the suite are elided in this excerpt.
Example 20
Source File: GangliaSink.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink

import java.util.Properties
import java.util.concurrent.TimeUnit

import com.codahale.metrics.MetricRegistry
import com.codahale.metrics.ganglia.GangliaReporter
import info.ganglia.gmetric4j.gmetric.GMetric
import info.ganglia.gmetric4j.gmetric.GMetric.UDPAddressingMode

import org.apache.spark.SecurityManager
import org.apache.spark.metrics.MetricsSystem

class GangliaSink(val property: Properties, val registry: MetricRegistry,
    securityMgr: SecurityManager) extends Sink {
  val GANGLIA_KEY_PERIOD = "period"
  val GANGLIA_DEFAULT_PERIOD = 10

  val GANGLIA_KEY_UNIT = "unit"
  val GANGLIA_DEFAULT_UNIT: TimeUnit = TimeUnit.SECONDS

  val GANGLIA_KEY_MODE = "mode"
  val GANGLIA_DEFAULT_MODE: UDPAddressingMode = GMetric.UDPAddressingMode.MULTICAST

  // TTL for multicast messages. If listeners are X hops away in network, must be at least X.
  val GANGLIA_KEY_TTL = "ttl"
  val GANGLIA_DEFAULT_TTL = 1

  val GANGLIA_KEY_HOST = "host"
  val GANGLIA_KEY_PORT = "port"

  def propertyToOption(prop: String): Option[String] = Option(property.getProperty(prop))

  if (!propertyToOption(GANGLIA_KEY_HOST).isDefined) {
    throw new Exception("Ganglia sink requires 'host' property.")
  }

  if (!propertyToOption(GANGLIA_KEY_PORT).isDefined) {
    throw new Exception("Ganglia sink requires 'port' property.")
  }

  val host = propertyToOption(GANGLIA_KEY_HOST).get
  val port = propertyToOption(GANGLIA_KEY_PORT).get.toInt
  val ttl = propertyToOption(GANGLIA_KEY_TTL).map(_.toInt).getOrElse(GANGLIA_DEFAULT_TTL)
  val mode: UDPAddressingMode = propertyToOption(GANGLIA_KEY_MODE)
    .map(u => GMetric.UDPAddressingMode.valueOf(u.toUpperCase)).getOrElse(GANGLIA_DEFAULT_MODE)
  val pollPeriod = propertyToOption(GANGLIA_KEY_PERIOD).map(_.toInt)
    .getOrElse(GANGLIA_DEFAULT_PERIOD)
  val pollUnit: TimeUnit = propertyToOption(GANGLIA_KEY_UNIT)
    .map(u => TimeUnit.valueOf(u.toUpperCase))
    .getOrElse(GANGLIA_DEFAULT_UNIT)

  MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod)

  val ganglia = new GMetric(host, port, mode, ttl)
  val reporter: GangliaReporter = GangliaReporter.forRegistry(registry)
      .convertDurationsTo(TimeUnit.MILLISECONDS)
      .convertRatesTo(TimeUnit.SECONDS)
      .build(ganglia)

  override def start() {
    reporter.start(pollPeriod, pollUnit)
  }

  override def stop() {
    reporter.stop()
  }

  override def report() {
    reporter.report()
  }
}
Example 21
Source File: MetricsConfig.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics

import java.io.{FileInputStream, InputStream}
import java.util.Properties

import scala.collection.mutable
import scala.util.matching.Regex

import org.apache.spark.util.Utils
import org.apache.spark.{Logging, SparkConf}

private[spark] class MetricsConfig(conf: SparkConf) extends Logging {

  private val DEFAULT_PREFIX = "*"
  private val INSTANCE_REGEX = "^(\\*|[a-zA-Z]+)\\.(.+)".r
  private val DEFAULT_METRICS_CONF_FILENAME = "metrics.properties"

  private[metrics] val properties = new Properties()
  private[metrics] var propertyCategories: mutable.HashMap[String, Properties] = null

  private def setDefaultProperties(prop: Properties) {
    prop.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet")
    prop.setProperty("*.sink.servlet.path", "/metrics/json")
    prop.setProperty("master.sink.servlet.path", "/metrics/master/json")
    prop.setProperty("applications.sink.servlet.path", "/metrics/applications/json")
  }

  def initialize() {
    // Add default properties in case there's no properties file
    setDefaultProperties(properties)

    loadPropertiesFromFile(conf.getOption("spark.metrics.conf"))

    // Also look for the properties in provided Spark configuration
    val prefix = "spark.metrics.conf."
    conf.getAll.foreach {
      case (k, v) if k.startsWith(prefix) =>
        properties.setProperty(k.substring(prefix.length()), v)
      case _ =>
    }

    propertyCategories = subProperties(properties, INSTANCE_REGEX)
    if (propertyCategories.contains(DEFAULT_PREFIX)) {
      import scala.collection.JavaConversions._

      val defaultProperty = propertyCategories(DEFAULT_PREFIX)
      for {
        (inst, prop) <- propertyCategories
        if (inst != DEFAULT_PREFIX)
        (k, v) <- defaultProperty
        if (prop.getProperty(k) == null)
      } {
        prop.setProperty(k, v)
      }
    }
  }

  // Match property keys against the instance regex and group the matching entries into a
  // HashMap keyed by the instance prefix.
  def subProperties(prop: Properties, regex: Regex): mutable.HashMap[String, Properties] = {
    val subProperties = new mutable.HashMap[String, Properties]
    import scala.collection.JavaConversions._
    prop.foreach { kv =>
      if (regex.findPrefixOf(kv._1).isDefined) {
        val regex(prefix, suffix) = kv._1
        subProperties.getOrElseUpdate(prefix, new Properties).setProperty(suffix, kv._2)
      }
    }
    subProperties
  }

  def getInstance(inst: String): Properties = {
    propertyCategories.get(inst) match {
      case Some(s) => s
      case None => propertyCategories.getOrElse(DEFAULT_PREFIX, new Properties)
    }
  }

  private[this] def loadPropertiesFromFile(path: Option[String]): Unit = {
    var is: InputStream = null
    try {
      is = path match {
        case Some(f) => new FileInputStream(f)
        case None => Utils.getSparkClassLoader.getResourceAsStream(DEFAULT_METRICS_CONF_FILENAME)
      }

      if (is != null) {
        properties.load(is)
      }
    } catch {
      case e: Exception =>
        val file = path.getOrElse(DEFAULT_METRICS_CONF_FILENAME)
        logError(s"Error loading configuration file $file", e)
    } finally {
      if (is != null) {
        is.close()
      }
    }
  }
}
Example 22
Source File: CsvSink.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink

import java.io.File
import java.util.{Locale, Properties}
import java.util.concurrent.TimeUnit

import com.codahale.metrics.{CsvReporter, MetricRegistry}

import org.apache.spark.SecurityManager
import org.apache.spark.metrics.MetricsSystem

private[spark] class CsvSink(val property: Properties, val registry: MetricRegistry,
    securityMgr: SecurityManager) extends Sink {
  val CSV_KEY_PERIOD = "period"
  val CSV_KEY_UNIT = "unit"
  val CSV_KEY_DIR = "directory"

  val CSV_DEFAULT_PERIOD = 10
  val CSV_DEFAULT_UNIT = "SECONDS"
  val CSV_DEFAULT_DIR = "/tmp/"

  val pollPeriod = Option(property.getProperty(CSV_KEY_PERIOD)) match {
    case Some(s) => s.toInt
    case None => CSV_DEFAULT_PERIOD
  }

  val pollUnit: TimeUnit = Option(property.getProperty(CSV_KEY_UNIT)) match {
    case Some(s) => TimeUnit.valueOf(s.toUpperCase())
    case None => TimeUnit.valueOf(CSV_DEFAULT_UNIT)
  }

  MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod)

  val pollDir = Option(property.getProperty(CSV_KEY_DIR)) match {
    case Some(s) => s
    case None => CSV_DEFAULT_DIR
  }

  val reporter: CsvReporter = CsvReporter.forRegistry(registry)
      .formatFor(Locale.US)
      .convertDurationsTo(TimeUnit.MILLISECONDS)
      .convertRatesTo(TimeUnit.SECONDS)
      .build(new File(pollDir))

  override def start() {
    reporter.start(pollPeriod, pollUnit)
  }

  override def stop() {
    reporter.stop()
  }

  override def report() {
    reporter.report()
  }
}
Example 23
Source File: MetricsServlet.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink

import java.util.Properties
import java.util.concurrent.TimeUnit
import javax.servlet.http.HttpServletRequest

import com.codahale.metrics.MetricRegistry
import com.codahale.metrics.json.MetricsModule
import com.fasterxml.jackson.databind.ObjectMapper
import org.eclipse.jetty.servlet.ServletContextHandler

import org.apache.spark.SecurityManager
import org.apache.spark.ui.JettyUtils._

private[spark] class MetricsServlet(
    val property: Properties,
    val registry: MetricRegistry,
    securityMgr: SecurityManager)
  extends Sink {

  val SERVLET_KEY_PATH = "path"
  val SERVLET_KEY_SAMPLE = "sample"

  val SERVLET_DEFAULT_SAMPLE = false

  val servletPath = property.getProperty(SERVLET_KEY_PATH)

  val servletShowSample = Option(property.getProperty(SERVLET_KEY_SAMPLE)).map(_.toBoolean)
    .getOrElse(SERVLET_DEFAULT_SAMPLE)

  val mapper = new ObjectMapper().registerModule(
    new MetricsModule(TimeUnit.SECONDS, TimeUnit.MILLISECONDS, servletShowSample))

  // Builds the ServletContextHandler that serves /metrics/json requests; the actual request
  // handling is done by the getMetricsSnapshot method, which serializes the registry to JSON.
  def getHandlers: Array[ServletContextHandler] = {
    Array[ServletContextHandler](
      createServletHandler(servletPath,
        new ServletParams(request => getMetricsSnapshot(request), "text/json"), securityMgr)
    )
  }

  def getMetricsSnapshot(request: HttpServletRequest): String = {
    mapper.writeValueAsString(registry)
  }

  override def start() { }

  override def stop() { }

  override def report() { }
}
Example 24
Source File: Slf4jSink.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink

import java.util.Properties
import java.util.concurrent.TimeUnit

import com.codahale.metrics.{Slf4jReporter, MetricRegistry}

import org.apache.spark.SecurityManager
import org.apache.spark.metrics.MetricsSystem

private[spark] class Slf4jSink(
    val property: Properties,
    val registry: MetricRegistry,
    securityMgr: SecurityManager)
  extends Sink {
  val SLF4J_DEFAULT_PERIOD = 10
  val SLF4J_DEFAULT_UNIT = "SECONDS"

  val SLF4J_KEY_PERIOD = "period"
  val SLF4J_KEY_UNIT = "unit"

  val pollPeriod = Option(property.getProperty(SLF4J_KEY_PERIOD)) match {
    case Some(s) => s.toInt
    case None => SLF4J_DEFAULT_PERIOD
  }

  val pollUnit: TimeUnit = Option(property.getProperty(SLF4J_KEY_UNIT)) match {
    case Some(s) => TimeUnit.valueOf(s.toUpperCase())
    case None => TimeUnit.valueOf(SLF4J_DEFAULT_UNIT)
  }

  MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod)

  val reporter: Slf4jReporter = Slf4jReporter.forRegistry(registry)
    .convertDurationsTo(TimeUnit.MILLISECONDS)
    .convertRatesTo(TimeUnit.SECONDS)
    .build()

  override def start() {
    reporter.start(pollPeriod, pollUnit)
  }

  override def stop() {
    reporter.stop()
  }

  override def report() {
    reporter.report()
  }
}
Example 25
Source File: ConsoleSink.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink

import java.util.Properties
import java.util.concurrent.TimeUnit

import com.codahale.metrics.{ConsoleReporter, MetricRegistry}

import org.apache.spark.SecurityManager
import org.apache.spark.metrics.MetricsSystem

private[spark] class ConsoleSink(val property: Properties, val registry: MetricRegistry,
    securityMgr: SecurityManager) extends Sink {
  val CONSOLE_DEFAULT_PERIOD = 10
  val CONSOLE_DEFAULT_UNIT = "SECONDS"

  val CONSOLE_KEY_PERIOD = "period"
  val CONSOLE_KEY_UNIT = "unit"

  val pollPeriod = Option(property.getProperty(CONSOLE_KEY_PERIOD)) match {
    case Some(s) => s.toInt
    case None => CONSOLE_DEFAULT_PERIOD
  }

  val pollUnit: TimeUnit = Option(property.getProperty(CONSOLE_KEY_UNIT)) match {
    case Some(s) => TimeUnit.valueOf(s.toUpperCase())
    case None => TimeUnit.valueOf(CONSOLE_DEFAULT_UNIT)
  }

  MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod)

  val reporter: ConsoleReporter = ConsoleReporter.forRegistry(registry)
      .convertDurationsTo(TimeUnit.MILLISECONDS)
      .convertRatesTo(TimeUnit.SECONDS)
      .build()

  override def start() {
    reporter.start(pollPeriod, pollUnit)
  }

  override def stop() {
    reporter.stop()
  }

  override def report() {
    reporter.report()
  }
}
Example 26
Source File: GraphiteSink.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink

import java.net.InetSocketAddress
import java.util.Properties
import java.util.concurrent.TimeUnit

import com.codahale.metrics.MetricRegistry
import com.codahale.metrics.graphite.{GraphiteUDP, Graphite, GraphiteReporter}

import org.apache.spark.SecurityManager
import org.apache.spark.metrics.MetricsSystem

private[spark] class GraphiteSink(val property: Properties, val registry: MetricRegistry,
    securityMgr: SecurityManager) extends Sink {
  val GRAPHITE_DEFAULT_PERIOD = 10
  val GRAPHITE_DEFAULT_UNIT = "SECONDS"
  val GRAPHITE_DEFAULT_PREFIX = ""

  val GRAPHITE_KEY_HOST = "host"
  val GRAPHITE_KEY_PORT = "port"
  val GRAPHITE_KEY_PERIOD = "period"
  val GRAPHITE_KEY_UNIT = "unit"
  val GRAPHITE_KEY_PREFIX = "prefix"
  val GRAPHITE_KEY_PROTOCOL = "protocol"

  def propertyToOption(prop: String): Option[String] = Option(property.getProperty(prop))

  if (!propertyToOption(GRAPHITE_KEY_HOST).isDefined) {
    throw new Exception("Graphite sink requires 'host' property.")
  }

  if (!propertyToOption(GRAPHITE_KEY_PORT).isDefined) {
    throw new Exception("Graphite sink requires 'port' property.")
  }

  val host = propertyToOption(GRAPHITE_KEY_HOST).get
  val port = propertyToOption(GRAPHITE_KEY_PORT).get.toInt

  val pollPeriod = propertyToOption(GRAPHITE_KEY_PERIOD) match {
    case Some(s) => s.toInt
    case None => GRAPHITE_DEFAULT_PERIOD
  }

  val pollUnit: TimeUnit = propertyToOption(GRAPHITE_KEY_UNIT) match {
    case Some(s) => TimeUnit.valueOf(s.toUpperCase())
    case None => TimeUnit.valueOf(GRAPHITE_DEFAULT_UNIT)
  }

  val prefix = propertyToOption(GRAPHITE_KEY_PREFIX).getOrElse(GRAPHITE_DEFAULT_PREFIX)

  MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod)

  val graphite = propertyToOption(GRAPHITE_KEY_PROTOCOL).map(_.toLowerCase) match {
    case Some("udp") => new GraphiteUDP(new InetSocketAddress(host, port))
    case Some("tcp") | None => new Graphite(new InetSocketAddress(host, port))
    case Some(p) => throw new Exception(s"Invalid Graphite protocol: $p")
  }

  val reporter: GraphiteReporter = GraphiteReporter.forRegistry(registry)
      .convertDurationsTo(TimeUnit.MILLISECONDS)
      .convertRatesTo(TimeUnit.SECONDS)
      .prefixedWith(prefix)
      .build(graphite)

  override def start() {
    reporter.start(pollPeriod, pollUnit)
  }

  override def stop() {
    reporter.stop()
  }

  override def report() {
    reporter.report()
  }
}
Example 27
Source File: ActiveJob.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler

import java.util.Properties

import org.apache.spark.TaskContext
import org.apache.spark.util.CallSite

private[spark] class ActiveJob(
    val jobId: Int,                            // each job is assigned a unique ID
    val finalStage: ResultStage,               // the final stage
    val func: (TaskContext, Iterator[_]) => _, // the function applied to the last stage
    val partitions: Array[Int],                // the partition list, i.e. how many partitions data is read from and processed
    val callSite: CallSite,
    val listener: JobListener,                 // job listener
    val properties: Properties) {

  // number of partitions for this job
  val numPartitions = partitions.length

  // marks whether the task for each partition has finished
  val finished = Array.fill[Boolean](numPartitions)(false)

  // number of tasks that have already finished
  var numFinished = 0
}
Example 28
Source File: ResetSystemProperties.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.util

import java.util.Properties

import org.apache.commons.lang3.SerializationUtils
import org.scalatest.{BeforeAndAfterEach, Suite}

import org.apache.spark.SparkFunSuite

private[spark] trait ResetSystemProperties extends BeforeAndAfterEach { this: Suite =>
  var oldProperties: Properties = null

  override def beforeEach(): Unit = {
    // we need SerializationUtils.clone instead of `new Properties(System.getProperties()` because
    // the later way of creating a copy does not copy the properties but it initializes a new
    // Properties object with the given properties as defaults. They are not recognized at all
    // by standard Scala wrapper over Java Properties then.
    oldProperties = SerializationUtils.clone(System.getProperties)
    super.beforeEach()
  }

  override def afterEach(): Unit = {
    try {
      super.afterEach()
    } finally {
      System.setProperties(oldProperties)
      oldProperties = null
    }
  }
}
Example 29
Source File: KafkaWriter.scala From haystack-traces with Apache License 2.0 | 5 votes |
package com.expedia.www.haystack.trace.indexer.writers.kafka

import java.util.Properties

import com.codahale.metrics.Meter
import com.expedia.open.tracing.buffer.SpanBuffer
import com.expedia.www.haystack.commons.metrics.MetricsSupport
import com.expedia.www.haystack.trace.commons.packer.PackedMessage
import com.expedia.www.haystack.trace.indexer.metrics.AppMetricNames
import com.expedia.www.haystack.trace.indexer.writers.TraceWriter
import org.apache.kafka.clients.producer._
import org.slf4j.LoggerFactory

import scala.util.Try

object KafkaWriter extends MetricsSupport {
  protected val kafkaProducerFailures: Meter = metricRegistry.meter(AppMetricNames.KAFKA_PRODUCE_FAILURES)
}

class KafkaWriter(producerConfig: Properties, topic: String) extends TraceWriter {
  private val LOGGER = LoggerFactory.getLogger(classOf[KafkaWriter])

  private val producer = new KafkaProducer[String, Array[Byte]](producerConfig)

  override def writeAsync(traceId: String, packedSpanBuffer: PackedMessage[SpanBuffer], isLastSpanBuffer: Boolean): Unit = {
    val record = new ProducerRecord[String, Array[Byte]](topic, traceId, packedSpanBuffer.packedDataBytes)

    producer.send(record, (_: RecordMetadata, exception: Exception) => {
      if (exception != null) {
        LOGGER.error(s"Fail to write the span buffer record to kafka topic=$topic", exception)
        KafkaWriter.kafkaProducerFailures.mark()
      }
    })
  }

  override def close(): Unit = Try(producer.close())
}
Example 30
Source File: KafkaConfiguration.scala From haystack-traces with Apache License 2.0 | 5 votes |
package com.expedia.www.haystack.trace.indexer.config.entities

import java.util.Properties

case class KafkaConfiguration(numStreamThreads: Int,
                              pollTimeoutMs: Long,
                              consumerProps: Properties,
                              producerProps: Properties,
                              produceTopic: String,
                              consumeTopic: String,
                              consumerCloseTimeoutInMillis: Int,
                              commitOffsetRetries: Int,
                              commitBackoffInMillis: Long,
                              maxWakeups: Int,
                              wakeupTimeoutInMillis: Int)
Example 31
Source File: KafkaTestClient.scala From haystack-traces with Apache License 2.0 | 5 votes |
package com.expedia.www.haystack.trace.indexer.integration.clients

import java.util.Properties

import com.expedia.www.haystack.trace.indexer.config.entities.KafkaConfiguration
import com.expedia.www.haystack.trace.indexer.integration.serdes.{SnappyCompressedSpanBufferProtoDeserializer, SpanProtoSerializer}
import com.expedia.www.haystack.trace.indexer.serde.SpanDeserializer
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig
import org.apache.kafka.common.serialization.{ByteArraySerializer, StringDeserializer, StringSerializer}
import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster

object KafkaTestClient {
  val KAFKA_CLUSTER = new EmbeddedKafkaCluster(1)
  KAFKA_CLUSTER.start()
}

class KafkaTestClient {
  import KafkaTestClient._

  val INPUT_TOPIC = "spans"
  val OUTPUT_TOPIC = "span-buffer"

  val APP_PRODUCER_CONFIG: Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    props.put(ProducerConfig.ACKS_CONFIG, "1")
    props.put(ProducerConfig.BATCH_SIZE_CONFIG, "20")
    props.put(ProducerConfig.RETRIES_CONFIG, "0")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[ByteArraySerializer])
    props
  }

  val APP_CONSUMER_CONFIG: Properties = new Properties()

  val TEST_PRODUCER_CONFIG: Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    props.put(ProducerConfig.ACKS_CONFIG, "1")
    props.put(ProducerConfig.BATCH_SIZE_CONFIG, "20")
    props.put(ProducerConfig.RETRIES_CONFIG, "0")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[SpanProtoSerializer])
    props
  }

  val RESULT_CONSUMER_CONFIG = new Properties()

  def buildConfig = KafkaConfiguration(numStreamThreads = 1,
    pollTimeoutMs = 100,
    APP_CONSUMER_CONFIG,
    APP_PRODUCER_CONFIG,
    OUTPUT_TOPIC,
    INPUT_TOPIC,
    consumerCloseTimeoutInMillis = 3000,
    commitOffsetRetries = 3,
    commitBackoffInMillis = 250,
    maxWakeups = 5,
    wakeupTimeoutInMillis = 3000)

  def prepare(appId: String): Unit = {
    APP_CONSUMER_CONFIG.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    APP_CONSUMER_CONFIG.put(ConsumerConfig.GROUP_ID_CONFIG, appId + "-app-consumer")
    APP_CONSUMER_CONFIG.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
    APP_CONSUMER_CONFIG.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
    APP_CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[SpanDeserializer])
    APP_CONSUMER_CONFIG.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false")

    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.GROUP_ID_CONFIG, appId + "-result-consumer")
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[SnappyCompressedSpanBufferProtoDeserializer])

    deleteTopics(INPUT_TOPIC, OUTPUT_TOPIC)
    KAFKA_CLUSTER.createTopic(INPUT_TOPIC, 2, 1)
    KAFKA_CLUSTER.createTopic(OUTPUT_TOPIC)
  }

  private def deleteTopics(topics: String*): Unit = KAFKA_CLUSTER.deleteTopicsAndWait(topics: _*)
}
Example 32
Source File: DistributedProcessing.scala From aecor with MIT License | 5 votes |
package aecor.kafkadistributedprocessing import java.util.Properties import aecor.kafkadistributedprocessing.internal.Kafka import aecor.kafkadistributedprocessing.internal.Kafka._ import cats.effect.{ ConcurrentEffect, ContextShift, Timer } import cats.implicits._ import cats.effect.implicits._ import fs2.Stream import org.apache.kafka.clients.consumer.ConsumerConfig import scala.concurrent.duration._ final class DistributedProcessing(settings: DistributedProcessingSettings) { private def assignRange(size: Int, partitionCount: Int, partition: Int): Option[(Int, Int)] = { val even = size / partitionCount val reminder = size % partitionCount if (partition >= partitionCount) { none } else { if (partition < reminder) { (partition * (even + 1), even + 1).some } else if (even > 0) { (reminder + partition * even, even).some } else none } } def start[F[_]: ConcurrentEffect: Timer: ContextShift](name: String, processes: List[F[Unit]]): F[Unit] = Kafka .assignPartitions( settings.asProperties(name), settings.topicName, settings.pollingInterval, settings.pollTimeout ) .parEvalMapUnordered(Int.MaxValue) { case AssignedPartition(partition, partitionCount, watchRevocation, release) => assignRange(processes.size, partitionCount, partition).fold(release) { case (offset, processCount) => Stream .range[F](offset, offset + processCount) .parEvalMapUnordered(processCount)(processes) .compile .drain .race(watchRevocation) .flatMap { case Left(_) => release case Right(callback) => callback } } } .compile .drain } object DistributedProcessing { def apply(settings: DistributedProcessingSettings): DistributedProcessing = new DistributedProcessing(settings) } final case class DistributedProcessingSettings(brokers: Set[String], topicName: String, pollingInterval: FiniteDuration = 500.millis, pollTimeout: FiniteDuration = 50.millis, consumerSettings: Map[String, String] = Map.empty) { def withClientId(clientId: String): DistributedProcessingSettings = withConsumerSetting(ConsumerConfig.CLIENT_ID_CONFIG, clientId) def clientId: Option[String] = consumerSettings.get(ConsumerConfig.CLIENT_ID_CONFIG) def withConsumerSetting(key: String, value: String): DistributedProcessingSettings = copy(consumerSettings = consumerSettings.updated(key, value)) def asProperties(groupId: String): Properties = { val properties = new Properties() consumerSettings.foreach { case (key, value) => properties.setProperty(key, value) } properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers.mkString(",")) properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId) properties } }
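A sketch of how the settings and start method above might be driven from a cats-effect IOApp; the broker address, topic name, application name, and process bodies are all placeholders:

import aecor.kafkadistributedprocessing.{DistributedProcessing, DistributedProcessingSettings}
import cats.effect.{ExitCode, IO, IOApp}

object ProcessingApp extends IOApp {
  // Placeholder settings: the partition count of "processing-lock" bounds how many nodes can share the work.
  private val settings = DistributedProcessingSettings(
    brokers = Set("localhost:9092"),
    topicName = "processing-lock")

  // Eight dummy never-ending processes; the Kafka partitions assigned to this node decide which of them run here.
  private val processes: List[IO[Unit]] =
    List.tabulate(8)(i => IO(println(s"process $i started")).flatMap(_ => IO.never))

  override def run(args: List[String]): IO[ExitCode] =
    DistributedProcessing(settings).start("example-processing", processes)
      .map(_ => ExitCode.Success)
}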
Example 33
Source File: KafkaConsumer.scala From aecor with MIT License | 5 votes |
package aecor.kafkadistributedprocessing.internal import java.time.Duration import java.util.Properties import java.util.concurrent.Executors import cats.effect.{ Async, ContextShift, Resource } import cats.~> import org.apache.kafka.clients.consumer.{ Consumer, ConsumerRebalanceListener, ConsumerRecords } import org.apache.kafka.common.PartitionInfo import org.apache.kafka.common.serialization.Deserializer import scala.collection.JavaConverters._ import scala.concurrent.ExecutionContext import scala.concurrent.duration.FiniteDuration private[kafkadistributedprocessing] final class KafkaConsumer[F[_], K, V]( withConsumer: (Consumer[K, V] => *) ~> F ) { def subscribe(topics: Set[String], listener: ConsumerRebalanceListener): F[Unit] = withConsumer(_.subscribe(topics.asJava, listener)) def subscribe(topics: Set[String]): F[Unit] = withConsumer(_.subscribe(topics.asJava)) val unsubscribe: F[Unit] = withConsumer(_.unsubscribe()) def partitionsFor(topic: String): F[Set[PartitionInfo]] = withConsumer(_.partitionsFor(topic).asScala.toSet) def close: F[Unit] = withConsumer(_.close()) def poll(timeout: FiniteDuration): F[ConsumerRecords[K, V]] = withConsumer(_.poll(Duration.ofNanos(timeout.toNanos))) } private[kafkadistributedprocessing] object KafkaConsumer { final class Create[F[_]] { def apply[K, V]( config: Properties, keyDeserializer: Deserializer[K], valueDeserializer: Deserializer[V] )(implicit F: Async[F], contextShift: ContextShift[F]): Resource[F, KafkaConsumer[F, K, V]] = { val create = F.suspend { val executor = Executors.newSingleThreadExecutor() def eval[A](a: => A): F[A] = contextShift.evalOn(ExecutionContext.fromExecutor(executor)) { F.async[A] { cb => executor.execute(new Runnable { override def run(): Unit = cb { try Right(a) catch { case e: Throwable => Left(e) } } }) } } eval { val original = Thread.currentThread.getContextClassLoader Thread.currentThread.setContextClassLoader(null) val consumer = new org.apache.kafka.clients.consumer.KafkaConsumer[K, V]( config, keyDeserializer, valueDeserializer ) Thread.currentThread.setContextClassLoader(original) val withConsumer = new ((Consumer[K, V] => *) ~> F) { def apply[A](f: Consumer[K, V] => A): F[A] = eval(f(consumer)) } new KafkaConsumer[F, K, V](withConsumer) } } Resource.make(create)(_.close) } } def create[F[_]]: Create[F] = new Create[F] }
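Because the class is package-private, the sketch below only makes sense from code inside the same project; it shows how the Properties-based factory is meant to be used (broker, group id, and topic are placeholders):

import java.util.Properties
import cats.effect.{ContextShift, IO}
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer
import scala.concurrent.ExecutionContext

implicit val contextShift: ContextShift[IO] = IO.contextShift(ExecutionContext.global)

val config = new Properties()
config.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")
config.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "partition-watcher")

// The Resource guarantees consumer.close() runs on the same single-thread executor used for polling.
val partitionCount: IO[Int] =
  KafkaConsumer.create[IO](config, new StringDeserializer, new StringDeserializer)
    .use(_.partitionsFor("my-topic").map(_.size))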
Example 34
Source File: KafkaTest.scala From aecor with MIT License | 5 votes |
package aecor.kafkadistributedprocessing import java.util.Properties import aecor.kafkadistributedprocessing.internal.Kafka.UnitDeserializer import aecor.kafkadistributedprocessing.internal.RebalanceEvents.RebalanceEvent import aecor.kafkadistributedprocessing.internal.RebalanceEvents.RebalanceEvent.{ PartitionsAssigned, PartitionsRevoked } import aecor.kafkadistributedprocessing.internal.{ Kafka, KafkaConsumer } import cats.effect.IO import cats.implicits._ import fs2.Stream import fs2.concurrent.Queue import org.apache.kafka.clients.consumer.ConsumerConfig import org.scalatest.funsuite.AnyFunSuite import scala.concurrent.duration._ class KafkaTest extends AnyFunSuite with IOSupport with KafkaSupport { val topic = "test" val partitionCount = 4 createCustomTopic(topic, partitions = partitionCount) val createConsumerAccess = { val properties = new Properties() properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers.mkString(",")) properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "test") KafkaConsumer.create[IO](properties, new UnitDeserializer, new UnitDeserializer) } val watchRebalanceEvents = Stream .resource(createConsumerAccess) .flatMap(Kafka.watchRebalanceEvents(_, topic, 500.millis, 50.millis)) test("Rebalance event stream") { val program = for { queue <- Queue.unbounded[IO, (Int, RebalanceEvent)] run = (n: Int) => watchRebalanceEvents .evalMap { x => val e = n -> x.value queue.enqueue1(e) >> x.commit } .compile .drain .start p1 <- run(1) l1 <- queue.dequeue.take(2).compile.toList p2 <- run(2) l2 <- queue.dequeue.take(4).compile.toList _ <- p1.cancel l3 <- queue.dequeue.take(2).compile.toList _ <- p2.cancel } yield (l1, l2, l3) val (l1, l2, l3) = program.unsafeRunTimed(40.seconds).get def fold(list: List[(Int, RebalanceEvent)]): Map[Int, Set[Int]] = list.foldLeft(Map.empty[Int, Set[Int]]) { case (s, (c, e)) => e match { case PartitionsRevoked(partitions) => s.updated(c, s.getOrElse(c, Set.empty[Int]) -- partitions.map(_.partition())) case PartitionsAssigned(partitions) => s.updated(c, s.getOrElse(c, Set.empty[Int]) ++ partitions.map(_.partition())) } } assert(fold(l1) == Map(1 -> Set(1, 0, 3, 2))) assert(fold(l2) == Map(1 -> Set(1, 0), 2 -> Set(2, 3))) assert(fold(l3) == Map(2 -> Set(1, 0, 3, 2))) } test("Topic partitions query works before subscription") { val program = createConsumerAccess.use(_.partitionsFor(topic)) val result = program.unsafeRunTimed(2.seconds).get assert(result.size == partitionCount) } }
Example 35
Source File: EventTimeHeartBeatExample.scala From flink_training with Apache License 2.0 | 5 votes |
package com.tmalaska.flinktraining.example.eventtime import java.util.Properties import java.util.concurrent.TimeUnit import com.tmalaska.flinktraining.example.session.HeartBeat import net.liftweb.json.DefaultFormats import net.liftweb.json.Serialization.read import org.apache.flink.api.scala._ import org.apache.flink.api.common.serialization.SimpleStringSchema import org.apache.flink.streaming.api.TimeCharacteristic import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks import org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} import org.apache.flink.streaming.api.watermark.Watermark import org.apache.flink.streaming.api.windowing.time.Time import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010 object EventTimeHeartBeatExample { def main(args: Array[String]) { val kafkaServerURL = args(0) val kafkaServerPort = args(1) val kafkaTopic = args(2) val groupId = args(3) val typeOfWindow = args(4) val env = StreamExecutionEnvironment.getExecutionEnvironment env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime) // create a stream using socket val properties = new Properties properties.setProperty("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort) properties.setProperty("zookeeper.connect", "localhost:2181") properties.setProperty("group.id", groupId) println("kafkaTopic:" + kafkaTopic) val heartbeatStream:DataStream[HeartBeat] = env.addSource( new FlinkKafkaConsumer010(kafkaTopic, new SimpleStringSchema(), properties)) .map(json => { implicit val formats = DefaultFormats read[HeartBeat](json) }) .assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks[HeartBeat]() { override def getCurrentWatermark: Watermark = { new Watermark(System.currentTimeMillis() - 10000) } override def extractTimestamp(element: HeartBeat, previousElementTimestamp: Long): Long = { element.eventTime } }) // implement word count val entityCount = heartbeatStream .map(heartBeat => (heartBeat.entityId, 1)) val keyValuePair = entityCount.keyBy(0) val countPair = if (typeOfWindow.equals("slidingCount")) { //Slide by count. Have a sliding window of 5 messages and trigger or slide 2 messages keyValuePair.countWindow(5, 2).sum(1) } else if (typeOfWindow.equals("tumbleTime")) { //Tumble by time. Trigger and Slide by 5 seconds keyValuePair.timeWindow(Time.of(5, TimeUnit.SECONDS)).sum(1) } else if (typeOfWindow.equals("slidingTime")) { //Slide by time. Have a sliding window of 5 seconds that tiggers every 2 seconds keyValuePair.timeWindow(Time.of(5, TimeUnit.SECONDS), Time.of(2, TimeUnit.SECONDS)).sum(1) } else { //Tumble by time. Trigger every 5 seconds keyValuePair.countWindow(5).sum(1) } // print the results countPair.print() // execute the program env.execute("Scala WordCount Example") } } class MessageTimestamp extends AssignerWithPeriodicWatermarks[HeartBeat] { override def getCurrentWatermark: Watermark = { //TODO null } override def extractTimestamp(t: HeartBeat, l: Long): Long = { //TODO -1 } }
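The MessageTimestamp assigner at the end of this example is left as a pair of TODOs. One possible completion, assuming HeartBeat.eventTime is an epoch timestamp in milliseconds and tolerating 10 seconds of out-of-orderness, is sketched below (renamed so it does not clash with the stub above):

import com.tmalaska.flinktraining.example.session.HeartBeat
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks
import org.apache.flink.streaming.api.watermark.Watermark

class BoundedHeartBeatTimestamps extends AssignerWithPeriodicWatermarks[HeartBeat] {
  private val maxOutOfOrdernessMs = 10000L
  private var currentMaxTimestamp = Long.MinValue + maxOutOfOrdernessMs

  // The watermark trails the highest event time seen so far by the allowed lateness.
  override def getCurrentWatermark: Watermark =
    new Watermark(currentMaxTimestamp - maxOutOfOrdernessMs)

  override def extractTimestamp(element: HeartBeat, previousElementTimestamp: Long): Long = {
    currentMaxTimestamp = math.max(currentMaxTimestamp, element.eventTime)
    element.eventTime
  }
}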
Example 36
Source File: SessionKafkaProducer.scala From flink_training with Apache License 2.0 | 5 votes |
package com.tmalaska.flinktraining.example.session import java.util.{Properties, Random} import net.liftweb.json.DefaultFormats import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import net.liftweb.json.Serialization.write object SessionKafkaProducer { def main(args:Array[String]): Unit = { implicit val formats = DefaultFormats val kafkaServerURL = args(0) val kafkaServerPort = args(1) val topic = args(2) val numberOfEntities = args(3).toInt val numberOfMessagesPerEntity = args(4).toInt val waitTimeBetweenMessageBatch = args(5).toInt val chancesOfMissing = args(6).toInt val props = new Properties() props.put("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort) props.put("acks", "all") props.put("retries", "0") props.put("batch.size", "16384") props.put("linger.ms", "1") props.put("buffer.memory", "33554432") props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) val r = new Random() var sentCount = 0 println("About to send to " + topic) for (j <- 0 to numberOfMessagesPerEntity) { for (i <- 0 to numberOfEntities) { if (r.nextInt(chancesOfMissing) != 0) { val message = write(HeartBeat(i.toString, System.currentTimeMillis())) val producerRecord = new ProducerRecord[String,String](topic, message) producer.send(producerRecord) sentCount += 1 } } println("Sent Count:" + sentCount) Thread.sleep(waitTimeBetweenMessageBatch) } producer.close() } }
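The producer properties above use bare string keys; an equivalent setup with the ProducerConfig constants avoids silent typos (broker address is a placeholder):

import java.util.Properties
import org.apache.kafka.clients.producer.ProducerConfig
import org.apache.kafka.common.serialization.StringSerializer

val props = new Properties()
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")
props.put(ProducerConfig.ACKS_CONFIG, "all")
props.put(ProducerConfig.RETRIES_CONFIG, "0")
props.put(ProducerConfig.BATCH_SIZE_CONFIG, "16384")
props.put(ProducerConfig.LINGER_MS_CONFIG, "1")
props.put(ProducerConfig.BUFFER_MEMORY_CONFIG, "33554432")
props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName)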
Example 37
Source File: StreamingSessionExample.scala From flink_training with Apache License 2.0 | 5 votes |
package com.tmalaska.flinktraining.example.session import java.util.Properties import net.liftweb.json.DefaultFormats import net.liftweb.json.Serialization.read import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor} import org.apache.flink.configuration.Configuration import org.apache.flink.streaming.api.functions.ProcessFunction import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} import org.apache.flink.util.Collector import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010 import org.apache.flink.api.common.serialization.SimpleStringSchema import org.apache.flink.api.scala._ object StreamingSessionExample { def main(args:Array[String]): Unit = { val kafkaServerURL = args(0) val kafkaServerPort = args(1) val kafkaTopic = args(2) val groupId = args(3) val sessionTimeOut = args(4).toInt val env = StreamExecutionEnvironment.getExecutionEnvironment //val socketStream = env.socketTextStream("localhost",9999, '\n') val properties = new Properties properties.setProperty("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort) properties.setProperty("zookeeper.connect", "localhost:2181") properties.setProperty("group.id", groupId) println("kafkaTopic:" + kafkaTopic) val messageStream:DataStream[String] = env.addSource( new FlinkKafkaConsumer010(kafkaTopic, new SimpleStringSchema(), properties)) val heartBeatStream = messageStream .map(str => { implicit val formats = DefaultFormats println("str:" + str) val hb = read[HeartBeat](str) (hb.entityId, hb.eventTime) }).keyBy(0).process(new MyProcessFunction(sessionTimeOut)) heartBeatStream.map(session => { println("session:" + session) session }) heartBeatStream.print() env.execute() } } class MyProcessFunction(sessionTimeOut:Int) extends ProcessFunction[(String,Long), SessionObj] { private var state:ValueState[SessionObj] = null override def open(parameters: Configuration): Unit = { state = getRuntimeContext.getState(new ValueStateDescriptor[SessionObj]("myState", classOf[SessionObj])) } override def processElement(value: (String, Long), ctx: ProcessFunction[(String, Long), SessionObj]#Context, out: Collector[SessionObj]): Unit = { val currentSession = state.value() var outBoundSessionRecord:SessionObj = null if (currentSession == null) { outBoundSessionRecord = SessionObj(value._2, value._2, 1) } else { outBoundSessionRecord = SessionObj(currentSession.startTime, value._2, currentSession.heartbeatCount + 1) } state.update(outBoundSessionRecord) out.collect(outBoundSessionRecord) ctx.timerService.registerEventTimeTimer(System.currentTimeMillis() + sessionTimeOut) } override def onTimer(timestamp: Long, ctx: ProcessFunction[(String, Long), SessionObj]#OnTimerContext, out: Collector[SessionObj]): Unit = { val result = state.value if (result != null && result.latestEndTime + sessionTimeOut < System.currentTimeMillis()) { // emit the state on timeout state.clear() } } } case class SessionObj(startTime:Long, latestEndTime:Long, heartbeatCount:Int)
Example 38
Source File: StreamingSQL.scala From flink_training with Apache License 2.0 | 5 votes |
package com.tmalaska.flinktraining.example.wordcount import java.util.Properties import org.apache.flink.api.common.serialization.SimpleStringSchema import org.apache.flink.api.scala._ import org.apache.flink.streaming.api.functions.sink.SinkFunction import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010 import org.apache.flink.table.api.scala._ import org.apache.flink.table.api.{Table, TableEnvironment} import org.apache.flink.types.Row object StreamingSQL { def main(args:Array[String]): Unit = { val kafkaServerURL = args(0) val kafkaServerPort = args(1) val kafkaTopic = args(2) val groupId = args(3) val env = StreamExecutionEnvironment.getExecutionEnvironment val tableEnv = TableEnvironment.getTableEnvironment(env) val properties = new Properties properties.setProperty("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort) properties.setProperty("zookeeper.connect", "localhost:2181") properties.setProperty("group.id", groupId) println("kafkaTopic:" + kafkaTopic) val entityCountStream:DataStream[(String, Int)] = env.addSource( new FlinkKafkaConsumer010(kafkaTopic, new SimpleStringSchema(), properties)) .flatMap(line => line.toUpperCase.split(' ')) .map(word => (word, 1)) tableEnv.registerDataStream("myTable2", entityCountStream, 'word, 'frequency) val roleUp = tableEnv.sqlQuery("SELECT word, SUM(frequency) FROM myTable2 GROUP BY word") val typeInfo = createTypeInformation[(String, Int)] val outStream = roleUp.toRetractStream(typeInfo) outStream.print() env.execute("Scala SQL Example") } } class CustomSinkFunction() extends SinkFunction[Row] { @throws[Exception] def invoke(value: Row): Unit = { //Do something println("-" + value) } }
Example 39
Source File: SimpleWordCount.scala From flink_training with Apache License 2.0 | 5 votes |
package com.tmalaska.flinktraining.example.wordcount import java.util.Properties import java.util.concurrent.TimeUnit import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010 import org.apache.flink.api.common.serialization.SimpleStringSchema import org.apache.flink.api.scala._ import org.apache.flink.streaming.api.windowing.time.Time object SimpleWordCount { def main(args: Array[String]) { val kafkaServerURL = args(0) val kafkaServerPort = args(1) val kafkaTopic = args(2) val groupId = args(3) val typeOfWindow = args(4) val env = StreamExecutionEnvironment.getExecutionEnvironment // create a stream using socket val properties = new Properties properties.setProperty("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort) properties.setProperty("zookeeper.connect", "localhost:2181") properties.setProperty("group.id", groupId) println("kafkaTopic:" + kafkaTopic) val wordCountStream:DataStream[String] = env.addSource( new FlinkKafkaConsumer010(kafkaTopic, new SimpleStringSchema(), properties)) // implement word count val wordsStream = wordCountStream .flatMap(line => line.toUpperCase.split(' ')) .map(word => (word, 1)) //.flatMap{_.toUpperCase.split(' ')} //.map{ (_,1) } val keyValuePair = wordsStream.keyBy(0) val countPair = if (typeOfWindow.equals("slidingCount")) { //Slide by count. Have a sliding window of 5 messages and trigger or slide 2 messages keyValuePair.countWindow(5, 2).sum(1) } else if (typeOfWindow.equals("tumbleTime")) { //Tumble by time. Trigger and Slide by 5 seconds keyValuePair.timeWindow(Time.of(5, TimeUnit.SECONDS)).sum(1) } else if (typeOfWindow.equals("slidingTime")) { //Slide by time. Have a sliding window of 5 seconds that tiggers every 2 seconds keyValuePair.timeWindow(Time.of(5, TimeUnit.SECONDS), Time.of(2, TimeUnit.SECONDS)).sum(1) } else { //Tumble by time. Trigger every 5 seconds keyValuePair.countWindow(5).sum(1) } // print the results countPair.print() // execute the program env.execute("Scala WordCount Example") } }
Example 40
Source File: DriverInitContainerBootstrapStep.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.k8s.submit.steps import java.io.StringWriter import java.util.Properties import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder, ContainerBuilder, HasMetadata} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.KubernetesUtils import org.apache.spark.deploy.k8s.submit.KubernetesDriverSpec import org.apache.spark.deploy.k8s.submit.steps.initcontainer.{InitContainerConfigurationStep, InitContainerSpec} private[spark] class DriverInitContainerBootstrapStep( steps: Seq[InitContainerConfigurationStep], configMapName: String, configMapKey: String) extends DriverConfigurationStep { override def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { var initContainerSpec = InitContainerSpec( properties = Map.empty[String, String], driverSparkConf = Map.empty[String, String], initContainer = new ContainerBuilder().build(), driverContainer = driverSpec.driverContainer, driverPod = driverSpec.driverPod, dependentResources = Seq.empty[HasMetadata]) for (nextStep <- steps) { initContainerSpec = nextStep.configureInitContainer(initContainerSpec) } val configMap = buildConfigMap( configMapName, configMapKey, initContainerSpec.properties) val resolvedDriverSparkConf = driverSpec.driverSparkConf .clone() .set(INIT_CONTAINER_CONFIG_MAP_NAME, configMapName) .set(INIT_CONTAINER_CONFIG_MAP_KEY_CONF, configMapKey) .setAll(initContainerSpec.driverSparkConf) val resolvedDriverPod = KubernetesUtils.appendInitContainer( initContainerSpec.driverPod, initContainerSpec.initContainer) driverSpec.copy( driverPod = resolvedDriverPod, driverContainer = initContainerSpec.driverContainer, driverSparkConf = resolvedDriverSparkConf, otherKubernetesResources = driverSpec.otherKubernetesResources ++ initContainerSpec.dependentResources ++ Seq(configMap)) } private def buildConfigMap( configMapName: String, configMapKey: String, config: Map[String, String]): ConfigMap = { val properties = new Properties() config.foreach { entry => properties.setProperty(entry._1, entry._2) } val propertiesWriter = new StringWriter() properties.store(propertiesWriter, s"Java properties built from Kubernetes config map with name: $configMapName " + s"and config map key: $configMapKey") new ConfigMapBuilder() .withNewMetadata() .withName(configMapName) .endMetadata() .addToData(configMapKey, propertiesWriter.toString) .build() } }
Example 41
Source File: GangliaSink.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.MetricRegistry import com.codahale.metrics.ganglia.GangliaReporter import info.ganglia.gmetric4j.gmetric.GMetric import info.ganglia.gmetric4j.gmetric.GMetric.UDPAddressingMode import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem class GangliaSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val GANGLIA_KEY_PERIOD = "period" val GANGLIA_DEFAULT_PERIOD = 10 val GANGLIA_KEY_UNIT = "unit" val GANGLIA_DEFAULT_UNIT: TimeUnit = TimeUnit.SECONDS val GANGLIA_KEY_MODE = "mode" val GANGLIA_DEFAULT_MODE: UDPAddressingMode = GMetric.UDPAddressingMode.MULTICAST // TTL for multicast messages. If listeners are X hops away in network, must be at least X. val GANGLIA_KEY_TTL = "ttl" val GANGLIA_DEFAULT_TTL = 1 val GANGLIA_KEY_HOST = "host" val GANGLIA_KEY_PORT = "port" val GANGLIA_KEY_DMAX = "dmax" val GANGLIA_DEFAULT_DMAX = 0 def propertyToOption(prop: String): Option[String] = Option(property.getProperty(prop)) if (!propertyToOption(GANGLIA_KEY_HOST).isDefined) { throw new Exception("Ganglia sink requires 'host' property.") } if (!propertyToOption(GANGLIA_KEY_PORT).isDefined) { throw new Exception("Ganglia sink requires 'port' property.") } val host = propertyToOption(GANGLIA_KEY_HOST).get val port = propertyToOption(GANGLIA_KEY_PORT).get.toInt val ttl = propertyToOption(GANGLIA_KEY_TTL).map(_.toInt).getOrElse(GANGLIA_DEFAULT_TTL) val dmax = propertyToOption(GANGLIA_KEY_DMAX).map(_.toInt).getOrElse(GANGLIA_DEFAULT_DMAX) val mode: UDPAddressingMode = propertyToOption(GANGLIA_KEY_MODE) .map(u => GMetric.UDPAddressingMode.valueOf(u.toUpperCase)).getOrElse(GANGLIA_DEFAULT_MODE) val pollPeriod = propertyToOption(GANGLIA_KEY_PERIOD).map(_.toInt) .getOrElse(GANGLIA_DEFAULT_PERIOD) val pollUnit: TimeUnit = propertyToOption(GANGLIA_KEY_UNIT) .map(u => TimeUnit.valueOf(u.toUpperCase)) .getOrElse(GANGLIA_DEFAULT_UNIT) MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val ganglia = new GMetric(host, port, mode, ttl) val reporter: GangliaReporter = GangliaReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .withDMax(dmax) .build(ganglia) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
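The sink only sees a flat Properties object; in a Spark deployment these keys normally come from metrics.properties entries of the form *.sink.ganglia.<key>=<value>. A sketch of the equivalent Properties, with a placeholder multicast group:

import java.util.Properties

val gangliaProps = new Properties()
gangliaProps.setProperty("host", "239.2.11.71")   // placeholder multicast address
gangliaProps.setProperty("port", "8649")
gangliaProps.setProperty("period", "10")
gangliaProps.setProperty("unit", "seconds")       // upper-cased before TimeUnit.valueOf above
gangliaProps.setProperty("mode", "multicast")
gangliaProps.setProperty("ttl", "1")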
Example 42
Source File: KafkaContinuousSourceSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.kafka010 import java.util.Properties import java.util.concurrent.atomic.AtomicInteger import org.scalatest.time.SpanSugar._ import scala.collection.mutable import scala.util.Random import org.apache.spark.SparkContext import org.apache.spark.sql.{DataFrame, Dataset, ForeachWriter, Row} import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.execution.streaming.StreamExecution import org.apache.spark.sql.execution.streaming.continuous.ContinuousExecution import org.apache.spark.sql.streaming.{StreamTest, Trigger} import org.apache.spark.sql.test.{SharedSQLContext, TestSparkSession} // Run tests in KafkaSourceSuiteBase in continuous execution mode. class KafkaContinuousSourceSuite extends KafkaSourceSuiteBase with KafkaContinuousTest class KafkaContinuousSourceTopicDeletionSuite extends KafkaContinuousTest { import testImplicits._ override val brokerProps = Map("auto.create.topics.enable" -> "false") test("subscribing topic by pattern with topic deletions") { val topicPrefix = newTopic() val topic = topicPrefix + "-seems" val topic2 = topicPrefix + "-bad" testUtils.createTopic(topic, partitions = 5) testUtils.sendMessages(topic, Array("-1")) require(testUtils.getLatestOffsets(Set(topic)).size === 5) val reader = spark .readStream .format("kafka") .option("kafka.bootstrap.servers", testUtils.brokerAddress) .option("kafka.metadata.max.age.ms", "1") .option("subscribePattern", s"$topicPrefix-.*") .option("failOnDataLoss", "false") val kafka = reader.load() .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") .as[(String, String)] val mapped = kafka.map(kv => kv._2.toInt + 1) testStream(mapped)( makeSureGetOffsetCalled, AddKafkaData(Set(topic), 1, 2, 3), CheckAnswer(2, 3, 4), Execute { query => testUtils.deleteTopic(topic) testUtils.createTopic(topic2, partitions = 5) eventually(timeout(streamingTimeout)) { assert( query.lastExecution.logical.collectFirst { case DataSourceV2Relation(_, r: KafkaContinuousReader) => r }.exists { r => // Ensure the new topic is present and the old topic is gone. r.knownPartitions.exists(_.topic == topic2) }, s"query never reconfigured to new topic $topic2") } }, AddKafkaData(Set(topic2), 4, 5, 6), CheckAnswer(2, 3, 4, 5, 6, 7) ) } } class KafkaContinuousSourceStressForDontFailOnDataLossSuite extends KafkaSourceStressForDontFailOnDataLossSuite { override protected def startStream(ds: Dataset[Int]) = { ds.writeStream .format("memory") .queryName("memory") .trigger(Trigger.Continuous("1 second")) .start() } }
Example 43
Source File: RowDataSourceStrategySuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources import java.sql.DriverManager import java.util.Properties import org.scalatest.BeforeAndAfter import org.apache.spark.SparkFunSuite import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.sources._ import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ import org.apache.spark.util.Utils class RowDataSourceStrategySuite extends SparkFunSuite with BeforeAndAfter with SharedSQLContext { import testImplicits._ val url = "jdbc:h2:mem:testdb0" val urlWithUserAndPass = "jdbc:h2:mem:testdb0;user=testUser;password=testPass" var conn: java.sql.Connection = null before { Utils.classForName("org.h2.Driver") // Extra properties that will be specified for our database. We need these to test // usage of parameters from OPTIONS clause in queries. val properties = new Properties() properties.setProperty("user", "testUser") properties.setProperty("password", "testPass") properties.setProperty("rowId", "false") conn = DriverManager.getConnection(url, properties) conn.prepareStatement("create schema test").executeUpdate() conn.prepareStatement("create table test.inttypes (a INT, b INT, c INT)").executeUpdate() conn.prepareStatement("insert into test.inttypes values (1, 2, 3)").executeUpdate() conn.commit() sql( s""" |CREATE OR REPLACE TEMPORARY VIEW inttypes |USING org.apache.spark.sql.jdbc |OPTIONS (url '$url', dbtable 'TEST.INTTYPES', user 'testUser', password 'testPass') """.stripMargin.replaceAll("\n", " ")) } after { conn.close() } test("SPARK-17673: Exchange reuse respects differences in output schema") { val df = sql("SELECT * FROM inttypes") val df1 = df.groupBy("a").agg("b" -> "min") val df2 = df.groupBy("a").agg("c" -> "min") val res = df1.union(df2) assert(res.distinct().count() == 2) // would be 1 if the exchange was incorrectly reused } }
Example 44
Source File: StatsdSink.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.MetricRegistry import org.apache.spark.SecurityManager import org.apache.spark.internal.Logging import org.apache.spark.metrics.MetricsSystem private[spark] object StatsdSink { val STATSD_KEY_HOST = "host" val STATSD_KEY_PORT = "port" val STATSD_KEY_PERIOD = "period" val STATSD_KEY_UNIT = "unit" val STATSD_KEY_PREFIX = "prefix" val STATSD_DEFAULT_HOST = "127.0.0.1" val STATSD_DEFAULT_PORT = "8125" val STATSD_DEFAULT_PERIOD = "10" val STATSD_DEFAULT_UNIT = "SECONDS" val STATSD_DEFAULT_PREFIX = "" } private[spark] class StatsdSink( val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink with Logging { import StatsdSink._ val host = property.getProperty(STATSD_KEY_HOST, STATSD_DEFAULT_HOST) val port = property.getProperty(STATSD_KEY_PORT, STATSD_DEFAULT_PORT).toInt val pollPeriod = property.getProperty(STATSD_KEY_PERIOD, STATSD_DEFAULT_PERIOD).toInt val pollUnit = TimeUnit.valueOf(property.getProperty(STATSD_KEY_UNIT, STATSD_DEFAULT_UNIT).toUpperCase) val prefix = property.getProperty(STATSD_KEY_PREFIX, STATSD_DEFAULT_PREFIX) MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val reporter = new StatsdReporter(registry, host, port, prefix) override def start(): Unit = { reporter.start(pollPeriod, pollUnit) logInfo(s"StatsdSink started with prefix: '$prefix'") } override def stop(): Unit = { reporter.stop() logInfo("StatsdSink stopped.") } override def report(): Unit = reporter.report() }
Example 45
Source File: CsvSink.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.io.File import java.util.{Locale, Properties} import java.util.concurrent.TimeUnit import com.codahale.metrics.{CsvReporter, MetricRegistry} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class CsvSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val CSV_KEY_PERIOD = "period" val CSV_KEY_UNIT = "unit" val CSV_KEY_DIR = "directory" val CSV_DEFAULT_PERIOD = 10 val CSV_DEFAULT_UNIT = "SECONDS" val CSV_DEFAULT_DIR = "/tmp/" val pollPeriod = Option(property.getProperty(CSV_KEY_PERIOD)) match { case Some(s) => s.toInt case None => CSV_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(CSV_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase(Locale.ROOT)) case None => TimeUnit.valueOf(CSV_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val pollDir = Option(property.getProperty(CSV_KEY_DIR)) match { case Some(s) => s case None => CSV_DEFAULT_DIR } val reporter: CsvReporter = CsvReporter.forRegistry(registry) .formatFor(Locale.US) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build(new File(pollDir)) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 46
Source File: MetricsServlet.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import javax.servlet.http.HttpServletRequest import com.codahale.metrics.MetricRegistry import com.codahale.metrics.json.MetricsModule import com.fasterxml.jackson.databind.ObjectMapper import org.eclipse.jetty.servlet.ServletContextHandler import org.apache.spark.{SecurityManager, SparkConf} import org.apache.spark.ui.JettyUtils._ private[spark] class MetricsServlet( val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val SERVLET_KEY_PATH = "path" val SERVLET_KEY_SAMPLE = "sample" val SERVLET_DEFAULT_SAMPLE = false val servletPath = property.getProperty(SERVLET_KEY_PATH) val servletShowSample = Option(property.getProperty(SERVLET_KEY_SAMPLE)).map(_.toBoolean) .getOrElse(SERVLET_DEFAULT_SAMPLE) val mapper = new ObjectMapper().registerModule( new MetricsModule(TimeUnit.SECONDS, TimeUnit.MILLISECONDS, servletShowSample)) def getHandlers(conf: SparkConf): Array[ServletContextHandler] = { Array[ServletContextHandler]( createServletHandler(servletPath, new ServletParams(request => getMetricsSnapshot(request), "text/json"), securityMgr, conf) ) } def getMetricsSnapshot(request: HttpServletRequest): String = { mapper.writeValueAsString(registry) } override def start() { } override def stop() { } override def report() { } }
Example 47
Source File: Slf4jSink.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.{Locale, Properties} import java.util.concurrent.TimeUnit import com.codahale.metrics.{MetricRegistry, Slf4jReporter} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class Slf4jSink( val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val SLF4J_DEFAULT_PERIOD = 10 val SLF4J_DEFAULT_UNIT = "SECONDS" val SLF4J_KEY_PERIOD = "period" val SLF4J_KEY_UNIT = "unit" val pollPeriod = Option(property.getProperty(SLF4J_KEY_PERIOD)) match { case Some(s) => s.toInt case None => SLF4J_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(SLF4J_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase(Locale.ROOT)) case None => TimeUnit.valueOf(SLF4J_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val reporter: Slf4jReporter = Slf4jReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build() override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 48
Source File: ConsoleSink.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.{Locale, Properties} import java.util.concurrent.TimeUnit import com.codahale.metrics.{ConsoleReporter, MetricRegistry} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class ConsoleSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val CONSOLE_DEFAULT_PERIOD = 10 val CONSOLE_DEFAULT_UNIT = "SECONDS" val CONSOLE_KEY_PERIOD = "period" val CONSOLE_KEY_UNIT = "unit" val pollPeriod = Option(property.getProperty(CONSOLE_KEY_PERIOD)) match { case Some(s) => s.toInt case None => CONSOLE_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(CONSOLE_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase(Locale.ROOT)) case None => TimeUnit.valueOf(CONSOLE_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val reporter: ConsoleReporter = ConsoleReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build() override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 49
Source File: GraphiteSink.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.net.InetSocketAddress import java.util.{Locale, Properties} import java.util.concurrent.TimeUnit import com.codahale.metrics.MetricRegistry import com.codahale.metrics.graphite.{Graphite, GraphiteReporter, GraphiteUDP} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class GraphiteSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val GRAPHITE_DEFAULT_PERIOD = 10 val GRAPHITE_DEFAULT_UNIT = "SECONDS" val GRAPHITE_DEFAULT_PREFIX = "" val GRAPHITE_KEY_HOST = "host" val GRAPHITE_KEY_PORT = "port" val GRAPHITE_KEY_PERIOD = "period" val GRAPHITE_KEY_UNIT = "unit" val GRAPHITE_KEY_PREFIX = "prefix" val GRAPHITE_KEY_PROTOCOL = "protocol" def propertyToOption(prop: String): Option[String] = Option(property.getProperty(prop)) if (!propertyToOption(GRAPHITE_KEY_HOST).isDefined) { throw new Exception("Graphite sink requires 'host' property.") } if (!propertyToOption(GRAPHITE_KEY_PORT).isDefined) { throw new Exception("Graphite sink requires 'port' property.") } val host = propertyToOption(GRAPHITE_KEY_HOST).get val port = propertyToOption(GRAPHITE_KEY_PORT).get.toInt val pollPeriod = propertyToOption(GRAPHITE_KEY_PERIOD) match { case Some(s) => s.toInt case None => GRAPHITE_DEFAULT_PERIOD } val pollUnit: TimeUnit = propertyToOption(GRAPHITE_KEY_UNIT) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase(Locale.ROOT)) case None => TimeUnit.valueOf(GRAPHITE_DEFAULT_UNIT) } val prefix = propertyToOption(GRAPHITE_KEY_PREFIX).getOrElse(GRAPHITE_DEFAULT_PREFIX) MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val graphite = propertyToOption(GRAPHITE_KEY_PROTOCOL).map(_.toLowerCase(Locale.ROOT)) match { case Some("udp") => new GraphiteUDP(host, port) case Some("tcp") | None => new Graphite(host, port) case Some(p) => throw new Exception(s"Invalid Graphite protocol: $p") } val reporter: GraphiteReporter = GraphiteReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .prefixedWith(prefix) .build(graphite) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 50
Source File: ResultTask.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.io._ import java.lang.management.ManagementFactory import java.nio.ByteBuffer import java.util.Properties import org.apache.spark._ import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.RDD private[spark] class ResultTask[T, U]( stageId: Int, stageAttemptId: Int, taskBinary: Broadcast[Array[Byte]], partition: Partition, locs: Seq[TaskLocation], val outputId: Int, localProperties: Properties, serializedTaskMetrics: Array[Byte], jobId: Option[Int] = None, appId: Option[String] = None, appAttemptId: Option[String] = None) extends Task[U](stageId, stageAttemptId, partition.index, localProperties, serializedTaskMetrics, jobId, appId, appAttemptId) with Serializable { @transient private[this] val preferredLocs: Seq[TaskLocation] = { if (locs == null) Nil else locs.toSet.toSeq } override def runTask(context: TaskContext): U = { // Deserialize the RDD and the func using the broadcast variables. val threadMXBean = ManagementFactory.getThreadMXBean val deserializeStartTime = System.currentTimeMillis() val deserializeStartCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) { threadMXBean.getCurrentThreadCpuTime } else 0L val ser = SparkEnv.get.closureSerializer.newInstance() val (rdd, func) = ser.deserialize[(RDD[T], (TaskContext, Iterator[T]) => U)]( ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader) _executorDeserializeTime = System.currentTimeMillis() - deserializeStartTime _executorDeserializeCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) { threadMXBean.getCurrentThreadCpuTime - deserializeStartCpuTime } else 0L func(context, rdd.iterator(partition, context)) } // This is only callable on the driver side. override def preferredLocations: Seq[TaskLocation] = preferredLocs override def toString: String = "ResultTask(" + stageId + ", " + partitionId + ")" }
Example 51
Source File: DAGSchedulerEvent.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.util.Properties import scala.language.existentials import org.apache.spark._ import org.apache.spark.rdd.RDD import org.apache.spark.util.{AccumulatorV2, CallSite} private[scheduler] case class MapStageSubmitted( jobId: Int, dependency: ShuffleDependency[_, _, _], callSite: CallSite, listener: JobListener, properties: Properties = null) extends DAGSchedulerEvent private[scheduler] case class StageCancelled( stageId: Int, reason: Option[String]) extends DAGSchedulerEvent private[scheduler] case class JobCancelled( jobId: Int, reason: Option[String]) extends DAGSchedulerEvent private[scheduler] case class JobGroupCancelled(groupId: String) extends DAGSchedulerEvent private[scheduler] case object AllJobsCancelled extends DAGSchedulerEvent private[scheduler] case class BeginEvent(task: Task[_], taskInfo: TaskInfo) extends DAGSchedulerEvent private[scheduler] case class GettingResultEvent(taskInfo: TaskInfo) extends DAGSchedulerEvent private[scheduler] case class CompletionEvent( task: Task[_], reason: TaskEndReason, result: Any, accumUpdates: Seq[AccumulatorV2[_, _]], taskInfo: TaskInfo) extends DAGSchedulerEvent private[scheduler] case class ExecutorAdded(execId: String, host: String) extends DAGSchedulerEvent private[scheduler] case class ExecutorLost(execId: String, reason: ExecutorLossReason) extends DAGSchedulerEvent private[scheduler] case class WorkerRemoved(workerId: String, host: String, message: String) extends DAGSchedulerEvent private[scheduler] case class TaskSetFailed(taskSet: TaskSet, reason: String, exception: Option[Throwable]) extends DAGSchedulerEvent private[scheduler] case object ResubmitFailedStages extends DAGSchedulerEvent private[scheduler] case class SpeculativeTaskSubmitted(task: Task[_]) extends DAGSchedulerEvent
Example 52
Source File: package.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache import java.util.Properties package object spark { private object SparkBuildInfo { val ( spark_version: String, spark_branch: String, spark_revision: String, spark_build_user: String, spark_repo_url: String, spark_build_date: String) = { val resourceStream = Thread.currentThread().getContextClassLoader. getResourceAsStream("spark-version-info.properties") if (resourceStream == null) { throw new SparkException("Could not find spark-version-info.properties") } try { val unknownProp = "<unknown>" val props = new Properties() props.load(resourceStream) ( props.getProperty("version", unknownProp), props.getProperty("branch", unknownProp), props.getProperty("revision", unknownProp), props.getProperty("user", unknownProp), props.getProperty("url", unknownProp), props.getProperty("date", unknownProp) ) } catch { case e: Exception => throw new SparkException("Error loading properties from spark-version-info.properties", e) } finally { if (resourceStream != null) { try { resourceStream.close() } catch { case e: Exception => throw new SparkException("Error closing spark build info resource stream", e) } } } } } val SPARK_VERSION = SparkBuildInfo.spark_version val SPARK_BRANCH = SparkBuildInfo.spark_branch val SPARK_REVISION = SparkBuildInfo.spark_revision val SPARK_BUILD_USER = SparkBuildInfo.spark_build_user val SPARK_REPO_URL = SparkBuildInfo.spark_repo_url val SPARK_BUILD_DATE = SparkBuildInfo.spark_build_date }
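The pattern above, Properties.load over a classpath resource with a <unknown> fallback, can be exercised in isolation; the sketch below replaces the resource with an in-memory string shaped like spark-version-info.properties (all values are made up):

import java.io.StringReader
import java.util.Properties

val text =
  """version=2.3.1
    |branch=branch-2.3
    |revision=0123456789abcdef
    |user=jenkins
    |url=https://github.com/apache/spark
    |date=2018-06-01
    |""".stripMargin

val props = new Properties()
props.load(new StringReader(text))
assert(props.getProperty("version", "<unknown>") == "2.3.1")
assert(props.getProperty("missing-key", "<unknown>") == "<unknown>")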
Example 53
Source File: MemoryTestingUtils.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.memory import java.util.Properties import org.apache.spark.{SparkEnv, TaskContext, TaskContextImpl} object MemoryTestingUtils { def fakeTaskContext(env: SparkEnv): TaskContext = { val taskMemoryManager = new TaskMemoryManager(env.memoryManager, 0) new TaskContextImpl( stageId = 0, stageAttemptNumber = 0, partitionId = 0, taskAttemptId = 0, attemptNumber = 0, taskMemoryManager = taskMemoryManager, localProperties = new Properties, metricsSystem = env.metricsSystem) } }
Example 54
Source File: FakeTask.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.util.Properties import org.apache.spark.{Partition, SparkEnv, TaskContext} import org.apache.spark.executor.TaskMetrics class FakeTask( stageId: Int, partitionId: Int, prefLocs: Seq[TaskLocation] = Nil, serializedTaskMetrics: Array[Byte] = SparkEnv.get.closureSerializer.newInstance().serialize(TaskMetrics.registered).array()) extends Task[Int](stageId, 0, partitionId, new Properties, serializedTaskMetrics) { override def runTask(context: TaskContext): Int = 0 override def preferredLocations: Seq[TaskLocation] = prefLocs } object FakeTask { def createTaskSet(numTasks: Int, prefLocs: Seq[TaskLocation]*): TaskSet = { createTaskSet(numTasks, stageAttemptId = 0, prefLocs: _*) } def createTaskSet(numTasks: Int, stageAttemptId: Int, prefLocs: Seq[TaskLocation]*): TaskSet = { createTaskSet(numTasks, stageId = 0, stageAttemptId, prefLocs: _*) } def createTaskSet(numTasks: Int, stageId: Int, stageAttemptId: Int, prefLocs: Seq[TaskLocation]*): TaskSet = { if (prefLocs.size != 0 && prefLocs.size != numTasks) { throw new IllegalArgumentException("Wrong number of task locations") } val tasks = Array.tabulate[Task[_]](numTasks) { i => new FakeTask(stageId, i, if (prefLocs.size != 0) prefLocs(i) else Nil) } new TaskSet(tasks, stageId, stageAttemptId, priority = 0, null) } def createShuffleMapTaskSet( numTasks: Int, stageId: Int, stageAttemptId: Int, prefLocs: Seq[TaskLocation]*): TaskSet = { if (prefLocs.size != 0 && prefLocs.size != numTasks) { throw new IllegalArgumentException("Wrong number of task locations") } val tasks = Array.tabulate[Task[_]](numTasks) { i => new ShuffleMapTask(stageId, stageAttemptId, null, new Partition { override def index: Int = i }, prefLocs(i), new Properties, SparkEnv.get.closureSerializer.newInstance().serialize(TaskMetrics.registered).array()) } new TaskSet(tasks, stageId, stageAttemptId, priority = 0, null) } }
Example 55
Source File: TaskDescriptionSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.io.{ByteArrayOutputStream, DataOutputStream, UTFDataFormatException} import java.nio.ByteBuffer import java.util.Properties import scala.collection.mutable.HashMap import org.apache.spark.SparkFunSuite class TaskDescriptionSuite extends SparkFunSuite { test("encoding and then decoding a TaskDescription results in the same TaskDescription") { val originalFiles = new HashMap[String, Long]() originalFiles.put("fileUrl1", 1824) originalFiles.put("fileUrl2", 2) val originalJars = new HashMap[String, Long]() originalJars.put("jar1", 3) val originalProperties = new Properties() originalProperties.put("property1", "18") originalProperties.put("property2", "test value") // SPARK-19796 -- large property values (like a large job description for a long sql query) // can cause problems for DataOutputStream, make sure we handle correctly val sb = new StringBuilder() (0 to 10000).foreach(_ => sb.append("1234567890")) val largeString = sb.toString() originalProperties.put("property3", largeString) // make sure we've got a good test case intercept[UTFDataFormatException] { val out = new DataOutputStream(new ByteArrayOutputStream()) try { out.writeUTF(largeString) } finally { out.close() } } // Create a dummy byte buffer for the task. val taskBuffer = ByteBuffer.wrap(Array[Byte](1, 2, 3, 4)) val originalTaskDescription = new TaskDescription( taskId = 1520589, attemptNumber = 2, executorId = "testExecutor", name = "task for test", index = 19, originalFiles, originalJars, originalProperties, taskBuffer ) val serializedTaskDescription = TaskDescription.encode(originalTaskDescription) val decodedTaskDescription = TaskDescription.decode(serializedTaskDescription) // Make sure that all of the fields in the decoded task description match the original. assert(decodedTaskDescription.taskId === originalTaskDescription.taskId) assert(decodedTaskDescription.attemptNumber === originalTaskDescription.attemptNumber) assert(decodedTaskDescription.executorId === originalTaskDescription.executorId) assert(decodedTaskDescription.name === originalTaskDescription.name) assert(decodedTaskDescription.index === originalTaskDescription.index) assert(decodedTaskDescription.addedFiles.equals(originalFiles)) assert(decodedTaskDescription.addedJars.equals(originalJars)) assert(decodedTaskDescription.properties.equals(originalTaskDescription.properties)) assert(decodedTaskDescription.serializedTask.equals(taskBuffer)) } }
Example 56
Source File: ResetSystemProperties.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.util.Properties import org.apache.commons.lang3.SerializationUtils import org.scalatest.{BeforeAndAfterEach, Suite} private[spark] trait ResetSystemProperties extends BeforeAndAfterEach { this: Suite => var oldProperties: Properties = null override def beforeEach(): Unit = { // we need SerializationUtils.clone instead of `new Properties(System.getProperties())` because // the latter way of creating a copy does not copy the properties but initializes a new // Properties object with the given properties as defaults, which the standard Scala wrapper // over Java Properties does not recognize at all. oldProperties = SerializationUtils.clone(System.getProperties) super.beforeEach() } override def afterEach(): Unit = { try { super.afterEach() } finally { System.setProperties(oldProperties) oldProperties = null } } }
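The comment in beforeEach is worth a concrete illustration; a small sketch of why a defensive copy is needed (using Hashtable.clone here purely for demonstration):

import java.util.Properties
import scala.collection.JavaConverters._

val original = new Properties()
original.setProperty("spark.test.key", "value")

val viaConstructor = new Properties(original)              // sets defaults, does not copy entries
val viaClone = original.clone().asInstanceOf[Properties]   // copies the actual entries

assert(viaConstructor.getProperty("spark.test.key") == "value")   // defaults are resolved here
assert(viaConstructor.asScala.get("spark.test.key").isEmpty)      // but invisible to the Scala wrapper
assert(viaClone.asScala.get("spark.test.key").contains("value"))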
Example 57
Source File: VelocityRenderer.scala From shellbase with Apache License 2.0 | 5 votes |
package com.sumologic.shellbase import java.io._ import java.util.Properties import org.apache.commons.io.IOUtils import org.apache.velocity.VelocityContext import org.apache.velocity.app.{Velocity, VelocityEngine} import scala.collection.JavaConversions._ object VelocityRenderer { val props = new Properties() props.setProperty("runtime.references.strict", "true") props.setProperty("velocimacro.arguments.strict", "true") props.setProperty("velocimacro.permissions.allow.inline.local.scope", "true") props.setProperty("directive.foreach.skip.invalid", "false") props.setProperty("runtime.log.logsystem.log4j.logger", "org.apache.velocity") props.setProperty("resource.loader", "class,file") props.setProperty("class.resource.loader.description", "Velocity Classpath Resource Loader") props.setProperty("class.resource.loader.class", "org.apache.velocity.runtime.resource.loader.ClasspathResourceLoader") Velocity.init(props) def render(map: Iterable[(String, AnyRef)], reader: Reader, writer: Writer): Unit = { val context = new VelocityContext() map.foreach { pair => context.put(pair._1, pair._2) } Velocity.evaluate(context, writer, "ops util velocity renderer", reader) } def render(templateVars: Iterable[(String, AnyRef)], templatePath: String, outputPath: String): Unit = { val templateReader = new InputStreamReader(getClass.getClassLoader.getResourceAsStream(templatePath)) val outputWriter = new OutputStreamWriter(new FileOutputStream(outputPath)) try { render(templateVars, templateReader, outputWriter) } finally { IOUtils.closeQuietly(templateReader) IOUtils.closeQuietly(outputWriter) } } def createScriptFromTemplate(scriptResource: String, variables: Map[AnyRef, AnyRef] = Map[AnyRef, AnyRef]()): File = { val engine = new VelocityEngine() engine.setProperty("resource.loader", "class") engine.setProperty("class.resource.loader.description", "Velocity Classpath Resource Loader") engine.setProperty("class.resource.loader.class", "org.apache.velocity.runtime.resource.loader.ClasspathResourceLoader") engine.init() val template = engine.getTemplate(scriptResource) val tempFile = File.createTempFile(".tmp", ".sh") val modifiableVariables = new java.util.HashMap[AnyRef, AnyRef]() modifiableVariables ++= variables val writer = new FileWriter(tempFile) try { template.merge(new VelocityContext(modifiableVariables), writer) } finally { IOUtils.closeQuietly(writer) } tempFile } }
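A quick rendering sketch against the object above, using in-memory reader and writer; the template text and variable names are made up:

import java.io.{StringReader, StringWriter}

val out = new StringWriter()
VelocityRenderer.render(
  Map[String, AnyRef]("cluster" -> "alpha", "port" -> Integer.valueOf(9042)),
  new StringReader("host=${cluster}.example.com:${port}"),
  out)
println(out.toString)   // host=alpha.example.com:9042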
Example 58
Source File: StreamingDemo.scala From flink-demos with Apache License 2.0 | 5 votes |
package com.dataartisans.flink.example.eventpattern import java.text.SimpleDateFormat import java.util import java.util.{Calendar, Properties, UUID} import com.dataartisans.flink.example.eventpattern.kafka.EventDeSerializer import org.apache.flink.api.common.functions.{RuntimeContext, RichFlatMapFunction} import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor} import org.apache.flink.configuration.Configuration import org.apache.flink.streaming.api.scala._ import org.apache.flink.streaming.connectors.elasticsearch.{IndexRequestBuilder, ElasticsearchSink} import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer08 import org.apache.flink.util.Collector import org.elasticsearch.action.index.IndexRequest import org.elasticsearch.client.Requests class StateMachineMapper extends RichFlatMapFunction[Event, Alert] { private[this] var currentState: ValueState[State] = _ override def open(config: Configuration): Unit = { currentState = getRuntimeContext.getState( new ValueStateDescriptor("state", classOf[State], InitialState)) } override def flatMap(t: Event, out: Collector[Alert]): Unit = { val state = currentState.value() val nextState = state.transition(t.event) nextState match { case InvalidTransition => out.collect(Alert(t.sourceAddress, state, t.event)) case x if x.terminal => currentState.clear() case x => currentState.update(nextState) } } }
Example 59
Source File: JDBCRelation.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.jdbc import java.util.Properties import scala.collection.mutable.ArrayBuffer import org.apache.spark.Partition import org.apache.spark.rdd.RDD import org.apache.spark.sql.sources._ import org.apache.spark.sql.types.StructType import org.apache.spark.sql.{DataFrame, Row, SQLContext, SaveMode} def columnPartition(partitioning: JDBCPartitioningInfo): Array[Partition] = { if (partitioning == null) return Array[Partition](JDBCPartition(null, 0)) val numPartitions = partitioning.numPartitions val column = partitioning.column if (numPartitions == 1) return Array[Partition](JDBCPartition(null, 0)) // Overflow and silliness can happen if you subtract then divide. // Here we get a little roundoff, but that's (hopefully) OK. val stride: Long = (partitioning.upperBound / numPartitions - partitioning.lowerBound / numPartitions) var i: Int = 0 var currentValue: Long = partitioning.lowerBound var ans = new ArrayBuffer[Partition]() while (i < numPartitions) { val lowerBound = if (i != 0) s"$column >= $currentValue" else null currentValue += stride val upperBound = if (i != numPartitions - 1) s"$column < $currentValue" else null val whereClause = if (upperBound == null) { lowerBound } else if (lowerBound == null) { upperBound } else { s"$lowerBound AND $upperBound" } ans += JDBCPartition(whereClause, i) i = i + 1 } ans.toArray } } private[sql] case class JDBCRelation( url: String, table: String, parts: Array[Partition], properties: Properties = new Properties())(@transient val sqlContext: SQLContext) extends BaseRelation with PrunedFilteredScan with InsertableRelation { override val needConversion: Boolean = false override val schema: StructType = JDBCRDD.resolveTable(url, table, properties) override def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = { // Rely on a type erasure hack to pass RDD[InternalRow] back as RDD[Row] JDBCRDD.scanTable( sqlContext.sparkContext, schema, url, properties, table, requiredColumns, filters, parts).asInstanceOf[RDD[Row]] } override def insert(data: DataFrame, overwrite: Boolean): Unit = { data.write .mode(if (overwrite) SaveMode.Overwrite else SaveMode.Append) .jdbc(url, table, properties) } }
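The partitioning logic above is normally reached through the DataFrameReader; a sketch of a partitioned read, assuming a SQLContext named sqlContext is in scope (URL, table, column, and bounds are placeholders):

import java.util.Properties

val connectionProperties = new Properties()
connectionProperties.setProperty("user", "testUser")
connectionProperties.setProperty("password", "testPass")

// columnPartition turns these bounds into 8 non-overlapping WHERE clauses on the id column.
val df = sqlContext.read.jdbc(
  url = "jdbc:postgresql://db-host:5432/mydb",
  table = "events",
  columnName = "id",
  lowerBound = 0L,
  upperBound = 1000000L,
  numPartitions = 8,
  connectionProperties = connectionProperties)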
Example 60
Source File: DefaultSource.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.jdbc import java.util.Properties import org.apache.spark.sql.SQLContext import org.apache.spark.sql.sources.{BaseRelation, RelationProvider, DataSourceRegister} class DefaultSource extends RelationProvider with DataSourceRegister { override def shortName(): String = "jdbc" override def createRelation( sqlContext: SQLContext, parameters: Map[String, String]): BaseRelation = { val url = parameters.getOrElse("url", sys.error("Option 'url' not specified")) val table = parameters.getOrElse("dbtable", sys.error("Option 'dbtable' not specified")) val partitionColumn = parameters.getOrElse("partitionColumn", null) val lowerBound = parameters.getOrElse("lowerBound", null) val upperBound = parameters.getOrElse("upperBound", null) val numPartitions = parameters.getOrElse("numPartitions", null) if (partitionColumn != null && (lowerBound == null || upperBound == null || numPartitions == null)) { sys.error("Partitioning incompletely specified") } val partitionInfo = if (partitionColumn == null) { null } else { JDBCPartitioningInfo( partitionColumn, lowerBound.toLong, upperBound.toLong, numPartitions.toInt) } val parts = JDBCRelation.columnPartition(partitionInfo) val properties = new Properties() // Additional properties that we will pass to getConnection parameters.foreach(kv => properties.setProperty(kv._1, kv._2)) JDBCRelation(url, table, parts, properties)(sqlContext) } }
Example 61
Source File: SQLExecutionSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution import java.util.Properties import scala.collection.parallel.CompositeThrowable import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} import org.apache.spark.sql.SQLContext class SQLExecutionSuite extends SparkFunSuite { test("concurrent query execution (SPARK-10548)") { // Try to reproduce the issue with the old SparkContext val conf = new SparkConf() .setMaster("local[*]") .setAppName("test") val badSparkContext = new BadSparkContext(conf) try { testConcurrentQueryExecution(badSparkContext) fail("unable to reproduce SPARK-10548") } catch { case e: IllegalArgumentException => assert(e.getMessage.contains(SQLExecution.EXECUTION_ID_KEY)) } finally { badSparkContext.stop() } // Verify that the issue is fixed with the latest SparkContext val goodSparkContext = new SparkContext(conf) try { testConcurrentQueryExecution(goodSparkContext) } finally { goodSparkContext.stop() } } private class BadSparkContext(conf: SparkConf) extends SparkContext(conf) { protected[spark] override val localProperties = new InheritableThreadLocal[Properties] { override protected def childValue(parent: Properties): Properties = new Properties(parent) override protected def initialValue(): Properties = new Properties() } }
Example 62
Source File: PostgresIntegrationSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.jdbc import java.sql.Connection import java.util.Properties import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.{Literal, If} import org.apache.spark.tags.DockerTest @DockerTest class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { override val db = new DatabaseOnDocker { override val imageName = "postgres:9.4.5" override val env = Map( "POSTGRES_PASSWORD" -> "rootpass" ) override val jdbcPort = 5432 override def getJdbcUrl(ip: String, port: Int): String = s"jdbc:postgresql://$ip:$port/postgres?user=postgres&password=rootpass" } override def dataPreparation(conn: Connection): Unit = { conn.prepareStatement("CREATE DATABASE foo").executeUpdate() conn.setCatalog("foo") conn.prepareStatement("CREATE TABLE bar (c0 text, c1 integer, c2 double precision, c3 bigint, " + "c4 bit(1), c5 bit(10), c6 bytea, c7 boolean, c8 inet, c9 cidr, " + "c10 integer[], c11 text[], c12 real[])").executeUpdate() conn.prepareStatement("INSERT INTO bar VALUES ('hello', 42, 1.25, 123456789012345, B'0', " + "B'1000100101', E'\\\\xDEADBEEF', true, '172.16.0.42', '192.168.0.0/16', " + """'{1, 2}', '{"a", null, "b"}', '{0.11, 0.22}')""").executeUpdate() } test("Type mapping for various types") { val df = sqlContext.read.jdbc(jdbcUrl, "bar", new Properties) val rows = df.collect() assert(rows.length == 1) val types = rows(0).toSeq.map(x => x.getClass) assert(types.length == 13) assert(classOf[String].isAssignableFrom(types(0))) assert(classOf[java.lang.Integer].isAssignableFrom(types(1))) assert(classOf[java.lang.Double].isAssignableFrom(types(2))) assert(classOf[java.lang.Long].isAssignableFrom(types(3))) assert(classOf[java.lang.Boolean].isAssignableFrom(types(4))) assert(classOf[Array[Byte]].isAssignableFrom(types(5))) assert(classOf[Array[Byte]].isAssignableFrom(types(6))) assert(classOf[java.lang.Boolean].isAssignableFrom(types(7))) assert(classOf[String].isAssignableFrom(types(8))) assert(classOf[String].isAssignableFrom(types(9))) assert(classOf[Seq[Int]].isAssignableFrom(types(10))) assert(classOf[Seq[String]].isAssignableFrom(types(11))) assert(classOf[Seq[Double]].isAssignableFrom(types(12))) assert(rows(0).getString(0).equals("hello")) assert(rows(0).getInt(1) == 42) assert(rows(0).getDouble(2) == 1.25) assert(rows(0).getLong(3) == 123456789012345L) assert(rows(0).getBoolean(4) == false) // BIT(10)'s come back as ASCII strings of ten ASCII 0's and 1's... assert(java.util.Arrays.equals(rows(0).getAs[Array[Byte]](5), Array[Byte](49, 48, 48, 48, 49, 48, 48, 49, 48, 49))) assert(java.util.Arrays.equals(rows(0).getAs[Array[Byte]](6), Array[Byte](0xDE.toByte, 0xAD.toByte, 0xBE.toByte, 0xEF.toByte))) assert(rows(0).getBoolean(7) == true) assert(rows(0).getString(8) == "172.16.0.42") assert(rows(0).getString(9) == "192.168.0.0/16") assert(rows(0).getSeq(10) == Seq(1, 2)) assert(rows(0).getSeq(11) == Seq("a", null, "b")) assert(rows(0).getSeq(12).toSeq == Seq(0.11f, 0.22f)) } test("Basic write test") { val df = sqlContext.read.jdbc(jdbcUrl, "bar", new Properties) // Test only that it doesn't crash. df.write.jdbc(jdbcUrl, "public.barcopy", new Properties) // Test write null values. df.select(df.queryExecution.analyzed.output.map { a => Column(Literal.create(null, a.dataType)).as(a.name) }: _*).write.jdbc(jdbcUrl, "public.barcopy2", new Properties) } }
Example 63
Source File: GangliaSink.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.MetricRegistry import com.codahale.metrics.ganglia.GangliaReporter import info.ganglia.gmetric4j.gmetric.GMetric import info.ganglia.gmetric4j.gmetric.GMetric.UDPAddressingMode import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem class GangliaSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val GANGLIA_KEY_PERIOD = "period" val GANGLIA_DEFAULT_PERIOD = 10 val GANGLIA_KEY_UNIT = "unit" val GANGLIA_DEFAULT_UNIT: TimeUnit = TimeUnit.SECONDS val GANGLIA_KEY_MODE = "mode" val GANGLIA_DEFAULT_MODE: UDPAddressingMode = GMetric.UDPAddressingMode.MULTICAST // TTL for multicast messages. If listeners are X hops away in network, must be at least X. val GANGLIA_KEY_TTL = "ttl" val GANGLIA_DEFAULT_TTL = 1 val GANGLIA_KEY_HOST = "host" val GANGLIA_KEY_PORT = "port" def propertyToOption(prop: String): Option[String] = Option(property.getProperty(prop)) if (!propertyToOption(GANGLIA_KEY_HOST).isDefined) { throw new Exception("Ganglia sink requires 'host' property.") } if (!propertyToOption(GANGLIA_KEY_PORT).isDefined) { throw new Exception("Ganglia sink requires 'port' property.") } val host = propertyToOption(GANGLIA_KEY_HOST).get val port = propertyToOption(GANGLIA_KEY_PORT).get.toInt val ttl = propertyToOption(GANGLIA_KEY_TTL).map(_.toInt).getOrElse(GANGLIA_DEFAULT_TTL) val mode: UDPAddressingMode = propertyToOption(GANGLIA_KEY_MODE) .map(u => GMetric.UDPAddressingMode.valueOf(u.toUpperCase)).getOrElse(GANGLIA_DEFAULT_MODE) val pollPeriod = propertyToOption(GANGLIA_KEY_PERIOD).map(_.toInt) .getOrElse(GANGLIA_DEFAULT_PERIOD) val pollUnit: TimeUnit = propertyToOption(GANGLIA_KEY_UNIT) .map(u => TimeUnit.valueOf(u.toUpperCase)) .getOrElse(GANGLIA_DEFAULT_UNIT) MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val ganglia = new GMetric(host, port, mode, ttl) val reporter: GangliaReporter = GangliaReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build(ganglia) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 64
Source File: MetricsConfig.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics import java.io.{FileInputStream, InputStream} import java.util.Properties import scala.collection.JavaConverters._ import scala.collection.mutable import scala.util.matching.Regex import org.apache.spark.util.Utils import org.apache.spark.{Logging, SparkConf} private[spark] class MetricsConfig(conf: SparkConf) extends Logging { private val DEFAULT_PREFIX = "*" private val INSTANCE_REGEX = "^(\\*|[a-zA-Z]+)\\.(.+)".r private val DEFAULT_METRICS_CONF_FILENAME = "metrics.properties" private[metrics] val properties = new Properties() private[metrics] var propertyCategories: mutable.HashMap[String, Properties] = null private def setDefaultProperties(prop: Properties) { prop.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet") prop.setProperty("*.sink.servlet.path", "/metrics/json") prop.setProperty("master.sink.servlet.path", "/metrics/master/json") prop.setProperty("applications.sink.servlet.path", "/metrics/applications/json") } def initialize() { // Add default properties in case there's no properties file setDefaultProperties(properties) loadPropertiesFromFile(conf.getOption("spark.metrics.conf")) // Also look for the properties in provided Spark configuration val prefix = "spark.metrics.conf." conf.getAll.foreach { case (k, v) if k.startsWith(prefix) => properties.setProperty(k.substring(prefix.length()), v) case _ => } propertyCategories = subProperties(properties, INSTANCE_REGEX) if (propertyCategories.contains(DEFAULT_PREFIX)) { val defaultProperty = propertyCategories(DEFAULT_PREFIX).asScala for((inst, prop) <- propertyCategories if (inst != DEFAULT_PREFIX); (k, v) <- defaultProperty if (prop.get(k) == null)) { prop.put(k, v) } } } def subProperties(prop: Properties, regex: Regex): mutable.HashMap[String, Properties] = { val subProperties = new mutable.HashMap[String, Properties] prop.asScala.foreach { kv => if (regex.findPrefixOf(kv._1.toString).isDefined) { val regex(prefix, suffix) = kv._1.toString subProperties.getOrElseUpdate(prefix, new Properties).setProperty(suffix, kv._2.toString) } } subProperties } def getInstance(inst: String): Properties = { propertyCategories.get(inst) match { case Some(s) => s case None => propertyCategories.getOrElse(DEFAULT_PREFIX, new Properties) } } private[this] def loadPropertiesFromFile(path: Option[String]): Unit = { var is: InputStream = null try { is = path match { case Some(f) => new FileInputStream(f) case None => Utils.getSparkClassLoader.getResourceAsStream(DEFAULT_METRICS_CONF_FILENAME) } if (is != null) { properties.load(is) } } catch { case e: Exception => val file = path.getOrElse(DEFAULT_METRICS_CONF_FILENAME) logError(s"Error loading configuration file $file", e) } finally { if (is != null) { is.close() } } } }
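The subProperties helper above splits keys of the form instance.rest.of.key into one Properties object per instance. A self-contained sketch that mirrors that split with the same regex (it does not call the package-private MetricsConfig class):

import java.util.Properties

import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.util.matching.Regex

object SubPropertiesSketch extends App {
  val InstanceRegex: Regex = "^(\\*|[a-zA-Z]+)\\.(.+)".r

  val props = new Properties()
  props.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet")
  props.setProperty("driver.sink.console.period", "10")

  val grouped = new mutable.HashMap[String, Properties]
  props.asScala.foreach { case (key, value) =>
    key match {
      case InstanceRegex(instance, suffix) =>
        grouped.getOrElseUpdate(instance, new Properties).setProperty(suffix, value)
      case _ => // keys without an instance prefix are skipped, as in MetricsConfig
    }
  }
  println(grouped) // Map(* -> {sink.servlet.class=...}, driver -> {sink.console.period=10})
}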
Example 65
Source File: CsvSink.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.io.File import java.util.{Locale, Properties} import java.util.concurrent.TimeUnit import com.codahale.metrics.{CsvReporter, MetricRegistry} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class CsvSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val CSV_KEY_PERIOD = "period" val CSV_KEY_UNIT = "unit" val CSV_KEY_DIR = "directory" val CSV_DEFAULT_PERIOD = 10 val CSV_DEFAULT_UNIT = "SECONDS" val CSV_DEFAULT_DIR = "/tmp/" val pollPeriod = Option(property.getProperty(CSV_KEY_PERIOD)) match { case Some(s) => s.toInt case None => CSV_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(CSV_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(CSV_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val pollDir = Option(property.getProperty(CSV_KEY_DIR)) match { case Some(s) => s case None => CSV_DEFAULT_DIR } val reporter: CsvReporter = CsvReporter.forRegistry(registry) .formatFor(Locale.US) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build(new File(pollDir)) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 66
Source File: MetricsServlet.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import javax.servlet.http.HttpServletRequest import com.codahale.metrics.MetricRegistry import com.codahale.metrics.json.MetricsModule import com.fasterxml.jackson.databind.ObjectMapper import org.eclipse.jetty.servlet.ServletContextHandler import org.apache.spark.{SparkConf, SecurityManager} import org.apache.spark.ui.JettyUtils._ private[spark] class MetricsServlet( val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val SERVLET_KEY_PATH = "path" val SERVLET_KEY_SAMPLE = "sample" val SERVLET_DEFAULT_SAMPLE = false val servletPath = property.getProperty(SERVLET_KEY_PATH) val servletShowSample = Option(property.getProperty(SERVLET_KEY_SAMPLE)).map(_.toBoolean) .getOrElse(SERVLET_DEFAULT_SAMPLE) val mapper = new ObjectMapper().registerModule( new MetricsModule(TimeUnit.SECONDS, TimeUnit.MILLISECONDS, servletShowSample)) def getHandlers(conf: SparkConf): Array[ServletContextHandler] = { Array[ServletContextHandler]( createServletHandler(servletPath, new ServletParams(request => getMetricsSnapshot(request), "text/json"), securityMgr, conf) ) } def getMetricsSnapshot(request: HttpServletRequest): String = { mapper.writeValueAsString(registry) } override def start() { } override def stop() { } override def report() { } }
Example 67
Source File: Slf4jSink.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.{Slf4jReporter, MetricRegistry} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class Slf4jSink( val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val SLF4J_DEFAULT_PERIOD = 10 val SLF4J_DEFAULT_UNIT = "SECONDS" val SLF4J_KEY_PERIOD = "period" val SLF4J_KEY_UNIT = "unit" val pollPeriod = Option(property.getProperty(SLF4J_KEY_PERIOD)) match { case Some(s) => s.toInt case None => SLF4J_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(SLF4J_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(SLF4J_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val reporter: Slf4jReporter = Slf4jReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build() override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 68
Source File: ConsoleSink.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.{ConsoleReporter, MetricRegistry} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class ConsoleSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val CONSOLE_DEFAULT_PERIOD = 10 val CONSOLE_DEFAULT_UNIT = "SECONDS" val CONSOLE_KEY_PERIOD = "period" val CONSOLE_KEY_UNIT = "unit" val pollPeriod = Option(property.getProperty(CONSOLE_KEY_PERIOD)) match { case Some(s) => s.toInt case None => CONSOLE_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(CONSOLE_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(CONSOLE_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val reporter: ConsoleReporter = ConsoleReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build() override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 69
Source File: GraphiteSink.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.net.InetSocketAddress import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.MetricRegistry import com.codahale.metrics.graphite.{GraphiteUDP, Graphite, GraphiteReporter} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class GraphiteSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val GRAPHITE_DEFAULT_PERIOD = 10 val GRAPHITE_DEFAULT_UNIT = "SECONDS" val GRAPHITE_DEFAULT_PREFIX = "" val GRAPHITE_KEY_HOST = "host" val GRAPHITE_KEY_PORT = "port" val GRAPHITE_KEY_PERIOD = "period" val GRAPHITE_KEY_UNIT = "unit" val GRAPHITE_KEY_PREFIX = "prefix" val GRAPHITE_KEY_PROTOCOL = "protocol" def propertyToOption(prop: String): Option[String] = Option(property.getProperty(prop)) if (!propertyToOption(GRAPHITE_KEY_HOST).isDefined) { throw new Exception("Graphite sink requires 'host' property.") } if (!propertyToOption(GRAPHITE_KEY_PORT).isDefined) { throw new Exception("Graphite sink requires 'port' property.") } val host = propertyToOption(GRAPHITE_KEY_HOST).get val port = propertyToOption(GRAPHITE_KEY_PORT).get.toInt val pollPeriod = propertyToOption(GRAPHITE_KEY_PERIOD) match { case Some(s) => s.toInt case None => GRAPHITE_DEFAULT_PERIOD } val pollUnit: TimeUnit = propertyToOption(GRAPHITE_KEY_UNIT) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(GRAPHITE_DEFAULT_UNIT) } val prefix = propertyToOption(GRAPHITE_KEY_PREFIX).getOrElse(GRAPHITE_DEFAULT_PREFIX) MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val graphite = propertyToOption(GRAPHITE_KEY_PROTOCOL).map(_.toLowerCase) match { case Some("udp") => new GraphiteUDP(new InetSocketAddress(host, port)) case Some("tcp") | None => new Graphite(new InetSocketAddress(host, port)) case Some(p) => throw new Exception(s"Invalid Graphite protocol: $p") } val reporter: GraphiteReporter = GraphiteReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .prefixedWith(prefix) .build(graphite) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 70
Source File: ResetSystemProperties.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.util.Properties import org.apache.commons.lang3.SerializationUtils import org.scalatest.{BeforeAndAfterEach, Suite} import org.apache.spark.SparkFunSuite private[spark] trait ResetSystemProperties extends BeforeAndAfterEach { this: Suite => var oldProperties: Properties = null override def beforeEach(): Unit = { // we need SerializationUtils.clone instead of `new Properties(System.getProperties()` because // the later way of creating a copy does not copy the properties but it initializes a new // Properties object with the given properties as defaults. They are not recognized at all // by standard Scala wrapper over Java Properties then. oldProperties = SerializationUtils.clone(System.getProperties) super.beforeEach() } override def afterEach(): Unit = { try { super.afterEach() } finally { System.setProperties(oldProperties) oldProperties = null } } }
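A minimal sketch of how a suite would mix this trait in; since both ResetSystemProperties and SparkFunSuite are private[spark], the sketch lives in an org.apache.spark package, and the test body is illustrative only.

package org.apache.spark.util

import org.apache.spark.SparkFunSuite

class PropertiesRollbackSuite extends SparkFunSuite with ResetSystemProperties {
  test("system properties set inside a test are rolled back afterwards") {
    System.setProperty("spark.test.sketch.flag", "on")
    assert(System.getProperty("spark.test.sketch.flag") === "on")
    // afterEach() restores the snapshot taken in beforeEach(), so the property
    // is gone again before the next test runs.
  }
}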
Example 71
Source File: PropertiesConversions.scala From asura with MIT License | 5 votes |
package asura.common.config import java.util.Properties import com.typesafe.config.Config object PropertiesConversions { def toProperties(configs: Config*): Properties = { val properties = new Properties() configs.foreach(config => { config.entrySet().forEach(entry => { properties.put(entry.getKey, config.getString(entry.getKey)) }) }) properties } }
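A short usage sketch, assuming Typesafe Config objects built inline with ConfigFactory.parseString; when two configs share a key, the one passed later wins because it is copied into the Properties last.

import com.typesafe.config.ConfigFactory

import asura.common.config.PropertiesConversions

object ToPropertiesSketch extends App {
  val kafkaConf = ConfigFactory.parseString("""bootstrap.servers = "localhost:9092" """)
  val groupConf = ConfigFactory.parseString("""group.id = "asura-demo" """)

  val props = PropertiesConversions.toProperties(kafkaConf, groupConf)
  println(props.getProperty("bootstrap.servers")) // localhost:9092
  println(props.getProperty("group.id"))          // asura-demo
}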
Example 72
Source File: PackageTest.scala From lighthouse with Apache License 2.0 | 5 votes |
package be.dataminded.lighthouse.datalake import java.util.Properties import org.scalatest.funspec.AnyFunSpec import org.scalatest.matchers.should.Matchers class PackageTest extends AnyFunSpec with Matchers { describe("asProperties") { it("should convert a Scala to Java Properties implicitly") { val properties: Properties = Map("test1" -> "1", "test2" -> "2") properties.getProperty("test1") should equal("1") properties.getProperty("test2") should equal("2") } it("does only convert maps of type Map[String, String]") { assertDoesNotCompile("val properties: Properties = Map(\"test1\" -> 1, \"test2\" -> 2)") } } }
Example 73
Source File: BuildProperties.scala From cosmos with Apache License 2.0 | 5 votes |
package com.mesosphere.cosmos import java.util.Properties private[cosmos] class BuildProperties private[cosmos](resourceName: String) { private val props = { val props = new Properties() Option(this.getClass.getResourceAsStream(resourceName)) match { case Some(is) => props.load(is) is.close() props case _ => throw new IllegalStateException(s"Unable to load classpath resources: $resourceName") } } } object BuildProperties { private[this] val loaded = new BuildProperties("/build.properties") def apply(): BuildProperties = loaded implicit class BuildPropertiesOps(val bp: BuildProperties) extends AnyVal { def cosmosVersion: String = Option(bp.props.getProperty("cosmos.version")).getOrElse("unknown-version") } }
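A small usage sketch; because the class is private[cosmos], the caller has to sit inside the com.mesosphere.cosmos package, and the version string falls back to "unknown-version" when the key is absent from /build.properties.

package com.mesosphere.cosmos

import com.mesosphere.cosmos.BuildProperties.BuildPropertiesOps

object VersionSketch extends App {
  // Reuses the cached instance backed by /build.properties on the classpath.
  println(BuildProperties().cosmosVersion)
}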
Example 74
Source File: KafkaBatchProducer.scala From gimel with Apache License 2.0 | 5 votes |
package com.paypal.gimel.kafka2.writer import java.util.Properties import scala.collection.JavaConverters._ import scala.collection.immutable.Map import scala.language.implicitConversions import scala.reflect.runtime.universe._ import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.spark.rdd.RDD import org.apache.spark.sql.DataFrame import com.paypal.gimel.kafka2.conf.{KafkaClientConfiguration, KafkaConstants} import com.paypal.gimel.kafka2.utilities.{KafkaOptionsLoaderUtils, KafkaUtilitiesException} def produceToKafka(conf: KafkaClientConfiguration, dataFrame: DataFrame): Unit = { def MethodName: String = new Exception().getStackTrace().apply(1).getMethodName() logger.info(" @Begin --> " + MethodName) val kafkaProps: Properties = conf.kafkaProducerProps logger.info(s"Kafka Props for Producer -> ${kafkaProps.asScala.mkString("\n")}") val kafkaTopic = conf.kafkaTopics val kafkaTopicsOptionsMap : Map[String, Map[String, String]] = KafkaOptionsLoaderUtils.getAllKafkaTopicsOptions(conf) logger.info("Kafka options loaded -> " + kafkaTopicsOptionsMap) val eachKafkaTopicToOptionsMap = KafkaOptionsLoaderUtils.getEachKafkaTopicToOptionsMap(kafkaTopicsOptionsMap) logger.info("Begin Publishing to Kafka....") try { val kafkaTopicOptions = eachKafkaTopicToOptionsMap.get(kafkaTopic) kafkaTopicOptions match { case None => throw new IllegalStateException(s"""Could not load options for the kafka topic -> $kafkaTopic""") case Some(kafkaOptions) => dataFrame .write .format(KafkaConstants.KAFKA_FORMAT) .option(KafkaConstants.KAFKA_TOPIC, kafkaTopic) .options(kafkaOptions) .save() } } catch { case ex: Throwable => { ex.printStackTrace() val msg = s""" |kafkaTopic -> ${kafkaTopic} |kafkaParams --> ${kafkaProps.asScala.mkString("\n")}} """.stripMargin throw new KafkaUtilitiesException(s"Failed While Pushing Data Into Kafka \n ${msg}") } } logger.info("Publish to Kafka - Completed !") } }
Example 75
Source File: KafkaStreamProducer.scala From gimel with Apache License 2.0 | 5 votes |
package com.paypal.gimel.kafka2.writer import java.util.Properties import scala.collection.JavaConverters._ import scala.collection.immutable.Map import scala.language.implicitConversions import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.streaming.DataStreamWriter import com.paypal.gimel.common.conf.GimelConstants import com.paypal.gimel.kafka2.conf.{KafkaClientConfiguration, KafkaConstants} import com.paypal.gimel.kafka2.utilities.{KafkaOptionsLoaderUtils, KafkaUtilitiesException} object KafkaStreamProducer { val logger = com.paypal.gimel.logger.Logger() def produceStreamToKafka(conf: KafkaClientConfiguration, dataFrame: DataFrame): DataStreamWriter[Row] = { def MethodName: String = new Exception().getStackTrace().apply(1).getMethodName() logger.info(" @Begin --> " + MethodName) val kafkaProps: Properties = conf.kafkaProducerProps logger.info(s"Kafka Props for Producer -> ${kafkaProps.asScala.mkString("\n")}") logger.info("Begin Publishing to Kafka....") // Retrieve kafka options from OptionsLoader if specified val kafkaTopicsOptionsMap : Map[String, Map[String, String]] = KafkaOptionsLoaderUtils.getAllKafkaTopicsOptions(conf) logger.info("kafkaTopicsOptionsMap -> " + kafkaTopicsOptionsMap) try { val eachKafkaTopicToOptionsMap = KafkaOptionsLoaderUtils.getEachKafkaTopicToOptionsMap(kafkaTopicsOptionsMap) val kafkaTopicOptions = eachKafkaTopicToOptionsMap.get(conf.kafkaTopics) kafkaTopicOptions match { case None => throw new IllegalStateException(s"""Could not load options for the kafka topic -> ${conf.kafkaTopics}""") case Some(kafkaOptions) => dataFrame .writeStream .format(KafkaConstants.KAFKA_FORMAT) .option(KafkaConstants.KAFKA_TOPIC, conf.kafkaTopics) .option(GimelConstants.STREAMING_CHECKPOINT_LOCATION, conf.streamingCheckpointLocation) .outputMode(conf.streamingOutputMode) .options(kafkaOptions) } } catch { case ex: Throwable => { ex.printStackTrace() val msg = s""" |kafkaTopic -> ${conf.kafkaTopics} |kafkaParams --> ${kafkaProps.asScala.mkString("\n")}} """.stripMargin throw new KafkaUtilitiesException(s"Failed While Pushing Data Into Kafka \n ${msg}") } } } }
Example 76
Source File: GimelServicesProperties.scala From gimel with Apache License 2.0 | 5 votes |
package com.paypal.gimel.common.gimelservices import java.util.Properties import scala.collection.JavaConverters._ import com.paypal.gimel.common.conf.{GimelConstants, PCatalogUrlConfigs} import com.paypal.gimel.common.utilities.GenericUtils.{getConfigValue, withResources} import com.paypal.gimel.logger.Logger class GimelServicesProperties(userProps: Map[String, String] = Map[String, String]()) { // Get Logger val logger = Logger() // Get Properties val props: Map[String, String] = getProps // Rest Services Method, Host & Port val restMethod: String = getConfigValue(PCatalogUrlConfigs.API_PROTOCOL, userProps, props) val restHost: String = getConfigValue(PCatalogUrlConfigs.API_HOST, userProps, props) val restPort: String = getConfigValue(PCatalogUrlConfigs.API_PORT, userProps, props) // Rest APIs val apiClusterByName: String = userProps.getOrElse(PCatalogUrlConfigs.CLUSTER_BY_NAME, props(PCatalogUrlConfigs.CLUSTER_BY_NAME)) val apiClusterById: String = userProps.getOrElse(PCatalogUrlConfigs.CLUSTER_BY_ID, props(PCatalogUrlConfigs.CLUSTER_BY_ID)) val apiClusters: String = userProps.getOrElse(PCatalogUrlConfigs.API_CLUSTERS, props(PCatalogUrlConfigs.API_CLUSTERS)) val apiObjectSchema: String = userProps.getOrElse(PCatalogUrlConfigs.API_OBJECT_SCHEMA, props(PCatalogUrlConfigs.API_OBJECT_SCHEMA)) val apiDeactivateObjectSchema: String = userProps.getOrElse(PCatalogUrlConfigs.DEACTIVATE_OBJECT_SCHEMA, props(PCatalogUrlConfigs.DEACTIVATE_OBJECT_SCHEMA)) val apiObjectSchemaByStorageSystemId: String = userProps.getOrElse(PCatalogUrlConfigs.OBJECT_SCHEMA_BY_SYSTEM_ID, props(PCatalogUrlConfigs.OBJECT_SCHEMA_BY_SYSTEM_ID)) val apiPagedObjectSchemaByStorageSystemId: String = userProps.getOrElse(PCatalogUrlConfigs.PAGED_OBJECT_SCHEMA_BY_SYSTEM_ID, props(PCatalogUrlConfigs.PAGED_OBJECT_SCHEMA_BY_SYSTEM_ID)) val apiUnregisteredObjectSchemaByStorageSystemId: String = userProps.getOrElse(PCatalogUrlConfigs.UNREGISTERED_OBJECT_SCHEMA_BY_SYSTEM_ID, props(PCatalogUrlConfigs.UNREGISTERED_OBJECT_SCHEMA_BY_SYSTEM_ID)) val apiObjectSchemaByStorageSystemIdContainerObject: String = userProps.getOrElse(PCatalogUrlConfigs.OBJECT_SCHEMA_BY_SYSTEM_CONTAINER_OBJECT, props(PCatalogUrlConfigs.OBJECT_SCHEMA_BY_SYSTEM_CONTAINER_OBJECT)) val apiDataSetRegister: String = userProps.getOrElse(PCatalogUrlConfigs.REGISTER_DATASET, props(PCatalogUrlConfigs.REGISTER_DATASET)) val apiDataSetChangeLog: String = userProps.getOrElse(PCatalogUrlConfigs.CHANGE_LOG_DATASET, props(PCatalogUrlConfigs.CHANGE_LOG_DATASET)) val apiDataSetDeploymentStatus: String = userProps.getOrElse(PCatalogUrlConfigs.DATASET_DEPLOYMENT_STATUS_FOR_SUCESS, props(PCatalogUrlConfigs.DATASET_DEPLOYMENT_STATUS_FOR_SUCESS)) val apiDataSetFailureDeploymentStatus: String = userProps.getOrElse(PCatalogUrlConfigs.DATASET_DEPLOYMENT_STATUS_FOR_FAILURE, props(PCatalogUrlConfigs.DATASET_DEPLOYMENT_STATUS_FOR_FAILURE)) val apiStorageSystems: String = userProps.getOrElse(PCatalogUrlConfigs.API_STORAGE_SYSTEMS, props(PCatalogUrlConfigs.API_STORAGE_SYSTEMS)) val apiStorageSystemById: String = userProps.getOrElse(PCatalogUrlConfigs.SYSTEM_BY_ID, props(PCatalogUrlConfigs.SYSTEM_BY_ID)) val apiStorageSystemAttributesByName: String = userProps.getOrElse(PCatalogUrlConfigs.SYSTEM_ATTRIBUTES_BY_NAME, props(PCatalogUrlConfigs.SYSTEM_ATTRIBUTES_BY_NAME)) val apiStorageTypeById: String = userProps.getOrElse(PCatalogUrlConfigs.TYPE_BY_ID, props(PCatalogUrlConfigs.TYPE_BY_ID)) val apiObjectSchemaContainers: String = userProps.getOrElse(PCatalogUrlConfigs.CONTAINERS_BY_OBJECT_SCHEMA,
props(PCatalogUrlConfigs.CONTAINERS_BY_OBJECT_SCHEMA)) val apiStorageTypeAttributeKeys: String = userProps.getOrElse(PCatalogUrlConfigs.ATTRIBUTE_KEYS_BY_TYPE_ID, props(PCatalogUrlConfigs.ATTRIBUTE_KEYS_BY_TYPE_ID)) val apiUserByName: String = userProps.getOrElse(PCatalogUrlConfigs.USER_BY_NAME, props(PCatalogUrlConfigs.USER_BY_NAME)) val apiDatasetPost: String = userProps.getOrElse(PCatalogUrlConfigs.REGISTER_DATASET, props(PCatalogUrlConfigs.REGISTER_DATASET)) val apiStorageSystemContainers: String = userProps.getOrElse(PCatalogUrlConfigs.STORAGE_SYSTEM_CONTAINERS, props(PCatalogUrlConfigs.STORAGE_SYSTEM_CONTAINERS)) val apiDatasetByName: String = userProps.getOrElse(PCatalogUrlConfigs.DATASET_BY_NAME, props(PCatalogUrlConfigs.DATASET_BY_NAME)) val apiRangerPoliciesByLocation: String = userProps.getOrElse(PCatalogUrlConfigs.RANGER_POLICIES_BY_LOCATION, props(PCatalogUrlConfigs.RANGER_POLICIES_BY_LOCATION)) val apiStorageTypes = userProps.getOrElse(PCatalogUrlConfigs.STORAGE_TYPES, props(PCatalogUrlConfigs.STORAGE_TYPES)) val apiZoneByName = userProps.getOrElse(PCatalogUrlConfigs.ZONE_BY_NAME, props(PCatalogUrlConfigs.ZONE_BY_NAME)) val apiPostStorageSystem = userProps.getOrElse(PCatalogUrlConfigs.POST_STORAGE_SYSTEM, props(PCatalogUrlConfigs.POST_STORAGE_SYSTEM)) val appName = userProps.getOrElse(GimelConstants.APP_NAME, userProps.getOrElse(GimelConstants.SPARK_APP_NAME, "Unknown")) val appId = userProps.getOrElse(GimelConstants.APP_ID, userProps.getOrElse(GimelConstants.SPARK_APP_ID, "Unknown")) val appTag: String = userProps.getOrElse(GimelConstants.APP_TAG, "Unknown").toString // Rest URLs val baseUrl = s"$restMethod://$restHost:$restPort" val urlClusterByName = s"$baseUrl$apiClusterByName" val urlClusterById = s"$baseUrl$apiClusterById" val urlClusters = s"$baseUrl$apiClusters" val urlObjectSchema = s"$baseUrl$apiObjectSchema" val urlDeactivateObject = s"$baseUrl$apiDeactivateObjectSchema" val urlObjectSchemaByStorageSystemId = s"$baseUrl$apiObjectSchemaByStorageSystemId" val urlPagedObjectSchemaByStorageSystemId = s"$baseUrl$apiPagedObjectSchemaByStorageSystemId" val urlUnregisteredObjectSchemaByStorageSystemId = s"$baseUrl$apiUnregisteredObjectSchemaByStorageSystemId" val urlObjectSchemaBySystemContainerObject = s"$baseUrl$apiObjectSchemaByStorageSystemIdContainerObject" val urlDataSetRegister = s"$baseUrl$apiDataSetRegister" val urlDataSetChangeLog = s"$baseUrl$apiDataSetChangeLog" val urlDataSetDeploymentStatus = s"$baseUrl$apiDataSetDeploymentStatus" val urlDataSetFailureDeploymentStatus = s"$baseUrl$apiDataSetFailureDeploymentStatus" val urlStorageSystems = s"$baseUrl$apiStorageSystems" val urlStorageSystemById = s"$baseUrl$apiStorageSystemById" val urlStorageSystemAttributesByName = s"$baseUrl$apiStorageSystemAttributesByName" val urlStorageTypeById = s"$baseUrl$apiStorageTypeById" val urlObjectSchemaContainers = s"$baseUrl$apiObjectSchemaContainers" val urlStorageTypeAttributeKeys = s"$baseUrl$apiStorageTypeAttributeKeys" val urlUserByName = s"$baseUrl$apiUserByName" val urlDataSetPost = s"$baseUrl$apiDatasetPost" val urlStorageSystemContainers = s"$baseUrl$apiStorageSystemContainers" val urlRangerPoliciesByLocation = s"$baseUrl$apiRangerPoliciesByLocation" val urlDataSetByName = s"$baseUrl$apiDatasetByName" val urlStorageTypes = s"$baseUrl$apiStorageTypes" val urlByZoneName = s"$baseUrl$apiZoneByName" val urlStorageSystemPost = s"$baseUrl$apiPostStorageSystem" // Druid URLs val restDruidMethod: String = userProps.getOrElse(PCatalogUrlConfigs.REST_DRUID_PROTOCOL,
props(PCatalogUrlConfigs.REST_DRUID_PROTOCOL)) val restDruidHost: String = userProps.getOrElse(PCatalogUrlConfigs.REST_DRUID_HOST, props(PCatalogUrlConfigs.REST_DRUID_HOST)) val restDruidPort: String = userProps.getOrElse(PCatalogUrlConfigs.REST_DRUID_PORT, props(PCatalogUrlConfigs.REST_DRUID_PORT)) val baseDruidUrl = s"$restDruidMethod://$restDruidHost:$restDruidPort" val apiDruidDataSource: String = userProps.getOrElse(PCatalogUrlConfigs.REST_DRUID_DATASOURCES, props(PCatalogUrlConfigs.REST_DRUID_DATASOURCES)) val urlDruidDataSource = s"$baseDruidUrl$apiDruidDataSource" val apiDruidFull: String = userProps.getOrElse(PCatalogUrlConfigs.REST_DRUID_FULL, props(PCatalogUrlConfigs.REST_DRUID_FULL)) def apply(params: Map[String, String]): GimelServicesProperties = new GimelServicesProperties(params) }
Example 77
Source File: KafkaAdminUtils.scala From gimel with Apache License 2.0 | 5 votes |
package com.paypal.gimel.common.storageadmin import java.util.Properties import kafka.admin._ import kafka.server.ConfigType import kafka.utils.ZkUtils import org.I0Itec.zkclient.{ZkClient, ZkConnection} import org.I0Itec.zkclient.exception.ZkMarshallingError import org.I0Itec.zkclient.serialize.ZkSerializer import org.apache.kafka.common.security.JaasUtils import com.paypal.gimel.logger.Logger object KafkaAdminUtils { val logger = Logger() val isSecurityEnabled = JaasUtils.isZkSecurityEnabled() val sessionTimeOutInMs: Int = 10 * 1000 val connectionTimeOutInMs: Int = 10 * 1000 val zkClient: (String) => ZkClient = new ZkClient(_: String, sessionTimeOutInMs, connectionTimeOutInMs, GimelZKStringSerializer) val zkConnection: (String) => ZkConnection = new ZkConnection(_: String, sessionTimeOutInMs) def isTopicExists(zookKeeperHostAndPort: String, kafkaTopicName: String): Boolean = { def MethodName: String = new Exception().getStackTrace().apply(1).getMethodName() logger.info(" @Begin --> " + MethodName) val client = zkClient(zookKeeperHostAndPort) val connect = zkConnection(zookKeeperHostAndPort) val zkUtil: ZkUtils = new ZkUtils(client, connect, isSecurityEnabled) val result = AdminUtils.topicExists(zkUtil, kafkaTopicName) connect.close() result } } object GimelZKStringSerializer extends ZkSerializer { @throws(classOf[ZkMarshallingError]) def serialize(data: Object): Array[Byte] = { data.asInstanceOf[String].getBytes("UTF-8") } @throws(classOf[ZkMarshallingError]) def deserialize(bytes: Array[Byte]): Object = { if (bytes == null) { null } else { new String(bytes, "UTF-8") } } }
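A usage sketch for the helper above; the ZooKeeper address and topic name are placeholders.

import com.paypal.gimel.common.storageadmin.KafkaAdminUtils

object TopicCheckSketch extends App {
  val exists = KafkaAdminUtils.isTopicExists("localhost:2181", "gimel.demo.topic")
  println(s"topic exists: $exists")
}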
Example 78
Source File: GimelProperties.scala From gimel with Apache License 2.0 | 5 votes |
package com.paypal.gimel.common.conf import java.io.{File, FileInputStream} import java.util.{Calendar, Properties} import scala.collection.JavaConverters._ import scala.collection.mutable import com.paypal.gimel.logger.Logger class GimelProperties(userProps: Map[String, String] = Map[String, String]()) { // Get Logger val logger = Logger() logger.info(s"Initiating --> ${this.getClass.getName}") // Get Properties val props: mutable.Map[String, String] = getProps val runTagUUID: String = java.util.UUID.randomUUID.toString val startTimeMS: String = Calendar.getInstance().getTimeInMillis.toString val tagToAdd: String = s"_$startTimeMS" private def getConf(key: String): String = { userProps.getOrElse(key, props(key)) } // Kafka Properties val kafkaBroker: String = getConf(GimelConstants.KAFKA_BROKER_LIST) val kafkaConsumerCheckPointRoot: String = getConf(GimelConstants.KAFKA_CONSUMER_CHECKPOINT_PATH) val kafkaAvroSchemaKey: String = getConf(GimelConstants.KAFKA_CDH_SCHEMA) val confluentSchemaURL: String = getConf(GimelConstants.CONFLUENT_SCHEMA_URL) val hbaseNameSpace: String = getConf(GimelConstants.HBASE_NAMESPACE) val zkHostAndPort: String = getConf(GimelConstants.ZOOKEEPER_LIST) val zkPrefix: String = getConf(GimelConstants.ZOOKEEPER_STATE) val esHost: String = getConf(GimelConstants.ES_NODE) val esPort: String = getConf(GimelConstants.ES_PORT) // Kerberos val keytab: String = getConf(GimelConstants.KEY_TAB) val principal: String = getConf(GimelConstants.KEY_TAB_PRINCIPAL) val cluster: String = getConf(GimelConstants.CLUSTER) val dataSetDeploymentClusters: String = getConf(GimelConstants.DEPLOYMENT_CLUSTERS) val defaultESCluster: String = props(GimelConstants.ES_POLLING_STORAGES) def hiveURL(cluster: String): String = { userProps.getOrElse(s"gimel.hive.$cluster.url", props(s"gimel.hive.$cluster.url")) } def esURL(escluster: String): String = { val alternateConfig = props(s"gimel.es.${defaultESCluster}.url") userProps.getOrElse(GimelConstants.ES_URL_WITH_PORT, alternateConfig) } def apply(params: Map[String, String]): GimelProperties = new GimelProperties(params) }
Example 79
Source File: MavenMutantRunner.scala From stryker4s with Apache License 2.0 | 5 votes |
package stryker4s.maven.runner import java.nio.file.Path import java.util.Properties import better.files._ import org.apache.maven.project.MavenProject import org.apache.maven.shared.invoker.{DefaultInvocationRequest, InvocationRequest, Invoker} import stryker4s.config.Config import stryker4s.model.{Killed, MavenRunnerContext, Mutant, MutantRunResult, Survived} import stryker4s.mutants.findmutants.SourceCollector import stryker4s.report.Reporter import stryker4s.run.MutantRunner import scala.collection.JavaConverters._ import stryker4s.config.TestFilter class MavenMutantRunner(project: MavenProject, invoker: Invoker, sourceCollector: SourceCollector, reporter: Reporter)( implicit config: Config ) extends MutantRunner(sourceCollector, reporter) { type Context = MavenRunnerContext def initializeTestContext(tmpDir: File): Context = { val goals = List("test") val properties = new Properties(project.getProperties) setTestProperties(properties) invoker.setWorkingDirectory(tmpDir.toJava) MavenRunnerContext(properties, goals, tmpDir) } override def runInitialTest(context: Context): Boolean = { val request = createRequest(context) val result = invoker.execute(request) result.getExitCode == 0 } override def runMutant(mutant: Mutant, context: Context): Path => MutantRunResult = { val request = createRequestWithMutation(mutant, context) val result = invoker.execute(request) result.getExitCode match { case 0 => Survived(mutant, _) case _ => Killed(mutant, _) } } private def createRequest(context: Context): InvocationRequest = new DefaultInvocationRequest() .setGoals(context.goals.asJava) .setOutputHandler(debug(_)) .setBatchMode(true) .setProperties(context.properties) private def createRequestWithMutation(mutant: Mutant, context: Context): InvocationRequest = createRequest(context) .addShellEnvironment("ACTIVE_MUTATION", String.valueOf(mutant.id)) private def setTestProperties(properties: Properties): Unit = { // Stop after first failure. Only works with surefire plugin, not scalatest properties.setProperty( "surefire.skipAfterFailureCount", 1.toString ) // https://maven.apache.org/surefire/maven-surefire-plugin/examples/single-test.html val surefireFilter = "test" // https://www.scalatest.org/user_guide/using_the_scalatest_maven_plugin val scalatestFilter = "wildcardSuites" if (config.testFilter.nonEmpty) { if (properties.getProperty(surefireFilter) != null) { val newTestProperty = properties.getProperty(surefireFilter) +: config.testFilter properties.setProperty(surefireFilter, newTestProperty.mkString(", ")) } else if (properties.getProperty(scalatestFilter) != null) { val newTestProperty = properties.getProperty(scalatestFilter) +: config.testFilter properties.setProperty(scalatestFilter, newTestProperty.mkString(",")) } else { properties.setProperty(surefireFilter, config.testFilter.mkString(", ")) properties.setProperty(scalatestFilter, config.testFilter.mkString(",")) } } } }
Example 80
Source File: EmailSender.scala From diffy with GNU Affero General Public License v3.0 | 5 votes |
package ai.diffy.util import com.twitter.logging.Logger import com.twitter.util.{FuturePool, Future} import javax.mail._ import javax.mail.internet.{InternetAddress, MimeMessage} import java.util.Properties case class SimpleMessage( from: String, to: String, bcc: String, subject: String, body: String) class EmailSender(log: Logger, send: MimeMessage => Unit = Transport.send) { private[this] val props = new Properties props.put("mail.smtp.host", "localhost") props.put("mail.smtp.auth", "false") props.put("mail.smtp.port", "25") private[this] val session = Session.getDefaultInstance(props, null) def apply(msg: SimpleMessage): Future[Unit] = FuturePool.unboundedPool { val message = new MimeMessage(session) message.setFrom(new InternetAddress(msg.from)) message.setRecipients( Message.RecipientType.TO, InternetAddress.parse(msg.to) map { _.asInstanceOf[Address]} ) message.addRecipients( Message.RecipientType.BCC, InternetAddress.parse(msg.bcc) map { _.asInstanceOf[Address]} ) message.setSubject(msg.subject) message.setContent(msg.body, "text/html; charset=utf-8") try { send(message) } catch { case e: Exception => log.error(e, "Failed to send email report. Ensure Diffy can access port 25.") } } }
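A usage sketch for the sender above, with placeholder addresses; the sender is hard-wired to the local SMTP relay on port 25, and the returned Twitter Future is awaited so the program does not exit before the message is handed off.

import com.twitter.logging.Logger
import com.twitter.util.Await

import ai.diffy.util.{EmailSender, SimpleMessage}

object SendReportSketch extends App {
  val sender = new EmailSender(Logger.get("email"))
  Await.result(sender(SimpleMessage(
    from = "diffy@example.com",
    to = "team@example.com",
    bcc = "archive@example.com",
    subject = "Diffy report",
    body = "<h1>No regressions detected</h1>")))
}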
Example 81
Source File: StanfordBaseWrapperPlugin.scala From recogito2 with Apache License 2.0 | 5 votes |
package org.pelagios.recogito.plugins.ner.stanford import java.util.{ArrayList, Properties} import edu.stanford.nlp.ling.CoreAnnotations import edu.stanford.nlp.pipeline.{CoreDocument, StanfordCoreNLP} import edu.stanford.nlp.util.StringUtils import org.pelagios.recogito.sdk.PluginEnvironment import org.pelagios.recogito.sdk.ner._ import scala.collection.JavaConverters._ import org.slf4j.LoggerFactory case class StanfordEntity(chars: String, entityTag: String, charOffset: Int) abstract class StanfordBaseWrapperPlugin( lang: String, config: String, description: String ) extends NERPlugin { private val logger = LoggerFactory.getLogger(this.getClass) private lazy val pipeline = { logger.info("Initializing NER pipeline") val pipeline = new StanfordCoreNLP(props) logger.info("Pipeline initialized") pipeline } private def toEntityType(entityTag: String) = entityTag match { case "LOCATION" | "CITY" | "COUNTRY" | "STATE_OR_PROVINCE" | "NATIONALITY" => Some(EntityType.LOCATION) case "PERSON" => Some(EntityType.PERSON) case "DATE" => Some(EntityType.DATE) case _ => None } private lazy val props = { val props = StringUtils.argsToProperties(Seq("-props", config):_*) props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner") props } override val getName = "Stanford CoreNLP" override val getDescription = description override val getOrganization = "Stanford NLP Group" override val getVersion = "3.9.1" override val getSupportedLanguages = Seq(lang).asJava override def parse(text: String, env: PluginEnvironment) = { val document = new CoreDocument(text) pipeline.annotate(document) val entities = document.tokens().asScala.foldLeft(Seq.empty[StanfordEntity]) { (result, token) => val entityTag = token.get(classOf[CoreAnnotations.NamedEntityTagAnnotation]) val chars = token.get(classOf[CoreAnnotations.TextAnnotation]) val charOffset = token.beginPosition result.headOption match { case Some(previousEntity) if previousEntity.entityTag == entityTag => // Append to previous phrase if entity tag is the same StanfordEntity(previousEntity.chars + " " + chars, entityTag, previousEntity.charOffset) +: result.tail case _ => // Either this is the first token (result.headOption == None), or a new phrase StanfordEntity(chars, entityTag, charOffset) +: result } } // StanfordCoreNLP.clearAnnotatorPool entities.withFilter(_.entityTag != "O") .flatMap(e => toEntityType(e.entityTag).map(etype => new Entity(e.chars, etype, e.charOffset))).asJava } }
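The base class only needs a language code, a CoreNLP properties resource and a description, so a concrete plugin is a one-liner. The resource name below is an assumption for illustration, not the plugin's real configuration.

package org.pelagios.recogito.plugins.ner.stanford

class StanfordEnglishNERPlugin extends StanfordBaseWrapperPlugin(
  lang = "en",
  config = "english.properties", // assumed resource name, for illustration only
  description = "English NER based on Stanford CoreNLP"
)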
Example 82
Source File: RocksDB.scala From unicorn with Apache License 2.0 | 5 votes |
package unicorn.bigtable.rocksdb import java.io.File import java.util.Properties import org.rocksdb.{ColumnFamilyDescriptor, Options} import unicorn.bigtable.Database def create(path: String): RocksDB = { val dir = new java.io.File(path) require(!dir.exists, s"Directory $path exists") dir.mkdir new RocksDB(path) } def apply(path: String): RocksDB = { new RocksDB(path) } }
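A usage sketch, assuming the create helper shown above sits on the RocksDB companion object; the path is a placeholder, and create refuses to reuse an existing directory.

import unicorn.bigtable.rocksdb.RocksDB

object RocksDBSketch extends App {
  val db = RocksDB.create("/tmp/unicorn-rocksdb-sketch") // fails if the directory already exists
  // ... use the returned instance through the Database interface it extends ...
}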
Example 83
Source File: Cassandra.scala From unicorn with Apache License 2.0 | 5 votes |
package unicorn.bigtable.cassandra import java.util.Properties import java.net.{InetAddress, UnknownHostException} import scala.collection.JavaConversions._ import org.apache.cassandra.locator.SimpleSnitch import org.apache.cassandra.thrift.Cassandra.Client import org.apache.cassandra.thrift.{ConsistencyLevel, KsDef, CfDef} import org.apache.thrift.transport.TFramedTransport import org.apache.thrift.transport.TSocket import org.apache.thrift.protocol.TBinaryProtocol import unicorn.bigtable._ import unicorn.util.Logging override def compactTable(name: String): Unit = { // fail silently log.warn("Cassandra client API doesn't support compaction") } } object Cassandra { def apply(host: String, port: Int): Cassandra = { // For ultra-wide row, we set the maxLength to 16MB. // Note that we also need to set the server side configuration // thrift_framed_transport_size_in_mb in cassandra.yaml // In case of ultra-wide row, it is better to use intra row scan. val transport = new TFramedTransport(new TSocket(host, port), 16 * 1024 * 1024) transport.open new Cassandra(transport) } }
Example 84
Source File: NatsOutputMonitor.scala From smart-meter with MIT License | 5 votes |
package com.logimethods.nats.connector.spark.monitor import java.util.Properties import org.nats._ import java.nio.ByteBuffer; // @see https://github.com/tyagihas/scala_nats object NatsOutputMonitor extends App { Thread.sleep(2000) val inputSubject = args(0) println("Will be listening to messages from " + inputSubject) val natsUrl = System.getenv("NATS_URI") if (inputSubject.toUpperCase().contains("STREAMING")) { val clusterID = System.getenv("NATS_CLUSTER_ID"); System.out.println("NATS_CLUSTER_ID = " + clusterID); NatsStreamingOutputMonitor.main(inputSubject, natsUrl, clusterID) } else { val properties = new Properties() //@see https://github.com/tyagihas/java_nats/blob/master/src/main/java/org/nats/Connection.java properties.put("servers", natsUrl) val conn = Conn.connect(properties) if (args.length > 1) {// TEST mode // TODO Consider Integers & Floats val espectedValue = args(1).toInt println("Is especting a value equals to " + espectedValue) var iterations = 3 conn.subscribe(inputSubject, (msg: Msg) => { println("Received message: " + msg.body) iterations -= 1 if (iterations <= 0) { val receivedValue = msg.body.toInt if (receivedValue == espectedValue) { // "Tests passed!" System.exit(0) } else { // "Tests failed!" System.exit(1) } } }) } else { // REGULAR mode conn.subscribe(inputSubject, (msg: MsgB) => { import java.time._ val f = ByteBuffer.wrap(msg.body) if (msg.subject.contains("max")) { println(s"Received message: (${msg.subject}, ${LocalDateTime.ofEpochSecond(f.getLong(), 0, ZoneOffset.MIN)}, ${f.getFloat()})") } else if (msg.subject.contains("alert")) { println(s"Received message: (${msg.subject}, ${LocalDateTime.ofEpochSecond(f.getLong(), 0, ZoneOffset.MIN)}, ${f.getInt()})") } else { println(s"Received message: (${msg.subject}, ${f})") } }) } } }
Example 85
Source File: NatsInjection.scala From smart-meter with MIT License | 5 votes |
package com.logimethods.smartmeter.inject import akka.actor.{ActorRef, Props} import io.gatling.core.Predef._ import io.gatling.core.action.builder.ActionBuilder import com.logimethods.connector.gatling.to_nats._ import scala.concurrent.duration._ import java.util.Properties import io.nats.client.Nats.PROP_URL import com.logimethods.smartmeter.generate._ class NatsInjection extends Simulation { // println("System properties: " + System.getenv()) try { val properties = new Properties() val natsUrl = System.getenv("NATS_URI") properties.setProperty(PROP_URL, natsUrl) val subject = System.getenv("GATLING_TO_NATS_SUBJECT") if (subject == null) { println("No Subject has been defined through the 'GATLING_TO_NATS_SUBJECT' Environment Variable!!!") } else { println("Will emit messages to " + subject) val natsProtocol = NatsProtocol(properties, subject) val usersPerSec = System.getenv("GATLING_USERS_PER_SEC").toDouble val duration = System.getenv("GATLING_DURATION").toInt val streamingDuration = System.getenv("STREAMING_DURATION").toInt val slot = System.getenv("TASK_SLOT").toInt val randomness = System.getenv("RANDOMNESS").toFloat val predictionLength = System.getenv("PREDICTION_LENGTH").toInt val timeRoot = System.getenv("TIME_ROOT").toInt TimeProvider.config = Some(timeRoot) val natsScn = scenario("smartmeter_"+slot).exec( NatsBuilder(new ConsumerInterpolatedVoltageProvider(slot, usersPerSec, streamingDuration, randomness, predictionLength))) setUp( natsScn.inject(constantUsersPerSec(usersPerSec) during (duration minute)) ).protocols(natsProtocol) } } catch { case e: Exception => { println(e.toString()) e.printStackTrace() } } }
Example 86
Source File: NatsStreamingInjection.scala From smart-meter with MIT License | 5 votes |
package com.logimethods.smartmeter.inject import akka.actor.{ActorRef, Props} import io.gatling.core.Predef._ import io.gatling.core.action.builder.ActionBuilder import com.logimethods.connector.gatling.to_nats._ import scala.concurrent.duration._ import java.util.Properties import com.logimethods.smartmeter.generate._ class NatsStreamingInjection extends Simulation { // println("System properties: " + System.getenv()) try { val natsUrl = System.getenv("NATS_URI") val clusterID = System.getenv("NATS_CLUSTER_ID") var subject = System.getenv("GATLING_TO_NATS_SUBJECT") if (subject == null) { println("No Subject has been defined through the 'GATLING_TO_NATS_SUBJECT' Environment Variable!!!") } else { println("Will emit messages to " + subject) val natsProtocol = NatsStreamingProtocol(natsUrl, clusterID, subject) val usersPerSec = System.getenv("GATLING_USERS_PER_SEC").toDouble val duration = System.getenv("GATLING_DURATION").toInt val streamingDuration = System.getenv("STREAMING_DURATION").toInt val slot = System.getenv("TASK_SLOT").toInt val randomness = System.getenv("RANDOMNESS").toFloat val predictionLength = System.getenv("PREDICTION_LENGTH").toInt val timeRoot = System.getenv("TIME_ROOT").toInt TimeProvider.config = Some(timeRoot) val natsScn = scenario("smartmeter_"+slot).exec( NatsStreamingBuilder(new ConsumerInterpolatedVoltageProvider(slot, usersPerSec, streamingDuration, randomness, predictionLength))) setUp( natsScn.inject(constantUsersPerSec(usersPerSec) during (duration minute)) ).protocols(natsProtocol) } } catch { case e: Exception => { println(e.toString()) e.printStackTrace() } } }
Example 87
Source File: Credentials.scala From sbt-coursier with Apache License 2.0 | 5 votes |
package coursier import java.io.{File, FileInputStream} import java.util.Properties import lmcoursier.definitions.Authentication // actually deprecated (all public ways of creating that are) sealed abstract class Credentials extends Product with Serializable { def user: String def password: String def authentication: Authentication = Authentication(user, password) } object Credentials { private final case class Direct(user: String, password: String) extends Credentials { override def toString = s"Direct($user, ******)" } private final case class FromFile(file: File) extends Credentials { private lazy val props = { val p = new Properties() p.load(new FileInputStream(file)) p } private def findKey(keys: Seq[String]) = keys .iterator .map(props.getProperty) .filter(_ != null) .toStream .headOption .getOrElse { throw new NoSuchElementException(s"${keys.head} key in $file") } lazy val user: String = findKey(FromFile.fileUserKeys) lazy val password: String = findKey(FromFile.filePasswordKeys) } private object FromFile { // from sbt.Credentials private val fileUserKeys = Seq("user", "user.name", "username") private val filePasswordKeys = Seq("password", "pwd", "pass", "passwd") } @deprecated("Use coursierExtraCredentials rather than coursierCredentials", "1.1.0-M14") def apply(user: String, password: String): Credentials = Direct(user, password) @deprecated("Use coursierExtraCredentials rather than coursierCredentials", "1.1.0-M14") def apply(file: File): Credentials = FromFile(file) }
Example 88
Source File: Application.scala From kafka-serde-scala with Apache License 2.0 | 5 votes |
package io.github.azhur.kafkaserdescala.example import java.util.Properties import io.github.azhur.kafkaserdecirce.CirceSupport import org.apache.kafka.clients.consumer.ConsumerConfig import org.apache.kafka.streams.{ KafkaStreams, StreamsConfig, Topology } import org.apache.kafka.streams.scala.StreamsBuilder object Application extends App with CirceSupport { import io.circe.generic.auto._ import org.apache.kafka.streams.scala.Serdes._ import org.apache.kafka.streams.scala.ImplicitConversions._ case class User(id: Long, name: String, age: Int) val topology = buildTopology("input_users", "output_users") val streamingApp = new KafkaStreams(topology, streamProperties()) streamingApp.start() sys.addShutdownHook({ streamingApp.close() }) def buildTopology(inputTopic: String, outputTopic: String): Topology = { val streamsBuilder = new StreamsBuilder() streamsBuilder .stream[String, User](inputTopic) .filter((_, user) => user.age > 18) .to(outputTopic) streamsBuilder.build() } def streamProperties(): Properties = { val streamsConfiguration = new Properties streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "test-app") streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092") streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, Topology.AutoOffsetReset.EARLIEST.toString.toLowerCase) streamsConfiguration } }
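A companion sketch that feeds the streams app above: it publishes one JSON-encoded User to input_users with the plain Kafka producer, which matches the JSON shape the circe serde expects on the streams side (the broker address is a placeholder).

import java.util.Properties

import io.circe.generic.auto._
import io.circe.syntax._
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

object UserProducerSketch extends App {
  case class User(id: Long, name: String, age: Int)

  val props = new Properties()
  props.put("bootstrap.servers", "localhost:9092")
  props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
  props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

  val producer = new KafkaProducer[String, String](props)
  // The streams app filters on age > 18, so this record should reach output_users.
  producer.send(new ProducerRecord("input_users", "42", User(42L, "Jane", 30).asJson.noSpaces))
  producer.close()
}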
Example 89
Source File: SQLConnectorParser.scala From fusion-data with Apache License 2.0 | 5 votes |
package mass.connector.sql import java.util.Properties import helloscala.common.Configuration import mass.connector.{ ConnectorParser, ConnectorSetting } import mass.core.XmlUtils import scala.xml.Node class SQLConnectorParser extends ConnectorParser { import mass.core.XmlUtils.XmlRich override val `type` = "jdbc" def parseSettingFromXML(node: Node): ConnectorSetting = { val props = new Properties() val id = node.attr("name") props.put("poolName", id) (node \\ "props" \\ "prop").foreach { prop => val key = (prop \\ "@key").text val value = getText(prop) props.put(key, value) } ConnectorSetting(Configuration.load(props)) } override def parseFromXML(node: Node): SQLConnector = { val setting = parseSettingFromXML(node) SQLConnector(node.attr("name"), setting) } @inline private def getText(prop: Node): String = prop.getAttr("value").getOrElse(XmlUtils.text(prop \ "value")) }
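A sketch of the XML shape the parser appears to expect, inferred from the code above (a name attribute plus key/value pairs under props); the element names and property keys are illustrative assumptions.

import mass.connector.sql.SQLConnectorParser

object ParseConnectorSketch extends App {
  val connectorXml =
    <connector name="report-db">
      <props>
        <prop key="jdbcUrl" value="jdbc:postgresql://localhost:5432/report"/>
        <prop key="username" value="report"/>
        <prop key="password" value="secret"/>
      </props>
    </connector>

  val connector = new SQLConnectorParser().parseFromXML(connectorXml)
  println(connector) // an SQLConnector named "report-db" configured with the three props
}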
Example 90
Source File: Producer.scala From fusion-data with Apache License 2.0 | 5 votes |
package kafkasample.demo import java.util.Properties import java.util.concurrent.TimeUnit import org.apache.kafka.clients.producer.{ KafkaProducer, ProducerRecord, RecordMetadata } object Producer { def main(args: Array[String]): Unit = { val props = new Properties() props.put("bootstrap.servers", "localhost:9092") props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](props) try { run(producer) } finally { TimeUnit.SECONDS.sleep(5) producer.close() } } private def run[K, V](producer: KafkaProducer[String, String]) { val record = new ProducerRecord[String, String]("customerCountries", "羊八井222") producer.send(record, (metadata: RecordMetadata, e: Exception) => { if (e ne null) { e.printStackTrace() } println(s"metadata: $metadata") }) } }
Example 91
Source File: Consumer.scala From fusion-data with Apache License 2.0 | 5 votes |
package kafkasample.demo import java.util.{ Collections, Properties } import java.util.concurrent.TimeUnit import org.apache.kafka.clients.consumer.KafkaConsumer object Consumer { @volatile private var isStop = false def main(args: Array[String]): Unit = { val props = new Properties() props.put("bootstrap.servers", "localhost:9092") props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer") props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer") props.put("group.id", "CountryCounter") val consumer = new KafkaConsumer[String, String](props) val thread = new Thread() { override def run(): Unit = Consumer.run(consumer) } try { thread.start() } finally { TimeUnit.SECONDS.sleep(50) isStop = true thread.join() consumer.close() } } private def run(consumer: KafkaConsumer[String, String]): Unit = { consumer.subscribe(Collections.singleton("customerCountries")) while (!isStop && !Thread.currentThread().isInterrupted) { val records = consumer.poll(java.time.Duration.ofMillis(100)) records.forEach { record => println(s"topic = ${record.topic()}, partition = ${record.partition()}, offset = ${record .offset()}, key: ${record.key()}, value = ${record.value()}") } consumer.commitAsync() } } }
Example 92
Source File: HazelCastConnection.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hazelcast import java.io.{File, FileNotFoundException} import java.net.URI import java.util.{Properties, UUID} import com.datamountaineer.streamreactor.connect.hazelcast.config.{HazelCastConnectionConfig, HazelCastSocketConfig} import com.hazelcast.cache.HazelcastCachingProvider import com.hazelcast.client.HazelcastClient import com.hazelcast.client.config.{ClientConfig, ClientNetworkConfig, SocketOptions} import com.hazelcast.config.{GroupConfig, SSLConfig} import com.hazelcast.core.HazelcastInstance import javax.cache.{CacheManager, Caching} import scala.collection.JavaConverters._ object HazelCastConnection { def buildClient(config: HazelCastConnectionConfig): HazelcastInstance = { val clientConfig = new ClientConfig val networkConfig = clientConfig.getNetworkConfig if (config.sslEnabled) { setSSLOptions(config) networkConfig.setSSLConfig(new SSLConfig().setEnabled(true)) } networkConfig.setAddresses(config.members.toList.asJava) val groupConfig = new GroupConfig(config.group, config.pass) clientConfig.setGroupConfig(groupConfig) buildSocketOptions(networkConfig, config.socketConfig) clientConfig.setInstanceName(config.group + "-kafka-connect-" + UUID.randomUUID().toString) HazelcastClient.newHazelcastClient(clientConfig) } private def buildSocketOptions(clientNetworkConfig: ClientNetworkConfig, socketConfig: HazelCastSocketConfig): SocketOptions = { val socketOptions = clientNetworkConfig.getSocketOptions socketOptions.setKeepAlive(socketConfig.keepAlive) socketOptions.setTcpNoDelay(socketConfig.tcpNoDelay) socketOptions.setReuseAddress(socketConfig.reuseAddress) socketOptions.setLingerSeconds(socketConfig.lingerSeconds) socketOptions.setBufferSize(socketConfig.bufferSize) socketOptions } def getCacheManager(client: HazelcastInstance, name: String) : CacheManager = { val instanceName = client.getName() val cachingProvider = Caching.getCachingProvider() // Create Properties instance pointing to a named HazelcastInstance val properties = new Properties() properties.setProperty(HazelcastCachingProvider.HAZELCAST_INSTANCE_NAME, instanceName) val cacheManagerName = new URI(name ) val cacheManager = cachingProvider.getCacheManager(cacheManagerName, null, properties ) cacheManager } def setSSLOptions(config: HazelCastConnectionConfig) = { config.keyStoreLocation match { case Some(path) => if (!new File(path).exists) { throw new FileNotFoundException(s"Keystore not found in: $path") } System.setProperty("javax.net.ssl.keyStorePassword", config.keyStorePassword.getOrElse("")) System.setProperty("javax.net.ssl.keyStore", path) System.setProperty("javax.net.ssl.keyStoreType", config.keyStoreType.getOrElse("jks")) case None => } config.trustStoreLocation match { case Some(path) => if (!new File(path).exists) { throw new FileNotFoundException(s"Truststore not found in: $path") } System.setProperty("javax.net.ssl.trustStorePassword", config.trustStorePassword.getOrElse("")) System.setProperty("javax.net.ssl.trustStore", path) System.setProperty("javax.net.ssl.trustStoreType", config.trustStoreType.getOrElse("jks")) case None => } } }
Example 93
Source File: GitRepositoryState.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.ftp.source import java.util.Properties object GitRepositoryState { val props = { val p = new Properties() p.load(getClass.getClassLoader.getResourceAsStream("git.properties")) p } def describe: String = props.getProperty("git.commit.id.describe") def build: String = props.getProperty("git.build.time") def commitDate: String = props.getProperty("git.commit.time") def commitId: String = props.getProperty("git.commit.id") def summary:String = s"$describe ($commitId), committed at $commitDate, built at $build" }
Example 94
Source File: MySqlDemo.scala From Hands-On-Deep-Learning-with-Apache-Spark with MIT License | 5 votes |
package org.googlielmo.sparkdatabase import java.sql.DriverManager import java.util.Properties import org.apache.spark.sql.SparkSession object MySqlDemo { @throws[Exception] def main(args: Array[String]): Unit = { var jdbcUsername = "myslus" var jdbcPassword = "your_password" Class.forName("com.mysql.jdbc.Driver") val jdbcHostname = "your_db_hostname_or_ip" val jdbcPort = 3306 val jdbcDatabase ="sparkdb" // Create the JDBC URL without passing in the user and password parameters. val jdbcUrl = s"jdbc:mysql://${jdbcHostname}:${jdbcPort}/${jdbcDatabase}" // Create a Properties() object to hold the parameters. val connectionProperties = new Properties() connectionProperties.put("user", s"${jdbcUsername}") connectionProperties.put("password", s"${jdbcPassword}") val connection = DriverManager.getConnection(jdbcUrl, jdbcUsername, jdbcPassword) connection.isClosed() val spark = SparkSession .builder() .master("local[*]") .appName("Spark MySQL basic example") .getOrCreate() import spark.implicits._ val jdbcDF = spark.read .format("jdbc") .option("url", jdbcUrl) .option("dbtable", s"${jdbcDatabase}.sparkexample") .option("user", jdbcUsername) .option("password", jdbcPassword) .load() jdbcDF.printSchema() println("Record count = " + jdbcDF.count()) val filteredJDBC = jdbcDF.select("MerchantCountryCode", "TransactionAmountUSD") .groupBy("MerchantCountryCode") .avg("TransactionAmountUSD") filteredJDBC.collect.foreach { println } spark.close() } }
Example 95
Source File: functions.scala From Hands-On-Deep-Learning-with-Apache-Spark with MIT License | 5 votes |
package com.databricks.spark.corenlp import java.util.Properties import scala.collection.JavaConverters._ import edu.stanford.nlp.ling.CoreAnnotations import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations import edu.stanford.nlp.pipeline.{Annotation, CleanXmlAnnotator, StanfordCoreNLP, TokenizerAnnotator} import edu.stanford.nlp.pipeline.CoreNLPProtos.Sentiment import edu.stanford.nlp.sentiment.SentimentCoreAnnotations import edu.stanford.nlp.simple.{Document, Sentence} import edu.stanford.nlp.util.Quadruple import org.apache.spark.sql.functions.udf object functions { @transient private var sentimentPipeline: StanfordCoreNLP = _ private def getOrCreateSentimentPipeline(): StanfordCoreNLP = { if (sentimentPipeline == null) { val props = new Properties() props.setProperty("annotators", "tokenize, ssplit, parse, sentiment") sentimentPipeline = new StanfordCoreNLP(props) } sentimentPipeline } private case class OpenIE(subject: String, relation: String, target: String, confidence: Double) { def this(quadruple: Quadruple[String, String, String, java.lang.Double]) = this(quadruple.first, quadruple.second, quadruple.third, quadruple.fourth) } private case class CorefMention(sentNum: Int, startIndex: Int, mention: String) private case class CorefChain(representative: String, mentions: Seq[CorefMention]) private case class SemanticGraphEdge( source: String, sourceIndex: Int, relation: String, target: String, targetIndex: Int, weight: Double) def sentiment = udf { sentence: String => val pipeline = getOrCreateSentimentPipeline() val annotation = pipeline.process(sentence) val tree = annotation.get(classOf[CoreAnnotations.SentencesAnnotation]) .asScala .head .get(classOf[SentimentCoreAnnotations.SentimentAnnotatedTree]) RNNCoreAnnotations.getPredictedClass(tree) } }
Example 96
Source File: DBFunctions.scala From albedo with MIT License | 5 votes |
package ws.vinta.albedo.closures import java.sql.DriverManager import java.util.Properties import scala.collection.mutable.ArrayBuffer object DBFunctions { def selectUserStarredRepos(userId: Int, limit: Int, offset: Int): Array[Int] = { val dbUrl = "jdbc:mysql://127.0.0.1:3306/albedo?verifyServerCertificate=false&useSSL=false&rewriteBatchedStatements=true" val props = new Properties() props.setProperty("driver", "com.mysql.jdbc.Driver") props.setProperty("user", "root") props.setProperty("password", "123") val connection = DriverManager.getConnection(dbUrl, props) val statement = connection.createStatement() val resultSet = statement.executeQuery(s""" SELECT repo_id FROM app_repostarring WHERE user_id = $userId ORDER BY starred_at DESC LIMIT $limit OFFSET $offset; """.stripMargin(' ')) val repoIds = ArrayBuffer.empty[Int] while (resultSet.next()) { val repoId = resultSet.getInt("repo_id") repoIds += repoId } connection.close() repoIds.toArray } }
Example 97
Source File: Producer.scala From awesome-recommendation-engine with Apache License 2.0 | 5 votes |
package example.producer import java.util.Properties import example.utils.KafkaConfig import kafka.producer.{KeyedMessage, ProducerConfig, Producer => KafkaProducer} case class Producer[A](topic: String) { protected val config = new ProducerConfig(KafkaConfig()) private lazy val producer = new KafkaProducer[A, A](config) def send(message: A) = sendMessage(producer, keyedMessage(topic, message)) def sendStream(stream: Stream[A]) = { val iter = stream.iterator while(iter.hasNext) { send(iter.next()) } } private def keyedMessage(topic: String, message: A): KeyedMessage[A, A] = new KeyedMessage[A, A](topic, message) private def sendMessage(producer: KafkaProducer[A, A], message: KeyedMessage[A, A]) = producer.send(message) } object Producer { def apply[T](topic: String, props: Properties) = new Producer[T](topic) { override val config = new ProducerConfig(props) } }
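A short usage sketch; the topic name and broker settings are placeholders, and the Properties variant goes through the companion apply shown above:

import java.util.Properties

import example.producer.Producer

object ProducerSketch extends App {
  // Default configuration is read through KafkaConfig()
  val producer = Producer[String]("page_views")
  producer.send("hello")

  // Or supply the old-style producer settings explicitly
  val props = new Properties()
  props.put("metadata.broker.list", "localhost:9092")
  props.put("serializer.class", "kafka.serializer.StringEncoder")
  val custom = Producer[String]("page_views", props)
  custom.sendStream(Stream("a", "b", "c"))
}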
Example 98
Source File: KafkaConfig.scala From awesome-recommendation-engine with Apache License 2.0 | 5 votes |
package example.utils import java.util.Properties import com.typesafe.config.ConfigFactory import KafkaConfig._ trait KafkaConfig extends Properties { private val consumerPrefixWithDot = consumerPrefix + "." private val producerPrefixWithDot = producerPrefix + "." private val allKeys = Seq(groupId, zookeeperConnect, brokers, serializer, partitioner, requiredAcks) lazy val typesafeConfig = ConfigFactory.load() allKeys.map { key => if (typesafeConfig.hasPath(key)) put(key.replace(consumerPrefixWithDot, "").replace(producerPrefixWithDot, ""), typesafeConfig.getString(key)) } def getCustomString(key: String) = typesafeConfig.getString(key) def getCustomInt(key: String) = typesafeConfig.getInt(key) } object KafkaConfig { val consumerPrefix = "consumer" val producerPrefix = "producer" //Consumer keys val groupId = s"$consumerPrefix.group.id" val zookeeperConnect = s"$consumerPrefix.zookeeper.connect" //example.producer.Producer keys val brokers = s"$producerPrefix.metadata.broker.list" val serializer = s"$producerPrefix.serializer.class" val partitioner = s"$producerPrefix.partitioner.class" val requiredAcks = s"$producerPrefix.request.required.acks" def apply() = new KafkaConfig {} }
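For context, a sketch of the application.conf keys the trait copies into itself and how they are read back; the values are placeholders:

// application.conf on the classpath:
//   consumer.group.id             = "demo-group"
//   consumer.zookeeper.connect    = "localhost:2181"
//   producer.metadata.broker.list = "localhost:9092"
//   producer.serializer.class     = "kafka.serializer.StringEncoder"

import example.utils.KafkaConfig

object KafkaConfigSketch extends App {
  val config = KafkaConfig() // backed by ConfigFactory.load()
  // The consumer./producer. prefixes are stripped before the values land in Properties
  println(config.getProperty("group.id"))             // demo-group
  println(config.getProperty("metadata.broker.list")) // localhost:9092
  println(config.getCustomString("producer.serializer.class"))
}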
Example 99
Source File: ConnectionUtils.scala From azure-sqldb-spark with MIT License | 5 votes |
package com.microsoft.azure.sqldb.spark.connect import java.sql.{Connection, DriverManager, SQLException} import java.util.Properties import com.microsoft.azure.sqldb.spark.config.{Config, SqlDBConfig} /** * Helper and utility methods used for setting up or using a connection */ private[spark] object ConnectionUtils { /** * Retrieves all connection properties in the Config object * and returns them as a [[Properties]] object. * * @param config the Config object with specified connection properties. * @return A connection [[Properties]] object. */ def createConnectionProperties(config: Config): Properties = { val connectionProperties = new Properties() for (key <- config.getAllKeys) { connectionProperties.put(key.toString, config.get[String](key.toString).get) } connectionProperties } /** * Adds the "jdbc:sqlserver://" suffix to a general server url * * @param url the string url without the JDBC prefix * @return the url with the added JDBC prefix */ def createJDBCUrl(url: String): String = SqlDBConfig.JDBCUrlPrefix + url /** * Gets a JDBC connection based on Config properties * * @param config any read or write Config * @return a JDBC Connection */ def getConnection(config: Config): Connection = { Class.forName(SqlDBConfig.SQLjdbcDriver) DriverManager.getConnection( createJDBCUrl(config.get[String](SqlDBConfig.URL).get), createConnectionProperties(config)) } /** * Retrieves the DBTable or QueryCustom specified in the config. * NOTE: only one property can exist within config. * * @param config the Config object with specified properties. * @return The specified DBTable or QueryCustom */ def getTableOrQuery(config: Config): String = { config.get[String](SqlDBConfig.DBTable).getOrElse( getQueryCustom(config.get[String](SqlDBConfig.QueryCustom).get) ) } /** * The JDBC driver requires parentheses and a temp variable around any custom queries. * This adds the required syntax so users only need to specify the query. * * @param query the default query * @return the syntactically correct query to be executed by the JDBC driver. */ def getQueryCustom(query: String): String = s"($query) QueryCustom" }
Example 100
Source File: ConnectionUtilsSpec.scala From azure-sqldb-spark with MIT License | 5 votes |
package com.microsoft.azure.sqldb.spark.connect import java.util.Properties import com.microsoft.azure.sqldb.spark.SqlDBSpark import com.microsoft.azure.sqldb.spark.config.Config class ConnectionUtilsSpec extends SqlDBSpark { "createConnectionProperties" should "return all properties in configuration in a Properties object" in { val url = "mssql.database.windows.net" val database = "MyDatabase" val user = "admin" val password = "password" val dbTable = "dbo.Customers" val config = Config(Map( "url" -> url, "databaseName" -> database, "user" -> user, "password" -> password, "dbTable" -> dbTable )) val controlProperties = new Properties controlProperties.put("url", url.toLowerCase) controlProperties.put("databasename", database.toLowerCase) controlProperties.put("user", user.toLowerCase) controlProperties.put("password", password.toLowerCase) controlProperties.put("dbtable", dbTable.toLowerCase) val testProperties = ConnectionUtils.createConnectionProperties(config) Seq(testProperties.keySet()) should contain theSameElementsAs Seq(controlProperties.keySet()) } "createJDBCUrl" should "return the server url with jdbc prefix" in { val url = "mssql.database.windows.net" ConnectionUtils.createJDBCUrl(url) should be ("jdbc:sqlserver://" + url) } "getQueryCustom" should "return original query in parenthesis" in { val query = "SELECT * FROM MYTABLE" ConnectionUtils.getQueryCustom(query) should be ("(" + query + ") QueryCustom") } "getTableOrQuery" should "return appropriate table or query from a config object" in { val dbTable = "dbo.Customers" val tableConfig = Config(Map( "url" -> "mssql.database.windows.net", "databaseName" -> "MyDatabase", "user" -> "admin", "password" -> "password", "dbTable" -> dbTable )) ConnectionUtils.getTableOrQuery(tableConfig) should be (dbTable) val queryCustom = "SELECT * FROM dbo.Customers" val queryConfig = Config(Map( "url" -> "mssql.database.windows.net", "databaseName" -> "MyDatabase", "user" -> "admin", "password" -> "password", "QueryCustom" -> queryCustom )) ConnectionUtils.getTableOrQuery(queryConfig) should be (ConnectionUtils.getQueryCustom(queryCustom)) } }
Example 101
Source File: WriteSample.scala From azure-sqldb-spark with MIT License | 5 votes |
// Import libraries
import com.microsoft.azure.sqldb.spark.config.Config
import com.microsoft.azure.sqldb.spark.connect._
// SaveMode is referenced below, so it needs to be imported as well
import org.apache.spark.sql.SaveMode

val url = "[Enter your url here]"
val databaseName = "[Enter your database name here]"
val dbTable = "[Enter your database table here]"
val user = "[Enter your username here]"
val password = "[Enter your password here]"

// Acquire data to be written.
// df could be acquired in any way.
val localTable = "[Enter your local persisted table here]"
val df = spark.sql(s"SELECT * FROM $localTable")

// WRITE FROM CONFIG
val writeConfig = Config(Map(
  "url"            -> url,
  "databaseName"   -> databaseName,
  "dbTable"        -> dbTable,
  "user"           -> user,
  "password"       -> password,
  "connectTimeout" -> "5",
  "queryTimeout"   -> "5"
))

df.write.mode(SaveMode.Append).sqlDB(writeConfig)
Example 102
Source File: ReadSample.scala From azure-sqldb-spark with MIT License | 5 votes |
// Import libraries
import com.microsoft.azure.sqldb.spark.bulkcopy.BulkCopyMetadata
import com.microsoft.azure.sqldb.spark.config.Config
import com.microsoft.azure.sqldb.spark.connect._

val url = "[Enter your url here]"
val databaseName = "[Enter your database name here]"
val dbTable = "[Enter your database table here]"
val user = "[Enter your username here]"
val password = "[Enter your password here]"

// READ FROM CONFIG
val readConfig = Config(Map(
  "url"            -> url,
  "databaseName"   -> databaseName,
  "user"           -> user,
  "password"       -> password,
  "connectTimeout" -> "5",
  "queryTimeout"   -> "5",
  "dbTable"        -> dbTable
))

val df = sqlContext.read.sqlDB(readConfig)
println("Total rows: " + df.count)
df.show()

// TRADITIONAL SYNTAX
import java.util.Properties

val properties = new Properties()
properties.put("databaseName", databaseName)
properties.put("user", user)
properties.put("password", password)
properties.put("connectTimeout", "5")
properties.put("queryTimeout", "5")

// Renamed from df so both variants can live in the same script
val dfFromProps = sqlContext.read.sqlDB(url, dbTable, properties)
println("Total rows: " + dfFromProps.count)
dfFromProps.show()
Example 103
Source File: SchemaRegistryService.scala From kafka-testing with Apache License 2.0 | 5 votes |
package com.landoop.kafka.testing import java.net.{Socket, SocketException} import java.util.Properties import com.typesafe.scalalogging.StrictLogging import io.confluent.kafka.schemaregistry.avro.AvroCompatibilityLevel import io.confluent.kafka.schemaregistry.client.rest.RestService import io.confluent.kafka.schemaregistry.rest.{SchemaRegistryConfig, SchemaRegistryRestApplication} import io.confluent.kafka.schemaregistry.storage.{SchemaRegistry, SchemaRegistryIdentity} import org.eclipse.jetty.server.Server class SchemaRegistryService(val port: Int, val zookeeperConnection: String, val kafkaTopic: String, val avroCompatibilityLevel: AvroCompatibilityLevel, val masterEligibility: Boolean) extends StrictLogging { private val app = new SchemaRegistryRestApplication({ val prop = new Properties prop.setProperty("port", port.asInstanceOf[Integer].toString) prop.setProperty(SchemaRegistryConfig.KAFKASTORE_CONNECTION_URL_CONFIG, zookeeperConnection) prop.put(SchemaRegistryConfig.KAFKASTORE_TOPIC_CONFIG, kafkaTopic) prop.put(SchemaRegistryConfig.COMPATIBILITY_CONFIG, avroCompatibilityLevel.toString) prop.put(SchemaRegistryConfig.MASTER_ELIGIBILITY, masterEligibility.asInstanceOf[AnyRef]) prop }) val restServer = startServer(port) var Endpoint: String = getEndpoint(restServer) val restClient = new RestService(Endpoint) def startServer(port: Int, retries: Int = 5): Option[Server] = { var retry = retries > 0 var restServer: Option[Server] = None if (retry) { if (isPortInUse(port)) { logger.info(s"Schema Registry Port $port is already in use") Thread.sleep(2000) startServer(port, retries - 1) } else { restServer = Some(app.createServer) restServer.get.start() } } restServer } def getEndpoint(restServer: Option[Server]): String = { if (restServer.isDefined) { val uri = restServer.get.getURI.toString if (uri.endsWith("/")) { uri.substring(0, uri.length - 1) } else { uri } } else "" } private def isPortInUse(port: Integer): Boolean = try { new Socket("127.0.0.1", port).close() true } catch { case e: SocketException => false } def close() { if (restServer.isDefined) { restServer.get.stop() restServer.get.join() } } def isMaster: Boolean = app.schemaRegistry.isMaster def setMaster(schemaRegistryIdentity: SchemaRegistryIdentity): Unit = app.schemaRegistry.setMaster(schemaRegistryIdentity) def myIdentity: SchemaRegistryIdentity = app.schemaRegistry.myIdentity def masterIdentity: SchemaRegistryIdentity = app.schemaRegistry.masterIdentity def schemaRegistry: SchemaRegistry = app.schemaRegistry }
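A sketch of standing the service up from a test, assuming ZooKeeper and Kafka are already running locally; the port and topic are placeholders:

import com.landoop.kafka.testing.SchemaRegistryService
import io.confluent.kafka.schemaregistry.avro.AvroCompatibilityLevel

object SchemaRegistrySketch extends App {
  val registry = new SchemaRegistryService(
    port = 8081,
    zookeeperConnection = "localhost:2181",
    kafkaTopic = "_schemas",
    avroCompatibilityLevel = AvroCompatibilityLevel.BACKWARD,
    masterEligibility = true)

  println(s"Schema Registry listening on ${registry.Endpoint}")
  println(registry.restClient.getAllSubjects) // REST client already points at the embedded instance
  registry.close()
}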
Example 104
Source File: ConnectSamples.scala From kafka-testing with Apache License 2.0 | 5 votes |
package com.landoop.kafka.testing import java.util import java.util.Properties import org.apache.kafka.connect.runtime.distributed.DistributedConfig import org.apache.kafka.connect.runtime.{ConnectorConfig, WorkerConfig} import scala.collection.JavaConverters._ object ConnectSamples { def workerConfig(bootstapServers: String, schemaRegistryUrl: String): util.Map[String, AnyRef] = Map( DistributedConfig.GROUP_ID_CONFIG -> "testing-group-id", WorkerConfig.BOOTSTRAP_SERVERS_CONFIG -> bootstapServers, WorkerConfig.KEY_CONVERTER_CLASS_CONFIG -> "org.apache.kafka.connect.json.JsonConverter", WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG -> "org.apache.kafka.connect.json.JsonConverter", WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG -> "com.qubole.streamx.ByteArrayConverter", DistributedConfig.OFFSET_STORAGE_TOPIC_CONFIG -> "connect-offsets", DistributedConfig.CONFIG_TOPIC_CONFIG -> "connect-configs", DistributedConfig.STATUS_STORAGE_TOPIC_CONFIG -> "connect-status", WorkerConfig.INTERNAL_KEY_CONVERTER_CLASS_CONFIG -> "org.apache.kafka.connect.json.JsonConverter", WorkerConfig.INTERNAL_VALUE_CONVERTER_CLASS_CONFIG -> "org.apache.kafka.connect.json.JsonConverter", "schema.registry.url" -> schemaRegistryUrl ).asInstanceOf[Map[String, AnyRef]].asJava val sourceConfig: util.Map[String, AnyRef] = Map( ConnectorConfig.NAME_CONFIG -> "file-source-connector", ConnectorConfig.CONNECTOR_CLASS_CONFIG -> "org.apache.kafka.connect.file.FileStreamSourceConnector", ConnectorConfig.TASKS_MAX_CONFIG -> "1", "topic" -> "file-topic", "file" -> "/var/log/*" ).asInstanceOf[Map[String, AnyRef]].asJava def workerProperties(bootstapServers: String, schemaRegistryUrl: String): Properties = { val props = new Properties() props.putAll(workerConfig(bootstapServers, schemaRegistryUrl)) props } val sourceProperties: Properties = { val props = new Properties() props.putAll(sourceConfig) props } }
Example 105
Source File: Config.scala From zipkin-mesos-framework with Apache License 2.0 | 5 votes |
package net.elodina.mesos.zipkin import java.io.{File, FileInputStream} import java.net.URI import java.util.Properties import net.elodina.mesos.zipkin.utils.{BindAddress, Period} object Config { val DEFAULT_FILE = new File("zipkin-mesos.properties") var debug: Boolean = false var genTraces: Boolean = false var storage: String = "file:zipkin-mesos.json" var master: Option[String] = None var principal: Option[String] = None var secret: Option[String] = None var user: Option[String] = None var frameworkName: String = "zipkin" var frameworkRole: String = "*" var frameworkTimeout: Period = new Period("30d") var log: Option[File] = None var api: Option[String] = None var bindAddress: Option[BindAddress] = None def apiPort: Int = { val port = new URI(getApi).getPort if (port == -1) 80 else port } def replaceApiPort(port: Int): Unit = { val prev: URI = new URI(getApi) api = Some("" + new URI( prev.getScheme, prev.getUserInfo, prev.getHost, port, prev.getPath, prev.getQuery, prev.getFragment )) } def getApi: String = { api.getOrElse(throw new Error("api not initialized")) } def getMaster: String = { master.getOrElse(throw new Error("master not initialized")) } def getZk: String = { master.getOrElse(throw new Error("zookeeper not initialized")) } private[zipkin] def loadFromFile(file: File): Unit = { val props: Properties = new Properties() val stream: FileInputStream = new FileInputStream(file) props.load(stream) stream.close() if (props.containsKey("debug")) debug = java.lang.Boolean.valueOf(props.getProperty("debug")) if (props.containsKey("genTraces")) genTraces = java.lang.Boolean.valueOf(props.getProperty("genTraces")) if (props.containsKey("storage")) storage = props.getProperty("storage") if (props.containsKey("master")) master = Some(props.getProperty("master")) if (props.containsKey("user")) user = Some(props.getProperty("user")) if (props.containsKey("principal")) principal = Some(props.getProperty("principal")) if (props.containsKey("secret")) secret = Some(props.getProperty("secret")) if (props.containsKey("framework-name")) frameworkName = props.getProperty("framework-name") if (props.containsKey("framework-role")) frameworkRole = props.getProperty("framework-role") if (props.containsKey("framework-timeout")) frameworkTimeout = new Period(props.getProperty("framework-timeout")) if (props.containsKey("log")) log = Some(new File(props.getProperty("log"))) if (props.containsKey("api")) api = Some(props.getProperty("api")) if (props.containsKey("bind-address")) bindAddress = Some(new BindAddress(props.getProperty("bind-address"))) } override def toString: String = { s""" |debug: $debug, storage: $storage |mesos: master=$master, user=${if (user.isEmpty || user.get.isEmpty) "<default>" else user} |principal=${principal.getOrElse("<none>")}, secret=${if (secret.isDefined) "*****" else "<none>"} |framework: name=$frameworkName, role=$frameworkRole, timeout=$frameworkTimeout |api: $api, bind-address: ${bindAddress.getOrElse("<all>")}, genTraces: $genTraces """.stripMargin.trim } }
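A sketch of driving this from a properties file; the keys mirror the ones loadFromFile checks and the values are placeholders (the example object sits in the same package because loadFromFile is private[zipkin]):

package net.elodina.mesos.zipkin

// zipkin-mesos.properties next to the jar, for example:
//   master         = zk://localhost:2181/mesos
//   api            = http://localhost:7000
//   framework-name = zipkin
//   debug          = true
object ConfigSketch extends App {
  if (Config.DEFAULT_FILE.exists()) Config.loadFromFile(Config.DEFAULT_FILE)
  println(Config)        // summarises master, framework and api settings
  println(Config.getApi) // throws an Error if `api` was never set
}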
Example 106
Source File: GenZipkinTraces.scala From zipkin-mesos-framework with Apache License 2.0 | 5 votes |
package net.elodina.mesos.zipkin import java.util.Properties import com.github.kristofa.brave.TraceInfo import org.junit.Test import com.github.kristofa.brave.KafkaZipkinTracing._ class GenZipkinTraces { lazy val KAFKA_BROKER = Option(System.getenv("KAFKA_BROKER")).getOrElse("localhost:9092") lazy val KAFKA_TOPIC = Option(System.getenv("KAFKA_TOPIC")).getOrElse("zipkin") @Test def generate(): Unit = { //Setting producer type to sync in this test, as we don't bother about throughput here, just want all messages sent val kafkaProps = new Properties() kafkaProps.setProperty("producer.type", "sync") initTracing(KAFKA_BROKER, "serviceFoo", Some(KAFKA_TOPIC), kafkaProps = kafkaProps) var traceInfo: Option[TraceInfo] = None // Logging fire and forget request withClientTracer { ct => Option(ct.startNewSpan("someRequest")).foreach { traceIds => // The below line emulates adding trace info to RPC call traceInfo = Some(TraceInfo(Some(traceIds.getTraceId), Some(traceIds.getSpanId), sampled = true)) ct.setClientSent() submitClientTracer() } } // Emulate RPC call try { Thread.sleep(1500) } catch { case e: InterruptedException => //ignore } // Creating new tracing for server side initTracing(KAFKA_BROKER, "serviceBar", Some(KAFKA_TOPIC), kafkaProps = kafkaProps) // Here actual servers should parse TraceInfo instances from an incoming request initServerFromTraceInfo(traceInfo) withServerTracer { st => st.setServerReceived() } // Now receiving service is making request of his own withClientTracer { ct => Option(ct.startNewSpan("otherRequest")).foreach { traceIds => ct.setClientSent() submitClientTracer() } } withServerTracer { st => st.submitAnnotation("Completed Processing") } submitServerTracer() } }
Example 107
Source File: MailAPI.scala From SqlShift with MIT License | 5 votes |
package com.goibibo.sqlshift.alerting import java.util.Properties import javax.mail.Message.RecipientType import javax.mail.internet.{InternetAddress, MimeMessage, _} import javax.mail.{Authenticator, PasswordAuthentication, Session, Transport} import com.goibibo.sqlshift.models.Configurations.AppConfiguration import com.goibibo.sqlshift.models.Params.MailParams import org.slf4j.{Logger, LoggerFactory} class MailAPI(mailParams: MailParams) { private val logger: Logger = LoggerFactory.getLogger(classOf[MailAPI]) private val prop = new Properties() { put("mail.smtp.host", mailParams.host) put("mail.smtp.port", mailParams.port.toString) } private val session: Session = mailParams.password match { case Some(password) => prop.setProperty("mail.smtp.auth", "true") Session.getDefaultInstance(prop, new Authenticator { override def getPasswordAuthentication: PasswordAuthentication = { new PasswordAuthentication(mailParams.username, password) } }) case None => Session.getDefaultInstance(prop) } def send(appConfs: List[AppConfiguration]): Unit = { val from = "[email protected]" logger.info("Mail from: {}", from) var subject = "SQLShift:" var text = "<html>" + "<body>" + "<table border='1' style='width:100%' bgcolor='#F5F5F5'>" + "<tr> <th size=6>Mysql schema</th>" + "<th size=6>Mysql table_name</th>" + "<th size=6>Redshift schema</th>" + "<th size=6>Status</th>" + "<th size=6>Migration Time(sec)</th>" + "<th size=6>Error</th></tr>" logger.info(s"Mail to: '${mailParams.to}' and cc: '${mailParams.cc}'") val tos: List[String] = mailParams.to.split(",").toList var ccs: List[String] = List() if (mailParams.cc != "") ccs = mailParams.cc.split(",").toList var errorCnt = 0 var successCnt = 0 for (appConf <- appConfs) { text += "<tr>" + "<td bgcolor='#FFE4C4'>" + appConf.mysqlConf.db + "</td>" + "<td bgcolor='#E0FFFF'>" + appConf.mysqlConf.tableName + "</td>" + "<td bgcolor='#F5F5DC'>" + appConf.redshiftConf.schema + "</td>" + "<td bgcolor='#E0FFFF'>" + appConf.status.get.isSuccessful + "</td>" + "<td bgcolor='#E0FFFF'>" + appConf.migrationTime.get + "</td>" if (appConf.status.get.isSuccessful) { successCnt += 1 } else { text += "<td bgcolor='#F0FFFF'>%s\n%s</td></tr>" .format(appConf.status.get.e.getMessage, appConf.status.get.e.getStackTrace.mkString("\n")) errorCnt += 1 } } subject += " Failed " + errorCnt.toString + " Success " + successCnt.toString + mailParams.subject text += "</table></body></html>" logger.info("Subject: {}", subject) val message = new MimeMessage(session) message.setFrom(new InternetAddress(from)) for (to <- tos) message.addRecipient(RecipientType.TO, new InternetAddress(to)) for (cc <- ccs) message.addRecipient(RecipientType.CC, new InternetAddress(cc)) message.setSubject(subject) message.setText(text) val mimeBdyPart = new MimeBodyPart() mimeBdyPart.setContent(text, "text/html; charset=utf-8") val multiPart = new MimeMultipart() logger.info("Sending message...") multiPart.addBodyPart(mimeBdyPart) message.setContent(multiPart) Transport.send(message) } }
Example 108
Source File: MailUtil.scala From SqlShift with MIT License | 5 votes |
package com.goibibo.sqlshift.alerting import java.util.Properties import com.goibibo.sqlshift.models.Params.MailParams import scala.collection.JavaConverters._ object MailUtil { def getMailParams(prop: Properties): MailParams = { val scalaProp = prop.asScala var mailParams: MailParams = MailParams(host = scalaProp("alert.host"), username = null, to = scalaProp.getOrElse("alert.to", ""), cc = scalaProp.getOrElse("alert.cc", ""), subject = scalaProp.getOrElse("alert.subject", "") ) mailParams = scalaProp.get("alert.port") match { case Some(port) => mailParams.copy(port = port.toInt) case None => mailParams } mailParams = scalaProp.get("alert.username") match { case Some(username) => mailParams.copy(username = username) case None => mailParams } mailParams = scalaProp.get("alert.password") match { case Some(pass) => mailParams.copy(password = Some(pass)) case None => mailParams } mailParams } }
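A short sketch of the alert.* keys getMailParams expects; host, addresses and credentials are placeholders:

import java.util.Properties

import com.goibibo.sqlshift.alerting.MailUtil

object MailUtilSketch extends App {
  val props = new Properties()
  props.setProperty("alert.host", "smtp.example.com") // required
  props.setProperty("alert.port", "587")              // optional
  props.setProperty("alert.username", "alerts")       // optional
  props.setProperty("alert.password", "changeit")     // optional
  props.setProperty("alert.to", "oncall@example.com")
  props.setProperty("alert.subject", " nightly migration")

  val mailParams = MailUtil.getMailParams(props)
  println(mailParams.host) // smtp.example.com
}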
Example 109
Source File: MySQLUtil.scala From SqlShift with MIT License | 5 votes |
package com.goibibo.sqlshift import java.net.URL import java.sql.{Connection, DriverManager} import java.util.Properties import com.typesafe.config.Config import org.slf4j.{Logger, LoggerFactory} import scala.io.Source object MySQLUtil { private val logger: Logger = LoggerFactory.getLogger(this.getClass) private def getMySQLConnection(config: Config): Connection = { val mysql = config.getConfig("mysql") val connectionProps = new Properties() connectionProps.put("user", mysql.getString("username")) connectionProps.put("password", mysql.getString("password")) val jdbcUrl = s"jdbc:mysql://${mysql.getString("hostname")}:${mysql.getInt("portno")}/${mysql.getString("db")}?createDatabaseIfNotExist=true&useSSL=false" Class.forName("com.mysql.jdbc.Driver") DriverManager.getConnection(jdbcUrl, connectionProps) } def createTableAndInsertRecords(config: Config, tableName: String, psvFile: URL): Unit = { logger.info("Inserting records in table: {}", tableName) val records = Source.fromFile(psvFile.toURI).getLines().toList.drop(1) // removing header val conn = getMySQLConnection(config) val statement = conn.createStatement() try { val tableCreateQuery = config.getString("table.tableCreateQuery").replace("${tableName}", tableName) logger.info("Running query: {}", tableCreateQuery) statement.executeUpdate(tableCreateQuery) val insertIntoQuery = config.getString("table.insertIntoQuery").replace("${tableName}", tableName) logger.info("Running query: {}", insertIntoQuery) records.foreach { record: String => val columns = record.split("\\|") val query = insertIntoQuery.format(columns: _*) statement.executeUpdate(query) } } finally { statement.close() conn.close() } } }
Example 110
Source File: SparkNRedshiftUtil.scala From SqlShift with MIT License | 5 votes |
package com.goibibo.sqlshift import java.sql.{Connection, DriverManager} import java.util.Properties import com.databricks.spark.redshift.RedshiftReaderM import com.typesafe.config.Config import org.apache.spark.sql.{DataFrame, SQLContext} import org.apache.spark.{SparkConf, SparkContext} import org.scalatest.{BeforeAndAfterAll, Suite} import org.slf4j.{Logger, LoggerFactory} trait SparkNRedshiftUtil extends BeforeAndAfterAll { self: Suite => private val logger: Logger = LoggerFactory.getLogger(this.getClass) @transient private var _sc: SparkContext = _ @transient private var _sqlContext: SQLContext = _ def sc: SparkContext = _sc def sqlContext: SQLContext = _sqlContext private def getRedshiftConnection(config: Config): Connection = { val mysql = config.getConfig("redshift") val connectionProps = new Properties() connectionProps.put("user", mysql.getString("username")) connectionProps.put("password", mysql.getString("password")) val jdbcUrl = s"jdbc:redshift://${mysql.getString("hostname")}:${mysql.getInt("portno")}/${mysql.getString("database")}?useSSL=false" Class.forName("com.amazon.redshift.jdbc4.Driver") DriverManager.getConnection(jdbcUrl, connectionProps) } val getSparkContext: (SparkContext, SQLContext) = { val sparkConf: SparkConf = new SparkConf().setAppName("Full Dump Testing").setMaster("local") val sc: SparkContext = new SparkContext(sparkConf) val sqlContext: SQLContext = new SQLContext(sc) System.setProperty("com.amazonaws.services.s3.enableV4", "true") sc.hadoopConfiguration.set("fs.s3a.endpoint", "s3.ap-south-1.amazonaws.com") sc.hadoopConfiguration.set("fs.s3a.fast.upload", "true") (sc, sqlContext) } def readTableFromRedshift(config: Config, tableName: String): DataFrame = { val redshift: Config = config.getConfig("redshift") val options = Map("dbtable" -> tableName, "user" -> redshift.getString("username"), "password" -> redshift.getString("password"), "url" -> s"jdbc:redshift://${redshift.getString("hostname")}:${redshift.getInt("portno")}/${redshift.getString("database")}", "tempdir" -> config.getString("s3.location"), "aws_iam_role" -> config.getString("redshift.iamRole") ) RedshiftReaderM.getDataFrameForConfig(options, sc, sqlContext) } def dropTableRedshift(config: Config, tables: String*): Unit = { logger.info("Droping table: {}", tables) val conn = getRedshiftConnection(config) val statement = conn.createStatement() try { val dropTableQuery = s"""DROP TABLE ${tables.mkString(",")}""" logger.info("Running query: {}", dropTableQuery) statement.executeUpdate(dropTableQuery) } finally { statement.close() conn.close() } } override protected def beforeAll(): Unit = { super.beforeAll() val (sc, sqlContext) = getSparkContext _sc = sc _sqlContext = sqlContext } override protected def afterAll(): Unit = { super.afterAll() _sc.stop() } }
Example 111
Source File: SimpleProducer.scala From kafka-scala-api with Apache License 2.0 | 5 votes |
package com.example.producer import java.util.{Properties} import org.apache.kafka.clients.producer.{ProducerRecord, KafkaProducer} object SimpleProducer extends App{ val topic = "sample_topic" private val props = new Properties() props.put("bootstrap.servers", "localhost:9092") props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String,String](props) try { for(i <- 0 to 10) { producer.send(new ProducerRecord[String, String](topic, "title "+i.toString,"data from topic")) println(s"Sent: $i") } println("Message sent successfully") producer.close() } catch { case ex: Exception => ex.printStackTrace() } }
Example 112
Source File: package.scala From kafka-scala-api with Apache License 2.0 | 5 votes |
package com.example import java.util.Properties import org.apache.kafka.common.serialization.StringSerializer package object writer { val topic = "first_topic" val numbersProducerConfig = Map( "bootstrap.servers" -> "127.0.0.1:9092", "key.serializer" -> classOf[StringSerializer].getName, "value.serializer" -> classOf[StringSerializer].getName ) implicit def buildPropertiesFromMap(properties: Map[String, String]): Properties = (new Properties /: properties) { case (a, (k, v)) => a.put(k,v) a } def setupLogging(): Unit = { import org.apache.log4j.{Level, Logger} val rootLogger = Logger.getRootLogger rootLogger.setLevel(Level.ERROR) } }
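The implicit Map-to-Properties conversion above is what lets the plain config map be handed straight to the Kafka client; a small sketch of it in use:

package com.example

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

object WriterSketch extends App {
  import writer._

  setupLogging()

  // numbersProducerConfig is a Map[String, String]; buildPropertiesFromMap converts it
  // to the java.util.Properties the KafkaProducer constructor expects
  val producer = new KafkaProducer[String, String](numbersProducerConfig)
  producer.send(new ProducerRecord[String, String](topic, "42"))
  producer.close()
}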
Example 113
Source File: KafkaMessageSender.scala From kafka-with-akka-streams-kafka-streams-tutorial with Apache License 2.0 | 5 votes |
package com.lightbend.scala.kafka

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata}
import org.apache.kafka.common.serialization.ByteArraySerializer

// The class header, imports and producer setup were lost in extraction; the wrapper below is a
// minimal reconstruction (the class name mirrors the sibling model-serving example and the
// producer settings are assumptions).
class MessageSender(val brokers: String) {

  private val props = new Properties()
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)

  private val producer =
    new KafkaProducer[Array[Byte], Array[Byte]](props, new ByteArraySerializer, new ByteArraySerializer)

  def batchWriteValue(topic: String, batch: Seq[Array[Byte]]): Seq[RecordMetadata] = {
    val result = batch.map(value =>
      producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, value)).get)
    producer.flush()
    result
  }

  def close(): Unit = {
    producer.close()
  }
}
Example 114
Source File: PropertiesApi.scala From iep-apps with Apache License 2.0 | 5 votes |
package com.netflix.iep.archaius import java.io.StringWriter import java.util.Properties import akka.actor.ActorRefFactory import akka.http.scaladsl.model.HttpCharsets import akka.http.scaladsl.server.Directives._ import akka.http.scaladsl.model.HttpEntity import akka.http.scaladsl.model.HttpRequest import akka.http.scaladsl.model.HttpResponse import akka.http.scaladsl.model.MediaTypes import akka.http.scaladsl.model.StatusCodes import akka.http.scaladsl.server.Route import com.netflix.atlas.akka.CustomDirectives._ import com.netflix.atlas.akka.WebApi import com.netflix.atlas.json.Json import com.netflix.frigga.Names class PropertiesApi( val propContext: PropertiesContext, implicit val actorRefFactory: ActorRefFactory ) extends WebApi { def routes: Route = { endpointPath("api" / "v1" / "property") { get { parameter("asg") { asg => extractRequest { request => val cluster = Names.parseName(asg).getCluster if (propContext.initialized) { val props = propContext.getClusterProps(cluster) complete(encode(request, props)) } else { complete(HttpResponse(StatusCodes.ServiceUnavailable)) } } } } } } private def encode(request: HttpRequest, props: List[PropertiesApi.Property]): HttpResponse = { val useJson = request.headers.exists(h => h.is("accept") && h.value == "application/json") if (useJson) { HttpResponse( StatusCodes.OK, entity = HttpEntity(MediaTypes.`application/json`, Json.encode(props)) ) } else { val ps = new Properties props.foreach { p => ps.setProperty(p.key, p.value) } val writer = new StringWriter() ps.store(writer, s"count: ${ps.size}") writer.close() val entity = HttpEntity(MediaTypes.`text/plain`.toContentType(HttpCharsets.`UTF-8`), writer.toString) HttpResponse(StatusCodes.OK, entity = entity) } } } object PropertiesApi { case class Property(id: String, cluster: String, key: String, value: String, timestamp: Long) }
Example 115
Source File: PropertiesApiSuite.scala From iep-apps with Apache License 2.0 | 5 votes |
package com.netflix.iep.archaius import java.io.StringReader import java.util.Properties import akka.http.scaladsl.model.HttpResponse import akka.http.scaladsl.model.MediaTypes import akka.http.scaladsl.model.StatusCode import akka.http.scaladsl.model.StatusCodes import akka.http.scaladsl.model.headers._ import akka.http.scaladsl.testkit.RouteTestTimeout import akka.http.scaladsl.testkit.ScalatestRouteTest import com.netflix.atlas.akka.RequestHandler import com.netflix.atlas.json.Json import com.netflix.spectator.api.DefaultRegistry import com.netflix.spectator.api.ManualClock import org.scalatest.funsuite.AnyFunSuite class PropertiesApiSuite extends AnyFunSuite with ScalatestRouteTest { import scala.concurrent.duration._ implicit val routeTestTimeout = RouteTestTimeout(5.second) val clock = new ManualClock() val registry = new DefaultRegistry(clock) val propContext = new PropertiesContext(registry) val endpoint = new PropertiesApi(propContext, system) val routes = RequestHandler.standardOptions(endpoint.routes) private def assertJsonContentType(response: HttpResponse): Unit = { assert(response.entity.contentType.mediaType === MediaTypes.`application/json`) } private def assertResponse(response: HttpResponse, expected: StatusCode): Unit = { assert(response.status === expected) assertJsonContentType(response) } test("no asg") { Get("/api/v1/property") ~> routes ~> check { assert(response.status === StatusCodes.BadRequest) } } test("empty") { propContext.update(Nil) Get("/api/v1/property?asg=foo-main-v001") ~> addHeader(Accept(MediaTypes.`application/json`)) ~> routes ~> check { assertResponse(response, StatusCodes.OK) assert(responseAs[String] === "[]") } } test("properties response") { propContext.update( List( PropertiesApi.Property("foo-main::a", "foo-main", "a", "b", 12345L), PropertiesApi.Property("foo-main::1", "foo-main", "1", "2", 12345L), PropertiesApi.Property("bar-main::c", "bar-main", "c", "d", 12345L) ) ) Get("/api/v1/property?asg=foo-main-v001") ~> routes ~> check { assert(response.status === StatusCodes.OK) val props = new Properties props.load(new StringReader(responseAs[String])) assert(props.size === 2) assert(props.getProperty("a") === "b") assert(props.getProperty("1") === "2") } } test("json response") { propContext.update( List( PropertiesApi.Property("foo-main::a", "foo-main", "a", "b", 12345L) ) ) Get("/api/v1/property?asg=foo-main-v001") ~> addHeader(Accept(MediaTypes.`application/json`)) ~> routes ~> check { assertResponse(response, StatusCodes.OK) val props = Json.decode[List[PropertiesApi.Property]](responseAs[String]) assert(props === List(PropertiesApi.Property("foo-main::a", "foo-main", "a", "b", 12345L))) } } }
Example 116
Source File: SchemaRegistryOps.scala From embedded-kafka-schema-registry with MIT License | 5 votes |
package net.manub.embeddedkafka.schemaregistry.ops

import java.net.{ServerSocket, URI}
import java.util.Properties

import io.confluent.kafka.schemaregistry.rest.{SchemaRegistryConfig, SchemaRegistryRestApplication}
import io.confluent.rest.RestConfig
import net.manub.embeddedkafka.EmbeddedServer
import net.manub.embeddedkafka.ops.RunningServersOps
import net.manub.embeddedkafka.schemaregistry.{EmbeddedKafkaConfig, EmbeddedSR}

import scala.jdk.CollectionConverters._

// The enclosing trait declaration (and its Schema Registry startup members) was lost in extraction;
// only the surviving members are shown. The self-type on RunningServersOps, which supplies
// `runningServers`, is an assumption.
trait SchemaRegistryOps {
  this: RunningServersOps =>

  def stopSchemaRegistry(): Unit = runningServers.stopAndRemove(isEmbeddedSR)

  private[embeddedkafka] def isEmbeddedSR(server: EmbeddedServer): Boolean =
    server.isInstanceOf[EmbeddedSR]

  private[embeddedkafka] def schemaRegistryPort(
      restApp: SchemaRegistryRestApplication
  ): Int = {
    val listeners = restApp.getConfiguration.originalProperties
      .getProperty(RestConfig.LISTENERS_CONFIG)
    URI.create(listeners).getPort
  }
}
Example 117
Source File: package.scala From aloha with MIT License | 5 votes |
package com.eharmony import java.util.Properties import org.apache.commons.io.IOUtils import org.apache.commons.vfs2.VFS package object aloha { def pkgName = getClass.getPackage.getName def version: String = _version private[this] lazy val _version: String = { val is = VFS.getManager.resolveFile("res:" + pkgName.replaceAll("\\.", "/") + "/version.properties").getContent.getInputStream try { val p = new Properties() p.load(is) p.getProperty("aloha.version") } finally { IOUtils closeQuietly is } } }
Example 118
Source File: SparkImplicits.scala From apache-spark-test with Apache License 2.0 | 5 votes |
package com.github.dnvriend.spark.datasources import java.util.Properties import akka.NotUsed import akka.stream.Materializer import akka.stream.scaladsl.{ Sink, Source } import org.apache.spark.sql._ import org.apache.spark.sql.streaming.DataStreamReader import scala.collection.immutable._ import scala.concurrent.duration.{ FiniteDuration, _ } import scala.concurrent.{ Await, Future } import scala.reflect.runtime.universe._ import slick.driver.PostgresDriver.api._ object SparkImplicits { implicit class DataSourceOps(dfr: DataFrameReader) { def helloworld(path: String): DataFrame = dfr.format("helloworld").load(path) def person(path: String): DataFrame = dfr.format("person").load(path) def jdbc(table: String)(implicit jdbcOptions: Map[String, String]): DataFrame = dfr.format("jdbc").options(jdbcOptions ++ Map("dbtable" -> table)).load() } implicit class DataStreamReaderOps(dsr: DataStreamReader) { def currentPersistenceIds(path: String = "jdbc-read-journal"): DataFrame = dsr.format("current-persistence-id").load(path) def eventsByPersistenceId(path: String = "jdbc-read-journal"): DataFrame = dsr.format("current-events-by-persistence-id").load(path) } implicit class DataFrameWriterOps[T](dfw: DataFrameWriter[T]) { def ignore = dfw.mode(SaveMode.Ignore) def jdbc(table: String)(implicit jdbcOptions: Map[String, String]) = { val properties = jdbcOptions.foldLeft(new Properties) { case (prop, (k, v)) => prop.put(k, v); prop } dfw.jdbc(jdbcOptions("url"), table, properties) // does not (yet) work see: https://issues.apache.org/jira/browse/SPARK-7646 // dfw.format("jdbc").mode(SaveMode.Overwrite).options(jdbcOptions ++ Map("dbtable" -> table)) } } trait DataFrameQueryGenerator[A] { def upsert: String } implicit class DatasetOps(df: DataFrame) { def withSession[A](db: Database)(f: Session => A): A = { val session = db.createSession() try f(session) finally session.close() } def withStatement[A](db: Database)(f: java.sql.Statement => A): A = withSession(db)(session ⇒ session.withStatement()(f)) def upsert[A](table: String)(implicit db: Database, dfq: DataFrameQueryGenerator[A]): DataFrame = withStatement(db) { stmt => stmt.executeUpdate(dfq.upsert) df } } implicit class SparkSessionOps(spark: SparkSession) { def fromFuture[A <: Product: TypeTag](data: Future[Seq[A]])(implicit _timeout: FiniteDuration = null): DataFrame = spark.createDataFrame(Await.result(data, Option(_timeout).getOrElse(15.minutes))) def fromSource[A <: Product: TypeTag](data: Source[A, NotUsed])(implicit _timeout: FiniteDuration = null, mat: Materializer): DataFrame = fromFuture(data.runWith(Sink.seq)) } }
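A sketch of the reader and writer extensions in use, assuming a reachable Postgres instance; every connection value below is a placeholder:

import org.apache.spark.sql.SparkSession

import com.github.dnvriend.spark.datasources.SparkImplicits._

object SparkImplicitsSketch extends App {
  val spark = SparkSession.builder().master("local[*]").appName("jdbc-sketch").getOrCreate()

  // One options map serves both the DataFrameReader and DataFrameWriter extensions
  implicit val jdbcOptions: Map[String, String] = Map(
    "url"      -> "jdbc:postgresql://localhost:5432/docker",
    "user"     -> "postgres",
    "password" -> "postgres",
    "driver"   -> "org.postgresql.Driver"
  )

  val people = spark.read.jdbc("people")  // DataSourceOps
  people.write.ignore.jdbc("people_copy") // DataFrameWriterOps
  spark.stop()
}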
Example 119
Source File: MessageListener.scala From model-serving-tutorial with Apache License 2.0 | 5 votes |
package com.lightbend.modelserving.client import java.time.Duration import java.util.Properties import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer} import org.apache.kafka.common.serialization.ByteArrayDeserializer class MessageListener[K, V]( brokers: String, topic: String, group: String, keyDeserealizer: String, valueDeserealizer: String, processor: RecordProcessorTrait[K, V]) extends Runnable { import MessageListener._ import scala.collection.JavaConverters._ val consumer = new KafkaConsumer[K, V](consumerProperties(brokers, group, keyDeserealizer, valueDeserealizer)) consumer.subscribe(Seq(topic).asJava) var completed = false def complete(): Unit = { completed = true } override def run(): Unit = { while (!completed) { val records = consumer.poll(Duration.ofMillis(100)).asScala for (record <- records) { processor.processRecord(record) } } consumer.close() System.out.println("Listener completes") } def start(): Unit = { val t = new Thread(this) t.start() } }
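The companion object that assembles the consumer Properties did not survive in this listing; a minimal sketch of what it could look like, reusing the listing's imports (all settings beyond the constructor arguments are assumptions):

object MessageListener {
  def consumerProperties(brokers: String, group: String,
                         keyDeserealizer: String, valueDeserealizer: String): Properties = {
    val props = new Properties()
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ConsumerConfig.GROUP_ID_CONFIG, group)
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, keyDeserealizer)
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, valueDeserealizer)
    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") // assumption
    props
  }

  def apply[K, V](brokers: String, topic: String, group: String,
                  keyDeserealizer: String, valueDeserealizer: String,
                  processor: RecordProcessorTrait[K, V]): MessageListener[K, V] =
    new MessageListener[K, V](brokers, topic, group, keyDeserealizer, valueDeserealizer, processor)
}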
Example 120
Source File: KafkaMessageSender.scala From model-serving-tutorial with Apache License 2.0 | 5 votes |
package com.lightbend.modelserving.client import java.util.Properties import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata} import org.apache.kafka.common.serialization.ByteArraySerializer class MessageSender(val brokers: String) { import MessageSender._ val producer = new KafkaProducer[Array[Byte], Array[Byte]]( providerProperties(brokers, classOf[ByteArraySerializer].getName, classOf[ByteArraySerializer].getName)) def writeKeyValue(topic: String, key: Array[Byte], value: Array[Byte]): Unit = { val result = producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, key, value)).get producer.flush() } def writeValue(topic: String, value: Array[Byte]): Unit = { val result = producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, null.asInstanceOf[Array[Byte]], value)).get producer.flush() } def batchWriteValue(topic: String, batch: Seq[Array[Byte]]): Seq[RecordMetadata] = { val result = batch.map(value => producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, null.asInstanceOf[Array[Byte]], value)).get) producer.flush() result } def close(): Unit = { producer.close() } }
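Likewise, the MessageSender companion with providerProperties is missing from this listing; a minimal sketch, reusing the listing's imports (any extra settings would be assumptions):

object MessageSender {
  def providerProperties(brokers: String, keySerializer: String, valueSerializer: String): Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, keySerializer)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, valueSerializer)
    props
  }

  def apply(brokers: String): MessageSender = new MessageSender(brokers)
}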
Example 121
Source File: KafkaReadWrite.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.examples.kafka.dsl import java.util.Properties import org.apache.gearpump.cluster.client.ClientContext import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption} import org.apache.gearpump.streaming.dsl.scalaapi.StreamApp import org.apache.gearpump.streaming.kafka.KafkaStoreFactory import org.apache.gearpump.streaming.kafka.dsl.KafkaDSL import org.apache.gearpump.streaming.kafka.dsl.KafkaDSL._ import org.apache.gearpump.streaming.kafka.util.KafkaConfig import org.apache.gearpump.util.AkkaApp object KafkaReadWrite extends AkkaApp with ArgumentsParser { override val options: Array[(String, CLIOption[Any])] = Array( "source" -> CLIOption[Int]("<hom many kafka producer tasks>", required = false, defaultValue = Some(1)), "sink" -> CLIOption[Int]("<hom many kafka processor tasks>", required = false, defaultValue = Some(1)), "zookeeperConnect" -> CLIOption[String]("<zookeeper connect string>", required = false, defaultValue = Some("localhost:2181")), "brokerList" -> CLIOption[String]("<broker server list string>", required = false, defaultValue = Some("localhost:9092")), "sourceTopic" -> CLIOption[String]("<kafka source topic>", required = false, defaultValue = Some("topic1")), "sinkTopic" -> CLIOption[String]("<kafka sink topic>", required = false, defaultValue = Some("topic2")), "atLeastOnce" -> CLIOption[Boolean]("<turn on at least once source>", required = false, defaultValue = Some(true)) ) override def main(akkaConf: Config, args: Array[String]): Unit = { val config = parse(args) val sourceNum = config.getInt("source") val sinkNum = config.getInt("sink") val zookeeperConnect = config.getString("zookeeperConnect") val brokerList = config.getString("brokerList") val sourceTopic = config.getString("sourceTopic") val sinkTopic = config.getString("sinkTopic") val atLeastOnce = config.getBoolean("atLeastOnce") val props = new Properties val appName = "KafkaDSL" props.put(KafkaConfig.ZOOKEEPER_CONNECT_CONFIG, zookeeperConnect) props.put(KafkaConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList) props.put(KafkaConfig.CHECKPOINT_STORE_NAME_PREFIX_CONFIG, appName) val context = ClientContext(akkaConf) val app = StreamApp(appName, context) if (atLeastOnce) { val checkpointStoreFactory = new KafkaStoreFactory(props) KafkaDSL.createAtLeastOnceStream(app, sourceTopic, checkpointStoreFactory, props, sourceNum) .writeToKafka(sinkTopic, props, sinkNum) } else { KafkaDSL.createAtMostOnceStream(app, sourceTopic, props, sourceNum) .writeToKafka(sinkTopic, props, sinkNum) } context.submit(app) context.close() } }
Example 122
Source File: KafkaReadWrite.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.examples.kafka import java.util.Properties import akka.actor.ActorSystem import org.apache.gearpump.streaming.kafka.util.KafkaConfig import org.slf4j.Logger import org.apache.gearpump.cluster.UserConfig import org.apache.gearpump.cluster.client.ClientContext import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult} import org.apache.gearpump.streaming.partitioner.ShufflePartitioner import org.apache.gearpump.streaming.StreamApplication import org.apache.gearpump.streaming.kafka._ import org.apache.gearpump.streaming.sink.DataSinkProcessor import org.apache.gearpump.streaming.source.DataSourceProcessor import org.apache.gearpump.util.Graph._ import org.apache.gearpump.util.{AkkaApp, Graph, LogUtil} object KafkaReadWrite extends AkkaApp with ArgumentsParser { private val LOG: Logger = LogUtil.getLogger(getClass) override val options: Array[(String, CLIOption[Any])] = Array( "source" -> CLIOption[Int]("<hom many kafka producer tasks>", required = false, defaultValue = Some(1)), "sink" -> CLIOption[Int]("<hom many kafka processor tasks>", required = false, defaultValue = Some(1)), "zookeeperConnect" -> CLIOption[String]("<zookeeper connect string>", required = false, defaultValue = Some("localhost:2181")), "brokerList" -> CLIOption[String]("<broker server list string>", required = false, defaultValue = Some("localhost:9092")), "sourceTopic" -> CLIOption[String]("<kafka source topic>", required = false, defaultValue = Some("topic1")), "sinkTopic" -> CLIOption[String]("<kafka sink topic>", required = false, defaultValue = Some("topic2")) ) def application(config: ParseResult, system: ActorSystem): StreamApplication = { implicit val actorSystem = system val appName = "KafkaReadWrite" val sourceNum = config.getInt("source") val sinkNum = config.getInt("sink") val zookeeperConnect = config.getString("zookeeperConnect") val brokerList = config.getString("brokerList") val sourceTopic = config.getString("sourceTopic") val sinkTopic = config.getString("sinkTopic") val appConfig = UserConfig.empty val props = new Properties props.put(KafkaConfig.ZOOKEEPER_CONNECT_CONFIG, zookeeperConnect) props.put(KafkaConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList) props.put(KafkaConfig.CHECKPOINT_STORE_NAME_PREFIX_CONFIG, appName) val source = new KafkaSource(sourceTopic, props) val checkpointStoreFactory = new KafkaStoreFactory(props) source.setCheckpointStore(checkpointStoreFactory) val sourceProcessor = DataSourceProcessor(source, sourceNum) val sink = new KafkaSink(sinkTopic, props) val sinkProcessor = DataSinkProcessor(sink, sinkNum) val partitioner = new ShufflePartitioner val computation = sourceProcessor ~ partitioner ~> sinkProcessor val app = StreamApplication(appName, Graph(computation), appConfig) app } override def main(akkaConf: Config, args: Array[String]): Unit = { val config = parse(args) val context = ClientContext(akkaConf) val appId = context.submit(application(config, context.system)) context.close() } }
Example 123
Source File: KafkaWordCount.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.examples.kafka.wordcount import java.util.Properties import akka.actor.ActorSystem import kafka.api.OffsetRequest import org.apache.gearpump.streaming.kafka.util.KafkaConfig import org.slf4j.Logger import org.apache.gearpump.cluster.UserConfig import org.apache.gearpump.cluster.client.ClientContext import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult} import org.apache.gearpump.streaming.partitioner.HashPartitioner import org.apache.gearpump.streaming.kafka._ import org.apache.gearpump.streaming.sink.DataSinkProcessor import org.apache.gearpump.streaming.source.DataSourceProcessor import org.apache.gearpump.streaming.{Processor, StreamApplication} import org.apache.gearpump.util.Graph._ import org.apache.gearpump.util.{AkkaApp, Graph, LogUtil} object KafkaWordCount extends AkkaApp with ArgumentsParser { private val LOG: Logger = LogUtil.getLogger(getClass) override val options: Array[(String, CLIOption[Any])] = Array( "source" -> CLIOption[Int]("<how many kafka source tasks>", required = false, defaultValue = Some(1)), "split" -> CLIOption[Int]("<how many split tasks>", required = false, defaultValue = Some(1)), "sum" -> CLIOption[Int]("<how many sum tasks>", required = false, defaultValue = Some(1)), "sink" -> CLIOption[Int]("<how many kafka sink tasks>", required = false, defaultValue = Some(1)) ) def application(config: ParseResult, system: ActorSystem): StreamApplication = { implicit val actorSystem = system val appName = "KafkaWordCount" val sourceNum = config.getInt("source") val splitNum = config.getInt("split") val sumNum = config.getInt("sum") val sinkNum = config.getInt("sink") val appConfig = UserConfig.empty val props = new Properties props.put(KafkaConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181") props.put(KafkaConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092") props.put(KafkaConfig.CONSUMER_START_OFFSET_CONFIG, new java.lang.Long(OffsetRequest.LatestTime)) props.put(KafkaConfig.CHECKPOINT_STORE_NAME_PREFIX_CONFIG, appName) val sourceTopic = "topic1" val source = new KafkaSource(sourceTopic, props) val checkpointStoreFactory = new KafkaStoreFactory(props) source.setCheckpointStore(checkpointStoreFactory) val sourceProcessor = DataSourceProcessor(source, sourceNum) val split = Processor[Split](splitNum) val sum = Processor[Sum](sumNum) val sink = new KafkaSink("topic2", props) val sinkProcessor = DataSinkProcessor(sink, sinkNum) val partitioner = new HashPartitioner val computation = sourceProcessor ~ partitioner ~> split ~ partitioner ~> sum ~ partitioner ~> sinkProcessor val app = StreamApplication(appName, Graph(computation), appConfig) app } override def main(akkaConf: Config, args: Array[String]): Unit = { val config = parse(args) val context = ClientContext(akkaConf) val appId = context.submit(application(config, context.system)) context.close() } }
Example 124
Source File: NumericalDataProducer.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.integrationtest.kafka import java.util.Properties import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.serialization.ByteArraySerializer import org.apache.log4j.Logger import org.apache.gearpump.streaming.serializer.ChillSerializer class NumericalDataProducer(topic: String, bootstrapServers: String) { private val LOG = Logger.getLogger(getClass) private val producer = createProducer private val WRITE_SLEEP_NANOS = 10 private val serializer = new ChillSerializer[Int] var lastWriteNum = 0 def start(): Unit = { produceThread.start() } def stop(): Unit = { if (produceThread.isAlive) { produceThread.interrupt() produceThread.join() } producer.close() } def producedNumbers: Range = { Range(1, lastWriteNum + 1) } private def createProducer: KafkaProducer[Array[Byte], Array[Byte]] = { val properties = new Properties() properties.setProperty("bootstrap.servers", bootstrapServers) new KafkaProducer[Array[Byte], Array[Byte]](properties, new ByteArraySerializer, new ByteArraySerializer) } private val produceThread = new Thread(new Runnable { override def run(): Unit = { try { while (!Thread.currentThread.isInterrupted) { lastWriteNum += 1 val msg = serializer.serialize(lastWriteNum) val record = new ProducerRecord[Array[Byte], Array[Byte]](topic, msg) producer.send(record) Thread.sleep(0, WRITE_SLEEP_NANOS) } } catch { case ex: InterruptedException => LOG.error("message producing is stopped by an interrupt") } } }) }
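The producer above is configured purely through a Properties object plus explicitly supplied serializers. A hedged, stand-alone sketch of the same construction follows; the broker address and the demo-topic name are placeholders rather than values from the original project, and a reachable broker is assumed at run time.

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.ByteArraySerializer

object ProducerPropsSketch {
  def main(args: Array[String]): Unit = {
    // "bootstrap.servers" is the only mandatory setting when the serializers are
    // passed to the constructor, as in the example above.
    val props = new Properties()
    props.setProperty("bootstrap.servers", "localhost:9092")

    val producer = new KafkaProducer[Array[Byte], Array[Byte]](
      props, new ByteArraySerializer, new ByteArraySerializer)
    try {
      // "demo-topic" is a placeholder topic name.
      producer.send(new ProducerRecord[Array[Byte], Array[Byte]]("demo-topic", "hello".getBytes("UTF-8")))
    } finally {
      producer.close()
    }
  }
}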
Example 125
Source File: KafkaDSL.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.kafka.dsl

import java.util.Properties

import org.apache.gearpump.cluster.UserConfig
import org.apache.gearpump.streaming.dsl.scalaapi.{Stream, StreamApp}
import org.apache.gearpump.streaming.kafka.{KafkaSink, KafkaSource}
import org.apache.gearpump.streaming.transaction.api.CheckpointStoreFactory

object KafkaDSL {

  // writeToKafka is an extension method on Stream[T]; the enclosing implicit class
  // elided from the published snippet is restored here so the code compiles.
  implicit class KafkaStream[T](val stream: Stream[T]) {
    def writeToKafka(
        topic: String,
        properties: Properties,
        parallelism: Int = 1,
        userConfig: UserConfig = UserConfig.empty,
        description: String = "KafkaSink"): Stream[T] = {
      stream.sink(new KafkaSink(topic, properties), parallelism, userConfig, description)
    }
  }
}
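Assuming the implicit-class reading of the snippet above, usage boils down to preparing a Properties object for the sink and calling writeToKafka on an existing Stream. The sketch below only builds the Properties; the commented call site, the wordCounts stream and the output-topic name are hypothetical.

import java.util.Properties

object KafkaDslUsageSketch {
  def main(args: Array[String]): Unit = {
    // Sink settings handed to KafkaSink through the extension above.
    val props = new Properties()
    props.put("bootstrap.servers", "localhost:9092")

    // Hypothetical call site (requires a running Gearpump StreamApp and a Stream[T] value):
    // wordCounts.writeToKafka("output-topic", props, parallelism = 1)
    println(props)
  }
}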
Example 126
Source File: AbstractKafkaSink.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.kafka.lib.sink import java.util.Properties import org.apache.gearpump.Message import org.apache.gearpump.streaming.kafka.lib.sink.AbstractKafkaSink.KafkaProducerFactory import org.apache.gearpump.streaming.kafka.util.KafkaConfig import org.apache.gearpump.streaming.kafka.util.KafkaConfig.KafkaConfigFactory import org.apache.gearpump.streaming.sink.DataSink import org.apache.gearpump.streaming.task.TaskContext import org.apache.gearpump.util.LogUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.serialization.ByteArraySerializer object AbstractKafkaSink { private val LOG = LogUtil.getLogger(classOf[AbstractKafkaSink]) val producerFactory = new KafkaProducerFactory { override def getKafkaProducer(config: KafkaConfig): KafkaProducer[Array[Byte], Array[Byte]] = { new KafkaProducer[Array[Byte], Array[Byte]](config.getProducerConfig, new ByteArraySerializer, new ByteArraySerializer) } } trait KafkaProducerFactory extends java.io.Serializable { def getKafkaProducer(config: KafkaConfig): KafkaProducer[Array[Byte], Array[Byte]] } } abstract class AbstractKafkaSink private[kafka]( topic: String, props: Properties, kafkaConfigFactory: KafkaConfigFactory, factory: KafkaProducerFactory) extends DataSink { import org.apache.gearpump.streaming.kafka.lib.sink.AbstractKafkaSink._ def this(topic: String, props: Properties) = { this(topic, props, new KafkaConfigFactory, AbstractKafkaSink.producerFactory) } private lazy val config = kafkaConfigFactory.getKafkaConfig(props) // Lazily construct producer since KafkaProducer is not serializable private lazy val producer = factory.getKafkaProducer(config) override def open(context: TaskContext): Unit = { LOG.info("KafkaSink opened") } override def write(message: Message): Unit = { message.value match { case (k: Array[Byte], v: Array[Byte]) => val record = new ProducerRecord[Array[Byte], Array[Byte]](topic, k, v) producer.send(record) LOG.debug("KafkaSink sent record {} to Kafka", record) case v: Array[Byte] => val record = new ProducerRecord[Array[Byte], Array[Byte]](topic, v) producer.send(record) LOG.debug("KafkaSink sent record {} to Kafka", record) case m => val errorMsg = s"unexpected message type ${m.getClass}; " + s"Array[Byte] or (Array[Byte], Array[Byte]) required" LOG.error(errorMsg) } } override def close(): Unit = { producer.close() LOG.info("KafkaSink closed") } }
Example 127
Source File: KafkaStore.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.kafka.lib.store import java.util.Properties import com.twitter.bijection.Injection import kafka.api.OffsetRequest import org.apache.gearpump.Time.MilliSeconds import org.apache.gearpump.streaming.kafka.lib.source.consumer.KafkaConsumer import org.apache.gearpump.streaming.kafka.util.KafkaConfig import org.apache.gearpump.streaming.kafka.util.KafkaConfig.KafkaConfigFactory import org.apache.gearpump.streaming.transaction.api.{CheckpointStore, CheckpointStoreFactory} import org.apache.gearpump.util.LogUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.serialization.ByteArraySerializer class KafkaStore private[kafka]( val topic: String, val producer: KafkaProducer[Array[Byte], Array[Byte]], val optConsumer: Option[KafkaConsumer]) extends CheckpointStore { import org.apache.gearpump.streaming.kafka.lib.store.KafkaStore._ private var maxTime: MilliSeconds = 0L override def persist(time: MilliSeconds, checkpoint: Array[Byte]): Unit = { // make sure checkpointed timestamp is monotonically increasing // hence (1, 1), (3, 2), (2, 3) is checkpointed as (1, 1), (3, 2), (3, 3) if (time > maxTime) { maxTime = time } val key = maxTime val value = checkpoint val message = new ProducerRecord[Array[Byte], Array[Byte]]( topic, 0, Injection[Long, Array[Byte]](key), value) producer.send(message) LOG.debug("KafkaStore persisted state ({}, {})", key, value) } override def recover(time: MilliSeconds): Option[Array[Byte]] = { var checkpoint: Option[Array[Byte]] = None optConsumer.foreach { consumer => while (consumer.hasNext && checkpoint.isEmpty) { val kafkaMsg = consumer.next() checkpoint = for { k <- kafkaMsg.key t <- Injection.invert[MilliSeconds, Array[Byte]](k).toOption c = kafkaMsg.msg if t >= time } yield c } consumer.close() } checkpoint match { case Some(c) => LOG.info(s"KafkaStore recovered checkpoint ($time, $c)") case None => LOG.info(s"no checkpoint existing for $time") } checkpoint } override def close(): Unit = { producer.close() LOG.info("KafkaStore closed") } }
Example 128
Source File: KafkaSinkSpec.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.kafka import java.util.Properties import com.twitter.bijection.Injection import org.apache.gearpump.streaming.kafka.lib.sink.AbstractKafkaSink.KafkaProducerFactory import org.apache.gearpump.streaming.kafka.util.KafkaConfig import org.apache.gearpump.streaming.kafka.util.KafkaConfig.KafkaConfigFactory import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.mockito.Mockito._ import org.scalacheck.Gen import org.scalatest.mock.MockitoSugar import org.scalatest.prop.PropertyChecks import org.scalatest.{Matchers, PropSpec} import org.apache.gearpump.Message import org.apache.gearpump.streaming.MockUtil class KafkaSinkSpec extends PropSpec with PropertyChecks with Matchers with MockitoSugar { val dataGen = for { topic <- Gen.alphaStr key <- Gen.alphaStr msg <- Gen.alphaStr } yield (topic, Injection[String, Array[Byte]](key), Injection[String, Array[Byte]](msg)) property("KafkaSink write should send producer record") { forAll(dataGen) { (data: (String, Array[Byte], Array[Byte])) => val props = mock[Properties] val producer = mock[KafkaProducer[Array[Byte], Array[Byte]]] val producerFactory = mock[KafkaProducerFactory] val configFactory = mock[KafkaConfigFactory] val config = mock[KafkaConfig] when(configFactory.getKafkaConfig(props)).thenReturn(config) when(producerFactory.getKafkaProducer(config)).thenReturn(producer) val (topic, key, msg) = data val kafkaSink = new KafkaSink(topic, props, configFactory, producerFactory) kafkaSink.write(Message((key, msg))) verify(producer).send(MockUtil.argMatch[ProducerRecord[Array[Byte], Array[Byte]]]( r => r.topic == topic && (r.key sameElements key) && (r.value sameElements msg))) kafkaSink.write(Message(msg)) verify(producer).send(MockUtil.argMatch[ProducerRecord[Array[Byte], Array[Byte]]]( r => r.topic() == topic && (r.key == null) && (r.value() sameElements msg) )) kafkaSink.close() } } property("KafkaSink close should close kafka producer") { val props = mock[Properties] val producer = mock[KafkaProducer[Array[Byte], Array[Byte]]] val producerFactory = mock[KafkaProducerFactory] val configFactory = mock[KafkaConfigFactory] val config = mock[KafkaConfig] when(configFactory.getKafkaConfig(props)).thenReturn(config) when(producerFactory.getKafkaProducer(config)).thenReturn(producer) val kafkaSink = new KafkaSink("topic", props, configFactory, producerFactory) kafkaSink.close() verify(producer).close() } }
Example 129
Source File: KafkaServerHarness.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.kafka.util import java.util.Properties import kafka.admin.AdminUtils import kafka.common.KafkaException import kafka.server.{KafkaConfig => KafkaServerConfig, KafkaServer} import kafka.utils.{TestUtils, Utils} trait KafkaServerHarness extends ZookeeperHarness { val configs: List[KafkaServerConfig] private var servers: List[KafkaServer] = null private var brokerList: String = null def getServers: List[KafkaServer] = servers def getBrokerList: String = brokerList override def setUp() { super.setUp() if (configs.size <= 0) { throw new KafkaException("Must supply at least one server config.") } brokerList = TestUtils.getBrokerListStrFromConfigs(configs) servers = configs.map(TestUtils.createServer(_)) } override def tearDown() { servers.foreach(_.shutdown()) servers.foreach(_.config.logDirs.foreach(Utils.rm)) super.tearDown() } def createTopicUntilLeaderIsElected(topic: String, partitions: Int, replicas: Int, timeout: Long = 10000): Map[Int, Option[Int]] = { val zkClient = getZkClient try { // Creates topic AdminUtils.createTopic(zkClient, topic, partitions, replicas, new Properties) // Waits until the update metadata request for new topic reaches all servers (0 until partitions).map { case i => TestUtils.waitUntilMetadataIsPropagated(servers, topic, i, timeout) i -> TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, i, timeout) }.toMap } catch { case e: Exception => throw e } } }
Example 130
Source File: HadoopRunner.scala From ETL-Starter-Kit with MIT License | 5 votes |
package com.etl.utils import java.util.Properties import com.twitter.scalding.{RichXHandler, Tool} import org.apache.hadoop class HadoopRunner(job: String, prop: Properties) { def run() { val args = prop.get("main.args").asInstanceOf[String] require(args != null, "unable to get main.args") try { hadoop.util.ToolRunner.run(new hadoop.conf.Configuration, new Tool, args.split(" ")) } catch { case t: Throwable => { //create the exception URL link in GitHub wiki val gitHubLink = RichXHandler.createXUrl(t) val extraInfo = (if (RichXHandler().handlers.exists(h => h(t))) { RichXHandler.mapping(t.getClass) + "\n" } else { "" }) + "If you know what exactly caused this error, please consider contributing to GitHub via following link.\n" + gitHubLink //re-throw the exception with extra info throw new Throwable(extraInfo, t) } } } }
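The runner above relies on a required entry (main.args) being present in the Properties it receives. A small stand-alone sketch of that required-property check; the key and the sample argument string are illustrative.

import java.util.Properties

object RequiredProps {
  // Fetch a property that must be present, mirroring the require(args != null, ...) check above.
  def required(props: Properties, key: String): String =
    Option(props.getProperty(key))
      .getOrElse(throw new IllegalArgumentException(s"unable to get $key"))

  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.setProperty("main.args", "--input in.tsv --output out.tsv") // illustrative value
    println(required(props, "main.args"))
  }
}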
Example 131
Source File: LogAnalyticsSinkConfiguration.scala From spark-monitoring with MIT License | 5 votes |
package org.apache.spark.metrics.sink.loganalytics import java.util.Properties import java.util.concurrent.TimeUnit import com.microsoft.pnp.LogAnalyticsEnvironment import org.apache.spark.LogAnalyticsConfiguration private[spark] object LogAnalyticsSinkConfiguration { private[spark] val LOGANALYTICS_KEY_WORKSPACEID = "workspaceId" private[spark] val LOGANALYTICS_KEY_SECRET = "secret" private[spark] val LOGANALYTICS_KEY_LOGTYPE = "logType" private[spark] val LOGANALYTICS_KEY_TIMESTAMPFIELD = "timestampField" private[spark] val LOGANALYTICS_KEY_PERIOD = "period" private[spark] val LOGANALYTICS_KEY_UNIT = "unit" private[spark] val LOGANALYTICS_DEFAULT_LOGTYPE = "SparkMetrics" private[spark] val LOGANALYTICS_DEFAULT_PERIOD = "10" private[spark] val LOGANALYTICS_DEFAULT_UNIT = "SECONDS" } private[spark] class LogAnalyticsSinkConfiguration(properties: Properties) extends LogAnalyticsConfiguration { import LogAnalyticsSinkConfiguration._ override def getWorkspaceId: Option[String] = { Option(properties.getProperty(LOGANALYTICS_KEY_WORKSPACEID, LogAnalyticsEnvironment.getWorkspaceId)) } override def getSecret: Option[String] = { Option(properties.getProperty(LOGANALYTICS_KEY_SECRET, LogAnalyticsEnvironment.getWorkspaceKey)) } override protected def getLogType: String = properties.getProperty(LOGANALYTICS_KEY_LOGTYPE, LOGANALYTICS_DEFAULT_LOGTYPE) override protected def getTimestampFieldName: Option[String] = Option(properties.getProperty(LOGANALYTICS_KEY_TIMESTAMPFIELD, null)) val pollPeriod: Int = { val value = properties.getProperty(LOGANALYTICS_KEY_PERIOD, LOGANALYTICS_DEFAULT_PERIOD).toInt logInfo(s"Setting polling period to $value") value } val pollUnit: TimeUnit = { val value = TimeUnit.valueOf( properties.getProperty(LOGANALYTICS_KEY_UNIT, LOGANALYTICS_DEFAULT_UNIT).toUpperCase) logInfo(s"Setting polling unit to $value") value } }
Example 132
Source File: LogAnalyticsMetricsSink.scala From spark-monitoring with MIT License | 5 votes |
package org.apache.spark.metrics.sink.loganalytics import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.MetricRegistry import org.apache.spark.internal.Logging import org.apache.spark.metrics.sink.Sink import org.apache.spark.{SecurityManager, SparkException} private class LogAnalyticsMetricsSink( val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink with Logging { private val config = new LogAnalyticsSinkConfiguration(property) org.apache.spark.metrics.MetricsSystem.checkMinimalPollingPeriod(config.pollUnit, config.pollPeriod) var reporter = LogAnalyticsReporter.forRegistry(registry) .withWorkspaceId(config.workspaceId) .withWorkspaceKey(config.secret) .withLogType(config.logType) .build() override def start(): Unit = { reporter.start(config.pollPeriod, config.pollUnit) logInfo(s"LogAnalyticsMetricsSink started") } override def stop(): Unit = { reporter.stop() logInfo("LogAnalyticsMetricsSink stopped.") } override def report(): Unit = { reporter.report() } }
Example 133
Source File: MiniClusterUtils.scala From incubator-livy with Apache License 2.0 | 5 votes |
package org.apache.livy.test.framework import java.io._ import java.nio.charset.StandardCharsets.UTF_8 import java.util.Properties import scala.collection.JavaConverters._ import org.apache.hadoop.conf.Configuration trait MiniClusterUtils { protected def saveProperties(props: Map[String, String], dest: File): Unit = { val jprops = new Properties() props.foreach { case (k, v) => jprops.put(k, v) } val tempFile = new File(dest.getAbsolutePath() + ".tmp") val out = new OutputStreamWriter(new FileOutputStream(tempFile), UTF_8) try { jprops.store(out, "Configuration") } finally { out.close() } tempFile.renameTo(dest) } protected def loadProperties(file: File): Map[String, String] = { val in = new InputStreamReader(new FileInputStream(file), UTF_8) val props = new Properties() try { props.load(in) } finally { in.close() } props.asScala.toMap } protected def saveConfig(conf: Configuration, dest: File): Unit = { val redacted = new Configuration(conf) // This setting references a test class that is not available when using a real Spark // installation, so remove it from client configs. redacted.unset("net.topology.node.switch.mapping.impl") val out = new FileOutputStream(dest) try { redacted.writeXml(out) } finally { out.close() } } }
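The helpers above store and load Properties through explicit UTF-8 readers and writers. A self-contained round-trip sketch of the same idea, using a temporary file and an illustrative livy.server.port entry.

import java.io.{File, FileInputStream, FileOutputStream, InputStreamReader, OutputStreamWriter}
import java.nio.charset.StandardCharsets.UTF_8
import java.util.Properties

import scala.collection.JavaConverters._

object PropertiesRoundTrip {
  def main(args: Array[String]): Unit = {
    val file = File.createTempFile("round-trip", ".properties")

    // Write the properties with an explicit UTF-8 writer.
    val props = new Properties()
    props.setProperty("livy.server.port", "8998") // illustrative key/value
    val out = new OutputStreamWriter(new FileOutputStream(file), UTF_8)
    try props.store(out, "Configuration") finally out.close()

    // Read them back and expose them as an immutable Scala Map.
    val in = new InputStreamReader(new FileInputStream(file), UTF_8)
    val loaded = new Properties()
    try loaded.load(in) finally in.close()

    val asMap: Map[String, String] = loaded.asScala.toMap
    println(asMap) // Map(livy.server.port -> 8998)
  }
}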
Example 134
Source File: package.scala From incubator-livy with Apache License 2.0 | 5 votes |
package org.apache import java.util.Properties import scala.util.control.NonFatal package object livy { private object LivyBuildInfo { val ( livyVersion: String, livyBuildUser: String, livyRevision: String, livyBranch: String, livyBuildDate: String, livyRepo: String ) = { val unknown = "<unknown>" val defaultValue = (unknown, unknown, unknown, unknown, unknown, unknown) val resource = Option(Thread.currentThread().getContextClassLoader .getResourceAsStream("livy-version-info.properties")) try { resource.map { r => val properties = new Properties() properties.load(r) ( properties.getProperty("version", unknown), properties.getProperty("user", unknown), properties.getProperty("revision", unknown), properties.getProperty("branch", unknown), properties.getProperty("date", unknown), properties.getProperty("url", unknown) ) }.getOrElse(defaultValue) } catch { case NonFatal(e) => // swallow the exception defaultValue } finally { try { resource.foreach(_.close()) } catch { case NonFatal(e) => // swallow the exception in closing the stream } } } } val LIVY_VERSION = LivyBuildInfo.livyVersion val LIVY_BUILD_USER = LivyBuildInfo.livyBuildUser val LIVY_REVISION = LivyBuildInfo.livyRevision val LIVY_BRANCH = LivyBuildInfo.livyBranch val LIVY_BUILD_DATE = LivyBuildInfo.livyBuildDate val LIVY_REPO_URL = LivyBuildInfo.livyRepo }
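The package object above reads build metadata from a classpath resource and falls back to "<unknown>" per key. A minimal sketch of that resource-plus-default pattern; version-info.properties and the key names are placeholders.

import java.util.Properties

object BuildInfoSketch {
  def main(args: Array[String]): Unit = {
    val unknown = "<unknown>"
    // Placeholder resource name; in the example above it is livy-version-info.properties.
    val resource = Option(
      Thread.currentThread().getContextClassLoader
        .getResourceAsStream("version-info.properties"))

    val props = new Properties()
    try {
      resource.foreach(is => props.load(is))
    } finally {
      resource.foreach(_.close())
    }

    // getProperty's two-argument form supplies a per-key fallback.
    println(props.getProperty("version", unknown))
    println(props.getProperty("branch", unknown))
  }
}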
Example 135
Source File: SessionSpec.scala From incubator-livy with Apache License 2.0 | 5 votes |
package org.apache.livy.repl import java.util.Properties import java.util.concurrent.{ConcurrentLinkedQueue, CountDownLatch, TimeUnit} import org.apache.spark.SparkConf import org.scalatest.{BeforeAndAfter, FunSpec} import org.scalatest.Matchers._ import org.scalatest.concurrent.Eventually import org.scalatest.time._ import org.apache.livy.LivyBaseUnitTestSuite import org.apache.livy.repl.Interpreter.ExecuteResponse import org.apache.livy.rsc.RSCConf import org.apache.livy.sessions._ class SessionSpec extends FunSpec with Eventually with LivyBaseUnitTestSuite with BeforeAndAfter { override implicit val patienceConfig = PatienceConfig(timeout = scaled(Span(30, Seconds)), interval = scaled(Span(100, Millis))) private val rscConf = new RSCConf(new Properties()).set(RSCConf.Entry.SESSION_KIND, "spark") describe("Session") { var session: Session = null after { if (session != null) { session.close() session = null } } it("should call state changed callbacks in happy path") { val expectedStateTransitions = Array("not_started", "starting", "idle", "busy", "idle", "busy", "idle") val actualStateTransitions = new ConcurrentLinkedQueue[String]() session = new Session(rscConf, new SparkConf(), None, { s => actualStateTransitions.add(s.toString) }) session.start() session.execute("") eventually { actualStateTransitions.toArray shouldBe expectedStateTransitions } } it("should not transit to idle if there're any pending statements.") { val expectedStateTransitions = Array("not_started", "starting", "idle", "busy", "busy", "busy", "idle", "busy", "idle") val actualStateTransitions = new ConcurrentLinkedQueue[String]() val blockFirstExecuteCall = new CountDownLatch(1) val interpreter = new SparkInterpreter(new SparkConf()) { override def execute(code: String): ExecuteResponse = { blockFirstExecuteCall.await(10, TimeUnit.SECONDS) super.execute(code) } } session = new Session(rscConf, new SparkConf(), Some(interpreter), { s => actualStateTransitions.add(s.toString) }) session.start() for (_ <- 1 to 2) { session.execute("") } blockFirstExecuteCall.countDown() eventually { actualStateTransitions.toArray shouldBe expectedStateTransitions } } it("should remove old statements when reaching threshold") { rscConf.set(RSCConf.Entry.RETAINED_STATEMENTS, 2) session = new Session(rscConf, new SparkConf()) session.start() session.statements.size should be (0) session.execute("") session.statements.size should be (1) session.statements.map(_._1).toSet should be (Set(0)) session.execute("") session.statements.size should be (2) session.statements.map(_._1).toSet should be (Set(0, 1)) session.execute("") eventually { session.statements.size should be (2) session.statements.map(_._1).toSet should be (Set(1, 2)) } // Continue submitting statements, total statements in memory should be 2. session.execute("") eventually { session.statements.size should be (2) session.statements.map(_._1).toSet should be (Set(2, 3)) } } } }
Example 136
Source File: BaseSessionSpec.scala From incubator-livy with Apache License 2.0 | 5 votes |
package org.apache.livy.repl import java.util.Properties import java.util.concurrent.atomic.AtomicInteger import scala.concurrent.Await import scala.concurrent.duration._ import scala.language.postfixOps import org.apache.spark.SparkConf import org.json4s._ import org.scalatest.{FlatSpec, Matchers} import org.scalatest.concurrent.Eventually._ import org.apache.livy.LivyBaseUnitTestSuite import org.apache.livy.rsc.RSCConf import org.apache.livy.rsc.driver.{Statement, StatementState} import org.apache.livy.sessions._ abstract class BaseSessionSpec(kind: Kind) extends FlatSpec with Matchers with LivyBaseUnitTestSuite { implicit val formats = DefaultFormats private val rscConf = new RSCConf(new Properties()).set(RSCConf.Entry.SESSION_KIND, kind.toString) private val sparkConf = new SparkConf() protected def execute(session: Session)(code: String): Statement = { val id = session.execute(code) eventually(timeout(30 seconds), interval(100 millis)) { val s = session.statements(id) s.state.get() shouldBe StatementState.Available s } } protected def withSession(testCode: Session => Any): Unit = { val stateChangedCalled = new AtomicInteger() val session = new Session(rscConf, sparkConf, None, { _ => stateChangedCalled.incrementAndGet() }) try { // Session's constructor should fire an initial state change event. stateChangedCalled.intValue() shouldBe 1 Await.ready(session.start(), 30 seconds) assert(session.state === SessionState.Idle) // There should be at least 1 state change event fired when session transits to idle. stateChangedCalled.intValue() should (be > 1) testCode(session) } finally { session.close() } } it should "start in the starting or idle state" in { val session = new Session(rscConf, sparkConf) val future = session.start() try { Await.ready(future, 60 seconds) session.state should (equal (SessionState.Starting) or equal (SessionState.Idle)) } finally { session.close() } } it should "eventually become the idle state" in withSession { session => session.state should equal (SessionState.Idle) } }
Example 137
Source File: Utils.scala From incubator-livy with Apache License 2.0 | 5 votes |
package org.apache.livy import java.io.{Closeable, File, InputStreamReader} import java.net.URL import java.nio.charset.StandardCharsets.UTF_8 import java.security.SecureRandom import java.util.Properties import scala.annotation.tailrec import scala.collection.JavaConverters._ import scala.concurrent.TimeoutException import scala.concurrent.duration.Duration import org.apache.commons.codec.binary.Base64 object Utils { def getPropertiesFromFile(file: File): Map[String, String] = { loadProperties(file.toURI().toURL()) } def loadProperties(url: URL): Map[String, String] = { val inReader = new InputStreamReader(url.openStream(), UTF_8) try { val properties = new Properties() properties.load(inReader) properties.stringPropertyNames().asScala.map { k => (k, properties.getProperty(k).trim()) }.toMap } finally { inReader.close() } } def isProcessAlive(process: Process): Boolean = { try { process.exitValue() false } catch { case _: IllegalThreadStateException => true } } def startDaemonThread(name: String)(f: => Unit): Thread = { val thread = new Thread(name) { override def run(): Unit = f } thread.setDaemon(true) thread.start() thread } def usingResource[A <: Closeable, B](resource: A)(f: A => B): B = { try { f(resource) } finally { resource.close() } } def createSecret(secretBitLength: Int): String = { val rnd = new SecureRandom() val secretBytes = new Array[Byte](secretBitLength / java.lang.Byte.SIZE) rnd.nextBytes(secretBytes) Base64.encodeBase64String(secretBytes) } }
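loadProperties above converts a Properties object into an immutable Map. The reverse direction is occasionally needed as well; a small sketch, with an illustrative livy.ui.enabled key.

import java.util.Properties

object MapToProperties {
  // Reverse of the loadProperties helper above: turn an immutable Map back into Properties.
  def toProperties(map: Map[String, String]): Properties = {
    val props = new Properties()
    map.foreach { case (k, v) => props.setProperty(k, v) }
    props
  }

  def main(args: Array[String]): Unit = {
    val props = toProperties(Map("livy.ui.enabled" -> "true")) // illustrative key
    println(props.getProperty("livy.ui.enabled"))
  }
}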
Example 138
Source File: stringMap.scala From case-classy with Apache License 2.0 | 5 votes |
package classy import java.io.InputStream //#=jvm import java.util.Properties package object stringMap { type StringMap = scala.collection.Map[String, String] type JavaStringMap = java.util.Map[String, String] type StringMapDecoder[A] = Decoder[StringMap, A] object StringMapDecoder { def apply[A](implicit ev: StringMapDecoder[A]): StringMapDecoder[A] = ev def instance[A](f: StringMap => Either[DecodeError, A]): StringMapDecoder[A] = Decoder.instance(f) } implicit val stringMapReadString: Read[StringMap, String] = Read.instance(decoders.stringMapToString) implicit val stringMapReadNested: Read[StringMap, StringMap] = Read.instance(decoders.stringMapToStringMap) implicit val stringMapReadListString: Read[StringMap, List[String]] = Read.instance(decoders.stringMapToListString) implicit val stringMapReadListStringMap: Read[StringMap, List[StringMap]] = Read.instance(decoders.stringMapToListStringMap) val readStringMap: Read.From[StringMap] = Read.from[StringMap] implicit class StringMapDecoderOps[A]( private val decoder: Decoder[StringMap, A] ) extends AnyVal { import scala.collection.convert.{ Wrappers => wrap } def fromProperties: Decoder[Properties, A] = decoder.mapInput(properties => wrap.JPropertiesWrapper(properties)) } //#+jvm implicit class PropertiesDecoderOps[A]( private val decoder: Decoder[Properties, A] ) extends AnyVal { def fromInputStream: Decoder[InputStream, A] = decoder <<< decoders.inputStreamToProperties } //#-jvm }
Example 139
Source File: KafkaTestUtilsTest.scala From spark-testing-base with Apache License 2.0 | 5 votes |
package com.holdenkarau.spark.testing.kafka import java.util.Properties import scala.collection.JavaConversions._ import kafka.consumer.ConsumerConfig import org.apache.spark.streaming.kafka.KafkaTestUtils import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import org.scalatest.{BeforeAndAfterAll, FunSuite} @RunWith(classOf[JUnitRunner]) class KafkaTestUtilsTest extends FunSuite with BeforeAndAfterAll { private var kafkaTestUtils: KafkaTestUtils = _ override def beforeAll(): Unit = { kafkaTestUtils = new KafkaTestUtils kafkaTestUtils.setup() } override def afterAll(): Unit = if (kafkaTestUtils != null) { kafkaTestUtils.teardown() kafkaTestUtils = null } test("Kafka send and receive message") { val topic = "test-topic" val message = "HelloWorld!" kafkaTestUtils.createTopic(topic) kafkaTestUtils.sendMessages(topic, message.getBytes) val consumerProps = new Properties() consumerProps.put("zookeeper.connect", kafkaTestUtils.zkAddress) consumerProps.put("group.id", "test-group") consumerProps.put("flow-topic", topic) consumerProps.put("auto.offset.reset", "smallest") consumerProps.put("zookeeper.session.timeout.ms", "2000") consumerProps.put("zookeeper.connection.timeout.ms", "6000") consumerProps.put("zookeeper.sync.time.ms", "2000") consumerProps.put("auto.commit.interval.ms", "2000") val consumer = kafka.consumer.Consumer.createJavaConsumerConnector(new ConsumerConfig(consumerProps)) try { val topicCountMap = Map(topic -> new Integer(1)) val consumerMap = consumer.createMessageStreams(topicCountMap) val stream = consumerMap.get(topic).get(0) val it = stream.iterator() val mess = it.next assert(new String(mess.message().map(_.toChar)) === message) } finally { consumer.shutdown() } } }
Example 140
Source File: LagomDevModePropertiesLoader.scala From lagom with Apache License 2.0 | 5 votes |
package com.lightbend.lagom.devmode.internal.util import java.io.File import java.io.FileInputStream import java.util.Properties object PropertiesLoader { def from(file: String): Properties = { val properties = new Properties() // First check if the file is on the classpath val is = { getClass.getResourceAsStream(file) match { case null => // Try and load it as a file val f = new File(file) if (f.isFile) { new FileInputStream(f) } else { throw new IllegalArgumentException(s"File $file not found as classpath resource or on the filesystem") } case found => found } } try { properties.load(is) properties } finally { is.close() } } }
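PropertiesLoader above prefers the classpath and falls back to the filesystem. A stand-alone sketch of that lookup order, writing an illustrative playVersion entry to a temporary file so the example runs on its own.

import java.io.{File, FileInputStream, InputStream}
import java.util.Properties

object ClasspathOrFile {
  // Same lookup order as PropertiesLoader above: classpath resource first, then the filesystem.
  def open(name: String): InputStream =
    Option(getClass.getResourceAsStream(name)).getOrElse {
      val f = new File(name)
      if (f.isFile) new FileInputStream(f)
      else throw new IllegalArgumentException(
        s"File $name not found as classpath resource or on the filesystem")
    }

  def main(args: Array[String]): Unit = {
    val file = File.createTempFile("dev-mode", ".properties")
    java.nio.file.Files.write(file.toPath, "playVersion=2.6.0".getBytes("UTF-8")) // illustrative content

    val props = new Properties()
    val is = open(file.getAbsolutePath)
    try props.load(is) finally is.close()
    println(props.getProperty("playVersion"))
  }
}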
Example 141
Source File: GangliaSink.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.MetricRegistry import com.codahale.metrics.ganglia.GangliaReporter import info.ganglia.gmetric4j.gmetric.GMetric import info.ganglia.gmetric4j.gmetric.GMetric.UDPAddressingMode import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem class GangliaSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val GANGLIA_KEY_PERIOD = "period" val GANGLIA_DEFAULT_PERIOD = 10 val GANGLIA_KEY_UNIT = "unit" val GANGLIA_DEFAULT_UNIT: TimeUnit = TimeUnit.SECONDS val GANGLIA_KEY_MODE = "mode" val GANGLIA_DEFAULT_MODE: UDPAddressingMode = GMetric.UDPAddressingMode.MULTICAST // TTL for multicast messages. If listeners are X hops away in network, must be at least X. val GANGLIA_KEY_TTL = "ttl" val GANGLIA_DEFAULT_TTL = 1 val GANGLIA_KEY_HOST = "host" val GANGLIA_KEY_PORT = "port" val GANGLIA_KEY_DMAX = "dmax" val GANGLIA_DEFAULT_DMAX = 0 def propertyToOption(prop: String): Option[String] = Option(property.getProperty(prop)) if (!propertyToOption(GANGLIA_KEY_HOST).isDefined) { throw new Exception("Ganglia sink requires 'host' property.") } if (!propertyToOption(GANGLIA_KEY_PORT).isDefined) { throw new Exception("Ganglia sink requires 'port' property.") } val host = propertyToOption(GANGLIA_KEY_HOST).get val port = propertyToOption(GANGLIA_KEY_PORT).get.toInt val ttl = propertyToOption(GANGLIA_KEY_TTL).map(_.toInt).getOrElse(GANGLIA_DEFAULT_TTL) val dmax = propertyToOption(GANGLIA_KEY_DMAX).map(_.toInt).getOrElse(GANGLIA_DEFAULT_DMAX) val mode: UDPAddressingMode = propertyToOption(GANGLIA_KEY_MODE) .map(u => GMetric.UDPAddressingMode.valueOf(u.toUpperCase)).getOrElse(GANGLIA_DEFAULT_MODE) val pollPeriod = propertyToOption(GANGLIA_KEY_PERIOD).map(_.toInt) .getOrElse(GANGLIA_DEFAULT_PERIOD) val pollUnit: TimeUnit = propertyToOption(GANGLIA_KEY_UNIT) .map(u => TimeUnit.valueOf(u.toUpperCase)) .getOrElse(GANGLIA_DEFAULT_UNIT) MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val ganglia = new GMetric(host, port, mode, ttl) val reporter: GangliaReporter = GangliaReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .withDMax(dmax) .build(ganglia) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 142
Source File: SQLExecutionSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution import java.util.Properties import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} import org.apache.spark.sql.SparkSession class SQLExecutionSuite extends SparkFunSuite { test("concurrent query execution (SPARK-10548)") { // Try to reproduce the issue with the old SparkContext val conf = new SparkConf() .setMaster("local[*]") .setAppName("test") val badSparkContext = new BadSparkContext(conf) try { testConcurrentQueryExecution(badSparkContext) fail("unable to reproduce SPARK-10548") } catch { case e: IllegalArgumentException => assert(e.getMessage.contains(SQLExecution.EXECUTION_ID_KEY)) } finally { badSparkContext.stop() } // Verify that the issue is fixed with the latest SparkContext val goodSparkContext = new SparkContext(conf) try { testConcurrentQueryExecution(goodSparkContext) } finally { goodSparkContext.stop() } } test("concurrent query execution with fork-join pool (SPARK-13747)") { val spark = SparkSession.builder .master("local[*]") .appName("test") .getOrCreate() import spark.implicits._ try { // Should not throw IllegalArgumentException (1 to 100).par.foreach { _ => spark.sparkContext.parallelize(1 to 5).map { i => (i, i) }.toDF("a", "b").count() } } finally { spark.sparkContext.stop() } } private class BadSparkContext(conf: SparkConf) extends SparkContext(conf) { protected[spark] override val localProperties = new InheritableThreadLocal[Properties] { override protected def childValue(parent: Properties): Properties = new Properties(parent) override protected def initialValue(): Properties = new Properties() } }
Example 143
Source File: RowDataSourceStrategySuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources import java.sql.DriverManager import java.util.Properties import org.scalatest.BeforeAndAfter import org.apache.spark.SparkFunSuite import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.sources._ import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ import org.apache.spark.util.Utils class RowDataSourceStrategySuite extends SparkFunSuite with BeforeAndAfter with SharedSQLContext { import testImplicits._ val url = "jdbc:h2:mem:testdb0" val urlWithUserAndPass = "jdbc:h2:mem:testdb0;user=testUser;password=testPass" var conn: java.sql.Connection = null before { Utils.classForName("org.h2.Driver") // Extra properties that will be specified for our database. We need these to test // usage of parameters from OPTIONS clause in queries. val properties = new Properties() properties.setProperty("user", "testUser") properties.setProperty("password", "testPass") properties.setProperty("rowId", "false") conn = DriverManager.getConnection(url, properties) conn.prepareStatement("create schema test").executeUpdate() conn.prepareStatement("create table test.inttypes (a INT, b INT, c INT)").executeUpdate() conn.prepareStatement("insert into test.inttypes values (1, 2, 3)").executeUpdate() conn.commit() sql( s""" |CREATE TEMPORARY TABLE inttypes |USING org.apache.spark.sql.jdbc |OPTIONS (url '$url', dbtable 'TEST.INTTYPES', user 'testUser', password 'testPass') """.stripMargin.replaceAll("\n", " ")) } after { conn.close() } test("SPARK-17673: Exchange reuse respects differences in output schema") { val df = sql("SELECT * FROM inttypes") val df1 = df.groupBy("a").agg("b" -> "min") val df2 = df.groupBy("a").agg("c" -> "min") val res = df1.union(df2) assert(res.distinct().count() == 2) // would be 1 if the exchange was incorrectly reused } }
Example 144
Source File: CryptoStreamUtils.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.security

import java.io.{InputStream, OutputStream}
import java.util.Properties
import javax.crypto.KeyGenerator
import javax.crypto.spec.{IvParameterSpec, SecretKeySpec}

import org.apache.commons.crypto.random._
import org.apache.commons.crypto.stream._

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config._

// The enclosing object and the IV length constant are restored from Spark's
// CryptoStreamUtils; the published snippet shows only createInitializationVector.
private[spark] object CryptoStreamUtils extends Logging {

  // Length in bytes of the initialization vector used by the crypto streams.
  val IV_LENGTH_IN_BYTES = 16

  private[this] def createInitializationVector(properties: Properties): Array[Byte] = {
    val iv = new Array[Byte](IV_LENGTH_IN_BYTES)
    val initialIVStart = System.currentTimeMillis()
    CryptoRandomFactory.getCryptoRandom(properties).nextBytes(iv)
    val initialIVFinish = System.currentTimeMillis()
    val initialIVTime = initialIVFinish - initialIVStart
    if (initialIVTime > 2000) {
      logWarning(s"It costs ${initialIVTime} milliseconds to create the Initialization Vector " +
        s"used by CryptoStream")
    }
    iv
  }
}
Example 145
Source File: CsvSink.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.io.File import java.util.{Locale, Properties} import java.util.concurrent.TimeUnit import com.codahale.metrics.{CsvReporter, MetricRegistry} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class CsvSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val CSV_KEY_PERIOD = "period" val CSV_KEY_UNIT = "unit" val CSV_KEY_DIR = "directory" val CSV_DEFAULT_PERIOD = 10 val CSV_DEFAULT_UNIT = "SECONDS" val CSV_DEFAULT_DIR = "/tmp/" val pollPeriod = Option(property.getProperty(CSV_KEY_PERIOD)) match { case Some(s) => s.toInt case None => CSV_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(CSV_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(CSV_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val pollDir = Option(property.getProperty(CSV_KEY_DIR)) match { case Some(s) => s case None => CSV_DEFAULT_DIR } val reporter: CsvReporter = CsvReporter.forRegistry(registry) .formatFor(Locale.US) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build(new File(pollDir)) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
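Sinks like the one above never see metrics.properties directly; they receive an already-filtered Properties object. A sketch of roughly what such an object looks like for a CSV sink; the period, unit and directory values are illustrative.

import java.util.Properties

object CsvSinkProps {
  def main(args: Array[String]): Unit = {
    // Roughly the keys left over from a "*.sink.csv.*" section of metrics.properties
    // once the "*.sink.csv." prefix has been stripped off.
    val property = new Properties()
    property.setProperty("period", "20")
    property.setProperty("unit", "seconds")
    property.setProperty("directory", "/tmp/spark-metrics") // illustrative path

    // The sink falls back to its defaults for any key that is absent.
    println(Option(property.getProperty("period")).map(_.toInt).getOrElse(10))
  }
}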
Example 146
Source File: HDFSSink.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.MetricRegistry import org.apache.spark.{HDFSReporter, SecurityManager} import org.apache.spark.metrics.MetricsSystem private[spark] class HDFSSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val HDFS_KEY_PERIOD = "period" val HDFS_KEY_UNIT = "unit" val HDFS_KEY_DIR = "dir" val HDFS_DEFAULT_PERIOD = 10 val HDFS_DEFAULT_UNIT = "SECONDS" val HDFS_DEFAULT_DIR = "hdfs://localhost:9000/custom-metrics/" val pollPeriod = Option(property.getProperty(HDFS_KEY_PERIOD)) match { case Some(s) => s.toInt case None => HDFS_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(HDFS_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(HDFS_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val pollDir = Option(property.getProperty(HDFS_KEY_DIR)) match { case Some(s) => s case None => HDFS_DEFAULT_DIR } val reporter: HDFSReporter = HDFSReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build(pollDir) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 147
Source File: MetricsServlet.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import javax.servlet.http.HttpServletRequest import com.codahale.metrics.MetricRegistry import com.codahale.metrics.json.MetricsModule import com.fasterxml.jackson.databind.ObjectMapper import org.eclipse.jetty.servlet.ServletContextHandler import org.apache.spark.{SecurityManager, SparkConf} import org.apache.spark.ui.JettyUtils._ private[spark] class MetricsServlet( val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val SERVLET_KEY_PATH = "path" val SERVLET_KEY_SAMPLE = "sample" val SERVLET_DEFAULT_SAMPLE = false val servletPath = property.getProperty(SERVLET_KEY_PATH) val servletShowSample = Option(property.getProperty(SERVLET_KEY_SAMPLE)).map(_.toBoolean) .getOrElse(SERVLET_DEFAULT_SAMPLE) val mapper = new ObjectMapper().registerModule( new MetricsModule(TimeUnit.SECONDS, TimeUnit.MILLISECONDS, servletShowSample)) def getHandlers(conf: SparkConf): Array[ServletContextHandler] = { Array[ServletContextHandler]( createServletHandler(servletPath, new ServletParams(request => getMetricsSnapshot(request), "text/json"), securityMgr, conf) ) } def getMetricsSnapshot(request: HttpServletRequest): String = { mapper.writeValueAsString(registry) } override def start() { } override def stop() { } override def report() { } }
Example 148
Source File: MQTTSink.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.MetricRegistry import org.apache.spark.{MQTTReporter, SecurityManager} import org.apache.spark.metrics.MetricsSystem private[spark] class MQTTSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val MQTT_KEY_PERIOD = "pollPeriod" val MQTT_KEY_UNIT = "unit" val MQTT_KEY_HOST = "host" val MQTT_KEY_PORT = "port" val MQTT_DEFAULT_PERIOD = 10 val MQTT_DEFAULT_UNIT = "SECONDS" val MQTT_DEFAULT_HOST = "localhost" val MQTT_DEFAULT_PORT = 1883 val pollPeriod = Option(property.getProperty(MQTT_KEY_PERIOD)) match { case Some(s) => s.toInt case None => MQTT_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(MQTT_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(MQTT_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val masterHost = Option(property.getProperty(MQTT_KEY_HOST)) match { case Some(s) => s case None => MQTT_DEFAULT_HOST } val masterPort = Option(property.getProperty(MQTT_KEY_PORT)) match { case Some(s) => s.toInt case None => MQTT_DEFAULT_PORT } val reporter: MQTTReporter = MQTTReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build(masterHost, masterPort) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 149
Source File: Slf4jSink.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.{MetricRegistry, Slf4jReporter} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class Slf4jSink( val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val SLF4J_DEFAULT_PERIOD = 10 val SLF4J_DEFAULT_UNIT = "SECONDS" val SLF4J_KEY_PERIOD = "period" val SLF4J_KEY_UNIT = "unit" val pollPeriod = Option(property.getProperty(SLF4J_KEY_PERIOD)) match { case Some(s) => s.toInt case None => SLF4J_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(SLF4J_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(SLF4J_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val reporter: Slf4jReporter = Slf4jReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build() override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 150
Source File: ConsoleSink.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.{ConsoleReporter, MetricRegistry} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class ConsoleSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val CONSOLE_DEFAULT_PERIOD = 10 val CONSOLE_DEFAULT_UNIT = "SECONDS" val CONSOLE_KEY_PERIOD = "period" val CONSOLE_KEY_UNIT = "unit" val pollPeriod = Option(property.getProperty(CONSOLE_KEY_PERIOD)) match { case Some(s) => s.toInt case None => CONSOLE_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(CONSOLE_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(CONSOLE_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val reporter: ConsoleReporter = ConsoleReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build() override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 151
Source File: GraphiteSink.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.net.InetSocketAddress import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.MetricRegistry import com.codahale.metrics.graphite.{Graphite, GraphiteReporter, GraphiteUDP} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class GraphiteSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val GRAPHITE_DEFAULT_PERIOD = 10 val GRAPHITE_DEFAULT_UNIT = "SECONDS" val GRAPHITE_DEFAULT_PREFIX = "" val GRAPHITE_KEY_HOST = "host" val GRAPHITE_KEY_PORT = "port" val GRAPHITE_KEY_PERIOD = "period" val GRAPHITE_KEY_UNIT = "unit" val GRAPHITE_KEY_PREFIX = "prefix" val GRAPHITE_KEY_PROTOCOL = "protocol" def propertyToOption(prop: String): Option[String] = Option(property.getProperty(prop)) if (!propertyToOption(GRAPHITE_KEY_HOST).isDefined) { throw new Exception("Graphite sink requires 'host' property.") } if (!propertyToOption(GRAPHITE_KEY_PORT).isDefined) { throw new Exception("Graphite sink requires 'port' property.") } val host = propertyToOption(GRAPHITE_KEY_HOST).get val port = propertyToOption(GRAPHITE_KEY_PORT).get.toInt val pollPeriod = propertyToOption(GRAPHITE_KEY_PERIOD) match { case Some(s) => s.toInt case None => GRAPHITE_DEFAULT_PERIOD } val pollUnit: TimeUnit = propertyToOption(GRAPHITE_KEY_UNIT) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(GRAPHITE_DEFAULT_UNIT) } val prefix = propertyToOption(GRAPHITE_KEY_PREFIX).getOrElse(GRAPHITE_DEFAULT_PREFIX) MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val graphite = propertyToOption(GRAPHITE_KEY_PROTOCOL).map(_.toLowerCase) match { case Some("udp") => new GraphiteUDP(new InetSocketAddress(host, port)) case Some("tcp") | None => new Graphite(new InetSocketAddress(host, port)) case Some(p) => throw new Exception(s"Invalid Graphite protocol: $p") } val reporter: GraphiteReporter = GraphiteReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .prefixedWith(prefix) .build(graphite) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
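The propertyToOption pattern above turns getProperty's nullable result into an Option, which makes required and optional settings easy to distinguish. A compact stand-alone sketch; the host and port values are placeholders.

import java.util.Properties

object PropertyToOption {
  // The propertyToOption helper used by the sinks above.
  def propertyToOption(props: Properties, key: String): Option[String] =
    Option(props.getProperty(key))

  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.setProperty("host", "graphite.example.org") // illustrative host
    props.setProperty("port", "2003")

    val host = propertyToOption(props, "host")
      .getOrElse(throw new Exception("Graphite sink requires 'host' property."))
    val port = propertyToOption(props, "port").map(_.toInt)
      .getOrElse(throw new Exception("Graphite sink requires 'port' property."))
    println(s"$host:$port")
  }
}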
Example 152
Source File: ResultTask.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.io._ import java.lang.management.ManagementFactory import java.nio.ByteBuffer import java.util.Properties import org.apache.spark._ import org.apache.spark.broadcast.Broadcast import org.apache.spark.executor.TaskMetrics import org.apache.spark.rdd.RDD private[spark] class ResultTask[T, U]( stageId: Int, stageAttemptId: Int, taskBinary: Broadcast[Array[Byte]], partition: Partition, locs: Seq[TaskLocation], val outputId: Int, localProperties: Properties, metrics: TaskMetrics, jobId: Option[Int] = None, appId: Option[String] = None, appAttemptId: Option[String] = None) extends Task[U](stageId, stageAttemptId, partition.index, metrics, localProperties, jobId, appId, appAttemptId) with Serializable { @transient private[this] val preferredLocs: Seq[TaskLocation] = { if (locs == null) Nil else locs.toSet.toSeq } override def runTask(context: TaskContext): U = { // Deserialize the RDD and the func using the broadcast variables. val threadMXBean = ManagementFactory.getThreadMXBean val deserializeStartTime = System.currentTimeMillis() val deserializeStartCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) { threadMXBean.getCurrentThreadCpuTime } else 0L val ser = SparkEnv.get.closureSerializer.newInstance() val (rdd, func) = ser.deserialize[(RDD[T], (TaskContext, Iterator[T]) => U)]( ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader) _executorDeserializeTime = System.currentTimeMillis() - deserializeStartTime _executorDeserializeCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) { threadMXBean.getCurrentThreadCpuTime - deserializeStartCpuTime } else 0L func(context, rdd.iterator(partition, context)) } // This is only callable on the driver side. override def preferredLocations: Seq[TaskLocation] = preferredLocs override def toString: String = "ResultTask(" + stageId + ", " + partitionId + ")" }
Example 153
Source File: package.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache import java.util.Properties package object spark { private object SparkBuildInfo { val ( spark_version: String, spark_branch: String, spark_revision: String, spark_build_user: String, spark_repo_url: String, spark_build_date: String) = { val resourceStream = Thread.currentThread().getContextClassLoader. getResourceAsStream("spark-version-info.properties") try { val unknownProp = "<unknown>" val props = new Properties() props.load(resourceStream) ( props.getProperty("version", unknownProp), props.getProperty("branch", unknownProp), props.getProperty("revision", unknownProp), props.getProperty("user", unknownProp), props.getProperty("url", unknownProp), props.getProperty("date", unknownProp) ) } catch { case npe: NullPointerException => throw new SparkException("Error while locating file spark-version-info.properties", npe) case e: Exception => throw new SparkException("Error loading properties from spark-version-info.properties", e) } finally { if (resourceStream != null) { try { resourceStream.close() } catch { case e: Exception => throw new SparkException("Error closing spark build info resource stream", e) } } } } } val SPARK_VERSION = SparkBuildInfo.spark_version val SPARK_BRANCH = SparkBuildInfo.spark_branch val SPARK_REVISION = SparkBuildInfo.spark_revision val SPARK_BUILD_USER = SparkBuildInfo.spark_build_user val SPARK_REPO_URL = SparkBuildInfo.spark_repo_url val SPARK_BUILD_DATE = SparkBuildInfo.spark_build_date }
Example 154
Source File: TaskContextImpl.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark import java.util.Properties import scala.collection.mutable.ArrayBuffer import org.apache.spark.executor.TaskMetrics import org.apache.spark.internal.Logging import org.apache.spark.memory.TaskMemoryManager import org.apache.spark.metrics.MetricsSystem import org.apache.spark.metrics.source.Source import org.apache.spark.util._ private[spark] class TaskContextImpl( val stageId: Int, val partitionId: Int, override val taskAttemptId: Long, override val attemptNumber: Int, override val taskMemoryManager: TaskMemoryManager, localProperties: Properties, @transient private val metricsSystem: MetricsSystem, // The default value is only used in tests. override val taskMetrics: TaskMetrics = TaskMetrics.empty) extends TaskContext with Logging { private[spark] def markInterrupted(): Unit = { interrupted = true } override def isCompleted(): Boolean = completed override def isRunningLocally(): Boolean = false override def isInterrupted(): Boolean = interrupted override def getLocalProperty(key: String): String = localProperties.getProperty(key) override def getMetricsSources(sourceName: String): Seq[Source] = metricsSystem.getSourcesByName(sourceName) private[spark] override def registerAccumulator(a: AccumulatorV2[_, _]): Unit = { taskMetrics.registerAccumulator(a) } }
Example 155
Source File: MemoryTestingUtils.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.memory import java.util.Properties import org.apache.spark.{SparkEnv, TaskContext, TaskContextImpl} object MemoryTestingUtils { def fakeTaskContext(env: SparkEnv): TaskContext = { val taskMemoryManager = new TaskMemoryManager(env.memoryManager, 0) new TaskContextImpl( stageId = 0, partitionId = 0, taskAttemptId = 0, attemptNumber = 0, taskMemoryManager = taskMemoryManager, localProperties = new Properties, metricsSystem = env.metricsSystem) } }
Example 156
Source File: ResetSystemProperties.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.util.Properties import org.apache.commons.lang3.SerializationUtils import org.scalatest.{BeforeAndAfterEach, Suite} private[spark] trait ResetSystemProperties extends BeforeAndAfterEach { this: Suite => var oldProperties: Properties = null override def beforeEach(): Unit = { // we need SerializationUtils.clone instead of `new Properties(System.getProperties())` because // the later way of creating a copy does not copy the properties but it initializes a new // Properties object with the given properties as defaults. They are not recognized at all // by standard Scala wrapper over Java Properties then. oldProperties = SerializationUtils.clone(System.getProperties) super.beforeEach() } override def afterEach(): Unit = { try { super.afterEach() } finally { System.setProperties(oldProperties) oldProperties = null } } }
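The trait above snapshots System.getProperties before each test and restores it afterwards. A sketch of the same snapshot-and-restore idea as a loan-style helper; spark.test.flag is an illustrative key, and SerializationUtils comes from the same commons-lang3 dependency used above.

import java.util.Properties

import org.apache.commons.lang3.SerializationUtils

object WithSystemProperty {
  // Run a block with one system property overridden, then restore the previous state.
  def withSystemProperty[T](key: String, value: String)(body: => T): T = {
    // Clone rather than wrap, so the snapshot is a real copy of the current properties.
    val old: Properties = SerializationUtils.clone(System.getProperties)
    try {
      System.setProperty(key, value)
      body
    } finally {
      System.setProperties(old)
    }
  }

  def main(args: Array[String]): Unit = {
    withSystemProperty("spark.test.flag", "true") { // illustrative key
      println(System.getProperty("spark.test.flag")) // true
    }
    println(System.getProperty("spark.test.flag")) // restored to its previous value
  }
}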
Example 157
Source File: WmUserApp.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.wmexchanger.utils import java.util.Properties class WmUserApp(val args: Array[String], val resourcePropertiesName: String, val filePropertiesKey: String = WmUserApp.properties) { def getPropertiesBuilder: PropertiesBuilder = { val resourcePropertiesBuilder = PropertiesBuilder.fromResource(resourcePropertiesName) val commandPropertiesBuilder = { val propertiesBuilder = PropertiesBuilder() args.foreach { arg => val key = StringUtils.beforeFirst(arg, '=', all = false) val value = StringUtils.afterFirst(arg, '=', all = false) if (key.nonEmpty && value.nonEmpty) propertiesBuilder.put(key, value) } propertiesBuilder } val filePropertiesBuilder = { val fileNameOpt: Option[String] = commandPropertiesBuilder .getProperty(WmUserApp.app, filePropertiesKey) .orElse(resourcePropertiesBuilder.getProperty(WmUserApp.app, filePropertiesKey)) .orElse(None) fileNameOpt .map(PropertiesBuilder.fromFile) .getOrElse(PropertiesBuilder()) } PropertiesBuilder() .putAll(resourcePropertiesBuilder) .putAll(filePropertiesBuilder) .putAll(commandPropertiesBuilder) } val propertiesBuilder: PropertiesBuilder = getPropertiesBuilder val appProperties: Properties = propertiesBuilder.filter("app").get val kafkaProperties: Properties = propertiesBuilder.filter("kafka").get val interactive: Boolean = appProperties.getProperty("interactive").toBoolean } object WmUserApp { val topic = "topic" val app = "app" val properties = "app.properties" }
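WmUserApp above layers resource, file and command-line Properties by copying them into one builder. An alternative sketch using Properties' built-in defaults chain, where getProperty walks from the most specific layer down; the keys and values are illustrative.

import java.util.Properties

object LayeredProperties {
  def main(args: Array[String]): Unit = {
    // Lowest precedence: values bundled with the application.
    val resourceDefaults = new Properties()
    resourceDefaults.setProperty("app.interactive", "false")
    resourceDefaults.setProperty("app.topic", "documents") // illustrative values

    // Higher precedence: an external file, then command-line key=value pairs.
    val fileProps = new Properties(resourceDefaults)
    fileProps.setProperty("app.topic", "documents.reprocessed")

    val commandProps = new Properties(fileProps)
    commandProps.setProperty("app.interactive", "true")

    // getProperty consults the chain of defaults, so the most specific layer wins.
    println(commandProps.getProperty("app.topic"))       // documents.reprocessed
    println(commandProps.getProperty("app.interactive")) // true
  }
}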
Example 158
Source File: KafkaConsumer.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.wmexchanger.wmconsumer import java.io.File import java.time.Duration import java.util.Collections import java.util.ConcurrentModificationException import java.util.Properties import org.apache.kafka.clients.consumer.{KafkaConsumer => ApacheKafkaConsumer} import org.clulab.wm.wmexchanger.utils.Closer.AutoCloser import org.clulab.wm.wmexchanger.utils.FileUtils import org.clulab.wm.wmexchanger.utils.FileEditor import org.json4s._ import org.slf4j.Logger import org.slf4j.LoggerFactory class KafkaConsumer(properties: Properties, closeDuration: Int, topic: String, outputDir: String) { import KafkaConsumer._ implicit val formats: DefaultFormats.type = org.json4s.DefaultFormats logger.info("Opening consumer...") protected val consumer: ApacheKafkaConsumer[String, String] = { val consumer = new ApacheKafkaConsumer[String, String](properties) consumer.subscribe(Collections.singletonList(topic)) consumer } def poll(duration: Int): Unit = { val records = consumer.poll(Duration.ofSeconds(duration)) logger.info(s"Polling ${records.count} records...") records.forEach { record => val key = record.key val value = record.value // Imply an extension on the file so that it can be replaced. val file = FileEditor(new File(key + ".")).setDir(outputDir).setExt("json").get logger.info("Consuming " + file.getName) FileUtils.printWriterFromFile(file).autoClose { printWriter => printWriter.print(value) } } } def close(): Unit = { logger.info("Closing consumer...") try { consumer.close(Duration.ofSeconds(closeDuration)) } catch { case _: ConcurrentModificationException => // KafkaConsumer is not safe for multi-threaded access } } } object KafkaConsumer { val logger: Logger = LoggerFactory.getLogger(this.getClass) }
Example 159
Source File: KafkaConsumerApp.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.wmexchanger.wmconsumer import java.util.Properties import org.clulab.wm.wmexchanger.utils.PropertiesBuilder import org.clulab.wm.wmexchanger.utils.WmUserApp import org.clulab.wm.wmexchanger.utils.SafeThread import org.slf4j.Logger import org.slf4j.LoggerFactory class KafkaConsumerApp(args: Array[String]) extends WmUserApp(args, "/kafkaconsumer.properties") { val localKafkaProperties: Properties = { // This allows the login to be contained in a file external to the project. val loginProperty = appProperties.getProperty("login") val loginPropertiesBuilder = PropertiesBuilder.fromFile(loginProperty) PropertiesBuilder(kafkaProperties).putAll(loginPropertiesBuilder).get } val topic: String = appProperties.getProperty("topic") val outputDir: String = appProperties.getProperty("outputDir") val pollDuration: Int = appProperties.getProperty("poll.duration").toInt val waitDuration: Long = appProperties.getProperty("wait.duration").toLong val closeDuration: Int = appProperties.getProperty("close.duration").toInt val thread: SafeThread = new SafeThread(KafkaConsumerApp.logger) { override def runSafely(): Unit = { val consumer = new KafkaConsumer(localKafkaProperties, closeDuration, topic, outputDir) // autoClose isn't executed if the thread is shot down, so this hook is used instead. sys.ShutdownHookThread { consumer.close() } while (!isInterrupted) consumer.poll(pollDuration) } } if (interactive) thread.waitSafely(waitDuration) } object KafkaConsumerApp extends App { val logger: Logger = LoggerFactory.getLogger(this.getClass) new KafkaConsumerApp(args) }
Example 160
Source File: MetricsConfig.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics import java.io.{FileInputStream, InputStream} import java.util.Properties import scala.collection.mutable import scala.util.matching.Regex import org.apache.spark.Logging import org.apache.spark.util.Utils private[spark] class MetricsConfig(val configFile: Option[String]) extends Logging { private val DEFAULT_PREFIX = "*" private val INSTANCE_REGEX = "^(\\*|[a-zA-Z]+)\\.(.+)".r private val DEFAULT_METRICS_CONF_FILENAME = "metrics.properties" private[metrics] val properties = new Properties() private[metrics] var propertyCategories: mutable.HashMap[String, Properties] = null private def setDefaultProperties(prop: Properties) { prop.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet") prop.setProperty("*.sink.servlet.path", "/metrics/json") prop.setProperty("master.sink.servlet.path", "/metrics/master/json") prop.setProperty("applications.sink.servlet.path", "/metrics/applications/json") } def initialize() { // Add default properties in case there's no properties file setDefaultProperties(properties) // If spark.metrics.conf is not set, try to get file in class path val isOpt: Option[InputStream] = configFile.map(new FileInputStream(_)).orElse { try { Option(Utils.getSparkClassLoader.getResourceAsStream(DEFAULT_METRICS_CONF_FILENAME)) } catch { case e: Exception => logError("Error loading default configuration file", e) None } } isOpt.foreach { is => try { properties.load(is) } finally { is.close() } } propertyCategories = subProperties(properties, INSTANCE_REGEX) if (propertyCategories.contains(DEFAULT_PREFIX)) { import scala.collection.JavaConversions._ val defaultProperty = propertyCategories(DEFAULT_PREFIX) for { (inst, prop) <- propertyCategories if (inst != DEFAULT_PREFIX) (k, v) <- defaultProperty if (prop.getProperty(k) == null) } { prop.setProperty(k, v) } } } def subProperties(prop: Properties, regex: Regex): mutable.HashMap[String, Properties] = { val subProperties = new mutable.HashMap[String, Properties] import scala.collection.JavaConversions._ prop.foreach { kv => if (regex.findPrefixOf(kv._1).isDefined) { val regex(prefix, suffix) = kv._1 subProperties.getOrElseUpdate(prefix, new Properties).setProperty(suffix, kv._2) } } subProperties } def getInstance(inst: String): Properties = { propertyCategories.get(inst) match { case Some(s) => s case None => propertyCategories.getOrElse(DEFAULT_PREFIX, new Properties) } } }
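The core of MetricsConfig is subProperties, which splits one flat Properties object into per-instance Properties keyed by the prefix before the first dot. A standalone sketch of that grouping, simplified from the version above; the keys are illustrative.

import java.util.Properties
import scala.collection.mutable
import scala.jdk.CollectionConverters._

val props = new Properties()
props.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet")
props.setProperty("master.sink.servlet.path", "/metrics/master/json")

val InstanceRegex = "^(\\*|[a-zA-Z]+)\\.(.+)".r
val perInstance = new mutable.HashMap[String, Properties]
props.stringPropertyNames().asScala.foreach {
  case key @ InstanceRegex(prefix, suffix) =>
    // Group by instance prefix ("*" or "master"), keep the remainder as the key.
    perInstance.getOrElseUpdate(prefix, new Properties).setProperty(suffix, props.getProperty(key))
  case _ => // ignore keys that do not match the instance pattern
}
// perInstance("master") now holds sink.servlet.path=/metrics/master/json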
Example 161
Source File: CsvSink.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.io.File import java.util.{Locale, Properties} import java.util.concurrent.TimeUnit import com.codahale.metrics.{CsvReporter, MetricRegistry} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class CsvSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val CSV_KEY_PERIOD = "period" val CSV_KEY_UNIT = "unit" val CSV_KEY_DIR = "directory" val CSV_DEFAULT_PERIOD = 10 val CSV_DEFAULT_UNIT = "SECONDS" val CSV_DEFAULT_DIR = "/tmp/" val pollPeriod = Option(property.getProperty(CSV_KEY_PERIOD)) match { case Some(s) => s.toInt case None => CSV_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(CSV_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(CSV_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val pollDir = Option(property.getProperty(CSV_KEY_DIR)) match { case Some(s) => s case None => CSV_DEFAULT_DIR } val reporter: CsvReporter = CsvReporter.forRegistry(registry) .formatFor(Locale.US) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build(new File(pollDir)) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 162
Source File: ConsoleSink.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.{ConsoleReporter, MetricRegistry} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class ConsoleSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val CONSOLE_DEFAULT_PERIOD = 10 val CONSOLE_DEFAULT_UNIT = "SECONDS" val CONSOLE_KEY_PERIOD = "period" val CONSOLE_KEY_UNIT = "unit" val pollPeriod = Option(property.getProperty(CONSOLE_KEY_PERIOD)) match { case Some(s) => s.toInt case None => CONSOLE_DEFAULT_PERIOD } val pollUnit: TimeUnit = Option(property.getProperty(CONSOLE_KEY_UNIT)) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(CONSOLE_DEFAULT_UNIT) } MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val reporter: ConsoleReporter = ConsoleReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .build() override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 163
Source File: GraphiteSink.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.net.InetSocketAddress import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.MetricRegistry import com.codahale.metrics.graphite.{GraphiteUDP, Graphite, GraphiteReporter} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class GraphiteSink(val property: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends Sink { val GRAPHITE_DEFAULT_PERIOD = 10 val GRAPHITE_DEFAULT_UNIT = "SECONDS" val GRAPHITE_DEFAULT_PREFIX = "" val GRAPHITE_KEY_HOST = "host" val GRAPHITE_KEY_PORT = "port" val GRAPHITE_KEY_PERIOD = "period" val GRAPHITE_KEY_UNIT = "unit" val GRAPHITE_KEY_PREFIX = "prefix" val GRAPHITE_KEY_PROTOCOL = "protocol" def propertyToOption(prop: String): Option[String] = Option(property.getProperty(prop)) if (!propertyToOption(GRAPHITE_KEY_HOST).isDefined) { throw new Exception("Graphite sink requires 'host' property.") } if (!propertyToOption(GRAPHITE_KEY_PORT).isDefined) { throw new Exception("Graphite sink requires 'port' property.") } val host = propertyToOption(GRAPHITE_KEY_HOST).get val port = propertyToOption(GRAPHITE_KEY_PORT).get.toInt val pollPeriod = propertyToOption(GRAPHITE_KEY_PERIOD) match { case Some(s) => s.toInt case None => GRAPHITE_DEFAULT_PERIOD } val pollUnit: TimeUnit = propertyToOption(GRAPHITE_KEY_UNIT) match { case Some(s) => TimeUnit.valueOf(s.toUpperCase()) case None => TimeUnit.valueOf(GRAPHITE_DEFAULT_UNIT) } val prefix = propertyToOption(GRAPHITE_KEY_PREFIX).getOrElse(GRAPHITE_DEFAULT_PREFIX) MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) val graphite = propertyToOption(GRAPHITE_KEY_PROTOCOL).map(_.toLowerCase) match { case Some("udp") => new GraphiteUDP(new InetSocketAddress(host, port)) case Some("tcp") | None => new Graphite(new InetSocketAddress(host, port)) case Some(p) => throw new Exception(s"Invalid Graphite protocol: $p") } val reporter: GraphiteReporter = GraphiteReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) .convertRatesTo(TimeUnit.SECONDS) .prefixedWith(prefix) .build(graphite) override def start() { reporter.start(pollPeriod, pollUnit) } override def stop() { reporter.stop() } override def report() { reporter.report() } }
Example 164
Source File: ActiveJob.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.util.Properties import org.apache.spark.TaskContext import org.apache.spark.util.CallSite private[spark] class ActiveJob( val jobId: Int, val finalStage: Stage, val func: (TaskContext, Iterator[_]) => _, val partitions: Array[Int], val callSite: CallSite, val listener: JobListener, val properties: Properties) { val numPartitions = partitions.length val finished = Array.fill[Boolean](numPartitions)(false) var numFinished = 0 }
Example 165
Source File: ResetSystemProperties.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.util.Properties import org.apache.commons.lang3.SerializationUtils import org.scalatest.{BeforeAndAfterEach, Suite} private[spark] trait ResetSystemProperties extends BeforeAndAfterEach { this: Suite => var oldProperties: Properties = null override def beforeEach(): Unit = { // we need SerializationUtils.clone instead of `new Properties(System.getProperties())` because // the latter way of creating a copy does not copy the properties but initializes a new // Properties object with the given properties as defaults. They are not recognized at all // by the standard Scala wrapper over Java Properties then. oldProperties = SerializationUtils.clone(System.getProperties) super.beforeEach() } override def afterEach(): Unit = { try { super.afterEach() } finally { System.setProperties(oldProperties) oldProperties = null } } }
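The beforeEach comment is worth demonstrating: new Properties(defaults) only references the original as fall-back defaults and copies nothing, which is why the deep clone is needed. A small sketch, assuming commons-lang3 is on the classpath.

import java.util.Properties
import org.apache.commons.lang3.SerializationUtils

val original = new Properties()
original.setProperty("spark.test.key", "value")

val viaDefaults = new Properties(original)    // entries become fall-back defaults only
viaDefaults.getProperty("spark.test.key")     // "value" (resolved through the defaults)
viaDefaults.size                              // 0 -- nothing was actually copied

val cloned = SerializationUtils.clone(original) // deep copy with real entries
cloned.size                                     // 1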
Example 166
Source File: SimpleConsumer.scala From embedded-kafka with Apache License 2.0 | 5 votes |
package com.tuplejump.embedded.kafka import java.util.Properties import java.util.concurrent.{CountDownLatch, Executors} import scala.util.Try import kafka.serializer.StringDecoder import kafka.consumer.{ Consumer, ConsumerConfig } class SimpleConsumer( val latch: CountDownLatch, consumerConfig: Map[String, String], topic: String, groupId: String, partitions: Int, numThreads: Int) { val connector = Consumer.create(createConsumerConfig) val streams = connector .createMessageStreams(Map(topic -> partitions), new StringDecoder(), new StringDecoder()) .get(topic) val executor = Executors.newFixedThreadPool(numThreads) for (stream <- streams) { executor.submit(new Runnable() { def run(): Unit = { for (s <- stream) { while (s.iterator.hasNext) { latch.countDown() } } } }) } private def createConsumerConfig: ConsumerConfig = { import scala.collection.JavaConverters._ val props = new Properties() props.putAll(consumerConfig.asJava) new ConsumerConfig(props) } def shutdown(): Unit = Try { connector.shutdown() executor.shutdown() } }
Example 167
Source File: TableLoader.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.util.Properties import scala.collection.{immutable, mutable} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.command.management.CarbonLoadDataCommand import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.datastore.impl.FileFactory import org.apache.carbondata.core.util.CarbonProperties // scalastyle:off object TableLoader { def extractOptions(propertiesFile: String): immutable.Map[String, String] = { val props = new Properties val path = new Path(propertiesFile) val fs = path.getFileSystem(FileFactory.getConfiguration) props.load(fs.open(path)) val elments = props.entrySet().iterator() val map = new mutable.HashMap[String, String]() System.out.println("properties file:") while (elments.hasNext) { val elment = elments.next() System.out.println(s"${elment.getKey}=${elment.getValue}") map.put(elment.getKey.asInstanceOf[String], elment.getValue.asInstanceOf[String]) } immutable.Map(map.toSeq: _*) } def extractStorePath(map: immutable.Map[String, String]): String = { map.get(CarbonCommonConstants.STORE_LOCATION) match { case Some(path) => path case None => throw new Exception(s"${CarbonCommonConstants.STORE_LOCATION} can't be empty") } } def loadTable(spark: SparkSession, dbName: Option[String], tableName: String, inputPaths: String, options: scala.collection.immutable.Map[String, String]): Unit = { CarbonLoadDataCommand(dbName, tableName, inputPaths, Nil, options, false).run(spark) } def main(args: Array[String]): Unit = { if (args.length < 3) { System.err.println("Usage: TableLoader <properties file> <table name> <input files>") System.exit(1) } System.out.println("parameter list:") args.foreach(System.out.println) val map = extractOptions(TableAPIUtil.escape(args(0))) val storePath = extractStorePath(map) System.out.println(s"${CarbonCommonConstants.STORE_LOCATION}:$storePath") val (dbName, tableName) = TableAPIUtil.parseSchemaName(TableAPIUtil.escape(args(1))) System.out.println(s"table name: $dbName.$tableName") val inputPaths = TableAPIUtil.escape(args(2)) val spark = TableAPIUtil.spark(storePath, s"TableLoader: $dbName.$tableName") loadTable(spark, Option(dbName), tableName, inputPaths, map) } }
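extractOptions reads the properties file through a Hadoop FileSystem; for a quick local check the same shape can be reproduced with a plain java.io stream. A sketch; the file name is illustrative.

import java.io.FileInputStream
import java.util.Properties
import scala.collection.immutable
import scala.jdk.CollectionConverters._

def loadLocalOptions(path: String): immutable.Map[String, String] = {
  val props = new Properties()
  val in = new FileInputStream(path)           // would be fs.open(new Path(path)) on HDFS
  try props.load(in) finally in.close()
  props.stringPropertyNames().asScala.map(k => k -> props.getProperty(k)).toMap
}

// loadLocalOptions("carbon-load.properties")   // illustrative file name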
Example 168
Source File: KfProducer.scala From Adenium with Apache License 2.0 | 5 votes |
package com.adenium.externals.kafka import java.util.Properties import java.util.concurrent.Future import com.adenium.utils.May._ import org.apache.kafka.clients.producer._ object KfProducer { def apply( broker: String, props: Option[Properties] = None): KfProducer = { val prop = props.getOrElse { val p = new Properties() p.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, broker) p.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") p.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") p.put(ProducerConfig.ACKS_CONFIG, "0") p.put(ProducerConfig.RETRIES_CONFIG, "3") p.put(ProducerConfig.LINGER_MS_CONFIG, "0") //props.put(ProducerConfig.BATCH_SIZE_CONFIG, "1") p } val prod = new KafkaProducer[String, String](prop) new KfProducer( broker, prod ) } }
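The Properties assembled in apply are consumed by the underlying Kafka producer. A hedged usage sketch with an equivalently configured org.apache.kafka.clients.producer.KafkaProducer; the broker and topic names are placeholders.

import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}

val props = new Properties()
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")  // placeholder broker
props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer")
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer")

val producer = new KafkaProducer[String, String](props)
producer.send(new ProducerRecord[String, String]("normalized-events", "key-1", "hello")) // placeholder topic
producer.flush()
producer.close()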
Example 169
Source File: PrometheusSink.scala From spark-metrics with Apache License 2.0 | 5 votes |
package org.apache.spark.banzaicloud.metrics.sink import java.net.URL import java.util.Properties import com.banzaicloud.spark.metrics.sink.PrometheusSink.SinkConfig import com.codahale.metrics.MetricRegistry import io.prometheus.client.exporter.PushGateway import org.apache.spark.banzaicloud.metrics.sink.PrometheusSink.SinkConfigProxy import org.apache.spark.internal.config import org.apache.spark.metrics.sink.Sink import org.apache.spark.{SecurityManager, SparkConf, SparkEnv} object PrometheusSink { class SinkConfigProxy extends SinkConfig { // SparkEnv may become available only after metrics sink creation thus retrieving // SparkConf from spark env here and not during the creation/initialisation of PrometheusSink. @transient private lazy val sparkConfig = Option(SparkEnv.get).map(_.conf).getOrElse(new SparkConf(true)) // Don't use sparkConf.getOption("spark.metrics.namespace") as the underlying string won't be substituted. def metricsNamespace: Option[String] = sparkConfig.get(config.METRICS_NAMESPACE) def sparkAppId: Option[String] = sparkConfig.getOption("spark.app.id") def sparkAppName: Option[String] = sparkConfig.getOption("spark.app.name") def executorId: Option[String] = sparkConfig.getOption("spark.executor.id") } } class PrometheusSink(property: Properties, registry: MetricRegistry, securityMgr: SecurityManager, sinkConfig: SinkConfig, pushGatewayBuilder: URL => PushGateway) extends com.banzaicloud.spark.metrics.sink.PrometheusSink(property, registry, sinkConfig, pushGatewayBuilder) with Sink { // Constructor required by MetricsSystem::registerSinks() def this(property: Properties, registry: MetricRegistry, securityMgr: SecurityManager) = { this( property, registry, securityMgr, new SinkConfigProxy, new PushGateway(_) ) } }
Example 170
Source File: PropertiesConfig.scala From DynaML with Apache License 2.0 | 5 votes |
package io.github.mandar2812.dynaml.utils.sumac import collection._ import java.util.Properties import java.io.{FileOutputStream, File, FileInputStream, BufferedInputStream} import collection.JavaConverters._ trait PropertiesConfig extends ExternalConfig { self: Args => var propertyFile: File = _ abstract override def readArgs(originalArgs: Map[String,String]): Map[String,String] = { parse(originalArgs, false) val props = new Properties() if (propertyFile != null) { val in = new BufferedInputStream(new FileInputStream(propertyFile)) props.load(in) in.close() } //append args we read from the property file to the args from the command line, and pass to next trait super.readArgs(ExternalConfigUtil.mapWithDefaults(originalArgs,props.asScala)) } abstract override def saveConfig() { PropertiesConfig.saveConfig(this, propertyFile) super.saveConfig() } } object PropertiesConfig { def saveConfig(args: Args, propertyFile: File) { val props = new Properties() args.getStringValues.foreach{case(k,v) => props.put(k,v)} val out = new FileOutputStream(propertyFile) props.store(out, "") out.close() } }
Example 171
Source File: PropertiesConfigTest.scala From DynaML with Apache License 2.0 | 5 votes |
package io.github.mandar2812.dynaml.utils.sumac import org.scalatest.FunSuite import java.io.{PrintWriter, File} import java.util.Properties import org.scalatest.Matchers class PropertiesConfigTest extends FunSuite with Matchers { val testOutDir = new File("test_output/" + getClass.getSimpleName) testOutDir.mkdirs() test("load properties") { val propFile = new File(testOutDir, "load_properties_test.properties") val p = new Properties() p.put("x", "98") p.put("blah", "ooga booga") val out = new PrintWriter(propFile) p.store(out,null) out.close() val args = new PropertyArgs() args.parse(Array("--propertyFile", propFile.getAbsolutePath)) args.x should be (98) args.blah should be ("ooga booga") } test("roundtrip properties") { val propFile = new File(testOutDir, "roundtrip_properties_test.properties") val args = new PropertyArgs() args.x = 5 args.wakka = 93.4f args.propertyFile = propFile args.saveConfig() val args2 = new PropertyArgs() args2.propertyFile = propFile args2.parse(Map[String,String]()) args2.x should be (5) args2.wakka should be (93.4f) args2.blah should be (null) } class PropertyArgs extends FieldArgs with PropertiesConfig { var x: Int = _ var blah: String = _ var wakka: Float = _ } }
Example 172
Source File: H2Sandbox.scala From redshift-fake-driver with Apache License 2.0 | 5 votes |
package jp.ne.opt.redshiftfake import java.sql.{DriverManager, Connection} import java.util.Properties import jp.ne.opt.redshiftfake.util.Loan.using import org.scalatest.{Outcome, fixture} trait H2Sandbox { self: fixture.TestSuite => type FixtureParam = Connection override def withFixture(test: OneArgTest): Outcome = { val url = "jdbc:h2redshift:mem:redshift;MODE=PostgreSQL;DATABASE_TO_UPPER=false" val prop = new Properties() prop.setProperty("driver", "org.h2.jdbc.FakeH2Driver") prop.setProperty("user", "sa") Class.forName("org.h2.jdbc.FakeH2Driver") using(DriverManager.getConnection(url, prop))(test) } }
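DriverManager.getConnection accepts connection settings as a Properties object, which is how the fixture passes the user. A minimal standalone sketch against an in-memory H2 database, using the stock org.h2.Driver rather than the project's FakeH2Driver.

import java.sql.DriverManager
import java.util.Properties

val props = new Properties()
props.setProperty("user", "sa")
props.setProperty("password", "")

Class.forName("org.h2.Driver")
val conn = DriverManager.getConnection("jdbc:h2:mem:test;MODE=PostgreSQL", props)
try {
  val rs = conn.createStatement().executeQuery("SELECT 1")
  rs.next()
  println(rs.getInt(1)) // 1
} finally conn.close()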
Example 173
Source File: VelocityUtils.scala From InteractiveGraph-neo4j with BSD 2-Clause "Simplified" License | 5 votes |
package org.grapheco.server.util import java.io.{File, FileOutputStream, StringWriter} import java.util.Properties import cn.pidb.blob.Blob import cn.pidb.engine.blob.{BlobIO, InlineBlob, RemoteBlob} import org.apache.velocity.app.VelocityEngine import org.apache.velocity.tools.ToolManager import org.apache.velocity.tools.config.DefaultKey import org.neo4j.values.storable.{BlobValue, ValueWriter} import org.springframework.util.ClassUtils import scala.collection.JavaConversions import java.io.IOException object VelocityUtils { val pro = new Properties(); val toolManager = new ToolManager(); toolManager.configure("tools.xml"); pro.setProperty("input.encoding", "UTF-8"); pro.setProperty("output.encoding", "UTF-8"); val ve = new VelocityEngine(pro); val props = new Properties() props.put("runtime.log.logsystem.class", "org.apache.velocity.runtime.log.SimpleLog4JLogSystem") props.put("runtime.log.logsystem.log4j.category", "velocity") props.put("runtime.log.logsystem.log4j.logger", "velocity") ve.init(props) def parse(expr: String, context: Map[String, Any]): Any = { val vc = toolManager.createContext(); val writer = new StringWriter(); context.foreach(kv => vc.put(kv._1, // is this a Scala Map? if (kv._2.isInstanceOf[Map[_, _]]) { JavaConversions.mapAsJavaMap(kv._2.asInstanceOf[Map[_, _]]) } else { kv._2 })); try { if (expr.startsWith("=")) { val expr1 = expr.substring(1); ve.evaluate(vc, writer, "", s"#set($$__VAR=$expr1)"); var value = vc.get("__VAR"); // if the value is a blob if(value.isInstanceOf[Blob]){ // get the blob bytes var result:String = "" try { val data = value.asInstanceOf[Blob].toBytes() val path = ClassUtils.getDefaultClassLoader.getResource("").getPath.replace("/WEB-INF/classes","") + "static/" val tool = new FileSystemTool() result = tool.filesave(data,path, System.currentTimeMillis.toString+".jpg") } catch{ case e:Throwable => print(e.toString) } //TODO url return "http://localhost:9999/graphserver/static/"+result } return value } else { ve.evaluate(vc, writer, "", expr); writer.getBuffer.toString.trim } } catch { case e: Throwable => throw new WrongExpressionException(expr, e); } } } class WrongExpressionException(msg: String, e: Throwable) extends RuntimeException(msg, e) { } @DefaultKey("fileTool") class FileSystemTool { def exists(path: String) = new File(path).exists(); @throws[IOException] def filesave(file: Array[Byte], filePath: String, fileName: String): String = { // target directory val targetfile = new File(filePath) if (!targetfile.exists) targetfile.mkdirs // write the binary stream val out = new FileOutputStream(filePath + fileName) out.write(file) out.flush() out.close() return fileName } }
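Stripped of the blob handling, the Velocity part amounts to configuring a VelocityEngine through Properties and evaluating a template against a context. A hedged minimal sketch using only the core Velocity API:

import java.io.StringWriter
import java.util.Properties
import org.apache.velocity.VelocityContext
import org.apache.velocity.app.VelocityEngine

val props = new Properties()
props.setProperty("input.encoding", "UTF-8")
props.setProperty("output.encoding", "UTF-8")

val engine = new VelocityEngine(props)
engine.init()

val context = new VelocityContext()
context.put("name", "Properties")

val writer = new StringWriter()
engine.evaluate(context, writer, "example", "Hello $name")
println(writer.toString) // Hello Properties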
Example 174
Source File: BinaryTypeBenchmark.scala From memsql-spark-connector with Apache License 2.0 | 5 votes |
package com.memsql.spark import java.sql.{Connection, DriverManager} import java.util.Properties import com.github.mrpowers.spark.daria.sql.SparkSessionExt._ import com.memsql.spark.BatchInsertBenchmark.{df, executeQuery} import org.apache.spark.sql.types.{BinaryType, IntegerType} import org.apache.spark.sql.{SaveMode, SparkSession} import scala.util.Random // BinaryTypeBenchmark is written to writing of the BinaryType with CPU profiler // this feature is accessible in Ultimate version of IntelliJ IDEA // see https://www.jetbrains.com/help/idea/async-profiler.html#profile for more details object BinaryTypeBenchmark extends App { final val masterHost: String = sys.props.getOrElse("memsql.host", "localhost") final val masterPort: String = sys.props.getOrElse("memsql.port", "5506") val spark: SparkSession = SparkSession .builder() .master("local") .config("spark.sql.shuffle.partitions", "1") .config("spark.driver.bindAddress", "localhost") .config("spark.datasource.memsql.ddlEndpoint", s"${masterHost}:${masterPort}") .config("spark.datasource.memsql.database", "testdb") .getOrCreate() def jdbcConnection: Loan[Connection] = { val connProperties = new Properties() connProperties.put("user", "root") Loan( DriverManager.getConnection( s"jdbc:mysql://$masterHost:$masterPort", connProperties )) } def executeQuery(sql: String): Unit = { jdbcConnection.to(conn => Loan(conn.createStatement).to(_.execute(sql))) } executeQuery("set global default_partitions_per_leaf = 2") executeQuery("drop database if exists testdb") executeQuery("create database testdb") def genRandomByte(): Byte = (Random.nextInt(256) - 128).toByte def genRandomRow(): Array[Byte] = Array.fill(1000)(genRandomByte()) val df = spark.createDF( List.fill(100000)(genRandomRow()).zipWithIndex, List(("data", BinaryType, true), ("id", IntegerType, true)) ) val start1 = System.nanoTime() df.write .format("memsql") .mode(SaveMode.Overwrite) .save("testdb.LoadData") println("Elapsed time: " + (System.nanoTime() - start1) + "ns [LoadData CSV]") val start2 = System.nanoTime() df.write .format("memsql") .option("tableKey.primary", "id") .option("onDuplicateKeySQL", "id = id") .mode(SaveMode.Overwrite) .save("testdb.BatchInsert") println("Elapsed time: " + (System.nanoTime() - start2) + "ns [BatchInsert]") val avroStart = System.nanoTime() df.write .format(DefaultSource.MEMSQL_SOURCE_NAME_SHORT) .mode(SaveMode.Overwrite) .option(MemsqlOptions.LOAD_DATA_FORMAT, "Avro") .save("testdb.AvroSerialization") println("Elapsed time: " + (System.nanoTime() - avroStart) + "ns [LoadData Avro] ") }
Example 175
Source File: LoadDataBenchmark.scala From memsql-spark-connector with Apache License 2.0 | 5 votes |
package com.memsql.spark import java.sql.{Connection, Date, DriverManager} import java.time.{Instant, LocalDate} import java.util.Properties import org.apache.spark.sql.types._ import com.github.mrpowers.spark.daria.sql.SparkSessionExt._ import org.apache.spark.sql.{SaveMode, SparkSession} import scala.util.Random // LoadDataBenchmark is written to test load data with CPU profiler // this feature is accessible in Ultimate version of IntelliJ IDEA // see https://www.jetbrains.com/help/idea/async-profiler.html#profile for more details object LoadDataBenchmark extends App { final val masterHost: String = sys.props.getOrElse("memsql.host", "localhost") final val masterPort: String = sys.props.getOrElse("memsql.port", "5506") val spark: SparkSession = SparkSession .builder() .master("local") .config("spark.sql.shuffle.partitions", "1") .config("spark.driver.bindAddress", "localhost") .config("spark.datasource.memsql.ddlEndpoint", s"${masterHost}:${masterPort}") .config("spark.datasource.memsql.database", "testdb") .getOrCreate() def jdbcConnection: Loan[Connection] = { val connProperties = new Properties() connProperties.put("user", "root") Loan( DriverManager.getConnection( s"jdbc:mysql://$masterHost:$masterPort", connProperties )) } def executeQuery(sql: String): Unit = { jdbcConnection.to(conn => Loan(conn.createStatement).to(_.execute(sql))) } executeQuery("set global default_partitions_per_leaf = 2") executeQuery("drop database if exists testdb") executeQuery("create database testdb") def genRow(): (Long, Int, Double, String) = (Random.nextLong(), Random.nextInt(), Random.nextDouble(), Random.nextString(20)) val df = spark.createDF( List.fill(1000000)(genRow()), List(("LongType", LongType, true), ("IntType", IntegerType, true), ("DoubleType", DoubleType, true), ("StringType", StringType, true)) ) val start = System.nanoTime() df.write .format("memsql") .mode(SaveMode.Append) .save("testdb.batchinsert") val diff = System.nanoTime() - start println("Elapsed time: " + diff + "ns [CSV serialization] ") executeQuery("truncate testdb.batchinsert") val avroStart = System.nanoTime() df.write .format(DefaultSource.MEMSQL_SOURCE_NAME_SHORT) .mode(SaveMode.Append) .option(MemsqlOptions.LOAD_DATA_FORMAT, "Avro") .save("testdb.batchinsert") val avroDiff = System.nanoTime() - avroStart println("Elapsed time: " + avroDiff + "ns [Avro serialization] ") }
Example 176
Source File: BatchInsertBenchmark.scala From memsql-spark-connector with Apache License 2.0 | 5 votes |
package com.memsql.spark import java.sql.{Connection, Date, DriverManager} import java.time.LocalDate import java.util.Properties import org.apache.spark.sql.types._ import com.github.mrpowers.spark.daria.sql.SparkSessionExt._ import org.apache.spark.sql.{SaveMode, SparkSession} import scala.util.Random // BatchInsertBenchmark is written to test batch insert with CPU profiler // this feature is accessible in Ultimate version of IntelliJ IDEA // see https://www.jetbrains.com/help/idea/async-profiler.html#profile for more details object BatchInsertBenchmark extends App { final val masterHost: String = sys.props.getOrElse("memsql.host", "localhost") final val masterPort: String = sys.props.getOrElse("memsql.port", "5506") val spark: SparkSession = SparkSession .builder() .master("local") .config("spark.sql.shuffle.partitions", "1") .config("spark.driver.bindAddress", "localhost") .config("spark.datasource.memsql.ddlEndpoint", s"${masterHost}:${masterPort}") .config("spark.datasource.memsql.database", "testdb") .getOrCreate() def jdbcConnection: Loan[Connection] = { val connProperties = new Properties() connProperties.put("user", "root") Loan( DriverManager.getConnection( s"jdbc:mysql://$masterHost:$masterPort", connProperties )) } def executeQuery(sql: String): Unit = { jdbcConnection.to(conn => Loan(conn.createStatement).to(_.execute(sql))) } executeQuery("set global default_partitions_per_leaf = 2") executeQuery("drop database if exists testdb") executeQuery("create database testdb") def genDate() = Date.valueOf(LocalDate.ofEpochDay(LocalDate.of(2001, 4, 11).toEpochDay + Random.nextInt(10000))) def genRow(): (Long, Int, Double, String, Date) = (Random.nextLong(), Random.nextInt(), Random.nextDouble(), Random.nextString(20), genDate()) val df = spark.createDF( List.fill(1000000)(genRow()), List(("LongType", LongType, true), ("IntType", IntegerType, true), ("DoubleType", DoubleType, true), ("StringType", StringType, true), ("DateType", DateType, true)) ) val start = System.nanoTime() df.write .format("memsql") .option("tableKey.primary", "IntType") .option("onDuplicateKeySQL", "IntType = IntType") .mode(SaveMode.Append) .save("testdb.batchinsert") val diff = System.nanoTime() - start println("Elapsed time: " + diff + "ns") }
Example 177
Source File: CircleUtils.scala From Scala-Design-Patterns-Second-Edition with MIT License | 5 votes |
package com.ivan.nikolov.creational.lazy_init import java.util.Properties object CircleUtils { val basicPi = 3.14 lazy val precisePi: Double = { System.out.println("Reading properties for the precise PI.") val props = new Properties() props.load(getClass.getResourceAsStream("pi.properties")) props.getProperty("pi.high").toDouble } def area(radius: Double, isPrecise: Boolean = false): Double = { val pi: Double = if (isPrecise) precisePi else basicPi pi * Math.pow(radius, 2) } } object Example { def main(args: Array[String]): Unit = { System.out.println(s"The basic area for a circle with radius 2.5 is ${CircleUtils.area(2.5)}") System.out.println(s"The precise area for a circle with radius 2.5 is ${CircleUtils.area(2.5, true)}") System.out.println(s"The basic area for a circle with radius 6.78 is ${CircleUtils.area(6.78)}") System.out.println(s"The precise area for a circle with radius 6.78 is ${CircleUtils.area(6.78, true)}") } }
Example 178
Source File: CircleUtils.scala From Scala-Design-Patterns-Second-Edition with MIT License | 5 votes |
package com.ivan.nikolov.creational.lazy_init import java.util.Properties object CircleUtils { val basicPi = 3.14 lazy val precisePi: Double = { System.out.println("Reading properties for the precise PI.") val props = new Properties() props.load(getClass.getResourceAsStream("pi.properties")) props.getProperty("pi.high").toDouble } def area(radius: Double, isPrecise: Boolean = false): Double = { val pi: Double = if (isPrecise) precisePi else basicPi pi * Math.pow(radius, 2) } } object Example { def main(args: Array[String]): Unit = { System.out.println(s"The basic area for a circle with radius 2.5 is ${CircleUtils.area(2.5)}") System.out.println(s"The precise area for a circle with radius 2.5 is ${CircleUtils.area(2.5, true)}") System.out.println(s"The basic area for a circle with radius 6.78 is ${CircleUtils.area(6.78)}") System.out.println(s"The precise area for a circle with radius 6.78 is ${CircleUtils.area(6.78, true)}") } }
Example 179
Source File: ConfigConversions.scala From etl-light with MIT License | 5 votes |
package yamrcraft.etlite.utils import java.util.Properties import com.typesafe.config.Config import scala.collection.JavaConversions._ object ConfigConversions { implicit class RichConfig(config: Config) { def asProperties: Properties = { val props = new Properties() for (entry <- config.entrySet) { props.put(entry.getKey, entry.getValue.unwrapped) } props } def asMap[T <: Any]: Map[String, String] = { config.entrySet.collect { case entry => entry.getKey -> entry.getValue.unwrapped().toString }.toMap } } }
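A usage sketch of the asProperties conversion above, turning a Typesafe Config into java.util.Properties; the keys and values are illustrative.

import com.typesafe.config.ConfigFactory
import yamrcraft.etlite.utils.ConfigConversions._

val config = ConfigFactory.parseString(
  """
    |bootstrap.servers = "localhost:9092"
    |group.id = etl-light
  """.stripMargin)

val props = config.asProperties          // java.util.Properties with the same flat path keys
println(props.getProperty("group.id"))   // etl-light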
Example 180
Source File: KafkaPublisher.scala From etl-light with MIT License | 5 votes |
package yamrcraft.etlite import java.util.Properties import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} class KafkaPublisher { val props = new Properties() props.put("bootstrap.servers", "localhost:9092") props.put("partition.assignment.strategy", "range") props.put("key.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer") props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer") props.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer") props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer") val producer = new KafkaProducer[Array[Byte], Array[Byte]](props) def send(topic: String, event: Array[Byte]): Unit = { producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, event)) } def send(topic: String, events: List[Array[Byte]]): Unit = { for (event <- events) { producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, event)) } } }
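A usage sketch of the publisher above; since the producer properties are hard-coded, the only inputs are the topic and the payload, and both names here are placeholders.

val publisher = new KafkaPublisher()

// single event
publisher.send("etl-events", """{"id": 1}""".getBytes("UTF-8"))

// batch of events
val batch = List("""{"id": 2}""", """{"id": 3}""").map(_.getBytes("UTF-8"))
publisher.send("etl-events", batch)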
Example 181
Source File: SonarRunnerPlugin.scala From sbt-sonarrunner-plugin with MIT License | 5 votes |
package com.aol.sbt.sonar import java.io.{File, FileOutputStream} import java.util.Properties import org.sonar.runner.Main import sbt.Keys._ import sbt._ import scala.collection.JavaConversions ; object SonarRunnerPlugin extends AutoPlugin { object autoImport { val sonarProperties = settingKey[Map[String, String]]("SonarRunner configuration properties. See http://docs.codehaus.org/display/SONAR/Analysis+Parameters.") val sonar = taskKey[Unit]("Runs Sonar agent") val generateSonarConfiguration = taskKey[File]("Generates Sonar configuration") val sonarRunnerOptions = settingKey[Seq[String]]("Extra options for sonar runner") } import com.aol.sbt.sonar.SonarRunnerPlugin.autoImport._ override def projectSettings: Seq[Setting[_]] = Seq( generateSonarConfiguration := makeConfiguration(target.value + "/sonar-project.properties", sonarProperties.value), sonarProperties := Map( "sonar.projectName" -> name.value, "sonar.projectVersion" -> version.value, "sonar.projectKey" -> "%s:%s".format(organization.value, name.value), "sonar.binaries" -> filePathsToString(Seq((classDirectory in Compile).value)), "sonar.sources" -> filePathsToString((unmanagedSourceDirectories in Compile).value), "sonar.tests" -> filePathsToString((unmanagedSourceDirectories in Test).value), "sonar.projectBaseDir" -> file(".").absolutePath, "sonar.sourceEncoding" -> "UTF-8", "sonar.host.url" -> "http://localhost:9000", "sonar.jdbc.url" -> "jdbc:mysql://localhost:3306/sonar", "sonar.jdbc.username" -> "sonar", "sonar.jdbc.password" -> "sonar" ), sonarRunnerOptions := Seq.empty, sonar := { lazy val logger: TaskStreams = streams.value runSonarAgent(generateSonarConfiguration.value, logger, sonarRunnerOptions.value) } ) def runSonarAgent(configFile: File, logger: TaskStreams, sonarRunnerOptions: Seq[String]) = { logger.log.info("**********************************") logger.log.info("Publishing reports to SonarQube...") logger.log.info("**********************************") Main.main(Array[String]("-D", "project.settings=" + configFile.getCanonicalPath, "-D", "project.home=" + file(".").absolutePath) ++ sonarRunnerOptions) } private[this] def filePathsToString(files: Seq[File]) = files.filter(_.exists).map(_.getAbsolutePath).toSet.mkString(",") private[this] def makeConfiguration(configPath: String, props: Map[String, String]): File = { val p = new Properties() p.putAll(JavaConversions.mapAsJavaMap(props)) p.store(new FileOutputStream(configPath), null) file(configPath) } }
Example 182
Source File: StreamHQL.scala From spark-cep with Apache License 2.0 | 5 votes |
import java.util.Properties import kafka.consumer.ConsumerConfig import org.I0Itec.zkclient.ZkClient import org.apache.log4j.{Level, Logger} import org.apache.spark.sql.hive.HiveContext import org.apache.spark.sql.streaming.StreamSQLContext import org.apache.spark.sql.streaming.sources.MessageDelimiter import org.apache.spark.streaming.dstream.ConstantInputDStream import org.apache.spark.streaming.{Seconds, StreamingContext} import org.apache.spark.{SparkConf, SparkContext} import redis.RedisManager import scala.util.parsing.json.JSON class TabDelimiter extends MessageDelimiter { override val delimiter = "\t" } object StreamDDL { def main(args: Array[String]): Unit = { Logger.getRootLogger.setLevel(Level.WARN) val query = args(0) val sc = new SparkContext(new SparkConf()) val ssc = new StreamingContext(sc, Seconds(1)) val streamSqlContext = new StreamSQLContext(ssc, new HiveContext(sc)) streamSqlContext.command(query) new ConstantInputDStream[Int](ssc, sc.parallelize(Seq(1))).print ssc.start() ssc.awaitTerminationOrTimeout(100) ssc.stop() } } object StreamHQL { object Redis { var initialized = false var manager: RedisManager = _ def init(confMap: Map[String, String]) { if (initialized == false) { manager = new RedisManager( confMap("redis.shards"), confMap("redis.sentinels"), confMap("redis.database").toInt) manager.init initialized = true } } } def removeConsumerGroup(zkQuorum: String, groupId: String) { val properties = new Properties() properties.put("zookeeper.connect", zkQuorum) properties.put("group.id", groupId) val conf = new ConsumerConfig(properties) val zkClient = new ZkClient(conf.zkConnect) zkClient.deleteRecursive(s"/consumers/${conf.groupId}") zkClient.close() } def main(args: Array[String]): Unit = { Logger.getRootLogger.setLevel(Level.WARN) val confMap = JSON.parseFull(args(0)).get.asInstanceOf[Map[String, String]] val qid = args(1) val query = args(2) val sc = new SparkContext(new SparkConf()) val ssc = new StreamingContext(sc, Seconds(1)) val hc = new HiveContext(sc) val streamSqlContext = new StreamSQLContext(ssc, hc) val redisExpireSec = confMap("redis.expire.sec").toInt ssc.checkpoint(s"checkpoint/$qid") hc.setConf("spark.streaming.query.id", qid) hc.setConf("spark.sql.shuffle.partitions", confMap("spark.sql.shuffle.partitions")) removeConsumerGroup(confMap("kafka.zookeeper.quorum"), qid) val result = streamSqlContext.sql(query) val schema = result.schema result.foreachRDD((rdd, time) => { rdd.foreachPartition(partition => { Redis.init(confMap) val jedis = Redis.manager.getResource val pipe = jedis.pipelined partition.foreach(record => { val seq = record.toSeq(schema) val ts = time.milliseconds / 1000 val hkey = seq.take(seq.size - 1).mkString(".") pipe.hset(qid + "." + ts, hkey, seq(seq.size - 1).toString) pipe.expire(qid + "." + ts, redisExpireSec) }) pipe.sync Redis.manager.returnResource(jedis) }) }) ssc.start() ssc.awaitTermination() ssc.stop() } }
Example 183
Source File: MQTTUtils.scala From bahir with Apache License 2.0 | 5 votes |
package org.apache.bahir.sql.streaming.mqtt import java.util.Properties import org.eclipse.paho.client.mqttv3.{MqttClient, MqttClientPersistence, MqttConnectOptions} import org.eclipse.paho.client.mqttv3.persist.{MemoryPersistence, MqttDefaultFilePersistence} import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.bahir.utils.Logging object MQTTUtils extends Logging { // Since data source configuration properties are case-insensitive, // we have to introduce our own keys. Also, good for vendor independence. private[mqtt] val sslParamMapping = Map( "ssl.protocol" -> "com.ibm.ssl.protocol", "ssl.key.store" -> "com.ibm.ssl.keyStore", "ssl.key.store.password" -> "com.ibm.ssl.keyStorePassword", "ssl.key.store.type" -> "com.ibm.ssl.keyStoreType", "ssl.key.store.provider" -> "com.ibm.ssl.keyStoreProvider", "ssl.trust.store" -> "com.ibm.ssl.trustStore", "ssl.trust.store.password" -> "com.ibm.ssl.trustStorePassword", "ssl.trust.store.type" -> "com.ibm.ssl.trustStoreType", "ssl.trust.store.provider" -> "com.ibm.ssl.trustStoreProvider", "ssl.ciphers" -> "com.ibm.ssl.enabledCipherSuites", "ssl.key.manager" -> "com.ibm.ssl.keyManager", "ssl.trust.manager" -> "com.ibm.ssl.trustManager" ) def parseConfigParams(config: Map[String, String]): (String, String, String, MqttClientPersistence, MqttConnectOptions, Int, Long, Long, Int) = { def e(s: String) = new IllegalArgumentException(s) val parameters = CaseInsensitiveMap(config) val brokerUrl: String = parameters.getOrElse("brokerUrl", parameters.getOrElse("path", throw e("Please provide a `brokerUrl` by specifying path or .options(\"brokerUrl\",...)"))) val persistence: MqttClientPersistence = parameters.get("persistence") match { case Some("memory") => new MemoryPersistence() case _ => val localStorage: Option[String] = parameters.get("localStorage") localStorage match { case Some(x) => new MqttDefaultFilePersistence(x) case None => new MqttDefaultFilePersistence() } } // if the default is to subscribe to everything, it leads to a lot of unwanted system messages. val topic: String = parameters.getOrElse("topic", throw e("Please specify a topic, by .options(\"topic\",...)")) val clientId: String = parameters.getOrElse("clientId", { log.warn("If `clientId` is not set, a random value is picked up."
+ "\nRecovering from failure is not supported in such a case.") MqttClient.generateClientId()}) val username: Option[String] = parameters.get("username") val password: Option[String] = parameters.get("password") val connectionTimeout: Int = parameters.getOrElse("connectionTimeout", MqttConnectOptions.CONNECTION_TIMEOUT_DEFAULT.toString).toInt val keepAlive: Int = parameters.getOrElse("keepAlive", MqttConnectOptions .KEEP_ALIVE_INTERVAL_DEFAULT.toString).toInt val mqttVersion: Int = parameters.getOrElse("mqttVersion", MqttConnectOptions .MQTT_VERSION_DEFAULT.toString).toInt val cleanSession: Boolean = parameters.getOrElse("cleanSession", "false").toBoolean val qos: Int = parameters.getOrElse("QoS", "1").toInt val autoReconnect: Boolean = parameters.getOrElse("autoReconnect", "false").toBoolean val maxInflight: Int = parameters.getOrElse("maxInflight", "60").toInt val maxBatchMessageNum = parameters.getOrElse("maxBatchMessageNum", s"${Long.MaxValue}").toLong val maxBatchMessageSize = parameters.getOrElse("maxBatchMessageSize", s"${Long.MaxValue}").toLong val maxRetryNumber = parameters.getOrElse("maxRetryNum", "3").toInt val mqttConnectOptions: MqttConnectOptions = new MqttConnectOptions() mqttConnectOptions.setAutomaticReconnect(autoReconnect) mqttConnectOptions.setCleanSession(cleanSession) mqttConnectOptions.setConnectionTimeout(connectionTimeout) mqttConnectOptions.setKeepAliveInterval(keepAlive) mqttConnectOptions.setMqttVersion(mqttVersion) mqttConnectOptions.setMaxInflight(maxInflight) (username, password) match { case (Some(u: String), Some(p: String)) => mqttConnectOptions.setUserName(u) mqttConnectOptions.setPassword(p.toCharArray) case _ => } val sslProperties = new Properties() config.foreach(e => { if (e._1.startsWith("ssl.")) { sslProperties.setProperty(sslParamMapping(e._1), e._2) } }) mqttConnectOptions.setSSLProperties(sslProperties) (brokerUrl, clientId, topic, persistence, mqttConnectOptions, qos, maxBatchMessageNum, maxBatchMessageSize, maxRetryNumber) } }
Example 184
Source File: EmbeddedZookeeper.scala From pizza-auth-3 with MIT License | 5 votes |
package moe.pizza.auth.queue import java.util.Properties import org.apache.zookeeper.server.{ServerConfig, ZooKeeperServerMain} import org.apache.zookeeper.server.quorum.QuorumPeerConfig class EmbeddedZookeeper(config: Properties) { val quorumconfig = new QuorumPeerConfig quorumconfig.parseProperties(config) val server = new ZooKeeperServerMain val zkconfig = new ServerConfig zkconfig.readFrom(quorumconfig) var zkthread: Option[Thread] = None def start(): Thread = { val thread = new Thread() { override def run(): Unit = { try { server.runFromConfig(zkconfig) } catch { case e: Throwable => e.printStackTrace(System.err) } } } thread.start() zkthread = Some(thread) thread } def stop() = { zkthread match { case Some(t) => t.destroy() case None => () } } }
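QuorumPeerConfig.parseProperties expects the usual zoo.cfg keys supplied as Properties. A hedged usage sketch of the class above; the data directory and port are placeholders.

import java.util.Properties

val zkProps = new Properties()
zkProps.setProperty("dataDir", "/tmp/zookeeper-test") // placeholder directory
zkProps.setProperty("clientPort", "2181")
zkProps.setProperty("tickTime", "2000")

val zk = new EmbeddedZookeeper(zkProps)
val serverThread = zk.start()   // runs ZooKeeperServerMain on a background thread
// ... run tests against localhost:2181 ...
zk.stop()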
Example 185
Source File: Project.scala From ScalaClean with Apache License 2.0 | 5 votes |
package scalaclean.model.impl import scalaclean.model._ import java.io.File import java.net.{URL, URLClassLoader} import java.nio.file.{Files, Path, Paths} import java.util.Properties import java.util.concurrent.ConcurrentHashMap import scalafix.v1.SymbolInformation import scala.meta.internal.symtab.{GlobalSymbolTable, SymbolTable} import scala.meta.io.{AbsolutePath, Classpath} object Project { import org.scalaclean.analysis.PropertyNames._ def apply(propsPath: Path, projects: ProjectSet): Project = { val props = new Properties() println("PropsPath = " + propsPath) props.load(Files.newBufferedReader(propsPath)) val classpathValue = props.getProperty(prop_classpath) val outputPath = props.getProperty(prop_outputDir) val elementsFilePath = props.getProperty(prop_elementsFile) val relationshipsFilePath = props.getProperty(prop_relationshipsFile) val extensionsFilePath = props.getProperty(prop_extensionsFile) val src = props.getProperty(prop_src) val srcBuildBase = props.getProperty(prop_srcBuildBase) val srcFiles = props.getProperty(prop_srcFiles, "").split(File.pathSeparatorChar).toSet val srcRoots = props.getProperty(prop_srcRoots).split(File.pathSeparatorChar).toList.sortWith((s1, s2) => s1.length > s1.length || s1 < s2).map(AbsolutePath(_)) println("srcRoots = " + srcRoots) assert(classpathValue ne null, props.keys) assert(outputPath ne null, props.keys) val classPath = Classpath.apply(classpathValue) new Project(projects, classPath, outputPath, src, srcRoots, srcBuildBase, elementsFilePath, relationshipsFilePath, extensionsFilePath, srcFiles) } } class Project private( val projects: ProjectSet, val classPath: Classpath, val outputPath: String, val src: String, val srcRoots: List[AbsolutePath], val srcBuildBase: String, elementsFilePath: String, relationshipsFilePath: String, extensionsFilePath: String, val srcFiles: Set[String]) { def symbolTable: SymbolTable = GlobalSymbolTable(classPath, includeJdk = true) lazy val classloader: ClassLoader = new URLClassLoader(Array(new URL("file:" + outputPath + "/")), null) private val infos = new ConcurrentHashMap[LegacyElementId, SymbolInformation]() def symbolInfo(viewedFrom: ElementModelImpl, symbol: LegacyElementId): SymbolInformation = { infos.computeIfAbsent(symbol, s => //any doc in the project would do though viewedFrom.source.doc.info(s.symbol).orNull) } def read: (Vector[ElementModelImpl], BasicRelationshipInfo) = ModelReader.read(this, elementsFilePath, relationshipsFilePath, extensionsFilePath) private val sourcesMap = new ConcurrentHashMap[String, SourceData]() def source(name: String): SourceData = { sourcesMap.computeIfAbsent(name, p => SourceData(this, Paths.get(p))) } }
Example 186
Source File: BlazegraphClientFixture.scala From nexus with Apache License 2.0 | 5 votes |
package ch.epfl.bluebrain.nexus.commons.sparql.client import java.util.Properties import akka.http.scaladsl.model.Uri import ch.epfl.bluebrain.nexus.commons.sparql.client.BlazegraphClientFixture._ import ch.epfl.bluebrain.nexus.util.Randomness._ import scala.jdk.CollectionConverters._ trait BlazegraphClientFixture { val namespace: String = genString(8) val rand: String = genString(length = 8) val graph: Uri = s"http://$localhost:8080/graphs/$rand" val id: String = genString() val label: String = genString() val value: String = genString() } object BlazegraphClientFixture { val localhost = "127.0.0.1" def properties(file: String = "/commons/sparql/index.properties"): Map[String, String] = { val props = new Properties() props.load(getClass.getResourceAsStream(file)) props.asScala.toMap } }
Example 187
Source File: Conf.scala From CkoocNLP with Apache License 2.0 | 5 votes |
package config import java.io.{File, FileInputStream, InputStreamReader} import java.util.Properties import scala.collection.mutable object Conf { def loadConf(filePath: String): mutable.LinkedHashMap[String, String] = { val kvMap = mutable.LinkedHashMap[String, String]() val properties = new Properties() properties.load(new InputStreamReader(new FileInputStream(filePath), "UTF-8")) val propertyNameArray = properties.stringPropertyNames().toArray(new Array[String](0)) val fileName = new File(filePath).getName println(s"============ Loading configuration file $fileName ================") for (propertyName <- propertyNameArray) { val property = properties.getProperty(propertyName).replaceAll("\"", "").trim println(propertyName + ": " + property) kvMap.put(propertyName, property) } println("==========================================================") kvMap } }
Example 188
Source File: AppConfiguration.scala From haystack-trends with Apache License 2.0 | 5 votes |
package com.expedia.www.haystack.trends.config import java.util.Properties import com.expedia.www.haystack.commons.config.ConfigurationLoader import com.expedia.www.haystack.commons.entities.encoders.EncoderFactory import com.expedia.www.haystack.trends.config.entities.{KafkaConfiguration, TransformerConfiguration} import com.typesafe.config.Config import org.apache.kafka.streams.StreamsConfig import org.apache.kafka.streams.Topology.AutoOffsetReset import org.apache.kafka.streams.processor.TimestampExtractor import scala.collection.JavaConverters._ import scala.util.matching.Regex class AppConfiguration { private val config = ConfigurationLoader.loadConfigFileWithEnvOverrides() val healthStatusFilePath: String = config.getString("health.status.path") def kafkaConfig: KafkaConfiguration = { // verify if the applicationId and bootstrap server config are non empty def verifyRequiredProps(props: Properties): Unit = { require(props.getProperty(StreamsConfig.APPLICATION_ID_CONFIG).nonEmpty) require(props.getProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG).nonEmpty) } def addProps(config: Config, props: Properties, prefix: (String) => String = identity): Unit = { config.entrySet().asScala.foreach(kv => { val propKeyName = prefix(kv.getKey) props.setProperty(propKeyName, kv.getValue.unwrapped().toString) }) } val kafka = config.getConfig("kafka") val producerConfig = kafka.getConfig("producer") val consumerConfig = kafka.getConfig("consumer") val streamsConfig = kafka.getConfig("streams") val props = new Properties // add stream specific properties addProps(streamsConfig, props) // validate props verifyRequiredProps(props) val timestampExtractor = Class.forName(props.getProperty("timestamp.extractor", "org.apache.kafka.streams.processor.WallclockTimestampExtractor")) KafkaConfiguration(new StreamsConfig(props), produceTopic = producerConfig.getString("topic"), consumeTopic = consumerConfig.getString("topic"), if (streamsConfig.hasPath("auto.offset.reset")) AutoOffsetReset.valueOf(streamsConfig.getString("auto.offset.reset").toUpperCase) else AutoOffsetReset.LATEST , timestampExtractor.newInstance().asInstanceOf[TimestampExtractor], kafka.getLong("close.timeout.ms")) } }
Example 189
Source File: FeatureSpec.scala From haystack-trends with Apache License 2.0 | 5 votes |
package com.expedia.www.haystack.trends.feature import java._ import java.util.Properties import com.expedia.metrics.MetricData import com.expedia.open.tracing.Span import com.expedia.www.haystack.commons.entities.encoders.Base64Encoder import com.expedia.www.haystack.trends.config.AppConfiguration import com.expedia.www.haystack.trends.config.entities.{KafkaConfiguration, TransformerConfiguration} import org.apache.kafka.streams.StreamsConfig import org.easymock.EasyMock import org.scalatest.easymock.EasyMockSugar import org.scalatest.{FeatureSpecLike, GivenWhenThen, Matchers} trait FeatureSpec extends FeatureSpecLike with GivenWhenThen with Matchers with EasyMockSugar { protected val METRIC_TYPE = "gauge" def generateTestSpan(duration: Long): Span = { val operationName = "testSpan" val serviceName = "testService" Span.newBuilder() .setDuration(duration) .setOperationName(operationName) .setServiceName(serviceName) .build() } protected def mockAppConfig: AppConfiguration = { val kafkaConsumeTopic = "test-consume" val kafkaProduceTopic = "test-produce" val streamsConfig = new Properties() streamsConfig.put(StreamsConfig.APPLICATION_ID_CONFIG, "test-app") streamsConfig.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "test-kafka-broker") val kafkaConfig = KafkaConfiguration(new StreamsConfig(streamsConfig), kafkaProduceTopic, kafkaConsumeTopic, null, null, 0l) val transformerConfig = TransformerConfiguration(new Base64Encoder, enableMetricPointServiceLevelGeneration = true, List()) val appConfiguration = mock[AppConfiguration] expecting { appConfiguration.kafkaConfig.andReturn(kafkaConfig).anyTimes() appConfiguration.transformerConfiguration.andReturn(transformerConfig).anyTimes() } EasyMock.replay(appConfiguration) appConfiguration } protected def getMetricDataTags(metricData : MetricData): util.Map[String, String] = { metricData.getMetricDefinition.getTags.getKv } }
Example 190
Source File: FeatureSpec.scala From haystack-trends with Apache License 2.0 | 5 votes |
package com.expedia.www.haystack.trends.feature import java.util import java.util.Properties import com.expedia.metrics.{MetricData, MetricDefinition, TagCollection} import com.expedia.www.haystack.commons.entities.encoders.PeriodReplacementEncoder import com.expedia.www.haystack.trends.config.AppConfiguration import com.expedia.www.haystack.trends.config.entities.{KafkaConfiguration, KafkaProduceConfiguration, KafkaSinkTopic, StateStoreConfiguration} import org.apache.kafka.streams.StreamsConfig import org.apache.kafka.streams.Topology.AutoOffsetReset import org.apache.kafka.streams.processor.WallclockTimestampExtractor import org.easymock.EasyMock import org.scalatest._ import org.scalatest.easymock.EasyMockSugar import org.mockito.Mockito._ import scala.collection.JavaConverters._ trait FeatureSpec extends FeatureSpecLike with GivenWhenThen with Matchers with EasyMockSugar { def currentTimeInSecs: Long = { System.currentTimeMillis() / 1000l } protected def mockAppConfig: AppConfiguration = { val kafkaConsumeTopic = "test-consume" val kafkaProduceTopic = "test-produce" val kafkaMetricTankProduceTopic = "test-mdm-produce" val streamsConfig = new Properties() streamsConfig.put(StreamsConfig.APPLICATION_ID_CONFIG, "test-app") streamsConfig.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "test-kafka-broker") val kafkaSinkTopics = List(KafkaSinkTopic("metrics","com.expedia.www.haystack.commons.kstreams.serde.metricdata.MetricDataSerde",true), KafkaSinkTopic("mdm","com.expedia.www.haystack.commons.kstreams.serde.metricdata.MetricTankSerde",true)) val kafkaConfig = KafkaConfiguration(new StreamsConfig(streamsConfig), KafkaProduceConfiguration(kafkaSinkTopics, None, "mdm", false), kafkaConsumeTopic, AutoOffsetReset.EARLIEST, new WallclockTimestampExtractor, 30000) val projectConfiguration = mock[AppConfiguration] expecting { projectConfiguration.kafkaConfig.andReturn(kafkaConfig).anyTimes() projectConfiguration.encoder.andReturn(new PeriodReplacementEncoder).anyTimes() projectConfiguration.stateStoreConfig.andReturn(StateStoreConfiguration(128, false, 60, Map())).anyTimes() projectConfiguration.additionalTags.andReturn(Map("k1"->"v1", "k2"-> "v2")).anyTimes() } EasyMock.replay(projectConfiguration) projectConfiguration } protected def getMetricData(metricKey: String, tags: Map[String, String], value: Double, timeStamp: Long): MetricData = { val tagsMap = new java.util.LinkedHashMap[String, String] { if (tags != null) putAll(tags.asJava) put(MetricDefinition.MTYPE, "gauge") put(MetricDefinition.UNIT, "short") } val metricDefinition = new MetricDefinition(metricKey, new TagCollection(tagsMap), TagCollection.EMPTY) new MetricData(metricDefinition, value, timeStamp) } protected def containsTagInMetricData(metricData: MetricData, tagKey: String, tagValue: String): Boolean = { val tags = getTagsFromMetricData(metricData) tags.containsKey(tagKey) && tags.get(tagKey).equalsIgnoreCase(tagValue) } protected def getTagsFromMetricData(metricData: MetricData): util.Map[String, String] = { metricData.getMetricDefinition.getTags.getKv } }
Example 191
Source File: NetezzaRDD.scala From spark-netezza with Apache License 2.0 | 5 votes |
package com.ibm.spark.netezza

import java.sql.Connection
import java.util.Properties

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.Row
import org.apache.spark.sql.sources._
import org.apache.spark.sql.types._
import org.apache.spark.{Partition, SparkContext, TaskContext}

// Excerpt: only the compute() override of the NetezzaRDD class (an RDD[Row]) is shown here.
// The class declaration and the members it defines (getConnection, table, columns, filters,
// schema) are omitted; the final closing brace below ends that class.

  override def compute(thePart: Partition, context: TaskContext): Iterator[Row] =
    new Iterator[Row] {
      var closed = false
      var finished = false
      var gotNext = false
      var nextValue: Row = null

      context.addTaskCompletionListener { context => close() }

      val part = thePart.asInstanceOf[NetezzaPartition]
      val conn = getConnection()
      val reader = new NetezzaDataReader(conn, table, columns, filters, part, schema)
      reader.startExternalTableDataUnload()

      def getNext(): Row = {
        if (reader.hasNext) {
          reader.next()
        } else {
          finished = true
          null.asInstanceOf[Row]
        }
      }

      def close() {
        if (closed) return
        try {
          if (null != reader) {
            reader.close()
          }
        } catch {
          case e: Exception => logWarning("Exception closing Netezza record reader", e)
        }
        try {
          if (null != conn) {
            conn.close()
          }
          logInfo("closed connection")
        } catch {
          case e: Exception => logWarning("Exception closing connection", e)
        }
      }

      override def hasNext: Boolean = {
        if (!finished) {
          if (!gotNext) {
            nextValue = getNext()
            if (finished) {
              close()
            }
            gotNext = true
          }
        }
        !finished
      }

      override def next(): Row = {
        if (!hasNext) {
          throw new NoSuchElementException("End of stream")
        }
        gotNext = false
        nextValue
      }
    }
}
Example 192
Source File: DefaultSource.scala From spark-netezza with Apache License 2.0 | 5 votes |
package com.ibm.spark.netezza

import java.util.Properties

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.sources.{DataSourceRegister, BaseRelation, RelationProvider}

// Excerpt: only the createRelation() override is shown here; the declaration of the enclosing
// DefaultSource class (a RelationProvider, per the imports above) is omitted, and the final
// closing brace below ends that class.

  override def createRelation(
      sqlContext: SQLContext,
      parameters: Map[String, String]): BaseRelation = {
    val url = parameters.getOrElse("url", sys.error("Option 'Netezza database url' not specified"))
    val (table, isQuery) = parameters.get("dbtable").map(table => (table, false)).orElse {
      parameters.get("query")
        .map(q => (s"($q) as src", true))
        .orElse(sys.error("Option 'dbtable/query' should be specified."))
    }.get

    // TODO: Have to set it to the system default.
    // For a query the default is 1; when fetching from a table the default is 4. Data slices
    // can be used for partitioning when a table is specified.
    val numPartitions = parameters.getOrElse("numPartitions", if (isQuery) "1" else "4").toInt

    val partitionCol = parameters.get("partitioncol")
    val lowerBound = parameters.get("lowerbound")
    val upperBound = parameters.get("upperbound")

    val properties = new Properties()
    // Additional properties that we will pass to getConnection
    parameters.foreach { case (k, v) => properties.setProperty(k, v) }

    val conn = NetezzaJdbcUtils.getConnector(url, properties)()
    val parts = try {
      if (partitionCol.isDefined || isQuery) {
        if (isQuery && numPartitions > 1 && !partitionCol.isDefined) {
          throw new IllegalArgumentException("Partition column should be specified or" +
            " number of partitions should be set to 1 with the query option.")
        }
        val partnInfo = PartitioningInfo(partitionCol, lowerBound, upperBound, numPartitions)
        NetezzaInputFormat.getColumnPartitions(conn, table, partnInfo)
      } else {
        // Partitions based on the data slices.
        NetezzaInputFormat.getDataSlicePartition(conn, numPartitions)
      }
    } finally {
      conn.close()
    }

    NetezzaRelation(url, table, parts, properties, numPartitions)(sqlContext)
  }
}
Example 193
Source File: Version.scala From apalache with Apache License 2.0 | 5 votes |
package at.forsyte.apalache.tla.tooling

import java.io.IOException
import java.util.Properties

object Version {
  private val pomProps: Properties = loadProperties("META-INF/maven/at.forsyte.apalache/tool/pom.properties")
  private val gitProps: Properties = loadProperties("at/forsyte/apalache/tla/tooling/git.properties")

  def version: String = {
    pomProps.getProperty("version", "version-dev")
  }

  def build: String = {
    gitProps.getProperty("git.commit.id.describe", "unknown-build")
  }

  private def loadProperties(name: String): Properties = {
    val resourceStream = ClassLoader.getSystemClassLoader.getResourceAsStream(name)
    val props = new Properties()
    try {
      if (resourceStream != null) {
        props.load(resourceStream)
      }
    } catch {
      case _: IOException => () // ignore and set defaults, this is not a critical function
      case e: Throwable => throw e
    }

    props
  }
}
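A quick usage sketch for the object above (not part of the original file): the values come from whatever pom.properties and git.properties are on the classpath at runtime, falling back to the defaults shown in Version.

// Hypothetical caller of the Version object above; output depends on the bundled property files.
import at.forsyte.apalache.tla.tooling.Version

object VersionDemo extends App {
  println(s"apalache version: ${Version.version} (build ${Version.build})")
}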
Example 194
Source File: HoconMessagesApi.scala From play-i18n-hocon with Apache License 2.0 | 5 votes |
package com.marcospereira.play.i18n

import java.net.URL
import java.util.Properties
import javax.inject.{ Inject, Singleton }

import com.typesafe.config.ConfigFactory
import play.api.http.HttpConfiguration
import play.api.i18n._
import play.api.inject.Module
import play.api.{ Configuration, Environment }
import play.utils.Resources

import scala.collection.JavaConverters._

@Singleton
class HoconMessagesApiProvider @Inject() (
  environment: Environment,
  config: Configuration,
  langs: Langs,
  httpConfiguration: HttpConfiguration
) extends DefaultMessagesApiProvider(environment, config, langs, httpConfiguration) {

  override lazy val get: MessagesApi = {
    new DefaultMessagesApi(
      loadAllMessages,
      langs,
      langCookieName = langCookieName,
      langCookieSecure = langCookieSecure,
      langCookieHttpOnly = langCookieHttpOnly,
      httpConfiguration = httpConfiguration
    )
  }

  override protected def loadMessages(file: String): Map[String, String] = {
    getResources(file)
      .filterNot(url => Resources.isDirectory(environment.classLoader, url)).reverse
      .map(getMessages)
      .foldLeft(Map.empty[String, String]) { _ ++ _ }
  }

  override protected def loadAllMessages: Map[String, Map[String, String]] = {
    langs.availables.map(_.code).map { lang =>
      (lang, loadMessages(s"messages.$lang.conf"))
    }.toMap ++ Map(
      "default" -> loadMessages("messages.conf"),
      "default.play" -> loadMessages("messages.default")
    )
  }

  override protected def joinPaths(first: Option[String], second: String) = first match {
    case Some(parent) => new java.io.File(parent, second).getPath
    case None => second
  }

  private def getResources(file: String): List[URL] = {
    environment.classLoader.getResources(joinPaths(messagesPrefix, file)).asScala.toList
  }

  private def getMessages(url: URL): Map[String, String] = {
    // messages.default is bundled with play and it is a properties file
    val config = if (url.toString.endsWith("messages.default")) {
      ConfigFactory.parseProperties(getProperties(url))
    } else {
      ConfigFactory.parseURL(url)
    }

    config.resolve().entrySet().asScala
      .map(e => e.getKey -> String.valueOf(e.getValue.unwrapped()))
      .toMap
  }

  private def getProperties(url: URL): Properties = {
    val properties = new Properties()
    val input = url.openStream()
    try {
      properties.load(input)
    } finally {
      input.close()
    }
    properties
  }
}

trait HoconI18nComponents extends I18nComponents {
  def environment: Environment
  def configuration: Configuration
  def httpConfiguration: HttpConfiguration
  def langs: Langs

  override lazy val messagesApi: MessagesApi =
    new HoconMessagesApiProvider(environment, configuration, langs, httpConfiguration).get
}
Example 195
Source File: KafkaProducer.scala From spark-ref-architecture with Apache License 2.0 | 5 votes |
package com.stc.spark.streaming.kafka

import java.util.{Date, Properties}

import kafka.producer.{KeyedMessage, Producer, ProducerConfig}

import scala.util.Random

object KafkaProducer extends App {
  val events = args(0).toInt
  val topic = args(1)
  val brokers = args(2)
  val rnd = new Random()

  val producer = new Producer[String, String](KafkaConfig.config)
  val t = System.currentTimeMillis()

  for (nEvents <- Range(0, events)) {
    val runtime = new Date().getTime()
    val ip = "192.168.2." + rnd.nextInt(255)
    val msg = runtime + "," + nEvents + ",www.example.com," + ip
    val data = new KeyedMessage[String, String](topic, ip, msg)
    producer.send(data)
  }

  System.out.println("sent per second: " + events * 1000 / (System.currentTimeMillis() - t))
  producer.close()
}
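The KafkaConfig.config value passed to the Producer above is defined elsewhere in the project and is not shown in this example. As a rough, hypothetical sketch only, assuming the legacy kafka.producer API used here, such an object could build a ProducerConfig from a java.util.Properties; the broker address and settings below are placeholders, not values from the original source.

// Hypothetical stand-in for the project's KafkaConfig object (not part of the original example).
package com.stc.spark.streaming.kafka

import java.util.Properties

import kafka.producer.ProducerConfig

object KafkaConfig {
  private val props = new Properties()
  props.put("metadata.broker.list", "localhost:9092")             // placeholder broker address
  props.put("serializer.class", "kafka.serializer.StringEncoder") // string keys and values
  props.put("request.required.acks", "1")                         // wait for the leader to acknowledge

  val config = new ProducerConfig(props)
}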
Example 196
Source File: UtilTests.scala From coursier with Apache License 2.0 | 5 votes |
package coursier.paths

import java.io.File
import java.nio.file.{Files, Path}
import java.util.Properties

import coursier.paths.Util
import utest._

import scala.collection.JavaConverters._

object UtilTests extends TestSuite {

  private def deleteRecursive(f: File): Unit = {
    if (f.isDirectory)
      f.listFiles().foreach(deleteRecursive)
    if (f.exists())
      f.delete()
  }

  val tests = Tests {
    "createDirectories fine with sym links" - {
      var tmpDir: Path = null
      try {
        tmpDir = Files.createTempDirectory("coursier-paths-tests")
        val dir = Files.createDirectories(tmpDir.resolve("dir"))
        val link = Files.createSymbolicLink(tmpDir.resolve("link"), dir)
        Util.createDirectories(link) // should not throw
      } finally {
        deleteRecursive(tmpDir.toFile)
      }
    }

    "property expansion" - {
      "simple" - {
        val map = Map("something" -> "value", "other" -> "a")
        val sysProps = new Properties
        sysProps.setProperty("foo", "FOO")
        val toSet = Util.expandProperties(sysProps, map.asJava)
          .asScala
          .toVector
          .sorted
        val expected = map.toVector.sorted
        assert(toSet == expected)
      }

      "substitution" - {
        val map = Map("something" -> "value ${foo}", "other" -> "a")
        val sysProps = new Properties
        sysProps.setProperty("foo", "FOO")
        val toSet = Util.expandProperties(sysProps, map.asJava)
          .asScala
          .toVector
          .sorted
        val expected = Seq("something" -> "value FOO", "other" -> map("other")).sorted
        assert(toSet == expected)
      }

      "optional value" - {
        val map = Map("something" -> "value", "foo?" -> "A")
        val sysProps = new Properties
        sysProps.setProperty("foo", "FOO")
        val toSet = Util.expandProperties(sysProps, map.asJava)
          .asScala
          .toVector
          .sorted
        val expected = Seq("something" -> "value")
        assert(toSet == expected)
      }
    }
  }
}
Example 197
Source File: FileCredentials.scala From coursier with Apache License 2.0 | 5 votes |
package coursier.credentials

import java.io.{File, FileInputStream, StringReader}
import java.nio.charset.Charset
import java.nio.file.{Files, Paths}
import java.util.Properties

import dataclass.data

import scala.collection.JavaConverters._

@data class FileCredentials(
  path: String,
  optional: Boolean = true
) extends Credentials {

  def get(): Seq[DirectCredentials] = {
    val f = Paths.get(path)

    if (Files.isRegularFile(f)) {
      val content = new String(Files.readAllBytes(f), Charset.defaultCharset())
      FileCredentials.parse(content, path)
    } else if (optional)
      Nil
    else
      throw new Exception(s"Credential file $path not found")
  }
}

object FileCredentials {

  def parse(content: String, origin: String): Seq[DirectCredentials] = {

    val props = new Properties
    props.load(new StringReader(content))

    val userProps = props
      .propertyNames()
      .asScala
      .map(_.asInstanceOf[String])
      .filter(_.endsWith(".username"))
      .toVector

    userProps.map { userProp =>
      val prefix = userProp.stripSuffix(".username")

      val user = props.getProperty(userProp)
      val password = Option(props.getProperty(s"$prefix.password")).getOrElse {
        throw new Exception(s"Property $prefix.password not found in $origin")
      }

      val host = Option(props.getProperty(s"$prefix.host")).getOrElse {
        throw new Exception(s"Property $prefix.host not found in $origin")
      }

      val realmOpt = Option(props.getProperty(s"$prefix.realm")) // filter if empty?

      val matchHost = Option(props.getProperty(s"$prefix.auto")).fold(DirectCredentials.defaultMatchHost)(_.toBoolean)
      val httpsOnly = Option(props.getProperty(s"$prefix.https-only")).fold(DirectCredentials.defaultHttpsOnly)(_.toBoolean)
      val passOnRedirect = Option(props.getProperty(s"$prefix.pass-on-redirect")).fold(false)(_.toBoolean)

      DirectCredentials(host, user, password)
        .withRealm(realmOpt)
        .withMatchHost(matchHost)
        .withHttpsOnly(httpsOnly)
        .withPassOnRedirect(passOnRedirect)
    }
  }
}
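As the parse method above shows, each credential entry in the file is a group of properties sharing a prefix, with prefix.username, prefix.password and prefix.host required and the remaining keys optional. A minimal sketch of calling the parser directly, assuming the classes above are on the classpath; the prefix, host and credential values are made-up placeholders.

// Minimal usage sketch for FileCredentials.parse (placeholder values, not from the original project).
object FileCredentialsExample extends App {
  val content =
    """example.username=alice
      |example.password=secret
      |example.host=repo.example.com
      |example.https-only=true
      |""".stripMargin

  // One DirectCredentials entry is produced per "<prefix>.username" key found in the content.
  val credentials = coursier.credentials.FileCredentials.parse(content, origin = "inline-example")
  credentials.foreach(println)
}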
Example 198
Source File: Bench.scala From akka-nbench with Apache License 2.0 | 5 votes |
package bench

import akka.actor._
import akka.pattern.ask
import akka.util.Timeout

import scala.concurrent.duration._
import scala.reflect.runtime.universe._
import scala.concurrent.Await

import com.typesafe.config._
import net.ceedubs.ficus.Ficus._

import java.util.Properties
import java.nio.file._
import java.util.Date
import java.text.SimpleDateFormat

import Tapper._

object Bench extends App {

  def prepareOutputDirs(): String = {
    val csvDateTimeDir = FileSystems.getDefault().getPath(
      "tests/" + new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date()))
    Files.createDirectories(csvDateTimeDir)
    val csvSymlink = FileSystems.getDefault().getPath("tests/current")
    if (Files.isSymbolicLink(csvSymlink)) {
      Files.delete(csvSymlink)
    } else if (Files.exists(csvSymlink)) {
      throw new NotASymbolicLinkException(s"test/current is not a symbolic link. Path: $csvSymlink")
    }
    Files.createSymbolicLink(csvSymlink, csvDateTimeDir.toAbsolutePath)
    csvDateTimeDir.toAbsolutePath.toString
  }

  def parseOptions(): String = {
    val usage = """
      Usage: activator -mem 4096 "run-main bench.Bench scenario_name"
    """
    if (args.length != 1) println(usage)
    return args(0)
  }

  val scenario = parseOptions
  val config = ConfigFactory.load().getConfig(scenario)
  val duration = config.getInt("duration")
  val concurrent = config.getInt("concurrent")

  val csvDateTimeDir = prepareOutputDirs

  val system = ActorSystem("bench")
  val actorProps = Props(classOf[StatsCollector], csvDateTimeDir, config)
  val statsCollector = system.actorOf(actorProps, name = "statscollector")

  val operationsWithRatio: Map[String, Int] = config.as[Map[String, Int]]("operations")
  val numer = operationsWithRatio.values.sum
  if (concurrent < numer) {
    val msg = s"concurrent($concurrent) must greater than sum of operations ratio($numer)"
    System.err.println(msg)
    throw new ApplicationConfigException(msg)
  }
  val operations = for ((key, value) <- operationsWithRatio) yield {
    List.range(0, concurrent * operationsWithRatio(key) / numer).map(_ => key)
  }

  implicit val timeout = Timeout(duration * 2, SECONDS)

  var driverClz = Class.forName(config.getString("driver"))

  val drivers = operations.flatten.zipWithIndex.map { case (operation, i) =>
    system.actorOf(Props(driverClz, operation, statsCollector, config).withDispatcher("my-dispatcher"), name = s"driver_$i")
  }

  drivers.par.map(actor => actor ? Ready()).foreach { f =>
    Await.result(f, timeout.duration).asInstanceOf[OK]
  }

  val startAt = new Date()
  val doUntil = new Date(startAt.getTime + duration * 1000)
  drivers.par.map(actor => actor ? Go(doUntil)).foreach { f =>
    Await.result(f, timeout.duration).asInstanceOf[OK]
  }

  (statsCollector ? TearDown()).tap { f =>
    Await.result(f, timeout.duration).asInstanceOf[OK]
  }

  drivers.par.map(actor => actor ? TearDown()).foreach { f =>
    Await.result(f, timeout.duration).asInstanceOf[OK]
  }

  (drivers.head ? TearDown()).tap { f =>
    Await.result(f, timeout.duration).asInstanceOf[OK]
  }

  system.awaitTermination()
}
Example 199
Source File: WordCount.scala From kafka-streams with Apache License 2.0 | 5 votes |
import java.time.Duration
import java.util.Properties

import org.apache.kafka.streams.kstream.Materialized
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala._
import org.apache.kafka.streams.scala.kstream._
import org.apache.kafka.streams.{KafkaStreams, StreamsConfig}

object WordCount extends App {

  import Serdes._

  val props: Properties = {
    val p = new Properties()
    p.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-modified")
    p.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")
    p
  }

  val builder: StreamsBuilder = new StreamsBuilder
  val textLines: KStream[String, String] = builder.stream[String, String]("text_lines")
  val wordCounts: KTable[String, Long] = textLines
    .flatMapValues(textLine => textLine.toLowerCase.split("\\W+"))
    .groupBy((_, word) => word)
    .count()
  wordCounts.toStream.to("word_count_results")

  val streams: KafkaStreams = new KafkaStreams(builder.build(), props)
  streams.start()

  sys.ShutdownHookThread {
    streams.close(Duration.ofSeconds(10))
  }
}
Example 200
Source File: WordCountTestable.scala From kafka-streams with Apache License 2.0 | 5 votes |
package com.supergloo

import java.time.Duration
import java.util.Properties

import org.apache.kafka.streams.kstream.Materialized
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.{KafkaStreams, StreamsConfig, Topology}
import org.apache.kafka.streams.scala.{Serdes, StreamsBuilder}
import org.apache.kafka.streams.scala.kstream.{KStream, KTable}

class WordCountTestable {

  import Serdes._

  val props: Properties = {
    val p = new Properties()
    p.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-modified")
    p.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")
    p
  }

  def countNumberOfWords(inputTopic: String, outputTopic: String, storeName: String): Topology = {
    val builder: StreamsBuilder = new StreamsBuilder
    val textLines: KStream[String, String] = builder.stream[String, String](inputTopic)
    val wordCounts: KTable[String, Long] = textLines
      .flatMapValues(textLine => textLine.toLowerCase.split("\\W+"))
      .groupBy((_, word) => word)
      .count()(Materialized.as("counts-store"))
    wordCounts.toStream.to(outputTopic)
    builder.build()
  }

  def toLowerCaseStream(inputTopic: String, outputTopic: String): Topology = {
    val builder: StreamsBuilder = new StreamsBuilder()
    val textLines: KStream[String, String] = builder.stream(inputTopic)
    val wordCounts: KStream[String, String] = textLines
      .mapValues(textLine => textLine.toLowerCase)
    wordCounts.to(outputTopic)
    builder.build()
  }
}

object WordCountTestable extends WordCountTestable {

  def main(args: Array[String]): Unit = {
    val builder: Topology = countNumberOfWords("input-topic", "output-topic", "counts-store")
    val streams: KafkaStreams = new KafkaStreams(builder, props)
    streams.start()

    sys.ShutdownHookThread {
      streams.close(Duration.ofSeconds(10))
    }
  }
}