java.util.Collections Scala Examples
The following examples show how to use java.util.Collections.
Each example notes its source file, the project it comes from, and that project's license; the code is excerpted from the original source file.
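Before the examples, here is a minimal orientation sketch (not taken from any of the projects listed below; the object name CollectionsQuickTour and the sample values are invented for illustration). It shows the java.util.Collections calls that recur throughout these examples — emptyList, singletonMap, singletonList, sort, and shuffle — combined with scala.collection.JavaConverters to move between Java and Scala collections.

import java.util.{ArrayList, Collections}

import scala.collection.JavaConverters._

object CollectionsQuickTour extends App {
  // Immutable empty/singleton collections, handy as safe defaults and in tests
  val noRunnables: java.util.List[Runnable]        = Collections.emptyList[Runnable]()
  val onePartition: java.util.Map[String, String]  = Collections.singletonMap("topic", "source_topic")
  val oneColumn: java.util.List[String]            = Collections.singletonList("varchar")

  // A mutable Java list built from a Scala sequence, then sorted and shuffled in place
  val names = new ArrayList[String](Seq("delta", "alpha", "charlie", "bravo").asJava)
  Collections.sort(names)    // natural (alphabetical) ordering
  Collections.shuffle(names) // random order, e.g. for sampling test data

  // Back to a Scala collection for idiomatic processing
  println(names.asScala.mkString(", "))
}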
Example 1
Source File: FlumeTestUtils.scala From BigDatalog with Apache License 2.0
package org.apache.spark.streaming.flume

import java.net.{InetSocketAddress, ServerSocket}
import java.nio.ByteBuffer
import java.util.{List => JList}
import java.util.Collections

import scala.collection.JavaConverters._

import com.google.common.base.Charsets.UTF_8
import org.apache.avro.ipc.NettyTransceiver
import org.apache.avro.ipc.specific.SpecificRequestor
import org.apache.commons.lang3.RandomUtils
import org.apache.flume.source.avro
import org.apache.flume.source.avro.{AvroSourceProtocol, AvroFlumeEvent}
import org.jboss.netty.channel.ChannelPipeline
import org.jboss.netty.channel.socket.SocketChannel
import org.jboss.netty.channel.socket.nio.NioClientSocketChannelFactory
import org.jboss.netty.handler.codec.compression.{ZlibDecoder, ZlibEncoder}

import org.apache.spark.util.Utils
import org.apache.spark.SparkConf

  private class CompressionChannelFactory(compressionLevel: Int)
    extends NioClientSocketChannelFactory {

    override def newChannel(pipeline: ChannelPipeline): SocketChannel = {
      val encoder = new ZlibEncoder(compressionLevel)
      pipeline.addFirst("deflater", encoder)
      pipeline.addFirst("inflater", new ZlibDecoder())
      super.newChannel(pipeline)
    }
  }
}
Example 2
Source File: HiveSourceConfig.scala From stream-reactor with Apache License 2.0
package com.landoop.streamreactor.connect.hive.source.config

import java.util.Collections

import cats.data.NonEmptyList
import com.landoop.streamreactor.connect.hive.{DatabaseName, HadoopConfiguration, TableName, Topic}
import com.landoop.streamreactor.connect.hive.kerberos.Kerberos

import scala.collection.JavaConverters._

case class ProjectionField(name: String, alias: String)

case class HiveSourceConfig(dbName: DatabaseName,
                            kerberos: Option[Kerberos],
                            hadoopConfiguration: HadoopConfiguration,
                            tableOptions: Set[SourceTableOptions] = Set.empty,
                            pollSize: Int = 1024)

case class SourceTableOptions(
  tableName: TableName,
  topic: Topic,
  projection: Option[NonEmptyList[ProjectionField]] = None,
  limit: Int = Int.MaxValue
)

object HiveSourceConfig {

  def fromProps(props: Map[String, String]): HiveSourceConfig = {
    val config = HiveSourceConfigDefBuilder(props.asJava)
    val tables = config.getKCQL.map { kcql =>
      val fields = Option(kcql.getFields)
        .getOrElse(Collections.emptyList)
        .asScala
        .toList
        .map { field =>
          ProjectionField(field.getName, field.getAlias)
        }
      val projection = fields match {
        case Nil                              => None
        case ProjectionField("*", "*") :: Nil => None
        case _                                => NonEmptyList.fromList(fields)
      }
      SourceTableOptions(
        TableName(kcql.getSource),
        Topic(kcql.getTarget),
        projection,
        limit = if (kcql.getLimit < 1) Int.MaxValue else kcql.getLimit
      )
    }

    HiveSourceConfig(
      dbName = DatabaseName(props(HiveSourceConfigConstants.DatabaseNameKey)),
      tableOptions = tables,
      kerberos = Kerberos.from(config, HiveSourceConfigConstants),
      hadoopConfiguration = HadoopConfiguration.from(config, HiveSourceConfigConstants),
      pollSize = props
        .getOrElse(HiveSourceConfigConstants.PollSizeKey, 1024)
        .toString
        .toInt
    )
  }
}
Example 3
Source File: HiveSinkConfig.scala From stream-reactor with Apache License 2.0
package com.landoop.streamreactor.connect.hive.sink.config

import java.util.Collections

import cats.data.NonEmptyList
import com.datamountaineer.kcql.{Field, PartitioningStrategy, SchemaEvolution}
import com.landoop.streamreactor.connect.hive._
import com.landoop.streamreactor.connect.hive.formats.{HiveFormat, ParquetHiveFormat}
import com.landoop.streamreactor.connect.hive.kerberos.Kerberos
import com.landoop.streamreactor.connect.hive.sink.evolution.{AddEvolutionPolicy, EvolutionPolicy, IgnoreEvolutionPolicy, StrictEvolutionPolicy}
import com.landoop.streamreactor.connect.hive.sink.partitioning.{DynamicPartitionHandler, PartitionHandler, StrictPartitionHandler}
import com.landoop.streamreactor.connect.hive.sink.staging._

import scala.collection.JavaConverters._

case class HiveSinkConfig(dbName: DatabaseName,
                          filenamePolicy: FilenamePolicy = DefaultFilenamePolicy,
                          stageManager: StageManager = new StageManager(DefaultFilenamePolicy),
                          tableOptions: Set[TableOptions] = Set.empty,
                          kerberos: Option[Kerberos],
                          hadoopConfiguration: HadoopConfiguration)

case class TableOptions(tableName: TableName,
                        topic: Topic,
                        createTable: Boolean = false,
                        overwriteTable: Boolean = false,
                        partitioner: PartitionHandler = new DynamicPartitionHandler(),
                        evolutionPolicy: EvolutionPolicy = IgnoreEvolutionPolicy,
                        projection: Option[NonEmptyList[Field]] = None,
                        // when creating a new table, the table will be partitioned with the fields set below
                        partitions: Seq[PartitionField] = Nil,
                        // the format used when creating a new table, if the table exists
                        // then the format will be derived from the table parameters
                        format: HiveFormat = ParquetHiveFormat,
                        commitPolicy: CommitPolicy = DefaultCommitPolicy(Some(1000 * 1000 * 128), None, None),
                        location: Option[String] = None)

object HiveSinkConfig {

  def fromProps(props: Map[String, String]): HiveSinkConfig = {

    import scala.concurrent.duration._

    val config = HiveSinkConfigDefBuilder(props.asJava)
    val tables = config.getKCQL.map { kcql =>
      val fields = Option(kcql.getFields).getOrElse(Collections.emptyList).asScala.toList
      val projection = if (fields.size == 1 && fields.head.getName == "*") None else NonEmptyList.fromList(fields)

      val flushSize = Option(kcql.getWithFlushSize).filter(_ > 0)
      val flushInterval = Option(kcql.getWithFlushInterval).filter(_ > 0).map(_.seconds)
      val flushCount = Option(kcql.getWithFlushCount).filter(_ > 0)

      // we must have at least one way of committing files
      val finalFlushSize = Some(flushSize.fold(1000L * 1000 * 128)(identity)) //if (flushSize.isEmpty ) Some(1000L * 1000 * 128) else flushSize

      val format: HiveFormat = HiveFormat(Option(kcql.getStoredAs).map(_.toLowerCase).getOrElse("parquet"))

      TableOptions(
        TableName(kcql.getTarget),
        Topic(kcql.getSource),
        kcql.isAutoCreate,
        kcql.getWithOverwrite,
        Option(kcql.getWithPartitioningStrategy).getOrElse(PartitioningStrategy.DYNAMIC) match {
          case PartitioningStrategy.DYNAMIC => new DynamicPartitionHandler()
          case PartitioningStrategy.STRICT  => StrictPartitionHandler
        },
        format = format,
        projection = projection,
        evolutionPolicy = Option(kcql.getWithSchemaEvolution).getOrElse(SchemaEvolution.MATCH) match {
          case SchemaEvolution.ADD    => AddEvolutionPolicy
          case SchemaEvolution.IGNORE => IgnoreEvolutionPolicy
          case SchemaEvolution.MATCH  => StrictEvolutionPolicy
        },
        partitions = Option(kcql.getPartitionBy).map(_.asScala).getOrElse(Nil).map(name => PartitionField(name)).toVector,
        commitPolicy = DefaultCommitPolicy(
          fileSize = finalFlushSize,
          interval = flushInterval,
          fileCount = flushCount
        ),
        location = Option(kcql.getWithTableLocation)
      )
    }

    HiveSinkConfig(
      dbName = DatabaseName(props(SinkConfigSettings.DatabaseNameKey)),
      filenamePolicy = DefaultFilenamePolicy,
      stageManager = new StageManager(DefaultFilenamePolicy),
      tableOptions = tables,
      kerberos = Kerberos.from(config, SinkConfigSettings),
      hadoopConfiguration = HadoopConfiguration.from(config, SinkConfigSettings)
    )
  }
}
Example 4
Source File: Consumer.scala From fusion-data with Apache License 2.0
package kafkasample.demo

import java.util.{ Collections, Properties }
import java.util.concurrent.TimeUnit

import org.apache.kafka.clients.consumer.KafkaConsumer

object Consumer {
  @volatile private var isStop = false

  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put("bootstrap.servers", "localhost:9092")
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    props.put("group.id", "CountryCounter")
    val consumer = new KafkaConsumer[String, String](props)

    val thread = new Thread() {
      override def run(): Unit = Consumer.run(consumer)
    }

    try {
      thread.start()
    } finally {
      TimeUnit.SECONDS.sleep(50)
      isStop = true
      thread.join()
      consumer.close()
    }
  }

  private def run(consumer: KafkaConsumer[String, String]): Unit = {
    consumer.subscribe(Collections.singleton("customerCountries"))
    while (!isStop && !Thread.currentThread().isInterrupted) {
      val records = consumer.poll(java.time.Duration.ofMillis(100))
      records.forEach { record =>
        println(s"topic = ${record.topic()}, partition = ${record.partition()}, offset = ${record
          .offset()}, key: ${record.key()}, value = ${record.value()}")
      }
      consumer.commitAsync()
    }
  }
}
Example 5
Source File: EventManager.scala From Mycat-spider with Apache License 2.0
package turbo.crawler.power

import java.util.ArrayList
import java.util.Hashtable
import java.util.concurrent.Callable
import java.util.concurrent.FutureTask
import java.util.concurrent.ScheduledThreadPoolExecutor

import turbo.crawler.Lifecycle
import turbo.crawler.Logable
import turbo.crawler.StringAdapter
import java.util.Collections

/**
 * Event manager
 * @author mclaren
 */
object EventManager extends Lifecycle with Logable with StringAdapter with MessageDriven {
  /**
   * Thread pool
   */
  private val exec = new ScheduledThreadPoolExecutor(sysprop("fetch.threads", "100").toInt)

  /**
   * Registered event handlers
   */
  private val handlers = new Hashtable[String, java.util.List[Evt => Unit]]()

  /**
   * Read a JVM system property, falling back to a default
   */
  private def sysprop(key: String, default: String) = {
    var matched = System.getProperty(key)
    if (isNotEmpty(matched)) matched else default
  }

  /**
   * Shut the system down
   */
  override def shutdown = {
    try {
      while (true) {
        if (exec.getActiveCount == 0) {
          exec.shutdown()
          throw new RuntimeException()
        }
      }
    } catch {
      case e: Exception => logger.info("Fetch completed and shutdown concurrent fetchers.")
    }
  }

  /**
   * Register an event listener with the system
   */
  def attachEvent(eventId: String, handler: Evt => Unit): Unit = {
    handlers.synchronized {
      var hds = handlers.get(eventId)
      if (hds == null) hds = new ArrayList[Evt => Unit]()
      hds.add(handler)
      handlers.put(eventId, hds)
    }
  }

  /**
   * Dispatch an event to its handlers
   */
  override def fireEvent(evt: Evt): Unit = {
    if (handlers.containsKey(evt.eventId))
      new WrapList[Evt => Unit](handlers.get(evt.eventId)).foreach(fd => dispatchEventConcurrently(evt, fd))
    else
      logger.error("No handlers for event" + evt)
  }

  /**
   * Run a single handler concurrently on the thread pool
   */
  private def dispatchEventConcurrently(evt: Evt, f: Evt => Unit) = {
    var task = new FutureTask[Unit](new Callable[Unit]() {
      def call: Unit = f(evt)
    })
    this.exec.submit(task)
  }

  /**
   * Wrap a Java list with a Scala-style foreach
   */
  private class WrapList[T](list: java.util.List[T]) {
    def foreach(f: T => Unit) = for (i <- 0 to list.size() - 1) f(list.get(i))
  }
}
Example 6
Source File: CustomActivationExample.scala From dl4scala with MIT License
package org.dl4scala.examples.misc.activationfunctions import java.util.{Collections, Random} import org.deeplearning4j.datasets.iterator.impl.ListDataSetIterator import org.deeplearning4j.nn.api.OptimizationAlgorithm import org.deeplearning4j.nn.conf.layers.{DenseLayer, OutputLayer} import org.deeplearning4j.nn.conf.{NeuralNetConfiguration, Updater} import org.deeplearning4j.nn.multilayer.MultiLayerNetwork import org.deeplearning4j.nn.weights.WeightInit import org.deeplearning4j.optimize.listeners.ScoreIterationListener import org.nd4j.linalg.activations.Activation import org.nd4j.linalg.api.ndarray.INDArray import org.nd4j.linalg.dataset.DataSet import org.nd4j.linalg.dataset.api.iterator.DataSetIterator import org.nd4j.linalg.factory.Nd4j import org.nd4j.linalg.lossfunctions.LossFunctions object CustomActivationExample { val seed = 12345 val iterations = 1 val nEpochs = 500 val nSamples = 1000 val batchSize = 100 val learningRate = 0.001 var MIN_RANGE = 0 var MAX_RANGE = 3 val rng = new Random(seed) def main(args: Array[String]): Unit = { val iterator = getTrainingData(batchSize, rng) // Create the network val numInput = 2 val numOutputs = 1 val nHidden = 10 val net = new MultiLayerNetwork(new NeuralNetConfiguration.Builder() .seed(seed) .iterations(iterations) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .learningRate(learningRate) .weightInit(WeightInit.XAVIER) .updater(Updater.NESTEROVS) .list() //INSTANTIATING CUSTOM ACTIVATION FUNCTION here as follows //Refer to CustomActivation class for more details on implementation .layer(0, new DenseLayer.Builder().nIn(numInput).nOut(nHidden) .activation(new CustomActivation()) .build()) .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MSE) .activation(Activation.IDENTITY) .nIn(nHidden).nOut(numOutputs).build()) .pretrain(false).backprop(true).build() ) net.init() net.setListeners(new ScoreIterationListener(100)) (0 until nEpochs).foreach{_ => iterator.reset() net.fit(iterator) } // Test the addition of 2 numbers (Try different numbers here) val input: INDArray = Nd4j.create(Array[Double](0.111111, 0.3333333333333), Array[Int](1, 2)) val out: INDArray = net.output(input, false) System.out.println(out) } private def getTrainingData(batchSize: Int, rand: Random): DataSetIterator = { val sum = new Array[Double](nSamples) val input1 = new Array[Double](nSamples) val input2 = new Array[Double](nSamples) (0 until nSamples).foreach{i => input1(i) = MIN_RANGE + (MAX_RANGE - MIN_RANGE) * rand.nextDouble input2(i) = MIN_RANGE + (MAX_RANGE - MIN_RANGE) * rand.nextDouble sum(i) = input1(i) + input2(i) } val inputNDArray1 = Nd4j.create(input1, Array[Int](nSamples, 1)) val inputNDArray2 = Nd4j.create(input2, Array[Int](nSamples, 1)) val inputNDArray = Nd4j.hstack(inputNDArray1, inputNDArray2) val outPut = Nd4j.create(sum, Array[Int](nSamples, 1)) val dataSet = new DataSet(inputNDArray, outPut) val listDs = dataSet.asList Collections.shuffle(listDs, rng) new ListDataSetIterator(listDs, batchSize) } }
Example 7
Source File: EarlyStoppingMNIST.scala From dl4scala with MIT License
package org.dl4scala.examples.misc.earlystopping

import java.util.Collections
import java.util.concurrent.TimeUnit

import org.apache.commons.io.FilenameUtils
import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator
import org.deeplearning4j.earlystopping.EarlyStoppingConfiguration
import org.deeplearning4j.earlystopping.saver.LocalFileModelSaver
import org.deeplearning4j.earlystopping.scorecalc.DataSetLossCalculator
import org.deeplearning4j.earlystopping.termination.{MaxEpochsTerminationCondition, MaxTimeIterationTerminationCondition}
import org.deeplearning4j.earlystopping.trainer.EarlyStoppingTrainer
import org.deeplearning4j.nn.api.OptimizationAlgorithm
import org.deeplearning4j.nn.conf.inputs.InputType
import org.deeplearning4j.nn.conf.layers.{ConvolutionLayer, DenseLayer, OutputLayer, SubsamplingLayer}
import org.deeplearning4j.nn.conf.{NeuralNetConfiguration, Updater}
import org.deeplearning4j.nn.weights.WeightInit
import org.nd4j.linalg.activations.Activation
import org.nd4j.linalg.lossfunctions.LossFunctions

import scala.collection.JavaConverters._
import java.util

object EarlyStoppingMNIST {
  def main(args: Array[String]): Unit = {
    // Configure network:
    val nChannels = 1
    val outputNum = 10
    val batchSize = 25
    val iterations = 1
    val seed = 123

    val configuration = new NeuralNetConfiguration.Builder()
      .seed(seed)
      .iterations(iterations)
      .regularization(true).l2(0.0005)
      .learningRate(0.02)
      .weightInit(WeightInit.XAVIER)
      .activation(Activation.RELU)
      .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
      .updater(Updater.NESTEROVS)
      .list()
      .layer(0, new ConvolutionLayer.Builder(5, 5)
        .nIn(nChannels)
        .stride(1, 1)
        .nOut(20).dropOut(0.5)
        .build())
      .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
        .kernelSize(2, 2)
        .stride(2, 2)
        .build())
      .layer(2, new DenseLayer.Builder()
        .nOut(500).build())
      .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
        .nOut(outputNum)
        .activation(Activation.SOFTMAX)
        .build())
      .setInputType(InputType.convolutionalFlat(28, 28, 1)) // See note in LenetMnistExample
      .backprop(true).pretrain(false).build()

    // Get data:
    val mnistTrain1024 = new MnistDataSetIterator(batchSize, 1024, false, true, true, 12345)
    val mnistTest512 = new MnistDataSetIterator(batchSize, 512, false, false, true, 12345)

    val tempDir = System.getProperty("java.io.tmpdir")
    val exampleDirectory = FilenameUtils.concat(tempDir, "DL4JEarlyStoppingExample/")
    val saver = new LocalFileModelSaver(exampleDirectory)

    val esConf = new EarlyStoppingConfiguration.Builder()
      .epochTerminationConditions(new MaxEpochsTerminationCondition(50)) // Max of 50 epochs
      .evaluateEveryNEpochs(1)
      .iterationTerminationConditions(new MaxTimeIterationTerminationCondition(20, TimeUnit.MINUTES)) // Max of 20 minutes
      .scoreCalculator(new DataSetLossCalculator(mnistTest512, true)) // Calculate test set score
      .modelSaver(saver)
      .build()

    val trainer = new EarlyStoppingTrainer(esConf, configuration, mnistTrain1024)

    // Conduct early stopping training:
    val result = trainer.fit()
    println("Termination reason: " + result.getTerminationReason)
    println("Termination details: " + result.getTerminationDetails)
    println("Total epochs: " + result.getTotalEpochs)
    println("Best epoch number: " + result.getBestModelEpoch)
    println("Score at best epoch: " + result.getBestModelScore)

    // Print score vs. epoch
    val scoreVsEpoch = result.getScoreVsEpoch
    val list = new util.ArrayList[Integer](scoreVsEpoch.keySet)
    Collections.sort(list)
    System.out.println("Score vs. Epoch:")
    for (i <- list.asScala) {
      println(i + "\t" + scoreVsEpoch.get(i))
    }
  }
}
Example 8
Source File: ZipkinModuleSpec.scala From play-zipkin-tracing with Apache License 2.0
package brave.play.module import java.util.Collections import akka.actor.CoordinatedShutdown import brave.Tracing import brave.play.{ZipkinTraceService, ZipkinTraceServiceLike} import org.scalatest.AsyncFlatSpec import play.api.inject.guice.GuiceApplicationBuilder import zipkin2.reporter.Sender import zipkin2.reporter.okhttp3.OkHttpSender class ZipkinModuleSpec extends AsyncFlatSpec { val injector = new GuiceApplicationBuilder() .bindings(new ZipkinModule) .injector() it should "provide an okhttp sender" in { val sender = injector.instanceOf[Sender] assert(sender.isInstanceOf[OkHttpSender]) } it should "eventually close the sender" in { // provisioning the sender so we can tell if it is closed on shutdown val sender = injector.instanceOf[Sender] // stopping the application should close the sender! injector.instanceOf[CoordinatedShutdown].run(CoordinatedShutdown.UnknownReason) map { _ => val thrown = intercept[Exception] { sender.sendSpans(Collections.emptyList[Array[Byte]]).execute() } assert(thrown.getMessage === "closed") } } it should "provide a tracing component" in instanceOfTracing { tracing => assert(Tracing.current() != null) assert(Tracing.current() == tracing) } it should "eventually close the tracing component" in instanceOfTracing { tracing => // stopping the application should close the tracing component! injector.instanceOf[CoordinatedShutdown].run(CoordinatedShutdown.UnknownReason) map { _ => assert(Tracing.current() == null) } } private def instanceOfTracing[A](test: Tracing => A): A = { val tracing = injector.instanceOf[Tracing] try { test(tracing) } finally { // Ensures there is no active Tracing object tracing.close() } } it should "provide a zipkin trace service" in { // TODO: dies due to missing dispatcher val service = injector.instanceOf[ZipkinTraceServiceLike] assert(service.isInstanceOf[ZipkinTraceService]) } }
Example 9
Source File: PrometheusControllerSpec.scala From play-prometheus-filters with MIT License
package com.github.stijndehaes.playprometheusfilters.controllers

import java.util.Collections

import io.prometheus.client.Collector.MetricFamilySamples
import io.prometheus.client.{Collector, CollectorRegistry}
import org.mockito.Mockito._
import org.scalatest.mockito.MockitoSugar
import org.scalatestplus.play.PlaySpec
import play.api.mvc.Results
import play.api.test.FakeRequest
import play.api.test.Helpers._

class PrometheusControllerSpec extends PlaySpec with Results with MockitoSugar {

  "Get metrics method" should {
    "Return the prometheus metrics" in {
      val collectorRegistry = mock[CollectorRegistry]
      val metricsFamilySample = new MetricFamilySamples("test", Collector.Type.COUNTER, "help", Collections.emptyList())
      when(collectorRegistry.metricFamilySamples()).thenReturn(new java.util.Vector(Collections.singleton(metricsFamilySample)).elements)

      val client = new PrometheusController(collectorRegistry, stubControllerComponents())

      val request = FakeRequest(GET, "/metrics")

      val result = client.getMetrics.apply(request)

      status(result) mustBe OK
      contentAsString(result) mustBe "# HELP test help\n# TYPE test counter\n"
    }
  }
}
Example 10
Source File: HadoopVersionSuite.scala From cloud-integration with Apache License 2.0
package com.cloudera.spark.cloud.common

import java.util
import java.util.Collections

import scala.collection.JavaConverters._

import CloudSuite._
import org.scalatest.{FunSuite, Matchers}

import org.apache.spark.internal.Logging

class HadoopVersionSuite extends FunSuite with Logging with Matchers {

  test("Sysprops") {
    val props = System.getProperties
    val list = new util.ArrayList[String](props.stringPropertyNames())
    Collections.sort(list)
    val plist = list.asScala
      .filter(k => (!k.startsWith("java.") && !k.startsWith("sun.")))
      .map(key => s"$key = ${props.getProperty(key)}")
      .mkString("\n")
    logInfo(s"Properties:\n$plist")
  }

  test("PropagatedValues") {
    val mapped = loadConfiguration().asScala
      .filter { entry =>
        val k = entry.getKey
        k.startsWith("fs.s3a") && !k.contains("key")
      }
      .map(entry => s"${entry.getKey} = ${entry.getValue}").toList.sorted
    val list = mapped.mkString("\n")
    logInfo(s"S3A config options:\n${list}")
  }
}
Example 11
Source File: FiltersSuite.scala From BigDatalog with Apache License 2.0
package org.apache.spark.sql.hive.client

import java.util.Collections

import org.apache.hadoop.hive.metastore.api.FieldSchema
import org.apache.hadoop.hive.serde.serdeConstants

import org.apache.spark.{Logging, SparkFunSuite}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types._

class FiltersSuite extends SparkFunSuite with Logging {
  private val shim = new Shim_v0_13

  private val testTable = new org.apache.hadoop.hive.ql.metadata.Table("default", "test")
  private val varCharCol = new FieldSchema()
  varCharCol.setName("varchar")
  varCharCol.setType(serdeConstants.VARCHAR_TYPE_NAME)
  testTable.setPartCols(Collections.singletonList(varCharCol))

  filterTest("string filter",
    (a("stringcol", StringType) > Literal("test")) :: Nil,
    "stringcol > \"test\"")

  filterTest("string filter backwards",
    (Literal("test") > a("stringcol", StringType)) :: Nil,
    "\"test\" > stringcol")

  filterTest("int filter",
    (a("intcol", IntegerType) === Literal(1)) :: Nil,
    "intcol = 1")

  filterTest("int filter backwards",
    (Literal(1) === a("intcol", IntegerType)) :: Nil,
    "1 = intcol")

  filterTest("int and string filter",
    (Literal(1) === a("intcol", IntegerType)) :: (Literal("a") === a("strcol", IntegerType)) :: Nil,
    "1 = intcol and \"a\" = strcol")

  filterTest("skip varchar",
    (Literal("") === a("varchar", StringType)) :: Nil,
    "")

  private def filterTest(name: String, filters: Seq[Expression], result: String) = {
    test(name) {
      val converted = shim.convertFilters(testTable, filters)
      if (converted != result) {
        fail(
          s"Expected filters ${filters.mkString(",")} to convert to '$result' but got '$converted'")
      }
    }
  }

  private def a(name: String, dataType: DataType) = AttributeReference(name, dataType)()
}
Example 12
Source File: HiveSourceTest.scala From stream-reactor with Apache License 2.0
package com.landoop.streamreactor.hive.it import java.util.Collections import java.util.concurrent.TimeUnit import org.scalatest.concurrent.Eventually import org.scalatest.matchers.should.Matchers import org.scalatest.time.{Millis, Span} import org.scalatest.wordspec.AnyWordSpec import scala.io.Source class HiveSourceTest extends AnyWordSpec with Matchers with PersonTestData with Eventually with HiveTests { private implicit val patience: PatienceConfig = PatienceConfig(Span(60000, Millis), Span(5000, Millis)) "Hive" should { "read non partitioned table" in { val count = 2000L val inputTopic = createTopic() val sinkTaskDef = Source.fromInputStream(getClass.getResourceAsStream("/hive_sink_task_no_partitions.json")).getLines().mkString("\n") .replace("{{TOPIC}}", inputTopic) .replace("{{TABLE}}", inputTopic) .replace("{{NAME}}", inputTopic) postTask(sinkTaskDef) val producer = stringStringProducer() writeRecords(producer, inputTopic, JacksonSupport.mapper.writeValueAsString(person), count) producer.close(30, TimeUnit.SECONDS) // we now should have 1000 records in hive which we can test via jdbc eventually { withConn { conn => val stmt = conn.createStatement val rs = stmt.executeQuery(s"select count(*) from $inputTopic") rs.next() rs.getLong(1) shouldBe count } } stopTask(inputTopic) // now we can read them back in val outputTopic = createTopic() val sourceTaskDef = Source.fromInputStream(getClass.getResourceAsStream("/hive_source_task.json")).getLines().mkString("\n") .replace("{{TOPIC}}", outputTopic) .replace("{{TABLE}}", inputTopic) .replace("{{NAME}}", outputTopic) postTask(sourceTaskDef) // we should have 1000 records on the outputTopic var records = 0L val consumer = stringStringConsumer("earliest") consumer.subscribe(Collections.singleton(outputTopic)) eventually { records = records + readRecords(consumer, outputTopic, 2, TimeUnit.SECONDS).size records shouldBe count } stopTask(outputTopic) } } }
Example 13
Source File: ExecutionContextExecutorServiceBridge.scala From odinson with Apache License 2.0
package ai.lum.odinson.utils

import java.util.Collections
import java.util.concurrent.{ AbstractExecutorService, TimeUnit }

import scala.concurrent.{ ExecutionContext, ExecutionContextExecutorService }

object ExecutionContextExecutorServiceBridge {
  def apply(ec: ExecutionContext): ExecutionContextExecutorService = ec match {
    case null => throw null
    case eces: ExecutionContextExecutorService => eces
    case other => new AbstractExecutorService with ExecutionContextExecutorService {
      override def prepare(): ExecutionContext = other
      override def isShutdown = false
      override def isTerminated = false
      override def shutdown() = ()
      override def shutdownNow() = Collections.emptyList[Runnable]
      override def execute(runnable: Runnable): Unit = other execute runnable
      override def reportFailure(t: Throwable): Unit = other reportFailure t
      override def awaitTermination(length: Long, unit: TimeUnit): Boolean = false
    }
  }
}
Example 14
Source File: FiltersSuite.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.hive.client import java.util.Collections import org.apache.hadoop.hive.metastore.api.FieldSchema import org.apache.hadoop.hive.serde.serdeConstants import org.apache.spark.SparkFunSuite import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ class FiltersSuite extends SparkFunSuite with Logging with PlanTest { private val shim = new Shim_v0_13 private val testTable = new org.apache.hadoop.hive.ql.metadata.Table("default", "test") private val varCharCol = new FieldSchema() varCharCol.setName("varchar") varCharCol.setType(serdeConstants.VARCHAR_TYPE_NAME) testTable.setPartCols(Collections.singletonList(varCharCol)) filterTest("string filter", (a("stringcol", StringType) > Literal("test")) :: Nil, "stringcol > \"test\"") filterTest("string filter backwards", (Literal("test") > a("stringcol", StringType)) :: Nil, "\"test\" > stringcol") filterTest("int filter", (a("intcol", IntegerType) === Literal(1)) :: Nil, "intcol = 1") filterTest("int filter backwards", (Literal(1) === a("intcol", IntegerType)) :: Nil, "1 = intcol") filterTest("int and string filter", (Literal(1) === a("intcol", IntegerType)) :: (Literal("a") === a("strcol", IntegerType)) :: Nil, "1 = intcol and \"a\" = strcol") filterTest("skip varchar", (Literal("") === a("varchar", StringType)) :: Nil, "") filterTest("SPARK-19912 String literals should be escaped for Hive metastore partition pruning", (a("stringcol", StringType) === Literal("p1\" and q=\"q1")) :: (Literal("p2\" and q=\"q2") === a("stringcol", StringType)) :: Nil, """stringcol = 'p1" and q="q1' and 'p2" and q="q2' = stringcol""") private def filterTest(name: String, filters: Seq[Expression], result: String) = { test(name) { withSQLConf(SQLConf.ADVANCED_PARTITION_PREDICATE_PUSHDOWN.key -> "true") { val converted = shim.convertFilters(testTable, filters) if (converted != result) { fail(s"Expected ${filters.mkString(",")} to convert to '$result' but got '$converted'") } } } } test("turn on/off ADVANCED_PARTITION_PREDICATE_PUSHDOWN") { import org.apache.spark.sql.catalyst.dsl.expressions._ Seq(true, false).foreach { enabled => withSQLConf(SQLConf.ADVANCED_PARTITION_PREDICATE_PUSHDOWN.key -> enabled.toString) { val filters = (Literal(1) === a("intcol", IntegerType) || Literal(2) === a("intcol", IntegerType)) :: Nil val converted = shim.convertFilters(testTable, filters) if (enabled) { assert(converted == "(1 = intcol or 2 = intcol)") } else { assert(converted.isEmpty) } } } } private def a(name: String, dataType: DataType) = AttributeReference(name, dataType)() }
Example 15
Source File: FlumeTestUtils.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.streaming.flume import java.net.{InetSocketAddress, ServerSocket} import java.nio.ByteBuffer import java.nio.charset.StandardCharsets import java.util.{List => JList} import java.util.Collections import scala.collection.JavaConverters._ import org.apache.avro.ipc.NettyTransceiver import org.apache.avro.ipc.specific.SpecificRequestor import org.apache.commons.lang3.RandomUtils import org.apache.flume.source.avro import org.apache.flume.source.avro.{AvroFlumeEvent, AvroSourceProtocol} import org.jboss.netty.channel.ChannelPipeline import org.jboss.netty.channel.socket.SocketChannel import org.jboss.netty.channel.socket.nio.NioClientSocketChannelFactory import org.jboss.netty.handler.codec.compression.{ZlibDecoder, ZlibEncoder} import org.apache.spark.SparkConf import org.apache.spark.util.Utils private class CompressionChannelFactory(compressionLevel: Int) extends NioClientSocketChannelFactory { override def newChannel(pipeline: ChannelPipeline): SocketChannel = { val encoder = new ZlibEncoder(compressionLevel) pipeline.addFirst("deflater", encoder) pipeline.addFirst("inflater", new ZlibDecoder()) super.newChannel(pipeline) } } }
Example 16
Source File: Decoding.scala From avro4s with Apache License 2.0
package benchmarks import java.io.ByteArrayOutputStream import java.nio.ByteBuffer import java.util.Collections import benchmarks.record._ import com.sksamuel.avro4s._ import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord} import org.apache.avro.io.{DecoderFactory, EncoderFactory} import org.apache.avro.util.ByteBufferInputStream import org.openjdk.jmh.annotations._ import org.openjdk.jmh.infra.Blackhole object Decoding extends BenchmarkHelpers { @State(Scope.Thread) class Setup { val avroBytes = { import benchmarks.record.generated.AttributeValue._ import benchmarks.record.generated._ new RecordWithUnionAndTypeField(new ValidInt(255, t)).toByteBuffer } val avro4sBytes = encode(RecordWithUnionAndTypeField(AttributeValue.Valid[Int](255, t))) val (handrolledDecoder, handrolledReader) = { import benchmarks.handrolled_codecs._ implicit val codec: Codec[AttributeValue[Int]] = AttributeValueCodec[Int] implicit val schemaFor: SchemaFor[AttributeValue[Int]] = SchemaFor[AttributeValue[Int]](codec.schema) val recordSchemaFor = SchemaFor[RecordWithUnionAndTypeField] val decoder = Decoder[RecordWithUnionAndTypeField].withSchema(recordSchemaFor) val reader = new GenericDatumReader[GenericRecord](recordSchemaFor.schema) (decoder, reader) } val (avro4sDecoder, avro4sReader) = { val decoder = Decoder[RecordWithUnionAndTypeField] val reader = new GenericDatumReader[GenericRecord](decoder.schema) (decoder, reader) } } def encode[T: Encoder: SchemaFor](value: T): ByteBuffer = { val outputStream = new ByteArrayOutputStream(512) val encoder = Encoder[T] val schema = AvroSchema[T] val record = encoder.encode(value).asInstanceOf[GenericRecord] val writer = new GenericDatumWriter[GenericRecord](schema) val enc = EncoderFactory.get().directBinaryEncoder(outputStream, null) writer.write(record, enc) ByteBuffer.wrap(outputStream.toByteArray) } } class Decoding extends CommonParams with BenchmarkHelpers { import Decoding._ def decode[T](bytes: ByteBuffer, decoder: Decoder[T], reader: GenericDatumReader[GenericRecord]): T = { val dec = DecoderFactory.get().binaryDecoder(new ByteBufferInputStream(Collections.singletonList(bytes.duplicate)), null) val record = reader.read(null, dec) decoder.decode(record) } @Benchmark def avroSpecificRecord(setup: Setup, blackhole: Blackhole) = { import benchmarks.record.generated._ blackhole.consume(RecordWithUnionAndTypeField.fromByteBuffer(setup.avroBytes.duplicate)) } @Benchmark def avro4sHandrolled(setup: Setup, blackhole: Blackhole) = blackhole.consume(decode(setup.avro4sBytes, setup.handrolledDecoder, setup.handrolledReader)) @Benchmark def avro4sGenerated(setup: Setup, blackhole: Blackhole) = blackhole.consume(decode(setup.avro4sBytes, setup.avro4sDecoder, setup.avro4sReader)) }
Example 17
Source File: ElasticSearchResultHandlerSpec.scala From haystack-traces with Apache License 2.0
package com.expedia.www.haystack.trace.indexer.unit import java.util import java.util.Collections import com.codahale.metrics.Timer import com.expedia.www.haystack.commons.metrics.MetricsSupport import com.expedia.www.haystack.commons.retries.RetryOperation import com.expedia.www.haystack.trace.indexer.metrics.AppMetricNames import com.expedia.www.haystack.trace.indexer.writers.es.ElasticSearchResultHandler import com.google.gson.Gson import io.searchbox.core.BulkResult import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException import org.scalatest.easymock.EasyMockSugar import org.scalatest.{FunSpec, Matchers} class ElasticSearchResultHandlerSpec extends FunSpec with Matchers with EasyMockSugar with MetricsSupport { private val esWriteFailureMeter = metricRegistry.meter(AppMetricNames.ES_WRITE_FAILURE) describe("Trace Index Result Handler") { it("should complete with success if no failures reported") { val retryCallback = mock[RetryOperation.Callback] val timer = mock[Timer.Context] val bulkResult = mock[BulkResult] expecting { retryCallback.onResult(bulkResult) timer.close() bulkResult.getFailedItems.andReturn(Collections.emptyList()).anyTimes() } whenExecuting(retryCallback, timer, bulkResult) { val handler = new ElasticSearchResultHandler(timer, esWriteFailureMeter, retryCallback) handler.completed(bulkResult) esWriteFailureMeter.getCount shouldBe 0 } } it("should complete with success but mark the failures if happen") { val retryCallback = mock[RetryOperation.Callback] val timer = mock[Timer.Context] val bulkResult = mock[BulkResult] val outer = new BulkResult(new Gson()) val resultItem = new outer.BulkResultItem("op", "index", "type", "1", 400, "error", 1, "errorType", "errorReason") expecting { retryCallback.onResult(bulkResult) timer.close() bulkResult.getFailedItems.andReturn(util.Arrays.asList(resultItem)).anyTimes() } whenExecuting(retryCallback, timer, bulkResult) { val handler = new ElasticSearchResultHandler(timer, esWriteFailureMeter, retryCallback) val initialFailures = esWriteFailureMeter.getCount handler.completed(bulkResult) esWriteFailureMeter.getCount - initialFailures shouldBe 1 } } it("should report failure and mark the number of failures, and perform retry on any exception") { val retryCallback = mock[RetryOperation.Callback] val timer = mock[Timer.Context] val bulkResult = mock[BulkResult] val error = new RuntimeException expecting { retryCallback.onError(error, retry = true) timer.close() } whenExecuting(retryCallback, timer, bulkResult) { val handler = new ElasticSearchResultHandler(timer, esWriteFailureMeter, retryCallback) val initialFailures = esWriteFailureMeter.getCount handler.failed(error) esWriteFailureMeter.getCount - initialFailures shouldBe 1 } } it("should report failure and mark the number of failures and perform function on elastic search specific exception") { val retryCallback = mock[RetryOperation.Callback] val timer = mock[Timer.Context] val bulkResult = mock[BulkResult] val error = new EsRejectedExecutionException("too many requests") expecting { retryCallback.onError(error, retry = true) timer.close() } whenExecuting(retryCallback, timer, bulkResult) { val handler = new ElasticSearchResultHandler(timer, esWriteFailureMeter, retryCallback) val initialFailures = esWriteFailureMeter.getCount handler.failed(error) esWriteFailureMeter.getCount - initialFailures shouldBe 1 } } } }
Example 18
Source File: AwsNodeDiscoverer.scala From haystack-traces with Apache License 2.0
package com.expedia.www.haystack.trace.storage.backends.cassandra.client

import java.util.Collections

import com.amazonaws.regions.{Region, Regions}
import com.amazonaws.services.ec2.AmazonEC2Client
import com.amazonaws.services.ec2.model.{DescribeInstancesRequest, Filter, Instance, InstanceStateName}
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters._

object AwsNodeDiscoverer {
  private val LOGGER = LoggerFactory.getLogger(AwsNodeDiscoverer.getClass)

  private[haystack] def discover(client: AmazonEC2Client, tags: Map[String, String]): Seq[String] = {
    val filters = tags.map { case (key, value) => new Filter("tag:" + key, Collections.singletonList(value)) }
    val request = new DescribeInstancesRequest().withFilters(filters.asJavaCollection)

    val result = client.describeInstances(request)

    val nodes = result.getReservations
      .asScala
      .flatMap(_.getInstances.asScala)
      .filter(isValidInstance)
      .map(_.getPrivateIpAddress)

    LOGGER.info("EC2 nodes discovered [{}]", nodes.mkString(","))
    nodes
  }

  // check if an ec2 instance is in running state
  private def isValidInstance(instance: Instance): Boolean = {
    // instance should be in running state
    InstanceStateName.Running.toString.equals(instance.getState.getName)
  }
}
Example 19
Source File: MockHelpers.scala From guardrail with MIT License
package helpers import com.fasterxml.jackson.databind.ObjectMapper import io.netty.handler.codec.http.EmptyHttpHeaders import java.io.ByteArrayInputStream import java.nio.ByteBuffer import java.nio.charset.StandardCharsets import java.util.Collections import java.util.concurrent.CompletableFuture import javax.ws.rs.container.AsyncResponse import org.asynchttpclient.Response import org.asynchttpclient.uri.Uri import org.mockito.{ ArgumentMatchersSugar, MockitoSugar } import org.scalatest.Assertions import scala.reflect.ClassTag object MockHelpers extends Assertions with MockitoSugar with ArgumentMatchersSugar { def mockAsyncResponse[T](future: CompletableFuture[T])(implicit cls: ClassTag[T]): AsyncResponse = { val asyncResponse = mock[AsyncResponse] when(asyncResponse.resume(any[T])) thenAnswer [AnyRef] { response => response match { case t: Throwable => future.completeExceptionally(t) case other: T => future.complete(other) case other => fail(s"AsyncResponse.resume expected an object of type ${cls.runtimeClass.getName}, but got ${other.getClass.getName} instead") } } asyncResponse } def mockAHCResponse[T](uri: String, status: Int, maybeBody: Option[T] = None)(implicit mapper: ObjectMapper): Response = { val response = mock[Response] when(response.getUri) thenReturn Uri.create(uri) when(response.hasResponseStatus) thenReturn true when(response.getStatusCode) thenReturn status when(response.getStatusText) thenReturn "Some Status" when(response.hasResponseHeaders) thenReturn true when(response.getHeaders) thenReturn EmptyHttpHeaders.INSTANCE when(response.getHeader(any)) thenReturn null when(response.getHeaders(any)) thenReturn Collections.emptyList() maybeBody match { case None => when(response.hasResponseBody) thenReturn true case Some(body) => val responseBytes = mapper.writeValueAsBytes(body) val responseStr = new String(responseBytes, StandardCharsets.UTF_8) when(response.hasResponseBody) thenReturn true when(response.getResponseBody(any)) thenReturn responseStr when(response.getResponseBody) thenReturn responseStr when(response.getResponseBodyAsStream) thenReturn new ByteArrayInputStream(responseBytes) when(response.getResponseBodyAsByteBuffer) thenReturn ByteBuffer.wrap(responseBytes) when(response.getResponseBodyAsBytes) thenReturn responseBytes } response } }
Example 20
Source File: KsqlConnectionSpec.scala From ksql-jdbc-driver with Apache License 2.0
package com.github.mmolimar.ksql.jdbc import java.sql.{Connection, SQLException, SQLFeatureNotSupportedException} import java.util.{Collections, Properties} import com.github.mmolimar.ksql.jdbc.utils.TestUtils._ import io.confluent.ksql.rest.client.{KsqlRestClient, MockableKsqlRestClient, RestResponse} import io.confluent.ksql.rest.entity._ import org.eclipse.jetty.http.HttpStatus.Code import org.scalamock.scalatest.MockFactory import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class KsqlConnectionSpec extends AnyWordSpec with Matchers with MockFactory { "A KsqlConnection" when { "validating specs" should { val values = KsqlConnectionValues("localhost", 8080, None, None, Map.empty[String, String]) val mockKsqlRestClient = mock[MockableKsqlRestClient] val ksqlConnection = new KsqlConnection(values, new Properties) { override def init: KsqlRestClient = mockKsqlRestClient } "throw not supported exception if not supported" in { val methods = implementedMethods[KsqlConnection] reflectMethods[KsqlConnection](methods = methods, implemented = false, obj = ksqlConnection) .foreach(method => { assertThrows[SQLFeatureNotSupportedException] { method() } }) } "work if implemented" in { assertThrows[SQLException] { ksqlConnection.isClosed } ksqlConnection.getTransactionIsolation should be(Connection.TRANSACTION_NONE) ksqlConnection.setClientInfo(new Properties) (mockKsqlRestClient.makeKsqlRequest(_: String)).expects(*) .returns(RestResponse.successful[KsqlEntityList](Code.OK, new KsqlEntityList)) ksqlConnection.setClientInfo("", "") assertThrows[SQLException] { (mockKsqlRestClient.makeKsqlRequest(_: String)).expects(*) .returns(RestResponse.erroneous(Code.INTERNAL_SERVER_ERROR, new KsqlErrorMessage(-1, "", Collections.emptyList[String]))) ksqlConnection.setClientInfo("", "") } ksqlConnection.isReadOnly should be(false) (mockKsqlRestClient.makeStatusRequest _: () => RestResponse[CommandStatuses]).expects .returns(RestResponse.successful[CommandStatuses] (Code.OK, new CommandStatuses(Collections.emptyMap[CommandId, CommandStatus.Status]))) ksqlConnection.isValid(0) should be(true) Option(ksqlConnection.getMetaData) should not be None Option(ksqlConnection.createStatement) should not be None assertThrows[SQLFeatureNotSupportedException] { ksqlConnection.createStatement(-1, -1) } ksqlConnection.setAutoCommit(true) ksqlConnection.setAutoCommit(false) ksqlConnection.getAutoCommit should be(false) ksqlConnection.getSchema should be(None.orNull) ksqlConnection.getWarnings should be(None.orNull) ksqlConnection.getCatalog should be(None.orNull) ksqlConnection.setCatalog("test") ksqlConnection.getCatalog should be(None.orNull) (mockKsqlRestClient.close _).expects ksqlConnection.close() ksqlConnection.isClosed should be(true) ksqlConnection.commit() } } } "A ConnectionNotSupported" when { "validating specs" should { "throw not supported exception if not supported" in { val resultSet = new ConnectionNotSupported reflectMethods[ConnectionNotSupported](methods = Seq.empty, implemented = false, obj = resultSet) .foreach(method => { assertThrows[SQLFeatureNotSupportedException] { method() } }) } } } }
Example 21
Source File: FiltersSuite.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.hive.client import java.util.Collections import org.apache.hadoop.hive.metastore.api.FieldSchema import org.apache.hadoop.hive.serde.serdeConstants import org.apache.spark.SparkFunSuite import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.types._ class FiltersSuite extends SparkFunSuite with Logging { private val shim = new Shim_v0_13 private val testTable = new org.apache.hadoop.hive.ql.metadata.Table("default", "test") private val varCharCol = new FieldSchema() varCharCol.setName("varchar") varCharCol.setType(serdeConstants.VARCHAR_TYPE_NAME) testTable.setPartCols(Collections.singletonList(varCharCol)) filterTest("string filter", (a("stringcol", StringType) > Literal("test")) :: Nil, "stringcol > \"test\"") filterTest("string filter backwards", (Literal("test") > a("stringcol", StringType)) :: Nil, "\"test\" > stringcol") filterTest("int filter", (a("intcol", IntegerType) === Literal(1)) :: Nil, "intcol = 1") filterTest("int filter backwards", (Literal(1) === a("intcol", IntegerType)) :: Nil, "1 = intcol") filterTest("int and string filter", (Literal(1) === a("intcol", IntegerType)) :: (Literal("a") === a("strcol", IntegerType)) :: Nil, "1 = intcol and \"a\" = strcol") filterTest("skip varchar", (Literal("") === a("varchar", StringType)) :: Nil, "") private def filterTest(name: String, filters: Seq[Expression], result: String) = { test(name) { val converted = shim.convertFilters(testTable, filters) if (converted != result) { fail( s"Expected filters ${filters.mkString(",")} to convert to '$result' but got '$converted'") } } } private def a(name: String, dataType: DataType) = AttributeReference(name, dataType)() }
Example 22
Source File: FlumeTestUtils.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.streaming.flume import java.net.{InetSocketAddress, ServerSocket} import java.nio.ByteBuffer import java.nio.charset.StandardCharsets import java.util.{List => JList} import java.util.Collections import scala.collection.JavaConverters._ import org.apache.avro.ipc.NettyTransceiver import org.apache.avro.ipc.specific.SpecificRequestor import org.apache.commons.lang3.RandomUtils import org.apache.flume.source.avro import org.apache.flume.source.avro.{AvroFlumeEvent, AvroSourceProtocol} import org.jboss.netty.channel.ChannelPipeline import org.jboss.netty.channel.socket.SocketChannel import org.jboss.netty.channel.socket.nio.NioClientSocketChannelFactory import org.jboss.netty.handler.codec.compression.{ZlibDecoder, ZlibEncoder} import org.apache.spark.util.Utils import org.apache.spark.SparkConf private class CompressionChannelFactory(compressionLevel: Int) extends NioClientSocketChannelFactory { override def newChannel(pipeline: ChannelPipeline): SocketChannel = { val encoder = new ZlibEncoder(compressionLevel) pipeline.addFirst("deflater", encoder) pipeline.addFirst("inflater", new ZlibDecoder()) super.newChannel(pipeline) } } }
Example 23
Source File: PluginXmlDetector.scala From sbt-idea-plugin with Apache License 2.0
package org.jetbrains.sbtidea.download

import java.net.URI
import java.nio.file.{FileSystems, Files, Path}
import java.util.Collections
import java.util.function.Predicate

private class PluginXmlDetector extends Predicate[Path] {

  import org.jetbrains.sbtidea.packaging.artifact._

  private val MAP = Collections.emptyMap[String, Any]()
  var result: String = _

  override def test(t: Path): Boolean = {
    if (!t.toString.endsWith(".jar"))
      return false

    val uri = URI.create(s"jar:${t.toUri}")

    try {
      using(FileSystems.newFileSystem(uri, MAP)) { fs =>
        val maybePluginXml = fs.getPath("META-INF", "plugin.xml")
        if (Files.exists(maybePluginXml)) {
          result = new String(Files.readAllBytes(maybePluginXml))
          true
        } else {
          false
        }
      }
    } catch {
      case e: java.util.zip.ZipError =>
        throw new RuntimeException(s"Corrupt zip file: $t", e)
    }
  }
}
Example 24
Source File: TestUtilsBase.scala From kafka-connect-common with Apache License 2.0
package com.datamountaineer.streamreactor.connect

import java.util
import java.util.Collections

import org.apache.avro.generic.{GenericData, GenericRecord}
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.sink.SinkRecord
import org.apache.kafka.connect.source.SourceTaskContext
import org.apache.kafka.connect.storage.OffsetStorageReader
import org.mockito.Mockito._
import org.mockito.MockitoSugar
import org.scalatest.BeforeAndAfter
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

import scala.collection.JavaConverters._

    //set up partition
    val partition: util.Map[String, String] = Collections.singletonMap(lookupPartitionKey, table)
    //as a list to search for
    val partitionList: util.List[util.Map[String, String]] = List(partition).asJava
    //set up the offset
    val offset: util.Map[String, Object] = (Collections.singletonMap(offsetColumn, offsetValue))
    //create offsets to initialize from
    val offsets: util.Map[util.Map[String, String], util.Map[String, Object]] = Map(partition -> offset).asJava

    //mock out reader and task context
    val taskContext = mock[SourceTaskContext]
    val reader = mock[OffsetStorageReader]
    when(reader.offsets(partitionList)).thenReturn(offsets)
    when(taskContext.offsetStorageReader()).thenReturn(reader)

    taskContext
  }
}
Example 25
Source File: JsonConverterWithSchemaEvolutionTest.scala From kafka-connect-common with Apache License 2.0
package com.datamountaineer.streamreactor.connect.converters.source import java.util.Collections import com.datamountaineer.streamreactor.connect.converters.MsgKey import com.sksamuel.avro4s.{RecordFormat, SchemaFor} import io.confluent.connect.avro.AvroData import org.apache.avro.Schema import org.apache.kafka.connect.data.Struct import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class JsonConverterWithSchemaEvolutionTest extends AnyWordSpec with Matchers { val topic = "the_real_topic" val sourceTopic = "source_topic" val avroData = new AvroData(4) "JsonConverter" should { "throw IllegalArgumentException if payload is null" in { intercept[IllegalArgumentException] { val converter = new JsonConverterWithSchemaEvolution val record = converter.convert("topic", "somesource", "1000", null) } } "handle a simple json" in { val json = JacksonJson.toJson(Car("LaFerrari", "Ferrari", 2015, 963, 0.0001)) val converter = new JsonConverterWithSchemaEvolution val record = converter.convert(topic, sourceTopic, "100", json.getBytes) record.keySchema() shouldBe MsgKey.schema record.key().asInstanceOf[Struct].getString("topic") shouldBe sourceTopic record.key().asInstanceOf[Struct].getString("id") shouldBe "100" val schema = new Schema.Parser().parse( SchemaFor[CarOptional]().toString .replace("\"name\":\"CarOptional\"", s"""\"name\":\"$sourceTopic\"""") .replace(s""",\"namespace\":\"${getClass.getCanonicalName.dropRight(getClass.getSimpleName.length+1)}\"""", "") ) val format = RecordFormat[CarOptional] val carOptional = format.to(CarOptional(Option("LaFerrari"), Option("Ferrari"), Option(2015), Option(963), Option(0.0001))) record.valueSchema() shouldBe avroData.toConnectSchema(schema) record.value() shouldBe avroData.toConnectData(schema, carOptional).value() record.sourcePartition() shouldBe null record.sourceOffset() shouldBe Collections.singletonMap(JsonConverterWithSchemaEvolution.ConfigKey, avroData.fromConnectSchema(avroData.toConnectSchema(schema)).toString()) } } } case class Car(name: String, manufacturer: String, model: Long, bhp: Long, price: Double) case class CarOptional(name: Option[String], manufacturer: Option[String], model: Option[Long], bhp: Option[Long], price: Option[Double])
Example 26
Source File: JsonSimpleConverterTest.scala From kafka-connect-common with Apache License 2.0
package com.datamountaineer.streamreactor.connect.converters.source import java.util.Collections import com.datamountaineer.streamreactor.connect.converters.MsgKey import com.sksamuel.avro4s.{RecordFormat, SchemaFor} import io.confluent.connect.avro.AvroData import org.apache.avro.Schema import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class JsonSimpleConverterTest extends AnyWordSpec with Matchers { val topic = "the_real_topic" val sourceTopic = "source_topic" val avroData = new AvroData(4) "JsonSimpleConverter" should { "convert from json to the struct" in { val car = Car("LaFerrari", "Ferrari", 2015, 963, 0.0001) val json = JacksonJson.toJson(car) val converter = new JsonSimpleConverter val record = converter.convert(topic, sourceTopic, "100", json.getBytes) record.keySchema() shouldBe MsgKey.schema record.key() shouldBe MsgKey.getStruct(sourceTopic, "100") val schema = new Schema.Parser().parse( SchemaFor[Car]().toString .replace("\"name\":\"Car\"", s"""\"name\":\"$sourceTopic\"""") .replace(s"""\"namespace\":\"${getClass.getCanonicalName.dropRight(getClass.getSimpleName.length+1)}\",""", "") ) val format = RecordFormat[Car] val avro = format.to(car) record.valueSchema() shouldBe avroData.toConnectSchema(schema) record.value() shouldBe avroData.toConnectData(schema, avro).value() record.sourcePartition() shouldBe Collections.singletonMap(Converter.TopicKey, sourceTopic) record.sourceOffset() shouldBe null } } }
Example 27
Source File: JsonPassThroughConverterTest.scala From kafka-connect-common with Apache License 2.0
package com.datamountaineer.streamreactor.connect.converters.source import java.util.Collections import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class JsonPassThroughConverterTest extends AnyWordSpec with Matchers { val topic = "the_real_topic" val sourceTopic = "source_topic" "JsonPassThroughConverter" should { "pass single message with no key through as json" in { val car = Car("LaFerrari", "Ferrari", 2015, 963, 0.0001) val json = JacksonJson.toJson(car) val converter = new JsonPassThroughConverter val record = converter.convert(topic, sourceTopic, "100", json.getBytes) record.keySchema() shouldBe null record.key() shouldBe "source_topic.100" record.valueSchema() shouldBe null record.value() shouldBe json record.sourcePartition() shouldBe Collections.singletonMap(Converter.TopicKey, sourceTopic) record.sourceOffset() shouldBe null } "pass single message with key through as json" in { val car = Car("LaFerrari", "Ferrari", 2015, 963, 0.0001) val json = JacksonJson.toJson(car) val converter = new JsonPassThroughConverter val keys = List("name", "manufacturer") val record = converter.convert(topic, sourceTopic, "100", json.getBytes, keys) record.keySchema() shouldBe null record.key() shouldBe "LaFerrari.Ferrari" record.valueSchema() shouldBe null record.value() shouldBe json record.sourcePartition() shouldBe Collections.singletonMap(Converter.TopicKey, sourceTopic) record.sourceOffset() shouldBe null } } }
Example 28
Source File: BytesConverter.scala From kafka-connect-common with Apache License 2.0
package com.datamountaineer.streamreactor.connect.converters.source

import java.util.Collections

import com.datamountaineer.streamreactor.connect.converters.MsgKey
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord

class BytesConverter extends Converter {
  override def convert(kafkaTopic: String,
                       sourceTopic: String,
                       messageId: String,
                       bytes: Array[Byte],
                       keys: Seq[String] = Seq.empty,
                       keyDelimiter: String = "."): SourceRecord = {
    new SourceRecord(Collections.singletonMap(Converter.TopicKey, sourceTopic),
      null,
      kafkaTopic,
      MsgKey.schema,
      MsgKey.getStruct(sourceTopic, messageId),
      Schema.BYTES_SCHEMA,
      bytes)
  }
}
Example 29
Source File: JsonOptNullConverter.scala From kafka-connect-common with Apache License 2.0
package com.datamountaineer.streamreactor.connect.converters.source import java.nio.charset.Charset import java.util import java.util.Collections import com.datamountaineer.streamreactor.connect.converters.MsgKey import org.apache.kafka.connect.data._ import org.apache.kafka.connect.source.SourceRecord class JsonOptNullConverter extends Converter { override def convert(kafkaTopic: String, sourceTopic: String, messageId: String, bytes: Array[Byte], keys:Seq[String] = Seq.empty, keyDelimiter:String = "."): SourceRecord = { require(bytes != null, s"Invalid $bytes parameter") val json = new String(bytes, Charset.defaultCharset) val schemaAndValue = JsonOptNullConverter.convert(sourceTopic, json) val value = schemaAndValue.value() value match { case s:Struct if keys.nonEmpty => val keysValue = keys.flatMap { key => Option(KeyExtractor.extract(s, key.split('.').toVector)).map(_.toString) }.mkString(keyDelimiter) new SourceRecord(Collections.singletonMap(Converter.TopicKey, sourceTopic), null, kafkaTopic, Schema.STRING_SCHEMA, keysValue, schemaAndValue.schema(), schemaAndValue.value()) case _=> new SourceRecord(Collections.singletonMap(Converter.TopicKey, sourceTopic), null, kafkaTopic, MsgKey.schema, MsgKey.getStruct(sourceTopic, messageId), schemaAndValue.schema(), schemaAndValue.value()) } } } object JsonOptNullConverter { import org.json4s._ import org.json4s.native.JsonMethods._ def convert(name: String, str: String): SchemaAndValue = convert(name, parse(str)) def convert(name: String, value: JValue): SchemaAndValue = { value match { case JArray(arr) => val values = new util.ArrayList[AnyRef]() val sv = convert(name, arr.head) values.add(sv.value()) arr.tail.foreach { v => values.add(convert(name, v).value()) } val schema = SchemaBuilder.array(sv.schema()).optional().build() new SchemaAndValue(schema, values) case JBool(b) => new SchemaAndValue(Schema.BOOLEAN_SCHEMA, b) case JDecimal(d) => val schema = Decimal.builder(d.scale).optional().build() new SchemaAndValue(schema, Decimal.fromLogical(schema, d.bigDecimal)) case JDouble(d) => new SchemaAndValue(Schema.FLOAT64_SCHEMA, d) case JInt(i) => new SchemaAndValue(Schema.INT64_SCHEMA, i.toLong) //on purpose! LONG (we might get later records with long entries) case JLong(l) => new SchemaAndValue(Schema.INT64_SCHEMA, l) case JNull | JNothing => new SchemaAndValue(Schema.OPTIONAL_STRING_SCHEMA, null) case JString(s) => new SchemaAndValue(Schema.STRING_SCHEMA, s) case JObject(values) => val builder = SchemaBuilder.struct().name(name.replace("/", "_")) val fields = values.map { case (n, v) => val schemaAndValue = convert(n, v) builder.field(n, schemaAndValue.schema()) n -> schemaAndValue.value() }.toMap val schema = builder.build() val struct = new Struct(schema) fields.foreach { case (field, v) => struct.put(field, v) } new SchemaAndValue(schema, struct) } } }
Example 30
Source File: JsonSimpleConverter.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.converters.source import java.nio.charset.Charset import java.util import java.util.Collections import com.datamountaineer.streamreactor.connect.converters.MsgKey import org.apache.kafka.connect.data._ import org.apache.kafka.connect.source.SourceRecord class JsonSimpleConverter extends Converter { override def convert(kafkaTopic: String, sourceTopic: String, messageId: String, bytes: Array[Byte], keys:Seq[String] = Seq.empty, keyDelimiter:String = "."): SourceRecord = { require(bytes != null, s"Invalid $bytes parameter") val json = new String(bytes, Charset.defaultCharset) val schemaAndValue = JsonSimpleConverter.convert(sourceTopic, json) val value = schemaAndValue.value() value match { case s:Struct if keys.nonEmpty => val keysValue = keys.flatMap { key => Option(KeyExtractor.extract(s, key.split('.').toVector)).map(_.toString) }.mkString(keyDelimiter) new SourceRecord(Collections.singletonMap(Converter.TopicKey, sourceTopic), null, kafkaTopic, Schema.STRING_SCHEMA, keysValue, schemaAndValue.schema(), schemaAndValue.value()) case _=> new SourceRecord(Collections.singletonMap(Converter.TopicKey, sourceTopic), null, kafkaTopic, MsgKey.schema, MsgKey.getStruct(sourceTopic, messageId), schemaAndValue.schema(), schemaAndValue.value()) } } } object JsonSimpleConverter { import org.json4s._ import org.json4s.native.JsonMethods._ def convert(name: String, str: String): SchemaAndValue = convert(name, parse(str)) def convert(name: String, value: JValue): SchemaAndValue = { value match { case JArray(arr) => val values = new util.ArrayList[AnyRef]() val sv = convert(name, arr.head) values.add(sv.value()) arr.tail.foreach { v => values.add(convert(name, v).value()) } val schema = SchemaBuilder.array(sv.schema()).optional().build() new SchemaAndValue(schema, values) case JBool(b) => new SchemaAndValue(Schema.BOOLEAN_SCHEMA, b) case JDecimal(d) => val schema = Decimal.builder(d.scale).optional().build() new SchemaAndValue(schema, Decimal.fromLogical(schema, d.bigDecimal)) case JDouble(d) => new SchemaAndValue(Schema.FLOAT64_SCHEMA, d) case JInt(i) => new SchemaAndValue(Schema.INT64_SCHEMA, i.toLong) //on purpose! LONG (we might get later records with long entries) case JLong(l) => new SchemaAndValue(Schema.INT64_SCHEMA, l) case JNull | JNothing => new SchemaAndValue(Schema.STRING_SCHEMA, null) case JString(s) => new SchemaAndValue(Schema.STRING_SCHEMA, s) case JObject(values) => val builder = SchemaBuilder.struct().name(name.replace("/", "_")) val fields = values.map { case (n, v) => val schemaAndValue = convert(n, v) builder.field(n, schemaAndValue.schema()) n -> schemaAndValue.value() }.toMap val schema = builder.build() val struct = new Struct(schema) fields.foreach { case (field, v) => struct.put(field, v) } new SchemaAndValue(schema, struct) } } }
Example 31
Source File: AvroConverter.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.converters.source import java.io.File import java.util.Collections import com.datamountaineer.streamreactor.connect.converters.MsgKey import io.confluent.connect.avro.AvroData import org.apache.avro.generic.{GenericDatumReader, GenericRecord} import org.apache.avro.io.DecoderFactory import org.apache.avro.{Schema => AvroSchema} import org.apache.kafka.connect.data.{Schema, Struct} import org.apache.kafka.connect.source.SourceRecord import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException class AvroConverter extends Converter { private val avroData = new AvroData(8) private var sourceToSchemaMap: Map[String, AvroSchema] = Map.empty private var avroReadersMap: Map[String, GenericDatumReader[GenericRecord]] = Map.empty override def convert(kafkaTopic: String, sourceTopic: String, messageId: String, bytes: Array[Byte], keys: Seq[String] = Seq.empty, keyDelimiter: String = "."): SourceRecord = { Option(bytes) match { case None => new SourceRecord(Collections.singletonMap(Converter.TopicKey, sourceTopic), null, kafkaTopic, avroData.toConnectSchema(sourceToSchemaMap(sourceTopic)), null) case Some(_) => val reader = avroReadersMap.getOrElse(sourceTopic.toLowerCase, throw new ConfigException(s"Invalid ${AvroConverter.SCHEMA_CONFIG} is not configured for $sourceTopic")) val decoder = DecoderFactory.get().binaryDecoder(bytes, null) val record = reader.read(null, decoder) val schemaAndValue = avroData.toConnectData(sourceToSchemaMap(sourceTopic.toLowerCase), record) val value = schemaAndValue.value() value match { case s: Struct if keys.nonEmpty => val keysValue = keys.flatMap { key => Option(KeyExtractor.extract(s, key.split('.').toVector)).map(_.toString) }.mkString(keyDelimiter) new SourceRecord( Collections.singletonMap(Converter.TopicKey, sourceTopic), null, kafkaTopic, Schema.STRING_SCHEMA, keysValue, schemaAndValue.schema(), schemaAndValue.value()) case _ => new SourceRecord( Collections.singletonMap(Converter.TopicKey, sourceTopic), null, kafkaTopic, MsgKey.schema, MsgKey.getStruct(sourceTopic, messageId), schemaAndValue.schema(), schemaAndValue.value()) } } } override def initialize(config: Map[String, String]): Unit = { sourceToSchemaMap = AvroConverter.getSchemas(config) avroReadersMap = sourceToSchemaMap.map { case (key, schema) => key -> new GenericDatumReader[GenericRecord](schema) } } } object AvroConverter { val SCHEMA_CONFIG = "connect.source.converter.avro.schemas" def getSchemas(config: Map[String, String]): Map[String, AvroSchema] = { config.getOrElse(SCHEMA_CONFIG, throw new ConfigException(s"$SCHEMA_CONFIG is not provided")) .toString .split(';') .filter(_.trim.nonEmpty) .map(_.split("=")) .map { case Array(source, path) => val file = new File(path) if (!file.exists()) { throw new ConfigException(s"Invalid $SCHEMA_CONFIG. The file $path doesn't exist!") } val s = source.trim.toLowerCase() if (s.isEmpty) { throw new ConfigException(s"Invalid $SCHEMA_CONFIG. The topic is not valid for entry containing $path") } s -> new AvroSchema.Parser().parse(file) case other => throw new ConfigException(s"$SCHEMA_CONFIG is not properly set. The format is Mqtt_Source->AVRO_FILE") }.toMap } }
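The SCHEMA_CONFIG value consumed by getSchemas above is a semicolon-separated list of source=avro-file pairs. A small sketch of that parsing convention, with the file-existence checks omitted and purely illustrative sources and paths:

object SchemaConfigSketch extends App {
  // Mirrors the split(';') / split("=") logic in AvroConverter.getSchemas (paths are placeholders).
  val raw = "/mqtt/temp=/tmp/temp.avsc;/mqtt/humidity=/tmp/humidity.avsc"

  val mappings: Map[String, String] = raw
    .split(';')
    .filter(_.trim.nonEmpty)
    .map(_.split("="))
    .collect { case Array(source, path) => source.trim.toLowerCase -> path }
    .toMap

  println(mappings("/mqtt/temp"))   // /tmp/temp.avsc
}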
Example 32
Source File: JsonPassThroughConverter.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.converters.source import java.util.Collections import com.landoop.json.sql.JacksonJson import org.apache.kafka.connect.source.SourceRecord class JsonPassThroughConverter extends Converter { override def convert(kafkaTopic: String, sourceTopic: String, messageId: String, bytes: Array[Byte], keys: Seq[String] = Seq.empty, keyDelimiter: String = "."): SourceRecord = { require(bytes != null, s"Invalid $bytes parameter") val json = new String(bytes, "utf-8") val jsonNode = JacksonJson.asJson(json) var keysValue = keys.flatMap { key => Option(KeyExtractor.extract(jsonNode, key.split('.').toVector)).map(_.toString) }.mkString(keyDelimiter) // If keys are not provided, default one will be constructed if (keysValue == "") { keysValue = s"$sourceTopic$keyDelimiter$messageId" } new SourceRecord(Collections.singletonMap(Converter.TopicKey, sourceTopic), null, kafkaTopic, null, keysValue, null, json) } }
Example 33
Source File: OffsetHandler.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.offsets import java.util import java.util.Collections import org.apache.kafka.connect.source.SourceTaskContext import scala.collection.JavaConverters._ object OffsetHandler { def recoverOffset[T](offsets: util.Map[util.Map[String, String],util.Map[String, Object]], lookupPartitionKey: String, partitionKeyValue: String, lookupOffsetCol: String ) : Option[T] = { val partition = Collections.singletonMap(lookupPartitionKey, partitionKeyValue) val offset = offsets.get(partition) if (offset != null && offset.get(lookupOffsetCol) != null) { Some(offset.get(lookupOffsetCol).asInstanceOf[T]) } else { None } } }
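recoverOffset works because Collections.singletonMap has value-based equality, so a freshly built single-entry map can be used as the key into the offsets map. A hypothetical illustration of that lookup pattern (table name and offset value are made up):

import java.util.Collections

object OffsetLookupSketch extends App {
  // Offsets keyed by a partition map, as Kafka Connect's offset storage returns them.
  val partition = Collections.singletonMap("table", "orders")
  val offsets: java.util.Map[java.util.Map[String, String], java.util.Map[String, Object]] =
    Collections.singletonMap(partition, Collections.singletonMap("offset", java.lang.Long.valueOf(42L): Object))

  // A newly created map with the same entry is equal to `partition`, so get() finds it.
  val key = Collections.singletonMap("table", "orders")
  println(Option(offsets.get(key)).map(_.get("offset")))   // Some(42)
}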
Example 34
Source File: Utils.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler.cluster.mesos import java.util.Collections import scala.collection.JavaConverters._ import org.apache.mesos.Protos._ import org.apache.mesos.Protos.Value.{Range => MesosRange, Ranges, Scalar} import org.apache.mesos.SchedulerDriver import org.mockito.{ArgumentCaptor, Matchers} import org.mockito.Mockito._ object Utils { def createOffer( offerId: String, slaveId: String, mem: Int, cpus: Int, ports: Option[(Long, Long)] = None, gpus: Int = 0): Offer = { val builder = Offer.newBuilder() builder.addResourcesBuilder() .setName("mem") .setType(Value.Type.SCALAR) .setScalar(Scalar.newBuilder().setValue(mem)) builder.addResourcesBuilder() .setName("cpus") .setType(Value.Type.SCALAR) .setScalar(Scalar.newBuilder().setValue(cpus)) ports.foreach { resourcePorts => builder.addResourcesBuilder() .setName("ports") .setType(Value.Type.RANGES) .setRanges(Ranges.newBuilder().addRange(MesosRange.newBuilder() .setBegin(resourcePorts._1).setEnd(resourcePorts._2).build())) } if (gpus > 0) { builder.addResourcesBuilder() .setName("gpus") .setType(Value.Type.SCALAR) .setScalar(Scalar.newBuilder().setValue(gpus)) } builder.setId(createOfferId(offerId)) .setFrameworkId(FrameworkID.newBuilder() .setValue("f1")) .setSlaveId(SlaveID.newBuilder().setValue(slaveId)) .setHostname(s"host${slaveId}") .build() } def verifyTaskLaunched(driver: SchedulerDriver, offerId: String): List[TaskInfo] = { val captor = ArgumentCaptor.forClass(classOf[java.util.Collection[TaskInfo]]) verify(driver, times(1)).launchTasks( Matchers.eq(Collections.singleton(createOfferId(offerId))), captor.capture()) captor.getValue.asScala.toList } def createOfferId(offerId: String): OfferID = { OfferID.newBuilder().setValue(offerId).build() } def createSlaveId(slaveId: String): SlaveID = { SlaveID.newBuilder().setValue(slaveId).build() } def createExecutorId(executorId: String): ExecutorID = { ExecutorID.newBuilder().setValue(executorId).build() } def createTaskId(taskId: String): TaskID = { TaskID.newBuilder().setValue(taskId).build() } }
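verifyTaskLaunched above matches the offer-ID argument with Matchers.eq(Collections.singleton(...)); that works because java.util.Set equality is element-based, so a fresh singleton set equals any one-element set holding the same ID. A tiny sketch of that property (values are illustrative):

import java.util.Collections

object SingletonSetSketch extends App {
  val expected = Collections.singleton("offer-1")

  val actual = new java.util.HashSet[String]()
  actual.add("offer-1")

  println(expected == actual)   // true - which is what the Matchers.eq(...) check relies on
  // expected.add("offer-2")    // would throw UnsupportedOperationException
}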
Example 35
Source File: cache.scala From libisabelle with Apache License 2.0 | 5 votes |
package isabelle import java.util.{Collections, WeakHashMap} import java.lang.ref.WeakReference class Cache(initial_size: Int = 131071, max_string: Int = 100) { private val table = Collections.synchronizedMap(new WeakHashMap[Any, WeakReference[Any]](initial_size)) def size: Int = table.size override def toString: String = "Cache(" + size + ")" protected def lookup[A](x: A): Option[A] = { val ref = table.get(x) if (ref == null) None else { val y = ref.asInstanceOf[WeakReference[A]].get if (y == null) None else Some(y) } } protected def store[A](x: A): A = { table.put(x, new WeakReference[Any](x)) x } protected def cache_int(x: Int): Int = lookup(x) getOrElse store(x) protected def cache_string(x: String): String = { if (x == "") "" else if (x == "true") "true" else if (x == "false") "false" else if (x == "0.0") "0.0" else if (Library.is_small_int(x)) Library.signed_string_of_int(Integer.parseInt(x)) else { lookup(x) match { case Some(y) => y case None => val z = Library.isolate_substring(x) if (z.length > max_string) z else store(z) } } } // main methods def int(x: Int): Int = synchronized { cache_int(x) } def string(x: String): String = synchronized { cache_string(x) } }
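The cache above combines two Collections ideas: a WeakHashMap so entries can be garbage-collected once their keys are no longer strongly referenced, and Collections.synchronizedMap so individual map operations are thread-safe. A minimal sketch of that combination (object and key names are illustrative):

import java.lang.ref.WeakReference
import java.util.{Collections, WeakHashMap}

object WeakCacheSketch extends App {
  val table = Collections.synchronizedMap(new WeakHashMap[Any, WeakReference[Any]]())

  val key: String = new String("some value")   // avoid a literal so the key can actually be collected
  table.put(key, new WeakReference[Any](key))

  println(table.size)   // 1 while `key` is strongly reachable; the entry may vanish after GC
  // Compound operations (check-then-put) still need external locking, which is why
  // Cache.int and Cache.string above wrap cache_int/cache_string in synchronized blocks.
}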
Example 37
Source File: ItUtils.scala From scio with Apache License 2.0 | 5 votes |
package com.spotify.scio.testing.util import java.util.{Collections, UUID} import com.google.api.client.http.HttpRequestInitializer import com.google.api.services.cloudresourcemanager.CloudResourceManager import com.google.auth.Credentials import com.google.auth.http.HttpCredentialsAdapter import com.google.cloud.hadoop.util.ChainingHttpRequestInitializer import org.apache.beam.sdk.extensions.gcp.auth.NullCredentialInitializer import org.apache.beam.sdk.extensions.gcp.options.GcpOptions.DefaultProjectFactory import org.apache.beam.sdk.extensions.gcp.options._ import org.apache.beam.sdk.options.PipelineOptionsFactory import org.apache.beam.sdk.extensions.gcp.util.{RetryHttpRequestInitializer, Transport} def gcpTempLocation(prefix: String): String = { val opts = PipelineOptionsFactory.as(classOf[GcpOptions]) opts.setProject(project) val bucket = DefaultBucket.tryCreateDefaultBucket( opts, newCloudResourceManagerClient(opts.as(classOf[CloudResourceManagerOptions])) ) val uuid = UUID.randomUUID().toString s"$bucket/$prefix-$uuid" } private def newCloudResourceManagerClient( options: CloudResourceManagerOptions ): CloudResourceManager = { val credentials = options.getGcpCredential if (credentials == null) { NullCredentialInitializer.throwNullCredentialException() } new CloudResourceManager.Builder( Transport.getTransport, Transport.getJsonFactory, chainHttpRequestInitializer( credentials, // Do not log 404. It clutters the output and is possibly even required by the caller. new RetryHttpRequestInitializer(Collections.singletonList(404)) ) ).setApplicationName(options.getAppName) .setGoogleClientRequestInitializer(options.getGoogleApiTrace) .build() } private def chainHttpRequestInitializer( credential: Credentials, httpRequestInitializer: HttpRequestInitializer ): HttpRequestInitializer = if (credential == null) { new ChainingHttpRequestInitializer(new NullCredentialInitializer(), httpRequestInitializer) } else { new ChainingHttpRequestInitializer( new HttpCredentialsAdapter(credential), httpRequestInitializer ) } }
Example 38
Source File: LocalMemoryDataBlock.scala From sona with Apache License 2.0 | 5 votes |
package com.tencent.angel.sona.data import java.io.IOException import java.util import java.util.Collections import com.tencent.angel.ml.math2.utils.{DataBlock, LabeledData} import org.apache.commons.logging.{Log, LogFactory} import org.ehcache.sizeof.SizeOf class LocalMemoryDataBlock(initSize: Int, maxUseMemroy: Long) extends DataBlock[LabeledData] { private val LOG: Log = LogFactory.getLog(classOf[LocalMemoryDataBlock]) private var estimateSampleNumber: Int = 100 val initCapacity = if (initSize > 0) { estimateSampleNumber = initSize initSize } else { estimateSampleNumber } private val vList = new util.ArrayList[LabeledData]() private var isFull: Boolean = false @throws[IOException] override def read(): LabeledData = { if (readIndex < writeIndex) { val value = vList.get(readIndex) readIndex += 1 value } else { null.asInstanceOf[LabeledData] } } @throws[IOException] override protected def hasNext: Boolean = readIndex < writeIndex @throws[IOException] override def get(index: Int): LabeledData = { if (index < 0 || index >= writeIndex) { throw new IOException("index not in range[0," + writeIndex + ")") } vList.get(index) } @throws[IOException] override def put(value: LabeledData): Unit = { if (writeIndex < estimateSampleNumber) { vList.add(value) writeIndex += 1 if (writeIndex == estimateSampleNumber && !isFull) { estimateAndResizeVList() } } else { LOG.info("Over maxUseMemroy, No value added!") } } override def resetReadIndex(): Unit = { readIndex = 0 } override def clean(): Unit = { readIndex = 0 writeIndex = 0 vList.clear() } override def shuffle(): Unit = Collections.shuffle(vList) override def flush(): Unit = {} override def slice(startIndex: Int, length: Int): DataBlock[LabeledData] = ??? private def estimateAndResizeVList(): Unit = { val avgDataItemSize = (SizeOf.newInstance().deepSizeOf(vList) + vList.size - 1) / vList.size val maxStoreNum = (maxUseMemroy / avgDataItemSize).toInt val capacity = if (maxStoreNum < 2 * vList.size) { isFull = true maxStoreNum } else { 2 * vList.size } estimateSampleNumber = (0.8 * capacity).toInt vList.ensureCapacity(capacity) LOG.debug("estimate sample number=" + vList.size + ", avgDataItemSize=" + avgDataItemSize + ", maxStoreNum=" + maxStoreNum + ", maxUseMemroy=" + maxUseMemroy) } }
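shuffle() above delegates directly to Collections.shuffle, which permutes a java.util.List in place. A short sketch, including the overload that takes an explicit Random for reproducible test runs (object name and contents are illustrative):

import java.util
import java.util.Collections

object ShuffleSketch extends App {
  val samples = new util.ArrayList[String]()
  samples.add("a"); samples.add("b"); samples.add("c"); samples.add("d")

  Collections.shuffle(samples)                          // in-place, default randomness
  Collections.shuffle(samples, new util.Random(42L))    // deterministic order, handy in tests

  println(samples)
}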
Example 39
Source File: package.scala From kafka-scala-api with Apache License 2.0 | 5 votes |
package com import java.util.Collections import cakesolutions.kafka.{KafkaConsumer, KafkaProducer} import cakesolutions.kafka.KafkaProducer.Conf import org.apache.kafka.clients.consumer.OffsetResetStrategy import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer} package object example { val topic = "sample_topic" val kafkaProducer = KafkaProducer( Conf(new StringSerializer(), new StringSerializer(), bootstrapServers = "localhost:9092") ) val kafkaProducerConf = KafkaProducer.Conf( new StringSerializer, new StringSerializer, bootstrapServers = "localhost:9092" ) val kafkaConsumerConf = KafkaConsumer.Conf( new StringDeserializer, new StringDeserializer, groupId = "test_group", enableAutoCommit = false, autoOffsetReset = OffsetResetStrategy.EARLIEST, bootstrapServers = "localhost:9092") }
Example 40
Source File: ExecutionContextExecutorServiceBridge.scala From firebase4s with MIT License | 5 votes |
package com.firebase4s.util import scala.concurrent.{ExecutionContext, ExecutionContextExecutorService} import java.util.concurrent.{AbstractExecutorService, TimeUnit} import java.util.Collections object ExecutionContextExecutorServiceBridge { private[firebase4s] def apply(ec: ExecutionContext): ExecutionContextExecutorService = ec match { case null => throw null case eces: ExecutionContextExecutorService => eces case other => new AbstractExecutorService with ExecutionContextExecutorService { override def prepare(): ExecutionContext = other override def isShutdown = false override def isTerminated = false override def shutdown() = () override def shutdownNow() = Collections.emptyList[Runnable] override def execute(runnable: Runnable): Unit = other.execute(runnable) override def reportFailure(t: Throwable): Unit = other.reportFailure(t) override def awaitTermination(length: Long, unit: TimeUnit): Boolean = false } } }
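shutdownNow() above returns Collections.emptyList[Runnable]: the JDK's shared, immutable empty list, which satisfies the ExecutorService contract (the tasks that never started) without allocating anything. A brief sketch of its behaviour:

import java.util.Collections

object EmptyListSketch extends App {
  val drained: java.util.List[Runnable] = Collections.emptyList[Runnable]

  println(drained.isEmpty)   // true
  println(drained.size)      // 0
  // drained.add(new Runnable { def run(): Unit = () })   // would throw UnsupportedOperationException
}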
Example 41
Source File: HiveSourceTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.hive.it import java.util.Collections import java.util.concurrent.TimeUnit import org.scalatest.concurrent.Eventually import org.scalatest.matchers.should.Matchers import org.scalatest.time.{Millis, Span} import org.scalatest.wordspec.AnyWordSpec import scala.io.Source class HiveSourceTest extends AnyWordSpec with Matchers with PersonTestData with Eventually with HiveTests { private implicit val patience: PatienceConfig = PatienceConfig(Span(60000, Millis), Span(5000, Millis)) "Hive" should { "read non partitioned table" in { val count = 2000L val inputTopic = createTopic() val sinkTaskDef = Source.fromInputStream(getClass.getResourceAsStream("/hive_sink_task_no_partitions.json")).getLines().mkString("\n") .replace("{{TOPIC}}", inputTopic) .replace("{{TABLE}}", inputTopic) .replace("{{NAME}}", inputTopic) postTask(sinkTaskDef) val producer = stringStringProducer() writeRecords(producer, inputTopic, JacksonSupport.mapper.writeValueAsString(person), count) producer.close(30, TimeUnit.SECONDS) // we now should have 1000 records in hive which we can test via jdbc eventually { withConn { conn => val stmt = conn.createStatement val rs = stmt.executeQuery(s"select count(*) from $inputTopic") rs.next() rs.getLong(1) shouldBe count } } stopTask(inputTopic) // now we can read them back in val outputTopic = createTopic() val sourceTaskDef = Source.fromInputStream(getClass.getResourceAsStream("/hive_source_task.json")).getLines().mkString("\n") .replace("{{TOPIC}}", outputTopic) .replace("{{TABLE}}", inputTopic) .replace("{{NAME}}", outputTopic) postTask(sourceTaskDef) // we should have 1000 records on the outputTopic var records = 0L val consumer = stringStringConsumer("earliest") consumer.subscribe(Collections.singleton(outputTopic)) eventually { records = records + readRecords(consumer, outputTopic, 2, TimeUnit.SECONDS).size records shouldBe count } stopTask(outputTopic) } } }
Example 44
Source File: PulseKafkaConsumer.scala From pulse with Apache License 2.0 | 5 votes |
package io.phdata.pulse.logcollector import java.util.{ Collections, Properties } import org.apache.kafka.clients.consumer.KafkaConsumer import com.typesafe.scalalogging.LazyLogging import spray.json.JsonParser.ParsingException import scala.collection.JavaConverters._ import spray.json._ class PulseKafkaConsumer(solrCloudStream: SolrCloudStream) extends JsonSupport with LazyLogging { val MAX_TIMEOUT = 100 def read(consumerProperties: Properties, topic: String): Unit = { val consumer = new KafkaConsumer[String, String](consumerProperties) consumer.subscribe(Collections.singletonList(topic)) while (true) { try { val records = consumer.poll(MAX_TIMEOUT) for (record <- records.asScala) { logger.trace("KAFKA: Consuming " + record.value() + " from topic: " + topic) val logEventMap = record.value().parseJson.convertTo[Map[String, String]] solrCloudStream.put(logEventMap.getOrElse("application", ""), logEventMap) // TODO: Add proper error handling when application isn't supplied } } catch { case p: ParsingException => logger.error("Error parsing message from kafka broker", p) case e: Exception => logger.error("Error consuming messages from kafka broker", e) } } } }
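consumer.subscribe above takes a java.util.Collection of topic names, so a single topic is passed as Collections.singletonList(topic). A minimal consumer skeleton using the same idiom; broker address, group id and topic name are placeholders, and the poll(long) overload shown matches the one used above (newer clients prefer poll(Duration)):

import java.util.{Collections, Properties}
import org.apache.kafka.clients.consumer.KafkaConsumer

object SingleTopicConsumerSketch {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put("bootstrap.servers", "localhost:9092")
    props.put("group.id", "pulse-example")
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")

    val consumer = new KafkaConsumer[String, String](props)
    consumer.subscribe(Collections.singletonList("pulse-logs"))   // one-topic subscription, as in read() above

    val records = consumer.poll(100L)
    println(s"fetched ${records.count()} records")
    consumer.close()
  }
}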
Example 45
Source File: ServiceRegistryInteropSpec.scala From lagom with Apache License 2.0 | 5 votes |
package com.lightbend.lagom.registry.impl import java.net.URI import java.util.Collections import java.util.Optional import akka.actor.ActorSystem import akka.testkit.TestKit import akka.util.ByteString import com.lightbend.lagom.devmode.internal.scaladsl.registry.RegisteredService import com.lightbend.lagom.devmode.internal.scaladsl.registry.ServiceRegistryService import com.lightbend.lagom.internal.javadsl.registry.{ RegisteredService => jRegisteredService } import com.lightbend.lagom.internal.javadsl.registry.{ ServiceRegistryService => jServiceRegistryService } import com.lightbend.lagom.devmode.internal.scaladsl.registry.{ RegisteredService => sRegisteredService } import com.lightbend.lagom.devmode.internal.scaladsl.registry.{ ServiceRegistryService => sServiceRegistryService } import com.lightbend.lagom.javadsl.api.ServiceAcl import com.lightbend.lagom.javadsl.api.deser.MessageSerializer import com.lightbend.lagom.javadsl.api.deser.StrictMessageSerializer import com.lightbend.lagom.javadsl.api.transport.MessageProtocol import com.lightbend.lagom.javadsl.api.transport.Method import com.lightbend.lagom.javadsl.jackson.JacksonSerializerFactory import org.scalatest.BeforeAndAfterAll import org.scalatest.concurrent.Futures import play.api.libs.json.Format import play.api.libs.json.Json import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers class ServiceRegistryInteropSpec extends AnyFlatSpec with Matchers with Futures with BeforeAndAfterAll { val system = ActorSystem() val jacksonSerializerFactory = new JacksonSerializerFactory(system) protected override def afterAll(): Unit = { TestKit.shutdownActorSystem(actorSystem = system, verifySystemShutdown = true) } behavior.of("ServiceRegistry serializers") it should "should interop between java and scala (RegisteredService)" in { val msg = jRegisteredService.of("inventory", URI.create("https://localhost:123/asdf"), Optional.of("https")) roundTrip(msg) should be(msg) } it should "should interop between java and scala when optional fields are empty (RegisteredService)" in { val msg = jRegisteredService.of("inventory", URI.create("https://localhost:123/asdf"), Optional.empty[String]) roundTrip(msg) should be(msg) } it should "should interop between java and scala (ServiceRegistryService)" in { val msg = jServiceRegistryService.of( URI.create("https://localhost:123/asdf"), Collections.singletonList(ServiceAcl.methodAndPath(Method.GET, "/items")) ) roundTrip(msg) should be(msg) } it should "should interop between java and scala when optional fields are empty (ServiceRegistryService)" in { val msg = jServiceRegistryService.of(URI.create("https://localhost:123/asdf"), Collections.emptyList[ServiceAcl]) roundTrip(msg) should be(msg) } private def roundTrip(input: jServiceRegistryService): jServiceRegistryService = { roundTrip( input, jacksonSerializerFactory.messageSerializerFor[jServiceRegistryService](classOf[jServiceRegistryService]), com.lightbend.lagom.scaladsl.playjson.JsonSerializer[ServiceRegistryService].format )(sServiceRegistryService.format) } private def roundTrip(input: jRegisteredService): jRegisteredService = { roundTrip( input, jacksonSerializerFactory.messageSerializerFor[jRegisteredService](classOf[jRegisteredService]), com.lightbend.lagom.scaladsl.playjson.JsonSerializer[RegisteredService].format )(sRegisteredService.format) } private def roundTrip[J, S]( input: J, jacksonSerializer: StrictMessageSerializer[J], playJsonFormatter: Format[S] )(implicit format: Format[S]): J = { val byteString: 
ByteString = jacksonSerializer.serializerForRequest().serialize(input) val scalaValue: S = playJsonFormatter.reads(Json.parse(byteString.toArray)).get val str: String = playJsonFormatter.writes(scalaValue).toString() val jacksonDeserializer: MessageSerializer.NegotiatedDeserializer[J, ByteString] = jacksonSerializer.deserializer( new MessageProtocol(Optional.of("application/json"), Optional.empty[String], Optional.empty[String]) ) jacksonDeserializer.deserialize(ByteString(str)) } }
Example 46
Source File: ExceptionCountAccumulator.scala From spark-distcp with Apache License 2.0 | 5 votes |
package com.coxautodata.objects import java.util import java.util.Collections import java.util.function.{BiConsumer, BiFunction} import org.apache.spark.util.AccumulatorV2 class ExceptionCountAccumulator extends AccumulatorV2[String, java.util.Map[String, Long]] { private val _map: java.util.Map[String, Long] = Collections.synchronizedMap(new util.HashMap[String, Long]()) override def isZero: Boolean = _map.isEmpty override def copyAndReset(): ExceptionCountAccumulator = new ExceptionCountAccumulator override def copy(): ExceptionCountAccumulator = { val newAcc = new ExceptionCountAccumulator _map.synchronized { newAcc._map.putAll(_map) } newAcc } override def reset(): Unit = _map.clear() def add(e: Throwable): Unit = add(e.getClass.getName.stripSuffix("$")) override def add(k: String): Unit = { add(k, 1) } private def add(k: String, v: Long): Unit = { _map.merge(k, v, CombineCounts) } override def merge(other: AccumulatorV2[String, util.Map[String, Long]]): Unit = { other match { case e: ExceptionCountAccumulator => e._map.forEach { new BiConsumer[String, Long] { override def accept(k: String, v: Long): Unit = add(k, v) } } case _ => throw new UnsupportedOperationException( s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}") } } override def value: util.Map[String, Long] = _map } object CombineCounts extends BiFunction[Long, Long, Long] { override def apply(t: Long, u: Long): Long = t + u }
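The accumulator above counts exceptions by calling java.util.Map.merge on a Collections.synchronizedMap, with a BiFunction that adds the counts. A small sketch of that counting idiom (object name and exception names are illustrative):

import java.util
import java.util.Collections
import java.util.function.BiFunction

object MergeCountSketch extends App {
  val counts: util.Map[String, Long] =
    Collections.synchronizedMap(new util.HashMap[String, Long]())

  val sum = new BiFunction[Long, Long, Long] {
    override def apply(a: Long, b: Long): Long = a + b
  }

  counts.merge("java.io.IOException", 1L, sum)
  counts.merge("java.io.IOException", 1L, sum)
  counts.merge("java.lang.IllegalStateException", 1L, sum)

  println(counts)   // e.g. {java.io.IOException=2, java.lang.IllegalStateException=1}
}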
Example 47
Source File: KafkaStringEventBatchTest.scala From maha with Apache License 2.0 | 5 votes |
// Copyright 2017, Yahoo Holdings Inc. // Licensed under the terms of the Apache License 2.0. Please see LICENSE file in project root for terms. package com.yahoo.maha.log import java.util.Collections import com.yahoo.maha.data.{Compressor, StringEventBatch} import org.junit.{Assert, Test} class KafkaStringEventBatchTest { @Test def testWithNoConfiguration(): Unit = { val encoder = new KafkaStringEventBatchSerializer() encoder.configure(Collections.emptyMap(), false) val decoder = new KafkaStringEventBatchDeserializer() decoder.configure(Collections.emptyMap(), false) val builder = new StringEventBatch.Builder(3) builder.add("one") builder.add("two") builder.add("three") val recordList = builder.build.asInstanceOf[StringEventBatch] val encoded = encoder.serialize("blah", recordList) val decoded = decoder.deserialize("blah", encoded) Assert.assertEquals(recordList.getEvents.size(), decoded.getEvents.size()) Assert.assertEquals(recordList.getEvents.get(0), decoded.getEvents.get(0)) Assert.assertEquals(recordList.getEvents.get(1), decoded.getEvents.get(1)) Assert.assertEquals(recordList.getEvents.get(2), decoded.getEvents.get(2)) } @Test def testWithConfiguration(): Unit = { import scala.collection.JavaConverters._ val config: java.util.Map[String, _] = Map( Compressor.COMPRESSOR_CODEC_PROPERTY -> "lz4hc" , KafkaStringEventBatchSerializer.BUFFER_MB_PROPERTY -> "2" , KafkaStringEventBatchDeserializer.BUFFER_MB_PROPERTY -> "2" ).asJava val encoder = new KafkaStringEventBatchSerializer() encoder.configure(config, false) val decoder = new KafkaStringEventBatchDeserializer() decoder.configure(config, false) val builder = new StringEventBatch.Builder(3) builder.add("one") builder.add("two") builder.add("three") val recordList = builder.build.asInstanceOf[StringEventBatch] val encoded = encoder.serialize("blah", recordList) val decoded = decoder.deserialize("blah", encoded) Assert.assertEquals(recordList.getEvents.size(), decoded.getEvents.size()) Assert.assertEquals(recordList.getEvents.get(0), decoded.getEvents.get(0)) Assert.assertEquals(recordList.getEvents.get(1), decoded.getEvents.get(1)) Assert.assertEquals(recordList.getEvents.get(2), decoded.getEvents.get(2)) } @Test def testWithBadConfiguration(): Unit = { import scala.collection.JavaConverters._ val config: java.util.Map[String, _] = Map( Compressor.COMPRESSOR_CODEC_PROPERTY -> "blah" , KafkaStringEventBatchSerializer.BUFFER_MB_PROPERTY -> "abc" , KafkaStringEventBatchDeserializer.BUFFER_MB_PROPERTY -> "-1" ).asJava val encoder = new KafkaStringEventBatchSerializer() encoder.configure(config, false) val decoder = new KafkaStringEventBatchDeserializer() decoder.configure(config, false) val builder = new StringEventBatch.Builder(3) builder.add("one") builder.add("two") builder.add("three") val recordList = builder.build.asInstanceOf[StringEventBatch] val encoded = encoder.serialize("blah", recordList) val decoded = decoder.deserialize("blah", encoded) Assert.assertEquals(recordList.getEvents.size(), decoded.getEvents.size()) Assert.assertEquals(recordList.getEvents.get(0), decoded.getEvents.get(0)) Assert.assertEquals(recordList.getEvents.get(1), decoded.getEvents.get(1)) Assert.assertEquals(recordList.getEvents.get(2), decoded.getEvents.get(2)) } }
Example 48
Source File: FiltersSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.client import java.util.Collections import org.apache.hadoop.hive.metastore.api.FieldSchema import org.apache.hadoop.hive.serde.serdeConstants import org.apache.spark.SparkFunSuite import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ class FiltersSuite extends SparkFunSuite with Logging with PlanTest { private val shim = new Shim_v0_13 private val testTable = new org.apache.hadoop.hive.ql.metadata.Table("default", "test") private val varCharCol = new FieldSchema() varCharCol.setName("varchar") varCharCol.setType(serdeConstants.VARCHAR_TYPE_NAME) testTable.setPartCols(Collections.singletonList(varCharCol)) filterTest("string filter", (a("stringcol", StringType) > Literal("test")) :: Nil, "stringcol > \"test\"") filterTest("string filter backwards", (Literal("test") > a("stringcol", StringType)) :: Nil, "\"test\" > stringcol") filterTest("int filter", (a("intcol", IntegerType) === Literal(1)) :: Nil, "intcol = 1") filterTest("int filter backwards", (Literal(1) === a("intcol", IntegerType)) :: Nil, "1 = intcol") filterTest("int and string filter", (Literal(1) === a("intcol", IntegerType)) :: (Literal("a") === a("strcol", IntegerType)) :: Nil, "1 = intcol and \"a\" = strcol") filterTest("skip varchar", (Literal("") === a("varchar", StringType)) :: Nil, "") filterTest("SPARK-19912 String literals should be escaped for Hive metastore partition pruning", (a("stringcol", StringType) === Literal("p1\" and q=\"q1")) :: (Literal("p2\" and q=\"q2") === a("stringcol", StringType)) :: Nil, """stringcol = 'p1" and q="q1' and 'p2" and q="q2' = stringcol""") filterTest("SPARK-24879 null literals should be ignored for IN constructs", (a("intcol", IntegerType) in (Literal(1), Literal(null))) :: Nil, "(intcol = 1)") // Applying the predicate `x IN (NULL)` should return an empty set, but since this optimization // will be applied by Catalyst, this filter converter does not need to account for this. filterTest("SPARK-24879 IN predicates with only NULLs will not cause a NPE", (a("intcol", IntegerType) in Literal(null)) :: Nil, "") filterTest("typecast null literals should not be pushed down in simple predicates", (a("intcol", IntegerType) === Literal(null, IntegerType)) :: Nil, "") private def filterTest(name: String, filters: Seq[Expression], result: String) = { test(name) { withSQLConf(SQLConf.ADVANCED_PARTITION_PREDICATE_PUSHDOWN.key -> "true") { val converted = shim.convertFilters(testTable, filters) if (converted != result) { fail(s"Expected ${filters.mkString(",")} to convert to '$result' but got '$converted'") } } } } test("turn on/off ADVANCED_PARTITION_PREDICATE_PUSHDOWN") { import org.apache.spark.sql.catalyst.dsl.expressions._ Seq(true, false).foreach { enabled => withSQLConf(SQLConf.ADVANCED_PARTITION_PREDICATE_PUSHDOWN.key -> enabled.toString) { val filters = (Literal(1) === a("intcol", IntegerType) || Literal(2) === a("intcol", IntegerType)) :: Nil val converted = shim.convertFilters(testTable, filters) if (enabled) { assert(converted == "(1 = intcol or 2 = intcol)") } else { assert(converted.isEmpty) } } } } private def a(name: String, dataType: DataType) = AttributeReference(name, dataType)() }
Example 49
Source File: SystemEnvironment.scala From mango with Apache License 2.0 | 5 votes |
package com.kakao.mango.reflect import java.util import java.util.Collections object SystemEnvironment { val env = Class.forName("java.lang.ProcessEnvironment") val field = Accessible.field(env, "theUnmodifiableEnvironment") def set(key: String, value: String): Unit = { set(key -> value) } def set(entries: (String, String)*): Unit = { val map = new util.HashMap[String, String]() map.putAll(System.getenv()) for ( (key, value) <- entries ) { map.put(key, value) } field.set(null, Collections.unmodifiableMap[String, String](map)) } }
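The reflective trick above swaps theUnmodifiableEnvironment for a brand-new Collections.unmodifiableMap, because the existing view cannot be mutated in place. A short sketch of what an unmodifiable view does (entries are illustrative):

import java.util.{Collections, HashMap => JHashMap}

object UnmodifiableMapSketch extends App {
  val backing = new JHashMap[String, String]()
  backing.put("JAVA_HOME", "/usr/lib/jvm/default")

  val frozen = Collections.unmodifiableMap[String, String](backing)

  println(frozen.get("JAVA_HOME"))    // reads pass through to the backing map
  // frozen.put("PATH", "/usr/bin")   // would throw UnsupportedOperationException,
  //                                  // hence the field swap above instead of in-place mutation
}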
Example 50
Source File: CompositeClassLoader.scala From meteorite-core with Apache License 2.0 | 5 votes |
package bi.meteorite.core.security.tokenprovider import java.util import java.util.Collections def add(classLoader: ClassLoader) { if (classLoader != null) { classLoaders.add(0, classLoader) } } @throws(classOf[ClassNotFoundException]) override def loadClass(name: String): Class[_] = { import scala.collection.JavaConversions._ for (classLoader1 <- classLoaders) { val classLoader: ClassLoader = classLoader1.asInstanceOf[ClassLoader] try { return classLoader.loadClass(name) } catch { case notFound: ClassNotFoundException => } } val contextClassLoader: ClassLoader = Thread.currentThread.getContextClassLoader if (contextClassLoader != null) { contextClassLoader.loadClass(name) } else { throw new ClassNotFoundException(name) } } }
Example 51
Source File: KinesisTestConsumer.scala From reactive-kinesis with Apache License 2.0 | 5 votes |
package com.weightwatchers.reactive.kinesis.common import java.util.Collections import com.amazonaws.ClientConfiguration import com.amazonaws.auth.AWSCredentialsProvider import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration import com.amazonaws.services.kinesis.clientlibrary.types.UserRecord import com.amazonaws.services.kinesis.model._ import com.amazonaws.services.kinesis.{AmazonKinesisAsyncClient, _} import com.weightwatchers.reactive.kinesis.consumer.KinesisConsumer.ConsumerConf import scala.collection.JavaConverters._ import scala.concurrent.duration.FiniteDuration object KinesisTestConsumer { def retrieveRecords(streamName: String, batchSize: Int): List[String] = { getShards(streamName) .flatMap { shard => val getRecordsRequest = new GetRecordsRequest getRecordsRequest.setShardIterator(getShardIterator(streamName, shard)) getRecordsRequest.setLimit(batchSize) client.getRecords(getRecordsRequest).getRecords.asScala.toList } .flatMap { record: Record => UserRecord .deaggregate(Collections.singletonList(record)) .asScala .map { ur => new String(ur.getData.array(), java.nio.charset.StandardCharsets.UTF_8) } } } private def getShardIterator(streamName: String, shard: Shard) = { client .getShardIterator(streamName, shard.getShardId, "TRIM_HORIZON") .getShardIterator } private def getShards(streamName: String) = { client .describeStream(streamName) .getStreamDescription .getShards .asScala .toList } def shutdown(): Unit = client.shutdown() }
Example 52
Source File: RMCallbackHandler.scala From DataXServer with Apache License 2.0 | 5 votes |
package org.tianlangstudio.data.hamal.yarn import java.io.File import java.util.{Collections, List} import org.tianlangstudio.data.hamal.core.{Constants, HamalConf} import org.tianlangstudio.data.hamal.core.HamalConf //import java.util.Collections import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path, FileContext} import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.client.api.{AMRMClient, NMClient} import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.util.{ConverterUtils, Records} import scala.jdk.CollectionConverters._ //import scala.collection.JavaConverters._ /** * Created by zhuhq on 2016/4/29. */ class RMCallbackHandler(nmClient:NMClient,containerCmd:Container => String,hamalConf: HamalConf,yarnConfiguration: Configuration) extends AMRMClientAsync.CallbackHandler { private val logging = org.slf4j.LoggerFactory.getLogger(classOf[RMCallbackHandler]) override def onContainersCompleted(statuses: List[ContainerStatus]): Unit = { for(containerStatus <- statuses.asScala) { logging.info(s"containerId:${containerStatus} exitStatus:${containerStatus}") } } override def onError(e: Throwable): Unit = { logging.error("on error",e) } override def getProgress: Float = { 0 } override def onShutdownRequest(): Unit = { logging.info("on shutdown request") } override def onNodesUpdated(updatedNodes: List[NodeReport]): Unit = { logging.info("on nodes updated") for(nodeReport <- updatedNodes.asScala) { logging.info(s"node id:${nodeReport} node labels:${nodeReport}"); } } override def onContainersAllocated(containers: List[Container]): Unit = { logging.info("on containers allocated"); for (container:Container <- containers.asScala) { try { // Launch container by create ContainerLaunchContext val ctx = Records.newRecord(classOf[ContainerLaunchContext]); //ctx.setCommands(Collections.singletonList(""" echo "begin";sleep 900;echo "end"; """)) ctx.setCommands(Collections.singletonList(containerCmd(container))) val packagePath = hamalConf.getString(Constants.DATAX_EXECUTOR_FILE,"executor.zip"); val archiveStat = FileSystem.get(yarnConfiguration).getFileStatus(new Path(packagePath)) val packageUrl = ConverterUtils.getYarnUrlFromPath( FileContext.getFileContext.makeQualified(new Path(packagePath))); val packageResource = Records.newRecord[LocalResource](classOf[LocalResource]) packageResource.setResource(packageUrl); packageResource.setSize(archiveStat.getLen); packageResource.setTimestamp(archiveStat.getModificationTime); packageResource.setType(LocalResourceType.ARCHIVE); packageResource.setVisibility(LocalResourceVisibility.APPLICATION) ctx.setLocalResources(Collections.singletonMap(Constants.DATAX_EXECUTOR_ARCHIVE_FILE_NAME,packageResource)) logging.info("[AM] Launching container " + container.getId()); nmClient.startContainer(container, ctx); } catch { case ex:Exception => logging.info("[AM] Error launching container " + container.getId() + " " + ex); } } } }
Example 53
Source File: User.scala From keycloak-benchmark with Apache License 2.0 | 5 votes |
package org.jboss.perf.model import java.util import java.util.Collections import org.keycloak.representations.idm.{RoleRepresentation, CredentialRepresentation, UserRepresentation} import scala.collection.JavaConverters._ case class User(val username: String, val password: String, var id: String, val active: Boolean, val realmRoles: List[String]) { def this(map: Map[String, String]) { this(map("username"), map("password"), map("id"), true, List()) } def getCredentials: CredentialRepresentation = { var credentials = new CredentialRepresentation credentials.setType(CredentialRepresentation.PASSWORD) credentials.setTemporary(false) credentials.setValue(password) credentials } def toMap: Map[String, String] = Map(("username", username), ("password", password), ("id", id)) def toRepresentation: UserRepresentation = { var representation = new UserRepresentation // Id is ignored representation.setUsername(username) if (active) { representation.setFirstName("Johny"); representation.setLastName("Active"); } else { representation.setFirstName("Bob"); representation.setLastName("Sleepy") } representation.setEnabled(active) // Actually the credentials will be ignored on server representation.setCredentials(Collections.singletonList(getCredentials)) representation.setRealmRoles(realmRoles.asJava) representation } def getRealmRoles(roleIds : Map[String, RoleRepresentation]): util.List[RoleRepresentation] = { realmRoles.map(r => roleIds.get(r).orNull).asJava } }
Example 54
Source File: Authorize.scala From keycloak-benchmark with Apache License 2.0 | 5 votes |
package io.gatling.keycloak import java.text.SimpleDateFormat import java.util.{Date, Collections} import akka.actor.ActorDSL.actor import akka.actor.ActorRef import io.gatling.core.action.Interruptable import io.gatling.core.action.builder.ActionBuilder import io.gatling.core.config.Protocols import io.gatling.core.result.writer.DataWriterClient import io.gatling.core.session._ import io.gatling.core.validation._ import org.jboss.logging.Logger import org.keycloak.adapters.spi.AuthOutcome import org.keycloak.adapters.KeycloakDeploymentBuilder import org.keycloak.adapters.spi.HttpFacade.Cookie import org.keycloak.common.enums.SslRequired import org.keycloak.representations.adapters.config.AdapterConfig import scala.collection.JavaConverters._ case class AuthorizeAttributes( requestName: Expression[String], uri: Expression[String], cookies: Expression[List[Cookie]], sslRequired: SslRequired = SslRequired.EXTERNAL, resource: String = null, password: String = null, realm: String = null, realmKey: String = null, authServerUrl: Expression[String] = _ => Failure("no server url") ) { def toAdapterConfig(session: Session) = { val adapterConfig = new AdapterConfig adapterConfig.setSslRequired(sslRequired.toString) adapterConfig.setResource(resource) adapterConfig.setCredentials(Collections.singletonMap("secret", password)) adapterConfig.setRealm(realm) adapterConfig.setRealmKey(realmKey) adapterConfig.setAuthServerUrl(authServerUrl(session).get) adapterConfig } } class AuthorizeActionBuilder(attributes: AuthorizeAttributes) extends ActionBuilder { def newInstance(attributes: AuthorizeAttributes) = new AuthorizeActionBuilder(attributes) def sslRequired(sslRequired: SslRequired) = newInstance(attributes.copy(sslRequired = sslRequired)) def resource(resource: String) = newInstance(attributes.copy(resource = resource)) def clientCredentials(password: String) = newInstance(attributes.copy(password = password)) def realm(realm: String) = newInstance(attributes.copy(realm = realm)) def realmKey(realmKey: String) = newInstance(attributes.copy(realmKey = realmKey)) def authServerUrl(authServerUrl: Expression[String]) = newInstance(attributes.copy(authServerUrl = authServerUrl)) override def build(next: ActorRef, protocols: Protocols): ActorRef = { actor(actorName("authorize"))(new AuthorizeAction(attributes, next)) } } object AuthorizeAction { val logger = Logger.getLogger(classOf[AuthorizeAction]) } class AuthorizeAction( attributes: AuthorizeAttributes, val next: ActorRef ) extends Interruptable with ExitOnFailure with DataWriterClient { override def executeOrFail(session: Session): Validation[_] = { val facade = new MockHttpFacade() val deployment = KeycloakDeploymentBuilder.build(attributes.toAdapterConfig(session)); facade.request.setURI(attributes.uri(session).get); facade.request.setCookies(attributes.cookies(session).get.map(c => (c.getName, c)).toMap.asJava) var nextSession = session val requestAuth: MockRequestAuthenticator = session(MockRequestAuthenticator.KEY).asOption[MockRequestAuthenticator] match { case Some(ra) => ra case None => val tmp = new MockRequestAuthenticator(facade, deployment, new MockTokenStore, -1, session.userId) nextSession = session.set(MockRequestAuthenticator.KEY, tmp) tmp } Blocking(() => { AuthorizeAction.logger.debugf("%s: Authenticating %s%n", new SimpleDateFormat("HH:mm:ss,SSS").format(new Date()).asInstanceOf[Any], session("username").as[Any], Unit) Stopwatch(() => requestAuth.authenticate()) .check(result => result == AuthOutcome.AUTHENTICATED, result => { 
AuthorizeAction.logger.warnf("%s: Failed auth %s%n", new SimpleDateFormat("HH:mm:ss,SSS").format(new Date()).asInstanceOf[Any], session("username").as[Any], Unit) result.toString }) .recordAndContinue(AuthorizeAction.this, nextSession, attributes.requestName(session).get) }) } }
Example 55
Source File: InternalRouterSpec.scala From lagom with Apache License 2.0 | 5 votes |
package com.lightbend.lagom.registry.impl import java.net.URI import java.util import java.util.Collections import com.lightbend.lagom.internal.javadsl.registry.ServiceRegistryService import com.lightbend.lagom.javadsl.api.ServiceAcl import com.lightbend.lagom.javadsl.api.transport.Method import com.lightbend.lagom.registry.impl.ServiceRegistryActor.Found import com.lightbend.lagom.registry.impl.ServiceRegistryActor.Route import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers class InternalRouterSpec extends AnyFlatSpec with Matchers { behavior.of("InternalRouter") it should "find the appropriate URI given the portName" in { val httpUri = new URI("http://localhost.com/pathABC") val httpsUri = new URI("https://localhost.com:123/pathABC") val simpleName = "my-service" val acl = ServiceAcl.methodAndPath(Method.GET, "/pathABC") val srs = ServiceRegistryService.of(util.Arrays.asList(httpUri, httpsUri), Collections.singletonList(acl)) val registry = new InternalRegistry(Map.empty) registry.register(simpleName, srs) val router = new InternalRouter router.rebuild(registry) router.routeFor(Route("GET", "/pathABC", None)) should be(Found(httpUri)) router.routeFor(Route("GET", "/pathABC", Some("http"))) should be(Found(httpUri)) router.routeFor(Route("GET", "/pathABC", Some("https"))) should be(Found(httpsUri)) } }
Example 56
Source File: KuduService.scala From pulse with Apache License 2.0 | 5 votes |
package io.phdata.pulse.logcollector import java.util.{ ArrayList, Collections } import com.typesafe.scalalogging.LazyLogging import io.phdata.pulse.common.domain.TimeseriesEvent import org.apache.kudu.client.SessionConfiguration.FlushMode import org.apache.kudu.client.{ CreateTableOptions, KuduClient, KuduException, KuduTable } import org.apache.kudu.{ ColumnSchema, Schema, Type } import scala.collection.concurrent object TimeseriesEventColumns { val TIMESTAMP = "ts" val KEY = "key" val TAG = "tag" val VALUE = "value" } private[logcollector] def getOrCreateTable(tableName: String): KuduTable = KerberosContext.runPrivileged { if (tableCache.contains(tableName)) { tableCache(tableName) } else if (!client.tableExists(tableName)) { logger.info(s"Kudu table not found: $tableName") val columns = new ArrayList[ColumnSchema] columns.add( new ColumnSchema.ColumnSchemaBuilder(TimeseriesEventColumns.TIMESTAMP, Type.UNIXTIME_MICROS).key(true).build) columns.add( new ColumnSchema.ColumnSchemaBuilder(TimeseriesEventColumns.KEY, Type.STRING) .key(true) .build) columns.add( new ColumnSchema.ColumnSchemaBuilder(TimeseriesEventColumns.TAG, Type.STRING) .key(true) .build) columns.add( new ColumnSchema.ColumnSchemaBuilder(TimeseriesEventColumns.VALUE, Type.DOUBLE) .key(false) .build) val schema = new Schema(columns) val opts = new CreateTableOptions() .setRangePartitionColumns(Collections.singletonList(TimeseriesEventColumns.TIMESTAMP)) .addHashPartitions(Collections.singletonList(TimeseriesEventColumns.KEY), 4) val table = client.createTable(tableName, schema, opts) tableCache.put(tableName, table) logger.info(s"Created Kudu table $tableName") table } else { val table = client.openTable(tableName) tableCache.put(tableName, table) table } } }
Example 57
Source File: ZookeeperDistributedQueue.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.resourcemanager.notify import java.util.Collections import com.webank.wedatasphere.linkis.common.utils.Logging import org.apache.zookeeper.ZooDefs.Ids import org.apache.zookeeper.{CreateMode, KeeperException, ZKUtil, ZooKeeper} import scala.collection.JavaConversions._ import scala.collection.mutable class ZookeeperDistributedQueue(zk: ZooKeeper, var queueName: String) extends DistributedQueue[Array[Byte]] with Logging { if (!queueName.startsWith("/")) queueName = "/" + queueName try if (zk.exists(queueName, false) == null) zk.create(queueName, new Array[Byte](0), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT) catch { case e: KeeperException => error(s"Failed to create queue[$queueName]: ", e) } override def offer(value: Array[Byte]): Unit = { zk.create(queueName + "/element", value, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT_SEQUENTIAL) } override def poll(): Array[Byte] = { val path = head() if (path == null) return null val value = zk.getData(path, false, null) zk.delete(path, -1) value } override def peek(): Array[Byte] = { val path = head() if (path == null) return null zk.getData(path, false, null) } override def destroy(): Unit = { try if (zk.exists(queueName, false) == null) info(s"Queue[$queueName] already destroyed.") else ZKUtil.deleteRecursive(zk, queueName) catch { case e: KeeperException => error(s"Failed to destroy queue[$queueName]: ", e) } } private def head(): String = { val elements = zk.getChildren(queueName, false) if (elements.size == 0) return null Collections.sort(elements) queueName + "/" + elements.get(0) } override def copyToArray(): Array[Array[Byte]] = { val elements = zk.getChildren(queueName, false) if (elements.size == 0) return new Array[Array[Byte]](0) elements.map({ e => zk.getData(queueName + "/" + e, false, null) }).toArray } def indexOf(bytes: Array[Byte]): String = { val elements = zk.getChildren(queueName, false) elements.find(e => bytes.equals(zk.getData(queueName + "/" + e, false, null))).getOrElse("") } def copyToMap(): mutable.Map[String, Array[Byte]] = { val resultMap = mutable.Map.empty[String, Array[Byte]] val elements = zk.getChildren(queueName, false) if (elements.size == 0) return resultMap elements.map(e => resultMap.put(e, zk.getData(queueName + "/" + e, false, null))) resultMap } def remove(index: String) = if (index.length != 0) zk.delete(queueName + "/" + index, -1) } object ZookeeperDistributedQueue { def apply(queueName: String): ZookeeperDistributedQueue = new ZookeeperDistributedQueue(ZookeeperUtils.getOrCreateZookeeper(), queueName) def apply(zk: ZooKeeper, queueName: String): ZookeeperDistributedQueue = new ZookeeperDistributedQueue(zk, queueName) }
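head() above works because ZooKeeper gives sequential znodes a zero-padded, monotonically increasing suffix, so a plain lexicographic Collections.sort puts the oldest element first. A tiny sketch of that ordering (node names are illustrative):

import java.util
import java.util.Collections

object ZnodeOrderSketch extends App {
  val children = new util.ArrayList[String](
    util.Arrays.asList("element0000000012", "element0000000003", "element0000000007"))

  Collections.sort(children)

  println(children.get(0))   // element0000000003 - the head of the queue
}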
Example 58
Source File: package.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution import java.util.Collections import scala.collection.JavaConverters._ import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.expressions.codegen.{CodeFormatter, CodegenContext, ExprCode} import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.catalyst.trees.TreeNodeRef import org.apache.spark.util.{AccumulatorV2, LongAccumulator} case class ColumnMetrics() { val elementTypes = new SetAccumulator[String] sparkContext.register(elementTypes) } val tupleCount: LongAccumulator = sparkContext.longAccumulator val numColumns: Int = child.output.size val columnStats: Array[ColumnMetrics] = Array.fill(child.output.size)(new ColumnMetrics()) def dumpStats(): Unit = { debugPrint(s"== ${child.simpleString} ==") debugPrint(s"Tuples output: ${tupleCount.value}") child.output.zip(columnStats).foreach { case (attr, metric) => // This is called on driver. All accumulator updates have a fixed value. So it's safe to use // `asScala` which accesses the internal values using `java.util.Iterator`. val actualDataTypes = metric.elementTypes.value.asScala.mkString("{", ",", "}") debugPrint(s" ${attr.name} ${attr.dataType}: $actualDataTypes") } } protected override def doExecute(): RDD[InternalRow] = { child.execute().mapPartitions { iter => new Iterator[InternalRow] { def hasNext: Boolean = iter.hasNext def next(): InternalRow = { val currentRow = iter.next() tupleCount.add(1) var i = 0 while (i < numColumns) { val value = currentRow.get(i, output(i).dataType) if (value != null) { columnStats(i).elementTypes.add(value.getClass.getName) } i += 1 } currentRow } } } } override def outputPartitioning: Partitioning = child.outputPartitioning override def inputRDDs(): Seq[RDD[InternalRow]] = { child.asInstanceOf[CodegenSupport].inputRDDs() } override def doProduce(ctx: CodegenContext): String = { child.asInstanceOf[CodegenSupport].produce(ctx, this) } override def doConsume(ctx: CodegenContext, input: Seq[ExprCode], row: ExprCode): String = { consume(ctx, input) } } }
Example 59
Source File: FiltersSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.client import java.util.Collections import org.apache.hadoop.hive.metastore.api.FieldSchema import org.apache.hadoop.hive.serde.serdeConstants import org.apache.spark.SparkFunSuite import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.types._ class FiltersSuite extends SparkFunSuite with Logging { private val shim = new Shim_v0_13 private val testTable = new org.apache.hadoop.hive.ql.metadata.Table("default", "test") private val varCharCol = new FieldSchema() varCharCol.setName("varchar") varCharCol.setType(serdeConstants.VARCHAR_TYPE_NAME) testTable.setPartCols(Collections.singletonList(varCharCol)) filterTest("string filter", (a("stringcol", StringType) > Literal("test")) :: Nil, "stringcol > \"test\"") filterTest("string filter backwards", (Literal("test") > a("stringcol", StringType)) :: Nil, "\"test\" > stringcol") filterTest("int filter", (a("intcol", IntegerType) === Literal(1)) :: Nil, "intcol = 1") filterTest("int filter backwards", (Literal(1) === a("intcol", IntegerType)) :: Nil, "1 = intcol") filterTest("int and string filter", (Literal(1) === a("intcol", IntegerType)) :: (Literal("a") === a("strcol", IntegerType)) :: Nil, "1 = intcol and \"a\" = strcol") filterTest("skip varchar", (Literal("") === a("varchar", StringType)) :: Nil, "") private def filterTest(name: String, filters: Seq[Expression], result: String) = { test(name) { val converted = shim.convertFilters(testTable, filters) if (converted != result) { fail( s"Expected filters ${filters.mkString(",")} to convert to '$result' but got '$converted'") } } } private def a(name: String, dataType: DataType) = AttributeReference(name, dataType)() }
Example 60
Source File: FlumeTestUtils.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.flume import java.net.{InetSocketAddress, ServerSocket} import java.nio.ByteBuffer import java.nio.charset.StandardCharsets import java.util.{List => JList} import java.util.Collections import scala.collection.JavaConverters._ import org.apache.avro.ipc.NettyTransceiver import org.apache.avro.ipc.specific.SpecificRequestor import org.apache.commons.lang3.RandomUtils import org.apache.flume.source.avro import org.apache.flume.source.avro.{AvroFlumeEvent, AvroSourceProtocol} import org.jboss.netty.channel.ChannelPipeline import org.jboss.netty.channel.socket.SocketChannel import org.jboss.netty.channel.socket.nio.NioClientSocketChannelFactory import org.jboss.netty.handler.codec.compression.{ZlibDecoder, ZlibEncoder} import org.apache.spark.util.Utils import org.apache.spark.SparkConf private class CompressionChannelFactory(compressionLevel: Int) extends NioClientSocketChannelFactory { override def newChannel(pipeline: ChannelPipeline): SocketChannel = { val encoder = new ZlibEncoder(compressionLevel) pipeline.addFirst("deflater", encoder) pipeline.addFirst("inflater", new ZlibDecoder()) super.newChannel(pipeline) } } }
Example 61
Source File: Utils.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler.cluster.mesos import java.util.Collections import scala.collection.JavaConverters._ import org.apache.mesos.Protos._ import org.apache.mesos.Protos.Value.{Range => MesosRange, Ranges, Scalar} import org.apache.mesos.SchedulerDriver import org.mockito.{ArgumentCaptor, Matchers} import org.mockito.Mockito._ object Utils { def createOffer( offerId: String, slaveId: String, mem: Int, cpus: Int, ports: Option[(Long, Long)] = None, gpus: Int = 0): Offer = { val builder = Offer.newBuilder() builder.addResourcesBuilder() .setName("mem") .setType(Value.Type.SCALAR) .setScalar(Scalar.newBuilder().setValue(mem)) builder.addResourcesBuilder() .setName("cpus") .setType(Value.Type.SCALAR) .setScalar(Scalar.newBuilder().setValue(cpus)) ports.foreach { resourcePorts => builder.addResourcesBuilder() .setName("ports") .setType(Value.Type.RANGES) .setRanges(Ranges.newBuilder().addRange(MesosRange.newBuilder() .setBegin(resourcePorts._1).setEnd(resourcePorts._2).build())) } if (gpus > 0) { builder.addResourcesBuilder() .setName("gpus") .setType(Value.Type.SCALAR) .setScalar(Scalar.newBuilder().setValue(gpus)) } builder.setId(createOfferId(offerId)) .setFrameworkId(FrameworkID.newBuilder() .setValue("f1")) .setSlaveId(SlaveID.newBuilder().setValue(slaveId)) .setHostname(s"host${slaveId}") .build() } def verifyTaskLaunched(driver: SchedulerDriver, offerId: String): List[TaskInfo] = { val captor = ArgumentCaptor.forClass(classOf[java.util.Collection[TaskInfo]]) verify(driver, times(1)).launchTasks( Matchers.eq(Collections.singleton(createOfferId(offerId))), captor.capture()) captor.getValue.asScala.toList } def createOfferId(offerId: String): OfferID = { OfferID.newBuilder().setValue(offerId).build() } def createSlaveId(slaveId: String): SlaveID = { SlaveID.newBuilder().setValue(slaveId).build() } def createExecutorId(executorId: String): ExecutorID = { ExecutorID.newBuilder().setValue(executorId).build() } def createTaskId(taskId: String): TaskID = { TaskID.newBuilder().setValue(taskId).build() } }
Example 62
Source File: RedeliveryActors.scala From kmq with Apache License 2.0 | 5 votes |
package com.softwaremill.kmq.redelivery import java.io.Closeable import java.util.Collections import akka.actor.{ActorSystem, Props} import com.softwaremill.kmq.{KafkaClients, KmqConfig} import com.typesafe.scalalogging.StrictLogging import scala.concurrent.Await import scala.concurrent.duration._ import scala.collection.JavaConverters._ object RedeliveryActors extends StrictLogging { def start(clients: KafkaClients, config: KmqConfig): Closeable = { val system = ActorSystem("kmq-redelivery") val consumeMarkersActor = system.actorOf(Props(new ConsumeMarkersActor(clients, config)), "consume-markers-actor") consumeMarkersActor ! DoConsume logger.info("Started redelivery actors") new Closeable { override def close(): Unit = Await.result(system.terminate(), 1.minute) } } }
Example 63
Source File: ForkJoinPoolMetrics.scala From prometheus-akka with Apache License 2.0 | 5 votes |
package com.workday.prometheus.akka import java.util.Collections import scala.collection.JavaConverters.seqAsJavaListConverter import scala.collection.concurrent.TrieMap import io.prometheus.client.Collector import io.prometheus.client.Collector.MetricFamilySamples import io.prometheus.client.GaugeMetricFamily object ForkJoinPoolMetrics extends Collector { val map = TrieMap[String, Option[ForkJoinPoolLike]]() this.register() override def collect(): java.util.List[MetricFamilySamples] = { val dispatcherNameList = List("dispatcherName").asJava val parallelismGauge = new GaugeMetricFamily("akka_dispatcher_forkjoinpool_parellelism", "Akka ForkJoinPool Dispatcher Parellelism", dispatcherNameList) val poolSizeGauge = new GaugeMetricFamily("akka_dispatcher_forkjoinpool_pool_size", "Akka ForkJoinPool Dispatcher Pool Size", dispatcherNameList) val activeThreadCountGauge = new GaugeMetricFamily("akka_dispatcher_forkjoinpool_active_thread_count", "Akka ForkJoinPool Dispatcher Active Thread Count", dispatcherNameList) val runningThreadCountGauge = new GaugeMetricFamily("akka_dispatcher_forkjoinpool_running_thread_count", "Akka ForkJoinPool Dispatcher Running Thread Count", dispatcherNameList) val queuedTaskCountGauge = new GaugeMetricFamily("akka_dispatcher_forkjoinpool_queued_task_count", "Akka ForkJoinPool Dispatcher Queued Task Count", dispatcherNameList) val queuedSubmissionCountGauge = new GaugeMetricFamily("akka_dispatcher_forkjoinpool_queued_submission_count", "Akka ForkJoinPool Dispatcher Queued Submission Count", dispatcherNameList) val stealCountGauge = new GaugeMetricFamily("akka_dispatcher_forkjoinpool_steal_count", "Akka ForkJoinPool Dispatcher Steal Count", dispatcherNameList) map.foreach { case (dispatcherName, fjpOption) => val dispatcherNameList = List(dispatcherName).asJava fjpOption match { case Some(fjp) => { parallelismGauge.addMetric(dispatcherNameList, fjp.getParallelism) poolSizeGauge.addMetric(dispatcherNameList, fjp.getPoolSize) activeThreadCountGauge.addMetric(dispatcherNameList, fjp.getActiveThreadCount) runningThreadCountGauge.addMetric(dispatcherNameList, fjp.getRunningThreadCount) queuedSubmissionCountGauge.addMetric(dispatcherNameList, fjp.getQueuedSubmissionCount) queuedTaskCountGauge.addMetric(dispatcherNameList, fjp.getQueuedTaskCount) stealCountGauge.addMetric(dispatcherNameList, fjp.getStealCount) } case None => { parallelismGauge.addMetric(dispatcherNameList, 0) poolSizeGauge.addMetric(dispatcherNameList, 0) activeThreadCountGauge.addMetric(dispatcherNameList, 0) runningThreadCountGauge.addMetric(dispatcherNameList, 0) queuedSubmissionCountGauge.addMetric(dispatcherNameList, 0) queuedTaskCountGauge.addMetric(dispatcherNameList, 0) stealCountGauge.addMetric(dispatcherNameList, 0) } } } val jul = new java.util.ArrayList[MetricFamilySamples] jul.add(parallelismGauge) jul.add(poolSizeGauge) jul.add(activeThreadCountGauge) jul.add(runningThreadCountGauge) jul.add(queuedSubmissionCountGauge) jul.add(queuedTaskCountGauge) jul.add(stealCountGauge) Collections.unmodifiableList(jul) } def add(dispatcherName: String, fjp: ForkJoinPoolLike): Unit = { map.put(dispatcherName, Some(fjp)) } def remove(dispatcherName: String): Unit = { map.put(dispatcherName, None) } }
Example 64
Source File: ThreadPoolMetrics.scala From prometheus-akka with Apache License 2.0 | 5 votes |
package com.workday.prometheus.akka import java.util.Collections import java.util.concurrent.ThreadPoolExecutor import scala.collection.JavaConverters.seqAsJavaListConverter import scala.collection.concurrent.TrieMap import io.prometheus.client.Collector import io.prometheus.client.Collector.MetricFamilySamples import io.prometheus.client.GaugeMetricFamily object ThreadPoolMetrics extends Collector { val map = TrieMap[String, Option[ThreadPoolExecutor]]() this.register() override def collect(): java.util.List[MetricFamilySamples] = { val dispatcherNameList = List("dispatcherName").asJava val activeThreadCountGauge = new GaugeMetricFamily("akka_dispatcher_threadpoolexecutor_active_thread_count", "Akka ThreadPool Dispatcher Active Thread Count", dispatcherNameList) val corePoolSizeGauge = new GaugeMetricFamily("akka_dispatcher_threadpoolexecutor_core_pool_size", "Akka ThreadPool Dispatcher Core Pool Size", dispatcherNameList) val currentPoolSizeGauge = new GaugeMetricFamily("akka_dispatcher_threadpoolexecutor_current_pool_size", "Akka ThreadPool Dispatcher Current Pool Size", dispatcherNameList) val largestPoolSizeGauge = new GaugeMetricFamily("akka_dispatcher_threadpoolexecutor_largest_pool_size", "Akka ThreadPool Dispatcher Largest Pool Size", dispatcherNameList) val maxPoolSizeGauge = new GaugeMetricFamily("akka_dispatcher_threadpoolexecutor_max_pool_size", "Akka ThreadPool Dispatcher Max Pool Size", dispatcherNameList) val completedTaskCountGauge = new GaugeMetricFamily("akka_dispatcher_threadpoolexecutor_completed_task_count", "Akka ThreadPoolExecutor Dispatcher Completed Task Count", dispatcherNameList) val totalTaskCountGauge = new GaugeMetricFamily("akka_dispatcher_threadpoolexecutor_total_task_count", "Akka ThreadPoolExecutor Dispatcher Total Task Count", dispatcherNameList) map.foreach { case (dispatcherName, tpeOption) => val dispatcherNameList = List(dispatcherName).asJava tpeOption match { case Some(tpe) => { activeThreadCountGauge.addMetric(dispatcherNameList, tpe.getActiveCount) corePoolSizeGauge.addMetric(dispatcherNameList, tpe.getCorePoolSize) currentPoolSizeGauge.addMetric(dispatcherNameList, tpe.getPoolSize) largestPoolSizeGauge.addMetric(dispatcherNameList, tpe.getLargestPoolSize) maxPoolSizeGauge.addMetric(dispatcherNameList, tpe.getMaximumPoolSize) completedTaskCountGauge.addMetric(dispatcherNameList, tpe.getCompletedTaskCount) totalTaskCountGauge.addMetric(dispatcherNameList, tpe.getTaskCount) } case None => { activeThreadCountGauge.addMetric(dispatcherNameList, 0) corePoolSizeGauge.addMetric(dispatcherNameList, 0) currentPoolSizeGauge.addMetric(dispatcherNameList, 0) largestPoolSizeGauge.addMetric(dispatcherNameList, 0) maxPoolSizeGauge.addMetric(dispatcherNameList, 0) completedTaskCountGauge.addMetric(dispatcherNameList, 0) totalTaskCountGauge.addMetric(dispatcherNameList, 0) } } } val jul = new java.util.ArrayList[MetricFamilySamples] jul.add(activeThreadCountGauge) jul.add(corePoolSizeGauge) jul.add(currentPoolSizeGauge) jul.add(largestPoolSizeGauge) jul.add(maxPoolSizeGauge) jul.add(completedTaskCountGauge) jul.add(totalTaskCountGauge) Collections.unmodifiableList(jul) } def add(dispatcherName: String, tpe: ThreadPoolExecutor): Unit = { map.put(dispatcherName, Some(tpe)) } def remove(dispatcherName: String): Unit = { map.put(dispatcherName, None) } }
Example 65
Source File: RecordSpec.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.ledger.javaapi.data import java.util.Collections import com.daml.ledger.api.v1.ValueOuterClass import org.scalatest.{FlatSpec, Matchers} import collection.JavaConverters._ class RecordSpec extends FlatSpec with Matchers { behavior of "Record.fromProto" it should "build a record from a grpc.Record without fields" in { val fields = Collections.emptyList() val recordValue = ValueOuterClass.Record.newBuilder().addAllFields(fields).build() val record = Record.fromProto(recordValue) record.getFields shouldBe empty record.getFieldsMap shouldBe empty } // XXX SC remove in 2.13; see notes in ConfSpec import scala.collection.GenTraversable, org.scalatest.enablers.Aggregating private[this] implicit def `fixed sig aggregatingNatureOfGenTraversable`[ E: org.scalactic.Equality, TRAV]: Aggregating[TRAV with GenTraversable[E]] = Aggregating.aggregatingNatureOfGenTraversable[E, GenTraversable] it should "build a record with an empty field map if there are no labels" in { val fields = List( ValueOuterClass.RecordField .newBuilder() .setValue(ValueOuterClass.Value.newBuilder().setInt64(1l)) .build(), ValueOuterClass.RecordField .newBuilder() .setValue(ValueOuterClass.Value.newBuilder().setInt64(2l)) .build() ).asJava val recordValue = ValueOuterClass.Record.newBuilder().addAllFields(fields).build() val record = Record.fromProto(recordValue) record.getFields should contain theSameElementsInOrderAs List( new Record.Field(new Int64(1l)), new Record.Field(new Int64(2l))) record.getFieldsMap shouldBe empty } it should "build a record with a full field map if there are labels" in { val fields = List( ValueOuterClass.RecordField .newBuilder() .setLabel("label1") .setValue(ValueOuterClass.Value.newBuilder().setInt64(1l)) .build(), ValueOuterClass.RecordField .newBuilder() .setLabel("label2") .setValue(ValueOuterClass.Value.newBuilder().setInt64(2l)) .build() ).asJava val recordValue = ValueOuterClass.Record.newBuilder().addAllFields(fields).build() val record = Record.fromProto(recordValue) record.getFields should contain theSameElementsInOrderAs List( new Record.Field("label1", new Int64(1l)), new Record.Field("label2", new Int64(2l))) record.getFieldsMap.asScala should contain theSameElementsAs Map( "label1" -> new Int64(1l), "label2" -> new Int64(2l)) } }
Example 66
Source File: ScalaHighlightExitPointsHandler.scala From intellij-lsp with Apache License 2.0 | 5 votes |
package org.jetbrains.plugins.scala.highlighter.usages import java.util import java.util.Collections import com.intellij.codeInsight.highlighting.HighlightUsagesHandlerBase import com.intellij.openapi.editor.Editor import com.intellij.psi.{PsiElement, PsiFile} import com.intellij.util.Consumer import org.jetbrains.plugins.scala.lang.psi.api.statements.ScFunctionDefinition class ScalaHighlightExitPointsHandler(fun: ScFunctionDefinition, editor: Editor, file: PsiFile, keyword: PsiElement) extends HighlightUsagesHandlerBase[PsiElement](editor, file) { def computeUsages(targets: util.List[PsiElement]) { val usages = fun.returnUsages().toSeq :+ keyword usages.map(_.getTextRange).foreach(myReadUsages.add) } def selectTargets(targets: util.List[PsiElement], selectionConsumer: Consumer[util.List[PsiElement]]) { selectionConsumer.consume(targets) } def getTargets: util.List[PsiElement] = Collections.singletonList(keyword) }
Example 67
Source File: SparkSQLExprMapperTest.scala From morpheus with Apache License 2.0 | 5 votes |
package org.opencypher.morpheus.impl import java.util.Collections import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.types.{IntegerType, StructField, StructType} import org.opencypher.morpheus.api.value.MorpheusElement._ import org.opencypher.morpheus.impl.ExprEval._ import org.opencypher.morpheus.impl.SparkSQLExprMapper._ import org.opencypher.morpheus.testing.fixture.SparkSessionFixture import org.opencypher.okapi.api.types.CTInteger import org.opencypher.okapi.api.value.CypherValue.CypherMap import org.opencypher.okapi.ir.api.expr._ import org.opencypher.okapi.relational.impl.table.RecordHeader import org.opencypher.okapi.testing.BaseTestSuite import scala.language.implicitConversions class SparkSQLExprMapperTest extends BaseTestSuite with SparkSessionFixture { val vA: Var = Var("a")(CTInteger) val vB: Var = Var("b")(CTInteger) val header: RecordHeader = RecordHeader.from(vA, vB) it("converts prefix id expressions") { val id = 257L val prefix = 2.toByte val expr = PrefixId(ToId(IntegerLit(id)), prefix) expr.eval.asInstanceOf[Array[_]].toList should equal(prefix :: id.encodeAsMorpheusId.toList) } it("converts a CypherInteger to an ID") { val id = 257L val expr = ToId(IntegerLit(id)) expr.eval.asInstanceOf[Array[_]].toList should equal(id.encodeAsMorpheusId.toList) } it("converts a CypherInteger to an ID and prefixes it") { val id = 257L val prefix = 2.toByte val expr = PrefixId(ToId(IntegerLit(id)), prefix) expr.eval.asInstanceOf[Array[_]].toList should equal(prefix :: id.encodeAsMorpheusId.toList) } it("converts a CypherInteger literal") { val id = 257L val expr = IntegerLit(id) expr.eval.asInstanceOf[Long] should equal(id) } private def convert(expr: Expr, header: RecordHeader = header): Column = { expr.asSparkSQLExpr(header, df, CypherMap.empty) } val df: DataFrame = sparkSession.createDataFrame( Collections.emptyList[Row](), StructType(Seq(StructField(header.column(vA), IntegerType), StructField(header.column(vB), IntegerType)))) implicit def extractRecordHeaderFromResult[T](tuple: (RecordHeader, T)): RecordHeader = tuple._1 } object ExprEval { implicit class ExprOps(val expr: Expr) extends AnyVal { def eval(implicit spark: SparkSession): Any = { val df = spark.createDataFrame( Collections.emptyList[Row](), StructType(Seq.empty)) expr.asSparkSQLExpr(RecordHeader.empty, df, CypherMap.empty).expr.eval(InternalRow.empty) } } }
Example 68
Source File: IotHubPartitionSource.scala From toketi-kafka-connect-iothub with MIT License | 5 votes |
// Copyright (c) Microsoft. All rights reserved. package com.microsoft.azure.iot.kafka.connect.source import java.util.{Collections, Map} import com.typesafe.scalalogging.LazyLogging import org.apache.kafka.connect.data.Struct import org.apache.kafka.connect.errors.ConnectException import org.apache.kafka.connect.source.SourceRecord import scala.collection.mutable.ListBuffer import scala.util.control.NonFatal class IotHubPartitionSource(val dataReceiver: DataReceiver, val partition: String, val topic: String, val batchSize: Int, val eventHubName: String, val sourcePartition: Map[String, String]) extends LazyLogging with JsonSerialization { def getRecords: List[SourceRecord] = { logger.debug(s"Polling for data from eventHub $eventHubName partition $partition") val list = ListBuffer.empty[SourceRecord] try { val messages: Iterable[IotMessage] = this.dataReceiver.receiveData(batchSize) if (messages.isEmpty) { logger.debug(s"Finished processing all messages from eventHub $eventHubName " + s"partition ${this.partition}") } else { logger.debug(s"Received ${messages.size} messages from eventHub $eventHubName " + s"partition ${this.partition} (requested $batchSize batch)") for (msg: IotMessage <- messages) { val kafkaMessage: Struct = IotMessageConverter.getIotMessageStruct(msg) val sourceOffset = Collections.singletonMap("EventHubOffset", kafkaMessage.getString(IotMessageConverter.offsetKey)) val sourceRecord = new SourceRecord(sourcePartition, sourceOffset, this.topic, kafkaMessage.schema(), kafkaMessage) list += sourceRecord } } } catch { case NonFatal(e) => val errorMsg = s"Error while getting SourceRecords for eventHub $eventHubName " + s"partition $partition. Exception - ${e.toString} Stack trace - ${e.printStackTrace()}" logger.error(errorMsg) throw new ConnectException(errorMsg, e) } logger.debug(s"Obtained ${list.length} SourceRecords from IotHub") list.toList } }
Example 69
Source File: S3ObjectUploader.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.tools.neptune.export import java.io._ import java.util import java.util.concurrent.{Executors, TimeoutException} import java.util.stream.Collectors import java.util.{Collections, Vector} import com.amazonaws.auth.profile.ProfileCredentialsProvider import com.amazonaws.services.s3.AmazonS3ClientBuilder import com.amazonaws.services.s3.model.{ObjectMetadata, PutObjectRequest} import com.amazonaws.{AmazonServiceException, ClientConfiguration, Protocol, SdkClientException} import org.apache.commons.io.{FileUtils, IOUtils} import org.slf4j.LoggerFactory import scala.concurrent.{Await, ExecutionContext, Future} import scala.concurrent.duration.{FiniteDuration, _} object S3ObjectUploader{ val executor = Executors.newFixedThreadPool(1) implicit val ec: ExecutionContext = scala.concurrent.ExecutionContext.fromExecutor(executor) protected lazy val logger = LoggerFactory.getLogger("s3_uploader") def init(proxyHost:Option[String], proxyPort:Option[Int]) = { val clientRegion = "us-east-1" val config = new ClientConfiguration config.setProtocol(Protocol.HTTPS) proxyHost.foreach(host => config.setProxyHost(host)) proxyPort.foreach(port => config.setProxyPort(port)) val s3Client = AmazonS3ClientBuilder.standard() .withRegion(clientRegion) .withClientConfiguration(config) .withCredentials(new ProfileCredentialsProvider()) .build() s3Client } def persistChunkToS3Bucket(chunkData:String, fileName:String, proxyHost:Option[String], proxyPort:Option[Int], s3Directory:String) = { try{ init(proxyHost, proxyPort).putObject(s3Directory, fileName, chunkData) } catch { case e: AmazonServiceException => e.printStackTrace() throw e case e: SdkClientException => e.printStackTrace() throw e } } def persistChunkToS3Bucket(tmpFile:File, proxyHost:Option[String], proxyPort:Option[Int], s3Directory:String, retryCount:Int = 3):Unit = { try{ val s3UploadTask = Future{init(proxyHost, proxyPort).putObject(s3Directory, tmpFile.getName, tmpFile)}(ec) Await.result(s3UploadTask, 5.minutes) tmpFile.delete() } catch { case e:TimeoutException => if(retryCount > 0) { logger.error("S3 upload task run more than 5 minutes..Going to retry") persistChunkToS3Bucket(tmpFile, proxyHost, proxyPort, s3Directory, retryCount-1) } else{ throw new Exception( "S3 upload task duration was more than 5 minutes") } case e: AmazonServiceException => e.printStackTrace() throw e case e: SdkClientException => e.printStackTrace() throw e } } }
Example 70
Source File: BasicConsumerExample.scala From kafka_training with Apache License 2.0 | 5 votes |
package com.malaska.kafka.training import java.util import java.util.{Collections, Properties} import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRebalanceListener, ConsumerRecord, KafkaConsumer} import org.apache.kafka.common.TopicPartition object BasicConsumerExample { def main(args:Array[String]): Unit = { val kafkaServerURL = args(0) val kafkaServerPort = args(1) val topic = args(2) println("Setting up parameters") val props = new Properties() props.put("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort) props.put(ConsumerConfig.GROUP_ID_CONFIG, "TrainingConsumer"); props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true"); props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000"); props.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000"); props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer"); props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer"); println("Creating Consumer") val consumer = new KafkaConsumer[String,String](props) val listener = new RebalanceListener consumer.subscribe(Collections.singletonList(topic), listener) println("Starting Consumer") while (true) { val records = consumer.poll(1000) val it = records.iterator() while (it.hasNext) { val record = it.next() println("Received message: (" + record.key() + ", " + record.value() + ") at offset " + record.offset()) } } } } class RebalanceListener extends ConsumerRebalanceListener { override def onPartitionsAssigned(collection: util.Collection[TopicPartition]): Unit = { print("Assigned Partitions:") val it = collection.iterator() while (it.hasNext) { print(it.next().partition() + ",") } println } override def onPartitionsRevoked(collection: util.Collection[TopicPartition]): Unit = { print("Revoked Partitions:") val it = collection.iterator() while (it.hasNext) { print(it.next().partition() + ",") } println } }
Example 71
Source File: LatencyConsumerExample.scala From kafka_training with Apache License 2.0 | 5 votes |
package com.malaska.kafka.training import java.util import java.util.{Collections, Properties} import net.liftweb.json.DefaultFormats import net.liftweb.json.Serialization.read import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer} import scala.collection.mutable object LatencyConsumerExample { def main(args:Array[String]): Unit = { val kafkaServerURL = args(0) val kafkaServerPort = args(1) val topic = args(2) println("Setting up parameters") val props = new Properties() props.put("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort) props.put(ConsumerConfig.GROUP_ID_CONFIG, "TrainingConsumer") props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true") props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000") props.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000") props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer") props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer") println("Creating Consumer") val consumer = new KafkaConsumer[String,String](props) consumer.subscribe(Collections.singletonList(topic)) implicit val formats = DefaultFormats var maxLatency = 0l var minLatency = 100000l var latencyN = 0f var latencyCount = 0l val lastNLatencies = new mutable.MutableList[Long] println("Starting Consumer") while (true) { val records = consumer.poll(1000) val it = records.iterator() while (it.hasNext) { val record = it.next() val exampleMessage = read[ExampleMessage](record.value()) val latency = System.currentTimeMillis() - exampleMessage.sentTime maxLatency = Math.max(latency, maxLatency) minLatency = Math.min(latency, minLatency) latencyN += latency latencyCount += 1 lastNLatencies += latency if (latencyCount % 10 == 0) { println("MessageCount:" + latencyCount + ",MaxLatency:" + maxLatency + ",MinLatency:" + minLatency + ",AverageLatency:" + (latencyN/latencyCount) + ",LastN:(" + lastNLatencies.mkString(",") + ")") lastNLatencies.clear() } } } } }
Example 72
Source File: DruidQueryExecutionMetric.scala From spark-druid-olap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.sparklinedata.execution.metrics import java.util.{ArrayList, Collections} import org.apache.spark.util.AccumulatorV2 import org.sparklinedata.druid.metadata.{DruidQueryExecutionView, DruidQueryHistory} class DruidQueryExecutionMetric extends AccumulatorV2[DruidQueryExecutionView, java.util.List[DruidQueryExecutionView]] { import scala.collection.JavaConverters._ private val _list: java.util.List[DruidQueryExecutionView] = Collections.synchronizedList(new ArrayList[DruidQueryExecutionView]()) private def getList : java.util.List[DruidQueryExecutionView] = { if (isAtDriverSide) DruidQueryHistory.getHistory.asJava else _list } override def isZero: Boolean = { _list.isEmpty } override def copy(): DruidQueryExecutionMetric = { val newAcc = new DruidQueryExecutionMetric newAcc._list.addAll(_list) newAcc } override def reset(): Unit = { _list.clear() } override def add(v: DruidQueryExecutionView): Unit = { if (isAtDriverSide) DruidQueryHistory.add(v) else _list.add(v) } private def addAll(v: java.util.List[DruidQueryExecutionView]): Unit = { v.asScala.foreach(add(_)) } override def merge(other: AccumulatorV2[DruidQueryExecutionView, java.util.List[DruidQueryExecutionView]]): Unit = other match { case o: DruidQueryExecutionMetric => { addAll(o._list) } case _ => throw new UnsupportedOperationException( s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}") } override def value = _list.synchronized { java.util.Collections.unmodifiableList(getList) } private[spark] def setValue(newValue: java.util.List[DruidQueryExecutionView]): Unit = { reset() addAll(newValue) } }
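A minimal sketch of how such an AccumulatorV2 is typically wired up, assuming an existing `SparkContext` named `sc`; the accumulator name is illustrative.

// Register on the driver, add from executors, read the merged history back on the driver.
val metric = new DruidQueryExecutionMetric()
sc.register(metric, "druidQueryExecutions")
// ... executors call metric.add(view: DruidQueryExecutionView) while queries run ...
val history: java.util.List[DruidQueryExecutionView] = metric.value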
Example 73
Source File: GSetImpl.scala From cloudstate with Apache License 2.0 | 5 votes |
package io.cloudstate.javasupport.impl.crdt import java.util import java.util.Collections import io.cloudstate.javasupport.crdt.GSet import io.cloudstate.javasupport.impl.AnySupport import io.cloudstate.protocol.crdt.{CrdtDelta, CrdtState, GSetDelta, GSetState} import com.google.protobuf.any.{Any => ScalaPbAny} import scala.collection.JavaConverters._ private[crdt] final class GSetImpl[T](anySupport: AnySupport) extends util.AbstractSet[T] with InternalCrdt with GSet[T] { override final val name = "GSet" private val value = new util.HashSet[T]() private val added = new util.HashSet[ScalaPbAny]() override def size(): Int = value.size() override def isEmpty: Boolean = super.isEmpty override def contains(o: Any): Boolean = value.contains(o) override def add(e: T): Boolean = if (value.contains(e)) { false } else { added.add(anySupport.encodeScala(e)) value.add(e) } override def remove(o: Any): Boolean = throw new UnsupportedOperationException("Cannot remove elements from a GSet") override def iterator(): util.Iterator[T] = Collections.unmodifiableSet(value).iterator() override def hasDelta: Boolean = !added.isEmpty override def delta: Option[CrdtDelta.Delta] = if (hasDelta) { Some(CrdtDelta.Delta.Gset(GSetDelta(added.asScala.toVector))) } else None override def resetDelta(): Unit = added.clear() override def state: CrdtState.State = CrdtState.State.Gset(GSetState(value.asScala.toSeq.map(anySupport.encodeScala))) override val applyDelta = { case CrdtDelta.Delta.Gset(GSetDelta(added, _)) => value.addAll(added.map(e => anySupport.decode(e).asInstanceOf[T]).asJava) } override val applyState = { case CrdtState.State.Gset(GSetState(value, _)) => this.value.clear() this.value.addAll(value.map(e => anySupport.decode(e).asInstanceOf[T]).asJava) } override def toString = s"GSet(${value.asScala.mkString(",")})" }
Example 74
Source File: ImageInputFormat.scala From flink-tensorflow with Apache License 2.0 | 5 votes |
package org.apache.flink.contrib.tensorflow.examples.inception import java.io.IOException import java.util.Collections import com.twitter.bijection.Conversion._ import org.apache.flink.api.common.io.GlobFilePathFilter import org.apache.flink.configuration.Configuration import org.apache.flink.contrib.tensorflow._ import org.apache.flink.contrib.tensorflow.common.functions.util.ModelUtils import org.apache.flink.contrib.tensorflow.io.WholeFileInputFormat import org.apache.flink.contrib.tensorflow.io.WholeFileInputFormat._ import org.apache.flink.core.fs.{FSDataInputStream, Path} import org.slf4j.{Logger, LoggerFactory} import org.tensorflow.contrib.scala.ByteStrings._ import resource._ import scala.collection.JavaConverters._ override def readRecord( reuse: (String,ImageTensorValue), filePath: Path, fileStream: FSDataInputStream, fileLength: Long): (String,ImageTensorValue) = { if(fileLength > Int.MaxValue) { throw new IllegalArgumentException("the file is too large to be fully read") } val imageData = readFully(fileStream, new Array[Byte](fileLength.toInt), 0, fileLength.toInt).asByteString[ImageFile] val imageTensor: ImageTensorValue = managed(imageData.as[ImageFileTensor]) .flatMap(x => model.normalize(x)) .acquireAndGet(_.toValue) (filePath.getName, imageTensor) } } object ImageInputFormat { def apply(): ImageInputFormat = new ImageInputFormat }
Example 75
Source File: Generator.scala From play-soap with Apache License 2.0 | 5 votes |
package play.soap.docs import java.io.File import java.util.Collections import org.apache.commons.io.FileUtils import org.pegdown.ast.WikiLinkNode import org.pegdown.VerbatimSerializer import org.pegdown.LinkRenderer import org.pegdown.Extensions import org.pegdown.PegDownProcessor import play.doc.PrettifyVerbatimSerializer import play.twirl.api.Html object Generator extends App { val outDir = new File(args(0)) val inDir = new File(args(1)) val inPages = args.drop(2) val parser = new PegDownProcessor(Extensions.ALL) val linkRenderer = new LinkRenderer { import LinkRenderer.Rendering override def render(node: WikiLinkNode) = { node.getText.split("\\|", 2) match { case Array(name) => new Rendering(name + ".html", name) case Array(title, name) => new Rendering(name + ".html", title) case _ => new Rendering(node.getText + ".html", node.getText) } } } val verbatimSerializer = Collections.singletonMap[String, VerbatimSerializer](VerbatimSerializer.DEFAULT, PrettifyVerbatimSerializer) val nav = Seq( "Home" -> "Home", "Using sbt WSDL" -> "SbtWsdl", "Using the Play SOAP client" -> "PlaySoapClient", "Using JAX WS Handlers" -> "Handlers", "Security" -> "Security" ) val titleMap = nav.map(t => t._2 -> t._1).toMap // Ensure target directory exists outDir.mkdirs() inPages.foreach { name => val inFile = new File(inDir, name + ".md") val markdown = FileUtils.readFileToString(inFile) val htmlSnippet = parser.markdownToHtml(markdown, linkRenderer, verbatimSerializer) val title = titleMap.get(name) val htmlPage = html.template(title, nav)(Html(htmlSnippet)) FileUtils.writeStringToFile(new File(outDir, name + ".html"), htmlPage.body) } }
Example 76
Source File: ScalaHighlightPrimaryConstructorExpressionsHandler.scala From intellij-lsp with Apache License 2.0 | 5 votes |
package org.jetbrains.plugins.scala.highlighter.usages import java.util import java.util.Collections import com.intellij.codeInsight.highlighting.HighlightUsagesHandlerBase import com.intellij.openapi.editor.Editor import com.intellij.psi.{PsiElement, PsiFile} import com.intellij.util.Consumer import org.jetbrains.plugins.scala.lang.psi.api.statements.{ScPatternDefinition, ScVariableDefinition} import org.jetbrains.plugins.scala.lang.psi.api.toplevel.typedef.ScTemplateDefinition class ScalaHighlightPrimaryConstructorExpressionsHandler(templateDef: ScTemplateDefinition, editor: Editor, file: PsiFile, keyword: PsiElement) extends HighlightUsagesHandlerBase[PsiElement](editor, file) { def computeUsages(targets: util.List[PsiElement]) { val eb = templateDef.extendsBlock val varAndValDefsExprs = eb.members.flatMap { case p: ScPatternDefinition => p.expr.toList // we include lazy vals, perhaps they could be excluded. case v: ScVariableDefinition => v.expr.toList case _ => Seq.empty } val usages = varAndValDefsExprs ++ eb.templateBody.toList.flatMap(_.exprs) :+ keyword usages.map(_.getTextRange).foreach(myReadUsages.add) } def selectTargets(targets: util.List[PsiElement], selectionConsumer: Consumer[util.List[PsiElement]]) { selectionConsumer.consume(targets) } def getTargets: util.List[PsiElement] = Collections.singletonList(keyword) }
Example 77
Source File: ScalaHighlightExprResultHandler.scala From intellij-lsp with Apache License 2.0 | 5 votes |
package org.jetbrains.plugins.scala.highlighter.usages import java.util import java.util.Collections import com.intellij.codeInsight.highlighting.HighlightUsagesHandlerBase import com.intellij.openapi.editor.Editor import com.intellij.psi.{PsiElement, PsiFile} import com.intellij.util.Consumer import org.jetbrains.plugins.scala.lang.psi.api.expr.ScExpression class ScalaHighlightExprResultHandler(expr: ScExpression, editor: Editor, file: PsiFile, keyword: PsiElement) extends HighlightUsagesHandlerBase[PsiElement](editor, file) { def computeUsages(targets: util.List[PsiElement]) { val returns = expr.calculateReturns() :+ keyword returns.map(_.getTextRange).foreach(myReadUsages.add) } def selectTargets(targets: util.List[PsiElement], selectionConsumer: Consumer[util.List[PsiElement]]) { selectionConsumer.consume(targets) } def getTargets: util.List[PsiElement] = Collections.singletonList(keyword) }
Example 78
Source File: DeduplicatedCollectorRegistry.scala From spark-metrics with Apache License 2.0 | 5 votes |
package com.banzaicloud.spark.metrics import java.{lang, util} import java.util.Collections import io.prometheus.client.{Collector, CollectorRegistry} import scala.collection.JavaConverters._ import org.apache.spark.internal.Logging import scala.util.{Failure, Try} class DeduplicatedCollectorRegistry(parent: CollectorRegistry = CollectorRegistry.defaultRegistry) extends CollectorRegistry with Logging { private type MetricsEnum = util.Enumeration[Collector.MetricFamilySamples] override def register(m: Collector): Unit = { // in case collectors with the same name are registered multiple times keep the first one Try(parent.register(m)) match { case Failure(ex) if ex.getMessage.startsWith("Collector already registered that provides name:") => // TODO: find a more robust solution for checking if there is already a collector registered for a specific metric case Failure(ex) => throw ex case _ => } } override def unregister(m: Collector): Unit = parent.unregister(m) override def clear(): Unit = parent.clear() override def getSampleValue(name: String, labelNames: Array[String], labelValues: Array[String]): lang.Double = { parent.getSampleValue(name, labelNames, labelValues) } override def getSampleValue(name: String): lang.Double = parent.getSampleValue(name) override def metricFamilySamples(): MetricsEnum = { deduplicate(parent.metricFamilySamples()) } override def filteredMetricFamilySamples(includedNames: util.Set[String]): MetricsEnum = { deduplicate(parent.filteredMetricFamilySamples(includedNames)) } private def deduplicate(source: MetricsEnum): MetricsEnum = { val metrics = source.asScala.toSeq val deduplicated = metrics .groupBy(f => (f.name, f.`type`)) .flatMap { case (_, single) if single.lengthCompare(2) < 0 => single case ((name, metricType), duplicates) => logDebug(s"Found ${duplicates.length} metrics with the same name '${name}' and type ${metricType}") duplicates.lastOption } .toList .asJava Collections.enumeration(deduplicated) } }
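A short usage sketch, assuming the Prometheus `simpleclient_common` artifact is on the classpath for the text-format writer; everything else comes from the listing above.

import java.io.StringWriter
import io.prometheus.client.exporter.common.TextFormat

// Wrap the default registry and scrape a de-duplicated snapshot in the Prometheus text format.
val registry = new DeduplicatedCollectorRegistry()
val writer = new StringWriter()
TextFormat.write004(writer, registry.metricFamilySamples())
println(writer.toString)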
Example 79
Source File: EventLoopExamples.scala From netty-in-action-scala with Apache License 2.0 | 5 votes |
package nia.chapter7 import java.util.Collections import collection.JavaConverters.asScalaBufferConverter def executeTaskInEventLoop(): Unit = { val terminated = true //... while (!terminated) { //Block until there are events ready to run val readyEvents = blockUntilEventsReady for (ev: Runnable ← readyEvents.asScala) { //Loop over and process all of the events ev.run() } } } private def blockUntilEventsReady = Collections.singletonList[Runnable](new Runnable() { override def run(): Unit = { try { Thread.sleep(1000) } catch { case e: InterruptedException ⇒ e.printStackTrace() } } }) }
Example 80
Source File: ByKeyAdditiveAccumulator.scala From spark-records with Apache License 2.0 | 5 votes |
package com.swoop.spark.accumulators import java.util.Collections import org.apache.spark.util.AccumulatorV2 override lazy val value: java.util.Map[A, B] = Collections.synchronizedMap(_map) // Delaying full synchronization allows merge() to be faster as it uses unsafeAdd() override def isZero: Boolean = _map.isEmpty override def copyAndReset(): ByKeyAdditiveAccumulator[A, B] = new ByKeyAdditiveAccumulator() override def copy(): ByKeyAdditiveAccumulator[A, B] = { val newAcc = new ByKeyAdditiveAccumulator[A, B] _map.synchronized { newAcc._map.putAll(_map) } newAcc } override def reset(): Unit = _map.clear() override def add(v: (A, B)): Unit = _map.synchronized { unsafeAdd(v._1, v._2) } override def merge(other: AccumulatorV2[(A, B), java.util.Map[A, B]]): Unit = other match { case o: ByKeyAdditiveAccumulator[A, B] => _map.synchronized { other.synchronized { import scala.collection.JavaConversions._ o._map.foreach((unsafeAdd _).tupled) } } case _ => throw new UnsupportedOperationException( s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}") } private def unsafeAdd(k: A, v: B) = { val num = implicitly[Numeric[B]] val existing = if (_map.containsKey(k)) _map.get(k) else num.zero _map.put(k, num.plus(existing, v)) } }
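An illustrative driver-side sketch, assuming an existing `SparkContext` named `sc`; the sample data and accumulator name are placeholders.

// Count occurrences per key across all executors.
val counts = new ByKeyAdditiveAccumulator[String, Long]()
sc.register(counts, "countsByKey")
sc.parallelize(Seq("a", "b", "a")).foreach(k => counts.add(k -> 1L))
// Back on the driver: counts.value is a java.util.Map, here {"a" -> 2, "b" -> 1}.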
Example 81
Source File: KafkaConsumer.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.wmexchanger.wmconsumer import java.io.File import java.time.Duration import java.util.Collections import java.util.ConcurrentModificationException import java.util.Properties import org.apache.kafka.clients.consumer.{KafkaConsumer => ApacheKafkaConsumer} import org.clulab.wm.wmexchanger.utils.Closer.AutoCloser import org.clulab.wm.wmexchanger.utils.FileUtils import org.clulab.wm.wmexchanger.utils.FileEditor import org.json4s._ import org.slf4j.Logger import org.slf4j.LoggerFactory class KafkaConsumer(properties: Properties, closeDuration: Int, topic: String, outputDir: String) { import KafkaConsumer._ implicit val formats: DefaultFormats.type = org.json4s.DefaultFormats logger.info("Opening consumer...") protected val consumer: ApacheKafkaConsumer[String, String] = { val consumer = new ApacheKafkaConsumer[String, String](properties) consumer.subscribe(Collections.singletonList(topic)) consumer } def poll(duration: Int): Unit = { val records = consumer.poll(Duration.ofSeconds(duration)) logger.info(s"Polling ${records.count} records...") records.forEach { record => val key = record.key val value = record.value // Imply an extension on the file so that it can be replaced. val file = FileEditor(new File(key + ".")).setDir(outputDir).setExt("json").get logger.info("Consuming " + file.getName) FileUtils.printWriterFromFile(file).autoClose { printWriter => printWriter.print(value) } } } def close(): Unit = { logger.info("Closing consumer...") try { consumer.close(Duration.ofSeconds(closeDuration)) } catch { case _: ConcurrentModificationException => // KafkaConsumer is not safe for multi-threaded access } } } object KafkaConsumer { val logger: Logger = LoggerFactory.getLogger(this.getClass) }
Example 82
Source File: FiltersSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.client import java.util.Collections import org.apache.hadoop.hive.metastore.api.FieldSchema import org.apache.hadoop.hive.serde.serdeConstants import org.apache.spark.SparkFunSuite import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.types._ class FiltersSuite extends SparkFunSuite with Logging { private val shim = new Shim_v0_13 private val testTable = new org.apache.hadoop.hive.ql.metadata.Table("default", "test") private val varCharCol = new FieldSchema() varCharCol.setName("varchar") varCharCol.setType(serdeConstants.VARCHAR_TYPE_NAME) testTable.setPartCols(Collections.singletonList(varCharCol)) filterTest("string filter", (a("stringcol", StringType) > Literal("test")) :: Nil, "stringcol > \"test\"") filterTest("string filter backwards", (Literal("test") > a("stringcol", StringType)) :: Nil, "\"test\" > stringcol") filterTest("int filter", (a("intcol", IntegerType) === Literal(1)) :: Nil, "intcol = 1") filterTest("int filter backwards", (Literal(1) === a("intcol", IntegerType)) :: Nil, "1 = intcol") filterTest("int and string filter", (Literal(1) === a("intcol", IntegerType)) :: (Literal("a") === a("strcol", IntegerType)) :: Nil, "1 = intcol and \"a\" = strcol") filterTest("skip varchar", (Literal("") === a("varchar", StringType)) :: Nil, "") private def filterTest(name: String, filters: Seq[Expression], result: String) = { test(name) { val converted = shim.convertFilters(testTable, filters) if (converted != result) { fail( s"Expected filters ${filters.mkString(",")} to convert to '$result' but got '$converted'") } } } private def a(name: String, dataType: DataType) = AttributeReference(name, dataType)() }
Example 83
Source File: FlumeTestUtils.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.flume import java.net.{InetSocketAddress, ServerSocket} import java.nio.ByteBuffer import java.nio.charset.StandardCharsets import java.util.{List => JList} import java.util.Collections import scala.collection.JavaConverters._ import org.apache.avro.ipc.NettyTransceiver import org.apache.avro.ipc.specific.SpecificRequestor import org.apache.commons.lang3.RandomUtils import org.apache.flume.source.avro import org.apache.flume.source.avro.{AvroFlumeEvent, AvroSourceProtocol} import org.jboss.netty.channel.ChannelPipeline import org.jboss.netty.channel.socket.SocketChannel import org.jboss.netty.channel.socket.nio.NioClientSocketChannelFactory import org.jboss.netty.handler.codec.compression.{ZlibDecoder, ZlibEncoder} import org.apache.spark.util.Utils import org.apache.spark.SparkConf private class CompressionChannelFactory(compressionLevel: Int) extends NioClientSocketChannelFactory { override def newChannel(pipeline: ChannelPipeline): SocketChannel = { val encoder = new ZlibEncoder(compressionLevel) pipeline.addFirst("deflater", encoder) pipeline.addFirst("inflater", new ZlibDecoder()) super.newChannel(pipeline) } } }
Example 84
Source File: Utils.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler.cluster.mesos import java.util.Collections import scala.collection.JavaConverters._ import org.apache.mesos.Protos._ import org.apache.mesos.Protos.Value.{Range => MesosRange, Ranges, Scalar} import org.apache.mesos.SchedulerDriver import org.mockito.{ArgumentCaptor, Matchers} import org.mockito.Mockito._ object Utils { def createOffer( offerId: String, slaveId: String, mem: Int, cpus: Int, ports: Option[(Long, Long)] = None, gpus: Int = 0): Offer = { val builder = Offer.newBuilder() builder.addResourcesBuilder() .setName("mem") .setType(Value.Type.SCALAR) .setScalar(Scalar.newBuilder().setValue(mem)) builder.addResourcesBuilder() .setName("cpus") .setType(Value.Type.SCALAR) .setScalar(Scalar.newBuilder().setValue(cpus)) ports.foreach { resourcePorts => builder.addResourcesBuilder() .setName("ports") .setType(Value.Type.RANGES) .setRanges(Ranges.newBuilder().addRange(MesosRange.newBuilder() .setBegin(resourcePorts._1).setEnd(resourcePorts._2).build())) } if (gpus > 0) { builder.addResourcesBuilder() .setName("gpus") .setType(Value.Type.SCALAR) .setScalar(Scalar.newBuilder().setValue(gpus)) } builder.setId(createOfferId(offerId)) .setFrameworkId(FrameworkID.newBuilder() .setValue("f1")) .setSlaveId(SlaveID.newBuilder().setValue(slaveId)) .setHostname(s"host${slaveId}") .build() } def verifyTaskLaunched(driver: SchedulerDriver, offerId: String): List[TaskInfo] = { val captor = ArgumentCaptor.forClass(classOf[java.util.Collection[TaskInfo]]) verify(driver, times(1)).launchTasks( Matchers.eq(Collections.singleton(createOfferId(offerId))), captor.capture()) captor.getValue.asScala.toList } def createOfferId(offerId: String): OfferID = { OfferID.newBuilder().setValue(offerId).build() } def createSlaveId(slaveId: String): SlaveID = { SlaveID.newBuilder().setValue(slaveId).build() } def createExecutorId(executorId: String): ExecutorID = { ExecutorID.newBuilder().setValue(executorId).build() } def createTaskId(taskId: String): TaskID = { TaskID.newBuilder().setValue(taskId).build() } }
Example 85
Source File: ServiceSupport.scala From lagom with Apache License 2.0 | 5 votes |
package com.lightbend.lagom.it import java.util.Collections import java.util.function.{ Function => JFunction } import akka.stream.Materializer import akka.stream.scaladsl.Source import org.scalatest.Inside import play.api.Application import play.api.Configuration import play.api.Environment import play.inject.guice.GuiceApplicationBuilder import scala.concurrent.Await import scala.concurrent.duration._ import scala.reflect.ClassTag import akka.japi.function.Procedure import com.google.inject.Binder import com.google.inject.Module import com.google.inject.TypeLiteral import com.lightbend.lagom.javadsl.testkit.ServiceTest import com.lightbend.lagom.javadsl.testkit.ServiceTest.TestServer import play.api.routing.Router import java.util import com.lightbend.lagom.internal.testkit.EmptyAdditionalRoutersModule import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpecLike sealed trait HttpBackend { final val provider: String = s"play.core.server.${codeName}ServerProvider" val codeName: String } case object AkkaHttp extends HttpBackend { val codeName = "AkkaHttp" } case object Netty extends HttpBackend { val codeName = "Netty" } trait ServiceSupport extends AnyWordSpecLike with Matchers with Inside { def withServer( configureBuilder: GuiceApplicationBuilder => GuiceApplicationBuilder )(block: Application => Unit)(implicit httpBackend: HttpBackend): Unit = { val jConfigureBuilder = new JFunction[GuiceApplicationBuilder, GuiceApplicationBuilder] { override def apply(b: GuiceApplicationBuilder): GuiceApplicationBuilder = { configureBuilder(b) .overrides(EmptyAdditionalRoutersModule) .configure("play.server.provider", httpBackend.provider) } } val jBlock = new Procedure[TestServer] { override def apply(server: TestServer): Unit = { block(server.app.asScala()) } } val setup = ServiceTest.defaultSetup.configureBuilder(jConfigureBuilder).withCluster(false) ServiceTest.withServer(setup, jBlock) } def withClient[T: ClassTag]( configureBuilder: GuiceApplicationBuilder => GuiceApplicationBuilder )(block: Application => T => Unit)(implicit httpBackend: HttpBackend): Unit = { withServer(configureBuilder) { application => val client = application.injector.instanceOf[T] block(application)(client) } } implicit def materializer(implicit app: Application): Materializer = app.materializer def consume[T](source: Source[T, _])(implicit mat: Materializer): List[T] = { Await.result(source.runFold(List.empty[T])((list, t) => t :: list), 10.seconds).reverse } }
Example 86
Source File: PerfSourceTask.scala From ohara with Apache License 2.0 | 4 votes |
package oharastream.ohara.connector.perf import java.util.Collections import oharastream.ohara.common.annotations.VisibleForTesting import oharastream.ohara.common.data.{Cell, Column, DataType, Row} import oharastream.ohara.common.setting.TopicKey import oharastream.ohara.common.util.{ByteUtils, CommonUtils} import oharastream.ohara.kafka.connector.{RowSourceRecord, RowSourceTask, TaskSetting} import scala.jdk.CollectionConverters._ class PerfSourceTask extends RowSourceTask { private[this] var props: PerfSourceProps = _ private[this] var topics: Set[TopicKey] = _ @VisibleForTesting private[perf] var schema: Seq[Column] = _ private[this] var lastPoll: Long = -1 private[this] var records: java.util.List[RowSourceRecord] = java.util.List.of() override protected def run(settings: TaskSetting): Unit = { this.props = PerfSourceProps(settings) this.topics = settings.topicKeys().asScala.toSet this.schema = settings.columns.asScala.toSeq if (schema.isEmpty) schema = DEFAULT_SCHEMA val row = Row.of( schema.sortBy(_.order).map { c => Cell.of( c.newName, c.dataType match { case DataType.BOOLEAN => false case DataType.BYTE => ByteUtils.toBytes(CommonUtils.current()).head case DataType.BYTES => new Array[Byte](props.cellSize) case DataType.SHORT => CommonUtils.current().toShort case DataType.INT => CommonUtils.current().toInt case DataType.LONG => CommonUtils.current() case DataType.FLOAT => CommonUtils.current().toFloat case DataType.DOUBLE => CommonUtils.current().toDouble case DataType.STRING => CommonUtils.randomString(props.cellSize) case _ => CommonUtils.current() } ) }: _* ) records = Collections.unmodifiableList( (0 until props.batch).flatMap(_ => topics.map(RowSourceRecord.builder().row(row).topicKey(_).build())).asJava ) } override protected def terminate(): Unit = {} override protected def pollRecords(): java.util.List[RowSourceRecord] = { val current = CommonUtils.current() if (current - lastPoll > props.freq.toMillis) { lastPoll = current records } else java.util.List.of() } }