scala.collection.immutable.HashSet Scala Examples
The following examples show how to use scala.collection.immutable.HashSet.
Each example links to the original project and source file.
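Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the basic immutable-HashSet operations the examples rely on: adding or removing an element always returns a new set and never mutates the original.

import scala.collection.immutable.HashSet

object HashSetBasics extends App {
  val base = HashSet("a", "b", "c")   // build from elements
  val added = base + "d"              // returns a new set; `base` is unchanged
  val removed = added - "a"           // likewise returns a new set
  val merged = base ++ Seq("x", "y")  // bulk add from any collection

  println(base.contains("a"))         // true (hash-based, effectively constant-time lookup)
  println(added.size)                 // 4
  println(removed("a"))               // false (apply is an alias for contains)
  println(merged.size)                // 5
}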
Example 1
Source File: CollectionConvertersSuite.scala From pureconfig with Mozilla Public License 2.0
package pureconfig

import scala.collection.JavaConverters._
import scala.collection.immutable.{ HashSet, ListSet, Queue, TreeSet }

import com.typesafe.config.{ ConfigFactory, ConfigValueFactory, ConfigValueType }

import pureconfig.error.{ ConfigReaderFailures, ConvertFailure, WrongType }

class CollectionConvertersSuite extends BaseSuite {
  implicit override val generatorDrivenConfig = PropertyCheckConfiguration(minSuccessful = 100)

  behavior of "ConfigConvert"

  checkArbitrary[HashSet[String]]

  checkArbitrary[List[Float]]

  checkRead[List[Int]](
    // order of keys maintained
    ConfigValueFactory.fromMap(Map("2" -> 1, "0" -> 2, "1" -> 3).asJava) -> List(2, 3, 1),
    ConfigValueFactory.fromMap(Map("3" -> 2, "1" -> 4).asJava) -> List(4, 2),
    ConfigValueFactory.fromMap(Map("1" -> 1, "a" -> 2).asJava) -> List(1))

  checkFailures[List[Int]](
    ConfigValueFactory.fromMap(Map("b" -> 1, "a" -> 2).asJava) -> ConfigReaderFailures(
      ConvertFailure(WrongType(ConfigValueType.OBJECT, Set(ConfigValueType.LIST)), emptyConfigOrigin, "")),
    ConfigValueFactory.fromMap(Map().asJava) -> ConfigReaderFailures(
      ConvertFailure(WrongType(ConfigValueType.OBJECT, Set(ConfigValueType.LIST)), emptyConfigOrigin, "")))

  checkArbitrary[ListSet[Int]]

  checkArbitrary[Map[String, Int]]

  checkFailures[Map[String, Int]](
    // nested map should fail
    ConfigFactory.parseString("conf.a=1").root() -> ConfigReaderFailures(
      ConvertFailure(WrongType(ConfigValueType.OBJECT, Set(ConfigValueType.NUMBER)), stringConfigOrigin(1), "conf")),
    // wrong value type should fail
    ConfigFactory.parseString("{ a=b }").root() -> ConfigReaderFailures(
      ConvertFailure(WrongType(ConfigValueType.STRING, Set(ConfigValueType.NUMBER)), stringConfigOrigin(1), "a")))

  checkArbitrary[Queue[Boolean]]

  checkArbitrary[Set[Double]]

  checkRead[Set[Int]](
    ConfigValueFactory.fromMap(Map("1" -> 4, "2" -> 5, "3" -> 6).asJava) -> Set(4, 5, 6))

  checkArbitrary[Stream[String]]

  checkArbitrary[TreeSet[Int]]

  checkArbitrary[Vector[Short]]

  checkArbitrary[Option[Int]]

  checkArbitrary[Array[Int]]
}
Example 2
Source File: TestUtils.scala From shc with Apache License 2.0
package org.apache.spark.sql

import java.io.{IOException, File}
import java.nio.ByteBuffer
import java.util

import scala.collection.immutable.HashSet
import scala.collection.mutable.ArrayBuffer
import scala.util.Random

import com.google.common.io.Files
import org.apache.avro.generic.GenericData
import org.apache.spark.sql.SQLContext

object TestUtils {

  def generateRandomByteBuffer(rand: Random, size: Int): ByteBuffer = {
    val bb = ByteBuffer.allocate(size)
    val arrayOfBytes = new Array[Byte](size)
    rand.nextBytes(arrayOfBytes)
    bb.put(arrayOfBytes)
  }

  def generateRandomMap(rand: Random, size: Int): java.util.Map[String, Int] = {
    val jMap = new util.HashMap[String, Int]()
    for (i <- 0 until size) {
      jMap.put(rand.nextString(5), i)
    }
    jMap
  }

  def generateRandomArray(rand: Random, size: Int): util.ArrayList[Boolean] = {
    val vec = new util.ArrayList[Boolean]()
    for (i <- 0 until size) {
      vec.add(rand.nextBoolean())
    }
    vec
  }
}
Example 3
Source File: ColumnarTestUtils.scala From BigDatalog with Apache License 2.0
package org.apache.spark.sql.execution.columnar import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, GenericMutableRow} import org.apache.spark.sql.catalyst.util.{GenericArrayData, ArrayBasedMapData} import org.apache.spark.sql.types.{AtomicType, Decimal} import org.apache.spark.unsafe.types.UTF8String object ColumnarTestUtils { def makeNullRow(length: Int): GenericMutableRow = { val row = new GenericMutableRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case NULL => null case BOOLEAN => Random.nextBoolean() case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => randomBytes(Random.nextInt(32)) case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale) case STRUCT(_) => new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10)))) case ARRAY(_) => new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt())) case MAP(_) => ArrayBasedMapData( Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32))))) }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_], tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[JvmType]( columnType: ColumnType[JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_], tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = { val row = new GenericMutableRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericMutableRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericMutableRow(1) row(0) = value row } (values, rows) } }
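The HashSet-specific part of this utility (repeated in the other ColumnarTestUtils variants below) is makeUniqueRandomValues, which grows an immutable HashSet with Iterator.iterate until it holds count distinct values. A stripped-down sketch of the same pattern with plain Ints (names here are illustrative, not from the project):

import scala.collection.immutable.HashSet
import scala.util.Random

object UniqueRandomValues extends App {
  // Produce `count` distinct random values; the value space must be larger
  // than `count`, otherwise the inner iterator never terminates.
  def makeUniqueRandomInts(count: Int): Seq[Int] =
    Iterator.iterate(HashSet.empty[Int]) { set =>
      // keep drawing until we hit a value not already in the set
      set + Iterator.continually(Random.nextInt(1000)).filterNot(set.contains).next()
    }.drop(count).next().toSeq   // the count-th set holds exactly `count` elements

  val xs = makeUniqueRandomInts(10)
  println(xs.distinct.size == 10)  // true: all values are unique
}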
Example 4
Source File: IpRangeTest.scala From ipaddr with Apache License 2.0
package com.risksense.ipaddr import scala.collection.immutable.HashSet // scalastyle:off multiple.string.literals magic.number class IpRangeTest extends UnitSpec { private val addr1 = "192.168.1.200" private val addr2 = "192.168.1.230" private val range = IpRange(addr1, addr2) private val range2 = IpRange("192.168.1.210", "192.168.1.220") private val range3 = IpRange("192.168.1.100", "192.168.1.210") private val range4 = IpRange("192.168.1.220", "192.168.1.240") "Creating an IpRange" should "result in failure if addresses are invalid" in { // first address invalid an[IpaddrException] should be thrownBy IpRange("1.2.300.20", "1.2.3.2") // second address invalid an[IpaddrException] should be thrownBy IpRange("192.168.1.200", "192.168.1.256") // first address > second address an[IpaddrException] should be thrownBy IpRange("192.168.1.230", "192.168.1.229") } it should "succeed if addresses are valid" in { IpRange("10.2.10.12", "10.2.10.15") shouldBe a[IpRange] IpRange("10.2.10.230", "10.2.10.230") shouldBe a[IpRange] } "An IpRange object" should "perform all range operations" in { range.toString() should be(addr1 + "-" + addr2) range.first should be(3232235976L) range.last should be(3232236006L) range.key should be((4, 3232235976L, 3232236006L)) range.sortKey should be((4, 3232235976L, 27)) } it should "perform contains operation" in { // Check range edge addresses range.contains(addr1) should be(true) range.contains(addr2) should be(true) range.contains(range2) should be(true) range.contains(range3) should be(false) range.contains(range4) should be(false) val net = IpNetwork("10.4.10.100/30") val rightRange = "10.4.10.105" val r1 = IpRange("10.4.10.101", "10.4.10.102") val r2 = IpRange("10.4.10.99", rightRange) val r3 = IpRange("10.4.10.100", rightRange) val r4 = IpRange("10.4.10.101", rightRange) r1.contains(net) should be(false) r2.contains(net) should be(true) r3.contains(net) should be(true) r4.contains(net) should be(false) an[IpaddrException] should be thrownBy r4.contains("1.2.3") // address is bad } it should "perform cidrs operation" in { val net1 = IpNetwork("192.168.1.200/29") val net2 = IpNetwork("192.168.1.208/28") val net3 = IpNetwork("192.168.1.224/30") val net4 = IpNetwork("192.168.1.228/31") val net5 = IpNetwork("192.168.1.230/32") val netList = List(net1, net2, net3, net4, net5) range.cidrs should be(netList) } it should "check for equality" in { val hs = HashSet(range2, range, range3) range should be(IpRange(addr1, addr2)) range == range2 should be(false) range.equals(range) should be(true) range.equals(addr1) should be(false) hs.contains(range) should be(true) hs.contains(range4) should be(false) } }
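The last test above works because HashSet.contains looks elements up via hashCode and equals, so IpRange's equality definition decides membership. A small standalone sketch of the same idea with an ordinary case class (which gets structural equals and hashCode for free):

import scala.collection.immutable.HashSet

object StructuralLookup extends App {
  case class IpSpan(first: Long, last: Long)

  val spans = HashSet(IpSpan(1, 10), IpSpan(20, 30))

  // A freshly constructed but structurally equal value is found...
  println(spans.contains(IpSpan(1, 10)))   // true
  // ...while an unequal one is not.
  println(spans.contains(IpSpan(1, 11)))   // false
}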
Example 5
Source File: ColumnarTestUtils.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.execution.columnar import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types.{AtomicType, Decimal} import org.apache.spark.unsafe.types.UTF8String object ColumnarTestUtils { def makeNullRow(length: Int): GenericInternalRow = { val row = new GenericInternalRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case NULL => null case BOOLEAN => Random.nextBoolean() case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => randomBytes(Random.nextInt(32)) case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale) case STRUCT(_) => new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10)))) case ARRAY(_) => new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt())) case MAP(_) => ArrayBasedMapData( Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32))))) case _ => throw new IllegalArgumentException(s"Unknown column type $columnType") }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_], tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[JvmType]( columnType: ColumnType[JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_], tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = { val row = new GenericInternalRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericInternalRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericInternalRow(1) row(0) = value row } (values, rows) } }
Example 6
Source File: TwoSum.scala From coding-interview-questions-scala with Apache License 2.0
package org.questions.arrays

import scala.annotation.tailrec
import scala.collection.immutable.HashSet

class TwoSum {
  def findPairSum(seq: Seq[Int], sum: Int): Option[(Int, Int)] = {
    @tailrec
    def inner(seq: Seq[Int], hash: HashSet[Int]): Option[(Int, Int)] = seq match {
      case Nil => None
      case head :: tail =>
        val lookingFor = sum - head
        if (hash.contains(lookingFor))
          Some(head -> lookingFor)
        else
          inner(tail, hash + head)
    }

    inner(seq, HashSet.empty[Int])
  }
}
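A quick usage sketch, assuming the TwoSum class above is on the classpath; note that the returned pair is (later element, its complement), because the match succeeds when the complement is already in the HashSet:

import org.questions.arrays.TwoSum

object TwoSumDemo extends App {
  val solver = new TwoSum

  // 2 is added to the set first; when 7 is reached, 9 - 7 = 2 is found in the set.
  println(solver.findPairSum(List(2, 7, 11, 15), 9))   // Some((7,2))

  // No two elements add up to 100.
  println(solver.findPairSum(List(1, 2, 3), 100))      // None
}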
Example 7
Source File: ColumnarTestUtils.scala From spark1.52 with Apache License 2.0
package org.apache.spark.sql.columnar

import scala.collection.immutable.HashSet
import scala.util.Random

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.GenericMutableRow
import org.apache.spark.sql.types.{DataType, Decimal, AtomicType}
import org.apache.spark.unsafe.types.UTF8String

// Column test utilities
object ColumnarTestUtils {
  def makeNullRow(length: Int): GenericMutableRow = {
    val row = new GenericMutableRow(length)
    (0 until length).foreach(row.setNullAt)
    row
  }

  // Generate a random value for the given column type
  def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = {
    def randomBytes(length: Int) = {
      val bytes = new Array[Byte](length)
      Random.nextBytes(bytes)
      bytes
    }

    (columnType match {
      case BOOLEAN => Random.nextBoolean()
      case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte
      case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort
      case INT => Random.nextInt()
      case DATE => Random.nextInt()
      case LONG => Random.nextLong()
      case TIMESTAMP => Random.nextLong()
      case FLOAT => Random.nextFloat()
      case DOUBLE => Random.nextDouble()
      case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32)))
      case BINARY => randomBytes(Random.nextInt(32))
      case FIXED_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale)
      case _ =>
        // Using a random one-element map instead of an arbitrary object
        Map(Random.nextInt() -> Random.nextString(Random.nextInt(32)))
    }).asInstanceOf[JvmType]
  }

  def makeRandomValues(
      head: ColumnType[_],
      tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail)

  def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = {
    columnTypes.map(makeRandomValue(_))
  }

  // Generate `count` distinct random values
  def makeUniqueRandomValues[JvmType](
      columnType: ColumnType[JvmType],
      count: Int): Seq[JvmType] = {
    Iterator.iterate(HashSet.empty[JvmType]) { set =>
      set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next()
    }.drop(count).next().toSeq
  }

  def makeRandomRow(
      head: ColumnType[_],
      tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail)

  def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = {
    val row = new GenericMutableRow(columnTypes.length)
    makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) =>
      row(index) = value
    }
    row
  }

  // Generate unique values and corresponding single-value rows
  def makeUniqueValuesAndSingleValueRows[T <: AtomicType](
      columnType: NativeColumnType[T],
      count: Int): (Seq[T#InternalType], Seq[GenericMutableRow]) = {

    val values = makeUniqueRandomValues(columnType, count)
    val rows = values.map { value =>
      val row = new GenericMutableRow(1)
      row(0) = value
      row
    }

    (values, rows)
  }
}
Example 8
Source File: OptimizeInSuite.scala From spark1.52 with Apache License 2.0
package org.apache.spark.sql.catalyst.optimizer

import scala.collection.immutable.HashSet

import org.apache.spark.sql.catalyst.analysis.{EliminateSubQueries, UnresolvedAttribute}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.types._

// For implicit conversions
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.dsl.expressions._

class OptimizeInSuite extends PlanTest {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("AnalysisNodes", Once,
        EliminateSubQueries) ::
      Batch("ConstantFolding", Once,
        ConstantFolding,
        BooleanSimplification,
        OptimizeIn) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  test("OptimizedIn test: In clause not optimized to InSet when less than 10 items") {
    val originalQuery =
      testRelation
        .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2))))
        .analyze

    val optimized = Optimize.execute(originalQuery.analyze)
    comparePlans(optimized, originalQuery)
  }

  test("OptimizedIn test: In clause optimized to InSet when more than 10 items") {
    val originalQuery =
      testRelation
        .where(In(UnresolvedAttribute("a"), (1 to 11).map(Literal(_))))
        .analyze

    val optimized = Optimize.execute(originalQuery.analyze)
    val correctAnswer =
      testRelation
        .where(InSet(UnresolvedAttribute("a"), (1 to 11).toSet))
        .analyze

    comparePlans(optimized, correctAnswer)
  }

  test("OptimizedIn test: In clause not optimized in case filter has attributes") {
    val originalQuery =
      testRelation
        .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2), UnresolvedAttribute("b"))))
        .analyze

    val optimized = Optimize.execute(originalQuery.analyze)
    val correctAnswer =
      testRelation
        .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2), UnresolvedAttribute("b"))))
        .analyze

    comparePlans(optimized, correctAnswer)
  }
}
Example 9
Source File: MutableSet.scala From spark1.52 with Apache License 2.0
package scalaDemo

object MutableSet {
  def main(args: Array[String]): Unit = {
    // Mutable set
    import scala.collection.mutable.Set
    val movieSet = Set("Hitch", "Poltergeist")
    movieSet += "Shrek"
    println(movieSet)

    // Immutable set
    import scala.collection.immutable.HashSet
    val hashSet = HashSet("Tomatoes", "Chilies")
    println(hashSet + "Coriander")
  }
}
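The example mixes the two set flavours; the sketch below makes the difference explicit. With mutable.Set, += changes the set in place, while on an immutable HashSet + returns a new set and the original is untouched (the printed expression in the example above likewise leaves hashSet unchanged).

import scala.collection.immutable.HashSet
import scala.collection.mutable

object SetFlavours extends App {
  val m = mutable.Set("Hitch", "Poltergeist")
  m += "Shrek"                       // mutates `m` in place
  println(m.size)                    // 3

  val h = HashSet("Tomatoes", "Chilies")
  val h2 = h + "Coriander"           // builds a new set
  println(h.size)                    // 2 -- the original is unchanged
  println(h2.size)                   // 3
}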
Example 10
Source File: ColumnarTestUtils.scala From iolap with Apache License 2.0
package org.apache.spark.sql.columnar import java.sql.Timestamp import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.GenericMutableRow import org.apache.spark.sql.types.{UTF8String, DataType, Decimal, AtomicType} object ColumnarTestUtils { def makeNullRow(length: Int): GenericMutableRow = { val row = new GenericMutableRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[T <: DataType, JvmType](columnType: ColumnType[T, JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case FIXED_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case STRING => UTF8String(Random.nextString(Random.nextInt(32))) case BOOLEAN => Random.nextBoolean() case BINARY => randomBytes(Random.nextInt(32)) case DATE => Random.nextInt() case TIMESTAMP => val timestamp = new Timestamp(Random.nextLong()) timestamp.setNanos(Random.nextInt(999999999)) timestamp case _ => // Using a random one-element map instead of an arbitrary object Map(Random.nextInt() -> Random.nextString(Random.nextInt(32))) }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_ <: DataType, _], tail: ColumnType[_ <: DataType, _]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_ <: DataType, _]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[T <: DataType, JvmType]( columnType: ColumnType[T, JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_ <: DataType, _], tail: ColumnType[_ <: DataType, _]*): Row = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_ <: DataType, _]]): Row = { val row = new GenericMutableRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericMutableRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericMutableRow(1) row(0) = value row } (values, rows) } }
Example 11
Source File: OptimizeInSuite.scala From iolap with Apache License 2.0
package org.apache.spark.sql.catalyst.optimizer import scala.collection.immutable.HashSet import org.apache.spark.sql.catalyst.analysis.{EliminateSubQueries, UnresolvedAttribute} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.types._ // For implicit conversions import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.dsl.expressions._ class OptimizeInSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("AnalysisNodes", Once, EliminateSubQueries) :: Batch("ConstantFolding", Once, ConstantFolding, BooleanSimplification, OptimizeIn) :: Nil } val testRelation = LocalRelation('a.int, 'b.int, 'c.int) test("OptimizedIn test: In clause optimized to InSet") { val originalQuery = testRelation .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2)))) .analyze val optimized = Optimize.execute(originalQuery.analyze) val correctAnswer = testRelation .where(InSet(UnresolvedAttribute("a"), HashSet[Any]() + 1 + 2)) .analyze comparePlans(optimized, correctAnswer) } test("OptimizedIn test: In clause not optimized in case filter has attributes") { val originalQuery = testRelation .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2), UnresolvedAttribute("b")))) .analyze val optimized = Optimize.execute(originalQuery.analyze) val correctAnswer = testRelation .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2), UnresolvedAttribute("b")))) .analyze comparePlans(optimized, correctAnswer) } }
Example 12
Source File: ColumnarTestUtils.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.execution.columnar import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types.{AtomicType, Decimal} import org.apache.spark.unsafe.types.UTF8String object ColumnarTestUtils { def makeNullRow(length: Int): GenericInternalRow = { val row = new GenericInternalRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case NULL => null case BOOLEAN => Random.nextBoolean() case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => randomBytes(Random.nextInt(32)) case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale) case STRUCT(_) => new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10)))) case ARRAY(_) => new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt())) case MAP(_) => ArrayBasedMapData( Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32))))) case _ => throw new IllegalArgumentException(s"Unknown column type $columnType") }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_], tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[JvmType]( columnType: ColumnType[JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_], tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = { val row = new GenericInternalRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericInternalRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericInternalRow(1) row(0) = value row } (values, rows) } }
Example 13
Source File: InsertOrdSet.scala From daml with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.lf.data

import scala.collection.immutable.{HashSet, Set, Queue}
import scala.collection.{SetLike, AbstractSet}
import scala.collection.generic.{
  ImmutableSetFactory,
  GenericCompanion,
  CanBuildFrom,
  GenericSetTemplate
}

final class InsertOrdSet[T] private (_items: Queue[T], _hashSet: HashSet[T])
    extends AbstractSet[T]
    with Set[T]
    with SetLike[T, InsertOrdSet[T]]
    with GenericSetTemplate[T, InsertOrdSet]
    with Serializable {
  override def empty: InsertOrdSet[T] = InsertOrdSet.empty
  override def size: Int = _hashSet.size

  def iterator: Iterator[T] =
    _items.reverseIterator

  override def contains(elem: T): Boolean =
    _hashSet.contains(elem)

  override def +(elem: T): InsertOrdSet[T] =
    if (_hashSet.contains(elem))
      this
    else
      new InsertOrdSet(
        elem +: _items,
        _hashSet + elem
      )

  override def -(elem: T): InsertOrdSet[T] =
    new InsertOrdSet(
      _items.filter(elem2 => elem != elem2),
      _hashSet - elem
    )

  override def companion: GenericCompanion[InsertOrdSet] = InsertOrdSet
}

object InsertOrdSet extends ImmutableSetFactory[InsertOrdSet] {
  private val Empty = new InsertOrdSet(Queue.empty, HashSet.empty)
  override def empty[T] = Empty.asInstanceOf[InsertOrdSet[T]]
  def emptyInstance: InsertOrdSet[Any] = empty[Any]

  def fromSeq[T](s: Seq[T]): InsertOrdSet[T] =
    new InsertOrdSet(Queue(s.reverse: _*), HashSet(s: _*))

  implicit def canBuildFrom[A]: CanBuildFrom[Coll, A, InsertOrdSet[A]] =
    setCanBuildFrom[A]
}
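The class above exists because HashSet iterates in an order derived from element hashes, not insertion order; InsertOrdSet keeps a Queue for ordering and a HashSet purely for constant-time contains checks. A small standalone sketch of the behaviour that motivates it (the output order of the HashSet line is unspecified):

import scala.collection.immutable.HashSet

object IterationOrder extends App {
  val inserted = Seq("delta", "alpha", "charlie", "bravo")

  // HashSet: membership is fast, but iteration order follows the hash layout.
  val hs = HashSet(inserted: _*)
  println(hs.toList)        // some hash-determined order, generally not the insertion order

  // Keeping a parallel ordered structure restores insertion order,
  // which is exactly what InsertOrdSet above does with its Queue.
  val ordered = inserted.foldLeft((Vector.empty[String], HashSet.empty[String])) {
    case ((order, seen), x) => if (seen(x)) (order, seen) else (order :+ x, seen + x)
  }._1
  println(ordered)          // Vector(delta, alpha, charlie, bravo)
}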
Example 14
Source File: ClusterListener.scala From akka-cluster-load-balancing with MIT License
package kamkor.actor

import scala.collection.immutable.HashSet
import scala.concurrent.duration.DurationInt

import akka.actor.{ Actor, Props }
import akka.cluster.Cluster
import akka.cluster.ClusterEvent.MemberUp
import akka.cluster.metrics.{ ClusterMetricsChanged, ClusterMetricsExtension, NodeMetrics }
import akka.cluster.metrics.StandardMetrics.HeapMemory

import kamkor.{ ConsumerApp }
import kamkor.metrics.{ ClusterHeapMetrics, MetricsLogger }

class ClusterListener(metricsIntervalSeconds: Int) extends Actor {

  import context.dispatcher
  context.system.scheduler.schedule(
    metricsIntervalSeconds.seconds, metricsIntervalSeconds.seconds, self, "logConsumersHeapUse")

  private[this] val cluster = Cluster(context.system)
  private[this] val metricsLogger =
    new MetricsLogger(name = cluster.selfAddress.port.getOrElse(0).toString())
  private[this] val clusterHeapMetrics = new ClusterHeapMetrics()

  private var consumers: Set[String] = HashSet.empty

  override def preStart(): Unit = {
    ClusterMetricsExtension(context.system).subscribe(self)
    cluster.subscribe(self, classOf[MemberUp])
  }

  override def postStop(): Unit = {
    ClusterMetricsExtension(context.system).unsubscribe(self)
    Cluster(context.system).unsubscribe(self)
  }

  def receive: Receive = {
    case MemberUp(m) if m.roles.contains(ConsumerApp.clusterRole) =>
      consumers += m.address.hostPort
    case ClusterMetricsChanged(clusterMetrics) =>
      clusterMetrics
        .filter(nm => consumers.contains(nm.address.hostPort))
        .foreach(updateHeapUse(_))
    case "logConsumersHeapUse" => {
      metricsLogger.log(clusterHeapMetrics.calculateAverages)
      clusterHeapMetrics.clear()
    }
  }

  private[this] def updateHeapUse(nodeMetrics: NodeMetrics) {
    nodeMetrics match {
      case HeapMemory(address, timestamp, used, committed, max) => {
        val usedMB = Math.round(used.doubleValue / 1024 / 1024)
        clusterHeapMetrics.update(address.hostPort, usedMB)
      }
      case _ => // no heap info
    }
  }

}

object ClusterListener {

  def props(metricsIntervalSeconds: Int): Props =
    Props(new ClusterListener(metricsIntervalSeconds: Int))

}
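ClusterListener tracks members with a var holding an immutable HashSet: consumers += x desugars to consumers = consumers + x, so the actor swaps in a new set on every MemberUp instead of mutating shared state. A minimal sketch of the same pattern outside Akka (the class name is illustrative, not from the project):

import scala.collection.immutable.HashSet

class MemberTracker {
  // A var pointing at an immutable HashSet: updates replace the reference,
  // they never mutate a set that someone else may still be reading.
  private var consumers: Set[String] = HashSet.empty

  def memberUp(hostPort: String): Unit = consumers += hostPort     // consumers = consumers + hostPort
  def memberDown(hostPort: String): Unit = consumers -= hostPort   // consumers = consumers - hostPort
  def isConsumer(hostPort: String): Boolean = consumers.contains(hostPort)
}

object MemberTrackerDemo extends App {
  val t = new MemberTracker
  t.memberUp("host-a:2552")
  println(t.isConsumer("host-a:2552"))  // true
  println(t.isConsumer("host-b:2552"))  // false
}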
Example 15
Source File: TestEntityConstraints.scala From eidos with Apache License 2.0
package org.clulab.wm.eidos.entities

import org.clulab.wm.eidos.extraction.EntityConstraints
import org.clulab.wm.eidos.test.TestUtils._

import scala.collection.immutable.HashSet

class TestEntityConstraints extends Test {

  def matchBrackets(text: String) =
    EntityConstraints.matchingBrackets(text.split(' ').toSeq)

  behavior of "EntityConstraints"

  it should "approve of properly nested parentheses" in {
    matchBrackets("This has none.") should be (true)
    matchBrackets("This has ( one pair ) .") should be (true)
    matchBrackets("This has ( ( nested pairs ) ) .") should be (true)
    matchBrackets("This has ( [ { } ] ) mixed pairs .") should be (true)
    matchBrackets("This has ( { ) } intermixed pairs .") should be (true)
    matchBrackets("This has -LRB- one strange pair -RRB- .") should be (true)
    matchBrackets("This has ( double ( nesting ) ( of ) parens ) .") should be (true)
  }

  it should "disapprove of improperly nested parentheses" in {
    matchBrackets("This starts with ) a reversed pair ( .") should be (false)
    matchBrackets("This has ( one normal pair ) and ) a reversed pair ( .") should be (false)
    matchBrackets("This count is just uneven ( in this one .") should be (false)
    matchBrackets("and this ) too") should be (false)
  }
}
Example 16
Source File: ColumnarTestUtils.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.execution.columnar import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types.{AtomicType, Decimal} import org.apache.spark.unsafe.types.UTF8String object ColumnarTestUtils { def makeNullRow(length: Int): GenericInternalRow = { val row = new GenericInternalRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case NULL => null case BOOLEAN => Random.nextBoolean() case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => randomBytes(Random.nextInt(32)) case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale) case STRUCT(_) => new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10)))) case ARRAY(_) => new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt())) case MAP(_) => ArrayBasedMapData( Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32))))) case _ => throw new IllegalArgumentException(s"Unknown column type $columnType") }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_], tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[JvmType]( columnType: ColumnType[JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_], tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = { val row = new GenericInternalRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericInternalRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericInternalRow(1) row(0) = value row } (values, rows) } }
Example 17
Source File: ColumnarTestUtils.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.execution.columnar import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types.{AtomicType, Decimal} import org.apache.spark.unsafe.types.UTF8String object ColumnarTestUtils { def makeNullRow(length: Int): GenericInternalRow = { val row = new GenericInternalRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case NULL => null case BOOLEAN => Random.nextBoolean() case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => randomBytes(Random.nextInt(32)) case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale) case STRUCT(_) => new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10)))) case ARRAY(_) => new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt())) case MAP(_) => ArrayBasedMapData( Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32))))) case _ => throw new IllegalArgumentException(s"Unknown column type $columnType") }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_], tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[JvmType]( columnType: ColumnType[JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_], tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = { val row = new GenericInternalRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericInternalRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericInternalRow(1) row(0) = value row } (values, rows) } }
Example 18
Source File: SetCreateAccessBench.scala From abc with Apache License 2.0
package com.rklaehn.abc import cats.kernel.instances.all._ import ichi.bench.Thyme import ichi.bench.Thyme.HowWarm import scala.collection.immutable.{HashSet, SortedSet} object SetCreateAccessBench extends App { val th = Thyme.warmed(verbose = println, warmth = HowWarm.BenchOff) val ns = Array(1, 10, 100, 1000, 10000, 100000) def createInt(): Unit = { for (n ← ns) { val elements = (0 until n).toArray def s0 = HashSet(elements:_*) def s1 = SortedSet(elements:_*) def s2 = ArraySet(elements:_*) th.pbenchOffWarm(s"Create HashSet[Int] vs ArraySet[Int] $n")( th.Warm(s0.asInstanceOf[AnyRef]))( th.Warm(s2.asInstanceOf[AnyRef])) th.pbenchOffWarm(s"Create SortedSet[Int] vs ArraySet[Int] $n")( th.Warm(s1.asInstanceOf[AnyRef]))( th.Warm(s2.asInstanceOf[AnyRef])) } } def accessInt(): Unit = { for (n ← ns) { val elements = (0 until n).toArray val s0 = HashSet(elements:_*) val s1 = SortedSet(elements:_*) val s2 = ArraySet(elements:_*) val x = 0 th.pbenchOffWarm(s"Access HashSet[Int] vs ArraySet[Int] $n")( th.Warm(s0(x)))( th.Warm(s2(x))) th.pbenchOffWarm(s"Access SortedSet[Int] vs ArraySet[Int] $n")( th.Warm(s1(x)))( th.Warm(s2(x))) } } createInt() accessInt() }
Example 19
Source File: SetSetBench.scala From abc with Apache License 2.0
package com.rklaehn.abc import java.util.concurrent.TimeUnit import org.openjdk.jmh.annotations._ import org.openjdk.jmh.infra.Blackhole import cats.kernel.instances.all._ import ScalaCollectionConverters._ import scala.collection.immutable.{HashSet, SortedSet} sealed trait SetSetBenchOps { def union: Any def intersect: Any def diff: Any def subsetOf: Boolean def filter(f: Int => Boolean): Any } object SetSetBenchOps extends BenchUtil { def apply(a: Seq[Int], b: Seq[Int], kind: String) = { val a1 = a.map(mix) val b1 = b.map(mix) kind match { case "hashset" => ScalaCollectionBench(HashSet(a1: _*), HashSet(b1: _*)) case "sortedset" => ScalaCollectionBench(SortedSet(a1: _*), SortedSet(b1: _*)) case "arrayset" => TypeClassBench(ArraySet(a1: _*), ArraySet(b1: _*)) case "arrayset2" => ScalaCollectionBench(ArraySet(a1: _*).asCollection, ArraySet(b1: _*).asCollection) } } private final case class ScalaCollectionBench(a: Set[Int], b: Set[Int]) extends SetSetBenchOps { override def union: Any = a union b override def diff: Any = a diff b override def subsetOf: Boolean = a subsetOf b override def intersect: Any = a intersect b override def filter(f: (Int) => Boolean): Any = a filter f } private final case class TypeClassBench(a: ArraySet[Int], b: ArraySet[Int]) extends SetSetBenchOps { override def union: Any = a union b override def diff: Any = a diff b override def subsetOf: Boolean = a subsetOf b override def intersect: Any = a intersect b override def filter(f: (Int) => Boolean): Any = a filter f } } @BenchmarkMode(Array(Mode.AverageTime)) @OutputTimeUnit(TimeUnit.NANOSECONDS) @State(Scope.Thread) class SetSetBench { @Param(Array("1", "10", "100", "1000", "10000", "100000")) var size = 0 @Param(Array("0.0", "0.5", "1.0")) // @Param(Array("0.5")) var offset = 0.0 @Param(Array("arrayset", "hashset", "sortedset")) //, "arrayset2")) var kind = "" var k: Int = 0 var bench: SetSetBenchOps = _ val shift = 1000000 // so we don't get the cached java.lang.Integer instances @Setup def setup(): Unit = { k = (offset * size).toInt bench = SetSetBenchOps(shift until (shift + size), (shift + k) until (shift + k + size), kind) } @Benchmark def union(x: Blackhole): Unit = { x.consume(bench.union) } @Benchmark def intersect(x: Blackhole): Unit = { x.consume(bench.intersect) } @Benchmark def diff(x: Blackhole): Unit = { x.consume(bench.diff) } @Benchmark def subsetOf(x: Blackhole): Unit = { x.consume(bench.subsetOf) } @Benchmark def filter(x: Blackhole): Unit = { x.consume(bench.filter(_ < k + shift)) } }
Example 20
Source File: SetElementBench.scala From abc with Apache License 2.0
package com.rklaehn.abc import java.util.concurrent.TimeUnit import org.openjdk.jmh.annotations._ import org.openjdk.jmh.infra.Blackhole import cats.kernel.instances.all._ import ScalaCollectionConverters._ import scala.collection.immutable.{HashSet, SortedSet} sealed trait SetElementBenchOps { def containsTrue: Any def containsFalse: Any } object SetElementBenchOps extends BenchUtil { def apply(a: Seq[Int], c: Int, n: Int, kind: String) = { val a1 = a.map(mix) val c1 = mix(c) val n1 = mix(n) require(a1.length == a.length) kind match { case "hashset" => ScalaCollectionBench(HashSet(a1: _*), c1, n1) case "sortedset" => ScalaCollectionBench(SortedSet(a1: _*), c1, n1) case "arrayset" => TypeClassBench(ArraySet(a1: _*), c1, n1) case "arrayset2" => ScalaCollectionBench(ArraySet(a1: _*).asCollection, c1, n1) } } private final case class ScalaCollectionBench(a: Set[Int], c: Int, n: Int) extends SetElementBenchOps { override def containsTrue: Boolean = a.contains(c) override def containsFalse: Boolean = a.contains(n) } private final case class TypeClassBench(a: ArraySet[Int], c: Int, n: Int) extends SetElementBenchOps { override def containsTrue: Boolean = a.contains(c) override def containsFalse: Boolean = a.contains(n) } } @BenchmarkMode(Array(Mode.AverageTime)) @OutputTimeUnit(TimeUnit.NANOSECONDS) @State(Scope.Thread) class SetElementBench { @Param(Array("1", "10", "100", "1000", "10000", "100000")) var size = 0 @Param(Array("arrayset", "hashset", "sortedset")) //, "arrayset2")) var kind = "" var k: Int = 0 var bench: SetElementBenchOps = _ @Setup def setup(): Unit = { val c = (0.3 * size).toInt // a value that is contained in the set val n = (1.3 * size).toInt // a value that is not contained in the set bench = SetElementBenchOps(0 until size, c, n, kind) } @Benchmark def containsFalse(x: Blackhole): Unit = x.consume(bench.containsFalse) @Benchmark def containsTrue(x: Blackhole): Unit = x.consume(bench.containsTrue) }
Example 21
Source File: SetCreateBench.scala From abc with Apache License 2.0
package com.rklaehn.abc import java.util.concurrent.TimeUnit import org.openjdk.jmh.annotations._ import org.openjdk.jmh.infra.Blackhole import cats.kernel.instances.all._ import scala.collection.immutable.{HashSet, SortedSet} sealed trait SetCreateBenchOps { def createBulk: Any def createElements: Any } object SetCreateBenchOps extends BenchUtil { def apply(a: Seq[Int], kind: String) = { val a1 = a.map(mix).toArray require(a1.length == a.length) kind match { case "hashset" => ScalaCollectionBench(a1, x => HashSet.apply(x: _*)) case "sortedset" => ScalaCollectionBench(a1, x => SortedSet.apply(x: _*)) case "arrayset" => TypeClassBench(a1) case "arrayset2" => ??? } } private final case class ScalaCollectionBench(a: Array[Int], f: Array[Int] => Any) extends SetCreateBenchOps { override def createBulk: Any = f(a) override def createElements: Any = f(a) } private final case class TypeClassBench(a: Array[Int]) extends SetCreateBenchOps { override def createBulk: Any = { ArraySet(a: _*) } override def createElements: Any = { a.foldLeft(ArraySet.empty[Int])(_ + _) } } } @BenchmarkMode(Array(Mode.AverageTime)) @OutputTimeUnit(TimeUnit.NANOSECONDS) @State(Scope.Thread) class SetCreateBench { @Param(Array("1", "10", "100", "1000", "10000", "100000")) var size = 0 @Param(Array("arrayset", "hashset", "sortedset")) //, "arrayset2")) var kind = "" var bench: SetCreateBenchOps = _ @Setup def setup(): Unit = { bench = SetCreateBenchOps(0 until size, kind) } @Benchmark def createBulk(x: Blackhole): Unit = x.consume(bench.createBulk) @Benchmark def createElements(x: Blackhole): Unit = x.consume(bench.createElements) }
Example 22
Source File: CreateSizeList.scala From abc with Apache License 2.0
package com.rklaehn.abc import org.github.jamm.MemoryMeter import cats.kernel.instances.all._ import scala.collection.immutable.{SortedSet, HashSet} object CreateSizeList extends App { lazy val mm = new MemoryMeter() lazy val overhead = mm.measure(new java.lang.Object) lazy val pointerSize = (mm.measure(new Array[java.lang.Object](256)) - mm.measure(new Array[java.lang.Object](128))) / 128 println("|n|ArraySet|HashSet|SortedSet|") println("|--:|--:|--:|--:|") for(n <- Seq(1, 10, 100, 1000, 10000, 100000)) { val xs = Array(1 to n: _*) val as = ArraySet[Int](xs: _*) val hs = HashSet[Int](xs: _*) val ss = SortedSet[Int](xs: _*) val ass = mm.measureDeep(as) val hss = mm.measureDeep(hs) val sss = mm.measureDeep(ss) println(s"| $n| $ass| $hss| $sss|") } println() println("|n|ArrayMap|HashMap|SortedMap|") println("|--:|--:|--:|--:|") for(n <- Seq(1, 10, 100, 1000, 10000, 100000)) { val xs = Array(1 to n: _*) val entries = xs.map(x => x -> x) val as = ArrayMap(entries: _*) val hs = HashSet(entries: _*) val ss = SortedSet(entries: _*) val ass = mm.measureDeep(as) val hss = mm.measureDeep(hs) val sss = mm.measureDeep(ss) println(s"| $n| $ass| $hss| $sss|") } println() println("|n|ArraySeq|Vector|List|") println("|--:|--:|--:|--:|") for(n <- Seq(1, 10, 100, 1000, 10000, 100000)) { val xs = Array(1 to n: _*) val as = ArraySeq(xs: _*) val hs = Vector(xs: _*) val ss = List(xs: _*) val ass = mm.measureDeep(as) val hss = mm.measureDeep(hs) val sss = mm.measureDeep(ss) println(s"| $n| $ass| $hss| $sss|") } }
Example 23
Source File: DeltaPushFilter.scala From connectors with Apache License 2.0
package org.apache.spark.sql.delta

import scala.collection.immutable.HashSet
import scala.collection.JavaConverters._

import org.apache.hadoop.hive.ql.exec.{FunctionRegistry, SerializationUtilities}
import org.apache.hadoop.hive.ql.lib._
import org.apache.hadoop.hive.ql.parse.SemanticException
import org.apache.hadoop.hive.ql.plan.{ExprNodeColumnDesc, ExprNodeConstantDesc, ExprNodeGenericFuncDesc}
import org.apache.hadoop.hive.ql.udf.generic._

import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.expressions.{And, EqualNullSafe, EqualTo, Expression, GreaterThan, GreaterThanOrEqual, InSet, LessThan, LessThanOrEqual, Like, Literal, Not}

object DeltaPushFilter extends Logging {
  lazy val supportedPushDownUDFs = Array(
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual",
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan",
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan",
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan",
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan",
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual",
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS",
    "org.apache.hadoop.hive.ql.udf.UDFLike",
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn"
  )

  def partitionFilterConverter(hiveFilterExprSeriablized: String): Seq[Expression] = {
    if (hiveFilterExprSeriablized != null) {
      val filterExpr = SerializationUtilities.deserializeExpression(hiveFilterExprSeriablized)
      val opRules = new java.util.LinkedHashMap[Rule, NodeProcessor]()
      val nodeProcessor = new NodeProcessor() {
        @throws[SemanticException]
        def process(nd: Node, stack: java.util.Stack[Node],
            procCtx: NodeProcessorCtx, nodeOutputs: Object*): Object = {
          nd match {
            case e: ExprNodeGenericFuncDesc if FunctionRegistry.isOpAnd(e) =>
              nodeOutputs.map(_.asInstanceOf[Expression]).reduce(And)
            case e: ExprNodeGenericFuncDesc =>
              val (columnDesc, constantDesc) =
                if (nd.getChildren.get(0).isInstanceOf[ExprNodeColumnDesc]) {
                  (nd.getChildren.get(0), nd.getChildren.get(1))
                } else { (nd.getChildren.get(1), nd.getChildren.get(0)) }

              val columnAttr = UnresolvedAttribute(
                columnDesc.asInstanceOf[ExprNodeColumnDesc].getColumn)
              val constantVal = Literal(constantDesc.asInstanceOf[ExprNodeConstantDesc].getValue)
              nd.asInstanceOf[ExprNodeGenericFuncDesc].getGenericUDF match {
                case f: GenericUDFOPNotEqualNS =>
                  Not(EqualNullSafe(columnAttr, constantVal))
                case f: GenericUDFOPNotEqual =>
                  Not(EqualTo(columnAttr, constantVal))
                case f: GenericUDFOPEqualNS =>
                  EqualNullSafe(columnAttr, constantVal)
                case f: GenericUDFOPEqual =>
                  EqualTo(columnAttr, constantVal)
                case f: GenericUDFOPGreaterThan =>
                  GreaterThan(columnAttr, constantVal)
                case f: GenericUDFOPEqualOrGreaterThan =>
                  GreaterThanOrEqual(columnAttr, constantVal)
                case f: GenericUDFOPLessThan =>
                  LessThan(columnAttr, constantVal)
                case f: GenericUDFOPEqualOrLessThan =>
                  LessThanOrEqual(columnAttr, constantVal)
                case f: GenericUDFBridge if f.getUdfName.equals("like") =>
                  Like(columnAttr, constantVal)
                case f: GenericUDFIn =>
                  val inConstantVals = nd.getChildren.asScala
                    .filter(_.isInstanceOf[ExprNodeConstantDesc])
                    .map(_.asInstanceOf[ExprNodeConstantDesc].getValue)
                    .map(Literal(_)).toSet
                  InSet(columnAttr, HashSet() ++ inConstantVals)
                case _ =>
                  throw new RuntimeException(s"Unsupported func(${nd.getName}) " +
                    s"which can not be pushed down to delta")
              }
            case _ => null
          }
        }
      }

      val disp = new DefaultRuleDispatcher(nodeProcessor, opRules, null)
      val ogw = new DefaultGraphWalker(disp)
      val topNodes = new java.util.ArrayList[Node]()
      topNodes.add(filterExpr)
      val nodeOutput = new java.util.HashMap[Node, Object]()
      try {
        ogw.startWalking(topNodes, nodeOutput)
      } catch {
        case ex: Exception =>
          throw new RuntimeException(ex)
      }
      logInfo(s"converted partition filter expr:" +
        s"${nodeOutput.get(filterExpr).asInstanceOf[Expression].toJSON}")
      Seq(nodeOutput.get(filterExpr).asInstanceOf[Expression])
    } else Seq.empty[org.apache.spark.sql.catalyst.expressions.Expression]
  }
}
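Two HashSet details in this file are worth calling out: InSet is fed a set built with HashSet() ++ literals, and supportedPushDownUDFs is an Array that is only ever useful for membership-style checks. The hedged sketch below (standalone, not Delta code) shows how a HashSet would serve both purposes with average constant-time lookups:

import scala.collection.immutable.HashSet

object PushDownLookup extends App {
  // Membership table of supported UDF class names: contains is O(1) on average.
  val supportedUdfs: HashSet[String] = HashSet(
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual",
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn"
  )
  println(supportedUdfs.contains("org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn"))  // true

  // Building a set of heterogeneous constants with ++, as done for InSet above.
  val constants: Seq[Any] = Seq(1, 2L, "three")
  val inSetValues: Set[Any] = HashSet() ++ constants
  println(inSetValues.contains(2L))   // true
}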
Example 24
Source File: ColumnarTestUtils.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.execution.columnar import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types.{AtomicType, Decimal} import org.apache.spark.unsafe.types.UTF8String object ColumnarTestUtils { def makeNullRow(length: Int): GenericInternalRow = { val row = new GenericInternalRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case NULL => null case BOOLEAN => Random.nextBoolean() case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => randomBytes(Random.nextInt(32)) case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale) case STRUCT(_) => new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10)))) case ARRAY(_) => new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt())) case MAP(_) => ArrayBasedMapData( Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32))))) case _ => throw new IllegalArgumentException(s"Unknown column type $columnType") }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_], tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[JvmType]( columnType: ColumnType[JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_], tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = { val row = new GenericInternalRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericInternalRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericInternalRow(1) row(0) = value row } (values, rows) } }