scala.collection.immutable.HashSet Scala Examples

The following examples show how to use scala.collection.immutable.HashSet. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.
Example 1
Source File: CollectionConvertersSuite.scala    From pureconfig   with Mozilla Public License 2.0 5 votes vote down vote up
package pureconfig

import scala.collection.JavaConverters._
import scala.collection.immutable.{ HashSet, ListSet, Queue, TreeSet }

import com.typesafe.config.{ ConfigFactory, ConfigValueFactory, ConfigValueType }
import pureconfig.error.{ ConfigReaderFailures, ConvertFailure, WrongType }

class CollectionConvertersSuite extends BaseSuite {
  implicit override val generatorDrivenConfig = PropertyCheckConfiguration(minSuccessful = 100)

  behavior of "ConfigConvert"


    // order of keys maintained
    ConfigValueFactory.fromMap(Map("2" -> 1, "0" -> 2, "1" -> 3).asJava) -> List(2, 3, 1),
    ConfigValueFactory.fromMap(Map("3" -> 2, "1" -> 4).asJava) -> List(4, 2),
    ConfigValueFactory.fromMap(Map("1" -> 1, "a" -> 2).asJava) -> List(1))

    ConfigValueFactory.fromMap(Map("b" -> 1, "a" -> 2).asJava) -> ConfigReaderFailures(
      ConvertFailure(WrongType(ConfigValueType.OBJECT, Set(ConfigValueType.LIST)), emptyConfigOrigin, "")),
    ConfigValueFactory.fromMap(Map().asJava) -> ConfigReaderFailures(
      ConvertFailure(WrongType(ConfigValueType.OBJECT, Set(ConfigValueType.LIST)), emptyConfigOrigin, "")))


  checkArbitrary[Map[String, Int]]
  checkFailures[Map[String, Int]](
    // nested map should fail
    ConfigFactory.parseString("conf.a=1").root() -> ConfigReaderFailures(
      ConvertFailure(WrongType(ConfigValueType.OBJECT, Set(ConfigValueType.NUMBER)), stringConfigOrigin(1), "conf")),
    // wrong value type should fail
    ConfigFactory.parseString("{ a=b }").root() -> ConfigReaderFailures(
      ConvertFailure(WrongType(ConfigValueType.STRING, Set(ConfigValueType.NUMBER)), stringConfigOrigin(1), "a")))


    ConfigValueFactory.fromMap(Map("1" -> 4, "2" -> 5, "3" -> 6).asJava) -> Set(4, 5, 6))





Example 2
Source File: TestUtils.scala    From shc   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql

import java.nio.ByteBuffer
import java.io.{IOException, File}
import java.nio.ByteBuffer
import java.util

import org.apache.avro.generic.GenericData

import scala.collection.immutable.HashSet
import scala.collection.mutable.ArrayBuffer
import scala.util.Random

import org.apache.spark.sql.SQLContext

import scala.util.Random

object TestUtils {

  def generateRandomByteBuffer(rand: Random, size: Int): ByteBuffer = {
    val bb = ByteBuffer.allocate(size)
    val arrayOfBytes = new Array[Byte](size)

  def generateRandomMap(rand: Random, size: Int): java.util.Map[String, Int] = {
    val jMap = new util.HashMap[String, Int]()
    for (i <- 0 until size) {
      jMap.put(rand.nextString(5), i)

  def generateRandomArray(rand: Random, size: Int): util.ArrayList[Boolean] = {
    val vec = new util.ArrayList[Boolean]()
    for (i <- 0 until size) {
Example 3
Source File: ColumnarTestUtils.scala    From BigDatalog   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.columnar

import scala.collection.immutable.HashSet
import scala.util.Random

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, GenericMutableRow}
import org.apache.spark.sql.catalyst.util.{GenericArrayData, ArrayBasedMapData}
import org.apache.spark.sql.types.{AtomicType, Decimal}
import org.apache.spark.unsafe.types.UTF8String

object ColumnarTestUtils {
  def makeNullRow(length: Int): GenericMutableRow = {
    val row = new GenericMutableRow(length)
    (0 until length).foreach(row.setNullAt)

  def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = {
    def randomBytes(length: Int) = {
      val bytes = new Array[Byte](length)

    (columnType match {
      case NULL => null
      case BOOLEAN => Random.nextBoolean()
      case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte
      case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort
      case INT => Random.nextInt()
      case LONG => Random.nextLong()
      case FLOAT => Random.nextFloat()
      case DOUBLE => Random.nextDouble()
      case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32)))
      case BINARY => randomBytes(Random.nextInt(32))
      case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale)
      case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale)
      case STRUCT(_) =>
        new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10))))
      case ARRAY(_) =>
        new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt()))
      case MAP(_) =>
          Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32)))))

  /** Varargs convenience overload: gathers `head` and `tail` into one sequence and
    * delegates to the `Seq`-based `makeRandomValues`.
    */
  def makeRandomValues(
      head: ColumnType[_],
      tail: ColumnType[_]*): Seq[Any] = {
    val columnTypes: Seq[ColumnType[_]] = Seq(head) ++ tail
    makeRandomValues(columnTypes)
  }

  def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = {

  def makeUniqueRandomValues[JvmType](
      columnType: ColumnType[JvmType],
      count: Int): Seq[JvmType] = {

    Iterator.iterate(HashSet.empty[JvmType]) { set =>
      set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next()

  /** Varargs convenience overload: gathers `head` and `tail` into one sequence and
    * delegates to the `Seq`-based `makeRandomRow`.
    */
  def makeRandomRow(
      head: ColumnType[_],
      tail: ColumnType[_]*): InternalRow = {
    val columnTypes: Seq[ColumnType[_]] = Seq(head) ++ tail
    makeRandomRow(columnTypes)
  }

  def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = {
    val row = new GenericMutableRow(columnTypes.length)
    makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) =>
      row(index) = value

  def makeUniqueValuesAndSingleValueRows[T <: AtomicType](
      columnType: NativeColumnType[T],
      count: Int): (Seq[T#InternalType], Seq[GenericMutableRow]) = {

    val values = makeUniqueRandomValues(columnType, count)
    val rows = { value =>
      val row = new GenericMutableRow(1)
      row(0) = value

    (values, rows)
Example 4
Source File: IpRangeTest.scala    From ipaddr   with Apache License 2.0 5 votes vote down vote up
package com.risksense.ipaddr

import scala.collection.immutable.HashSet

// scalastyle:off multiple.string.literals magic.number

class IpRangeTest extends UnitSpec {

  private val addr1 = ""
  private val addr2 = ""
  private val range = IpRange(addr1, addr2)
  private val range2 = IpRange("", "")
  private val range3 = IpRange("", "")
  private val range4 = IpRange("", "")

  "Creating an IpRange" should "result in failure if addresses are invalid" in {
    // first address invalid
    an[IpaddrException] should be thrownBy IpRange("1.2.300.20", "")

    // second address invalid
    an[IpaddrException] should be thrownBy IpRange("", "")

    // first address > second address
    an[IpaddrException] should be thrownBy IpRange("", "")

  it should "succeed if addresses are valid" in {
    IpRange("", "") shouldBe a[IpRange]
    IpRange("", "") shouldBe a[IpRange]

  "An IpRange object" should "perform all range operations" in {
    range.toString() should be(addr1 + "-" + addr2)
    range.first should be(3232235976L)
    range.last should be(3232236006L)
    range.key should be((4, 3232235976L, 3232236006L))
    range.sortKey should be((4, 3232235976L, 27))

  it should "perform contains operation" in {
    // Check range edge addresses
    range.contains(addr1) should be(true)
    range.contains(addr2) should be(true)
    range.contains(range2) should be(true)
    range.contains(range3) should be(false)
    range.contains(range4) should be(false)

    val net = IpNetwork("")
    val rightRange = ""
    val r1 = IpRange("", "")
    val r2 = IpRange("", rightRange)
    val r3 = IpRange("", rightRange)
    val r4 = IpRange("", rightRange)
    r1.contains(net) should be(false)
    r2.contains(net) should be(true)
    r3.contains(net) should be(true)
    r4.contains(net) should be(false)
    an[IpaddrException] should be thrownBy r4.contains("1.2.3") // address is bad

  it should "perform cidrs operation" in {
    val net1 = IpNetwork("")
    val net2 = IpNetwork("")
    val net3 = IpNetwork("")
    val net4 = IpNetwork("")
    val net5 = IpNetwork("")
    val netList = List(net1, net2, net3, net4, net5)
    range.cidrs should be(netList)

  it should "check for equality" in {
    val hs = HashSet(range2, range, range3)
    range should be(IpRange(addr1, addr2))
    range == range2 should be(false)
    range.equals(range) should be(true)
    range.equals(addr1) should be(false)
    hs.contains(range) should be(true)
    hs.contains(range4) should be(false)

Example 5
Source File: ColumnarTestUtils.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.columnar

import scala.collection.immutable.HashSet
import scala.util.Random

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData}
import org.apache.spark.sql.types.{AtomicType, Decimal}
import org.apache.spark.unsafe.types.UTF8String

object ColumnarTestUtils {
  def makeNullRow(length: Int): GenericInternalRow = {
    val row = new GenericInternalRow(length)
    (0 until length).foreach(row.setNullAt)

  def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = {
    def randomBytes(length: Int) = {
      val bytes = new Array[Byte](length)

    (columnType match {
      case NULL => null
      case BOOLEAN => Random.nextBoolean()
      case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte
      case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort
      case INT => Random.nextInt()
      case LONG => Random.nextLong()
      case FLOAT => Random.nextFloat()
      case DOUBLE => Random.nextDouble()
      case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32)))
      case BINARY => randomBytes(Random.nextInt(32))
      case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale)
      case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale)
      case STRUCT(_) =>
        new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10))))
      case ARRAY(_) =>
        new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt()))
      case MAP(_) =>
          Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32)))))
      case _ => throw new IllegalArgumentException(s"Unknown column type $columnType")

  /** Varargs convenience overload: gathers `head` and `tail` into one sequence and
    * delegates to the `Seq`-based `makeRandomValues`.
    */
  def makeRandomValues(
      head: ColumnType[_],
      tail: ColumnType[_]*): Seq[Any] = {
    val columnTypes: Seq[ColumnType[_]] = Seq(head) ++ tail
    makeRandomValues(columnTypes)
  }

  def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = {

  def makeUniqueRandomValues[JvmType](
      columnType: ColumnType[JvmType],
      count: Int): Seq[JvmType] = {

    Iterator.iterate(HashSet.empty[JvmType]) { set =>
      set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next()

  /** Varargs convenience overload: gathers `head` and `tail` into one sequence and
    * delegates to the `Seq`-based `makeRandomRow`.
    */
  def makeRandomRow(
      head: ColumnType[_],
      tail: ColumnType[_]*): InternalRow = {
    val columnTypes: Seq[ColumnType[_]] = Seq(head) ++ tail
    makeRandomRow(columnTypes)
  }

  def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = {
    val row = new GenericInternalRow(columnTypes.length)
    makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) =>
      row(index) = value

  def makeUniqueValuesAndSingleValueRows[T <: AtomicType](
      columnType: NativeColumnType[T],
      count: Int): (Seq[T#InternalType], Seq[GenericInternalRow]) = {

    val values = makeUniqueRandomValues(columnType, count)
    val rows = { value =>
      val row = new GenericInternalRow(1)
      row(0) = value

    (values, rows)
Example 6
Source File: TwoSum.scala    From coding-interview-questions-scala   with Apache License 2.0 5 votes vote down vote up
package org.questions.arrays

import scala.annotation.tailrec
import scala.collection.immutable.HashSet

/** Finds a pair of elements in a sequence that add up to a given sum. */
class TwoSum {

  /** Scans `seq` from left to right and returns the first pair whose members add up to `sum`.
    *
    * @param seq the numbers to search; any `Seq` implementation is accepted
    * @param sum the target sum
    * @return `Some((current, earlier))` where `current` is the element at which the match was
    *         found and `earlier` is a previously seen element with `current + earlier == sum`,
    *         or `None` when no such pair exists
    */
  def findPairSum(seq: Seq[Int], sum: Int): Option[(Int, Int)] = {

    // Works on a List so the `::` patterns are exhaustive; matching the raw Seq against
    // `::` threw a MatchError for non-empty non-List inputs such as Vector.
    @tailrec
    def inner(remaining: List[Int], seen: HashSet[Int]): Option[(Int, Int)] = remaining match {
      case Nil => None
      case head :: tail =>
        val lookingFor = sum - head
        if (seen.contains(lookingFor))
          Some(head -> lookingFor)
        else inner(tail, seen + head)
    }

    // `toList` returns the same instance when `seq` already is a List.
    inner(seq.toList, HashSet.empty[Int])
  }
}
Example 7
Source File: ColumnarTestUtils.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.columnar

import scala.collection.immutable.HashSet
import scala.util.Random
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.GenericMutableRow
import org.apache.spark.sql.types.{DataType, Decimal, AtomicType}
import org.apache.spark.unsafe.types.UTF8String
object ColumnarTestUtils {
  def makeNullRow(length: Int): GenericMutableRow = {
    val row = new GenericMutableRow(length)
    (0 until length).foreach(row.setNullAt)
  def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = {
    def randomBytes(length: Int) = {
      val bytes = new Array[Byte](length)

    (columnType match {
      case BOOLEAN => Random.nextBoolean()
      case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte
      case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort
      case INT => Random.nextInt()
      case DATE => Random.nextInt()
      case LONG => Random.nextLong()
      case TIMESTAMP => Random.nextLong()
      case FLOAT => Random.nextFloat()
      case DOUBLE => Random.nextDouble()
      case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32)))
      case BINARY => randomBytes(Random.nextInt(32))
      case FIXED_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale)
      case _ =>
        // Using a random one-element map instead of an arbitrary object
        Map(Random.nextInt() -> Random.nextString(Random.nextInt(32)))

  /** Varargs convenience overload: gathers `head` and `tail` into one sequence and
    * delegates to the `Seq`-based `makeRandomValues`.
    */
  def makeRandomValues(
      head: ColumnType[_],
      tail: ColumnType[_]*): Seq[Any] = {
    val columnTypes: Seq[ColumnType[_]] = Seq(head) ++ tail
    makeRandomValues(columnTypes)
  }

  def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = {
  def makeUniqueRandomValues[JvmType](
      columnType: ColumnType[JvmType],
      count: Int): Seq[JvmType] = {

    Iterator.iterate(HashSet.empty[JvmType]) { set =>
      set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next()

  /** Varargs convenience overload: gathers `head` and `tail` into one sequence and
    * delegates to the `Seq`-based `makeRandomRow`.
    */
  def makeRandomRow(
      head: ColumnType[_],
      tail: ColumnType[_]*): InternalRow = {
    val columnTypes: Seq[ColumnType[_]] = Seq(head) ++ tail
    makeRandomRow(columnTypes)
  }

  def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = {
    val row = new GenericMutableRow(columnTypes.length)
    makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) =>
      row(index) = value
  def makeUniqueValuesAndSingleValueRows[T <: AtomicType](
      columnType: NativeColumnType[T],
      count: Int): (Seq[T#InternalType], Seq[GenericMutableRow]) = {

    val values = makeUniqueRandomValues(columnType, count)
    val rows = { value =>
      val row = new GenericMutableRow(1)
      row(0) = value

    (values, rows)
Example 8
Source File: OptimizeInSuite.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.optimizer

import scala.collection.immutable.HashSet
import org.apache.spark.sql.catalyst.analysis.{EliminateSubQueries, UnresolvedAttribute}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.types._

// For implicit conversions
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.dsl.expressions._

class OptimizeInSuite extends PlanTest {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("AnalysisNodes", Once,
        EliminateSubQueries) ::
      Batch("ConstantFolding", Once,
        OptimizeIn) :: Nil

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)
  test("OptimizedIn test: In clause not optimized to InSet when less than 10 items") {
    val originalQuery =
        .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2))))

    val optimized = Optimize.execute(originalQuery.analyze)
    comparePlans(optimized, originalQuery)
  test("OptimizedIn test: In clause optimized to InSet when more than 10 items") {
    val originalQuery =
        .where(In(UnresolvedAttribute("a"), (1 to 11).map(Literal(_))))

    val optimized = Optimize.execute(originalQuery.analyze)
    val correctAnswer =
        .where(InSet(UnresolvedAttribute("a"), (1 to 11).toSet))

    comparePlans(optimized, correctAnswer)
  test("OptimizedIn test: In clause not optimized in case filter has attributes") {
    val originalQuery =
        .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2), UnresolvedAttribute("b"))))

    val optimized = Optimize.execute(originalQuery.analyze)
    val correctAnswer =
        .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2), UnresolvedAttribute("b"))))

    comparePlans(optimized, correctAnswer)
Example 9
Source File: MutableSet.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
package scalaDemo

object MutableSet {
  def main(args: Array[String]):Unit= {
    import scala.collection.mutable.Set
    val movieSet = Set("Hitch", "Poltergeist")
    movieSet += "Shrek"
    import scala.collection.immutable.HashSet
    val hashSet = HashSet("Tomatoes", "Chilies")
    println(hashSet + "Coriander")

Example 10
Source File: ColumnarTestUtils.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.columnar

import java.sql.Timestamp

import scala.collection.immutable.HashSet
import scala.util.Random

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.expressions.GenericMutableRow
import org.apache.spark.sql.types.{UTF8String, DataType, Decimal, AtomicType}

object ColumnarTestUtils {
  def makeNullRow(length: Int): GenericMutableRow = {
    val row = new GenericMutableRow(length)
    (0 until length).foreach(row.setNullAt)

  def makeRandomValue[T <: DataType, JvmType](columnType: ColumnType[T, JvmType]): JvmType = {
    def randomBytes(length: Int) = {
      val bytes = new Array[Byte](length)

    (columnType match {
      case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte
      case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort
      case INT => Random.nextInt()
      case LONG => Random.nextLong()
      case FLOAT => Random.nextFloat()
      case DOUBLE => Random.nextDouble()
      case FIXED_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale)
      case STRING => UTF8String(Random.nextString(Random.nextInt(32)))
      case BOOLEAN => Random.nextBoolean()
      case BINARY => randomBytes(Random.nextInt(32))
      case DATE => Random.nextInt()
      case TIMESTAMP =>
        val timestamp = new Timestamp(Random.nextLong())
      case _ =>
        // Using a random one-element map instead of an arbitrary object
        Map(Random.nextInt() -> Random.nextString(Random.nextInt(32)))

  /** Varargs convenience overload: gathers `head` and `tail` into one sequence and
    * delegates to the `Seq`-based `makeRandomValues`.
    */
  def makeRandomValues(
      head: ColumnType[_ <: DataType, _],
      tail: ColumnType[_ <: DataType, _]*): Seq[Any] = {
    val columnTypes: Seq[ColumnType[_ <: DataType, _]] = Seq(head) ++ tail
    makeRandomValues(columnTypes)
  }

  def makeRandomValues(columnTypes: Seq[ColumnType[_ <: DataType, _]]): Seq[Any] = {

  def makeUniqueRandomValues[T <: DataType, JvmType](
      columnType: ColumnType[T, JvmType],
      count: Int): Seq[JvmType] = {

    Iterator.iterate(HashSet.empty[JvmType]) { set =>
      set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next()

  /** Varargs convenience overload: gathers `head` and `tail` into one sequence and
    * delegates to the `Seq`-based `makeRandomRow`.
    */
  def makeRandomRow(
      head: ColumnType[_ <: DataType, _],
      tail: ColumnType[_ <: DataType, _]*): Row = {
    val columnTypes: Seq[ColumnType[_ <: DataType, _]] = Seq(head) ++ tail
    makeRandomRow(columnTypes)
  }

  def makeRandomRow(columnTypes: Seq[ColumnType[_ <: DataType, _]]): Row = {
    val row = new GenericMutableRow(columnTypes.length)
    makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) =>
      row(index) = value

  def makeUniqueValuesAndSingleValueRows[T <: AtomicType](
      columnType: NativeColumnType[T],
      count: Int): (Seq[T#InternalType], Seq[GenericMutableRow]) = {

    val values = makeUniqueRandomValues(columnType, count)
    val rows = { value =>
      val row = new GenericMutableRow(1)
      row(0) = value

    (values, rows)
Example 11
Source File: OptimizeInSuite.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.optimizer

import scala.collection.immutable.HashSet
import org.apache.spark.sql.catalyst.analysis.{EliminateSubQueries, UnresolvedAttribute}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.types._

// For implicit conversions
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.dsl.expressions._

class OptimizeInSuite extends PlanTest {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("AnalysisNodes", Once,
        EliminateSubQueries) ::
      Batch("ConstantFolding", Once,
        OptimizeIn) :: Nil

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  test("OptimizedIn test: In clause optimized to InSet") {
    val originalQuery =
        .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2))))

    val optimized = Optimize.execute(originalQuery.analyze)
    val correctAnswer =
        .where(InSet(UnresolvedAttribute("a"), HashSet[Any]() + 1 + 2))

    comparePlans(optimized, correctAnswer)

  test("OptimizedIn test: In clause not optimized in case filter has attributes") {
    val originalQuery =
        .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2), UnresolvedAttribute("b"))))

    val optimized = Optimize.execute(originalQuery.analyze)
    val correctAnswer =
        .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2), UnresolvedAttribute("b"))))

    comparePlans(optimized, correctAnswer)
Example 12
Source File: ColumnarTestUtils.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.columnar

import scala.collection.immutable.HashSet
import scala.util.Random

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData}
import org.apache.spark.sql.types.{AtomicType, Decimal}
import org.apache.spark.unsafe.types.UTF8String

object ColumnarTestUtils {
  def makeNullRow(length: Int): GenericInternalRow = {
    val row = new GenericInternalRow(length)
    (0 until length).foreach(row.setNullAt)

  def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = {
    def randomBytes(length: Int) = {
      val bytes = new Array[Byte](length)

    (columnType match {
      case NULL => null
      case BOOLEAN => Random.nextBoolean()
      case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte
      case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort
      case INT => Random.nextInt()
      case LONG => Random.nextLong()
      case FLOAT => Random.nextFloat()
      case DOUBLE => Random.nextDouble()
      case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32)))
      case BINARY => randomBytes(Random.nextInt(32))
      case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale)
      case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale)
      case STRUCT(_) =>
        new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10))))
      case ARRAY(_) =>
        new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt()))
      case MAP(_) =>
          Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32)))))
      case _ => throw new IllegalArgumentException(s"Unknown column type $columnType")

  /** Varargs convenience overload: gathers `head` and `tail` into one sequence and
    * delegates to the `Seq`-based `makeRandomValues`.
    */
  def makeRandomValues(
      head: ColumnType[_],
      tail: ColumnType[_]*): Seq[Any] = {
    val columnTypes: Seq[ColumnType[_]] = Seq(head) ++ tail
    makeRandomValues(columnTypes)
  }

  def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = {

  def makeUniqueRandomValues[JvmType](
      columnType: ColumnType[JvmType],
      count: Int): Seq[JvmType] = {

    Iterator.iterate(HashSet.empty[JvmType]) { set =>
      set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next()

  /** Varargs convenience overload: gathers `head` and `tail` into one sequence and
    * delegates to the `Seq`-based `makeRandomRow`.
    */
  def makeRandomRow(
      head: ColumnType[_],
      tail: ColumnType[_]*): InternalRow = {
    val columnTypes: Seq[ColumnType[_]] = Seq(head) ++ tail
    makeRandomRow(columnTypes)
  }

  def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = {
    val row = new GenericInternalRow(columnTypes.length)
    makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) =>
      row(index) = value

  def makeUniqueValuesAndSingleValueRows[T <: AtomicType](
      columnType: NativeColumnType[T],
      count: Int): (Seq[T#InternalType], Seq[GenericInternalRow]) = {

    val values = makeUniqueRandomValues(columnType, count)
    val rows = { value =>
      val row = new GenericInternalRow(1)
      row(0) = value

    (values, rows)
Example 13
Source File: InsertOrdSet.scala    From daml   with Apache License 2.0 5 votes vote down vote up
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0


import scala.collection.immutable.{HashSet, Set, Queue}
import scala.collection.{SetLike, AbstractSet}
import scala.collection.generic.{

final class InsertOrdSet[T] private (_items: Queue[T], _hashSet: HashSet[T])
    extends AbstractSet[T]
    with Set[T]
    with SetLike[T, InsertOrdSet[T]]
    with GenericSetTemplate[T, InsertOrdSet]
    with Serializable {
  override def empty: InsertOrdSet[T] = InsertOrdSet.empty
  override def size: Int = _hashSet.size

  def iterator: Iterator[T] =

  override def contains(elem: T): Boolean =

  override def +(elem: T): InsertOrdSet[T] =
    if (_hashSet.contains(elem))
      new InsertOrdSet(
        elem +: _items,
        _hashSet + elem

  override def -(elem: T): InsertOrdSet[T] =
    new InsertOrdSet(
      _items.filter(elem2 => elem != elem2),
      _hashSet - elem

  override def companion: GenericCompanion[InsertOrdSet] = InsertOrdSet


object InsertOrdSet extends ImmutableSetFactory[InsertOrdSet] {
  // Single shared empty instance, built from an empty queue and an empty hash set.
  private val Empty = new InsertOrdSet(Queue.empty, HashSet.empty)
  // Returns the shared empty instance; the cast only changes its static element type.
  override def empty[T] = Empty.asInstanceOf[InsertOrdSet[T]]
  // The empty set typed as InsertOrdSet[Any].
  def emptyInstance: InsertOrdSet[Any] = empty[Any]

  /** Builds an insertion-ordered set from `s`, keeping the first occurrence of each element.
    *
    * Duplicates are dropped before the backing queue is built so that the queue and the hash
    * set describe the same elements; otherwise the queue would hold more entries than `size`
    * (which is taken from the hash set) reports.
    *
    * @param s the elements to insert, in insertion order
    * @return a set containing the distinct elements of `s`
    */
  def fromSeq[T](s: Seq[T]): InsertOrdSet[T] = {
    val unique = s.distinct
    // `+` prepends new elements to the queue, so the queue is stored newest-first.
    new InsertOrdSet(Queue(unique.reverse: _*), HashSet(unique: _*))
  }

  implicit def canBuildFrom[A]: CanBuildFrom[Coll, A, InsertOrdSet[A]] =

Example 14
Source File: ClusterListener.scala    From akka-cluster-load-balancing   with MIT License 5 votes vote down vote up

import scala.collection.immutable.HashSet
import scala.concurrent.duration.DurationInt
import akka.actor.{ Actor, Props }
import akka.cluster.Cluster
import akka.cluster.ClusterEvent.MemberUp
import akka.cluster.metrics.{ ClusterMetricsChanged, ClusterMetricsExtension, NodeMetrics }
import akka.cluster.metrics.StandardMetrics.HeapMemory
import kamkor.{ ConsumerApp }
import kamkor.metrics.{ ClusterHeapMetrics, MetricsLogger }

class ClusterListener(metricsIntervalSeconds: Int) extends Actor {

  import context.dispatcher
    metricsIntervalSeconds.seconds, metricsIntervalSeconds.seconds, self, "logConsumersHeapUse")

  private[this] val cluster = Cluster(context.system)
  private[this] val metricsLogger =
    new MetricsLogger(name = cluster.selfAddress.port.getOrElse(0).toString())
  private[this] val clusterHeapMetrics = new ClusterHeapMetrics()

  private var consumers: Set[String] = HashSet.empty

  override def preStart(): Unit = {
    cluster.subscribe(self, classOf[MemberUp])
  override def postStop(): Unit = {

  def receive: Receive = {
    case MemberUp(m) if m.roles.contains(ConsumerApp.clusterRole) =>
      consumers += m.address.hostPort
    case ClusterMetricsChanged(clusterMetrics) =>
        .filter(nm => consumers.contains(nm.address.hostPort))
    case "logConsumersHeapUse" => {

  private[this] def updateHeapUse(nodeMetrics: NodeMetrics) {
    nodeMetrics match {
      case HeapMemory(address, timestamp, used, committed, max) => {
        val usedMB = Math.round(used.doubleValue / 1024 / 1024)
        clusterHeapMetrics.update(address.hostPort, usedMB)
      case _ => // no heap info


object ClusterListener {

  /** Creates the `Props` used to instantiate a `ClusterListener` with the given metrics interval. */
  def props(metricsIntervalSeconds: Int): Props =
    Props(new ClusterListener(metricsIntervalSeconds))

Example 15
Source File: TestEntityConstraints.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.eidos.entities

import org.clulab.wm.eidos.extraction.EntityConstraints
import org.clulab.wm.eidos.test.TestUtils._

import scala.collection.immutable.HashSet

class TestEntityConstraints extends Test {

  /** Splits `text` on single spaces and passes the resulting tokens to
    * `EntityConstraints.matchingBrackets`.
    */
  def matchBrackets(text: String) = {
    val tokens = text.split(' ').toSeq
    EntityConstraints.matchingBrackets(tokens)
  }

  behavior of "EntityConstraints"

  it should "approve of properly nested parentheses" in {

    matchBrackets("This has none.") should be (true)
    matchBrackets("This has ( one pair ) .") should be (true)
    matchBrackets("This has ( ( nested pairs ) ) .") should be (true)
    matchBrackets("This has ( [ { } ] ) mixed pairs .") should be (true)
    matchBrackets("This has ( { ) } intermixed pairs .") should be (true)
    matchBrackets("This has -LRB- one strange pair -RRB- .") should be (true)
    matchBrackets("This has ( double ( nesting ) ( of ) parens ) .") should be (true)

  it should "disapprove of improperly nested parentheses" in {
    matchBrackets("This starts with ) a reversed pair ( .") should be (false)
    matchBrackets("This has ( one normal pair ) and ) a reversed pair ( .") should be (false)
    matchBrackets("This count is just uneven ( in this one .") should be (false)
    matchBrackets("and this ) too") should be (false)
Example 16
Source File: ColumnarTestUtils.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.columnar

import scala.collection.immutable.HashSet
import scala.util.Random

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData}
import org.apache.spark.sql.types.{AtomicType, Decimal}
import org.apache.spark.unsafe.types.UTF8String

/** Helpers that build random values and rows for the columnar test suites. */
object ColumnarTestUtils {
  /** Returns a row with `length` fields, every one of them set to null. */
  def makeNullRow(length: Int): GenericInternalRow = {
    val row = new GenericInternalRow(length)
    (0 until length).foreach(row.setNullAt)
    row
  }

  /**
   * Produces one random value of the JVM type that backs `columnType`.
   *
   * @param columnType the column type to generate a value for
   * @return a freshly generated value, cast to `JvmType`
   * @throws IllegalArgumentException if `columnType` matches none of the handled cases
   */
  def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = {
    def randomBytes(length: Int) = {
      val bytes = new Array[Byte](length)
      Random.nextBytes(bytes)
      bytes
    }

    // The match yields differently typed values per branch, so the result is cast back once at
    // the end instead of in every case.
    (columnType match {
      case NULL => null
      case BOOLEAN => Random.nextBoolean()
      // nextInt(2 * MaxValue) - MaxValue covers [-MaxValue, MaxValue - 1]; MinValue and MaxValue
      // themselves are never produced (same for SHORT below).
      case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte
      case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort
      case INT => Random.nextInt()
      case LONG => Random.nextLong()
      case FLOAT => Random.nextFloat()
      case DOUBLE => Random.nextDouble()
      case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32)))
      case BINARY => randomBytes(Random.nextInt(32))
      // `% 100` keeps the unscaled value within (-100, 100).
      case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale)
      case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale)
      case STRUCT(_) =>
        new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10))))
      case ARRAY(_) =>
        new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt()))
      case MAP(_) =>
        ArrayBasedMapData(
          Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32)))))
      case _ => throw new IllegalArgumentException(s"Unknown column type $columnType")
    }).asInstanceOf[JvmType]
  }

  /** Varargs convenience overload: one random value per given column type, in order. */
  def makeRandomValues(
      head: ColumnType[_],
      tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail)

  /** Returns one random value per entry of `columnTypes`, in the same order. */
  def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = {
    columnTypes.map(makeRandomValue(_))
  }

  /**
   * Returns `count` pairwise distinct random values of `columnType`.
   *
   * NOTE(review): this does not terminate when the type has fewer than `count` distinct values.
   */
  def makeUniqueRandomValues[JvmType](
      columnType: ColumnType[JvmType],
      count: Int): Seq[JvmType] = {

    // Each step grows the set by exactly one value not seen so far; after dropping `count`
    // states the next set holds `count` elements.
    Iterator.iterate(HashSet.empty[JvmType]) { set =>
      set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next()
    }.drop(count).next().toSeq
  }

  /** Varargs convenience overload of `makeRandomRow`. */
  def makeRandomRow(
      head: ColumnType[_],
      tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail)

  /** Builds a row whose i-th field is a random value of `columnTypes(i)`. */
  def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = {
    val row = new GenericInternalRow(columnTypes.length)
    makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) =>
      row(index) = value
    }
    row
  }

  /**
   * Generates `count` distinct values of `columnType` plus, for each value, a one-field row
   * holding it.
   *
   * @return the values and their single-field rows, in matching order
   */
  def makeUniqueValuesAndSingleValueRows[T <: AtomicType](
      columnType: NativeColumnType[T],
      count: Int): (Seq[T#InternalType], Seq[GenericInternalRow]) = {

    val values = makeUniqueRandomValues(columnType, count)
    val rows = values.map { value =>
      val row = new GenericInternalRow(1)
      row(0) = value
      row
    }

    (values, rows)
  }
}
Example 17
Source File: ColumnarTestUtils.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.columnar

import scala.collection.immutable.HashSet
import scala.util.Random

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData}
import org.apache.spark.sql.types.{AtomicType, Decimal}
import org.apache.spark.unsafe.types.UTF8String

/** Helpers that build random values and rows for the columnar test suites. */
object ColumnarTestUtils {
  /** Returns a row with `length` fields, every one of them set to null. */
  def makeNullRow(length: Int): GenericInternalRow = {
    val row = new GenericInternalRow(length)
    (0 until length).foreach(row.setNullAt)
    row
  }

  /**
   * Produces one random value of the JVM type that backs `columnType`.
   *
   * @param columnType the column type to generate a value for
   * @return a freshly generated value, cast to `JvmType`
   * @throws IllegalArgumentException if `columnType` matches none of the handled cases
   */
  def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = {
    def randomBytes(length: Int) = {
      val bytes = new Array[Byte](length)
      Random.nextBytes(bytes)
      bytes
    }

    // The match yields differently typed values per branch, so the result is cast back once at
    // the end instead of in every case.
    (columnType match {
      case NULL => null
      case BOOLEAN => Random.nextBoolean()
      // nextInt(2 * MaxValue) - MaxValue covers [-MaxValue, MaxValue - 1]; MinValue and MaxValue
      // themselves are never produced (same for SHORT below).
      case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte
      case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort
      case INT => Random.nextInt()
      case LONG => Random.nextLong()
      case FLOAT => Random.nextFloat()
      case DOUBLE => Random.nextDouble()
      case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32)))
      case BINARY => randomBytes(Random.nextInt(32))
      // `% 100` keeps the unscaled value within (-100, 100).
      case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale)
      case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale)
      case STRUCT(_) =>
        new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10))))
      case ARRAY(_) =>
        new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt()))
      case MAP(_) =>
        ArrayBasedMapData(
          Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32)))))
      case _ => throw new IllegalArgumentException(s"Unknown column type $columnType")
    }).asInstanceOf[JvmType]
  }

  /** Varargs convenience overload: one random value per given column type, in order. */
  def makeRandomValues(
      head: ColumnType[_],
      tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail)

  /** Returns one random value per entry of `columnTypes`, in the same order. */
  def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = {
    columnTypes.map(makeRandomValue(_))
  }

  /**
   * Returns `count` pairwise distinct random values of `columnType`.
   *
   * NOTE(review): this does not terminate when the type has fewer than `count` distinct values.
   */
  def makeUniqueRandomValues[JvmType](
      columnType: ColumnType[JvmType],
      count: Int): Seq[JvmType] = {

    // Each step grows the set by exactly one value not seen so far; after dropping `count`
    // states the next set holds `count` elements.
    Iterator.iterate(HashSet.empty[JvmType]) { set =>
      set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next()
    }.drop(count).next().toSeq
  }

  /** Varargs convenience overload of `makeRandomRow`. */
  def makeRandomRow(
      head: ColumnType[_],
      tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail)

  /** Builds a row whose i-th field is a random value of `columnTypes(i)`. */
  def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = {
    val row = new GenericInternalRow(columnTypes.length)
    makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) =>
      row(index) = value
    }
    row
  }

  /**
   * Generates `count` distinct values of `columnType` plus, for each value, a one-field row
   * holding it.
   *
   * @return the values and their single-field rows, in matching order
   */
  def makeUniqueValuesAndSingleValueRows[T <: AtomicType](
      columnType: NativeColumnType[T],
      count: Int): (Seq[T#InternalType], Seq[GenericInternalRow]) = {

    val values = makeUniqueRandomValues(columnType, count)
    val rows = values.map { value =>
      val row = new GenericInternalRow(1)
      row(0) = value
      row
    }

    (values, rows)
  }
}
Example 18
Source File: SetCreateAccessBench.scala    From abc   with Apache License 2.0 5 votes vote down vote up

import cats.kernel.instances.all._
import ichi.bench.Thyme
import ichi.bench.Thyme.HowWarm

import scala.collection.immutable.{HashSet, SortedSet}

// Thyme-based comparison of HashSet / SortedSet against ArraySet for creation and element access.
// NOTE(review): in this listing the argument lists of every `pbenchOffWarm` call and all closing
// braces are missing; restore them from the original project before compiling.
object SetCreateAccessBench extends App {

  // Thyme instance shared by all benchmarks below; progress output goes to println.
  val th = Thyme.warmed(verbose = println, warmth = HowWarm.BenchOff)

  // Collection sizes each benchmark is repeated for.
  val ns = Array(1, 10, 100, 1000, 10000, 100000)

  // Compares construction of the three set kinds from the same element array.
  def createInt(): Unit = {
    for (n ← ns) {
      val elements = (0 until n).toArray
      // `def`, not `val`: every reference builds a new set, so construction itself is what a
      // benchmark body referencing s0/s1/s2 would exercise.
      def s0 = HashSet(elements:_*)
      def s1 = SortedSet(elements:_*)
      def s2 = ArraySet(elements:_*)
      th.pbenchOffWarm(s"Create HashSet[Int] vs ArraySet[Int] $n")(
      th.pbenchOffWarm(s"Create SortedSet[Int] vs ArraySet[Int] $n")(

  // Compares element access on prebuilt sets.
  def accessInt(): Unit = {
    for (n ← ns) {
      val elements = (0 until n).toArray
      // `val` here: the sets are built once per size, outside whatever is being timed.
      val s0 = HashSet(elements:_*)
      val s1 = SortedSet(elements:_*)
      val s2 = ArraySet(elements:_*)
      // Presumably the element looked up in each set - TODO confirm against the full source.
      val x = 0
      th.pbenchOffWarm(s"Access HashSet[Int] vs ArraySet[Int] $n")(
      th.pbenchOffWarm(s"Access SortedSet[Int] vs ArraySet[Int] $n")(

Example 19
Source File: SetSetBench.scala    From abc   with Apache License 2.0 5 votes vote down vote up

import java.util.concurrent.TimeUnit

import org.openjdk.jmh.annotations._
import org.openjdk.jmh.infra.Blackhole
 import cats.kernel.instances.all._
import ScalaCollectionConverters._

import scala.collection.immutable.{HashSet, SortedSet}

/**
 * Binary set operations measured by the set/set benchmark.
 *
 * Results are typed as `Any` (or `Boolean`) so that implementations backed by different
 * collection types can share this interface.
 */
sealed trait SetSetBenchOps {
  def union: Any
  def intersect: Any
  def diff: Any
  def subsetOf: Boolean
  def filter(f: Int => Boolean): Any
}

/** Factory that picks the [[SetSetBenchOps]] implementation matching a collection `kind`. */
object SetSetBenchOps extends BenchUtil {

  /**
   * Builds the two operand sets from `a` and `b` using the collection named by `kind`.
   *
   * @param kind one of "hashset", "sortedset", "arrayset" or "arrayset2"; any other value
   *             fails with a `MatchError`
   */
  def apply(a: Seq[Int], b: Seq[Int], kind: String) = {
    // NOTE(review): the right-hand sides of a1/b1 were missing from this listing. `a.map(mix)`
    // is reconstructed by analogy with `mix(c)` in SetElementBenchOps - confirm against the
    // original project.
    val a1 = a.map(mix)
    val b1 = b.map(mix)
    kind match {
      case "hashset" => ScalaCollectionBench(HashSet(a1: _*), HashSet(b1: _*))
      case "sortedset" => ScalaCollectionBench(SortedSet(a1: _*), SortedSet(b1: _*))
      case "arrayset" => TypeClassBench(ArraySet(a1: _*), ArraySet(b1: _*))
      case "arrayset2" => ScalaCollectionBench(ArraySet(a1: _*).asCollection, ArraySet(b1: _*).asCollection)
    }
  }

  /** Runs every operation through the standard `Set[Int]` API. */
  private final case class ScalaCollectionBench(a: Set[Int], b: Set[Int]) extends SetSetBenchOps {
    override def union: Any = a union b
    override def diff: Any = a diff b
    override def subsetOf: Boolean = a subsetOf b
    override def intersect: Any = a intersect b
    override def filter(f: (Int) => Boolean): Any = a filter f
  }

  /** Runs every operation directly on `ArraySet[Int]`. */
  private final case class TypeClassBench(a: ArraySet[Int], b: ArraySet[Int]) extends SetSetBenchOps {
    override def union: Any = a union b
    override def diff: Any = a diff b
    override def subsetOf: Boolean = a subsetOf b
    override def intersect: Any = a intersect b
    override def filter(f: (Int) => Boolean): Any = a filter f
  }
}

/**
 * JMH benchmark of binary set operations across collection kinds, sizes and operand overlaps.
 *
 * NOTE(review): the JMH annotations other than `@Param` were missing from this listing.
 * `@State`, `@Setup` and `@Benchmark` are restored because `@Param` fields and `setup()` have
 * no effect without them; the scope (`Scope.Thread`) and any `@BenchmarkMode` /
 * `@OutputTimeUnit` settings should be confirmed against the original project.
 */
@State(Scope.Thread)
class SetSetBench {

  @Param(Array("1", "10", "100", "1000", "10000", "100000"))
  var size = 0

  @Param(Array("0.0", "0.5", "1.0"))
//  @Param(Array("0.5"))
  var offset = 0.0

  @Param(Array("arrayset", "hashset", "sortedset")) //, "arrayset2"))
  var kind = ""

  var k: Int = 0
  var bench: SetSetBenchOps = _

  val shift = 1000000 // so we don't get the cached java.lang.Integer instances

  /** Builds two ranges of `size` elements whose starts differ by `offset * size`. */
  @Setup
  def setup(): Unit = {
    k = (offset * size).toInt
    bench = SetSetBenchOps(shift until (shift + size), (shift + k) until (shift + k + size), kind)
  }

  // Each benchmark hands its result to the Blackhole, following the same pattern as `filter`.
  @Benchmark
  def union(x: Blackhole): Unit = {
    x.consume(bench.union)
  }

  @Benchmark
  def intersect(x: Blackhole): Unit = {
    x.consume(bench.intersect)
  }

  @Benchmark
  def diff(x: Blackhole): Unit = {
    x.consume(bench.diff)
  }

  @Benchmark
  def subsetOf(x: Blackhole): Unit = {
    x.consume(bench.subsetOf)
  }

  @Benchmark
  def filter(x: Blackhole): Unit = {
    x.consume(bench.filter(_ < k + shift))
  }
}
Example 20
Source File: SetElementBench.scala    From abc   with Apache License 2.0 5 votes vote down vote up

import java.util.concurrent.TimeUnit

import org.openjdk.jmh.annotations._
import org.openjdk.jmh.infra.Blackhole
 import cats.kernel.instances.all._
import ScalaCollectionConverters._

import scala.collection.immutable.{HashSet, SortedSet}

/** Membership lookups measured by the set/element benchmark: one hit and one miss. */
sealed trait SetElementBenchOps {
  def containsTrue: Any
  def containsFalse: Any
}

/** Factory that picks the [[SetElementBenchOps]] implementation matching a collection `kind`. */
object SetElementBenchOps extends BenchUtil {

  /**
   * Builds the set under test from `a` plus the two probe values.
   *
   * @param a    elements of the set
   * @param c    value passed to `containsTrue` (after `mix`)
   * @param n    value passed to `containsFalse` (after `mix`)
   * @param kind one of "hashset", "sortedset", "arrayset" or "arrayset2"; any other value
   *             fails with a `MatchError`
   */
  def apply(a: Seq[Int], c: Int, n: Int, kind: String) = {
    // NOTE(review): the right-hand side of a1 was missing from this listing. `a.map(mix)` is
    // reconstructed from the `mix(c)` / `mix(n)` calls and the length check below - confirm
    // against the original project.
    val a1 = a.map(mix)
    val c1 = mix(c)
    val n1 = mix(n)
    require(a1.length == a.length)
    kind match {
      case "hashset" => ScalaCollectionBench(HashSet(a1: _*), c1, n1)
      case "sortedset" => ScalaCollectionBench(SortedSet(a1: _*), c1, n1)
      case "arrayset" => TypeClassBench(ArraySet(a1: _*), c1, n1)
      case "arrayset2" => ScalaCollectionBench(ArraySet(a1: _*).asCollection, c1, n1)
    }
  }

  /** Lookups through the standard `Set[Int]` API. */
  private final case class ScalaCollectionBench(a: Set[Int], c: Int, n: Int) extends SetElementBenchOps {
    override def containsTrue: Boolean = a.contains(c)
    override def containsFalse: Boolean = a.contains(n)
  }

  /** Lookups directly on `ArraySet[Int]`. */
  private final case class TypeClassBench(a: ArraySet[Int], c: Int, n: Int) extends SetElementBenchOps {
    override def containsTrue: Boolean = a.contains(c)
    override def containsFalse: Boolean = a.contains(n)
  }
}

/**
 * JMH benchmark of `contains` for a present and an absent element across collection kinds.
 *
 * NOTE(review): the JMH annotations other than `@Param` were missing from this listing.
 * `@State`, `@Setup` and `@Benchmark` are restored because `@Param` fields and `setup()` have
 * no effect without them; the scope (`Scope.Thread`) and any `@BenchmarkMode` /
 * `@OutputTimeUnit` settings should be confirmed against the original project.
 */
@State(Scope.Thread)
class SetElementBench {

  @Param(Array("1", "10", "100", "1000", "10000", "100000"))
  var size = 0

  @Param(Array("arrayset", "hashset", "sortedset")) //, "arrayset2"))
  var kind = ""

  var k: Int = 0
  var bench: SetElementBenchOps = _

  /** Builds a set from `0 until size` and picks one probe inside and one outside that range. */
  @Setup
  def setup(): Unit = {
    val c = (0.3 * size).toInt // a value that is contained in the set
    val n = (1.3 * size).toInt // a value that is not contained in the set
    bench = SetElementBenchOps(0 until size, c, n, kind)
  }

  @Benchmark
  def containsFalse(x: Blackhole): Unit = x.consume(bench.containsFalse)

  @Benchmark
  def containsTrue(x: Blackhole): Unit = x.consume(bench.containsTrue)
}
Example 21
Source File: SetCreateBench.scala    From abc   with Apache License 2.0 5 votes vote down vote up

import java.util.concurrent.TimeUnit

import org.openjdk.jmh.annotations._
import org.openjdk.jmh.infra.Blackhole
 import cats.kernel.instances.all._

import scala.collection.immutable.{HashSet, SortedSet}

/** Set construction strategies measured by the set-creation benchmark. */
sealed trait SetCreateBenchOps {
  def createBulk: Any
  def createElements: Any
}

/** Factory that picks the [[SetCreateBenchOps]] implementation matching a collection `kind`. */
object SetCreateBenchOps extends BenchUtil {

  /**
   * Prepares the element array and the construction strategy for `kind`.
   *
   * @param kind one of "hashset", "sortedset" or "arrayset"; "arrayset2" is unimplemented
   *             (`???`) and any other value fails with a `MatchError`
   */
  def apply(a: Seq[Int], kind: String) = {
    // NOTE(review): the right-hand side of a1 was missing from this listing. It must be an
    // Array[Int] (both case classes below take one); `a.map(mix).toArray` is reconstructed by
    // analogy with the sibling benchmarks - confirm against the original project.
    val a1 = a.map(mix).toArray
    require(a1.length == a.length)
    kind match {
      case "hashset" => ScalaCollectionBench(a1, x => HashSet.apply(x: _*))
      case "sortedset" => ScalaCollectionBench(a1, x => SortedSet.apply(x: _*))
      case "arrayset" => TypeClassBench(a1)
      case "arrayset2" => ???
    }
  }

  /** Builds a standard-library set via `f`; both strategies use the same bulk constructor. */
  private final case class ScalaCollectionBench(a: Array[Int], f: Array[Int] => Any) extends SetCreateBenchOps {
    override def createBulk: Any = f(a)
    override def createElements: Any = f(a)
  }

  /** Builds an `ArraySet` either in one call or by adding the elements one at a time. */
  private final case class TypeClassBench(a: Array[Int]) extends SetCreateBenchOps {
    override def createBulk: Any = {
      ArraySet(a: _*)
    }
    override def createElements: Any = {
      a.foldLeft(ArraySet.empty[Int])(_ + _)
    }
  }
}

/**
 * JMH benchmark of set construction across collection kinds and sizes.
 *
 * NOTE(review): the JMH annotations other than `@Param` were missing from this listing.
 * `@State`, `@Setup` and `@Benchmark` are restored because `@Param` fields and `setup()` have
 * no effect without them; the scope (`Scope.Thread`) and any `@BenchmarkMode` /
 * `@OutputTimeUnit` settings should be confirmed against the original project.
 */
@State(Scope.Thread)
class SetCreateBench {

  @Param(Array("1", "10", "100", "1000", "10000", "100000"))
  var size = 0

  @Param(Array("arrayset", "hashset", "sortedset")) //, "arrayset2"))
  var kind = ""

  var bench: SetCreateBenchOps = _

  /** Prepares the construction strategy for the elements `0 until size`. */
  @Setup
  def setup(): Unit = {
    bench = SetCreateBenchOps(0 until size, kind)
  }

  @Benchmark
  def createBulk(x: Blackhole): Unit = x.consume(bench.createBulk)

  @Benchmark
  def createElements(x: Blackhole): Unit = x.consume(bench.createElements)
}
Example 22
Source File: CreateSizeList.scala    From abc   with Apache License 2.0 5 votes vote down vote up

import org.github.jamm.MemoryMeter
import cats.kernel.instances.all._
import scala.collection.immutable.{SortedSet, HashSet}

/**
 * Prints one table row per size with the deep memory footprint (measured with jamm's
 * `MemoryMeter`) of three collections built from the same elements.
 */
object CreateSizeList extends App {

  lazy val mm = new MemoryMeter()
  lazy val overhead = mm.measure(new java.lang.Object)
  lazy val pointerSize = (mm.measure(new Array[java.lang.Object](256)) - mm.measure(new Array[java.lang.Object](128))) / 128

  // Sets of Int: ArraySet vs HashSet vs SortedSet.
  for(n <- Seq(1, 10, 100, 1000, 10000, 100000)) {
    val xs = Array(1 to n: _*)
    val as = ArraySet[Int](xs: _*)
    val hs = HashSet[Int](xs: _*)
    val ss = SortedSet[Int](xs: _*)
    val ass = mm.measureDeep(as)
    val hss = mm.measureDeep(hs)
    val sss = mm.measureDeep(ss)
    println(s"| $n| $ass| $hss| $sss|")
  }

  // Collections of (Int, Int) pairs: ArrayMap vs HashSet vs SortedSet.
  for(n <- Seq(1, 10, 100, 1000, 10000, 100000)) {
    val xs = Array(1 to n: _*)
    // Pair every element with itself; the `xs.map(x` prefix was garbled in this listing.
    val entries = xs.map(x => x -> x)
    val as = ArrayMap(entries: _*)
    val hs = HashSet(entries: _*)
    val ss = SortedSet(entries: _*)
    val ass = mm.measureDeep(as)
    val hss = mm.measureDeep(hs)
    val sss = mm.measureDeep(ss)
    println(s"| $n| $ass| $hss| $sss|")
  }

  // Sequences of Int: ArraySeq vs Vector vs List.
  for(n <- Seq(1, 10, 100, 1000, 10000, 100000)) {
    val xs = Array(1 to n: _*)
    val as = ArraySeq(xs: _*)
    val hs = Vector(xs: _*)
    val ss = List(xs: _*)
    val ass = mm.measureDeep(as)
    val hss = mm.measureDeep(hs)
    val sss = mm.measureDeep(ss)
    println(s"| $n| $ass| $hss| $sss|")
  }
}
Example 23
Source File: DeltaPushFilter.scala    From connectors   with Apache License 2.0 5 votes vote down vote up

import scala.collection.immutable.HashSet
import scala.collection.JavaConverters._

import org.apache.hadoop.hive.ql.exec.{FunctionRegistry, SerializationUtilities}
import org.apache.hadoop.hive.ql.lib._
import org.apache.hadoop.hive.ql.parse.SemanticException
import org.apache.hadoop.hive.ql.plan.{ExprNodeColumnDesc, ExprNodeConstantDesc, ExprNodeGenericFuncDesc}
import org.apache.hadoop.hive.ql.udf.generic._
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.expressions.{And, EqualNullSafe, EqualTo, Expression, GreaterThan, GreaterThanOrEqual, InSet, LessThan, LessThanOrEqual, Like, Literal, Not}

// Translates a serialized Hive partition filter into Catalyst expressions.
// NOTE(review): this listing is missing several statements and all closing braces (for example
// the contents of `supportedPushDownUDFs`, the body of the AND case, the argument of
// `UnresolvedAttribute(` and the value returned on the non-null path). Restore them from the
// original project before compiling.
object DeltaPushFilter extends Logging {
  // Elements of this array are not present in this listing.
  lazy val supportedPushDownUDFs = Array(

  /**
   * Converts a serialized Hive filter expression into Catalyst expressions.
   *
   * @param hiveFilterExprSeriablized serialized Hive expression; may be null
   * @return an empty Seq when the input is null
   */
  def partitionFilterConverter(hiveFilterExprSeriablized: String): Seq[Expression] = {
    if (hiveFilterExprSeriablized != null) {
      val filterExpr = SerializationUtilities.deserializeExpression(hiveFilterExprSeriablized)
      // No rules are registered in the visible code, and null is passed as the dispatcher's
      // third argument below.
      val opRules = new java.util.LinkedHashMap[Rule, NodeProcessor]()
      val nodeProcessor = new NodeProcessor() {
        // Maps one Hive expression node to a Catalyst expression (or null for other nodes).
        def process(nd: Node, stack: java.util.Stack[Node],
            procCtx: NodeProcessorCtx, nodeOutputs: Object*): Object = {
          nd match {
            case e: ExprNodeGenericFuncDesc if FunctionRegistry.isOpAnd(e) =>
            case e: ExprNodeGenericFuncDesc =>
              // Whichever of the first two children is a column descriptor is treated as the
              // column; the other one is treated as the constant.
              // NOTE(review): when the constant comes first the operands are swapped here, but
              // the comparison built below is not mirrored, so `5 < col` would become
              // `col < 5` - confirm whether Hive normalizes operand order beforehand.
              val (columnDesc, constantDesc) =
                if (nd.getChildren.get(0).isInstanceOf[ExprNodeColumnDesc]) {
                  (nd.getChildren.get(0), nd.getChildren.get(1))
                } else { (nd.getChildren.get(1), nd.getChildren.get(0)) }

              val columnAttr = UnresolvedAttribute(
              val constantVal = Literal(constantDesc.asInstanceOf[ExprNodeConstantDesc].getValue)
              // Pick the Catalyst expression that corresponds to the Hive UDF of this node.
              nd.asInstanceOf[ExprNodeGenericFuncDesc].getGenericUDF match {
                case f: GenericUDFOPNotEqualNS =>
                  Not(EqualNullSafe(columnAttr, constantVal))
                case f: GenericUDFOPNotEqual =>
                  Not(EqualTo(columnAttr, constantVal))
                case f: GenericUDFOPEqualNS =>
                  EqualNullSafe(columnAttr, constantVal)
                case f: GenericUDFOPEqual =>
                  EqualTo(columnAttr, constantVal)
                case f: GenericUDFOPGreaterThan =>
                  GreaterThan(columnAttr, constantVal)
                case f: GenericUDFOPEqualOrGreaterThan =>
                  GreaterThanOrEqual(columnAttr, constantVal)
                case f: GenericUDFOPLessThan =>
                  LessThan(columnAttr, constantVal)
                case f: GenericUDFOPEqualOrLessThan =>
                  LessThanOrEqual(columnAttr, constantVal)
                case f: GenericUDFBridge if f.getUdfName.equals("like") =>
                  Like(columnAttr, constantVal)
                case f: GenericUDFIn =>
                  // NOTE(review): as shown, this collects all children of the node; any
                  // filtering/mapping to constants is not visible in this listing.
                  val inConstantVals = nd.getChildren.asScala
                  InSet(columnAttr, HashSet() ++ inConstantVals)
                case _ =>
                  throw new RuntimeException(s"Unsupported func(${nd.getName}) " +
                    s"which can not be pushed down to delta")
            case _ => null

      // Walk the expression with the processor above, collecting per-node results in
      // `nodeOutput`.
      val disp = new DefaultRuleDispatcher(nodeProcessor, opRules, null)
      val ogw = new DefaultGraphWalker(disp)
      // NOTE(review): nothing is added to `topNodes` in the visible code.
      val topNodes = new java.util.ArrayList[Node]()
      val nodeOutput = new java.util.HashMap[Node, Object]()
      try {
        ogw.startWalking(topNodes, nodeOutput)
      } catch {
        // Any failure during the walk is rethrown wrapped in a RuntimeException.
        case ex: Exception =>
          throw new RuntimeException(ex)
      logInfo(s"converted partition filter expr:" +
    } else Seq.empty[org.apache.spark.sql.catalyst.expressions.Expression]
Example 24
Source File: ColumnarTestUtils.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.columnar

import scala.collection.immutable.HashSet
import scala.util.Random

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData}
import org.apache.spark.sql.types.{AtomicType, Decimal}
import org.apache.spark.unsafe.types.UTF8String

/** Helpers that build random values and rows for the columnar test suites. */
object ColumnarTestUtils {
  /** Returns a row with `length` fields, every one of them set to null. */
  def makeNullRow(length: Int): GenericInternalRow = {
    val row = new GenericInternalRow(length)
    (0 until length).foreach(row.setNullAt)
    row
  }

  /**
   * Produces one random value of the JVM type that backs `columnType`.
   *
   * @param columnType the column type to generate a value for
   * @return a freshly generated value, cast to `JvmType`
   * @throws IllegalArgumentException if `columnType` matches none of the handled cases
   */
  def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = {
    def randomBytes(length: Int) = {
      val bytes = new Array[Byte](length)
      Random.nextBytes(bytes)
      bytes
    }

    // The match yields differently typed values per branch, so the result is cast back once at
    // the end instead of in every case.
    (columnType match {
      case NULL => null
      case BOOLEAN => Random.nextBoolean()
      // nextInt(2 * MaxValue) - MaxValue covers [-MaxValue, MaxValue - 1]; MinValue and MaxValue
      // themselves are never produced (same for SHORT below).
      case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte
      case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort
      case INT => Random.nextInt()
      case LONG => Random.nextLong()
      case FLOAT => Random.nextFloat()
      case DOUBLE => Random.nextDouble()
      case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32)))
      case BINARY => randomBytes(Random.nextInt(32))
      // `% 100` keeps the unscaled value within (-100, 100).
      case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale)
      case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale)
      case STRUCT(_) =>
        new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10))))
      case ARRAY(_) =>
        new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt()))
      case MAP(_) =>
        ArrayBasedMapData(
          Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32)))))
      case _ => throw new IllegalArgumentException(s"Unknown column type $columnType")
    }).asInstanceOf[JvmType]
  }

  /** Varargs convenience overload: one random value per given column type, in order. */
  def makeRandomValues(
      head: ColumnType[_],
      tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail)

  /** Returns one random value per entry of `columnTypes`, in the same order. */
  def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = {
    columnTypes.map(makeRandomValue(_))
  }

  /**
   * Returns `count` pairwise distinct random values of `columnType`.
   *
   * NOTE(review): this does not terminate when the type has fewer than `count` distinct values.
   */
  def makeUniqueRandomValues[JvmType](
      columnType: ColumnType[JvmType],
      count: Int): Seq[JvmType] = {

    // Each step grows the set by exactly one value not seen so far; after dropping `count`
    // states the next set holds `count` elements.
    Iterator.iterate(HashSet.empty[JvmType]) { set =>
      set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next()
    }.drop(count).next().toSeq
  }

  /** Varargs convenience overload of `makeRandomRow`. */
  def makeRandomRow(
      head: ColumnType[_],
      tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail)

  /** Builds a row whose i-th field is a random value of `columnTypes(i)`. */
  def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = {
    val row = new GenericInternalRow(columnTypes.length)
    makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) =>
      row(index) = value
    }
    row
  }

  /**
   * Generates `count` distinct values of `columnType` plus, for each value, a one-field row
   * holding it.
   *
   * @return the values and their single-field rows, in matching order
   */
  def makeUniqueValuesAndSingleValueRows[T <: AtomicType](
      columnType: NativeColumnType[T],
      count: Int): (Seq[T#InternalType], Seq[GenericInternalRow]) = {

    val values = makeUniqueRandomValues(columnType, count)
    val rows = values.map { value =>
      val row = new GenericInternalRow(1)
      row(0) = value
      row
    }

    (values, rows)
  }
}