org.scalatest.FlatSpec Scala Examples
The following examples show how to use org.scalatest.FlatSpec.
Each example is taken from an open-source project; the source file, the project it comes from, and that project's license are noted above each listing.
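Before the full examples, here is a minimal sketch of the FlatSpec style they all share: a test class mixes in FlatSpec (usually together with Matchers), the first test names its subject with "subject" should "behaviour" in { ... }, and follow-up tests continue the same subject with it should .... The class name and test bodies below are illustrative placeholders only and are not taken from any of the projects listed.
import org.scalatest.{FlatSpec, Matchers}

// Minimal, self-contained FlatSpec sketch; the class and behaviours are hypothetical.
class StackSpec extends FlatSpec with Matchers {

  "A List used as a stack" should "return the most recently pushed element first" in {
    val stack = 2 :: 1 :: Nil // push 1, then push 2
    stack.head shouldEqual 2
  }

  it should "be empty once every element has been popped" in {
    val stack = List(1, 2)
    stack.drop(stack.length) shouldBe empty
  }
}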
Example 1
Source File: NestedCaseClassesTest.scala from cleanframes (Apache License 2.0)
package cleanframes

import com.holdenkarau.spark.testing.DataFrameSuiteBase
import org.apache.spark.sql.functions
import org.scalatest.{FlatSpec, Matchers}

class NestedCaseClassesTest
  extends FlatSpec
    with Matchers
    with DataFrameSuiteBase {

  "Cleaner" should "compile and use a custom transformer for a custom type" in {
    import cleanframes.syntax._ // to use `.clean`
    import spark.implicits._

    // define test data for a dataframe
    val input = Seq(
      // @formatter:off
      ("1",         "1",         "1",         "1",         null),
      (null,        "2",         null,        "2",         "corrupted"),
      ("corrupted", null,        "corrupted", null,        "true"),
      ("4",         "corrupted", "4",         "4",         "false"),
      ("5",         "5",         "5",         "corrupted", "false"),
      ("6",         "6",         "6",         "6",         "true")
      // @formatter:on
    )
      // give column names that are known to you
      .toDF("col1", "col2", "col3", "col4", "col5")

    // import standard functions for conversions shipped with the library
    import cleanframes.instances.all._

    // !important: you need to give a new structure to allow access to sub-elements
    val renamed = input.select(
      functions.struct(
        input.col("col1") as "a_col_1",
        input.col("col2") as "a_col_2"
      ) as "a",
      functions.struct(
        input.col("col3") as "b_col_1",
        input.col("col4") as "b_col_2"
      ) as "b",
      input.col("col5") as "c"
    )

    val result = renamed.clean[AB]
      .as[AB]
      .collect

    result should {
      contain theSameElementsAs Seq(
        // @formatter:off
        AB(A(Some(1), Some(1)), B(Some(1), Some(1.0)), Some(false)),
        AB(A(None,    Some(2)), B(None,    Some(2.0)), Some(false)),
        AB(A(None,    None),    B(None,    None),      Some(true)),
        AB(A(Some(4), None),    B(Some(4), Some(4.0)), Some(false)),
        AB(A(Some(5), Some(5)), B(Some(5), None),      Some(false)),
        AB(A(Some(6), Some(6)), B(Some(6), Some(6.0)), Some(true))
        // @formatter:on
      )
    }
  }
}

case class A(a_col_1: Option[Int], a_col_2: Option[Float])
case class B(b_col_1: Option[Float], b_col_2: Option[Double])
case class AB(a: A, b: B, c: Option[Boolean])
Example 2
Source File: TanhSpec.scala from BigDL (Apache License 2.0)
package com.intel.analytics.bigdl.nn import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl._ import com.intel.analytics.bigdl.nn.tf.TanhGrad import com.intel.analytics.bigdl.utils.T import com.intel.analytics.bigdl.utils.serializer.ModuleSerializationTest import org.scalatest.{FlatSpec, Matchers} import scala.math.abs @com.intel.analytics.bigdl.tags.Parallel class TanhSpec extends FlatSpec with Matchers { "A Tanh Module " should "generate correct output and grad" in { val module = new Tanh[Double]() val input = Tensor[Double](2, 2, 2) input(Array(1, 1, 1)) = -0.17020166106522 input(Array(1, 1, 2)) = 0.57785657607019 input(Array(1, 2, 1)) = -1.3404131438583 input(Array(1, 2, 2)) = 1.0938102817163 input(Array(2, 1, 1)) = 1.120370157063 input(Array(2, 1, 2)) = -1.5014141565189 input(Array(2, 2, 1)) = 0.3380249235779 input(Array(2, 2, 2)) = -0.625677742064 val gradOutput = Tensor[Double](2, 2, 2) gradOutput(Array(1, 1, 1)) = 0.79903302760795 gradOutput(Array(1, 1, 2)) = 0.019753993256018 gradOutput(Array(1, 2, 1)) = 0.63136631483212 gradOutput(Array(1, 2, 2)) = 0.29849314852618 gradOutput(Array(2, 1, 1)) = 0.94380705454387 gradOutput(Array(2, 1, 2)) = 0.030344664584845 gradOutput(Array(2, 2, 1)) = 0.33804601291195 gradOutput(Array(2, 2, 2)) = 0.8807330634445 val expectedOutput = Tensor[Double](2, 2, 2) expectedOutput(Array(1, 1, 1)) = -0.16857698275003 expectedOutput(Array(1, 1, 2)) = 0.52110579963112 expectedOutput(Array(1, 2, 1)) = -0.87177144344863 expectedOutput(Array(1, 2, 2)) = 0.79826462420686 expectedOutput(Array(2, 1, 1)) = 0.80769763073281 expectedOutput(Array(2, 1, 2)) = -0.90540347425835 expectedOutput(Array(2, 2, 1)) = 0.32571298952384 expectedOutput(Array(2, 2, 2)) = -0.55506882753488 val expectedGrad = Tensor[Double](2, 2, 2) expectedGrad(Array(1, 1, 1)) = 0.77632594793144 expectedGrad(Array(1, 1, 2)) = 0.014389771607755 expectedGrad(Array(1, 2, 1)) = 0.15153710218424 expectedGrad(Array(1, 2, 2)) = 0.1082854310036 expectedGrad(Array(2, 1, 1)) = 0.32809049064441 expectedGrad(Array(2, 1, 2)) = 0.0054694603766104 expectedGrad(Array(2, 2, 1)) = 0.3021830658283 expectedGrad(Array(2, 2, 2)) = 0.6093779706637 val inputOrg = input.clone() val gradOutputOrg = gradOutput.clone() val output = module.forward(input) val gradInput = module.backward(input, gradOutput) expectedOutput.map(output, (v1, v2) => { assert(abs(v1 - v2) < 1e-6); v1 }) expectedGrad.map(gradInput, (v1, v2) => { assert(abs(v1 - v2) < 1e-6); v1 }) assert(input == inputOrg) assert(gradOutput == gradOutputOrg) } "A Tanh Module " should "be good in gradient check" in { val module = new Tanh[Double]() val input = Tensor[Double](2, 2, 2).rand() val checker = new GradientChecker(1e-4, 1e-2) checker.checkLayer[Double](module, input) should be(true) } } class TanhSerialTest extends ModuleSerializationTest { override def test(): Unit = { val module = TanhGrad[Float, Float]() val input = T(Tensor[Float](1, 5, 3, 4).rand(), Tensor[Float](1, 5, 3, 4).rand()) runSerializationTest(module, input) } }
Example 3
Source File: CirceSpec.scala from featherbed (Apache License 2.0)
package featherbed.circe import cats.implicits._ import com.twitter.util.Future import io.circe._ import io.circe.generic.auto._ import io.circe.parser.parse import io.circe.syntax._ import org.scalatest.FlatSpec import shapeless.{Coproduct, Witness} import shapeless.union.Union case class Foo(someText: String, someInt: Int) class CirceSpec extends FlatSpec { "post request of a case class" should "derive JSON encoder" in { import com.twitter.util.{Future, Await} import com.twitter.finagle.{Service, Http} import com.twitter.finagle.http.{Request, Response} import java.net.InetSocketAddress val server = Http.serve(new InetSocketAddress(8766), new Service[Request, Response] { def apply(request: Request): Future[Response] = Future { val rep = Response() rep.contentString = s"${request.contentString}" rep.setContentTypeJson() rep } }) import java.net.URL val client = new featherbed.Client(new URL("http://localhost:8766/api/")) import io.circe.generic.auto._ val req = client.post("foo/bar") .withContent(Foo("Hello world!", 42), "application/json") .accept[Coproduct.`"application/json"`.T] val result = Await.result { req.send[Foo]() } Foo("test", 42).asJson.toString parse("""{"someText": "test", "someInt": 42}""").toValidated.map(_.as[Foo]) Await.result(server.close()) } "API example" should "compile" in { import shapeless.Coproduct import java.net.URL import com.twitter.util.Await case class Post(userId: Int, id: Int, title: String, body: String) case class Comment(postId: Int, id: Int, name: String, email: String, body: String) class JSONPlaceholderAPI(baseUrl: URL) { private val client = new featherbed.Client(baseUrl) type JSON = Coproduct.`"application/json"`.T object posts { private val listRequest = client.get("posts").accept[JSON] private val getRequest = (id: Int) => client.get(s"posts/$id").accept[JSON] def list(): Future[Seq[Post]] = listRequest.send[Seq[Post]]() def get(id: Int): Future[Post] = getRequest(id).send[Post]() } object comments { private val listRequest = client.get("comments").accept[JSON] private val getRequest = (id: Int) => client.get(s"comments/$id").accept[JSON] def list(): Future[Seq[Comment]] = listRequest.send[Seq[Comment]]() def get(id: Int): Future[Comment] = getRequest(id).send[Comment]() } } val apiClient = new JSONPlaceholderAPI(new URL("http://jsonplaceholder.typicode.com/")) Await.result(apiClient.posts.list()) } }
Example 4
Source File: ReadOnlyNodeStorageSpec.scala from mantis (Apache License 2.0)
package io.iohk.ethereum.db.storage

import akka.util.ByteString
import io.iohk.ethereum.db.dataSource.EphemDataSource
import org.scalatest.{FlatSpec, Matchers}

class ReadOnlyNodeStorageSpec extends FlatSpec with Matchers {

  "ReadOnlyNodeStorage" should "not update dataSource" in new TestSetup {
    readOnlyNodeStorage.put(ByteString("key1"), ByteString("Value1").toArray)
    dataSource.storage.size shouldEqual 0
  }

  it should "be able to read from underlying storage but not change it" in new TestSetup {
    val key1 = ByteString("key1")
    val val1 = ByteString("Value1").toArray
    referenceCountNodeStorage.put(key1, val1)

    val previousSize = dataSource.storage.size
    readOnlyNodeStorage.get(key1).get shouldEqual val1

    readOnlyNodeStorage.remove(key1)
    dataSource.storage.size shouldEqual previousSize
    readOnlyNodeStorage.get(key1).get shouldEqual val1
  }

  trait TestSetup {
    val dataSource = EphemDataSource()
    val nodeStorage = new NodeStorage(dataSource)
    val referenceCountNodeStorage = new ReferenceCountNodeStorage(nodeStorage, blockNumber = Some(1))
    val readOnlyNodeStorage = ReadOnlyNodeStorage(referenceCountNodeStorage)
  }
}
Example 5
Source File: EthashSpec.scala from mantis (Apache License 2.0)
package io.iohk.ethereum.consensus

import akka.util.ByteString
import io.iohk.ethereum.crypto.kec256
import org.scalacheck.Arbitrary
import org.scalatest.prop.PropertyChecks
import org.scalatest.{FlatSpec, Matchers}
import org.spongycastle.util.encoders.Hex

class EthashSpec extends FlatSpec with Matchers with PropertyChecks {

  import Ethash._

  "Ethash" should "generate correct hash" in {
    forAll(Arbitrary.arbitrary[Long].filter(_ < 15000000)) { blockNumber =>
      seed(epoch(blockNumber)) shouldBe seedForBlockReference(blockNumber)
    }
  }

  it should "calculate cache size" in {
    val cacheSizes = Seq(16776896, 16907456, 17039296, 17170112, 17301056, 17432512, 17563072)
    cacheSizes.zipWithIndex.foreach { case (referenceSize, epoch) =>
      cacheSize(epoch) shouldBe referenceSize
    }
  }

  it should "compute proof of work using cache" in {
    val hash = Array(0xf5, 0x7e, 0x6f, 0x3a, 0xcf, 0xc0, 0xdd, 0x4b, 0x5b, 0xf2, 0xbe, 0xe4, 0x0a, 0xb3, 0x35, 0x8a,
      0xa6, 0x87, 0x73, 0xa8, 0xd0, 0x9f, 0x5e, 0x59, 0x5e, 0xab, 0x55, 0x94, 0x05, 0x52, 0x7d, 0x72).map(_.toByte)
    val nonce = Array(0xd7, 0xb3, 0xac, 0x70, 0xa3, 0x01, 0xa2, 0x49).map(_.toByte)

    val mixHash = Array(0x1f, 0xff, 0x04, 0xce, 0xc9, 0x41, 0x73, 0xfd, 0x59, 0x1e, 0x3d, 0x89, 0x60, 0xce, 0x6b, 0xdf,
      0x8b, 0x19, 0x71, 0x04, 0x8c, 0x71, 0xff, 0x93, 0x7b, 0xb2, 0xd3, 0x2a, 0x64, 0x31, 0xab, 0x6d).map(_.toByte)
    val boundary = Array(0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x3e, 0x9b, 0x6c, 0x69, 0xbc, 0x2c, 0xe2, 0xa2, 0x4a, 0x8e,
      0x95, 0x69, 0xef, 0xc7, 0xd7, 0x1b, 0x33, 0x35, 0xdf, 0x36, 0x8c, 0x9a, 0xe9, 0x7e, 0x53, 0x84).map(_.toByte)

    val blockNumber = 486382
    val cache = makeCache(epoch(blockNumber))
    val proofOfWork = hashimotoLight(hash, nonce, dagSize(epoch(blockNumber)), cache)

    proofOfWork.mixHash shouldBe ByteString(mixHash)
    proofOfWork.difficultyBoundary shouldBe ByteString(boundary)

    val table = Table(
      ("blockNumber", "hashWithoutNonce", "nonce", "mixHash"),
      (3521, "269d13f7ca546dced28ee26071dcb61085b7c54dfc5f93808b94885e136cd616", "534ab630b9aa1f68", "c6913517d1dc7544febde9f17e65d6ae4fa380d4a2a2d31305b0043caf95e717"),
      (5021, "7bd6c3c49a0627712c51f1abf0a7828bb25ebb8679d2584385a191db955667da", "413dc4ec1a0df7c4", "35890608f8867402052b2ce55a694b86a44ce87e7fb5412a77a025b184f25883"),
      (5091, "5b27820bfa3a059274ce17db0beea90ba0b6fbe6b49d2a23cbf972e8cde79319", "59225875d18ad606", "46f72f8b269461078e9d1cf4edf1b608f9d101e0f335ea59568c3436f291d01b"),
      (3091, "c37d980124cf83a4de4d9600f5bb6d3883797b84b7ec472feff6ca855c01d245", "745609efa9c4eef3", "c647fec06481b9f3f74cd771968d6d630aa11bf75ebd9e3c55ccfbae0fbad4da"),
      (1091, "c1c1efb8fdd4241a55db39e092fedae3df6d4abc13133778810027ade6557bc6", "4d9ddadaea6c20b2", "53624a7faac2ec82208348f7a11e3b38c880a2fec76dd8b47e434fe641eeacde"),
      (109, "aa234d4bcee14e93d127275dcc83504b6e730a14e9110bd09b68e1964f0daad3", "388e6b37c22147b7", "df14701b1ad6d3d5639956e463250960de3189a726cb38d71a6f6042f45dea72"),
      (1009, "3259779f9d2c477d29e18ead0ccc829bf2146723563c3e81e5e4886673d93bfb", "5faa044b70ccdf6b", "a1f1af0c2ca3e1d8e69da59fefbfeb4d0d172ec96bdbdac71b2cde49ddb3a828"),
      (1001, "028cc9a70d6db52c2a2606f04392e9a323d0370291d6c6d78bc8ce54acf1d761", "a54b5b31ce3de766", "819c26573f1a9cd6c4b9a399b72fbfb0084a104b25b62083533e114ee98a4831"),
      (1000, "15c5729eb017a703c13d00752338f6b55e2d2551b380706f0486f2ccca57ae1e", "eb610e766452a801", "a369e2fd5c4e357cf9f60ba063ae0baf32075b0d7ed80cd78134bc401db8f1bf"),
      (100, "41944a94a42695180b1ca231720a87825f17d36475112b659c23dea1542e0977", "37129c7f29a9364b", "5bb43c0772e58084b221c8e0c859a45950c103c712c5b8f11d9566ee078a4501"))

    forAll(table) { (blockNumber, hashWithoutNonce, nonce, mixHash) =>
      val cache = makeCache(epoch(blockNumber))
      val proofOfWork = hashimotoLight(Hex.decode(hashWithoutNonce), Hex.decode(nonce), dagSize(epoch(blockNumber)), cache)
      proofOfWork.mixHash shouldBe ByteString(Hex.decode(mixHash))
    }
  }

  def seedForBlockReference(blockNumber: BigInt): ByteString = {
    if (blockNumber < EPOCH_LENGTH) {
      //wrong version from YP:
      //ByteString(kec256(Hex.decode("00" * 32)))
      //working version:
      ByteString(Hex.decode("00" * 32))
    } else {
      kec256(seedForBlockReference(blockNumber - EPOCH_LENGTH))
    }
  }
}
Example 6
Source File: BlockchainSpec.scala from mantis (Apache License 2.0)
package io.iohk.ethereum.domain

import akka.util.ByteString
import io.iohk.ethereum.Fixtures
import io.iohk.ethereum.blockchain.sync.EphemBlockchainTestSetup
import io.iohk.ethereum.db.storage.ArchiveNodeStorage
import io.iohk.ethereum.mpt.MerklePatriciaTrie
import org.scalatest.{FlatSpec, Matchers}

class BlockchainSpec extends FlatSpec with Matchers {

  "Blockchain" should "be able to store a block and return it if queried by hash" in new EphemBlockchainTestSetup {
    val validBlock = Fixtures.Blocks.ValidBlock.block
    blockchain.save(validBlock)
    val block = blockchain.getBlockByHash(validBlock.header.hash)
    assert(block.isDefined)
    assert(validBlock == block.get)
    val blockHeader = blockchain.getBlockHeaderByHash(validBlock.header.hash)
    assert(blockHeader.isDefined)
    assert(validBlock.header == blockHeader.get)
    val blockBody = blockchain.getBlockBodyByHash(validBlock.header.hash)
    assert(blockBody.isDefined)
    assert(validBlock.body == blockBody.get)
  }

  it should "be able to store a block and retrieve it by number" in new EphemBlockchainTestSetup {
    val validBlock = Fixtures.Blocks.ValidBlock.block
    blockchain.save(validBlock)
    val block = blockchain.getBlockByNumber(validBlock.header.number)
    assert(block.isDefined)
    assert(validBlock == block.get)
  }

  it should "be able to query a stored blockHeader by its number" in new EphemBlockchainTestSetup {
    val validHeader = Fixtures.Blocks.ValidBlock.header
    blockchain.save(validHeader)
    val header = blockchain.getBlockHeaderByNumber(validHeader.number)
    assert(header.isDefined)
    assert(validHeader == header.get)
  }

  it should "not return a value if not stored" in new EphemBlockchainTestSetup {
    assert(blockchain.getBlockByNumber(Fixtures.Blocks.ValidBlock.header.number).isEmpty)
    assert(blockchain.getBlockByHash(Fixtures.Blocks.ValidBlock.header.hash).isEmpty)
  }

  it should "return an account given an address and a block number" in new EphemBlockchainTestSetup {
    val address = Address(42)
    val account = Account.empty(UInt256(7))

    val validHeader = Fixtures.Blocks.ValidBlock.header

    val emptyMpt = MerklePatriciaTrie[Address, Account](
      new ArchiveNodeStorage(storagesInstance.storages.nodeStorage)
    )
    val mptWithAcc = emptyMpt.put(address, account)
    val headerWithAcc = validHeader.copy(stateRoot = ByteString(mptWithAcc.getRootHash))

    blockchain.save(headerWithAcc)

    val retrievedAccount = blockchain.getAccount(address, headerWithAcc.number)
    retrievedAccount shouldEqual Some(account)
  }
}
Example 7
Source File: SignedTransactionSpec.scala from mantis (Apache License 2.0)
package io.iohk.ethereum.domain

import io.iohk.ethereum.crypto
import io.iohk.ethereum.crypto.generateKeyPair
import io.iohk.ethereum.domain.SignedTransaction.FirstByteOfAddress
import io.iohk.ethereum.nodebuilder.SecureRandomBuilder
import io.iohk.ethereum.vm.Generators
import org.scalacheck.Arbitrary
import org.scalatest.prop.PropertyChecks
import org.scalatest.{FlatSpec, Matchers}
import org.spongycastle.crypto.params.ECPublicKeyParameters

class SignedTransactionSpec extends FlatSpec with Matchers with PropertyChecks with SecureRandomBuilder {
  "SignedTransaction" should "correctly set pointSign for chainId with chain specific signing schema" in {
    forAll(Generators.transactionGen(), Arbitrary.arbitrary[Unit].map(_ => generateKeyPair(secureRandom))) {
      (tx, key) =>
        val chainId: Byte = 0x3d
        val allowedPointSigns = Set((chainId * 2 + 35).toByte, (chainId * 2 + 36).toByte)
        // byte 0 of the encoded ECC point indicates that it is an uncompressed point; it is part of the SpongyCastle encoding
        val address = Address(
          crypto.kec256(key.getPublic.asInstanceOf[ECPublicKeyParameters].getQ.getEncoded(false).tail)
            .drop(FirstByteOfAddress))
        val result = SignedTransaction.sign(tx, key, Some(chainId))

        allowedPointSigns should contain(result.signature.v)
        address shouldEqual result.senderAddress
    }
  }
}
Example 8
Source File: EncryptedKeySpec.scala from mantis (Apache License 2.0)
package io.iohk.ethereum.keystore import org.scalatest.{FlatSpec, Matchers} import io.iohk.ethereum.crypto import io.iohk.ethereum.domain.Address import io.iohk.ethereum.nodebuilder.SecureRandomBuilder class EncryptedKeySpec extends FlatSpec with Matchers with SecureRandomBuilder { val gethKey = """{ | "id": "033b7a63-30f2-47fc-bbbe-d22925a14ab3", | "address": "932245e1c40ec2026a2c7acc80befb68816cdba4", | "crypto": { | "cipher": "aes-128-ctr", | "ciphertext": "8fb53f8695795d1f0480cad7954bd7a888392bb24c414b9895b4cb288b4897dc", | "cipherparams": { | "iv": "7a754cfd548a351aed270f6b1bfd306d" | }, | "kdf": "scrypt", | "kdfparams": { | "dklen": 32, | "n": 262144, | "p": 1, | "r": 8, | "salt": "2321125eff8c3172a05a5947726004075b30e0a01534061fa5c13fb4e5e32465" | }, | "mac": "6383677d3b0f34b1dcb9e1c767f8130daf6233266e35f28e00467af97bf2fbfa" | }, | "version": 3 |} """.stripMargin val parityKey = """{ | "id": "20909a42-09c4-0740-02dc-a0b8cbaea688", | "version": 3, | "Crypto": { | "cipher": "aes-128-ctr", | "cipherparams": { | "iv": "68521b4d5fc5ecf83bbe24768a321fe5" | }, | "ciphertext": "235ce2efb355a963eb838f42e3c30e59a00ab14030e66202d729f60fc7af57b3", | "kdf": "pbkdf2", | "kdfparams": { | "c": 10240, | "dklen": 32, | "prf": "hmac-sha256", | "salt": "b720278006c39e3ed01903421aad4ca3d3267f40ddc48bf4ec06429eb1c10fc5" | }, | "mac": "f52a2c40173dea137695f40f0f6bed67fc7814e16639acdbff11092cc9f563d0" | }, | "address": "04fecb5c49ee66fdbda1f196c120225bdd1ac35c", | "name": "20909a42-09c4-0740-02dc-a0b8cbaea688", | "meta": "{}" |}""".stripMargin "EncryptedKey" should "securely store private keys" in { val prvKey = crypto.secureRandomByteString(secureRandom, 32) val passphrase = "P4S5W0rd" val encKey = EncryptedKey(prvKey, passphrase, secureRandom) val json = EncryptedKeyJsonCodec.toJson(encKey) val decoded = EncryptedKeyJsonCodec.fromJson(json) decoded shouldEqual Right(encKey) decoded.flatMap(_.decrypt(passphrase)) shouldEqual Right(prvKey) } it should "decrypt a key encrypted by Geth" in { val encKey = EncryptedKeyJsonCodec.fromJson(gethKey) val prvKey = encKey.flatMap(_.decrypt("qwerty")) val address = prvKey.map(k => Address(crypto.kec256(crypto.pubKeyFromPrvKey(k)))) address shouldEqual Right(Address("932245e1c40ec2026a2c7acc80befb68816cdba4")) } it should "decrypt a key encrypted by Parity" in { val encKey = EncryptedKeyJsonCodec.fromJson(parityKey) val prvKey = encKey.flatMap(_.decrypt("qwerty")) val address = prvKey.map(k => Address(crypto.kec256(crypto.pubKeyFromPrvKey(k)))) address shouldEqual Right(Address("04fecb5c49ee66fdbda1f196c120225bdd1ac35c")) } }
Example 9
Source File: UtilsTest.scala from sparkMeasure (Apache License 2.0)
package ch.cern.sparkmeasure import java.io.File import scala.collection.mutable.ListBuffer import org.scalatest.{FlatSpec, Matchers} class UtilsTest extends FlatSpec with Matchers { val stageVals0 = StageVals(jobId = 1, jobGroup = "test", stageId= 2, name = "stageVal", submissionTime = 10, completionTime = 11, stageDuration = 12, numTasks = 13, executorRunTime = 14, executorCpuTime = 15, executorDeserializeTime = 16, executorDeserializeCpuTime = 17, resultSerializationTime = 18, jvmGCTime = 19, resultSize = 20, numUpdatedBlockStatuses = 21, diskBytesSpilled = 30, memoryBytesSpilled = 31, peakExecutionMemory = 32, recordsRead = 33, bytesRead = 34, recordsWritten = 35, bytesWritten = 36, shuffleFetchWaitTime = 40, shuffleTotalBytesRead = 41, shuffleTotalBlocksFetched = 42, shuffleLocalBlocksFetched = 43, shuffleRemoteBlocksFetched = 44, shuffleWriteTime = 45, shuffleBytesWritten = 46, shuffleRecordsWritten = 47 ) val taskVals0 = TaskVals(jobId = 1, jobGroup = "test", stageId = 2, index = 3, launchTime = 4, finishTime = 5, duration = 10, schedulerDelay = 11, executorId = "exec0", host = "host0", taskLocality = 12, speculative = false, gettingResultTime = 12, successful = true, executorRunTime = 14, executorCpuTime = 15, executorDeserializeTime = 16, executorDeserializeCpuTime = 17, resultSerializationTime = 18, jvmGCTime = 19, resultSize = 20, numUpdatedBlockStatuses = 21, diskBytesSpilled = 30, memoryBytesSpilled = 31, peakExecutionMemory = 32, recordsRead = 33, bytesRead = 34, recordsWritten = 35, bytesWritten = 36, shuffleFetchWaitTime = 40, shuffleTotalBytesRead = 41, shuffleTotalBlocksFetched = 42, shuffleLocalBlocksFetched = 43, shuffleRemoteBlocksFetched = 44, shuffleWriteTime = 45, shuffleBytesWritten = 46, shuffleRecordsWritten = 47 ) it should "write and read back StageVal (Java Serialization)" in { val file = File.createTempFile("stageVal", ".tmp") try { IOUtils.writeSerialized(file.getAbsolutePath, ListBuffer(stageVals0)) val stageVals = IOUtils.readSerializedStageMetrics(file.getAbsolutePath) stageVals should have length 1 stageVals.head shouldEqual stageVals0 } finally { file.delete() } } it should "write and read back TaskVal (Java Serialization)" in { val file = File.createTempFile("taskVal", ".tmp") try { IOUtils.writeSerialized(file.getAbsolutePath, ListBuffer(taskVals0)) val taskVals = IOUtils.readSerializedTaskMetrics(file.getAbsolutePath) taskVals should have length 1 taskVals.head shouldEqual taskVals0 } finally { file.delete() } } it should "write and read back StageVal JSON" in { val file = File.createTempFile("stageVal", ".json") try { IOUtils.writeSerializedJSON(file.getAbsolutePath, ListBuffer(stageVals0)) val stageVals = IOUtils.readSerializedStageMetricsJSON(file.getAbsolutePath) stageVals should have length 1 stageVals.head shouldEqual stageVals0 } finally { file.delete() } } it should "write and read back TaskVal JSON" in { val file = File.createTempFile("taskVal", ".json") try { IOUtils.writeSerializedJSON(file.getAbsolutePath, ListBuffer(taskVals0)) val taskVals = IOUtils.readSerializedTaskMetricsJSON(file.getAbsolutePath) taskVals should have length 1 taskVals.head shouldEqual taskVals0 } finally { file.delete() } } }
Example 10
Source File: TestSpec.scala from akka-serialization-test (Apache License 2.0)
package com.github.dnvriend

import akka.actor.{ ActorRef, ActorSystem, PoisonPill }
import akka.event.{ Logging, LoggingAdapter }
import akka.serialization.SerializationExtension
import akka.stream.{ ActorMaterializer, Materializer }
import akka.testkit.TestProbe
import akka.util.Timeout
import org.scalatest.concurrent.{ Eventually, ScalaFutures }
import org.scalatest.prop.PropertyChecks
import org.scalatest.{ BeforeAndAfterAll, FlatSpec, GivenWhenThen, Matchers }

import scala.concurrent.duration._
import scala.concurrent.{ ExecutionContext, Future }
import scala.util.Try

trait TestSpec extends FlatSpec
    with Matchers
    with GivenWhenThen
    with ScalaFutures
    with BeforeAndAfterAll
    with Eventually
    with PropertyChecks
    with AkkaPersistenceQueries
    with AkkaStreamUtils
    with InMemoryCleanup {

  implicit val timeout: Timeout = Timeout(10.seconds)
  implicit val system: ActorSystem = ActorSystem()
  implicit val ec: ExecutionContext = system.dispatcher
  implicit val mat: Materializer = ActorMaterializer()
  implicit val log: LoggingAdapter = Logging(system, this.getClass)
  implicit val pc: PatienceConfig = PatienceConfig(timeout = 50.seconds)
  val serialization = SerializationExtension(system)

  implicit class FutureToTry[T](f: Future[T]) {
    def toTry: Try[T] = Try(f.futureValue)
  }

  def killActors(actors: ActorRef*): Unit = {
    val probe = TestProbe()
    actors.foreach { actor ⇒
      probe watch actor
      actor ! PoisonPill
      probe expectTerminated actor
    }
  }

  override protected def afterAll(): Unit = {
    system.terminate()
    system.whenTerminated.toTry should be a 'success
  }
}
Example 11
Source File: InterpreterLiteralExpressionTest.scala from feel-scala (Apache License 2.0)
package org.camunda.feel.interpreter.impl import org.scalatest.{FlatSpec, Matchers} import org.camunda.feel.syntaxtree._ class InterpreterLiteralExpressionTest extends FlatSpec with Matchers with FeelIntegrationTest { "A literal" should "be a number" in { eval("2") should be(ValNumber(2)) eval("2.4") should be(ValNumber(2.4)) eval("-3") should be(ValNumber(-3)) } it should "be a string" in { eval(""" "a" """) should be(ValString("a")) } it should "be a boolean" in { eval("true") should be(ValBoolean(true)) } it should "be null" in { eval("null") should be(ValNull) } it should "be a context" in { eval("{ a : 1 }") .asInstanceOf[ValContext] .context .variableProvider .getVariables should be(Map("a" -> ValNumber(1))) eval("""{ a:1, b:"foo" }""") .asInstanceOf[ValContext] .context .variableProvider .getVariables should be(Map("a" -> ValNumber(1), "b" -> ValString("foo"))) // nested val nestedContext = eval("{ a : { b : 1 } }") .asInstanceOf[ValContext] .context .variableProvider .getVariable("a") .get nestedContext shouldBe a[ValContext] nestedContext .asInstanceOf[ValContext] .context .variableProvider .getVariables should be(Map("b" -> ValNumber(1))) } it should "be a list" in { eval("[1]") should be(ValList(List(ValNumber(1)))) eval("[1,2]") should be(ValList(List(ValNumber(1), ValNumber(2)))) // nested eval("[ [1], [2] ]") should be( ValList(List(ValList(List(ValNumber(1))), ValList(List(ValNumber(2)))))) } }
Example 12
Source File: FacetedLuceneRDDImplicitsSpec.scala from spark-lucenerdd (Apache License 2.0)
package org.zouzias.spark.lucenerdd.facets import com.holdenkarau.spark.testing.SharedSparkContext import org.apache.spark.SparkConf import org.apache.spark.sql.SparkSession import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers} import org.zouzias.spark.lucenerdd.testing.FavoriteCaseClass import org.zouzias.spark.lucenerdd.{LuceneRDD, LuceneRDDKryoRegistrator} class FacetedLuceneRDDImplicitsSpec extends FlatSpec with Matchers with BeforeAndAfterEach with SharedSparkContext { var luceneRDD: LuceneRDD[_] = _ override val conf = LuceneRDDKryoRegistrator.registerKryoClasses(new SparkConf(). setMaster("local[*]"). setAppName("test"). set("spark.ui.enabled", "false"). set("spark.app.id", appID)) override def afterEach() { luceneRDD.close() } val elem = Array("fear", "death", "water", "fire", "house") .zipWithIndex.map{ case (str, index) => FavoriteCaseClass(str, index, 10L, 12.3F, s"${str}@gmail.com")} "FacetedLuceneRDD(case class).count" should "return correct number of elements" in { val rdd = sc.parallelize(elem) val spark = SparkSession.builder().getOrCreate() import spark.implicits._ val df = rdd.toDF() luceneRDD = FacetedLuceneRDD(df) luceneRDD.count should equal (elem.size) } "FacetedLuceneRDD(case class).fields" should "return all fields" in { val rdd = sc.parallelize(elem) val spark = SparkSession.builder().getOrCreate() import spark.implicits._ val df = rdd.toDF() luceneRDD = FacetedLuceneRDD(df) luceneRDD.fields().size should equal(5) luceneRDD.fields().contains("name") should equal(true) luceneRDD.fields().contains("age") should equal(true) luceneRDD.fields().contains("myLong") should equal(true) luceneRDD.fields().contains("myFloat") should equal(true) luceneRDD.fields().contains("email") should equal(true) } "FacetedLuceneRDD(case class).termQuery" should "correctly search with TermQueries" in { val rdd = sc.parallelize(elem) val spark = SparkSession.builder().getOrCreate() import spark.implicits._ val df = rdd.toDF() luceneRDD = FacetedLuceneRDD(df) val results = luceneRDD.termQuery("name", "water") results.count() should equal(1) } }
Example 13
Source File: LucenePrimitiveTypesSpec.scala from spark-lucenerdd (Apache License 2.0)
package org.zouzias.spark.lucenerdd import com.holdenkarau.spark.testing.SharedSparkContext import org.apache.spark.SparkConf import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers} class LucenePrimitiveTypesSpec extends FlatSpec with Matchers with BeforeAndAfterEach with SharedSparkContext { override val conf = LuceneRDDKryoRegistrator.registerKryoClasses(new SparkConf(). setMaster("local[*]"). setAppName("test"). set("spark.ui.enabled", "false"). set("spark.app.id", appID)) def randomString(length: Int): String = scala.util.Random.alphanumeric.take(length).mkString val array = (1 to 24).map(randomString(_)) var luceneRDD: LuceneRDD[_] = _ override def afterEach() { luceneRDD.close() } "LuceneRDD" should "work with RDD[Array[String]]" in { val array = Array(Array("aaa", "aaa2"), Array("bbb", "bbb2"), Array("ccc", "ccc2"), Array("ddd"), Array("eee")) val rdd = sc.parallelize(array) luceneRDD = LuceneRDD(rdd) luceneRDD.count should be (array.length) } "LuceneRDD" should "work with RDD[Set[String]]" in { val array = Array(Set("aaa", "aaa2"), Set("bbb", "bbb2"), Set("ccc", "ccc2"), Set("ddd"), Set("eee")) val rdd = sc.parallelize(array) luceneRDD = LuceneRDD(rdd) luceneRDD.count should be (array.length) } "LuceneRDD" should "work with RDD[String]" in { val array = Array("aaa", "bbb", "ccc", "ddd", "eee") val rdd = sc.parallelize(array) luceneRDD = LuceneRDD(rdd) luceneRDD.count should be (array.length) } "LuceneRDD" should "work with RDD[Int]" in { val array = (1 to 22) val rdd = sc.parallelize(array) luceneRDD = LuceneRDD(rdd) luceneRDD.count should be (array.size) } "LuceneRDD" should "work with RDD[Float]" in { val array: IndexedSeq[Float] = (1 to 22).map(_.toFloat) val rdd = sc.parallelize(array) luceneRDD = LuceneRDD(rdd) luceneRDD.count should be (array.size) } "LuceneRDD" should "work with RDD[Double]" in { val array: IndexedSeq[Double] = (1 to 22).map(_.toDouble) val rdd = sc.parallelize(array) luceneRDD = LuceneRDD(rdd) luceneRDD.count should be (array.size) } "LuceneRDD" should "work with RDD[Long]" in { val array: IndexedSeq[Long] = (1 to 22).map(_.toLong) val rdd = sc.parallelize(array) luceneRDD = LuceneRDD(rdd) luceneRDD.count should equal (array.size) } "LuceneRDD" should "work with RDD[Map[String, String]]" in { val maps = List(Map( "a" -> "hello"), Map("b" -> "world"), Map("c" -> "how are you")) val rdd = sc.parallelize(maps) luceneRDD = LuceneRDD(rdd) luceneRDD.count should equal (maps.size) luceneRDD.termQuery("a", "hello").isEmpty() should equal (false) luceneRDD.prefixQuery("b", "wor").isEmpty() should equal (false) luceneRDD.prefixQuery("a", "no").isEmpty() should equal (true) } "LuceneRDD" should "work with RDD[String] and ignore null values" in { val array = Array("aaa", null, "ccc", null, "eee") val rdd = sc.parallelize(array) luceneRDD = LuceneRDD(rdd) luceneRDD.count should be (array.length) } }
Example 14
Source File: BlockingLinkageSpec.scala from spark-lucenerdd (Apache License 2.0)
package org.zouzias.spark.lucenerdd import com.holdenkarau.spark.testing.SharedSparkContext import org.apache.lucene.index.Term import org.apache.lucene.search.{Query, TermQuery} import org.apache.spark.SparkConf import org.apache.spark.sql.{Row, SparkSession} import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers} import org.zouzias.spark.lucenerdd.testing.Person class BlockingLinkageSpec extends FlatSpec with Matchers with BeforeAndAfterEach with SharedSparkContext { override val conf: SparkConf = LuceneRDDKryoRegistrator.registerKryoClasses(new SparkConf(). setMaster("local[*]"). setAppName("test"). set("spark.ui.enabled", "false"). set("spark.app.id", appID)) "LuceneRDD.blockEntityLinkage" should "deduplicate elements on unique elements" in { val spark = SparkSession.builder().getOrCreate() import spark.implicits._ val peopleLeft: Array[Person] = Array("fear", "death", "water", "fire", "house") .zipWithIndex.map { case (str, index) => val email = if (index % 2 == 0) "[email protected]" else "[email protected]" Person(str, index, email) } val peopleRight: Array[Person] = Array("fear", "death", "water", "fire", "house") .zipWithIndex.map { case (str, index) => val email = if (index % 2 == 0) "[email protected]" else "[email protected]" Person(str, index, email) } val leftDF = sc.parallelize(peopleLeft).repartition(2).toDF() val rightDF = sc.parallelize(peopleRight).repartition(3).toDF() // Define a Lucene Term linker val linker: Row => Query = { row => val name = row.getString(row.fieldIndex("name")) val term = new Term("name", name) new TermQuery(term) } val linked = LuceneRDD.blockEntityLinkage(leftDF, rightDF, linker, Array("email"), Array("email")) val linkedCount, dfCount = (linked.count, leftDF.count()) linkedCount should equal(dfCount) // Check for correctness // Age is a unique index linked.collect().foreach { case (row, results) => val leftAge, rightAge = (row.getInt(row.fieldIndex("age")), results.headOption.map(x => x.getInt(x.fieldIndex("age")))) leftAge should equal(rightAge) } } }
Example 15
Source File: LuceneRDDCustomCaseClassImplicitsSpec.scala from spark-lucenerdd (Apache License 2.0)
package org.zouzias.spark.lucenerdd import com.holdenkarau.spark.testing.SharedSparkContext import org.apache.spark.SparkConf import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers} import org.zouzias.spark.lucenerdd.testing.Person class LuceneRDDCustomCaseClassImplicitsSpec extends FlatSpec with Matchers with BeforeAndAfterEach with SharedSparkContext { var luceneRDD: LuceneRDD[_] = _ override def afterEach() { luceneRDD.close() } override val conf = LuceneRDDKryoRegistrator.registerKryoClasses(new SparkConf(). setMaster("local[*]"). setAppName("test"). set("spark.ui.enabled", "false"). set("spark.app.id", appID)) val elem: Array[Person] = Array("fear", "death", "water", "fire", "house") .zipWithIndex.map{ case (str, index) => Person(str, index, s"${str}@gmail.com")} "LuceneRDD(case class).count" should "handle nulls properly" in { val elemsWithNulls = Array("fear", "death", "water", "fire", "house") .zipWithIndex.map{ case (str, index) => Person(str, index, null)} val rdd = sc.parallelize(elemsWithNulls) luceneRDD = LuceneRDD(rdd) luceneRDD.count() should equal (elemsWithNulls.length) } "LuceneRDD(case class).count" should "return correct number of elements" in { val rdd = sc.parallelize(elem) luceneRDD = LuceneRDD(rdd) luceneRDD.count() should equal (elem.length) } "LuceneRDD(case class).fields" should "return all fields" in { val rdd = sc.parallelize(elem) luceneRDD = LuceneRDD(rdd) luceneRDD.fields().size should equal(3) luceneRDD.fields().contains("name") should equal(true) luceneRDD.fields().contains("age") should equal(true) luceneRDD.fields().contains("email") should equal(true) } "LuceneRDD(case class).termQuery" should "correctly search with TermQueries" in { val rdd = sc.parallelize(elem) luceneRDD = LuceneRDD(rdd) val results = luceneRDD.termQuery("name", "water") results.count() should equal(1) } }
Example 16
Source File: ShapeLuceneRDDImplicitsSpec.scala from spark-lucenerdd (Apache License 2.0)
package org.zouzias.spark.lucenerdd.spatial.shape.implicits import com.holdenkarau.spark.testing.SharedSparkContext import org.apache.spark.SparkConf import org.apache.spark.sql.SparkSession import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers} import org.zouzias.spark.lucenerdd.spatial.shape.{ShapeLuceneRDD, _} import org.zouzias.spark.lucenerdd.testing.LuceneRDDTestUtils import org.zouzias.spark.lucenerdd._ import org.zouzias.spark.lucenerdd.spatial.shape.context.ContextLoader class ShapeLuceneRDDImplicitsSpec extends FlatSpec with Matchers with BeforeAndAfterEach with SharedSparkContext with ContextLoader with LuceneRDDTestUtils { val Radius: Double = 5D override val conf = ShapeLuceneRDDKryoRegistrator.registerKryoClasses(new SparkConf(). setMaster("local[*]"). setAppName("test"). set("spark.ui.enabled", "false"). set("spark.app.id", appID)) "ShapeLuceneRDDImplicits" should "implicitly convert to point" in { val rdd = sc.parallelize(cities) val shapeRDD = ShapeLuceneRDD(rdd) shapeRDD.count should equal(cities.length) } "ShapeLuceneRDDImplicits" should "implicitly convert to circle" in { val circleCities: Array[(((Double, Double), Double), String)] = cities.map(convertToCircle) val rdd = sc.parallelize(circleCities) val shapeRDD = ShapeLuceneRDD(rdd) shapeRDD.count should equal(circleCities.length) } "ShapeLuceneRDDImplicits" should "implicitly convert to rectangle" in { val rectangleCities = cities.map(convertToRectangle) val rdd = sc.parallelize(rectangleCities) val shapeRDD = ShapeLuceneRDD(rdd) shapeRDD.count should equal(rectangleCities.length) } "ShapeLuceneRDDImplicits" should "implicitly convert POINTS from WKT" in { val sparkSession = SparkSession.builder().getOrCreate() val citiesDF = sparkSession.read.parquet("data/world-cities-points.parquet") import sparkSession.implicits._ val citiesRDD = citiesDF.map(row => (row.getString(2), (row.getString(0), row.getString(1)))) val total = citiesDF.count() total > 0 should equal(true) val shapeRDD = ShapeLuceneRDD(citiesRDD) shapeRDD.count > 0 should equal(true) } "ShapeLuceneRDDImplicits" should "implicitly convert BBOX from WKT" in { val sparkSession = SparkSession.builder().getOrCreate() import sparkSession.implicits._ val countriesDF = sparkSession.read.parquet("data/countries-bbox.parquet") val citiesRDD = countriesDF.map(row => (row.getString(2), (row.getString(0), row.getString(1)))) val total = countriesDF.count() total > 0 should equal(true) val shapeRDD = ShapeLuceneRDD(citiesRDD) shapeRDD.count > 0 should equal(true) } "ShapeLuceneRDDImplicits" should "implicitly convert to polygon" in { val polygonCities = cities.map(convertToPolygon(_, Radius)) val rdd = sc.parallelize(polygonCities) val shapeRDD = ShapeLuceneRDD(rdd) shapeRDD.count should equal(polygonCities.length) } }
Example 17
Source File: AnalyzersConfigurableSpec.scala from spark-lucenerdd (Apache License 2.0)
package org.zouzias.spark.lucenerdd.analyzers

import org.apache.lucene.analysis.en.EnglishAnalyzer
import org.apache.lucene.analysis.el.GreekAnalyzer
import org.apache.lucene.analysis.de.GermanAnalyzer
import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers}

class AnalyzersConfigurableSpec extends FlatSpec
  with Matchers
  with BeforeAndAfterEach
  with AnalyzerConfigurable {

  "AnalyzersConfigurable.getAnalyzer" should "return english analyzer with 'en' input" in {
    val englishAnalyzer = getAnalyzer(Some("en"))
    englishAnalyzer shouldNot equal(null)
    englishAnalyzer.isInstanceOf[EnglishAnalyzer] should equal(true)
  }

  "AnalyzersConfigurable.getAnalyzer" should "return custom test analyzer with 'org.apache.lucene.analysis.el.GreekAnalyzer'" in {
    val greekAnalyzer = getAnalyzer(Some("org.apache.lucene.analysis.el.GreekAnalyzer"))
    greekAnalyzer shouldNot equal(null)
    greekAnalyzer.isInstanceOf[GreekAnalyzer] should equal(true)
  }

  "AnalyzersConfigurable.getAnalyzer" should "return custom test analyzer with 'org.apache.lucene.analysis.de.GermanAnalyzer'" in {
    val deutschAnalyzer = getAnalyzer(Some("org.apache.lucene.analysis.de.GermanAnalyzer"))
    deutschAnalyzer shouldNot equal(null)
    deutschAnalyzer.isInstanceOf[GermanAnalyzer] should equal(true)
  }
}
Example 18
Source File: LuceneRDDSearchSpec.scala from spark-lucenerdd (Apache License 2.0)
package org.zouzias.spark.lucenerdd import com.holdenkarau.spark.testing.SharedSparkContext import org.apache.spark.SparkConf import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers} import org.zouzias.spark.lucenerdd.testing.LuceneRDDTestUtils class LuceneRDDSearchSpec extends FlatSpec with Matchers with BeforeAndAfterEach with LuceneRDDTestUtils with SharedSparkContext { var luceneRDD: LuceneRDD[_] = _ override def Radius: Double = 0 override val conf = LuceneRDDKryoRegistrator.registerKryoClasses(new SparkConf(). setMaster("local[*]"). setAppName("test"). set("spark.ui.enabled", "false"). set("spark.app.id", appID)) override def afterEach() { luceneRDD.close() } val First = "_1" val array = List("fear", "death", " apologies", "romance", "tree", "fashion", "fascism") "LuceneRDD.query" should "use phrase query syntax" in { val words = Array("aabaa", "aaacaa", "aadaa", "aaaa", "qwerty") val rdd = sc.parallelize(words) luceneRDD = LuceneRDD(rdd) luceneRDD.query("_1:aadaa").isEmpty() should equal (false) luceneRDD.query("_1:aa*").count() should equal (4) luceneRDD.query("_1:q*").count() should equal (1) } "LuceneRDD.count" should "return correct number of elements" in { val rdd = sc.parallelize(array) luceneRDD = LuceneRDD(rdd) luceneRDD.count should equal (array.size) } "LuceneRDD.termQuery" should "correctly search with TermQueries" in { val rdd = sc.parallelize(array) luceneRDD = LuceneRDD(rdd) val results = luceneRDD.termQuery(First, array(1)) results.count() should equal (1) } "LuceneRDD.prefixQuery" should "correctly search with PrefixQueries" in { val prefices = Array("aaaabcd", "aaadcb", "aaz", "az", "qwerty") val rdd = sc.parallelize(prefices) luceneRDD = LuceneRDD(rdd) luceneRDD.prefixQuery(First, "a").count() should equal (4) luceneRDD.prefixQuery(First, "aa").count() should equal(3) luceneRDD.prefixQuery(First, "aaa").count() should equal (2) luceneRDD.prefixQuery(First, "aaaa").count() should equal (1) } "LuceneRDD.fuzzyQuery" should "correctly search with FuzzyQuery" in { val rdd = sc.parallelize(array) luceneRDD = LuceneRDD(rdd) luceneRDD.fuzzyQuery(First, "fear", 1).count() should equal (1) luceneRDD.fuzzyQuery(First, "fascsm", 1).count() should equal(1) luceneRDD.fuzzyQuery(First, "dath", 1).count() should equal (1) luceneRDD.fuzzyQuery(First, "tree", 1).count() should equal (1) } "LuceneRDD.phraseQuery" should "correctly search with PhraseQuery" in { val phrases = Array("hello world", "the company name was", "highlight lucene") val rdd = sc.parallelize(phrases) luceneRDD = LuceneRDD(rdd) luceneRDD.phraseQuery(First, "company name", 10).count() should equal (1) luceneRDD.phraseQuery(First, "hello world", 10).count() should equal (1) luceneRDD.phraseQuery(First, "highlight lucene", 10).count() should equal(1) } }
Example 19
Source File: BlockingDedupSpec.scala from spark-lucenerdd (Apache License 2.0)
package org.zouzias.spark.lucenerdd import com.holdenkarau.spark.testing.SharedSparkContext import org.apache.lucene.index.Term import org.apache.lucene.search.{Query, TermQuery} import org.apache.spark.SparkConf import org.apache.spark.sql.{Row, SparkSession} import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers} import org.zouzias.spark.lucenerdd.testing.Person class BlockingDedupSpec extends FlatSpec with Matchers with BeforeAndAfterEach with SharedSparkContext { override val conf: SparkConf = LuceneRDDKryoRegistrator.registerKryoClasses(new SparkConf(). setMaster("local[*]"). setAppName("test"). set("spark.ui.enabled", "false"). set("spark.app.id", appID)) "LuceneRDD.blockDedup" should "deduplicate elements on unique elements" in { val spark = SparkSession.builder().getOrCreate() import spark.implicits._ val people: Array[Person] = Array("fear", "death", "water", "fire", "house") .zipWithIndex.map { case (str, index) => val email = if (index % 2 == 0) "[email protected]" else "[email protected]" Person(str, index, email) } val df = sc.parallelize(people).repartition(2).toDF() val linker: Row => Query = { row => val name = row.getString(row.fieldIndex("name")) val term = new Term("name", name) new TermQuery(term) } val linked = LuceneRDD.blockDedup(df, linker, Array("email")) val linkedCount, dfCount = (linked.count, df.count()) linkedCount should equal(dfCount) // Check for correctness // Age is a unique index linked.collect().foreach { case (row, results) => val leftAge, rightAge = (row.getInt(row.fieldIndex("age")), results.headOption.map(x => x.getInt(x.fieldIndex("age")))) leftAge should equal(rightAge) } } }
Example 20
Source File: LuceneDocToSparkRowpec.scala from spark-lucenerdd (Apache License 2.0)
package org.zouzias.spark.lucenerdd import java.io.{Reader, StringReader} import org.apache.lucene.document.{Document, DoublePoint, Field, FloatPoint, IntPoint, LongPoint, StoredField, TextField} import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers} import org.zouzias.spark.lucenerdd.models.SparkScoreDoc import org.zouzias.spark.lucenerdd.models.SparkScoreDoc.{DocIdField, ScoreField, ShardField} import scala.collection.JavaConverters._ class LuceneDocToSparkRowpec extends FlatSpec with Matchers with BeforeAndAfterEach { val (score: Float, docId: Int, shardIndex: Int) = (1.0f, 1, 2) val float: Float = 20.001f val double: Double = 10.1000000001D def generate_doc(): Document = { val doc = new Document() // Add long field doc.add(new LongPoint("longField", 10)) doc.add(new StoredField("longField", 10)) doc.add(new FloatPoint("floatField", float)) doc.add(new StoredField("floatField", float)) doc.add(new IntPoint("intField", 9)) doc.add(new StoredField("intField", 9)) doc.add(new DoublePoint("doubleField", double)) doc.add(new StoredField("doubleField", double)) doc.add(new TextField("textField", "hello world", Field.Store.NO)) doc.add(new StoredField("textField", "hello world")) doc } private val doc: Document = generate_doc() val sparkScoreDoc = SparkScoreDoc(score, docId, shardIndex, doc) "SparkScoreDoc.toRow" should "return correct score" in { val row = sparkScoreDoc.toRow() row.getFloat(row.fieldIndex(ScoreField)) should equal(score) } "SparkScoreDoc.toRow" should "return correct docId" in { val row = sparkScoreDoc.toRow() row.getInt(row.fieldIndex(DocIdField)) should equal(docId) } "SparkScoreDoc.toRow" should "return correct shard number" in { val row = sparkScoreDoc.toRow() row.getInt(row.fieldIndex(ShardField)) should equal(shardIndex) } "SparkScoreDoc.toRow" should "return correct number of fields" in { val row = sparkScoreDoc.toRow() row.getFields().asScala.count(_.fieldType().stored()) should equal(8) } "SparkScoreDoc.toRow" should "set correctly DoublePoint" in { val row = sparkScoreDoc.toRow() row.getDouble(row.fieldIndex("doubleField")) should equal(double) } "SparkScoreDoc.toRow" should "set correctly FloatPoint" in { val row = sparkScoreDoc.toRow() row.getFloat(row.fieldIndex("floatField")) should equal(float) } }
Example 21
Source File: LuceneRDDTermVectorsSpec.scala from spark-lucenerdd (Apache License 2.0)
package org.zouzias.spark.lucenerdd import com.holdenkarau.spark.testing.SharedSparkContext import org.apache.spark.SparkConf import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers} import org.zouzias.spark.lucenerdd.testing.LuceneRDDTestUtils class LuceneRDDTermVectorsSpec extends FlatSpec with Matchers with BeforeAndAfterEach with LuceneRDDTestUtils with SharedSparkContext { var luceneRDD: LuceneRDD[_] = _ override def Radius: Double = 0 override val conf: SparkConf = LuceneRDDKryoRegistrator.registerKryoClasses(new SparkConf(). setMaster("local[*]"). setAppName("test"). set("spark.ui.enabled", "false"). set("spark.app.id", appID)) override def afterEach() { luceneRDD.close() } val First = "_1" "LuceneRDD.termVectors" should "return valid terms" in { val words = Array("To smile or not to smile smile", "Don't cry because it's over, smile because it happened", "So many books, so little time", "A room without books is like a body without a soul", "If you tell the truth, you don't have to remember anything") val rdd = sc.parallelize(words) luceneRDD = LuceneRDD(rdd) val terms = luceneRDD.termVectors(First).collect() // These terms should exist terms.exists(_.term.compareToIgnoreCase("time") == 0) should equal(true) terms.exists(_.term.compareToIgnoreCase("room") == 0) should equal(true) terms.exists(_.term.compareToIgnoreCase("soul") == 0) should equal(true) terms.exists(_.term.compareToIgnoreCase("smile") == 0) should equal(true) terms.exists(t => (t.term.compareToIgnoreCase("smile") == 0) && t.count == 3) should equal (true) terms.exists(t => (t.term.compareToIgnoreCase("becaus") == 0) && t.count == 2) should equal (true) } }
Example 22
Source File: LuceneRDDTuplesSpec.scala from spark-lucenerdd (Apache License 2.0)
package org.zouzias.spark.lucenerdd import com.holdenkarau.spark.testing.SharedSparkContext import org.apache.spark.SparkConf import org.scalatest.{FlatSpec, Matchers} class LuceneRDDTuplesSpec extends FlatSpec with Matchers with SharedSparkContext { val First = "_1" val Second = "_2" val array = List("fear", "death", " apology", "romance", "tree", "fashion", "fascism") override val conf = LuceneRDDKryoRegistrator.registerKryoClasses(new SparkConf(). setMaster("local[*]"). setAppName("test"). set("spark.ui.enabled", "false"). set("spark.app.id", appID)) "LuceneRDD" should "work with Tuple2" in { val rdd = sc.parallelize(array).map(x => (x, x)) val luceneRDD = LuceneRDD(rdd) luceneRDD.count should equal (array.size) } "LuceneRDD" should "work with Tuple3" in { val rdd = sc.parallelize(array).map(x => (x, x, x)) val luceneRDD = LuceneRDD(rdd) val results = luceneRDD.termQuery(Second, array(1)) results.count should equal (1) } "LuceneRDD" should "work with Tuple4" in { val rdd = sc.parallelize(array).map(x => (x, x, x, x)) val luceneRDD = LuceneRDD(rdd) val results = luceneRDD.termQuery(Second, array(1)) results.count should equal (1) } "LuceneRDD" should "work with Tuple5" in { val rdd = sc.parallelize(array).map(x => (x, x, x, x, x)) val luceneRDD = LuceneRDD(rdd) val results = luceneRDD.termQuery(Second, array(1)) results.count should equal (1) } "LuceneRDD" should "work with Tuple6" in { val rdd = sc.parallelize(array).map(x => (x, x, x, x, x, x)) val luceneRDD = LuceneRDD(rdd) val results = luceneRDD.termQuery(Second, array(1)) results.count should equal (1) } "LuceneRDD" should "work with Tuple7" in { val rdd = sc.parallelize(array).map(x => (x, x, 2.0d, 1.0d, x, 1, x)) val luceneRDD = LuceneRDD(rdd) val results = luceneRDD.termQuery(First, array.head) results.count should equal (1) } "LuceneRDD" should "work with Tuple8" in { val rdd = sc.parallelize(array).map(x => (x, x, 2.0d, 1.0d, x, 1, x, 3.4)) val luceneRDD = LuceneRDD(rdd) val results = luceneRDD.termQuery(First, array(1)) results.count should equal (1) } "LuceneRDD" should "work with mixed types in Tuples" in { val rdd = sc.parallelize(array).map(x => (x, 1, x, 2L, x, 3.0F)) val luceneRDD = LuceneRDD(rdd) val results = luceneRDD.termQuery(First, array(1)) results.count should equal (1) } }
Example 23
Source File: LuceneRDDMoreLikeThisSpec.scala from spark-lucenerdd (Apache License 2.0)
package org.zouzias.spark.lucenerdd import com.holdenkarau.spark.testing.SharedSparkContext import org.apache.spark.SparkConf import scala.collection.JavaConverters._ import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers} import scala.io.Source class LuceneRDDMoreLikeThisSpec extends FlatSpec with Matchers with BeforeAndAfterEach with SharedSparkContext { var luceneRDD: LuceneRDD[_] = _ override val conf = LuceneRDDKryoRegistrator.registerKryoClasses(new SparkConf(). setMaster("local[*]"). setAppName("test"). set("spark.ui.enabled", "false"). set("spark.app.id", appID)) override def afterEach() { luceneRDD.close() } "LuceneRDD.moreLikeThis" should "return relevant documents" in { val words: Seq[String] = Source.fromFile("src/test/resources/alice.txt") .getLines().map(_.toLowerCase).toSeq val rdd = sc.parallelize(words) luceneRDD = LuceneRDD(rdd) val results = luceneRDD .moreLikeThis("_1", "alice adventures wonderland", 1, 1) .collect() results.length > 0 should equal(true) val firstDoc = results.head val x = firstDoc.getString(firstDoc.fieldIndex("_1")) x.contains("alice") && x.contains("wonderland") && x.contains("adventures") should equal(true) val lastDoc = results.last val y = lastDoc.getString(lastDoc.fieldIndex("_1")) y.contains("alice") && !y.contains("wonderland") && !y.contains("adventures") should equal(true) } }
Example 24
Source File: ProxyOptionsTest.scala from jvm-toxcore-api (GNU General Public License v3.0)
package im.tox.tox4j.core.options

import im.tox.tox4j.core.enums.ToxProxyType
import org.scalatest.FlatSpec

@SuppressWarnings(Array("org.wartremover.warts.Equals"))
final class ProxyOptionsTest extends FlatSpec {

  "proxy options" should "not allow negative ports" in {
    intercept[IllegalArgumentException] {
      ProxyOptions.Http("localhost", -1)
    }
    intercept[IllegalArgumentException] {
      ProxyOptions.Socks5("localhost", -1)
    }
  }

  it should "allow the port to be 0" in {
    ProxyOptions.Http("localhost", 0)
    ProxyOptions.Socks5("localhost", 0)
  }

  it should "allow the port to be 65535" in {
    ProxyOptions.Http("localhost", 65535)
    ProxyOptions.Socks5("localhost", 65535)
  }

  it should "not allow the port to be greater than 65535" in {
    intercept[IllegalArgumentException] {
      ProxyOptions.Http("localhost", 65536)
    }
    intercept[IllegalArgumentException] {
      ProxyOptions.Socks5("localhost", 65536)
    }
  }

  it should "produce the right low level enum values" in {
    assert(ProxyOptions.None.proxyType == ToxProxyType.NONE)
    assert(ProxyOptions.Http("localhost", 1).proxyType == ToxProxyType.HTTP)
    assert(ProxyOptions.Socks5("localhost", 1).proxyType == ToxProxyType.SOCKS5)
  }
}
Example 25
Source File: ToxOptionsTest.scala from jvm-toxcore-api (GNU General Public License v3.0)
package im.tox.tox4j.core.options

import org.scalatest.FlatSpec

final class ToxOptionsTest extends FlatSpec {

  "tox options" should "not allow negative ports" in {
    intercept[IllegalArgumentException] {
      ToxOptions(startPort = -1)
    }
    intercept[IllegalArgumentException] {
      ToxOptions(endPort = -1)
    }
    intercept[IllegalArgumentException] {
      ToxOptions(tcpPort = -1)
    }
  }

  it should "allow the port to be 0" in {
    ToxOptions(startPort = 0, endPort = 0, tcpPort = 0)
  }

  it should "allow the port to be 65535" in {
    ToxOptions(startPort = 65535, endPort = 65535, tcpPort = 65535)
  }

  it should "not allow the port to be greater than 65535" in {
    intercept[IllegalArgumentException] {
      ToxOptions(startPort = 65536)
    }
    intercept[IllegalArgumentException] {
      ToxOptions(endPort = 65536)
    }
    intercept[IllegalArgumentException] {
      ToxOptions(tcpPort = 65536)
    }
  }

  it should "require startPort <= endPort" in {
    intercept[IllegalArgumentException] {
      ToxOptions(startPort = 2, endPort = 1)
    }
  }
}
Example 26
Source File: CounterEtlFunctionsSpec.scala from incubator-s2graph (Apache License 2.0)
package org.apache.s2graph.counter.loader.core

import com.typesafe.config.ConfigFactory
import org.apache.s2graph.core.schema.{Label, Service}
import org.apache.s2graph.core.types.HBaseType
import org.apache.s2graph.core.{S2Graph, Management}
import org.apache.s2graph.counter.models.DBModel
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers}

import scala.concurrent.ExecutionContext.Implicits.global

class CounterEtlFunctionsSpec extends FlatSpec with BeforeAndAfterAll with Matchers {
  val config = ConfigFactory.load()
  val cluster = config.getString("hbase.zookeeper.quorum")
  DBModel.initialize(config)

  val graph = new S2Graph(config)(global)
  val management = new Management(graph)

  override def beforeAll: Unit = {
    management.createService("test", cluster, "test", 1, None, "gz")
    management.createLabel("test_case", "test", "src", "string", "test", "tgt", "string", true,
      "test", Nil, Nil, "weak", None, None, HBaseType.DEFAULT_VERSION, false, "gz")
  }

  override def afterAll: Unit = {
    Label.delete(Label.findByName("test_case", false).get.id.get)
    Service.delete(Service.findByName("test", false).get.id.get)
  }

  "CounterEtlFunctions" should "parsing log" in {
    val data =
      """
        |1435107139287 insert e aaPHfITGUU0B_150212123559509 abcd test_case {"cateid":"100110102","shopid":"1","brandid":""}
        |1435106916136 insert e Tgc00-wtjp2B_140918153515441 efgh test_case {"cateid":"101104107","shopid":"2","brandid":""}
      """.stripMargin.trim.split('\n')

    val items = {
      for {
        line <- data
        item <- CounterEtlFunctions.parseEdgeFormat(line)
      } yield {
        item.action should equal("test_case")
        item
      }
    }

    items should have size 2
  }
}
Example 27
Source File: ContractInjectionTests.scala from dsentric (Apache License 2.0)
package dsentric import dsentric._ import org.scalatest.{FlatSpec, FunSpec, Matchers} class ContractInjectionTests extends FlatSpec with Matchers { import PessimisticCodecs._ import Dsentric._ object Flat extends Contract { val one = \?[String] val two = \![Int](123) val _three = \![String]("three", "bob") } object FlatNoDefaults extends Contract { val one = \?[String] } object Nested extends Contract { val nest = new \\ { val one = \?[String] val two = \![Int](123) val _three = \![String]("three", "bob") } } object NestedNoDefaults extends Contract { val nest = new \\ { val one = \?[String] } } "$applyDefaults" should "add a missing value to a dobject only if it has a default" in { val dObject = DObject.empty val res = Flat.$applyDefaults(dObject) Flat.one.$get(res) shouldBe None Flat.two.$get(res) shouldBe 123 } "$applyDefaults" should "does nothing if no defaults in flat structure" in { val dObject = DObject.empty val res = FlatNoDefaults.$applyDefaults(dObject) res shouldBe DObject.empty } "$applyDefaults" should "not override an existing property in a dobject with a default" in { val dObject = DObject("two" -> Data(321)) val res = Flat.$applyDefaults(dObject) Flat.two.$get(res) shouldBe 321 } "$applyDefaults" should "work when there is a name override" in { val dObject1 = DObject.empty val res1 = Flat.$applyDefaults(dObject1) Flat._three.$get(res1) shouldBe "bob" val dObject2 = DObject("three" -> Data("sally")) val res2 = Flat.$applyDefaults(dObject2) Flat._three.$get(res2) shouldBe "sally" } "$applyDefaults" should "add a missing value in a nested structure" in { val dObject = DObject.empty val res = Nested.$applyDefaults(dObject) Nested.nest.two.$get(res) shouldBe 123 } "$applyDefaults" should "work when there is a name override in a nested structure" in { val dObject1 = DObject.empty val res1 = Nested.$applyDefaults(dObject1) Nested.nest._three.$get(res1) shouldBe "bob" val dObject2 = DObject("nest" -> DObject("three" -> Data("sally"))) val res2 = Nested.$applyDefaults(dObject2) Nested.nest._three.$get(res2) shouldBe "sally" } "$applyDefaults" should "does nothing if no defaults in nested structure" in { val dObject = DObject("non" -> Data("bob")) val res = NestedNoDefaults.$applyDefaults(dObject) res shouldBe dObject } }
Example 28
Source File: PailDataSourceSpec.scala From utils with Apache License 2.0 | 5 votes |
package com.indix.utils.spark.pail import java.util import com.backtype.hadoop.pail.{PailFormatFactory, PailSpec, PailStructure} import com.backtype.support.{Utils => PailUtils} import com.google.common.io.Files import org.apache.commons.io.FileUtils import org.apache.spark.sql.SparkSession import org.scalatest.{BeforeAndAfterAll, FlatSpec} import org.scalatest.Matchers._ import scala.collection.JavaConverters._ import scala.util.Random case class User(name: String, age: Int) class UserPailStructure extends PailStructure[User] { override def isValidTarget(dirs: String*): Boolean = true override def getType: Class[_] = classOf[User] override def serialize(user: User): Array[Byte] = PailUtils.serialize(user) override def getTarget(user: User): util.List[String] = List(user.age % 10).map(_.toString).asJava override def deserialize(serialized: Array[Byte]): User = PailUtils.deserialize(serialized).asInstanceOf[User] } class PailDataSourceSpec extends FlatSpec with BeforeAndAfterAll with PailDataSource { private var spark: SparkSession = _ override protected def beforeAll(): Unit = { super.beforeAll() spark = SparkSession.builder().master("local[2]").appName("PailDataSource").getOrCreate() } val userPailSpec = new PailSpec(PailFormatFactory.SEQUENCE_FILE, new UserPailStructure) "PailBasedReaderWriter" should "read/write user records from/into pail" in { val output = Files.createTempDir() val users = (1 to 100).map { index => User(s"foo$index", Random.nextInt(40))} spark.sparkContext.parallelize(users) .saveAsPail(output.getAbsolutePath, userPailSpec) val input = output.getAbsolutePath val total = spark.sparkContext.pailFile[User](input) .map(u => u.name) .count() total should be(100) FileUtils.deleteDirectory(output) } }
Example 29
Source File: ParquetAvroDataSourceSpec.scala From utils with Apache License 2.0 | 5 votes |
package com.indix.utils.spark.parquet import java.io.File import com.google.common.io.Files import com.indix.utils.spark.parquet.avro.ParquetAvroDataSource import org.apache.commons.io.FileUtils import org.apache.parquet.hadoop.metadata.CompressionCodecName import org.apache.spark.sql.SparkSession import org.scalactic.Equality import org.scalatest.Matchers.{be, convertToAnyShouldWrapper, equal} import org.scalatest.{BeforeAndAfterAll, FlatSpec} import java.util.{Arrays => JArrays} case class SampleAvroRecord(a: Int, b: String, c: Seq[String], d: Boolean, e: Double, f: collection.Map[String, String], g: Array[Byte]) class ParquetAvroDataSourceSpec extends FlatSpec with BeforeAndAfterAll with ParquetAvroDataSource { private var spark: SparkSession = _ implicit val sampleAvroRecordEq = new Equality[SampleAvroRecord] { override def areEqual(left: SampleAvroRecord, b: Any): Boolean = b match { case right: SampleAvroRecord => left.a == right.a && left.b == right.b && Equality.default[Seq[String]].areEqual(left.c, right.c) && left.d == right.d && left.e == right.e && Equality.default[collection.Map[String, String]].areEqual(left.f, right.f) && JArrays.equals(left.g, right.g) case _ => false } } override protected def beforeAll(): Unit = { super.beforeAll() spark = SparkSession.builder().master("local[2]").appName("ParquetAvroDataSource").getOrCreate() } override protected def afterAll(): Unit = { try { spark.sparkContext.stop() } finally { super.afterAll() } } "AvroBasedParquetDataSource" should "read/write avro records as ParquetData" in { val outputLocation = Files.createTempDir().getAbsolutePath + "/output" val sampleRecords: Seq[SampleAvroRecord] = Seq( SampleAvroRecord(1, "1", List("a1"), true, 1.0d, Map("a1" -> "b1"), "1".getBytes), SampleAvroRecord(2, "2", List("a2"), false, 2.0d, Map("a2" -> "b2"), "2".getBytes), SampleAvroRecord(3, "3", List("a3"), true, 3.0d, Map("a3" -> "b3"), "3".getBytes), SampleAvroRecord(4, "4", List("a4"), true, 4.0d, Map("a4" -> "b4"), "4".getBytes), SampleAvroRecord(5, "5", List("a5"), false, 5.0d, Map("a5" -> "b5"), "5".getBytes) ) val sampleDf = spark.createDataFrame(sampleRecords) sampleDf.rdd.saveAvroInParquet(outputLocation, sampleDf.schema, CompressionCodecName.GZIP) val sparkVal = spark import sparkVal.implicits._ val records: Array[SampleAvroRecord] = spark.read.parquet(outputLocation).as[SampleAvroRecord].collect() records.length should be(5) // We use === to use the custom Equality defined above for comparing Array[Byte] // Ref - https://github.com/scalatest/scalatest/issues/491 records.sortBy(_.a) === sampleRecords.sortBy(_.a) FileUtils.deleteDirectory(new File(outputLocation)) } }
Example 30
Source File: RocksMapTest.scala From utils with Apache License 2.0 | 5 votes |
package com.indix.utils.store import java.io.Serializable import java.nio.file.{Paths, Files} import org.apache.commons.io.FileUtils import org.scalatest.{Matchers, FlatSpec} case class TestObject(a: Int, b: String, c: Array[Int], d: Array[String]) extends Serializable { def equals(other: TestObject): Boolean = { this.a.equals(other.a) && this.b.equals(other.b) && this.c.sameElements(other.c) && this.d.sameElements(other.d) } } case class ComplexTestObject(a: Int, b: TestObject) extends Serializable { def equals(other: ComplexTestObject): Boolean = { this.a.equals(other.a) && this.b.equals(other.b) } } class RocksMapTest extends FlatSpec with Matchers { "RocksMap" should "serialize and deserialize the keys and values" in { val db = new RocksMap("test") val a: Int = 1 val b: String = "hello" val c: Array[Int] = Array(1, 2, 3) val d: Array[String] = Array("a", "b", "c") val serialized_a = db.serialize(a) val serialized_b = db.serialize(b) val serialized_c = db.serialize(c) val serialized_d = db.serialize(d) val serialized_TestObject = db.serialize(TestObject(a, b, c, d)) val serialized_ComplexObject = db.serialize(ComplexTestObject(a, TestObject(a, b, c, d))) db.deserialize[Int](serialized_a) should be(a) db.deserialize[String](serialized_b) should be(b) db.deserialize[Array[Int]](serialized_c) should be(c) db.deserialize[Array[String]](serialized_d) should be(d) db.deserialize[TestObject](serialized_TestObject).equals(TestObject(a, b, c, d)) should be(true) db.deserialize[ComplexTestObject](serialized_ComplexObject).equals(ComplexTestObject(a, TestObject(a, b, c, d))) should be(true) db.drop() db.close() } it should "put and get values" in { val db = new RocksMap("test") db.put(1, 1.0) db.get[Int, Double](1).getOrElse(0) should be(1.0) db.clear() db.drop() db.close() } it should "remove values" in { val db = new RocksMap("test") db.put(1, 1L) db.get[Int, Long](1).getOrElse(0) should be(1L) db.remove(1) db.get[Int, Long](1) should be(None) db.drop() db.close() } it should "clear all the values" in { val db = new RocksMap(name = "test") db.put(1, "hello") db.put(2, "yello") db.get(1) should not be (None) db.get(2) should not be (None) db.clear() db.get(1) should be(None) db.get(2) should be(None) db.drop() db.close() } it should "clear the data files when drop is called" in { val db = new RocksMap(name = "test") Files.exists(Paths.get(db.pathString)) should be (true) db.drop() Files.exists(Paths.get(db.pathString)) should be (false) db.close() } }
Example 31
Source File: UrlUtilsSpec.scala From utils with Apache License 2.0 | 5 votes |
package com.indix.utils.core

import org.scalatest.FlatSpec
import org.scalatest.Matchers._

import scala.collection.JavaConverters._

class UrlUtilsSpec extends FlatSpec {

  "UrlUtils#toHostname" should "return the hostname of any given url" in {
    UrlUtils.toHostname("http://www.google.com") should be(Some("www.google.com"))
    UrlUtils.toHostname("https://www.google.com") should be(Some("www.google.com"))
    UrlUtils.toHostname("www.google.com/abc") should be(None)
  }

  "UrlUtils#toQueryMap" should "return the query params from a url as a scala map" in {
    val resMap = UrlUtils.toQueryMap("http://google.com/?query=hello&lang=en&somekey=value&")
    resMap.size should be (3)
    resMap.head should be ("query", "hello")

    val resMap1 = UrlUtils.toQueryMap("http://google.com/???query=hello&lang=en&somekey=value&")
    resMap1.size should be (3)
    resMap1.head should be ("??query", "hello")

    val resMap2 = UrlUtils.toQueryMap("http://google.com/")
    resMap2.size should be (0)

    val resMap3 = UrlUtils.toQueryMap("http://uae.souq.com/ae-en/educational-book/national-park-service/english/a-19-1401/l/?ref=nav?ref=nav&page=23&msg=foo%20bar")
    resMap3.size should be (3)
    resMap3 should contain ("ref" -> "nav?ref=nav")
    resMap3 should contain ("page" -> "23")
    resMap3 should contain ("msg" -> "foo bar")
  }

  "UrlUtils#isValid" should "return true/false given the url is valid" in {
    UrlUtils.isValid("google.coma") should be(false)
    UrlUtils.isValid("https://google.com") should be(true)
  }

  "UrlUtils#resolve" should "resolve relative urls against the base url" in {
    UrlUtils.resolve("http://google.com/", "shopping") should be("http://google.com/shopping")
  }

  "UrlUtils#decode" should "decode UTF-8 encoded urls to unicode strings" in {
    UrlUtils.decode("http%3A%2F%2Fwww.example.com%2Fd%C3%BCsseldorf%3Fneighbourhood%3DL%C3%B6rick") should be ("http://www.example.com/düsseldorf?neighbourhood=Lörick")
  }

  "UrlUtils#encode" should "encode unicode urls to UTF-8 encoded strings" in {
    UrlUtils.encode("http://www.example.com/düsseldorf?neighbourhood=Lörick") should be ("http%3A%2F%2Fwww.example.com%2Fd%C3%BCsseldorf%3Fneighbourhood%3DL%C3%B6rick")
  }

  "UrlUtils#encodeSpaces" should "encode spaces as %20" in {
    UrlUtils.encode("word1 abcd") should be ("word1%20abcd")
  }

  "UrlUtils#stripHashes" should "strip hash fragments from urls" in {
    UrlUtils.stripHashes("http://www.example.com/url#fragment") should be ("http://www.example.com/url")
    UrlUtils.stripHashes("http://www.example.com/url#fragment1#fragment2") should be ("http://www.example.com/url")
  }

  "UrlUtils#addHashFragments" should "add fragments to url" in {
    UrlUtils.addHashFragments("http://www.example.com/url", Map[String, String](
      "attr1" -> "fragment2",
      "attr2" -> "fragment 1",
      "attr3" -> "Fragment-of-1",
      "attr4" -> "XL"
    ).asJava) should be ("http://www.example.com/url#Fragment2#Fragment+1#Fragment-Of-1#XL")
  }

  "UrlUtils#convertToUrlFragment" should "convert to url fragments" in {
    UrlUtils.convertToUrlFragment("x-large / red") should be ("X-Large+%2F+Red")
  }

  "UrlUtils#getHashFragments" should "return the hash fragments of a url" in {
    UrlUtils.getHashFragments("http://www.example.com/url#fragment1#fragment2") should be (List("fragment1", "fragment2"))
    UrlUtils.getHashFragments("http://www.example.com/url") should be (List.empty)
  }
}
Example 32
Source File: UPCSpec.scala From utils with Apache License 2.0 | 5 votes |
package com.indix.utils.core import org.scalatest.{FlatSpec, Matchers} class UPCSpec extends FlatSpec with Matchers { "UPC" should "convert a UPC to a standardized format" in { UPC.standardize("63938200039") should be("00639382000393") UPC.standardize("99999999623") should be("00999999996237") UPC.standardize("89504500098") should be("00895045000982") UPC.standardize("934093502349") should be("09340935023493") UPC.standardize("841106172217") should be("08411061722176") UPC.standardize("810000439") should be("00008100004393") UPC.standardize("931177059140") should be("09311770591409") UPC.standardize("9311770591409") should be("09311770591409") UPC.standardize("27242860940") should be("00027242860940") UPC.standardize("75317405253") should be("00753174052534") UPC.standardize("-810000439") should be("00008100004393") UPC.standardize("810-000-439") should be("00008100004393") // Iphone UPCs UPC.standardize("885909950652") should be("00885909950652") UPC.standardize("715660702866") should be("00715660702866") } it should "check if the input UPC is valid as checked by UPCItemDB" in { UPC.isValid("0420160002247") should be(false) UPC.isValid("000000010060") should be(false) // the above same UPCs are validated after standardizing them UPC.isValid(UPC.standardize("0420160002247")) should be(true) UPC.isValid(UPC.standardize("000000010060")) should be(true) } it should "work correctly for GTIN UPCs by converting it to a valid EAN-13 with padded zeros" in { UPC.standardize("10010942220401") should be ("00010942220404") UPC.standardize("47111850104013") should be("07111850104015") UPC.standardize("40628043604719") should be("00628043604711") } it should "work correctly for ISBN numbers" in { UPC.standardize("978052549832") should be("9780525498322") UPC.standardize("9780500517260") should be("9780500517260") UPC.standardize("9780316512787") should be("9780316512787") UPC.standardize("9780997355932") should be("9780997355932") } it should "not replace check-digit if UPC already left padded and check-digit and last-digit same" in { UPC.standardize("0753174052534") should be("00753174052534") } it should "fail for an invalid 14-digit UPC (GTIN)" in { intercept[IllegalArgumentException] { UPC.standardize("47111850104010") } } it should "fail for all zeroes UPC" in { intercept[IllegalArgumentException] { UPC.standardize("00000000000") } } it should "fail for invalid UPC" in { intercept[IllegalArgumentException] { UPC.standardize("12345") } } it should "fail for empty or null UPC" in { intercept[IllegalArgumentException] { UPC.standardize("") } } it should "fail in case UPC is not a number" in { intercept[IllegalArgumentException] { UPC.standardize("ABCD") } } }
Example 33
Source File: MPNSpec.scala From utils with Apache License 2.0 | 5 votes |
package com.indix.utils.core import org.scalatest.{FlatSpec, Matchers} class MPNSpec extends FlatSpec with Matchers { behavior of "MPN" it should "check title case" in { MPN.isTitleCase("Key Shell") should be(true) MPN.isTitleCase("Samsung Galaxy A8") should be(true) MPN.isTitleCase("Samsung Galaxy Note 5") should be(true) MPN.isTitleCase("Tempered Glass") should be(true) MPN.isTitleCase("1442820G1") should be(false) MPN.isTitleCase("Macbook") should be(false) MPN.isTitleCase("CE 7200") should be(false) MPN.isTitleCase("IPHONE") should be(false) } it should "validate identifier" in { MPN.isValidIdentifier(null) should be (false) MPN.isValidIdentifier("") should be (false) MPN.isValidIdentifier("51") should be (false) MPN.isValidIdentifier(" NA ") should be (false) MPN.isValidIdentifier("Does not apply") should be (false) MPN.isValidIdentifier("DT.VFGAA.003") should be (true) MPN.isValidIdentifier("A55BM-A/USB3") should be (true) MPN.isValidIdentifier("cASSP1598345-10") should be (true) MPN.isValidIdentifier("016393B119058-Regular-18x30-BE-BK") should be (true) MPN.isValidIdentifier("PJS2V") should be (true) } it should "standardize MPN" in { MPN.standardizeMPN(null) should be (None) MPN.standardizeMPN("Does not apply") should be (None) MPN.standardizeMPN("All Windows %22") should be (None) MPN.standardizeMPN("Samsung Galaxy Note 5") should be (None) MPN.standardizeMPN("A Square") should be (None) MPN.standardizeMPN("{{availableProducts[0].sku}}") should be (None) MPN.standardizeMPN("PJS2V") should be (Some("PJS2V")) MPN.standardizeMPN("30634190, 30753839, 31253006") should be (Some("30634190")) MPN.standardizeMPN("mr16r082gbn1-ck8 ") should be (Some("MR16R082GBN1-CK8")) MPN.standardizeMPN("SM-G950FZDAXSA") should be (Some("SM-G950FZDAXSA")) } }
Example 34
Source File: ISBNSpec.scala From utils with Apache License 2.0 | 5 votes |
package com.indix.utils.core import org.scalatest.{FlatSpec, Matchers} class ISBNSpec extends FlatSpec with Matchers { "ISBN" should "create a valid ISBN object for" in { ISBN("0-306-40615-2").get.isbn10 should be (Some("0306406152")) ISBN("0-306-40615-2").get.isbn should be ("9780306406157") ISBN("978-0-306-40615-7").get.isbn should be ("9780306406157") ISBN("978-0-306-40615-7").get.isbn10 should be (None) ISBN("9971502100").get.isbn10 should be (Some("9971502100")) ISBN("9971502100").get.isbn should be ("9789971502102") ISBN("960 425 059 0").get.isbn10 should be (Some("9604250590")) ISBN("960 425 059 0").get.isbn should be ("9789604250592") } it should "not create a valid ISBN object for" in { ISBN("abcd") should be (None) ISBN("123") should be (None) ISBN("") should be (None) ISBN(" ") should be (None) ISBN("-") should be (None) ISBN("1234567890") should be (None) ISBN("1234567890111") should be (None) ISBN("0-306-40615-1") should be (None) ISBN("978-0-306-40615-5") should be (None) } }
Example 35
Source File: TimeEncodingCompatibilityItSpec.scala From parquet4s with MIT License | 5 votes |
package com.github.mjakubowski84.parquet4s import java.util.TimeZone import com.github.mjakubowski84.parquet4s.CompatibilityTestCases.TimePrimitives import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers} class TimeEncodingCompatibilityItSpec extends FlatSpec with Matchers with BeforeAndAfter with SparkHelper { private val localTimeZone = TimeZone.getDefault private val utcTimeZone = TimeZone.getTimeZone("UTC") private lazy val newYearEveEvening = java.sql.Timestamp.valueOf(java.time.LocalDateTime.of(2018, 12, 31, 23, 0, 0)) private lazy val newYearMidnight = java.sql.Timestamp.valueOf(java.time.LocalDateTime.of(2019, 1, 1, 0, 0, 0)) private lazy val newYear = java.sql.Date.valueOf(java.time.LocalDate.of(2019, 1, 1)) override def beforeAll(): Unit = { super.beforeAll() TimeZone.setDefault(utcTimeZone) } before { clearTemp() } private def writeWithSpark(data: TimePrimitives): Unit = writeToTemp(Seq(data)) private def readWithSpark: TimePrimitives = readFromTemp[TimePrimitives].head private def writeWithParquet4S(data: TimePrimitives, timeZone: TimeZone): Unit = ParquetWriter.writeAndClose(tempPathString, Seq(data), ParquetWriter.Options(timeZone = timeZone)) private def readWithParquet4S(timeZone: TimeZone): TimePrimitives = { val parquetIterable = ParquetReader.read[TimePrimitives](tempPathString, ParquetReader.Options(timeZone = timeZone)) try { parquetIterable.head } finally { parquetIterable.close() } } "Spark" should "read properly time written with time zone one hour east" in { val input = TimePrimitives(timestamp = newYearMidnight, date = newYear) val expectedOutput = TimePrimitives(timestamp = newYearEveEvening, date = newYear) writeWithParquet4S(input, TimeZone.getTimeZone("GMT+1")) readWithSpark should be(expectedOutput) } it should "read properly written with time zone one hour west" in { val input = TimePrimitives(timestamp = newYearEveEvening, date = newYear) val expectedOutput = TimePrimitives(timestamp = newYearMidnight, date = newYear) writeWithParquet4S(input, TimeZone.getTimeZone("GMT-1")) readWithSpark should be(expectedOutput) } "Parquet4S" should "read properly time written with time zone one hour east" in { val input = TimePrimitives(timestamp = newYearMidnight, date = newYear) val expectedOutput = TimePrimitives(timestamp = newYearEveEvening, date = newYear) writeWithSpark(input) readWithParquet4S(TimeZone.getTimeZone("GMT-1")) should be(expectedOutput) } it should "read properly time written with time zone one hour west" in { val input = TimePrimitives(timestamp = newYearEveEvening, date = newYear) val expectedOutput = TimePrimitives(timestamp = newYearMidnight, date = newYear) writeWithSpark(input) readWithParquet4S(TimeZone.getTimeZone("GMT+1")) should be(expectedOutput) } override def afterAll(): Unit = { TimeZone.setDefault(localTimeZone) super.afterAll() } }
Example 37
Source File: ValueCodecSpec.scala From parquet4s with MIT License | 5 votes |
package com.github.mjakubowski84.parquet4s

import org.scalatest.{FlatSpec, Matchers}

class ValueCodecSpec extends FlatSpec with Matchers {

  case class TestType(i: Int)

  val requiredValueCodec: RequiredValueCodec[TestType] = new RequiredValueCodec[TestType] {
    override protected def decodeNonNull(value: Value, configuration: ValueCodecConfiguration): TestType = value match {
      case IntValue(i) => TestType(i)
    }
    override protected def encodeNonNull(data: TestType, configuration: ValueCodecConfiguration): Value = IntValue(data.i)
  }

  val optionalValueCodec: OptionalValueCodec[TestType] = new OptionalValueCodec[TestType] {
    override protected def decodeNonNull(value: Value, configuration: ValueCodecConfiguration): TestType = value match {
      case IntValue(i) => TestType(i)
    }
    override protected def encodeNonNull(data: TestType, configuration: ValueCodecConfiguration): Value = IntValue(data.i)
  }

  val testType = TestType(42)
  val testValue = IntValue(testType.i)
  val configuration: ValueCodecConfiguration = ValueCodecConfiguration.default

  "Required value codec" should "encode non-null value" in {
    requiredValueCodec.encode(testType, configuration) should be(testValue)
  }

  it should "decode non-null value" in {
    requiredValueCodec.decode(testValue, configuration) should be(testType)
  }

  it should "throw an exception when decoding null-value" in {
    an[IllegalArgumentException] should be thrownBy requiredValueCodec.decode(NullValue, configuration)
  }

  it should "throw an exception when encoding null" in {
    an[IllegalArgumentException] should be thrownBy requiredValueCodec.encode(null.asInstanceOf[TestType], configuration)
  }

  "Optional value codec" should "encode non-null value" in {
    optionalValueCodec.encode(testType, configuration) should be(testValue)
  }

  it should "decode non-null value" in {
    optionalValueCodec.decode(testValue, configuration) should be(testType)
  }

  it should "decode null-value as null" in {
    optionalValueCodec.decode(NullValue, configuration) should be(null)
  }

  it should "encode null as NullValue" in {
    optionalValueCodec.encode(null.asInstanceOf[TestType], configuration) should be(NullValue)
  }
}
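The codec tests above use the two assertion idioms that recur throughout these FlatSpec examples: plain value matchers (should be) and exception expectations (an[...] should be thrownBy, or the equivalent intercept). A minimal, self-contained sketch follows; the DivisionSpec class and divide helper are illustrative assumptions, not part of parquet4s.

import org.scalatest.{FlatSpec, Matchers}

class DivisionSpec extends FlatSpec with Matchers {

  // Hypothetical helper used only to demonstrate the assertion styles.
  def divide(a: Int, b: Int): Int =
    if (b == 0) throw new IllegalArgumentException("division by zero") else a / b

  "divide" should "return the quotient for a non-zero divisor" in {
    divide(10, 2) should be(5)
  }

  it should "throw an exception for a zero divisor" in {
    // Style 1: expect that the expression throws a given exception type.
    an[IllegalArgumentException] should be thrownBy divide(1, 0)

    // Style 2: intercept also returns the thrown exception for further assertions.
    val ex = intercept[IllegalArgumentException] { divide(1, 0) }
    ex.getMessage should include("zero")
  }
}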
Example 38
Source File: DecimalsSpec.scala From parquet4s with MIT License | 5 votes |
package com.github.mjakubowski84.parquet4s import org.scalatest.{FlatSpec, Matchers} class DecimalsSpec extends FlatSpec with Matchers { "Decimals" should "be able to convert decimal to binary and back" in { val decimal = BigDecimal(1001, 2) val binary = Decimals.binaryFromDecimal(decimal) val revertedDecimal = Decimals.decimalFromBinary(binary) revertedDecimal should be(decimal) } it should "be able to convert negative decimal to binary and back" in { val decimal = BigDecimal(-1001, 2) val binary = Decimals.binaryFromDecimal(decimal) val revertedDecimal = Decimals.decimalFromBinary(binary) revertedDecimal should be(decimal) } it should "be able to convert zero decimal to binary and back" in { val decimal = BigDecimal(0, 0) val binary = Decimals.binaryFromDecimal(decimal) val revertedDecimal = Decimals.decimalFromBinary(binary) revertedDecimal should be(decimal) } it should "round decimal with scale greater than Parquet4S uses" in { val decimal = BigDecimal(1L, Decimals.Scale + 1, Decimals.MathContext) val binary = Decimals.binaryFromDecimal(decimal) val revertedDecimal = Decimals.decimalFromBinary(binary) revertedDecimal should be(0) } }
Example 39
Source File: FileCompressionFormatSpec.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package daf.filesystem import org.scalatest.{ FlatSpec, Matchers } class FileCompressionFormatSpec extends FlatSpec with Matchers { "Uncompressed files" must "be correctly categorized" in { FileCompressionFormats.fromName { "path/to/data.json" } should be { FileCompressionFormats.none } } "lzo compressed files" must "be correctly categorized" in { FileCompressionFormats.fromName { "path/to/data.lzo.avro" } should be { FileCompressionFormats.lzo } FileCompressionFormats.fromName { "path/to/data.lzo" } should be { FileCompressionFormats.lzo } FileCompressionFormats.fromName { "path/to/data.parq.lzo" } should be { FileCompressionFormats.lzo } } "snappy compressed files" must "be correctly categorized" in { FileCompressionFormats.fromName { "path/to/data.snappy.json" } should be { FileCompressionFormats.snappy } FileCompressionFormats.fromName { "path/to/data.snappy" } should be { FileCompressionFormats.snappy } FileCompressionFormats.fromName { "path/to/data.avro.snappy" } should be { FileCompressionFormats.snappy } } "gzip data files" must "be correctly categorized" in { FileCompressionFormats.fromName { "path/to/data.gzip.parquet" } should be { FileCompressionFormats.gzip } FileCompressionFormats.fromName { "path/to/data.gzip" } should be { FileCompressionFormats.gzip } FileCompressionFormats.fromName { "path/to/data.json.gzip" } should be { FileCompressionFormats.gzip } } }
Example 40
Source File: HDFSBase.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package daf.util import better.files.{ File, _ } import daf.util.DataFrameClasses.{ Address, Person } import org.apache.hadoop.fs.FileSystem import org.apache.hadoop.hdfs.{ HdfsConfiguration, MiniDFSCluster } import org.apache.hadoop.test.PathUtils import org.apache.spark.sql.{ SaveMode, SparkSession } import org.scalatest.{ BeforeAndAfterAll, FlatSpec, Matchers } import org.slf4j.LoggerFactory import scala.util.{ Failure, Random, Try } abstract class HDFSBase extends FlatSpec with Matchers with BeforeAndAfterAll { var miniCluster: Try[MiniDFSCluster] = Failure[MiniDFSCluster](new Exception) var fileSystem: Try[FileSystem] = Failure[FileSystem](new Exception) val sparkSession: SparkSession = SparkSession.builder().master("local").getOrCreate() val alogger = LoggerFactory.getLogger(this.getClass) val (testDataPath, confPath) = { val testDataPath = s"${PathUtils.getTestDir(this.getClass).getCanonicalPath}/MiniCluster" val confPath = s"$testDataPath/conf" ( testDataPath.toFile.createIfNotExists(asDirectory = true, createParents = false), confPath.toFile.createIfNotExists(asDirectory = true, createParents = false) ) } def pathAvro = "opendata/test.avro" def pathParquet = "opendata/test.parquet" def pathCsv = "opendata/test.csv" def getSparkSession = sparkSession override def beforeAll(): Unit = { val conf = new HdfsConfiguration() conf.setBoolean("dfs.permissions", true) System.clearProperty(MiniDFSCluster.PROP_TEST_BUILD_DATA) conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, testDataPath.pathAsString) //FileUtil.fullyDelete(testDataPath.toJava) conf.set(s"hadoop.proxyuser.${System.getProperties.get("user.name")}.groups", "*") conf.set(s"hadoop.proxyuser.${System.getProperties.get("user.name")}.hosts", "*") val builder = new MiniDFSCluster.Builder(conf) miniCluster = Try(builder.build()) fileSystem = miniCluster.map(_.getFileSystem) fileSystem.foreach(fs => { val confFile: File = confPath / "hdfs-site.xml" for { os <- confFile.newOutputStream.autoClosed } fs.getConf.writeXml(os) }) writeDf() } override def afterAll(): Unit = { miniCluster.foreach(_.shutdown(true)) val _ = testDataPath.parent.parent.delete(true) sparkSession.stop() } private def writeDf(): Unit = { import sparkSession.implicits._ alogger.info(s"TestDataPath ${testDataPath.toJava.getAbsolutePath}") alogger.info(s"ConfPath ${confPath.toJava.getAbsolutePath}") val persons = (1 to 10).map(i => Person(s"Andy$i", Random.nextInt(85), Address("Via Ciccio Cappuccio"))) val caseClassDS = persons.toDS() caseClassDS.write.format("parquet").mode(SaveMode.Overwrite).save(pathParquet) caseClassDS.write.format("com.databricks.spark.avro").mode(SaveMode.Overwrite).save(pathAvro) //writing directly the Person dataframe generates an exception caseClassDS.toDF.select("name", "age").write.format("csv").mode(SaveMode.Overwrite).option("header", "true").save(pathCsv) } } object DataFrameClasses { final case class Address(street: String) final case class Person(name: String, age: Int, address: Address) }
Example 41
Source File: SftpHandlerTest.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package it.gov.daf.ftp import org.scalatest.{FlatSpec, Matchers} class SftpHandlerTest extends FlatSpec with Matchers { val host = "edge1.platform.daf.gov.it" val user = "put your user" val pwd = "put your password" "A SftpHandler" should "be able to print the working directory" in { val client = new SftpHandler(user, pwd, host) val res = client.workingDir() println(res) res shouldBe 'Success client.disconnect() } it should "create a directory from an absolute path" in { val path = s"/home/$user/test" val client = new SftpHandler(user, pwd, host) val res = client.mkdir(path) println(res) res shouldBe 'Success client.rmdir(path) client.disconnect() } it should "create folders recursively from a relative path" in { val path = s"/home/$user/test/subtest" val client = new SftpHandler(user, pwd, host) val res = client.mkdir(path) println(res) res shouldBe 'Success client.rmdir(path) client.disconnect() } it should "create a directory from a relative path" in { val path = "test" val client = new SftpHandler(user, pwd, host) val res = client.mkdir(path) println(res) res shouldBe 'Success client.rmdir(path) client.disconnect() } }
Example 42
Source File: EventToKuduEventSpec.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package it.teamdigitale.events import it.gov.daf.iotingestion.event.Event import it.teamdigitale.EventModel.EventToKuduEvent import it.teamdigitale.EventModel.EventToStorableEvent import org.scalatest.{FlatSpec, Matchers} import scala.util.{Success, Try} class EventToKuduEventSpec extends FlatSpec with Matchers{ val metrics = Range(0,100).map(x => Success( Event( version = 1L, id = x + "metric", ts = System.currentTimeMillis(), event_type_id = 0, location = "41.1260529:16.8692905", source = "http://domain/sensor/url", body = Option("""{"rowdata": "this json should contain row data"}""".getBytes()), event_subtype_id = Some("SPEED_Via_Cernaia_TO"), attributes = Map( "value" -> x.toString) ))) // this metric doesn't have any value val wrongMetric = Success( Event( version = 1L, id = "wrongmetric1", ts = System.currentTimeMillis(), event_type_id = 0, location = "41.1260529:16.8692905", source = "http://domain/sensor/url", body = Option("""{"rowdata": "this json should contain row data"}""".getBytes()), event_annotation = Some(s"This is a free text for a wrong metric"), event_subtype_id = Some("SPEED_Via_Cernaia_TO"), attributes = Map() )) // this metric doesn't have a correct value val wrongMetric2 = Success( Event( version = 1L, id = "wrongmetric2", ts = System.currentTimeMillis(), event_type_id = 0, location = "41.1260529:16.8692905", source = "http://domain/sensor/url", body = Option("""{"rowdata": "this json should contain row data"}""".getBytes()), event_annotation = Some(s"This is a free text ©"), event_subtype_id = Some("SPEED_Via_Cernaia_TO"), attributes = Map( "value" -> "wrongValue" ) )) // this metric doesn't have the metric id val wrongMetric3 = Success( Event( version = 1L, id = "wrongmetric3", ts = System.currentTimeMillis(), event_type_id = 2, location = "41.1260529:16.8692905", source = "http://domain/sensor/url", body = Option("""{"rowdata": "this json should contain row data"}""".getBytes()), event_annotation = Some(s"This is a free text for a wrong metric"), attributes = Map( "value" -> "100" ) )) "Correct events" should "be converted" in { val res = metrics.map(event => EventToStorableEvent(event)).flatMap(_.toOption).map(event => EventToKuduEvent(event)).filter(_.isSuccess) res.length shouldBe 100 res.head.get.metric shouldBe 0D res.head.get.metric_id shouldBe "SPEED_Via_Cernaia_TO" } "Wrong events" should "be filtered" in { val seq = metrics ++ List(wrongMetric, wrongMetric2, wrongMetric3) val res = seq.map(event => EventToStorableEvent(event)).flatMap(_.toOption).map(event => EventToKuduEvent(event)).filter(_.isSuccess) res.length shouldBe 100 } }
Example 43
Source File: KuduMiniCluster.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package it.teamdigitale.miniclusters import org.apache.kudu.client.{KuduClient, MiniKuduCluster} import org.apache.kudu.client.KuduClient.KuduClientBuilder import org.apache.kudu.client.MiniKuduCluster.MiniKuduClusterBuilder import org.apache.kudu.spark.kudu.KuduContext import org.apache.spark.sql.SparkSession import org.apache.spark.{SparkConf, SparkContext} import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} import org.apache.logging.log4j.LogManager class KuduMiniCluster extends AutoCloseable { val alogger = LogManager.getLogger(this.getClass) var kuduMiniCluster: MiniKuduCluster = _ var kuduClient: KuduClient = _ var kuduContext: KuduContext = _ var sparkSession = SparkSession.builder().appName(s"test-${System.currentTimeMillis()}").master("local[*]").getOrCreate() def start() { alogger.info("Starting KUDU mini cluster") System.setProperty( "binDir", s"${System.getProperty("user.dir")}/src/test/kudu_executables/${sun.awt.OSInfo.getOSType().toString.toLowerCase}" ) kuduMiniCluster = new MiniKuduClusterBuilder() .numMasters(1) .numTservers(3) .build() val envMap = Map[String, String](("Xmx", "512m")) kuduClient = new KuduClientBuilder(kuduMiniCluster.getMasterAddresses).build() assert(kuduMiniCluster.waitForTabletServers(1)) kuduContext = new KuduContext(kuduMiniCluster.getMasterAddresses, sparkSession.sparkContext) } override def close() { alogger.info("Ending KUDU mini cluster") kuduClient.shutdown() kuduMiniCluster.shutdown() sparkSession.close() } } object KuduMiniCluster { def main(args: Array[String]): Unit = { try { val kudu = new KuduMiniCluster() kudu.start() println(s"MASTER KUDU ${kudu.kuduMiniCluster.getMasterAddresses}") while(true){ } } } }
Example 44
Source File: KuduEventsHandlerSpec.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package it.teamdigitale.storage

import java.io.File
import java.util.concurrent.TimeUnit

import org.apache.kudu.spark.kudu._
import it.teamdigitale.miniclusters.KuduMiniCluster
import it.teamdigitale.config.IotIngestionManagerConfig.KuduConfig
import it.teamdigitale.managers.IotIngestionManager
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers}
import it.gov.daf.iotingestion.event.Event
import it.teamdigitale.EventModel.{EventToKuduEvent, EventToStorableEvent}
import org.apache.logging.log4j.LogManager

import scala.util.{Failure, Success, Try}

class KuduEventsHandlerSpec extends FlatSpec with Matchers with BeforeAndAfterAll {
  val logger = LogManager.getLogger(this.getClass)
  val kuduCluster = new KuduMiniCluster()

  val metrics: Seq[Try[Event]] = Range(0, 100).map(x => Success(
    Event(
      version = 1L,
      id = x + "metric",
      ts = System.currentTimeMillis() + x,
      event_type_id = 0,
      location = "41.1260529:16.8692905",
      source = "http://domain/sensor/url",
      body = Option("""{"rowdata": "this json should contain row data"}""".getBytes()),
      event_subtype_id = Some("Via Cernaia(TO)"),
      attributes = Map("value" -> x.toString)
    )))

  val rdd = kuduCluster.sparkSession.sparkContext.parallelize(metrics)

  "KuduEventsHandler" should "store data correctly" in {
    val metricsRDD = rdd
      .map(event => EventToStorableEvent(event))
      .flatMap(e => e.toOption)
      .map(se => EventToKuduEvent(se)).flatMap(e => e.toOption)

    val metricsDF = kuduCluster.sparkSession.createDataFrame(metricsRDD)

    val kuduConfig = KuduConfig(kuduCluster.kuduMiniCluster.getMasterAddresses, "TestEvents", 2)

    KuduEventsHandler.getOrCreateTable(kuduCluster.kuduContext, kuduConfig)
    KuduEventsHandler.write(metricsDF, kuduCluster.kuduContext, kuduConfig)

    val df = kuduCluster.sparkSession.sqlContext
      .read
      .options(Map("kudu.master" -> kuduConfig.masterAdresses, "kudu.table" -> kuduConfig.eventsTableName))
      .kudu

    df.count shouldBe 100
  }

  "KuduEventsHandler" should "handle redundant data" in {
    val metricsRDD = rdd
      .map(event => EventToStorableEvent(event))
      .flatMap(e => e.toOption)
      .map(se => EventToKuduEvent(se))
      .flatMap(e => e.toOption)

    val metricsDF = kuduCluster.sparkSession.createDataFrame(metricsRDD)

    val kuduConfig = KuduConfig(kuduCluster.kuduMiniCluster.getMasterAddresses, "TestEventsDuplicate", 2)

    KuduEventsHandler.getOrCreateTable(kuduCluster.kuduContext, kuduConfig)
    KuduEventsHandler.write(metricsDF, kuduCluster.kuduContext, kuduConfig)
    KuduEventsHandler.write(metricsDF, kuduCluster.kuduContext, kuduConfig)

    val df = kuduCluster.sparkSession.sqlContext
      .read
      .options(Map("kudu.master" -> kuduConfig.masterAdresses, "kudu.table" -> kuduConfig.eventsTableName))
      .kudu

    df.count shouldBe 100
  }

  override def beforeAll() {
    kuduCluster.start()
  }

  override def afterAll() {
    // shut down the mini cluster once all tests have run
    kuduCluster.close()
  }
}
Example 45
Source File: GranularBigMatrixSpec.scala From glint with MIT License | 5 votes |
package glint.matrix import glint.SystemTest import glint.models.client.granular.GranularBigMatrix import org.scalatest.{FlatSpec, Matchers} class GranularBigMatrixSpec extends FlatSpec with SystemTest with Matchers { "A GranularBigMatrix" should "handle large push/pull requests" in withMaster { _ => withServers(2) { _ => withClient { client => val model = client.matrix[Double](1000, 1000) val granularModel = new GranularBigMatrix[Double](model, 10000) val rows = new Array[Long](1000000) val cols = new Array[Int](1000000) val values = new Array[Double](1000000) var i = 0 while (i < rows.length) { rows(i) = i % 1000 cols(i) = i / 1000 values(i) = i * 3.14 i += 1 } whenReady(granularModel.push(rows, cols, values)) { identity } val result = whenReady(granularModel.pull(rows, cols)) { identity } result shouldEqual values } } } it should " handle large pull requests for rows" in withMaster { _ => withServers(3) { _ => withClient { client => val model = client.matrix[Double](1000, 1000) val granularModel = new GranularBigMatrix[Double](model, 10000) val rows = new Array[Long](1000000) val cols = new Array[Int](1000000) val values = new Array[Double](1000000) var i = 0 while (i < rows.length) { rows(i) = i % 1000 cols(i) = i / 1000 values(i) = i * 3.14 i += 1 } whenReady(granularModel.push(rows, cols, values)) { identity } val result = whenReady(granularModel.pull((0L until 1000L).toArray)) { identity } i = 0 while (i < rows.length) { assert(result(i % 1000)(i / 1000) == values(i)) i += 1 } } } } it should "have non-zero max message size" in withMaster { _ => withServers(2) { _ => withClient { client => val model = client.matrix[Double](10, 10) intercept[IllegalArgumentException] { new GranularBigMatrix(model, 0) } intercept[IllegalArgumentException] { new GranularBigMatrix(model, -1) } } } } }
Example 46
Source File: GranularBigVectorSpec.scala From glint with MIT License | 5 votes |
package glint.vector import scala.util.Random import glint.SystemTest import glint.models.client.granular.GranularBigVector import org.scalatest.{FlatSpec, Matchers} class GranularBigVectorSpec extends FlatSpec with SystemTest with Matchers { "A GranularBigVector" should "handle large push/pull requests" in withMaster { _ => withServers(2) { _ => withClient { client => val size = 1e6.toInt val rng = new Random() rng.setSeed(42) val model = client.vector[Double](size) val granularModel = new GranularBigVector(model, 1000) val keys = (0 until size).map(_.toLong).toArray val values = Array.fill(size) { rng.nextDouble() } whenReady(granularModel.push(keys, values)) { identity } val result = whenReady(granularModel.pull(keys)) { identity } result shouldEqual values } } } it should "have non-zero max message size" in withMaster { _ => withServers(2) { _ => withClient { client => val model = client.vector[Double](10) intercept[IllegalArgumentException] { val granularModel = new GranularBigVector(model, 0) } intercept[IllegalArgumentException] { val granularModel = new GranularBigVector(model, -1) } } } } }
Example 47
Source File: ColumnIteratorSpec.scala From glint with MIT License | 5 votes |
package glint.iterators import akka.util.Timeout import glint.SystemTest import glint.mocking.MockBigMatrix import org.scalatest.{FlatSpec, Matchers} import scala.concurrent.ExecutionContext import scala.concurrent.duration._ class ColumnIteratorSpec extends FlatSpec with SystemTest with Matchers { "A ColumnIterator" should "iterate over all columns in order" in { // Construct mock matrix and data to push into it val nrOfRows = 2 val nrOfCols = 4 val mockMatrix = new MockBigMatrix[Long](nrOfRows, nrOfCols, 0, _ + _) val rows = Array(0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L) val cols = Array( 0, 0, 1, 1, 2, 2, 3, 3) val values = Array(0L, 1, 2, 3, 4, 5, 6, 7) whenReady(mockMatrix.push(rows, cols, values)) { identity } // Check whether elements are in order var counter = 0 val iterator = new ColumnIterator[Long](mockMatrix) iterator.foreach { case column => column.foreach { case value => assert(value == counter) counter += 1 } } } it should "iterate over all columns in order with larger rows" in { // Construct mock matrix and data to push into it val nrOfRows = 4 val nrOfCols = 2 val mockMatrix = new MockBigMatrix[Long](nrOfRows, nrOfCols, 0, _ + _) val rows = Array(0L, 1L, 2L, 3L, 0L, 1L, 2L, 3L) val cols = Array( 0, 0, 0, 0, 1, 1, 1, 1) val values = Array(0L, 1, 2, 3, 4, 5, 6, 7) whenReady(mockMatrix.push(rows, cols, values)) { identity } // Check whether elements are in order var counter = 0 val iterator = new ColumnIterator[Long](mockMatrix) iterator.foreach { case column => column.foreach { case value => assert(value == counter) counter += 1 } } } it should "not iterate over an empty matrix" in { val mockMatrix = new MockBigMatrix[Double](0, 2, 0, _ + _) val iterator = new ColumnIterator[Double](mockMatrix) assert(!iterator.hasNext) iterator.foreach { case _ => fail("This should never execute") } } }
Example 48
Source File: RowBlockIteratorSpec.scala From glint with MIT License | 5 votes |
package glint.iterators import akka.util.Timeout import glint.SystemTest import glint.mocking.MockBigMatrix import org.scalatest.{FlatSpec, Matchers} import scala.concurrent.ExecutionContext import scala.concurrent.duration._ class RowBlockIteratorSpec extends FlatSpec with SystemTest with Matchers { "A RowBlockIterator" should "iterate over all blocks of rows in order" in { // Construct mock matrix and data to push into it val nrOfRows = 5 val nrOfCols = 2 val mockMatrix = new MockBigMatrix[Long](nrOfRows, nrOfCols, 0, _ + _) val rows = Array(0L, 0L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L) val cols = Array( 0, 1, 0, 1, 0, 1, 0, 1, 0, 1) val values = Array(0L, 1, 2, 3, 4, 5, 6, 7, 8, 9) whenReady(mockMatrix.push(rows, cols, values)) { identity } // Check whether elements are in order var counter = 0 val iterator = new RowBlockIterator[Long](mockMatrix, 2) iterator.foreach { case rows => rows.foreach { case row => row.foreach { case value => assert(value == counter) counter += 1 } } } } it should "iterate over a single block" in { // Construct mock matrix and data to push into it val nrOfRows = 5 val nrOfCols = 2 val mockMatrix = new MockBigMatrix[Long](nrOfRows, nrOfCols, 0, _ + _) val rows = Array(0L, 0L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L) val cols = Array(0, 1, 0, 1, 0, 1, 0, 1, 0, 1) val values = Array(0L, 1, 2, 3, 4, 5, 6, 7, 8, 9) whenReady(mockMatrix.push(rows, cols, values)) { identity } // Check whether elements are in order var counter = 0 val iterator = new RowBlockIterator[Long](mockMatrix, 7) val resultRows = iterator.next() assert(!iterator.hasNext) resultRows.foreach { case row => row.foreach { case value => assert(value == counter) counter += 1 } } } it should "not iterate over an empty matrix" in { val mockMatrix = new MockBigMatrix[Double](0, 5, 0, _ + _) val iterator = new RowBlockIterator[Double](mockMatrix, 3) assert(!iterator.hasNext) iterator.foreach { case _ => fail("This should never execute") } } }
Example 49
Source File: RowIteratorSpec.scala From glint with MIT License | 5 votes |
package glint.iterators import akka.util.Timeout import glint.SystemTest import glint.mocking.MockBigMatrix import org.scalatest.{FlatSpec, Matchers} import scala.concurrent.ExecutionContext import scala.concurrent.duration._ class RowIteratorSpec extends FlatSpec with SystemTest with Matchers { "A RowIterator" should "iterate over all rows in order" in { // Construct mock matrix and data to push into it val nrOfRows = 5 val nrOfCols = 2 val mockMatrix = new MockBigMatrix[Long](nrOfRows, nrOfCols, 0, _ + _) val rows = Array(0L, 0L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L) val cols = Array( 0, 1, 0, 1, 0, 1, 0, 1, 0, 1) val values = Array(0L, 1, 2, 3, 4, 5, 6, 7, 8, 9) whenReady(mockMatrix.push(rows, cols, values)) { identity } // Check whether elements are in order var counter = 0 val iterator = new RowIterator[Long](mockMatrix, 2) iterator.foreach { case row => row.foreach { case value => assert(value == counter) counter += 1 } } } it should "iterate over a single block" in { // Construct mock matrix and data to push into it val nrOfRows = 5 val nrOfCols = 2 val mockMatrix = new MockBigMatrix[Long](nrOfRows, nrOfCols, 0, _ + _) val rows = Array(0L, 0L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L) val cols = Array(0, 1, 0, 1, 0, 1, 0, 1, 0, 1) val values = Array(0L, 1, 2, 3, 4, 5, 6, 7, 8, 9) whenReady(mockMatrix.push(rows, cols, values)) { identity } // Check whether elements are in order var counter = 0 val iterator = new RowIterator[Long](mockMatrix, 7) iterator.foreach { case row => row.foreach { case value => assert(value == counter) counter += 1 } } assert(!iterator.hasNext) } it should "not iterate over an empty matrix" in { val mockMatrix = new MockBigMatrix[Double](3, 0, 0, _ + _) val iterator = new RowBlockIterator[Double](mockMatrix, 3) assert(!iterator.hasNext) iterator.foreach { case _ => fail("This should never execute") } } }
Example 50
Source File: Test_Flat_03_ScalaTest.scala From LearningScala with Apache License 2.0 | 5 votes |
package _100_assertions_and_tests // BDD import _100_assertions_and_tests._03_ScalaTest._ import org.scalatest.{FlatSpec, Matchers} class Test_Flat_03_ScalaTest extends FlatSpec with Matchers { "The account" should "approve deposit of a positive amount" in { val currentBalance = balance val amount = 20 deposit(amount) balance should be(currentBalance + amount) } it should "approve withdraw of positive amount if amount is sufficient" in { balance = 20 val currentBalance = balance val amount = 10 withdraw(amount) balance should be(currentBalance - amount) } it must "decline negative amount in withdraw" in { balance = 20 val currentBalance = balance val amount = -20 an[Error] must be thrownBy { withdraw(amount) balance = currentBalance - amount } } it must "decline withdraw if amount > balance" in { balance = 20 val currentBalance = balance val amount = 100 an[Error] must be thrownBy { withdraw(amount) balance = currentBalance - amount } } }
Example 51
Source File: ModelSpec.scala From scala-spark-cab-rides-predictions with MIT License | 5 votes |
import MockedResponse._ import com.lyft.networking.apiObjects.CostEstimate import com.uber.sdk.rides.client.model.PriceEstimate import models._ import org.scalatest.{FlatSpec, Matchers} class ModelSpec extends FlatSpec with Matchers { behavior of "Models" "UberPriceModel" should "return CabPrice" in { import com.google.gson.Gson val g = new Gson val priceEstimate = g.fromJson(priceEstimateJson, classOf[PriceEstimate]) val source = Location("source_test", 45f, 34f) val destination = Location("destination_test", 55f, 34f) UberPriceModel(priceEstimate, source, destination) should matchPattern { case CabPrice("Uber", "a1111c8c-c720-46c3-8534-2fcdd730040d", "uberX", _, Some(6.17f), 1.0, _, "source_test", "destination_test", _) => } } "LyftPriceModel" should "return CabPrice" in { import com.google.gson.Gson val g = new Gson val costEstimate = g.fromJson(costEstimateJson, classOf[CostEstimate]) val source = Location("source_test", 45f, 34f) val destination = Location("destination_test", 55f, 34f) LyftPriceModel(costEstimate, source, destination) should matchPattern { case CabPrice("Lyft", "lyft_plus", "Lyft Plus", _, Some(3.29f), 1.25, _, "source_test", "destination_test", _) => } } "WeatherModel" should "return Weather" in { val location = Location("source_test", 45f, 34f) WeatherModel(weatherResponse, location) should matchPattern { case Weather("source_test", Some(40.38f), Some(0.64f), Some(995.66f), Some(0.83f), None, None, Some(11.05f), 1543448090) => } } }
Example 52
Source File: LocationSpec.scala From scala-spark-cab-rides-predictions with MIT License | 5 votes |
import models.{Location, LocationRepository} import org.scalatest.{FlatSpec, Matchers} import scala.collection.Seq class LocationSpec extends FlatSpec with Matchers { behavior of "Location" "repository locations" should " return Seq[Location]" in { val locations = LocationRepository.getLocations locations should matchPattern { case _: Seq[Location] => } } "random pairing of locations" should "return Seq oftype of locations" in { val locationsTuples = LocationRepository.getPairedLocations locationsTuples should matchPattern { case _: Seq[(Location, Location)] => } } }
Example 53
Source File: TemplateRenderingSpec.scala From avoin-voitto with MIT License | 5 votes |
package liigavoitto.journalist.utils import liigavoitto.util.Logging import org.scalatest.{FlatSpec, Matchers} class TemplateRenderingSpec extends FlatSpec with Matchers with Logging with WeightedRandomizer { val sampleSize = 1000000 val maxDeviationThreshold = 0.1 "TemplateRendering" should "render a template without variables and preserve weight" in { val template = Template("This is a template.") val result = TemplateRendering.render(template, Map()) result shouldBe Some(RenderedTemplate(template.template, template.weight)) } it should "render a template with a variable and a custom weight" in { val template = Template("This is a {{template}}.", 0.5) val attr = Map("template" -> "text") val result = TemplateRendering.render(template, attr) result shouldBe Some(RenderedTemplate("This is a text.", 0.5)) } "WeightedRandomizer" should "always pick from a list of 1" in { val templateList = List(RenderedTemplate("test", 1.0)) val result = weightedRandom(templateList) result shouldBe Some("test") } it should "return None with an empty list" in { weightedRandom(List()) shouldBe None } it should "return equal amounts with default weights" in { def getRandom = { weightedRandom(List( RenderedTemplate("first", 1.0), RenderedTemplate("second", 1.0), RenderedTemplate("third", 1.0) )) } val results = (1 to sampleSize).flatMap(_ => getRandom) val expected = sampleSize / 3 results.count(t => t == "first") should beCloseToExpected(expected) results.count(t => t == "second") should beCloseToExpected(expected) results.count(t => t == "third") should beCloseToExpected(expected) } it should "return different amounts with weighted templates" in { def getRandom = { weightedRandom(List( RenderedTemplate("first", 1.0), RenderedTemplate("second", 0.5) )) } val results = (1 to sampleSize).flatMap(_ => getRandom) val expected = sampleSize / 3 results.count(t => t == "second") should beCloseToExpected(expected) } it should "work even if the templates are not sorted by weight" in { def getRandom = { weightedRandom(List( RenderedTemplate("first", 1.0), RenderedTemplate("second", 0.5), RenderedTemplate("third", 1.0), RenderedTemplate("fourth", 0.5), RenderedTemplate("fifth", 1.0) )) } val results = (1 to sampleSize).flatMap(_ => getRandom) // each 1.0 weight template should have 1/4 of the results val expected = sampleSize / 4 results.count(t => t == "first") should beCloseToExpected(expected) results.count(t => t == "third") should beCloseToExpected(expected) results.count(t => t == "fifth") should beCloseToExpected(expected) // second and fourth should have 1/4 combined results.count(t => t == "second" || t == "fourth") should beCloseToExpected(expected) } def beCloseToExpected(expected: Double) = { val lowerBound = (expected - (expected * maxDeviationThreshold)).toInt val higherBound = (expected + (expected * maxDeviationThreshold)).toInt be >= lowerBound and be <= higherBound } }
Example 54
Source File: MustacheSpec.scala From avoin-voitto with MIT License | 5 votes |
package robottijournalismi.service.journalist.utils import liigavoitto.journalist.utils.Mustache import liigavoitto.journalist.utils.Mustache.ValueNotFoundException import org.scalatest.{ FlatSpec, Matchers } class MustacheSpec extends FlatSpec with Matchers { "Mustache" should "fail rendering when value is not found" in { val ctx = Map("something" -> "yes") assertThrows[ValueNotFoundException] { Mustache("testing {{something}}, {{eiole}}").apply(ctx) } } it should "render a tag with ':' in it" in { val ctx = Map("winner:n" -> "voittajan") Mustache("Löysin {{winner:n}}.").apply(ctx) shouldEqual "Löysin voittajan." } }
Example 55
Source File: TemplateUtilsSpec.scala From avoin-voitto with MIT License | 5 votes |
package liigavoitto.journalist.utils import liigavoitto.journalist.MockData import liigavoitto.scores._ import org.scalatest.{FlatSpec, Matchers} class TemplateUtilsSpec extends FlatSpec with Matchers with TemplateUtils with MockData { "TemplateUtilsSpec" should "return correct numeral declensions" in { val numberFi = numeralWithDeclensions("number6", 6, "fi") val numberSv = numeralWithDeclensions("number6", 6, "sv") numberFi("number6:nominative") shouldBe "kuusi" numberFi("number6:ordinalGenitive") shouldBe "kuudennen" numberSv("number6:nominative") shouldBe "sex" } it should "return correct declension for name" in { val feedPlayerDeclensions = feedPlayerWithDeclensions("testPlayer", FeedPlayer("jkl-29837610", PlayerName("Teemu", "Väyrynen"), None)) feedPlayerDeclensions("testPlayer") shouldBe "Teemu Väyrynen" feedPlayerDeclensions("testPlayer:genitive") shouldBe "Teemu Väyrysen" feedPlayerDeclensions("testPlayer:ablative") shouldBe "Teemu Väyryseltä" feedPlayerDeclensions("testPlayer:allative") shouldBe "Teemu Väyryselle" feedPlayerDeclensions("testPlayer:adessive") shouldBe "Teemu Väyrysellä" feedPlayerDeclensions("testPlayer.last") shouldBe "Väyrynen" feedPlayerDeclensions("testPlayer.last:genitive") shouldBe "Väyrysen" feedPlayerDeclensions("testPlayer.last:ablative") shouldBe "Väyryseltä" feedPlayerDeclensions("testPlayer.last:allative") shouldBe "Väyryselle" feedPlayerDeclensions("testPlayer.last:adessive") shouldBe "Väyrysellä" } it should "return correct declension for team" in { val teamMeta = Meta(List[Image](), None) val teamDeclension = teamWithDeclensions("testTeam", Team("jkl-624554857", "Lukko", "Lukko", teamMeta, None, List[Player]()), "fi") teamDeclension("testTeam:genitive") shouldBe "Lukon" teamDeclension("testTeam:accusative") shouldBe "Lukon" teamDeclension("testTeam:allative") shouldBe "Lukolle" teamDeclension("testTeam:elative") shouldBe "Lukosta" teamDeclension("testTeam:partitive") shouldBe "Lukkoa" } }
Example 56
Source File: TemplateLoaderSpec.scala From avoin-voitto with MIT License | 5 votes |
package liigavoitto.journalist.utils

import org.scalatest.{FlatSpec, Matchers}

class TemplateLoaderSpec extends FlatSpec with Matchers with TemplateLoader {
  val testData = load("template/loader-test.edn", "score-over-three-goals", "fi")

  "TemplateLoader" should "load templates from an edn file" in {
    testData.length shouldBe 2
    testData.head shouldBe Template("{{bestPlayer}} tykitti {{bestPlayer.goals:text}} maalia")
  }

  it should "parse template weight if set" in {
    testData(1).weight shouldBe 0.5
  }
}
Example 57
Source File: ScoresFromFilesFetcherSpec.scala From avoin-voitto with MIT License | 5 votes |
package liigavoitto.fetch

import org.scalatest.{FlatSpec, Matchers}

class ScoresFromFilesFetcherSpec extends FlatSpec with Matchers {
  "ScoresFromFilesFetcher" should "fetch match data from files" in {
    val matchId = "jkl-0-2018-3814"
    val fetcher = new ScoresFromFilesFetcher(matchId)
    val data = fetcher.getEnrichedMatchData
    data.mtch.id shouldEqual matchId
    data.seriesId shouldEqual "mestis"
    data.allAwayTeamMatches.length shouldEqual 38
    data.allHomeTeamMatches.length shouldEqual 36
    data.playerStats.length shouldEqual 466
    data.leagueTable.length shouldEqual 12
  }

  it should "filter matches in match list in older games" in {
    val matchId = "jkl-0-2018-3748"
    val fetcher = new ScoresFromFilesFetcher(matchId)
    val data = fetcher.getEnrichedMatchData
    data.mtch.id shouldEqual matchId
    data.seriesId shouldEqual "mestis"
    data.allAwayTeamMatches.length shouldEqual 17
    data.allHomeTeamMatches.length shouldEqual 18
  }
}
Example 58
Source File: ServerSpec.scala From seals with Apache License 2.0 | 5 votes |
package com.example.server import java.util.concurrent.Executors import scala.concurrent.ExecutionContext import cats.effect.{ IO, Blocker, ContextShift } import org.scalatest.{ FlatSpec, Matchers, BeforeAndAfterAll } import fs2.{ Stream, Chunk } import scodec.bits._ import scodec.Codec import dev.tauri.seals.scodec.Codecs._ import com.example.proto._ class ServerSpec extends FlatSpec with Matchers with BeforeAndAfterAll { implicit val cs: ContextShift[IO] = IO.contextShift(ExecutionContext.global) val ex = Executors.newCachedThreadPool() val ec = ExecutionContext.fromExecutor(ex) val bl = Blocker.liftExecutionContext(ec) val (sg, closeSg) = fs2.io.tcp.SocketGroup[IO](bl).allocated.unsafeRunSync() override def afterAll(): Unit = { super.afterAll() closeSg.unsafeRunSync() ex.shutdown() } "Server" should "respond to a request" in { val responses: Vector[Response] = Stream( Server.serve(Server.port, sg).drain, client(Server.port) ).parJoin(Int.MaxValue).take(1L).compile.toVector.unsafeRunSync() responses should === (Vector(Ok)) } def client(port: Int): Stream[IO, Response] = { Stream.resource(sg.client[IO](Server.addr(port))).flatMap { socket => val bvs: Stream[IO, BitVector] = Stream(Codec[Request].encode(ReSeed(56)).require) val bs: Stream[IO, Byte] = bvs.flatMap { bv => Stream.chunk(Chunk.bytes(bv.bytes.toArray)) } val read = bs.through(socket.writes(Server.timeout)).drain.onFinalize(socket.endOfOutput) ++ socket.reads(Server.bufferSize, Server.timeout).chunks.map(ch => BitVector.view(ch.toArray)) read.fold(BitVector.empty)(_ ++ _).map(bv => Codec[Response].decode(bv).require.value) } } }
Example 59
Source File: ExceptionSerializerSpec.scala From reliable-http-client with Apache License 2.0 | 5 votes |
package rhttpc.transport.json4s import org.json4s.{DefaultFormats, TypeHints} import org.scalatest.{FlatSpec, Matchers, TryValues} class ExceptionSerializerSpec extends FlatSpec with Matchers with TryValues { it should "round-trip serialize case class exception" in { roundTrip(CaseClassException(123)) } it should "round-trip serialize exception with message" in { roundTrip(new ExceptionWithMessage("foo")) } it should "round-trip serialize exception with null message" in { roundTrip(new ExceptionWithMessage(null)) } it should "round-trip serialize exception with message and cause" in { roundTrip(new ExceptionWithMessageAndCause("foo", CaseClassException(123))) } private def roundTrip(ex: Throwable): Unit = { implicit val formats = new DefaultFormats { override val typeHints: TypeHints = AllTypeHints } + ExceptionSerializer val serializer = new Json4sSerializer[Throwable]() val deserializer = new Json4sDeserializer[Throwable]() val serialized = serializer.serialize(ex) val deserialized = deserializer.deserialize(serialized) deserialized.success.value shouldEqual ex } } case class CaseClassException(x: Int) extends Exception(s"x: $x") class ExceptionWithMessage(msg: String) extends Exception(msg) { def canEqual(other: Any): Boolean = other.isInstanceOf[ExceptionWithMessage] override def equals(other: Any): Boolean = other match { case that: ExceptionWithMessage => (that canEqual this) && getMessage == that.getMessage case _ => false } override def hashCode(): Int = { val state = Seq(getMessage) state.map(_.hashCode()).foldLeft(0)((a, b) => 31 * a + b) } } class ExceptionWithMessageAndCause(msg: String, cause: Throwable) extends Exception(msg, cause) { def canEqual(other: Any): Boolean = other.isInstanceOf[ExceptionWithMessageAndCause] override def equals(other: Any): Boolean = other match { case that: ExceptionWithMessageAndCause => (that canEqual this) && getMessage == that.getMessage && getCause == that.getCause case _ => false } override def hashCode(): Int = { val state = Seq(getMessage, getCause) state.map(_.hashCode()).foldLeft(0)((a, b) => 31 * a + b) } }
Example 60
Source File: ConfigParserSpec.scala From reliable-http-client with Apache License 2.0 | 5 votes |
package rhttpc.client.config import com._ import org.scalatest.{FlatSpec, Matchers} import rhttpc.client.proxy.{BackoffRetry, HandleAll} import scala.concurrent.duration._ class ConfigParserSpec extends FlatSpec with Matchers { it should "parse config with backoff strategy" in { val config = typesafe.config.ConfigFactory.parseString( """x { | queuesPrefix = "rhttpc" | batchSize = 10 | parallelConsumers = 1 | retryStrategy { | initialDelay = 5 seconds | multiplier = 1.2 | maxRetries = 3 | } |} """.stripMargin) ConfigParser.parse(config, "x") shouldEqual RhttpcConfig("rhttpc", 10, 1, BackoffRetry(5.seconds, 1.2, 3, None)) } it should "parse config with publish all strategy" in { val config = typesafe.config.ConfigFactory.parseString( """x { | queuesPrefix = "rhttpc" | batchSize = 10 | parallelConsumers = 1 | retryStrategy = handle-all |} """.stripMargin) ConfigParser.parse(config, "x") shouldEqual RhttpcConfig("rhttpc", 10, 1, HandleAll) } it should "parse config with backoff with deadline strategy" in { val config = typesafe.config.ConfigFactory.parseString( """x { | queuesPrefix = "rhttpc" | batchSize = 10 | parallelConsumers = 1 | retryStrategy { | initialDelay = 5 seconds | multiplier = 1.2 | maxRetries = 3 | deadline = 5 seconds | } |} """.stripMargin) ConfigParser.parse(config, "x") shouldEqual RhttpcConfig("rhttpc", 10, 1, BackoffRetry(5 seconds, 1.2, 3, Some(5 seconds))) } }
Example 61
Source File: DataStreamExpressionsTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.datastream import io.eels.Row import io.eels.schema.{DoubleType, Field, IntType, LongType, StructType} import org.scalatest.{FlatSpec, Matchers} class DataStreamExpressionsTest extends FlatSpec with Matchers { val schema = StructType( Field("artist"), Field("year", IntType()), Field("album"), Field("sales", LongType()) ) val ds = DataStream.fromRows(schema, Row(schema, Vector("Elton John", 1969, "Empty Sky", 1433)), Row(schema, Vector("Elton John", 1971, "Madman Across the Water", 7636)), Row(schema, Vector("Elton John", 1972, "Honky Château", 2525)), Row(schema, Vector("Elton John", 1973, "Goodbye Yellow Brick Road", 4352)), Row(schema, Vector("Elton John", 1975, "Rock of the Westies", 5645)), Row(schema, Vector("Kate Bush", 1978, "The Kick Inside", 2577)), Row(schema, Vector("Kate Bush", 1978, "Lionheart", 745)), Row(schema, Vector("Kate Bush", 1980, "Never for Ever", 7444)), Row(schema, Vector("Kate Bush", 1982, "The Dreaming", 8253)), Row(schema, Vector("Kate Bush", 1985, "Hounds of Love", 2495)) ) "DataStream.filter" should "support expressions" in { import io.eels._ ds.filter(select("album") === "Lionheart") .collectValues shouldBe Vector(Vector("Kate Bush", 1978, "Lionheart", 745)) } "DataStream.addField" should "support multiply expressions" in { import io.eels._ ds.filter(select("album") === "Lionheart") .addField(Field("woo", DoubleType), select("year") * 1.2) .collectValues shouldBe Vector(Vector("Kate Bush", 1978, "Lionheart", 745, BigDecimal(2373.6))) } "DataStream.addField" should "support addition expressions" in { import io.eels._ ds.filter(select("album") === "Lionheart") .addField(Field("woo", DoubleType), select("year") + 1.2) .collectValues shouldBe Vector(Vector("Kate Bush", 1978, "Lionheart", 745, BigDecimal(1979.2))) } "DataStream.addField" should "support subtraction expressions" in { import io.eels._ ds.filter(select("album") === "Lionheart") .addField(Field("woo", DoubleType), select("year") - 1.2) .collectValues shouldBe Vector(Vector("Kate Bush", 1978, "Lionheart", 745, BigDecimal(1976.8))) } "DataStream.addField" should "support division expressions" in { import io.eels._ ds.filter(select("album") === "Lionheart") .addField(Field("woo", DoubleType), select("year") / 2) .collectValues shouldBe Vector(Vector("Kate Bush", 1978, "Lionheart", 745, 989)) } }
Example 62
Source File: ParquetProjectionTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.parquet import java.io.{File, FilenameFilter} import io.eels.datastream.DataStream import io.eels.schema.{Field, StringType, StructType} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.scalatest.{FlatSpec, Matchers} class ParquetProjectionTest extends FlatSpec with Matchers { cleanUpResidualParquetTestFiles private val schema = StructType( Field("name", StringType, nullable = false), Field("job", StringType, nullable = false), Field("location", StringType, nullable = false) ) private val ds = DataStream.fromValues( schema, Seq( Vector("clint eastwood", "actor", "carmel"), Vector("elton john", "musician", "pinner") ) ) private implicit val conf = new Configuration() private implicit val fs = FileSystem.get(new Configuration()) private val file = new File(s"test_${System.currentTimeMillis()}.pq") file.deleteOnExit() private val path = new Path(file.toURI) if (fs.exists(path)) fs.delete(path, false) ds.to(ParquetSink(path).withOverwrite(true)) "ParquetSource" should "support projections" in { val rows = ParquetSource(path).withProjection("name").toDataStream().collect rows.map(_.values) shouldBe Vector(Vector("clint eastwood"), Vector("elton john")) } it should "return all data when no projection is set" in { val rows = ParquetSource(path).toDataStream().collect rows.map(_.values) shouldBe Vector(Vector("clint eastwood", "actor", "carmel"), Vector("elton john", "musician", "pinner")) } private def cleanUpResidualParquetTestFiles = { new File(".").listFiles(new FilenameFilter { override def accept(dir: File, name: String): Boolean = { (name.startsWith("test_") && name.endsWith(".pq")) || (name.startsWith(".test_") && name.endsWith(".pq.crc")) } }).foreach(_.delete()) } }
Example 63
Source File: AvroAndParquetCrossCompatibilityTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.parquet import io.eels.component.parquet.avro.{AvroParquetSink, AvroParquetSource} import io.eels.datastream.DataStream import io.eels.schema.{Field, StringType, StructType} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.scalatest.{FlatSpec, Matchers} // tests that avro source/sink and avro parquet source/sink can write/read each others files class AvroAndParquetCrossCompatibilityTest extends FlatSpec with Matchers { private implicit val conf = new Configuration() private implicit val fs = FileSystem.get(new Configuration()) "AvroParquetSource and ParquetSource" should "be compatible" in { val path = new Path("cross.pq") if (fs.exists(path)) fs.delete(path, false) val structType = StructType( Field("name", StringType, nullable = false), Field("location", StringType, nullable = false) ) val ds = DataStream.fromValues( structType, Seq( Vector("clint eastwood", "carmel"), Vector("elton john", "pinner") ) ) ds.to(ParquetSink(path)) AvroParquetSource(path).toDataStream().collect shouldBe ds.collect fs.delete(path, false) ds.to(AvroParquetSink(path)) ParquetSource(path).toDataStream().collect shouldBe ds.collect fs.delete(path, false) } }
Example 64
Source File: KuduComponentTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.kudu import io.eels.Row import io.eels.datastream.DataStream import io.eels.schema._ import org.scalatest.{FlatSpec, Matchers, Tag} object Kudu extends Tag("kudu") class KuduComponentTest extends FlatSpec with Matchers { "kudu" should "support end to end sink to source" taggedAs Kudu in { val schema = StructType( Field("planet", StringType, nullable = false, key = true), Field("position", StringType, nullable = true) ) val ds = DataStream.fromValues( schema, Seq( Vector("earth", 3), Vector("saturn", 6) ) ) val master = "localhost:7051" ds.to(KuduSink(master, "mytable")) val rows = KuduSource(master, "mytable").toDataStream().collect rows shouldBe Seq( Row(schema, Vector("earth", "3")), Row(schema, Vector("saturn", "6")) ) } it should "support all basic types" taggedAs Kudu in { val schema = StructType( Field("planet", StringType, nullable = false, key = true), Field("position", ByteType.Signed, nullable = false), Field("volume", DoubleType, nullable = false), Field("bytes", BinaryType, nullable = false), Field("gas", BooleanType, nullable = false), Field("distance", LongType.Signed, nullable = false) ) val data = Array("earth", 3: Byte, 4515135988.632, Array[Byte](1, 2, 3), false, 83000000) val ds = DataStream.fromValues(schema, Seq(data)) val master = "localhost:7051" ds.to(KuduSink(master, "mytable2")) val rows = KuduSource(master, "mytable2").toDataStream().collect val values = rows.head.values.toArray data(3) = data(3).asInstanceOf[Array[Byte]].toList values shouldBe data } }
Example 65
Source File: HiveTableFilesFnTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.hive import java.nio.file.Paths import com.sksamuel.exts.Logging import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.hdfs.MiniDFSCluster import org.apache.hadoop.hive.metastore.IMetaStoreClient import org.apache.hadoop.hive.metastore.api.Table import org.scalatest.mockito.MockitoSugar import org.scalatest.{FlatSpec, Matchers} class HiveTableFilesFnTest extends FlatSpec with Matchers with Logging with MockitoSugar { System.clearProperty(MiniDFSCluster.PROP_TEST_BUILD_DATA) val clusterPath = Paths.get("miniclusters", "cluster") val conf = new Configuration() conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, clusterPath.toAbsolutePath.toString) val cluster = new MiniDFSCluster.Builder(conf).build() implicit val fs = cluster.getFileSystem "HiveTableFilesFn" should "detect all files in root when no partitions" in { implicit val client = mock[IMetaStoreClient] org.mockito.Mockito.when(client.getTable("default", "mytable")).thenReturn(new Table) val root = new Path("tab1") fs.mkdirs(root) // table scanner will skip 0 length files val a = fs.create(new Path(root, "a")) a.write(1) a.close() val b = fs.create(new Path(root, "b")) b.write(1) b.close() HiveTableFilesFn("default", "mytable", fs.resolvePath(root), Nil).values.flatten.map(_.getPath.getName).toSet shouldBe Set("a", "b") } it should "ignore hidden files in root when no partitions" in { implicit val client = mock[IMetaStoreClient] org.mockito.Mockito.when(client.getTable("default", "mytable")).thenReturn(new Table) val root = new Path("tab2") fs.mkdirs(root) // table scanner will skip 0 length files val a = fs.create(new Path(root, "a")) a.write(1) a.close() val b = fs.create(new Path(root, "_b")) b.write(1) b.close() HiveTableFilesFn("default", "mytable", fs.resolvePath(root), Nil).values.flatten.map(_.getPath.getName).toSet shouldBe Set("a") } }
Example 66
Source File: OrcPredicateTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.orc import java.io.{File, FilenameFilter} import io.eels.Predicate import io.eels.datastream.DataStream import io.eels.schema.{Field, LongType, StringType, StructType} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} class OrcPredicateTest extends FlatSpec with Matchers with BeforeAndAfterAll { cleanUpResidualOrcTestFiles val schema = StructType( Field("name", StringType, nullable = true), Field("city", StringType, nullable = true), Field("age", LongType.Signed, nullable = true) ) val values = Vector.fill(1000) { Vector("sam", "middlesbrough", 37) } ++ Vector.fill(1000) { Vector("laura", "iowa city", 24) } val ds = DataStream.fromValues(schema, values) implicit val conf = new Configuration() implicit val fs = FileSystem.get(new Configuration()) val path = new Path("test.orc") if (fs.exists(path)) fs.delete(path, false) new File(path.toString).deleteOnExit() ds.to(OrcSink(path).withRowIndexStride(1000)) override protected def afterAll(): Unit = fs.delete(path, false) "OrcSource" should "support string equals predicates" in { conf.set("eel.orc.predicate.row.filter", "false") val rows = OrcSource(path).withPredicate(Predicate.equals("name", "sam")).toDataStream().collect rows.map(_.values).toSet shouldBe Set(Vector("sam", "middlesbrough", 37L)) } it should "support gt predicates" in { conf.set("eel.orc.predicate.row.filter", "false") val rows = OrcSource(path).withPredicate(Predicate.gt("age", 30L)).toDataStream().collect rows.map(_.values).toSet shouldBe Set(Vector("sam", "middlesbrough", 37L)) } it should "support lt predicates" in { conf.set("eel.orc.predicate.row.filter", "false") val rows = OrcSource(path).withPredicate(Predicate.lt("age", 30)).toDataStream().collect rows.map(_.values).toSet shouldBe Set(Vector("laura", "iowa city", 24L)) } it should "enable row level filtering with predicates by default" in { conf.set("eel.orc.predicate.row.filter", "true") val rows = OrcSource(path).withPredicate(Predicate.equals("name", "sam")).toDataStream().collect rows.head.schema shouldBe schema rows.head.values shouldBe Vector("sam", "middlesbrough", 37L) } private def cleanUpResidualOrcTestFiles = { new File(".").listFiles(new FilenameFilter { override def accept(dir: File, name: String): Boolean = { (name.startsWith("test_") && name.endsWith(".orc")) || (name.startsWith(".test_") && name.endsWith(".orc.crc")) } }).foreach(_.delete()) } }
Example 67
Source File: KafkaSinkTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.kafka import java.util import java.util.{Properties, UUID} import io.eels.Row import io.eels.datastream.DataStream import io.eels.schema.{Field, StringType, StructType} import net.manub.embeddedkafka.{EmbeddedKafka, EmbeddedKafkaConfig} import org.apache.kafka.clients.consumer.KafkaConsumer import org.apache.kafka.clients.producer.KafkaProducer import org.apache.kafka.common.serialization.{Deserializer, Serializer} import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} import scala.collection.JavaConverters._ import scala.util.Try class KafkaSinkTest extends FlatSpec with Matchers with BeforeAndAfterAll { implicit val kafkaConfig = EmbeddedKafkaConfig( kafkaPort = 6001, zooKeeperPort = 6000 ) Try { EmbeddedKafka.start() } val schema = StructType( Field("name", StringType, nullable = true), Field("location", StringType, nullable = true) ) val ds = DataStream.fromValues( schema, Seq( Vector("clint eastwood", UUID.randomUUID().toString), Vector("elton john", UUID.randomUUID().toString) ) ) "KafkaSink" should "support default implicits" ignore { val topic = "mytopic-" + System.currentTimeMillis() val properties = new Properties() properties.put("bootstrap.servers", s"localhost:${kafkaConfig.kafkaPort}") properties.put("group.id", "test") properties.put("auto.offset.reset", "earliest") val producer = new KafkaProducer[String, Row](properties, StringSerializer, RowSerializer) val sink = KafkaSink(topic, producer) val consumer = new KafkaConsumer[String, String](properties, StringDeserializer, StringDeserializer) consumer.subscribe(util.Arrays.asList(topic)) ds.to(sink) producer.close() val records = consumer.poll(4000) records.iterator().asScala.map(_.value).toList shouldBe ds.collect.map { case Row(_, values) => values.mkString(",") }.toList } } object RowSerializer extends Serializer[Row] { override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = () override def serialize(topic: String, data: Row): Array[Byte] = data.values.mkString(",").getBytes override def close(): Unit = () } object StringSerializer extends Serializer[String] { override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = () override def close(): Unit = () override def serialize(topic: String, data: String): Array[Byte] = data.getBytes } object StringDeserializer extends Deserializer[String] { override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = () override def close(): Unit = () override def deserialize(topic: String, data: Array[Byte]): String = new String(data) }
Example 68
Source File: KahanSpec.scala From flint with Apache License 2.0 | 5 votes |
package com.twosigma.flint.math

import java.math.BigDecimal

import com.twosigma.flint.util.Timer
import org.scalatest.FlatSpec

import scala.util.Random

class KahanSpec extends FlatSpec {

  "Kahan" should "sum correctly in wiki example" in {
    val kahan = new Kahan()
    var i = 0
    while (i < 1000) {
      kahan.add(1.0)
      kahan.add(1.0e100)
      kahan.add(1.0)
      kahan.add(-1.0e100)
      i += 1
    }
    assert(kahan.value === 2000.0)
  }

  it should "sum correctly for constants of Double(s)" in {
    val kahan = new Kahan()
    val x = 1000.0002
    var sum = 0.0
    val bigDecimal = new BigDecimal(x)
    var bigDecimalSum = new BigDecimal(0.0)
    var i = 0
    while (i < (Int.MaxValue >> 5)) {
      sum += x
      kahan.add(x)
      bigDecimalSum = bigDecimalSum.add(bigDecimal)
      i += 1
    }
    assert(
      Math.abs(
        bigDecimalSum
          .subtract(new BigDecimal(kahan.value))
          .doubleValue()
      ) < 1.0e-5
    )
    assert(
      Math.abs(
        bigDecimalSum
          .subtract(new BigDecimal(sum))
          .doubleValue()
      ) > 1.0
    )
  }

  it should "subtract correctly" in {
    val kahan1 = new Kahan()
    val kahan2 = new Kahan()
    val x = 1000.0002
    var i = 0
    while (i < (Int.MaxValue >> 5)) {
      kahan1.add(x)
      kahan2.add(x)
      kahan2.add(x)
      i += 1
    }
    kahan2.subtract(kahan1)
    assert(kahan2.value === kahan1.value)
  }
}
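The Kahan accumulator under test is not part of this listing. It presumably implements the classic compensated-summation algorithm, which can be sketched roughly as below; this is an illustration only, and subtract in particular is a simplified assumption rather than the exact flint API:

// Sketch of Kahan (compensated) summation - illustration only, not the flint class.
class Kahan {
  private var sum = 0.0
  private var compensation = 0.0 // accumulates the low-order bits lost by `sum`

  def add(x: Double): Unit = {
    val y = x - compensation
    val t = sum + y
    compensation = (t - sum) - y
    sum = t
  }

  // Naive subtraction of another accumulator; assumed API, kept deliberately simple.
  def subtract(that: Kahan): Unit = add(-that.value)

  def value: Double = sum
}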
Example 69
Source File: SchemaSpec.scala From flint with Apache License 2.0 | 5 votes |
package com.twosigma.flint.timeseries import com.twosigma.flint.timeseries.row.Schema import org.scalatest.FlatSpec import org.apache.spark.sql.types._ class SchemaSpec extends FlatSpec { "Schema" should "create a schema correctly" in { val schema = StructType(Array( StructField("time", LongType), StructField("price", DoubleType) )) assert(Schema("time" -> LongType, "price" -> DoubleType) == schema) assert(Schema("price" -> DoubleType) == schema) } it should "`of` correctly" in { val schema = StructType(Array( StructField("foo", LongType), StructField("price", DoubleType) )) assert(Schema.of("foo" -> LongType, "price" -> DoubleType) == schema) } it should "create a time schema correctly without specifying any column" in { val schema = StructType(Array( StructField("time", LongType) )) assert(Schema() == schema) } it should "throw exception if `time` is not a LongType" in { intercept[IllegalArgumentException] { Schema("time" -> DoubleType) } } it should "throw an exception if you try to cast the `time` column" in { val schema = Schema() intercept[IllegalArgumentException] { Schema.cast(schema, "time" -> DoubleType) } } it should "throw an exception if you try to cast non-existing columns" in { val schema = Schema() intercept[IllegalArgumentException] { Schema.cast(schema, "foo" -> DoubleType) } } it should "throw an exception if you try to cast non-numeric columns" in { val schema = Schema("foo" -> StringType) intercept[IllegalArgumentException] { Schema.cast(schema, "foo" -> DoubleType) } } it should "cast numeric columns" in { val currentSchema = Schema("foo" -> IntegerType, "bar" -> ShortType) val newSchema = Schema.cast(currentSchema, "foo" -> DoubleType, "bar" -> IntegerType) assert(Schema.of("time" -> LongType, "foo" -> DoubleType, "bar" -> IntegerType) == newSchema) } }
Example 70
Source File: LinkedListHolderSpec.scala From flint with Apache License 2.0 | 5 votes |
package com.twosigma.flint.util.collection

import org.scalatest.FlatSpec

import java.util.{ LinkedList => JLinkedList }

import scala.collection.JavaConverters._

class LinkedListHolderSpec extends FlatSpec {

  "LinkedListHolder" should "dropWhile correctly" in {
    import Implicits._
    val l = new JLinkedList[Int]()
    val n = 10
    val p = 3
    l.addAll((1 to n).asJava)
    val (dropped1, rest1) = l.dropWhile { i => i > p }
    assert(dropped1.size() == 0)
    assert(rest1.toArray.deep == l.toArray.deep)
    val (dropped2, rest2) = l.dropWhile { i => i < p }
    assert(dropped2.toArray.deep == (1 until p).toArray.deep)
    assert(rest2.toArray.deep == (p to n).toArray.deep)
  }

  it should "foldLeft correctly" in {
    import com.twosigma.flint.util.collection.Implicits._
    val l = new JLinkedList[Char]()
    assert(l.foldLeft("")(_ + _) == "")
    l.add('a')
    l.add('b')
    l.add('c')
    l.add('d')
    assert(l.foldLeft("")(_ + _) == "abcd")
  }

  it should "foldRight correctly" in {
    import com.twosigma.flint.util.collection.Implicits._
    val l = new JLinkedList[Char]()
    assert(l.foldRight("")(_ + _) == "")
    l.add('a')
    l.add('b')
    l.add('c')
    l.add('d')
    assert(l.foldRight("")(_ + _) == "dcba")
  }
}
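The Implicits object imported by this spec is not shown here. A hypothetical enrichment of java.util.LinkedList matching the behaviour asserted above (note that this dropWhile returns both the dropped prefix and the remainder, and foldRight walks the list from its tail) might look like:

// Hypothetical sketch of the enrichment the spec imports; not the flint code.
import java.util.{ LinkedList => JLinkedList }
import scala.collection.JavaConverters._

object Implicits {
  implicit class RichJLinkedList[A](val underlying: JLinkedList[A]) {
    // Split the list into (longest prefix satisfying p, remaining elements).
    def dropWhile(p: A => Boolean): (JLinkedList[A], JLinkedList[A]) = {
      val dropped = new JLinkedList[A]()
      val rest = new JLinkedList[A]()
      var dropping = true
      underlying.asScala.foreach { a =>
        if (dropping && p(a)) dropped.add(a)
        else { dropping = false; rest.add(a) }
      }
      (dropped, rest)
    }

    // Left fold over the list in insertion order.
    def foldLeft[B](z: B)(op: (B, A) => B): B =
      underlying.asScala.foldLeft(z)(op)

    // Fold from the right-hand end of the list towards the head.
    def foldRight[B](z: B)(op: (B, A) => B): B =
      underlying.asScala.toList.reverse.foldLeft(z)(op)
  }
}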
Example 71
Source File: RangeSplitSpec.scala From flint with Apache License 2.0 | 5 votes |
package com.twosigma.flint.rdd

import org.scalatest.FlatSpec

class RangeSplitSpec extends FlatSpec {

  val rangeSplits = IndexedSeq(
    RangeSplit(Split(0), CloseOpen(2, Option(3))),
    RangeSplit(Split(1), CloseOpen(3, Option(4))),
    RangeSplit(Split(2), CloseOpen(5, Option(7))),
    RangeSplit(Split(3), CloseOpen(7, Option(10)))
  )

  "The RangeSplit" should "getNextBegin correctly" in {
    val begins = rangeSplits.map(_.range.begin)
    assert(RangeSplit.getNextBegin(2, begins) == Some(3))
    assert(RangeSplit.getNextBegin(4, begins) == Some(5))
    assert(RangeSplit.getNextBegin(6, begins) == Some(7))
    assert(RangeSplit.getNextBegin(8, begins).isEmpty)
    assert(RangeSplit.getNextBegin(1, Vector[Int]()).isEmpty)
  }

  it should "getSplitsWithinRange correctly" in {
    assert(RangeSplit.getIntersectingSplits(CloseOpen(4, Some(6)), rangeSplits) == List(RangeSplit(Split(2), CloseOpen(5, Some(7)))))
    assert(RangeSplit.getIntersectingSplits(CloseOpen(4, None), rangeSplits) == List(RangeSplit(Split(2), CloseOpen(5, Some(7))), RangeSplit(Split(3), CloseOpen(7, Some(10)))))
  }
}
Example 72
Source File: OrderedIteratorSpec.scala From flint with Apache License 2.0 | 5 votes |
package com.twosigma.flint.rdd

import org.scalatest.FlatSpec

class OrderedIteratorSpec extends FlatSpec {

  "OrderedKeyValueIterator" should "iterate through an ordered collection correctly" in {
    val data = (1 to 10).map { x => (x, x) }.toArray
    assert(data.deep == OrderedIterator(data.iterator).toArray.deep)
  }

  it should "filter on a range correctly" in {
    val data = (1 to 10).map { x => (x, x) }.toArray

    var range = CloseOpen(0, Some(1))
    assert(OrderedIterator(data.iterator).filterByRange(range).length == 0)

    range = CloseOpen(0, Some(2))
    assert(OrderedIterator(data.iterator).filterByRange(range).toArray.deep == Array((1, 1)).deep)

    range = CloseOpen(3, Some(5))
    assert(OrderedIterator(data.iterator).filterByRange(range).toArray.deep == Array((3, 3), (4, 4)).deep)

    range = CloseOpen(9, Some(20))
    assert(OrderedIterator(data.iterator).filterByRange(range).toArray.deep == Array((9, 9), (10, 10)).deep)

    range = CloseOpen(11, Some(20))
    assert(OrderedIterator(data.iterator).filterByRange(range).length == 0)
  }
}
Example 73
Source File: TreeAggregateSpec.scala From flint with Apache License 2.0 | 5 votes |
package com.twosigma.flint.rdd.function.summarize import com.twosigma.flint.SharedSparkContext import org.scalatest.FlatSpec import org.scalatest.tagobjects.Slow class TreeAggregateSpec extends FlatSpec with SharedSparkContext { "TreeAggregate" should "aggregate as RDD.aggregate in order for max op" taggedAs (Slow) in { val numOfPartitions = 1001 val scale = 5 val maxDepth = 5 val rdd = sc.parallelize(1 to numOfPartitions, numOfPartitions).mapPartitionsWithIndex { (idx, _) => (1 to scale).map { x => idx * scale + x }.toIterator } // Use -1 as a "bad" state and propagate through the aggregation, otherwise // it is just simply a Math.max() operator. val seqOp = (u: Int, t: Int) => if (u > t || u < 0) { -1 } else { t } val combOp = (u1: Int, u2: Int) => if (u1 >= u2 || u1 < 0 || u2 < 0) { -1 } else { u2 } val expectedAggregatedResult = rdd.max() (1 to maxDepth).foreach { depth => assert(TreeAggregate(rdd)(0, seqOp, combOp, depth) == expectedAggregatedResult) } } it should "aggregate as RDD.aggregate in order for string concat" taggedAs (Slow) in { val numOfPartitions = 1001 val scale = 5 val maxDepth = 5 val rdd = sc.parallelize(1 to numOfPartitions, numOfPartitions).mapPartitionsWithIndex { (idx, _) => (1 to scale).map { x => s"${idx * scale + x}" }.toIterator } val seqOp = (u: String, t: String) => u + t val combOp = (u1: String, u2: String) => u1 + u2 val expectedAggregatedResult = rdd.collect().mkString("") (1 to maxDepth).foreach { depth => assert(TreeAggregate(rdd)("", seqOp, combOp, depth) == expectedAggregatedResult) } } it should "aggregate as RDD.aggregate in order for sum" taggedAs (Slow) in { val numOfPartitions = 1001 val scale = 5 val maxDepth = 5 val rdd = sc.parallelize(1 to numOfPartitions, numOfPartitions).mapPartitionsWithIndex { (idx, _) => (1 to scale).map { x => idx * scale + x }.toIterator } val seqOp = (u: Int, t: Int) => u + t val combOp = (u1: Int, u2: Int) => u1 + u2 val expectedAggregatedResult = rdd.sum() (1 to maxDepth).foreach { depth => assert(TreeAggregate(rdd)(0, seqOp, combOp, depth) == expectedAggregatedResult) } } }
Example 74
Source File: TreeReduceSpec.scala From flint with Apache License 2.0 | 5 votes |
package com.twosigma.flint.rdd.function.summarize import com.twosigma.flint.SharedSparkContext import org.scalatest.FlatSpec import org.scalatest.tagobjects.Slow class TreeReduceSpec extends FlatSpec with SharedSparkContext { "TreeReduce" should "reduce in order for max op" taggedAs (Slow) in { val numOfPartitions = 1023 val scale = 5 val maxDepth = 5 val rdd = sc.parallelize(1 to numOfPartitions, numOfPartitions).mapPartitionsWithIndex { (idx, _) => (1 to scale).map { x => idx * scale + x }.toIterator } // Use -1 as a "bad" state and propagate through the aggregation, otherwise // it is just simply a Math.max() operator. val op = (u1: Int, u2: Int) => if (u1 >= u2 || u1 < 0 || u2 < 0) { -1 } else { u2 } val expectedReducedResult = rdd.max() (1 to maxDepth).foreach { depth => assert(TreeReduce(rdd)(op, depth) == expectedReducedResult) } } it should "reduce in order for string concat" taggedAs (Slow) in { val numOfPartitions = 1111 val scale = 5 val maxDepth = 5 val rdd = sc.parallelize(1 to numOfPartitions, numOfPartitions).mapPartitionsWithIndex { (idx, _) => (1 to scale).map { x => s"${idx * scale + x}" }.toIterator } val f = (u1: String, u2: String) => u1 + u2 val expectedReducedResult = rdd.collect().mkString("") (1 to maxDepth).foreach { depth => assert(TreeReduce(rdd)(f, depth) == expectedReducedResult) } } it should "reduce in order for sum op" taggedAs (Slow) in { val numOfPartitions = 1023 val scale = 5 val maxDepth = 5 val rdd = sc.parallelize(1 to numOfPartitions, numOfPartitions).mapPartitionsWithIndex { (idx, _) => (1 to scale).map { x => idx * scale + x }.toIterator } val expectedReducedResult = rdd.sum() val f = (u1: Int, u2: Int) => u1 + u2 (1 to maxDepth).foreach { depth => assert(TreeReduce(rdd)(f, depth) == expectedReducedResult) } } }
Example 75
Source File: RegressionSummarizerSpec.scala From flint with Apache License 2.0 | 5 votes |
package com.twosigma.flint.rdd.function.summarize.summarizer import breeze.linalg.{ DenseVector, DenseMatrix } import org.scalatest.FlatSpec class RegressionSummarizerSpec extends FlatSpec { "RegressionSummarizer" should "transform from RegressRow correctly" in { val x: Array[RegressionRow] = Array( RegressionRow(time = 0L, x = Array(1d, 2d), y = 3d, weight = 4d), RegressionRow(time = 0L, x = Array(4d, 5d), y = 6d, weight = 16d) ) val (response1, predictor1, yw1) = RegressionSummarizer.transform(x, shouldIntercept = true, isWeighted = true) assert(response1.equals(DenseMatrix(Array(2d, 2d, 4d), Array(4d, 16d, 20d)))) assert(predictor1.equals(DenseVector(Array(6d, 24d)))) assert(yw1.deep == Array((3d, 4d), (6d, 16d)).deep) val (response2, predictor2, yw2) = RegressionSummarizer.transform(x, shouldIntercept = true, isWeighted = false) assert(response2.equals(DenseMatrix(Array(1d, 1d, 2d), Array(1d, 4d, 5d)))) assert(predictor2.equals(DenseVector(Array(3d, 6d)))) assert(yw2.deep == Array((3d, 1d), (6d, 1d)).deep) val (response3, predictor3, yw3) = RegressionSummarizer.transform(x, shouldIntercept = false, isWeighted = true) assert(response3.equals(DenseMatrix(Array(2d, 4d), Array(16d, 20d)))) assert(predictor3.equals(DenseVector(Array(6d, 24d)))) assert(yw3.deep == Array((3d, 4d), (6d, 16d)).deep) val (response4, predictor4, yw4) = RegressionSummarizer.transform(x, shouldIntercept = false, isWeighted = false) assert(response4.equals(DenseMatrix(Array(1d, 2d), Array(4d, 5d)))) assert(predictor4.equals(DenseVector(Array(3d, 6d)))) assert(yw4.deep == Array((3d, 1d), (6d, 1d)).deep) } }
Example 76
Source File: LagWindowSpec.scala From flint with Apache License 2.0 | 5 votes |
package com.twosigma.flint.rdd.function.summarize.summarizer.regression import org.scalatest.FlatSpec class LagWindowSpec extends FlatSpec { "LagWindow" should "give an `AbsoluteTimeLagWindow` as expect" in { val lagWindow = LagWindow.absolute(10L) assert(!lagWindow.shouldKeep(20L, 10L, 0)) assert(!lagWindow.shouldKeep(20L, 10L, Int.MaxValue)) assert(lagWindow.shouldKeep(15L, 10L, 0)) assert(lagWindow.shouldKeep(15L, 10L, Int.MaxValue)) assert(!lagWindow.shouldKeep(25L, 10L, 0)) assert(!lagWindow.shouldKeep(25L, 10L, Int.MaxValue)) } it should "give a `CountLagWindow` as expect" in { val countLagWindow = LagWindow.count(3, 10) assert(countLagWindow.shouldKeep(20L, 10L, 0)) assert(countLagWindow.shouldKeep(20L, 10L, 1)) assert(countLagWindow.shouldKeep(20L, 10L, 2)) assert(countLagWindow.shouldKeep(20L, 10L, 3)) assert(countLagWindow.shouldKeep(20L, 10L, 4)) assert(!countLagWindow.shouldKeep(20L, 10L, 5)) assert(countLagWindow.shouldKeep(15L, 10L, 0)) assert(countLagWindow.shouldKeep(15L, 10L, 1)) assert(countLagWindow.shouldKeep(15L, 10L, 2)) assert(countLagWindow.shouldKeep(15L, 10L, 3)) assert(countLagWindow.shouldKeep(15L, 10L, 4)) assert(!countLagWindow.shouldKeep(15L, 10L, 5)) assert(!countLagWindow.shouldKeep(25L, 10L, 0)) assert(!countLagWindow.shouldKeep(25L, 10L, 1)) assert(!countLagWindow.shouldKeep(25L, 10L, 2)) assert(!countLagWindow.shouldKeep(25L, 10L, 3)) assert(!countLagWindow.shouldKeep(25L, 10L, 4)) assert(!countLagWindow.shouldKeep(25L, 10L, 5)) } "LagWindowQueue" should "work with an `AbsoluteTimeLagWindow` as expect" in { val window = LagWindow.absolute(10L) val lagWindowQueue = new LagWindowQueue[Int](window) lagWindowQueue.enqueue(0, 0) lagWindowQueue.enqueue(5, 5) lagWindowQueue.enqueue(10, 10) assert(lagWindowQueue.length == 2) assert(lagWindowQueue.head.timestamp == 5L) assert(lagWindowQueue.last.timestamp == 10L) lagWindowQueue.enqueue(100, 100) assert(lagWindowQueue.length == 1) assert(lagWindowQueue.last.timestamp == 100L) } it should "work with a `CountLagWindow` as expect" in { val window = LagWindow.count(3, 10L) val lagWindowQueue = new LagWindowQueue[Int](window) lagWindowQueue.enqueue(0, 0) lagWindowQueue.enqueue(5, 5) lagWindowQueue.enqueue(7, 7) lagWindowQueue.enqueue(10, 10) lagWindowQueue.enqueue(10, 10) assert(lagWindowQueue.length == 4) assert(lagWindowQueue.head.timestamp == 0L) assert(lagWindowQueue.last.timestamp == 10L) lagWindowQueue.enqueue(11, 11) assert(lagWindowQueue.length == 4) assert(lagWindowQueue.head.timestamp == 5L) lagWindowQueue.enqueue(100, 100) assert(lagWindowQueue.length == 1) assert(lagWindowQueue.head.timestamp == 100) lagWindowQueue.clear() assert(lagWindowQueue.length == 0) } }
Example 77
Source File: OverlappedOrderedRDDSpec.scala From flint with Apache License 2.0 | 5 votes |
package com.twosigma.flint.rdd import com.twosigma.flint.SharedSparkContext import org.apache.spark.rdd.RDD import org.scalatest.FlatSpec class OverlappedOrderedRDDSpec extends FlatSpec with SharedSparkContext { val numSlices: Int = 3 val sliceLength: Int = 4 var rdd: RDD[(Int, Int)] = _ var orderedRdd: OrderedRDD[Int, Int] = _ var overlappedOrderedRdd: OverlappedOrderedRDD[Int, Int] = _ private def window(t: Int): (Int, Int) = (t - 2, t) override def beforeAll() { super.beforeAll() val s = sliceLength rdd = sc.parallelize(0 until numSlices, numSlices).flatMap { i => (1 to s).map { j => i * s + j } }.map { x => (x, x) } orderedRdd = OrderedRDD.fromRDD(rdd, KeyPartitioningType.Sorted) overlappedOrderedRdd = OverlappedOrderedRDD(orderedRdd, window) } "The OverlappedOrderedRDD" should "be constructed from `OrderedRDD` correctly" in { assert(overlappedOrderedRdd.rangeSplits.deep == orderedRdd.rangeSplits.deep) val benchmark = Array(1, 2, 3, 4, 5, 4, 5, 6, 7, 8, 9, 8, 9, 10, 11, 12).map { x => (x, x) } assert(overlappedOrderedRdd.collect().deep == benchmark.deep) } it should "be able to remove overlapped rows to get an `OrderedRDD` correctly" in { assert(overlappedOrderedRdd.rangeSplits.deep == orderedRdd.rangeSplits.deep) assert(overlappedOrderedRdd.nonOverlapped().collect().deep == orderedRdd.collect().deep) } it should "`mapPartitionsWithIndexOverlapped` correctly" in { val mapped = overlappedOrderedRdd.mapPartitionsWithIndexOverlapped( (index, iterator) => iterator.map { case (k, v) => (k, v * 2) } ) val benchmark = Array(1, 2, 3, 4, 5, 4, 5, 6, 7, 8, 9, 8, 9, 10, 11, 12).map { x => (x, 2 * x) } assert(mapped.collect().deep == benchmark.deep) } }
Example 78
Source File: RangeMergeJoinSpec.scala From flint with Apache License 2.0 | 5 votes |
package com.twosigma.flint.rdd import com.twosigma.flint.rdd.function.join.RangeMergeJoin import org.apache.spark.Partition import org.scalatest.FlatSpec class RangeMergeJoinSpec extends FlatSpec { val thisSplits = IndexedSeq( RangeSplit(Split(0), CloseOpen(1, Some(2))), RangeSplit(Split(1), CloseOpen(2, Some(3))), RangeSplit(Split(2), CloseOpen(3, Some(4))), RangeSplit(Split(3), CloseOpen(4, Some(5))), RangeSplit(Split(4), CloseOpen(5, None)) ) val thatSplits = IndexedSeq( RangeSplit(Split(0), CloseOpen(1, Some(3))), RangeSplit(Split(1), CloseOpen(3, Some(7))), RangeSplit(Split(2), CloseOpen(7, None)) ) "The RangeMergeJoin" should "`mergeSplits` with no tolerance correctly" in { val benchmark = List( RangeMergeJoin( CloseOpen(1, Some(2)), List(RangeSplit(Split(0), CloseOpen(1, Some(2)))), List(RangeSplit(Split(0), CloseOpen(1, Some(3)))) ), RangeMergeJoin( CloseOpen(2, Some(3)), List(RangeSplit(Split(1), CloseOpen(2, Some(3)))), List(RangeSplit(Split(0), CloseOpen(1, Some(3)))) ), RangeMergeJoin( CloseOpen(3, Some(4)), List(RangeSplit(Split(2), CloseOpen(3, Some(4)))), List(RangeSplit(Split(1), CloseOpen(3, Some(7)))) ), RangeMergeJoin( CloseOpen(4, Some(5)), List(RangeSplit(Split(3), CloseOpen(4, Some(5)))), List(RangeSplit(Split(1), CloseOpen(3, Some(7)))) ), RangeMergeJoin( CloseOpen(5, Some(7)), List(RangeSplit(Split(4), CloseOpen(5, None))), List(RangeSplit(Split(1), CloseOpen(3, Some(7)))) ), RangeMergeJoin( CloseOpen(7, None), List(RangeSplit(Split(4), CloseOpen(5, None))), List(RangeSplit(Split(2), CloseOpen(7, None))) ) ) assertResult(benchmark) { RangeMergeJoin.mergeSplits(thisSplits, thatSplits) } } it should "`mergeSplits` with some tolerance correctly" in { val benchmark = List( RangeMergeJoin( CloseOpen(1, Some(2)), List(RangeSplit(Split(0), CloseOpen(1, Some(2)))), List(RangeSplit(Split(0), CloseOpen(1, Some(3)))) ), RangeMergeJoin( CloseOpen(2, Some(3)), List(RangeSplit(Split(0), CloseOpen(1, Some(2))), RangeSplit(Split(1), CloseOpen(2, Some(3)))), List(RangeSplit(Split(0), CloseOpen(1, Some(3)))) ), RangeMergeJoin( CloseOpen(3, Some(4)), List(RangeSplit(Split(1), CloseOpen(2, Some(3))), RangeSplit(Split(2), CloseOpen(3, Some(4)))), List(RangeSplit(Split(0), CloseOpen(1, Some(3))), RangeSplit(Split(1), CloseOpen(3, Some(7)))) ), RangeMergeJoin( CloseOpen(4, Some(5)), List(RangeSplit(Split(2), CloseOpen(3, Some(4))), RangeSplit(Split(3), CloseOpen(4, Some(5)))), List(RangeSplit(Split(1), CloseOpen(3, Some(7)))) ), RangeMergeJoin( CloseOpen(5, Some(7)), List(RangeSplit(Split(3), CloseOpen(4, Some(5))), RangeSplit(Split(4), CloseOpen(5, None))), List(RangeSplit(Split(1), CloseOpen(3, Some(7)))) ), RangeMergeJoin( CloseOpen(7, None), List(RangeSplit(Split(4), CloseOpen(5, None))), List(RangeSplit(Split(1), CloseOpen(3, Some(7))), RangeSplit(Split(2), CloseOpen(7, None))) ) ) assertResult(benchmark) { RangeMergeJoin.mergeSplits(thisSplits, thatSplits, { x: Int => x - 1 }) } } }
Example 79
Source File: PointRDDExtensionsSpec.scala From reactiveinflux-spark with Apache License 2.0 | 5 votes |
package com.pygmalios.reactiveinflux.extensions import com.holdenkarau.spark.testing.SharedSparkContext import com.pygmalios.reactiveinflux.Point.Measurement import com.pygmalios.reactiveinflux._ import com.pygmalios.reactiveinflux.extensions.PointRDDExtensionsSpec._ import com.pygmalios.reactiveinflux.spark._ import com.pygmalios.reactiveinflux.spark.extensions.PointRDDExtensions import org.joda.time.{DateTime, DateTimeZone} import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import org.scalatest.{BeforeAndAfter, FlatSpec} import scala.concurrent.duration._ @RunWith(classOf[JUnitRunner]) class PointRDDExtensionsSpec extends FlatSpec with SharedSparkContext with BeforeAndAfter { before { withInflux(_.create()) } after { withInflux(_.drop()) } behavior of "saveToInflux" it should "write single point to Influx" in { val points = List(point1) val rdd = sc.parallelize(points) // Execute rdd.saveToInflux() // Assert assert(PointRDDExtensions.totalBatchCount == 1) assert(PointRDDExtensions.totalPointCount == 1) val result = withInflux( _.query(Query(s"SELECT * FROM $measurement1")) .result .singleSeries) assert(result.rows.size == 1) val row = result.rows.head assert(row.time == point1.time) assert(row.values.size == 5) } it should "write 1000 points to Influx" in { val points = (1 to 1000).map { i => Point( time = point1.time.plusMinutes(i), measurement = point1.measurement, tags = point1.tags, fields = point1.fields ) } val rdd = sc.parallelize(points) // Execute rdd.saveToInflux() // Assert assert(PointRDDExtensions.totalBatchCount == 8) assert(PointRDDExtensions.totalPointCount == 1000) val result = withInflux( _.query(Query(s"SELECT * FROM $measurement1")) .result .singleSeries) assert(result.rows.size == 1000) } } object PointRDDExtensionsSpec { implicit val params: ReactiveInfluxDbName = ReactiveInfluxDbName("test") implicit val awaitAtMost: Duration = 1.second val measurement1: Measurement = "measurement1" val point1 = Point( time = new DateTime(1983, 1, 10, 7, 43, 10, 3, DateTimeZone.UTC), measurement = measurement1, tags = Map("tagKey1" -> "tagValue1", "tagKey2" -> "tagValue2"), fields = Map("fieldKey1" -> StringFieldValue("fieldValue1"), "fieldKey2" -> BigDecimalFieldValue(10.7))) }
Example 80
Source File: IntegrationSpec.scala From Principles-of-Reactive-Programming with GNU General Public License v3.0 | 5 votes |
package kvstore

import akka.actor.{ Actor, Props, ActorRef, ActorSystem }
import akka.testkit.{ TestProbe, ImplicitSender, TestKit }
import org.scalatest.{ BeforeAndAfterAll, FlatSpec, Matchers }
import scala.concurrent.duration._
import org.scalatest.FunSuiteLike
import org.scalactic.ConversionCheckedTripleEquals

class IntegrationSpec(_system: ActorSystem) extends TestKit(_system)
    with FunSuiteLike
    with Matchers
    with BeforeAndAfterAll
    with ConversionCheckedTripleEquals
    with ImplicitSender
    with Tools {

  import Replica._
  import Replicator._
  import Arbiter._

  def this() = this(ActorSystem("ReplicatorSpec"))

  override def afterAll: Unit = system.shutdown()
}
Example 81
Source File: NestedTest.scala From TwoTails with Apache License 2.0 | 5 votes |
package twotails import org.scalatest.{ FlatSpec, Matchers } class Nested{ @mutualrec final def nest(z: Int): Int ={ @mutualrec def one(x: Int): Int = if(0 < x) two(x-1) else 0 @mutualrec def two(x: Int): Int = if(0 < x) one(x-2) else 0 if(0 < z) one(z) else aha(z) } @mutualrec final def aha(z: Int): Int = nest(z+1) def thing(y: Int) ={ class Yo{ @mutualrec final def one(x: Int): Int = if(0 < x) two(x-1) else 0 @mutualrec final def two(x: Int): Int = if(0 < x) one(x-2) else 0 } (new Yo).two(y) } def other(y: Int) ={ object Yo{ @mutualrec final def one(x: Int): Int = if(0 < x) two(x-1) else 0 @mutualrec final def two(x: Int): Int = if(0 < x) one(x-2) else 0 } Yo.one(y) } class Foo{ @mutualrec final def one(x: Int): Int = if(0 < x) two(x-1) else 0 @mutualrec final def two(x: Int): Int = if(0 < x) one(x-2) else 0 } def something(y: Int): Int ={ @mutualrec def one(x: Int): Int = if(0 < x) two(x-1) else 0 @mutualrec def two(x: Int): Int = if(0 < x) one(x-2) else 0 one(y) } { //just a block which will be discarded @mutualrec def one(x: Int): Int = if(0 < x) two(x-1) else 0 @mutualrec def two(x: Int): Int = if(0 < x) one(x-2) else 0 } def dis(y: Int) ={ { //another block which will be discarded but with a name clash @mutualrec def one(x: Int): Int = if(0 < x) two(x-1) else 1 @mutualrec def two(x: Int): Int = if(0 < x) one(x-2) else 1 } @mutualrec def one(x: Int): Int = if(0 < x) two(x-1) else 0 @mutualrec def two(x: Int): Int = if(0 < x) one(x-2) else 0 one(y) } val that ={ xy: Int => @mutualrec def one(x: Int): Int = if(0 < x) two(x-1) else 0 @mutualrec def two(x: Int): Int = if(0 < x) one(x-2) else 0 one(xy) } } class NestedTest extends FlatSpec with Matchers{ val fourK = 400000 "A nested class within a def which has annotated methods" should "not throw a StackOverflow" in{ val nest = new Nested nest.thing(fourK) should equal (0) } "A nested object within a def which has annotated methods" should "not throw a StackOverflow" in{ val nest = new Nested nest.other(fourK) should equal (0) } "A nested class within a class which has annotated methods" should "not throw a StackOverflow" in{ val nest = new Nested val foo = new nest.Foo foo.one(fourK) should equal (0) } "A nested set of annotated methods with a name clashing nested block" should "not throw a StackOverflow" in{ val nest = new Nested nest.dis(fourK) should equal(0) } }
Example 82
Source File: MultiRecTest.scala From TwoTails with Apache License 2.0 | 5 votes |
package twotails import org.scalatest.{ FlatSpec, Matchers } import java.lang.StackOverflowError class Cup{ @mutualrec final def one(x: Int, y: Int = 0): Int = if(0 < x) two(x-1, y) else y @mutualrec final def two(u: Int, v: Int = 0): Int = if(0 < u) one(u-1, v) else v @mutualrec final def three(x: Int, y: Int = 1): Int = if(0 < x) four(x-1, y) else y @mutualrec final def four(u: Int, v: Int = 1): Int = if(0 < u) three(u-1, v) else v } class Bowl{ @mutualrec final def one[A](x: Int, y: A): A = if(0 < x) two(x-1, y) else y @mutualrec final def two[A](u: Int, v: A): A = if(0 < u) one(u-1, v) else v @mutualrec final def three(x: Int, y: Int = 1): Int = if(0 < x) four(x-1, y) else y @mutualrec final def four(u: Int, v: Int = 1): Int = if(0 < u) three(u-1, v) else v } class Plate{ @mutualrec final def zero(x: Int): Int = if(x < 0) x else one(x-1) @mutualrec final def one(x: Int): Int = if(x < 0) two(x, x) else zero(x-1) @mutualrec final def two(x: Int, y: Int): Int = if(x < 0) y else three(x-1, y+1) @mutualrec final def three(x: Int, y: Int): Int = if(x < 0) one(x) else two(x-1, y+1) } class Saucer{ @mutualrec final def zero(x: Int): Int = if(x < 0) x else one(x-1) @mutualrec final def one(x: Int): Int = if(x < 0) zero(x-1) else two(x, x) @mutualrec final def two(x: Int, y: Int): Int = if(x < 0) y else three(x-1, y+1) @mutualrec final def three(x: Int, y: Int): Int = if(x < 0) two(x-1, y+1) else one(x) } class MultiRecTest extends FlatSpec with Matchers{ val fourK = 400000 "a class with two sets of mutually recursive functions" should "just work" in{ val cup = new Cup cup.one(fourK) should equal(0) cup.three(fourK) should equal(1) } "a class with two sets of mutually recursive functions but different types" should "just work" in{ val bowl = new Bowl bowl.one(fourK, "a") should equal("a") bowl.three(fourK) should equal(1) } "a class with recursive functions of different types" should "not be transformed together" in{ val saucer = new Saucer intercept[StackOverflowError]{ saucer.three(fourK, fourK) } } }
Example 83
Source File: DefaultArgumentTest.scala From TwoTails with Apache License 2.0 | 5 votes |
package twotails

import org.scalatest.{ FlatSpec, Matchers }

class SpeedBump{
  @mutualrec final def one(x: Int, y: Int = 1): Int = if(0 < x) two(x-1) else y
  @mutualrec final def two(x: Int, y: Int = 2): Int = if(0 < x) one(x-1) else y
}

class Pothole{
  @mutualrec final def one(x: Int)(y: Int, z: Int = 1): Int = if(0 < x) two(x-1)(y) else z
  @mutualrec final def two(x: Int)(y: Int, z: Int = 2): Int = if(0 < x) one(x-1)(y) else z
}

class Ditch{
  @mutualrec final def one(x: Int, y: Int, z: Int = 1): Int = if(0 < x) two(x = x-1, y = y) else z
  @mutualrec final def two(x: Int, y: Int, z: Int = 2): Int = if(0 < x) one(y = y, x = x-1) else z
}

class GuardRail{
  @mutualrec final def one(v: Int, x: Int, y: Int = 1, z: Int = 1): Int = if(0 < v) two(v-1, x, z=z) else y
  @mutualrec final def two(v: Int, x: Int, y: Int = 2, z: Int = 2): Int = if(0 < v) one(v-1, x, z=z) else y
}

class DefaultArgumentTest extends FlatSpec with Matchers{
  val fourK = 400000

  "mutually recursive functions with default args" should "use the default args" in{
    val sb = new SpeedBump
    sb.one(fourK) should not equal sb.two(fourK)
  }

  "mutually recursive functions with default args and multi-param lists" should "use the default args" in{
    val pt = new Pothole
    pt.one(fourK)(fourK) should not equal pt.two(fourK)(fourK)
  }

  "mutually recursive function with default args called by name" should "use the default args" in{
    val dt = new Ditch
    dt.one(fourK, fourK) should not equal dt.two(fourK, fourK)
  }

  "mutually recursive function with default args with mixed calls" should "use the default args" in{
    val gr = new GuardRail
    gr.one(fourK, fourK) should not equal gr.two(fourK, fourK)
  }
}
Example 84
Source File: ObjectTest.scala From TwoTails with Apache License 2.0 | 5 votes |
package twotails

import org.scalatest.{ FlatSpec, Matchers }
import java.lang.StackOverflowError

object Blappy{
  @mutualrec final def one(x: Int): Int = if(0 < x) two(x-1) else 0
  @mutualrec final def two(x: Int): Int = if(0 < x) one(x-1) else 0
}

class ObjectTest extends FlatSpec with Matchers{
  val fourK = 400000

  "Two mutually recursive, single argument, annotated methods on an object" should "not throw a StackOverflow" in{
    Blappy.one(fourK) should equal (0)
  }
}
Example 85
Source File: ArgumentListTest.scala From TwoTails with Apache License 2.0 | 5 votes |
package twotails

import org.scalatest.{ FlatSpec, Matchers }

class Bippy{
  @mutualrec final def one(x: Int, y: Int = 1): Int = if(0 < x) two(y, x) else 0
  @mutualrec final def two(x: Int, y: Int = 1): Int = if(0 < x) one(x-1, y-1) else 0
}

class Baz{
  @mutualrec final def one(x: Int)(y: Int): Int = if(0 < x) two(y)(x) else 0
  @mutualrec final def two(x: Int)(y: Int): Int = if(0 < x) one(x-1)(y-1) else 0
}

class Bazooka{
  @mutualrec final def one(x: Int)(y: Int)(z: Int): Int = if(0 < x) two(y)(x)(z) else z
  @mutualrec final def two(x: Int)(y: Int)(z: Int): Int = if(0 < x) one(x-1)(y-1)(z+1) else z
}

class ArgumentListTest extends FlatSpec with Matchers{
  val fourK = 400000

  "Two mutually recursive, double-argument list, annotated methods" should "not throw a StackOverflow" in{
    val c = new Baz
    c.one(fourK)(fourK) should equal (0)
  }

  "Two mutually recursive, multi-argument list, annotated methods" should "not throw a StackOverflow" in{
    val baz = new Bazooka
    baz.one(fourK)(fourK)(0) should equal (fourK)
  }
}
Example 86
Source File: RetimeSpec.scala From barstools with BSD 3-Clause "New" or "Revised" License | 5 votes |
// See LICENSE for license details. package barstools.tapeout.transforms.retime.test import chisel3._ import firrtl._ import org.scalatest.{FlatSpec, Matchers} import chisel3.experimental._ import chisel3.util.HasBlackBoxInline import chisel3.iotesters._ import barstools.tapeout.transforms.retime._ class RetimeSpec extends FlatSpec with Matchers { def normalized(s: String): String = { require(!s.contains("\n")) s.replaceAll("\\s+", " ").trim } def uniqueDirName[T](gen: => T, name: String): String = { val genClassName = gen.getClass.getName name + genClassName.hashCode.abs } behavior of "retime library" it should "pass simple retime module annotation" in { val gen = () => new RetimeModule() val dir = uniqueDirName(gen, "RetimeModule") chisel3.Driver.execute(Array("-td", s"test_run_dir/$dir", "-foaf", s"test_run_dir/$dir/final"), gen) shouldBe a [ChiselExecutionSuccess] val lines = io.Source.fromFile(s"test_run_dir/$dir/test_run_dir/$dir/final.anno.json").getLines().map(normalized).mkString("\n") lines should include("barstools.tapeout.transforms.retime.RetimeTransform") } // TODO(azidar): need to fix/add instance annotations ignore should "pass simple retime instance annotation" in { val gen = () => new RetimeInstance() val dir = uniqueDirName(gen, "RetimeInstance") chisel3.Driver.execute(Array("-td", s"test_run_dir/$dir", "-foaf", s"test_run_dir/$dir/final.anno"), gen) shouldBe a [ChiselExecutionSuccess] val lines = io.Source.fromFile(s"test_run_dir/$dir/final.anno").getLines().map(normalized).toSeq lines should contain ("Annotation(ComponentName(instance, ModuleName(RetimeInstance,CircuitName(RetimeInstance))),class barstools.tapeout.transforms.retime.RetimeTransform,retime)") } } class RetimeModule extends Module with RetimeLib { val io = IO(new Bundle { val in = Input(UInt(15.W)) val out = Output(UInt(15.W)) }) io.out := io.in retime(this) } class MyModule extends Module with RetimeLib { val io = IO(new Bundle { val in = Input(UInt(15.W)) val out = Output(UInt(15.W)) }) io.out := io.in } class RetimeInstance extends Module with RetimeLib { val io = IO(new Bundle { val in = Input(UInt(15.W)) val out = Output(UInt(15.W)) }) val instance = Module(new MyModule) retime(instance) instance.io.in := io.in io.out := instance.io.out }
Example 87
Source File: MainServiceSpec.scala From akka-api-gateway-example with MIT License | 5 votes |
package jp.co.dzl.example.akka.api

import akka.http.scaladsl.Http
import com.typesafe.config.ConfigFactory
import org.scalatest.concurrent.ScalaFutures
import org.scalatest.{ BeforeAndAfterAll, Matchers, FlatSpec }

import scala.concurrent.Await
import scala.concurrent.duration.Duration

class MainServiceSpec extends FlatSpec with Matchers with BeforeAndAfterAll with ScalaFutures with MainService {
  override protected def afterAll: Unit = {
    Await.result(system.terminate(), Duration.Inf)
  }

  it should "inject configuration of http" in {
    val config = ConfigFactory.load()

    host shouldEqual config.getString("http.listen.host")
    port shouldEqual config.getInt("http.listen.port")
  }

  it should "bind and handle" in {
    val http = Http().bindAndHandle(handler.routes, host, port)

    http.futureValue.localAddress.getPort shouldEqual port
    http.futureValue.unbind()
  }
}
Example 88
Source File: HttpClientSpec.scala From akka-api-gateway-example with MIT License | 5 votes |
package jp.co.dzl.example.akka.api.service

import akka.actor.ActorSystem
import akka.stream.scaladsl.Flow
import org.scalatest.{ BeforeAndAfterAll, Matchers, FlatSpec }

import scala.concurrent.Await
import scala.concurrent.duration.Duration

class HttpClientSpec extends FlatSpec with Matchers with BeforeAndAfterAll {
  implicit val system = ActorSystem("http-client-spec")
  implicit val executor = system.dispatcher

  override protected def afterAll: Unit = {
    Await.result(system.terminate(), Duration.Inf)
  }

  "#conectionHttps" should "return outgoing connection flow" in {
    val httpClient = new HttpClientImpl(system)
    val connection = httpClient.connectionHttps("127.0.0.1", 8000, 5)

    connection shouldBe a[Flow[_, _, _]]
  }
}
Example 89
Source File: GitHubSpec.scala From akka-api-gateway-example with MIT License | 5 votes |
package jp.co.dzl.example.akka.api.service import akka.actor.ActorSystem import akka.http.scaladsl.model.headers.RawHeader import akka.http.scaladsl.model.{ HttpMethods, HttpRequest, HttpResponse } import akka.stream.ActorMaterializer import akka.stream.scaladsl.{ Flow, Source } import akka.stream.testkit.scaladsl.TestSink import org.scalamock.scalatest.MockFactory import org.scalatest.concurrent.ScalaFutures import org.scalatest.{ BeforeAndAfterAll, FlatSpec, Matchers } import scala.concurrent.Await import scala.concurrent.duration.Duration class GitHubSpec extends FlatSpec with Matchers with ScalaFutures with BeforeAndAfterAll with MockFactory { implicit val system = ActorSystem("github-spec") implicit val executor = system.dispatcher implicit val materializer = ActorMaterializer() override protected def afterAll: Unit = { Await.result(system.terminate(), Duration.Inf) } "#from" should "merge original headers to github request" in { val github = new GitHubImpl("127.0.0.1", 8000, 5, mock[HttpClient]) val request = HttpRequest(HttpMethods.GET, "/") .addHeader(RawHeader("host", "dummy")) .addHeader(RawHeader("timeout-access", "dummy")) val result = Source.single(HttpRequest(HttpMethods.GET, "/v1/github/users/xxxxxx")) .via(github.from(request)) .runWith(TestSink.probe[HttpRequest]) .request(1) .expectNext() result.headers.filter(_.lowercaseName() == "host") shouldBe empty result.headers.filter(_.lowercaseName() == "timeout-access") shouldBe empty result.headers.filter(_.lowercaseName() == "x-forwarded-host") shouldNot be(empty) } "#send" should "connect using http client" in { val httpResponse = HttpResponse() val httpClient = mock[HttpClient] (httpClient.connectionHttps _).expects(*, *, *).returning(Flow[HttpRequest].map(_ => httpResponse)) val github = new GitHubImpl("127.0.0.1", 8000, 5, httpClient) val result = Source.single(HttpRequest(HttpMethods.GET, "/")) .via(github.send) .runWith(TestSink.probe[HttpResponse]) .request(1) .expectNext() result shouldBe httpResponse } }
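GitHubSpec stubs its collaborator with ScalaMock's MockFactory (`mock[HttpClient]` plus `(... _).expects(...).returning(...)`) rather than Mockito. A minimal sketch of that expectation style against an invented trait, just to show the wiring:

import org.scalamock.scalatest.MockFactory
import org.scalatest.{FlatSpec, Matchers}

// an invented collaborator, only here to have something to mock
trait Clock {
  def now(): Long
}

class ScalaMockSketchSpec extends FlatSpec with Matchers with MockFactory {

  it should "set an expectation and a canned return value" in {
    val clock = mock[Clock]
    (clock.now _).expects().returning(42L)

    clock.now() shouldBe 42L
  }
}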
Example 90
Source File: SqsClientSpec.scala From akka-stream-sqs with Apache License 2.0 | 5 votes |
package me.snov.akka.sqs.client import com.amazonaws.ClientConfiguration import com.amazonaws.auth.AWSCredentialsProvider import com.amazonaws.services.sqs.{AmazonSQS, AmazonSQSAsync} import com.amazonaws.services.sqs.model.{ReceiveMessageRequest, ReceiveMessageResult} import org.scalatest.mockito.MockitoSugar.mock import org.scalatest.{FlatSpec, Matchers} import org.mockito.Mockito._ import org.mockito.ArgumentMatchers._ class SqsClientSpec extends FlatSpec with Matchers { it should "call AWS client" in { val awsClient = mock[AmazonSQSAsync] val sqsClientSettings = SqsSettings( awsCredentialsProvider = Some(mock[AWSCredentialsProvider]), awsClientConfiguration = Some(mock[ClientConfiguration]), awsClient = Some(awsClient), queueUrl = "" ) val sqsClient = SqsClient(sqsClientSettings) val receiveMessageResult = mock[ReceiveMessageResult] when(awsClient.receiveMessage(any[ReceiveMessageRequest])).thenReturn(receiveMessageResult) sqsClient.receiveMessage() verify(receiveMessageResult).getMessages } it should "pass parameters with ReceiveMessageRequest" in { val awsClient = mock[AmazonSQSAsync] val sqsClientSettings = SqsSettings( queueUrl = "", maxNumberOfMessages = 9, waitTimeSeconds = 7, awsCredentialsProvider = Some(mock[AWSCredentialsProvider]), awsClientConfiguration = Some(mock[ClientConfiguration]), awsClient = Some(awsClient), visibilityTimeout = Some(75) ) val sqsClient = SqsClient(sqsClientSettings) val receiveMessageResult = mock[ReceiveMessageResult] val receiveMessageRequest = new ReceiveMessageRequest() .withQueueUrl("") .withMaxNumberOfMessages(9) .withVisibilityTimeout(75) .withWaitTimeSeconds(7) when(awsClient.receiveMessage(receiveMessageRequest)).thenReturn(receiveMessageResult) sqsClient.receiveMessage() verify(receiveMessageResult).getMessages } }
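SqsClientSpec combines ScalaTest's MockitoSugar (`mock[...]`) with Mockito's own `when`, `verify` and `any`. A stripped-down sketch of that stub-then-verify flow, using an invented Greeter trait in place of the AWS client:

import org.mockito.Mockito.{verify, when}
import org.scalatest.mockito.MockitoSugar.mock
import org.scalatest.{FlatSpec, Matchers}

// an invented collaborator to stub and verify
trait Greeter {
  def greet(name: String): String
}

class MockitoSketchSpec extends FlatSpec with Matchers {

  it should "stub a call and verify the interaction afterwards" in {
    val greeter = mock[Greeter]
    when(greeter.greet("world")).thenReturn("hello world")

    greeter.greet("world") shouldBe "hello world"

    verify(greeter).greet("world")
  }
}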
Example 91
Source File: SqsClientSettingsSpec.scala From akka-stream-sqs with Apache License 2.0 | 5 votes |
package me.snov.akka.sqs.client import com.amazonaws.ClientConfiguration import com.amazonaws.auth.AWSCredentialsProvider import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration import com.typesafe.config.ConfigFactory import org.scalatest.{FlatSpec, Matchers} import org.scalatest.mockito.MockitoSugar._ class SqsClientSettingsSpec extends FlatSpec with Matchers { it should "parse configuration" in { val conf = ConfigFactory.parseString( """ reactive-sqs { endpoint = "http://localhost:9324/" region = "eu-west-1" queue-url = "http://localhost:9324/queue/queue1" max-number-of-messages = 10 visibility-timeout = 60 wait-time-seconds = 5 } """) .getConfig("reactive-sqs") val settings = SqsSettings( conf, Some(mock[AWSCredentialsProvider]), Some(mock[ClientConfiguration]) ) settings.endpoint.get.getServiceEndpoint shouldBe "http://localhost:9324/" settings.endpoint.get.getSigningRegion shouldBe "eu-west-1" settings.queueUrl shouldBe "http://localhost:9324/queue/queue1" settings.maxNumberOfMessages shouldBe 10 settings.visibilityTimeout shouldBe Some(60) settings.waitTimeSeconds shouldBe 5 } it should "support optional parameters" in { val conf = ConfigFactory.parseString( """ reactive-sqs { queue-url = "http://localhost:9324/queue/queue1" wait-time-seconds = 5 } """) .getConfig("reactive-sqs") val settings = SqsSettings( conf, Some(mock[AWSCredentialsProvider]), Some(mock[ClientConfiguration]) ) settings.endpoint shouldBe None settings.queueUrl shouldBe "http://localhost:9324/queue/queue1" settings.maxNumberOfMessages shouldBe 10 settings.visibilityTimeout shouldBe None settings.waitTimeSeconds shouldBe 5 } }
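Parsing an inline HOCON snippet with ConfigFactory.parseString, as SqsClientSettingsSpec does, keeps configuration tests independent of whatever application.conf happens to be on the classpath. A minimal sketch with invented keys:

import com.typesafe.config.ConfigFactory
import org.scalatest.{FlatSpec, Matchers}

class InlineConfigSpec extends FlatSpec with Matchers {

  it should "read settings from an inline HOCON string" in {
    val conf = ConfigFactory.parseString(
      """
        my-app {
          host = "localhost"
          port = 8080
        }
      """).getConfig("my-app")

    conf.getString("host") shouldBe "localhost"
    conf.getInt("port") shouldBe 8080
  }
}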
Example 92
Source File: SqsAckSinkShapeSpec.scala From akka-stream-sqs with Apache License 2.0 | 5 votes |
package me.snov.akka.sqs.shape import akka.Done import akka.stream.scaladsl.{Keep, Sink} import akka.stream.testkit.scaladsl.TestSource import com.amazonaws.handlers.AsyncHandler import com.amazonaws.services.sqs.model._ import me.snov.akka.sqs._ import me.snov.akka.sqs.client.SqsClient import org.mockito.Mockito._ import org.mockito.ArgumentMatchers._ import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer import org.scalatest.mockito.MockitoSugar.mock import org.scalatest.{FlatSpec, Matchers} import scala.concurrent.Await import scala.concurrent.duration._ class SqsAckSinkShapeSpec extends FlatSpec with Matchers with DefaultTestContext { it should "delete messages on Ack" in { val sqsClient = mock[SqsClient] when(sqsClient.deleteAsync(any(), any())).thenAnswer( new Answer[Object] { override def answer(invocation: InvocationOnMock): Object = { val receiptHandle = invocation.getArgument[String](0) val callback = invocation.getArgument[AsyncHandler[DeleteMessageRequest, DeleteMessageResult]](1) callback.onSuccess( new DeleteMessageRequest().withReceiptHandle(receiptHandle), new DeleteMessageResult ) None } } ) val (probe, future) = TestSource.probe[MessageActionPair] .toMat(Sink.fromGraph(SqsAckSinkShape(sqsClient)))(Keep.both) .run() probe .sendNext((new Message().withReceiptHandle("123"), Ack())) .sendComplete() Await.result(future, 1.second) shouldBe Done verify(sqsClient, times(1)).deleteAsync(any(), any()) } it should "requeue messages on RequeueWithDelay" in { val sqsClient = mock[SqsClient] when(sqsClient.sendWithDelayAsync(any[String], any[Int], any())).thenAnswer( new Answer[Object] { override def answer(invocation: InvocationOnMock): Object = { val body = invocation.getArgument[String](0) val delay = invocation.getArgument[Int](1) val callback = invocation.getArgument[AsyncHandler[SendMessageRequest, SendMessageResult]](2) callback.onSuccess( new SendMessageRequest().withMessageBody(body).withDelaySeconds(delay), new SendMessageResult().withMessageId("12345") ) None } } ) val (probe, future) = TestSource.probe[MessageActionPair] .toMat(Sink.fromGraph(SqsAckSinkShape(sqsClient)))(Keep.both) .run() probe .sendNext((new Message().withBody("foo"), RequeueWithDelay(9))) .sendComplete() Await.result(future, 100.second) shouldBe Done verify(sqsClient, times(1)).sendWithDelayAsync(any(), any(), any()) } }
Example 93
Source File: SearchInputWithRulesSpec.scala From smui with Apache License 2.0 | 5 votes |
package models import java.sql.Connection import models.rules.{SynonymRule, SynonymRuleId} import org.scalatest.{FlatSpec, Matchers} import utils.WithInMemoryDB class SearchInputWithRulesSpec extends FlatSpec with Matchers with WithInMemoryDB with TestData { private val tag = InputTag.create(None, Some("tenant"), "MO", exported = true) "SearchInputWithRules" should "load lists with hundreds of entries successfully" in { db.withConnection { implicit conn => SolrIndex.insert(indexDe) SolrIndex.insert(indexEn) InputTag.insert(tag) insertInputs(300, indexDe.id, "term_de") insertInputs(200, indexEn.id, "term_en") val inputsDe = SearchInputWithRules.loadWithUndirectedSynonymsAndTagsForSolrIndexId(indexDe.id) inputsDe.size shouldBe 300 for (input <- inputsDe) { input.term should startWith("term_de_") input.tags.size shouldBe 1 input.tags.head.displayValue shouldBe "tenant:MO" input.synonymRules.size shouldBe 1 // Only undirected synonyms should be loaded input.synonymRules.head.term should startWith("term_de_synonym_") } SearchInputWithRules.loadWithUndirectedSynonymsAndTagsForSolrIndexId(indexEn.id).size shouldBe 200 } } private def insertInputs(count: Int, indexId: SolrIndexId, termPrefix: String)(implicit conn: Connection): Unit = { for (i <- 0 until count) { val input = SearchInput.insert(indexId, s"${termPrefix}_$i") SynonymRule.updateForSearchInput(input.id, Seq( SynonymRule(SynonymRuleId(), SynonymRule.TYPE_UNDIRECTED, s"${termPrefix}_synonym_$i", isActive = true), SynonymRule(SynonymRuleId(), SynonymRule.TYPE_DIRECTED, s"${termPrefix}_directedsyn_$i", isActive = true), )) TagInputAssociation.updateTagsForSearchInput(input.id, Seq(tag.id)) } } "SearchInputWithRules" should "be (de)activatable" in { db.withConnection { implicit conn => SolrIndex.insert(indexDe) val input = SearchInput.insert(indexDe.id, "my input") input.isActive shouldBe true SearchInput.update(input.id, input.term, false, input.comment) SearchInput.loadById(input.id).get.isActive shouldBe false SearchInput.update(input.id, input.term, true, input.comment) SearchInput.loadById(input.id).get.isActive shouldBe true } } "SearchInputWithRules" should "have a modifiable comment" in { db.withConnection { implicit conn => SolrIndex.insert(indexDe) val input = SearchInput.insert(indexDe.id, "my input") input.comment shouldBe "" SearchInput.update(input.id, input.term, input.isActive, "My #magic comment.") SearchInput.loadById(input.id).get.comment shouldBe "My #magic comment." SearchInput.update(input.id, input.term, input.isActive, "My #magic comment - updated.") SearchInput.loadById(input.id).get.comment shouldBe "My #magic comment - updated." } } }
Example 94
Source File: RulesTxtDeploymentServiceSpec.scala From smui with Apache License 2.0 | 5 votes |
package models import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.util.zip.ZipInputStream import org.apache.commons.io.IOUtils import org.scalatest.{FlatSpec, Matchers} class RulesTxtDeploymentServiceSpec extends FlatSpec with Matchers with ApplicationTestBase { private lazy val service = injector.instanceOf[RulesTxtDeploymentService] private var inputIds: Seq[SearchInputId] = Seq.empty override protected def beforeAll(): Unit = { super.beforeAll() createTestCores() inputIds = createTestRule() } private def rulesFileContent(ruleIds: Seq[SearchInputId]): String = s"""aerosmith => | SYNONYM: mercury | DOWN(10): battery | UP(10): notebook | FILTER: zz top | @{ | "_log" : "${ruleIds.head}" | }@ | |mercury => | SYNONYM: aerosmith | DOWN(10): battery | UP(10): notebook | FILTER: zz top | @{ | "_log" : "${ruleIds.head}" | }@ | |shipping => | DECORATE: REDIRECT http://xyz.com/shipping | @{ | "_log" : "${ruleIds.last}" | }@""".stripMargin "RulesTxtDeploymentService" should "generate rules files with correct file names" in { val rulesTxt = service.generateRulesTxtContentWithFilenames(core1Id, "LIVE", logDebug = false) rulesTxt.solrIndexId shouldBe core1Id rulesTxt.decompoundRules shouldBe empty rulesTxt.regularRules.content.trim shouldBe rulesFileContent(inputIds) rulesTxt.regularRules.sourceFileName shouldBe "/tmp/search-management-ui_rules-txt.tmp" rulesTxt.regularRules.destinationFileName shouldBe "/usr/bin/solr/liveCore/conf/rules.txt" } it should "validate the rules files correctly" in { val rulesTxt = service.generateRulesTxtContentWithFilenames(core1Id, "LIVE", logDebug = false) service.validateCompleteRulesTxts(rulesTxt, logDebug = false) shouldBe empty val badRulesTxt = rulesTxt.copy(regularRules = rulesTxt.regularRules.copy(content = "a very bad rules file")) service.validateCompleteRulesTxts(badRulesTxt, logDebug = false) shouldBe List("Line 1: Missing input for instruction") } it should "provide a zip file with all rules files" in { val out = new ByteArrayOutputStream() service.writeAllRulesTxtFilesAsZipFileToStream(out) val bytes = out.toByteArray val zipStream = new ZipInputStream(new ByteArrayInputStream(bytes)) val firstEntry = zipStream.getNextEntry firstEntry.getName shouldBe "rules_core1.txt" IOUtils.toString(zipStream, "UTF-8").trim shouldBe rulesFileContent(inputIds) val secondEntry = zipStream.getNextEntry secondEntry.getName shouldBe "rules_core2.txt" IOUtils.toString(zipStream, "UTF-8").trim shouldBe "" } }
Example 95
Source File: DBCompatibilitySpec.scala From smui with Apache License 2.0 | 5 votes |
package models import java.time.LocalDateTime import models.rules._ import org.scalatest.{FlatSpec, Matchers} import play.api.db.Database abstract class DBCompatibilitySpec extends FlatSpec with Matchers with TestData { protected def db: Database // Set millis/nanos of second to 0 since MySQL does not save them // and so comparisons would fail if they were set private val now = LocalDateTime.now().withNano(0) "Most important DB queries" should "work using this database" in { db.withConnection { implicit conn => SolrIndex.insert(indexDe) SolrIndex.loadNameById(indexDe.id) shouldBe indexDe.name SolrIndex.listAll shouldBe Seq(indexDe) val tag = InputTag(InputTagId(), Some(indexDe.id), Some("testProperty"), "testValue", exported = true, predefined = false, now) InputTag.insert(tag) InputTag.loadAll() shouldBe Seq(tag) val input = SearchInput.insert(indexDe.id, "test") val inputWithRules = SearchInputWithRules(input.id, input.term, List(SynonymRule(SynonymRuleId(), SynonymRule.TYPE_UNDIRECTED, "testSynonym", isActive = true)), List(UpDownRule(UpDownRuleId(), UpDownRule.TYPE_UP, 5, "upDownTerm", isActive = true)), List(FilterRule(FilterRuleId(), "filterTerm", isActive = true)), List(DeleteRule(DeleteRuleId(), "deleteTerm", isActive = true)), List(RedirectRule(RedirectRuleId(), "/testTarget", isActive = true)), List(tag), true, "Some search input comment." ) SearchInputWithRules.update(inputWithRules) SearchInputWithRules.loadById(input.id) shouldBe Some(inputWithRules) SearchInputWithRules.loadWithUndirectedSynonymsAndTagsForSolrIndexId(indexDe.id) shouldBe Seq( inputWithRules.copy(upDownRules = Nil, filterRules = Nil, deleteRules = Nil, redirectRules = Nil) ) SearchInputWithRules.delete(input.id) SearchInputWithRules.loadById(input.id) shouldBe None val field1 = SuggestedSolrField.insert(indexDe.id, "title") val field2 = SuggestedSolrField.insert(indexDe.id, "description") SuggestedSolrField.listAll(indexDe.id).toSet shouldBe Set(field1, field2) InputTag.deleteByIds(Seq(tag.id)) InputTag.loadAll() shouldBe Nil } } }
Example 96
Source File: DistancesSpec.scala From FunctionalProgrammingExercises with MIT License | 5 votes |
package com.cutajarjames.exercise1 import org.scalatest.{FlatSpec, Matchers} class DistancesSpec extends FlatSpec with Matchers { it should "output an empty list if empty input" in { (new Distances).allDistancesFurther(Array(), 10) shouldEqual Seq() } it should "output an empty list if only one input" in { (new Distances).allDistancesFurther(Array((1,2)), 10) shouldEqual Seq() } it should "output only one distance for two points" in { (new Distances).allDistancesFurther(Array((0,0), (30,40)), 10).map(math.round(_).toInt) shouldEqual Seq(50) } it should "output only an empty list for two points if min distance is smaller" in { (new Distances).allDistancesFurther(Array((0,0), (30,40)), 100) shouldEqual Seq() } it should "output 10 distance points for 5 points" in { val inputs = Array((0,0), (30,40), (25,3), (78,22), (97,12)) (new Distances).allDistancesFurther(inputs, -1).map(math.round(_).toInt) shouldEqual Seq(50, 25, 81, 98, 37, 51, 73, 56, 73, 21) } it should "output 9 distance points for 5 points if one gets filtered" in { val inputs = Array((0,0), (30,40), (25,34), (78,22), (97,12)) (new Distances).allDistancesFurther(inputs, 10).map(math.round(_).toInt) shouldEqual Seq(50, 42, 81, 98, 51, 73, 54, 75, 21) } it should "output empty list for 5 points if all gets filtered" in { val inputs = Array((0,0), (30,40), (25,34), (78,22), (97,12)) (new Distances).allDistancesFurther(inputs, 100).map(math.round(_).toInt) shouldEqual Seq() } }
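Rounding the computed distances before comparing, as DistancesSpec does with `.map(math.round(_).toInt)`, sidesteps floating-point noise. ScalaTest's Matchers also provide a tolerance operator for single values; a small sketch of both approaches (values invented):

import org.scalatest.{FlatSpec, Matchers}

class ApproximateComparisonSpec extends FlatSpec with Matchers {

  it should "compare doubles after rounding them to integers" in {
    val computed = Seq(49.9999999, 25.0000004)
    computed.map(math.round(_).toInt) shouldEqual Seq(50, 25)
  }

  it should "compare a single double using a +- tolerance" in {
    (0.1 + 0.2) shouldEqual (0.3 +- 1e-9)
  }
}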
Example 97
Source File: SimpleSolverSpec.scala From FunctionalProgrammingExercises with MIT License | 5 votes |
package com.cutajarjames.exercise6 import org.scalatest.{FlatSpec, Matchers} class SimpleSolverSpec extends FlatSpec with Matchers { it should "be able to do 3 plus 2" in { (new SimpleSolver).solve(Addition(3,2)) shouldEqual Some(5.0) } it should "be able to do 3 times 2" in { (new SimpleSolver).solve(Multiplication(3,2)) shouldEqual Some(6.0) } it should "be able to do 3 minus 2" in { (new SimpleSolver).solve(Subtraction(3,2)) shouldEqual Some(1.0) } it should "be able to do 3 div 2" in { (new SimpleSolver).solve(Division(3,2)) shouldEqual Some(1.5) } it should "be able to do 3 div 0 by return None" in { (new SimpleSolver).solve(Division(3,0)) shouldEqual None } }
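SimpleSolverSpec checks an Option-returning API with `shouldEqual Some(...)` and `shouldEqual None`. A self-contained sketch of the same assertions against an invented safeDiv helper; Matchers additionally allow `should contain` directly on an Option:

import org.scalatest.{FlatSpec, Matchers}

class OptionAssertionSpec extends FlatSpec with Matchers {

  // an invented total division that signals failure with None
  private def safeDiv(a: Double, b: Double): Option[Double] =
    if (b == 0) None else Some(a / b)

  it should "wrap a successful result in Some" in {
    safeDiv(3, 2) shouldEqual Some(1.5)
  }

  it should "return None instead of throwing on division by zero" in {
    safeDiv(3, 0) shouldEqual None
  }

  it should "support the Option-specific contain matcher as well" in {
    safeDiv(3, 2) should contain(1.5)
  }
}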
Example 98
Source File: FindingHomeSpec.scala From FunctionalProgrammingExercises with MIT License | 5 votes |
package com.cutajarjames.exercise4

import org.scalatest.{FlatSpec, Matchers}

class FindingHomeSpec extends FlatSpec with Matchers {
  it should "output the user.home" in {
    System.setProperty("user.home", "/Users/cutajar")
    System.setProperty("doc.home", "/documents")
    System.setProperty("appdata.home", "/appdata")
    (new FindingHome).findAHome() shouldEqual Some("/Users/cutajar")
  }

  it should "output the doc.home" in {
    System.getProperties.remove("user.home")
    System.setProperty("doc.home", "/documents")
    System.setProperty("appdata.home", "/appdata")
    (new FindingHome).findAHome() shouldEqual Some("/documents")
  }

  it should "output the /appdata" in {
    System.getProperties.remove("user.home")
    System.getProperties.remove("doc.home")
    System.setProperty("appdata.home", "/appdata")
    (new FindingHome).findAHome() shouldEqual Some("/appdata")
  }

  it should "output None if none of the properties are set" in {
    System.getProperties.remove("user.home")
    System.getProperties.remove("doc.home")
    System.getProperties.remove("appdata.home")
    (new FindingHome).findAHome() shouldEqual None
  }

  it should "output None if properties are set to whitespace or empty strings" in {
    System.setProperty("user.home", " ")
    System.setProperty("doc.home", " ")
    System.setProperty("appdata.home", "")
    (new FindingHome).findAHome() shouldEqual None
  }
}
Example 99
Source File: SecretSantaSpec.scala From FunctionalProgrammingExercises with MIT License | 5 votes |
package com.cutajarjames.exercise4 import org.scalatest.{FlatSpec, Matchers} class SecretSantaSpec extends FlatSpec with Matchers { it should "output the correct pairing for 3 items" in { val pairing = (new SecretSanta).generateGiftPairing(List("James", "Ruth", "Isabel")) pairing.keys shouldEqual Set("James", "Ruth", "Isabel") pairing.values.size shouldEqual 3 pairing.values.toSet shouldEqual Set("James", "Ruth", "Isabel") pairing.foreach{case (v,k) => v shouldNot be(k)} } it should "output the correct pairing for 10 items" in { val pairing = (new SecretSanta).generateGiftPairing(List("a", "b", "c", "d", "e", "f", "g", "h", "i", "k")) pairing.keys shouldEqual Set("a", "b", "c", "d", "e", "f", "g", "h", "i", "k") pairing.values.size shouldEqual 10 pairing.values.toSet shouldEqual Set("a", "b", "c", "d", "e", "f", "g", "h", "i", "k") pairing.foreach{case (v,k) => v shouldNot be(k)} } it should "output self pairing in case of one name" in { val pairing = (new SecretSanta).generateGiftPairing(List("James")) pairing.size shouldEqual 1 pairing.get("James") should contain("James") } }
Example 100
Source File: SwapElementsSpec.scala From FunctionalProgrammingExercises with MIT License | 5 votes |
package com.cutajarjames.exercise7 import org.scalatest.{FlatSpec, Matchers} class SwapElementsSpec extends FlatSpec with Matchers { it should "swap the first two elements with list of 4 items" in { (new SwapElements).swapFirstAndSecond(List(1, 2, 3, 4)) shouldEqual List(2, 1, 3, 4) } it should "not swap anything if the list is one element" in { (new SwapElements).swapFirstAndSecond(List(9)) shouldEqual List(9) } it should "not swap anything if the list is empty" in { (new SwapElements).swapFirstAndSecond(List()) shouldEqual List() } it should "swap the first two elements with list of 2 items" in { (new SwapElements).swapFirstAndSecond(List(5, 9)) shouldEqual List(9, 5) } }
Example 101
Source File: LazyPrimesSpec.scala From FunctionalProgrammingExercises with MIT License | 5 votes |
package com.cutajarjames.exercise3 import org.scalatest.{FlatSpec, Matchers} class LazyPrimesSpec extends FlatSpec with Matchers { it should "output the correct sequence of the first 100 primes" in { (new LazyPrimes).allPrimes.take(100) shouldEqual Seq(2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, 523, 541) } it should "output the correct sequence of the 1000th prime" in { (new LazyPrimes).allPrimes(1000) shouldEqual 7927 } }
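LazyPrimesSpec exercises an infinite, lazily evaluated sequence by forcing only a prefix (`take(100)`) or a single index. A minimal sketch of that testing technique using a plain Stream of natural numbers (names invented; Stream is the pre-2.13 lazy sequence used in projects of this vintage):

import org.scalatest.{FlatSpec, Matchers}

class LazySequenceSpec extends FlatSpec with Matchers {

  // infinite and lazy: only the elements a test demands are ever computed
  private val naturals: Stream[Int] = Stream.from(0)

  it should "force just the prefix requested by take" in {
    naturals.take(5).toList shouldEqual List(0, 1, 2, 3, 4)
  }

  it should "allow indexed access, like allPrimes(1000) above" in {
    naturals(1000) shouldEqual 1000
  }
}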
Example 102
Source File: HyperLogLog.scala From spark-hyperloglog with Apache License 2.0 | 5 votes |
package com.mozilla.spark.sql.hyperloglog.test import com.mozilla.spark.sql.hyperloglog.aggregates._ import com.mozilla.spark.sql.hyperloglog.functions._ import org.apache.spark.sql.SQLContext import org.apache.spark.sql.functions._ import org.apache.spark.{SparkConf, SparkContext} import org.scalatest.{FlatSpec, Matchers} class HyperLogLogTest extends FlatSpec with Matchers{ "Algebird's HyperLogLog" can "be used from Spark" in { val sparkConf = new SparkConf().setAppName("HyperLogLog") sparkConf.setMaster(sparkConf.get("spark.master", "local[1]")) val sc = new SparkContext(sparkConf) val sqlContext = new SQLContext(sc) import sqlContext.implicits._ val hllMerge = new HyperLogLogMerge sqlContext.udf.register("hll_merge", hllMerge) sqlContext.udf.register("hll_create", hllCreate _) sqlContext.udf.register("hll_cardinality", hllCardinality _) val frame = sc.parallelize(List("a", "b", "c", "c"), 4).toDF("id") val count = frame .select(expr("hll_create(id, 12) as hll")) .groupBy() .agg(expr("hll_cardinality(hll_merge(hll)) as count")) .collect() count(0)(0) should be (3) } }
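Note the verb in the example above: FlatSpec accepts `can` (and `must`) in addition to `should`, which is why the test reads "Algebird's HyperLogLog" can "be used from Spark". A tiny sketch of the `can` clause with an invented subject:

import org.scalatest.{FlatSpec, Matchers}

class CanVerbSpec extends FlatSpec with Matchers {

  "A Vector" can "be grown by appending an element" in {
    (Vector(1, 2) :+ 3) shouldEqual Vector(1, 2, 3)
  }

  it can "be grown by prepending an element as well" in {
    (0 +: Vector(1, 2)) shouldEqual Vector(0, 1, 2)
  }
}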
Example 103
Source File: GenericFlatSpecSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.test import org.scalatest.FlatSpec import org.apache.spark.sql.Dataset class GenericFlatSpecSuite extends FlatSpec with SharedSparkSession { import testImplicits._ private def ds = Seq((1, 1), (2, 1), (3, 2), (4, 2), (5, 3), (6, 3), (7, 4), (8, 4)).toDS "A Simple Dataset" should "have the specified number of elements" in { assert(8 === ds.count) } it should "have the specified number of unique elements" in { assert(8 === ds.distinct.count) } it should "have the specified number of elements in each column" in { assert(8 === ds.select("_1").count) assert(8 === ds.select("_2").count) } it should "have the correct number of distinct elements in each column" in { assert(8 === ds.select("_1").distinct.count) assert(4 === ds.select("_2").distinct.count) } }
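GenericFlatSpecSuite names the subject once ("A Simple Dataset" should ...) and keeps referring to it with `it should`, relying on plain `assert(... === ...)` instead of the Matchers DSL. A stripped-down sketch of that style with an invented subject:

import org.scalatest.FlatSpec

class PlainAssertSpec extends FlatSpec {

  private def xs = Seq(1, 2, 3, 4)

  "A small sequence" should "report its size via assert and ===" in {
    assert(4 === xs.size)
  }

  it should "keep addressing the same subject in follow-up tests" in {
    assert(xs.distinct.size === 4)
  }
}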
Example 104
Source File: AccessTokenSpec.scala From akka-http-oauth2-client with Apache License 2.0 | 5 votes |
package com.github.dakatsuka.akka.http.oauth2.client import akka.actor.ActorSystem import akka.http.scaladsl.model.{ HttpEntity, HttpResponse, StatusCodes } import akka.http.scaladsl.model.ContentTypes.`application/json` import akka.stream.{ ActorMaterializer, Materializer } import org.scalatest.concurrent.ScalaFutures import org.scalatest.time.{ Millis, Seconds, Span } import org.scalatest.{ BeforeAndAfterAll, DiagrammedAssertions, FlatSpec } import scala.concurrent.{ Await, ExecutionContext } import scala.concurrent.duration.Duration class AccessTokenSpec extends FlatSpec with DiagrammedAssertions with ScalaFutures with BeforeAndAfterAll { implicit val system: ActorSystem = ActorSystem() implicit val ec: ExecutionContext = system.dispatcher implicit val materializer: Materializer = ActorMaterializer() implicit val defaultPatience: PatienceConfig = PatienceConfig(timeout = Span(5, Seconds), interval = Span(700, Millis)) override def afterAll(): Unit = { Await.ready(system.terminate(), Duration.Inf) } behavior of "AccessToken" it should "apply from HttpResponse" in { val accessToken = "xxx" val tokenType = "bearer" val expiresIn = 86400 val refreshToken = "yyy" val httpResponse = HttpResponse( status = StatusCodes.OK, headers = Nil, entity = HttpEntity( `application/json`, s""" |{ | "access_token": "$accessToken", | "token_type": "$tokenType", | "expires_in": $expiresIn, | "refresh_token": "$refreshToken" |} """.stripMargin ) ) val result = AccessToken(httpResponse) whenReady(result) { token => assert(token.accessToken == accessToken) assert(token.tokenType == tokenType) assert(token.expiresIn == expiresIn) assert(token.refreshToken.contains(refreshToken)) } } }
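AccessTokenSpec waits on the Future with ScalaFutures' `whenReady`, tuned by an implicit PatienceConfig. A minimal sketch of that pattern in isolation; the timeout and interval values are arbitrary:

import org.scalatest.concurrent.ScalaFutures
import org.scalatest.time.{Millis, Seconds, Span}
import org.scalatest.{FlatSpec, Matchers}

import scala.concurrent.Future

class WhenReadySpec extends FlatSpec with Matchers with ScalaFutures {

  // how long whenReady / futureValue may poll before failing the test
  implicit val defaultPatience: PatienceConfig =
    PatienceConfig(timeout = Span(2, Seconds), interval = Span(50, Millis))

  it should "assert on a future once it completes" in {
    val result = Future.successful(41 + 1)

    whenReady(result) { value =>
      value shouldBe 42
    }
  }
}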
Example 105
Source File: VectorClockOpsTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.vectorclocks import org.scalatest.{FlatSpec, Matchers} class VectorClockOpsTest extends FlatSpec with Matchers { behavior of "Vector Clock Ops" it should "create Vector Clock instance from plain string" in { "A:2".toVectorClock[String] shouldBe VectorClock(Map("A" -> Counter(2))) "A:1, B:2".toVectorClock[String] shouldBe VectorClock(Map("A" -> Counter(1), "B" -> Counter(2))) "A:1, B:1, C:1".toVectorClock[String] shouldBe VectorClock(Map("A" -> Counter(1), "B" -> Counter(1), "C" -> Counter(1))) } it should "create Vector Clock instance from plain string with numerical ids" in { import VectorClockOps.intAsId ("1:2": VectorClock[Int]) shouldBe VectorClock(Map(1 -> Counter(2))) ("1:2, 2:10": VectorClock[Int]) shouldBe VectorClock(Map(1 -> Counter(2), 2 -> Counter(10))) } }
Example 106
Source File: DataTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db import java.util.UUID import justin.db.consistenthashing.NodeId import justin.db.replica.PreferenceList import justin.db.vectorclocks.{Counter, VectorClock} import org.scalatest.{FlatSpec, Matchers} class DataTest extends FlatSpec with Matchers { behavior of "Data" it should "update its empty inner Vector Clock based on preference list" in { // given val data = Data(id = UUID.randomUUID(), value = "some value") val preferenceList = PreferenceList(NodeId(1), List(NodeId(5), NodeId(8))) // when val updatedData = Data.updateVclock(data, preferenceList) // then val expectedVclock = VectorClock[NodeId](Map( NodeId(1) -> Counter(1), NodeId(5) -> Counter(1), NodeId(8) -> Counter(1)) ) updatedData shouldBe Data(data.id, data.value, expectedVclock, updatedData.timestamp) } it should "increase vector clock's counter of repeated nodeId when updating data" in { // given val data = Data(id = UUID.randomUUID(), value = "some value") val preferenceList = PreferenceList(NodeId(1), List(NodeId(1), NodeId(1))) // when val updatedData = Data.updateVclock(data, preferenceList) // then val expectedVclock = VectorClock[NodeId](Map( NodeId(1) -> Counter(3) )) updatedData shouldBe Data(data.id, data.value, expectedVclock, data.timestamp) } it should "increase already existed vector clock's counter when updating data" in { // given val initVClock = VectorClock[NodeId](Map(NodeId(1) -> Counter(3))) val data = Data(id = UUID.randomUUID(), value = "some value", initVClock) val preferenceList = PreferenceList(NodeId(1), List(NodeId(5), NodeId(8))) // when val updatedData = Data.updateVclock(data, preferenceList) // then val expectedVclock = VectorClock[NodeId](Map( NodeId(1) -> Counter(4), NodeId(5) -> Counter(1), NodeId(8) -> Counter(1)) ) updatedData shouldBe Data(data.id, data.value, expectedVclock, data.timestamp) } }
Example 107
Source File: RegisterNodeSerializerTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.kryo import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import com.esotericsoftware.kryo.Kryo import com.esotericsoftware.kryo.io.{Input, Output} import justin.db.actors.protocol.RegisterNode import justin.db.consistenthashing.NodeId import org.scalatest.{FlatSpec, Matchers} class RegisterNodeSerializerTest extends FlatSpec with Matchers { behavior of "RegisterNode Serializer" it should "serialize/deserialize correctly" in { // kryo init val kryo = new Kryo() kryo.register(classOf[RegisterNode], RegisterNodeSerializer) // object val serializedData = RegisterNode(NodeId(1)) // serialization val bos = new ByteArrayOutputStream() val output = new Output(bos) val _ = kryo.writeObject(output, serializedData) output.flush() // deserialization val bis = new ByteArrayInputStream(bos.toByteArray) val input = new Input(bis) val deserializedData = kryo.readObject(input, classOf[RegisterNode]) serializedData shouldBe deserializedData } }
Example 108
Source File: SerializerInitTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.kryo import com.esotericsoftware.kryo.Kryo import org.scalatest.{FlatSpec, Matchers} class SerializerInitTest extends FlatSpec with Matchers { behavior of "SerializerInit" it should "init Kryo serializer" in { val kryo = new Kryo() val serializerInit = new SerializerInit() serializerInit.customize(kryo) // cluster val classId_50 = 50 kryo.getRegistration(classId_50).getId shouldBe 50 kryo.getRegistration(classId_50).getSerializer shouldBe RegisterNodeSerializer kryo.getRegistration(classId_50).getType shouldBe classOf[justin.db.actors.protocol.RegisterNode] // write -- request val classId_60 = 60 kryo.getRegistration(classId_60).getId shouldBe 60 kryo.getRegistration(classId_60).getSerializer shouldBe StorageNodeWriteDataLocalSerializer kryo.getRegistration(classId_60).getType shouldBe classOf[justin.db.actors.protocol.StorageNodeWriteDataLocal] // write -- responses val classId_70 = 70 kryo.getRegistration(classId_70).getId shouldBe 70 kryo.getRegistration(classId_70).getSerializer shouldBe StorageNodeWriteResponseSerializer kryo.getRegistration(classId_70).getType shouldBe classOf[justin.db.actors.protocol.StorageNodeFailedWrite] val classId_71 = 71 kryo.getRegistration(classId_71).getId shouldBe 71 kryo.getRegistration(classId_71).getSerializer shouldBe StorageNodeWriteResponseSerializer kryo.getRegistration(classId_71).getType shouldBe classOf[justin.db.actors.protocol.StorageNodeSuccessfulWrite] val classId_72 = 72 kryo.getRegistration(classId_72).getId shouldBe 72 kryo.getRegistration(classId_72).getSerializer shouldBe StorageNodeWriteResponseSerializer kryo.getRegistration(classId_72).getType shouldBe classOf[justin.db.actors.protocol.StorageNodeConflictedWrite] // read - request val classId_80 = 80 kryo.getRegistration(classId_80).getId shouldBe 80 kryo.getRegistration(classId_80).getSerializer shouldBe StorageNodeLocalReadSerializer kryo.getRegistration(classId_80).getType shouldBe classOf[justin.db.actors.protocol.StorageNodeLocalRead] // read - responses val classId_90 = 90 kryo.getRegistration(classId_90).getId shouldBe 90 kryo.getRegistration(classId_90).getSerializer shouldBe StorageNodeReadResponseSerializer kryo.getRegistration(classId_90).getType shouldBe classOf[justin.db.actors.protocol.StorageNodeFoundRead] val classId_91 = 91 kryo.getRegistration(classId_91).getId shouldBe 91 kryo.getRegistration(classId_91).getSerializer shouldBe StorageNodeReadResponseSerializer kryo.getRegistration(classId_91).getType shouldBe classOf[justin.db.actors.protocol.StorageNodeConflictedRead] val classId_92 = 92 kryo.getRegistration(classId_92).getId shouldBe 92 kryo.getRegistration(classId_92).getSerializer shouldBe StorageNodeReadResponseSerializer kryo.getRegistration(classId_92).getType shouldBe classOf[justin.db.actors.protocol.StorageNodeNotFoundRead] val classId_93 = 93 kryo.getRegistration(classId_93).getId shouldBe 93 kryo.getRegistration(classId_93).getSerializer shouldBe StorageNodeReadResponseSerializer kryo.getRegistration(classId_93).getType shouldBe classOf[justin.db.actors.protocol.StorageNodeFailedRead] } }
Example 109
Source File: DataSerializerTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.kryo import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.util.UUID import com.esotericsoftware.kryo.Kryo import com.esotericsoftware.kryo.io.{Input, Output} import justin.db.Data import justin.db.consistenthashing.NodeId import justin.db.vectorclocks.{Counter, VectorClock} import org.scalatest.{FlatSpec, Matchers} class DataSerializerTest extends FlatSpec with Matchers { behavior of "Data Serializer" it should "serialize/deserialize correctly" in { // kryo init val kryo = new Kryo() kryo.register(classOf[justin.db.Data], DataSerializer) // object val vClock = VectorClock[NodeId](Map(NodeId(1) -> Counter(3))) val timestamp = System.currentTimeMillis() val serializedData = Data(id = UUID.randomUUID(), value = "some value", vClock, timestamp) // serialization val bos = new ByteArrayOutputStream() val output = new Output(bos) val _ = kryo.writeObject(output, serializedData) output.flush() // deserialization val bis = new ByteArrayInputStream(bos.toByteArray) val input = new Input(bis) val deserializedData = kryo.readObject(input, classOf[Data]) serializedData shouldBe deserializedData } }
Example 110
Source File: StorageNodeWriteDataLocalSerializerTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.kryo import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.util.UUID import com.esotericsoftware.kryo.Kryo import com.esotericsoftware.kryo.io.{Input, Output} import justin.db.Data import justin.db.actors.protocol.StorageNodeWriteDataLocal import justin.db.consistenthashing.NodeId import justin.db.vectorclocks.{Counter, VectorClock} import org.scalatest.{FlatSpec, Matchers} class StorageNodeWriteDataLocalSerializerTest extends FlatSpec with Matchers { behavior of "StorageNodeWriteDataLocal Serializer" it should "serialize/deserialize StorageNodeWriteDataLocal" in { // kryo init val kryo = new Kryo() kryo.register(classOf[StorageNodeWriteDataLocal], StorageNodeWriteDataLocalSerializer) // object val data = Data( id = UUID.randomUUID(), value = "some value", vclock = VectorClock[NodeId](Map(NodeId(1) -> Counter(3))), timestamp = System.currentTimeMillis() ) val serializedData = StorageNodeWriteDataLocal(data) // serialization val bos = new ByteArrayOutputStream() val output = new Output(bos) val _ = kryo.writeObject(output, serializedData) output.flush() // deserialization val bis = new ByteArrayInputStream(bos.toByteArray) val input = new Input(bis) val deserializedData = kryo.readObject(input, classOf[StorageNodeWriteDataLocal]) serializedData shouldBe deserializedData } }
Example 111
Source File: StorageNodeLocalReadSerializerTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.kryo import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.util.UUID import com.esotericsoftware.kryo.Kryo import com.esotericsoftware.kryo.io.{Input, Output} import justin.db.actors.protocol.StorageNodeLocalRead import org.scalatest.{FlatSpec, Matchers} class StorageNodeLocalReadSerializerTest extends FlatSpec with Matchers { behavior of "StorageNodeLocalReader Serializer" it should "serialize/deserialize correctly" in { // kryo init val kryo = new Kryo() kryo.register(classOf[StorageNodeLocalRead], StorageNodeLocalReadSerializer) // object val serializedData = StorageNodeLocalRead(UUID.randomUUID()) // serialization val bos = new ByteArrayOutputStream() val output = new Output(bos) val _ = kryo.writeObject(output, serializedData) output.flush() // deserialization val bis = new ByteArrayInputStream(bos.toByteArray) val input = new Input(bis) val deserializedData = kryo.readObject(input, classOf[StorageNodeLocalRead]) serializedData shouldBe deserializedData } }
Example 112
Source File: IsPrimaryOrReplicaTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.replica import java.util.UUID import justin.db.consistenthashing.{NodeId, Ring} import justin.db.storage.PluggableStorageProtocol.DataOriginality import org.scalatest.{FlatSpec, Matchers} class IsPrimaryOrReplicaTest extends FlatSpec with Matchers { behavior of "Data Originality Resolver" it should "reason exemplary data's id as a replica" in { // given val nodeId = NodeId(0) val ring = Ring.apply(nodesSize = 3, partitionsSize = 21) val resolver = new IsPrimaryOrReplica(nodeId, ring) val id = UUID.fromString("179d6eb0-681d-4277-9caf-3d6d60e9faf9") // when val originality = resolver.apply(id) // then originality shouldBe a[DataOriginality.Replica] } it should "reason exemplary data's id as a primary" in { // given val nodeId = NodeId(0) val ring = Ring.apply(nodesSize = 3, partitionsSize = 21) val resolver = new IsPrimaryOrReplica(nodeId, ring) val id = UUID.fromString("16ec44cd-5b4e-4b38-a647-206c1dc11b50") // when val originality = resolver.apply(id) // then originality shouldBe a[DataOriginality.Primary] } }
Example 113
Source File: ReplicaLocalWriterTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.replica.write import java.util.UUID import justin.db.Data import justin.db.actors.protocol.{StorageNodeConflictedWrite, StorageNodeFailedWrite, StorageNodeSuccessfulWrite} import justin.db.consistenthashing.NodeId import justin.db.storage.PluggableStorageProtocol.{Ack, DataOriginality, StorageGetData} import justin.db.storage.{GetStorageProtocol, JustinData, PutStorageProtocol} import justin.db.vectorclocks.{Counter, VectorClock} import org.scalatest.concurrent.ScalaFutures import org.scalatest.{FlatSpec, Matchers} import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.Future import scala.concurrent.duration._ class ReplicaLocalWriterTest extends FlatSpec with Matchers with ScalaFutures { behavior of "Replica Local Writer" override implicit def patienceConfig: PatienceConfig = PatienceConfig(10.seconds, 50.millis) it should "fail to write predecessor to already stored data" in { // given val id = UUID.randomUUID() val data = Data(id, "some-value", VectorClock(Map(NodeId(1) -> Counter(2)))) val newData = Data(id, "some-value-2", VectorClock(Map(NodeId(1) -> Counter(1)))) val writer = new ReplicaLocalWriter(new GetStorageProtocol with PutStorageProtocol { override def get(id: UUID)(resolveOriginality: (UUID) => DataOriginality): Future[StorageGetData] = Future.successful(StorageGetData.Single(data)) override def put(data: JustinData)(resolveOriginality: (UUID) => DataOriginality): Future[Ack] = ??? }) // when val result = writer.apply(newData, null) // then whenReady(result) { _ shouldBe StorageNodeFailedWrite(id) } } it should "get conflicted write when trying to save new data with conflicted vector clock comparing to already existed one" in { // given val id = UUID.randomUUID() val data = Data(id, "some-value", VectorClock(Map(NodeId(1) -> Counter(1)))) val newData = Data(id, "some-value-2", VectorClock(Map(NodeId(2) -> Counter(1)))) val writer = new ReplicaLocalWriter(new GetStorageProtocol with PutStorageProtocol { override def get(id: UUID)(resolveOriginality: (UUID) => DataOriginality): Future[StorageGetData] = Future.successful(StorageGetData.Single(data)) override def put(data: JustinData)(resolveOriginality: (UUID) => DataOriginality): Future[Ack] = Ack.future }) // when val result = writer.apply(newData, null) // then whenReady(result) { _ shouldBe StorageNodeConflictedWrite(data, newData) } } it should "get successful write when trying to save new data with consequent vector clock comparing to already existed one" in { // given val id = UUID.randomUUID() val data = Data(id, "some-value", VectorClock(Map(NodeId(1) -> Counter(1)))) val newData = Data(id, "some-value-2", VectorClock(Map(NodeId(1) -> Counter(2)))) val writer = new ReplicaLocalWriter(new GetStorageProtocol with PutStorageProtocol { override def get(id: UUID)(resolveOriginality: (UUID) => DataOriginality): Future[StorageGetData] = Future.successful(StorageGetData.Single(data)) override def put(data: JustinData)(resolveOriginality: (UUID) => DataOriginality): Future[Ack] = Ack.future }) // when val result = writer.apply(newData, null) // then whenReady(result) { _ shouldBe StorageNodeSuccessfulWrite(id) } } }
Example 114
Source File: ReplicaWriteAgreementTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.replica.write import java.util.UUID import justin.db.actors.protocol.{StorageNodeFailedWrite, StorageNodeSuccessfulWrite} import justin.db.replica.W import org.scalatest.{FlatSpec, Matchers} class ReplicaWriteAgreementTest extends FlatSpec with Matchers { behavior of "Reach Consensus of Replicated Writes" it should "agreed on \"SuccessfulWrite\" if number of successful write is not less than client expectations" in { // given val w = W(2) val writes = List(StorageNodeSuccessfulWrite(UUID.randomUUID()), StorageNodeSuccessfulWrite(UUID.randomUUID()), StorageNodeFailedWrite(UUID.randomUUID())) // when val result = new ReplicaWriteAgreement().reach(w)(writes) // then result shouldBe WriteAgreement.Ok } it should "agreed on \"NotEnoughWrites\" if number of successful write is less than client expectations" in { // given val w = W(3) val writes = List(StorageNodeSuccessfulWrite(UUID.randomUUID()), StorageNodeSuccessfulWrite(UUID.randomUUID()), StorageNodeFailedWrite(UUID.randomUUID())) // when val result = new ReplicaWriteAgreement().reach(w)(writes) // then result shouldBe WriteAgreement.NotEnoughWrites } }
Example 115
Source File: ReplicaLocalReaderTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.replica.read import java.util.UUID import justin.db.Data import justin.db.actors.protocol.{StorageNodeFailedRead, StorageNodeFoundRead, StorageNodeNotFoundRead} import justin.db.consistenthashing.NodeId import justin.db.storage.GetStorageProtocol import justin.db.storage.PluggableStorageProtocol.{DataOriginality, StorageGetData} import justin.db.vectorclocks.VectorClock import org.scalatest.concurrent.ScalaFutures import org.scalatest.{FlatSpec, Matchers} import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration._ import scala.concurrent.Future class ReplicaLocalReaderTest extends FlatSpec with Matchers with ScalaFutures { behavior of "Replica Local Reader" override implicit def patienceConfig: PatienceConfig = PatienceConfig(10.seconds, 50.millis) it should "found data for existing key" in { // given val id = UUID.randomUUID() val data = Data(id, "value", VectorClock[NodeId]().increase(NodeId(1))) val service = new ReplicaLocalReader(new GetStorageProtocol { override def get(id: UUID)(resolveOriginality: (UUID) => DataOriginality): Future[StorageGetData] = { Future.successful(StorageGetData.Single(data)) } }) // when val result = service.apply(id, null) // then whenReady(result) { _ shouldBe StorageNodeFoundRead(data) } } it should "not found data for non-existing key" in { // given val id = UUID.randomUUID() val service = new ReplicaLocalReader(new GetStorageProtocol { override def get(id: UUID)(resolveOriginality: (UUID) => DataOriginality): Future[StorageGetData] = { Future.successful(StorageGetData.None) } }) // when val result = service.apply(id, null) // then whenReady(result) { _ shouldBe StorageNodeNotFoundRead(id) } } it should "recover failure reading" in { // given val id = UUID.randomUUID() val service = new ReplicaLocalReader(new GetStorageProtocol { override def get(id: UUID)(resolveOriginality: (UUID) => DataOriginality): Future[StorageGetData] = Future.failed(new Exception) }) // when val result = service.apply(id, null) // then whenReady(result) { _ shouldBe StorageNodeFailedRead(id) } } }
Example 116
Source File: UnmarshallersTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.httpapi import java.util.UUID import org.scalatest.{FlatSpec, Matchers} import spray.json.{DeserializationException, JsNumber, JsString} class UnmarshallersTest extends FlatSpec with Matchers { behavior of "Unmarshaller" it should "encode JSON into UUID" in { val uuid = UUID.randomUUID() val jsString = JsString(uuid.toString) Unmarshallers.UuidFormat.read(jsString) shouldBe uuid } it should "decode UUID into JSON" in { val uuid = UUID.randomUUID() val expectedJSON = Unmarshallers.UuidFormat.write(uuid) expectedJSON shouldBe JsString(uuid.toString) } it should "handle not expected format of JSON" in { val jsNumber = JsNumber(1) intercept[DeserializationException] { Unmarshallers.UuidFormat.read(jsNumber) } } it should "handle wrong format of UUID" in { val fakeUUID = "1-2-3-4" val jsString = JsString(fakeUUID) intercept[DeserializationException] { Unmarshallers.UuidFormat.read(jsString) } } }
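UnmarshallersTest asserts that malformed input raises an exception via `intercept[...] { ... }`. A self-contained sketch: intercept both checks the exception type and returns the caught instance, so follow-up assertions on the message are possible.

import org.scalatest.{FlatSpec, Matchers}

class InterceptSpec extends FlatSpec with Matchers {

  it should "capture the expected exception and expose it for assertions" in {
    val thrown = intercept[NumberFormatException] {
      "not-a-number".toInt
    }
    thrown.getMessage should include("not-a-number")
  }

  it should "work for require-style validation failures too" in {
    intercept[IllegalArgumentException] {
      require(2 < 1, "impossible")
    }
  }
}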
Example 117
Source File: VectorClockHeaderTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.httpapi import justin.db.consistenthashing.NodeId import justin.db.vectorclocks.{Counter, VectorClock} import org.scalatest.{FlatSpec, Matchers} class VectorClockHeaderTest extends FlatSpec with Matchers { behavior of "Vector Clock Header" it should "parse string and create Vector Clock instance upon it" in { // given val encoded = "W1siMSIsMV0sWyIyIiwyXSxbIjMiLDldXQ==" // when val vClockHeader = VectorClockHeader.parse(encoded).get // then vClockHeader.vectorClock shouldBe VectorClock(Map(NodeId(1) -> Counter(1), NodeId(2) -> Counter(2), NodeId(3) -> Counter(9))) } it should "stringify Vector Clock instance" in { // given val vClock = VectorClock(Map(NodeId(1) -> Counter(1), NodeId(2) -> Counter(2), NodeId(3) -> Counter(9))) // when val encoded = VectorClockHeader(vClock).value() // then encoded shouldBe "W1siMSIsMV0sWyIyIiwyXSxbIjMiLDldXQ==" } it should "throw an Exception for not parsable Vector Clock" in { val vClock = null intercept[VectorClockHeaderException] { val encoded = VectorClockHeader(vClock).value() } } it should "render header in response" in { VectorClockHeader(null).renderInResponses() shouldBe true } it should "render header in request" in { VectorClockHeader(null).renderInRequests() shouldBe true } }
Example 118
Source File: JustinDirectivesTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.httpapi import akka.http.scaladsl.server.directives._ import akka.http.scaladsl.testkit.ScalatestRouteTest import justin.db.consistenthashing.NodeId import justin.db.vectorclocks.{Counter, VectorClock} import org.scalatest.{FlatSpec, Matchers} class JustinDirectivesTest extends FlatSpec with Matchers with ScalatestRouteTest with RouteDirectives with JustinDirectives { behavior of "Justin Directives" it should "provide empty VectorClock instance when no header is passed" in { Get("/") ~> withVectorClockHeader(x => complete(x.vectorClock.toString)) ~> check { responseAs[String] shouldBe VectorClockHeader.empty.vectorClock.toString } } it should "provide instance of VectorClock build upon passed header" in { val vClock = VectorClock(Map(NodeId(1) -> Counter(1), NodeId(2) -> Counter(2), NodeId(3) -> Counter(9))) val header = VectorClockHeader(vClock) Get("/").addHeader(header) ~> withVectorClockHeader(x => complete(x.vectorClock.toString)) ~> check { responseAs[String] shouldBe vClock.toString } } }
Example 119
Source File: JustinDataSerializerTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.storage import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.util.UUID import com.esotericsoftware.kryo.Kryo import com.esotericsoftware.kryo.io.{Input, Output} import justin.db.storage.RocksDBStorage.JustinDataSerializer import org.scalatest.{FlatSpec, Matchers} class JustinDataSerializerTest extends FlatSpec with Matchers { behavior of "JustinDataSerializer" it should "serialize/deserialize JustinData with Kryo" in { val kryo = new Kryo() val data = JustinData( id = UUID.randomUUID, value = "to jest przykladowa wartość", vclock = "vclock-value", timestamp = 1234124L ) // serialize val output = new Output(new ByteArrayOutputStream()) JustinDataSerializer.write(kryo, output, data) val dataBytes = output.getBuffer // deserialize val input = new Input(new ByteArrayInputStream(dataBytes)) JustinDataSerializer.read(kryo, input, classOf[JustinData]) shouldBe data } }
Example 120
Source File: RocksDBStorageTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.storage import java.nio.file.Files import java.util.UUID import justin.db.storage.PluggableStorageProtocol.{Ack, DataOriginality, StorageGetData} import org.scalatest.concurrent.ScalaFutures import org.scalatest.{FlatSpec, Matchers} import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration._ class RocksDBStorageTest extends FlatSpec with Matchers with ScalaFutures { behavior of "RocksDBStorage" it should "save 3 payloads and read them" in { val journal = Files.createTempDirectory("rocksdb") val rocksdb = new RocksDBStorage(journal.toFile) val data1 = JustinData( id = UUID.randomUUID, value = "1", vclock = "vclock-value", timestamp = 1234124L ) val data2 = JustinData( id = UUID.randomUUID, value = "1", vclock = "vclock-value", timestamp = 1234124L ) val data3 = JustinData( id = UUID.randomUUID, value = "3", vclock = "vclock-value", timestamp = 1234124L ) val dataOriginality = DataOriginality.Primary(ringPartitionId = 1) // PUT rocksdb.put(data1)(_ => dataOriginality).futureValue shouldBe Ack rocksdb.put(data2)(_ => dataOriginality).futureValue shouldBe Ack rocksdb.put(data3)(_ => dataOriginality).futureValue shouldBe Ack // GET rocksdb.get(data3.id)(_ => dataOriginality).futureValue shouldBe StorageGetData.Single(data3) rocksdb.get(data2.id)(_ => dataOriginality).futureValue shouldBe StorageGetData.Single(data2) rocksdb.get(data1.id)(_ => dataOriginality).futureValue shouldBe StorageGetData.Single(data1) } override implicit def patienceConfig: PatienceConfig = PatienceConfig(10.seconds, 50.millis) }
Example 121
Source File: UUIDSerializerTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.storage import java.io.ByteArrayInputStream import java.util.UUID import com.esotericsoftware.kryo.Kryo import com.esotericsoftware.kryo.io.Input import justin.db.storage.RocksDBStorage.UUIDSerializer import org.scalatest.{FlatSpec, Matchers} class UUIDSerializerTest extends FlatSpec with Matchers { behavior of "UUIDSerializer" it should "serialize/deserialize UUID with Kryo" in { val uuid = UUID.randomUUID() val kryo = new Kryo() // serialize val bytes = RocksDBStorage.uuid2bytes(kryo, uuid) // deserialize val input = new Input(new ByteArrayInputStream(bytes)) val id = UUIDSerializer.read(kryo, input, classOf[UUID]) uuid shouldBe id } }
Example 122
Source File: AddClassAnnotationEditorTest.scala From rug with GNU General Public License v3.0 | 5 votes |
package com.atomist.project.edit.java import com.atomist.param.SimpleParameterValues import com.atomist.parse.java.ParsingTargets import com.atomist.project.edit.{FailedModificationAttempt, SuccessfulModification} import com.atomist.rug.kind.java.AddClassAnnotationEditor import org.scalatest.{FlatSpec, Matchers} class AddClassAnnotationEditorTest extends FlatSpec with Matchers { // Adds a Mysterious annotation to any class val addFoobarAnnotationEditor = new AddClassAnnotationEditor( coit => true, annotationPackageName = Some("com.megacorp"), annotationName = "Mysterious", javaSourcePath = "" ) val args = SimpleParameterValues.Empty it should "apply annotations where needed" in { val as = ParsingTargets.SpringIoGuidesRestServiceSource assert(addFoobarAnnotationEditor.applicability(as).canApply === true) addFoobarAnnotationEditor.modify(as, args) match { case sma: SuccessfulModification => val javaFiles = sma.result.files.filter(_.name.endsWith(".java")) assert(javaFiles.size === 2) javaFiles.foreach(f => { f.content contains "@Mysterious" shouldBe true }) case f: FailedModificationAttempt => fail case _ => fail } } it should "recognize that annotation is applied using FQN without import" is pending }
Example 123
Source File: PackageFinderTest.scala From rug with GNU General Public License v3.0 | 5 votes |
package com.atomist.project.edit.java import com.atomist.rug.kind.java.support.{PackageFinder, PackageInfo} import com.atomist.source.{EmptyArtifactSource, SimpleFileBasedArtifactSource, StringFileArtifact} import org.scalatest.{FlatSpec, Matchers} class PackageFinderTest extends FlatSpec with Matchers { it should "find default package in no-package source" in { val src = "public class Hello {}" PackageFinder.findPackage(src) should equal("") } it should "find non default package on first line" in { val src = "package com.foo.bar;\npublic class Hello {}" PackageFinder.findPackage(src) should equal("com.foo.bar") } it should "find non default package after comment" in { val src = """ | | |package com.foo.bar; | |public class Hello { |} """.stripMargin PackageFinder.findPackage(src) should equal("com.foo.bar") } it should "find no packages in empty project" in { PackageFinder.packages(new EmptyArtifactSource("")) should be(empty) } it should "find default package in root" in { PackageFinder.packages( new SimpleFileBasedArtifactSource("", StringFileArtifact("Hello.java", "public class Hello {}")) ) should equal(Seq(PackageInfo("", 1))) } it should "find explicit package in root" in { PackageFinder.packages( new SimpleFileBasedArtifactSource("", StringFileArtifact("com/foo/bar/Hello.java", "package com.foo.bar;\npublic class Hello {}")) ) should equal(Seq(PackageInfo("com.foo.bar", 1))) } it should "find and count explicit packages in root" in { PackageFinder.packages( new SimpleFileBasedArtifactSource("", Seq( StringFileArtifact("com/foo/bar/Hello.java", "package com.foo.bar;\npublic class Hello {}"), StringFileArtifact("com/foo/bar/Hello2.java", "package com.foo.bar;\npublic class Hello2 {}") )) ) should equal(Seq(PackageInfo("com.foo.bar", 2))) } }
Example 124
Source File: MustacheMergeToolTest.scala From rug with GNU General Public License v3.0 | 5 votes |
package com.atomist.project.common.template import com.atomist.source.file.ClassPathArtifactSource import com.atomist.source.{EmptyArtifactSource, SimpleFileBasedArtifactSource, StringFileArtifact} import org.scalatest.{FlatSpec, Matchers} class MustacheMergeToolTest extends FlatSpec with Matchers { import MustacheSamples._ it should "fail with empty backing ArtifactSource" in { val mmt = new MustacheMergeTool(EmptyArtifactSource("")) an[IllegalArgumentException] should be thrownBy mmt.mergeToFile(FirstContext, "any.mustache") } it should "succeed with first template" in { val templateName = "first.mustache" val mmt = new MustacheMergeTool(new SimpleFileBasedArtifactSource("foo", StringFileArtifact(templateName, First))) for (i <- 1 to 3) { val r = mmt.mergeToFile(FirstContext, templateName).content r should equal(FirstExpected) } } val templateName = "first.mustache" val static1 = StringFileArtifact("static1", "test") val doubleDynamic = StringFileArtifact("location_was_{{in_ca}}.txt_.mustache", First) val straightTemplate = StringFileArtifact(templateName, First) val templateAs = new SimpleFileBasedArtifactSource("", Seq( straightTemplate, static1, doubleDynamic )) val cpTemplateAs = ClassPathArtifactSource.toArtifactSource("mustache/test.mustache") // This actually tests MergeHelper, not just MergeTool functionality it should "process template files" in { val mmt = new MustacheMergeTool(templateAs) val files = mmt.processTemplateFiles(FirstContext, templateAs.allFiles) assert(files.size === 3) val expectedPath = "location_was_true.txt" // First.mustache assert(files.map(f => f.path).toSet === Set(static1.path, expectedPath, "first.mustache")) assert(files.find(_.path.equals(expectedPath)).get.content === FirstExpected) } it should "process classpath template files" in { val mmt = new MustacheMergeTool(cpTemplateAs) val files = mmt.processTemplateFiles(FirstContext, cpTemplateAs.allFiles) assert(files.size === 1) val expectedPath = "G'day Chris. You just scored 10000 dollars. But the ATO has hit you with tax so you'll only get 6000.0" assert(files.map(f => f.path).toSet === Set("test.mustache")) assert(files.head.content === expectedPath) } it should "process template ArtifactSource" in { val mmt = new MustacheMergeTool(templateAs) val files = mmt.processTemplateFiles(FirstContext, templateAs).allFiles assert(files.size === 3) val expectedPath = "location_was_true.txt" // First.mustache assert(files.map(f => f.path).toSet === Set(static1.path, expectedPath, "first.mustache")) assert(files.find(_.path.equals(expectedPath)).get.content === FirstExpected) } val mt = new MustacheMergeTool(new EmptyArtifactSource("")) it should "strip .mustache extension" in { val name = "template_.mustache" mt.toInPlaceFilePath(name) should equal ("template") } it should "strip .scaml extension" in { val name = "template_.scaml" mt.toInPlaceFilePath(name) should equal ("template") } }
Example 125
Source File: UnaryEstimatorTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.base.unary import com.salesforce.op.UID import com.salesforce.op.features.Feature import com.salesforce.op.features.types._ import com.salesforce.op.test.{OpEstimatorSpec, TestFeatureBuilder, TestSparkContext} import com.salesforce.op.utils.spark.RichDataset._ import org.apache.spark.ml.param.ParamMap import org.apache.spark.sql.Dataset import org.apache.spark.sql.types.{DoubleType, MetadataBuilder, StructField, StructType} import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class UnaryEstimatorTest extends OpEstimatorSpec[Real, UnaryModel[Real, Real], UnaryEstimator[Real, Real]] { val expectedResult = Seq(0.0, 0.8, 0.4, 0.2, 1.0).map(_.toReal) } class MinMaxNormEstimator(uid: String = UID[MinMaxNormEstimator]) extends UnaryEstimator[Real, Real](operationName = "minMaxNorm", uid = uid) { def fitFn(dataset: Dataset[Real#Value]): UnaryModel[Real, Real] = { val grouped = dataset.groupBy() val maxVal = grouped.max().first().getDouble(0) val minVal = grouped.min().first().getDouble(0) new MinMaxNormEstimatorModel(min = minVal, max = maxVal, operationName = operationName, uid = uid) } } final class MinMaxNormEstimatorModel private[op](val min: Double, val max: Double, operationName: String, uid: String) extends UnaryModel[Real, Real](operationName = operationName, uid = uid) { def transformFn: Real => Real = _.v.map(v => (v - min) / (max - min)).toReal }
Example 126
Source File: PredictionTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.features.types import com.salesforce.op.test.TestCommon import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class PredictionTest extends FlatSpec with TestCommon { import Prediction.Keys._ Spec[Prediction] should "extend FeatureType" in { Prediction(1.0) shouldBe a[FeatureType] Prediction(1.0) shouldBe a[OPMap[_]] Prediction(1.0) shouldBe a[NumericMap] Prediction(1.0) shouldBe a[RealMap] } it should "error if prediction is missing" in { intercept[NonNullableEmptyException](new Prediction(null)) intercept[NonNullableEmptyException](new Prediction(Map.empty)) intercept[NonNullableEmptyException](Map.empty[String, Double].toPrediction) intercept[NonNullableEmptyException]((null: Map[String, Double]).toPrediction) assertPredictionError(new Prediction(Map("a" -> 1.0))) assertPredictionError(Map("a" -> 1.0, "b" -> 2.0).toPrediction) assertInvalidKeysError(new Prediction(Map(PredictionName -> 2.0, "a" -> 1.0))) } it should "compare values correctly" in { Prediction(1.0).equals(Prediction(1.0)) shouldBe true Prediction(1.0).equals(Prediction(0.0)) shouldBe false Prediction(1.0, Array(1.0), Array.empty[Double]).equals(Prediction(1.0)) shouldBe false Prediction(1.0, Array(1.0), Array(2.0, 3.0)).equals(Prediction(1.0, Array(1.0), Array(2.0, 3.0))) shouldBe true Map(PredictionName -> 5.0).toPrediction shouldBe a[Prediction] } it should "return prediction" in { Prediction(2.0).prediction shouldBe 2.0 } it should "return raw prediction" in { Prediction(2.0).rawPrediction shouldBe Array() Prediction(1.0, Array(1.0, 2.0), Array.empty[Double]).rawPrediction shouldBe Array(1.0, 2.0) Prediction(1.0, (1 until 200).map(_.toDouble).toArray, Array.empty[Double]).rawPrediction shouldBe (1 until 200).map(_.toDouble).toArray } it should "return probability" in { Prediction(3.0).probability shouldBe Array() Prediction(1.0, Array.empty[Double], Array(1.0, 2.0)).probability shouldBe Array(1.0, 2.0) Prediction(1.0, Array.empty[Double], (1 until 200).map(_.toDouble).toArray).probability shouldBe (1 until 200).map(_.toDouble).toArray } it should "return score" in { Prediction(4.0).score shouldBe Array(4.0) Prediction(1.0, Array(2.0, 3.0), Array.empty[Double]).score shouldBe Array(1.0) Prediction(1.0, Array.empty[Double], Array(2.0, 3.0)).score shouldBe Array(2.0, 3.0) } it should "have a nice .toString method implementation" in { Prediction(4.0).toString shouldBe "Prediction(prediction = 4.0, rawPrediction = Array(), probability = Array())" Prediction(1.0, Array(2.0, 3.0), Array.empty[Double]).toString shouldBe "Prediction(prediction = 1.0, rawPrediction = Array(2.0, 3.0), probability = Array())" Prediction(1.0, Array.empty[Double], Array(2.0, 3.0)).toString shouldBe "Prediction(prediction = 1.0, rawPrediction = Array(), probability = Array(2.0, 3.0))" } private def assertPredictionError(f: => Unit) = intercept[NonNullableEmptyException](f).getMessage shouldBe s"Prediction cannot be empty: value map must contain '$PredictionName' key" private def assertInvalidKeysError(f: => Unit) = intercept[IllegalArgumentException](f).getMessage shouldBe s"requirement failed: value map must only contain valid keys: '$PredictionName' or " + s"starting with '$RawPredictionName' or '$ProbabilityName'" }
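Prediction itself is not shown here; a minimal sketch of a map-backed value with the same kind of validation the test exercises (the name SimplePrediction and the literal "prediction" key are assumptions) could be:

// Hypothetical stand-in for a map-backed prediction value: requires a non-empty map
// that contains a "prediction" key, mirroring the errors asserted in the test above.
final case class SimplePrediction(value: Map[String, Double]) {
  require(value.nonEmpty, "Prediction cannot be empty")
  require(value.contains("prediction"), "value map must contain 'prediction' key")
  def prediction: Double = value("prediction")
}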
Example 127
Source File: JavaConversionTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.features.types import java.util import com.salesforce.op.test.TestCommon import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class JavaConversionTest extends FlatSpec with TestCommon { Spec[JavaConversionTest] should "convert java Map to TextMap" in { type T = util.HashMap[String, String] null.asInstanceOf[T].toTextMap shouldEqual TextMap(Map()) val j = new T() j.toTextMap shouldEqual TextMap(Map()) j.put("A", "a") j.put("B", null) j.toTextMap shouldEqual TextMap(Map("A" -> "a", "B" -> null)) } it should "convert java Map to MultiPickListMap" in { type T = util.HashMap[String, java.util.HashSet[String]] null.asInstanceOf[T].toMultiPickListMap shouldEqual MultiPickListMap(Map()) val j = new T() j.toMultiPickListMap shouldEqual MultiPickListMap(Map()) val h = new util.HashSet[String]() h.add("X") h.add("Y") j.put("test", h) j.put("test2", null) j.toMultiPickListMap shouldEqual MultiPickListMap(Map("test" -> Set("X", "Y"), "test2" -> Set())) } it should "convert java Map to IntegralMap" in { type T = util.HashMap[String, java.lang.Long] null.asInstanceOf[T].toIntegralMap shouldEqual IntegralMap(Map()) val j = new T() j.toIntegralMap shouldEqual IntegralMap(Map()) j.put("test", java.lang.Long.valueOf(17)) j.put("test2", null) j.toIntegralMap.v("test") shouldEqual 17L j.toIntegralMap.v("test2") shouldEqual (null: java.lang.Long) } it should "convert java Map to RealMap" in { type T = util.HashMap[String, java.lang.Double] null.asInstanceOf[T].toRealMap shouldEqual RealMap(Map()) val j = new T() j.toRealMap shouldEqual RealMap(Map()) j.put("test", java.lang.Double.valueOf(17.5)) j.put("test2", null) j.toRealMap.v("test") shouldEqual 17.5 j.toRealMap.v("test2") shouldEqual (null: java.lang.Double) } it should "convert java Map to BinaryMap" in { type T = util.HashMap[String, java.lang.Boolean] null.asInstanceOf[T].toBinaryMap shouldEqual RealMap(Map()) val j = new T() j.toBinaryMap shouldEqual RealMap(Map()) j.put("test1", java.lang.Boolean.TRUE) j.put("test0", java.lang.Boolean.FALSE) j.put("test2", null) j.toBinaryMap.v("test1") shouldEqual true j.toBinaryMap.v("test0") shouldEqual false j.toBinaryMap.v("test2") shouldEqual (null: java.lang.Boolean) } }
Example 128
Source File: GeolocationTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.features.types import com.salesforce.op.test.TestCommon import org.apache.lucene.spatial3d.geom.{GeoPoint, PlanetModel} import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class GeolocationTest extends FlatSpec with TestCommon { val PaloAlto: (Double, Double) = (37.4419, -122.1430) Spec[Geolocation] should "extend OPList[Double]" in { val myGeolocation = new Geolocation(List.empty[Double]) myGeolocation shouldBe a[FeatureType] myGeolocation shouldBe a[OPCollection] myGeolocation shouldBe a[OPList[_]] } it should "behave on missing data" in { val sut = new Geolocation(List.empty[Double]) sut.lat.isNaN shouldBe true sut.lon.isNaN shouldBe true sut.accuracy shouldBe GeolocationAccuracy.Unknown } it should "not accept missing value" in { assertThrows[IllegalArgumentException](new Geolocation(List(PaloAlto._1))) assertThrows[IllegalArgumentException](new Geolocation(List(PaloAlto._1, PaloAlto._2))) assertThrows[IllegalArgumentException](new Geolocation((PaloAlto._1, PaloAlto._2, 123456.0))) } it should "compare values correctly" in { new Geolocation(List(32.399, 154.213, 6.0)).equals(new Geolocation(List(32.399, 154.213, 6.0))) shouldBe true new Geolocation(List(12.031, -23.44, 6.0)).equals(new Geolocation(List(32.399, 154.213, 6.0))) shouldBe false FeatureTypeDefaults.Geolocation.equals(new Geolocation(List(32.399, 154.213, 6.0))) shouldBe false FeatureTypeDefaults.Geolocation.equals(FeatureTypeDefaults.Geolocation) shouldBe true FeatureTypeDefaults.Geolocation.equals(Geolocation(List.empty[Double])) shouldBe true (35.123, -94.094, 5.0).toGeolocation shouldBe a[Geolocation] } it should "correctly generate a Lucene GeoPoint object" in { val myGeo = new Geolocation(List(32.399, 154.213, 6.0)) myGeo.toGeoPoint shouldBe new GeoPoint(PlanetModel.WGS84, math.toRadians(myGeo.lat), math.toRadians(myGeo.lon)) } }
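The GeoPoint assertion above makes the conversion explicit: Lucene's spatial3d GeoPoint takes latitude and longitude in radians on a planet model, so degrees must be converted first. As a standalone helper this is simply:

import org.apache.lucene.spatial3d.geom.{GeoPoint, PlanetModel}

// Convert degrees to a Lucene GeoPoint on the WGS84 planet model, as the test above does.
def toGeoPoint(latDegrees: Double, lonDegrees: Double): GeoPoint =
  new GeoPoint(PlanetModel.WGS84, math.toRadians(latDegrees), math.toRadians(lonDegrees))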
Example 129
Source File: ListTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.features.types

import com.salesforce.op.test.TestCommon
import org.apache.lucene.spatial3d.geom.{GeoPoint, PlanetModel}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.{Assertions, FlatSpec, Matchers}

@RunWith(classOf[JUnitRunner])
class ListTest extends FlatSpec with TestCommon {

  Spec[DateTimeList] should "extend OPList[Long]" in {
    val myDateTimeList = new DateTimeList(List.empty[Long])
    myDateTimeList shouldBe a[FeatureType]
    myDateTimeList shouldBe a[OPCollection]
    myDateTimeList shouldBe a[OPList[_]]
    myDateTimeList shouldBe a[DateList]
  }

  it should "compare values correctly" in {
    new DateTimeList(List(456L, 13L)) shouldBe new DateTimeList(List(456L, 13L))
    new DateTimeList(List(13L, 456L)) should not be new DateTimeList(List(456L, 13L))
    FeatureTypeDefaults.DateTimeList should not be new DateTimeList(List(456L, 13L))
    FeatureTypeDefaults.DateTimeList shouldBe DateTimeList(List.empty[Long])
    List(12237834L, 4890489839L).toDateTimeList shouldBe a[DateTimeList]
  }
}
Example 130
Source File: OPVectorTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.features.types import com.salesforce.op.test.TestCommon import com.salesforce.op.utils.spark.RichVector._ import org.apache.spark.ml.linalg.Vectors import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class OPVectorTest extends FlatSpec with TestCommon { val vectors = Seq( Vectors.sparse(4, Array(0, 3), Array(1.0, 1.0)).toOPVector, Vectors.dense(Array(2.0, 3.0, 4.0)).toOPVector, // Purposely added a very large sparse vector to verify the efficiency Vectors.sparse(100000000, Array(1), Array(777.0)).toOPVector ) Spec[OPVector] should "be empty" in { val zero = Vectors.zeros(0) new OPVector(zero).isEmpty shouldBe true new OPVector(zero).nonEmpty shouldBe false zero.toOPVector shouldBe a[OPVector] } it should "error on size mismatch" in { val ones = Array.fill(vectors.size)(Vectors.sparse(1, Array(0), Array(1.0)).toOPVector) for { (v1, v2) <- vectors.zip(ones) res <- Seq(() => v1 + v2, () => v1 - v2, () => v1 dot v2) } intercept[IllegalArgumentException](res()).getMessage should { startWith("requirement failed: Vectors must") and include("same length") } } it should "compare values" in { val zero = Vectors.zeros(0) new OPVector(zero) shouldBe new OPVector(zero) new OPVector(zero).value shouldBe zero Vectors.dense(Array(1.0, 2.0)).toOPVector shouldBe Vectors.dense(Array(1.0, 2.0)).toOPVector Vectors.sparse(5, Array(3, 4), Array(1.0, 2.0)).toOPVector shouldBe Vectors.sparse(5, Array(3, 4), Array(1.0, 2.0)).toOPVector Vectors.dense(Array(1.0, 2.0)).toOPVector should not be Vectors.dense(Array(2.0, 2.0)).toOPVector new OPVector(Vectors.dense(Array(1.0, 2.0))) should not be Vectors.dense(Array(2.0, 2.0)).toOPVector OPVector.empty shouldBe new OPVector(zero) } it should "'+' add" in { for {(v1, v2) <- vectors.zip(vectors)} { (v1 + v2) shouldBe (v1.value + v2.value).toOPVector } } it should "'-' subtract" in { for {(v1, v2) <- vectors.zip(vectors)} { (v1 - v2) shouldBe (v1.value - v2.value).toOPVector } } it should "compute dot product" in { for {(v1, v2) <- vectors.zip(vectors)} { (v1 dot v2) shouldBe (v1.value dot v2.value) } } it should "combine" in { for {(v1, v2) <- vectors.zip(vectors)} { v1.combine(v2) shouldBe v1.value.combine(v2.value).toOPVector v1.combine(v2, v2, v1) shouldBe v1.value.combine(v2.value, v2.value, v1.value).toOPVector } } }
Example 131
Source File: FeatureSparkTypeTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.features import com.salesforce.op.features.types.FeatureType import com.salesforce.op.test.{TestCommon, TestSparkContext} import org.apache.spark.ml.linalg.SQLDataTypes.VectorType import org.apache.spark.sql.types._ import org.junit.runner.RunWith import org.scalatest.{Assertion, FlatSpec} import org.scalatest.junit.JUnitRunner import scala.reflect.runtime.universe._ @RunWith(classOf[JUnitRunner]) class FeatureSparkTypeTest extends FlatSpec with TestCommon { val primitiveTypes = Seq( (DoubleType, weakTypeTag[types.Real], DoubleType), (FloatType, weakTypeTag[types.Real], DoubleType), (LongType, weakTypeTag[types.Integral], LongType), (IntegerType, weakTypeTag[types.Integral], LongType), (ShortType, weakTypeTag[types.Integral], LongType), (ByteType, weakTypeTag[types.Integral], LongType), (DateType, weakTypeTag[types.Date], LongType), (TimestampType, weakTypeTag[types.DateTime], LongType), (StringType, weakTypeTag[types.Text], StringType), (BooleanType, weakTypeTag[types.Binary], BooleanType), (VectorType, weakTypeTag[types.OPVector], VectorType) ) val nonNullable = Seq( (DoubleType, weakTypeTag[types.RealNN], DoubleType), (FloatType, weakTypeTag[types.RealNN], DoubleType) ) private def mapType(v: DataType) = MapType(StringType, v, valueContainsNull = true) private def arrType(v: DataType) = ArrayType(v, containsNull = true) val collectionTypes = Seq( (arrType(LongType), weakTypeTag[types.DateList], arrType(LongType)), (arrType(DoubleType), weakTypeTag[types.Geolocation], arrType(DoubleType)), (arrType(StringType), weakTypeTag[types.TextList], arrType(StringType)), (mapType(StringType), weakTypeTag[types.TextMap], mapType(StringType)), (mapType(DoubleType), weakTypeTag[types.RealMap], mapType(DoubleType)), (mapType(LongType), weakTypeTag[types.IntegralMap], mapType(LongType)), (mapType(BooleanType), weakTypeTag[types.BinaryMap], mapType(BooleanType)), (mapType(arrType(StringType)), weakTypeTag[types.MultiPickListMap], mapType(arrType(StringType))), (mapType(arrType(DoubleType)), weakTypeTag[types.GeolocationMap], mapType(arrType(DoubleType))) ) Spec(FeatureSparkTypes.getClass) should "assign appropriate feature type tags for valid types and versa" in { primitiveTypes.map(scala.Function.tupled(assertTypes())) } it should "assign appropriate feature type tags for valid non-nullable types and versa" in { nonNullable.map(scala.Function.tupled(assertTypes(isNullable = false))) } it should "assign appropriate feature type tags for collection types and versa" in { collectionTypes.map(scala.Function.tupled(assertTypes())) } it should "error for unsupported types" in { val error = intercept[IllegalArgumentException](FeatureSparkTypes.featureTypeTagOf(BinaryType, isNullable = false)) error.getMessage shouldBe "Spark BinaryType is currently not supported" } it should "error for unknown types" in { val unknownType = NullType val error = intercept[IllegalArgumentException](FeatureSparkTypes.featureTypeTagOf(unknownType, isNullable = false)) error.getMessage shouldBe s"No feature type tag mapping for Spark type $unknownType" } def assertTypes( isNullable: Boolean = true )( sparkType: DataType, featureType: WeakTypeTag[_ <: FeatureType], expectedSparkType: DataType ): Assertion = { FeatureSparkTypes.featureTypeTagOf(sparkType, isNullable) shouldBe featureType FeatureSparkTypes.sparkTypeOf(featureType) shouldBe expectedSparkType } }
Example 132
Source File: RichStructTypeTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.utils.spark import com.salesforce.op.test.TestSparkContext import org.apache.spark.sql.functions._ import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner import scala.language.postfixOps @RunWith(classOf[JUnitRunner]) class RichStructTypeTest extends FlatSpec with TestSparkContext { import com.salesforce.op.utils.spark.RichStructType._ case class Human ( name: String, age: Double, height: Double, heightIsNull: Double, isBlueEyed: Double, gender: Double, testFeatNegCor: Double ) // scalastyle:off val humans = Seq( Human("alex", 32, 5.0, 0, 1, 1, 0), Human("alice", 32, 4.0, 1, 0, 0, 1), Human("bob", 32, 6.0, 1, 1, 1, 0), Human("charles", 32, 5.5, 0, 1, 1, 0), Human("diana", 32, 5.4, 1, 0, 0, 1), Human("max", 32, 5.4, 1, 0, 0, 1) ) // scalastyle:on val humansDF = spark.createDataFrame(humans).select(col("*"), col("name").as("(name)_blarg_123")) val schema = humansDF.schema Spec[RichStructType] should "find schema fields by name (case insensitive)" in { schema.findFields("name").map(_.name) shouldBe Seq("name", "(name)_blarg_123") schema.findFields("blArg").map(_.name) shouldBe Seq("(name)_blarg_123") } it should "find schema fields by name (case sensitive)" in { schema.findFields("Name", ignoreCase = false) shouldBe Seq.empty schema.findFields("aGe", ignoreCase = false) shouldBe Seq.empty schema.findFields("age", ignoreCase = false).map(_.name) shouldBe Seq("age") } it should "fail on duplication" in { the[IllegalArgumentException] thrownBy schema.findField("a") } it should "throw an error if no such name" in { the[IllegalArgumentException] thrownBy schema.findField("???") } }
Example 133
Source File: TimeBasedAggregatorTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.aggregators import com.salesforce.op.features.FeatureBuilder import com.salesforce.op.features.types._ import com.salesforce.op.stages.FeatureGeneratorStage import com.salesforce.op.test.TestCommon import org.joda.time.Duration import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class TimeBasedAggregatorTest extends FlatSpec with TestCommon { private val data = Seq(TimeBasedTest(100L, 1.0, "a", Map("a" -> "a")), TimeBasedTest(200L, 2.0, "b", Map("b" -> "b")), TimeBasedTest(300L, 3.0, "c", Map("c" -> "c")), TimeBasedTest(400L, 4.0, "d", Map("d" -> "d")), TimeBasedTest(500L, 5.0, "e", Map("e" -> "e")), TimeBasedTest(600L, 6.0, "f", Map("f" -> "f")) ) private val timeExt = Option((d: TimeBasedTest) => d.time) Spec[LastAggregator[_]] should "return the most recent event" in { val feature = FeatureBuilder.Real[TimeBasedTest].extract(_.real.toRealNN) .aggregate(LastReal).asPredictor val aggregator = feature.originStage.asInstanceOf[FeatureGeneratorStage[TimeBasedTest, _]].featureAggregator val extracted = aggregator.extract(data, timeExt, CutOffTime.NoCutoff()) extracted shouldBe Real(Some(6.0)) } it should "return the most recent event within the time window" in { val feature = FeatureBuilder.Text[TimeBasedTest].extract(_.string.toText) .aggregate(LastText).asResponse val aggregator = feature.originStage.asInstanceOf[FeatureGeneratorStage[TimeBasedTest, _]].featureAggregator val extracted = aggregator.extract(data, timeExt, CutOffTime.UnixEpoch(300L), responseWindow = Option(new Duration(201L))) extracted shouldBe Text(Some("e")) } it should "return the feature type empty value when no events are passed in" in { val feature = FeatureBuilder.TextMap[TimeBasedTest].extract(_.map.toTextMap) .aggregate(LastTextMap).asPredictor val aggregator = feature.originStage.asInstanceOf[FeatureGeneratorStage[TimeBasedTest, _]].featureAggregator val extracted = aggregator.extract(Seq(), timeExt, CutOffTime.NoCutoff()) extracted shouldBe TextMap.empty } Spec[FirstAggregator[_]] should "return the first event" in { val feature = FeatureBuilder.TextAreaMap[TimeBasedTest].extract(_.map.toTextAreaMap) .aggregate(FirstTextAreaMap).asResponse val aggregator = feature.originStage.asInstanceOf[FeatureGeneratorStage[TimeBasedTest, _]].featureAggregator val extracted = aggregator.extract(data, timeExt, CutOffTime.UnixEpoch(301L)) extracted shouldBe TextAreaMap(Map("d" -> "d")) } it should "return the first event within the time window" in { val feature = FeatureBuilder.Currency[TimeBasedTest].extract(_.real.toCurrency) .aggregate(FirstCurrency).asPredictor val aggregator = feature.originStage.asInstanceOf[FeatureGeneratorStage[TimeBasedTest, _]].featureAggregator val extracted = aggregator.extract(data, timeExt, CutOffTime.UnixEpoch(400L), predictorWindow = Option(new Duration(201L))) extracted shouldBe Currency(Some(2.0)) } it should "return the feature type empty value when no events are passed in" in { val feature = FeatureBuilder.State[TimeBasedTest].extract(_.string.toState) .aggregate(FirstState).asPredictor val aggregator = feature.originStage.asInstanceOf[FeatureGeneratorStage[TimeBasedTest, _]].featureAggregator val extracted = aggregator.extract(Seq(), timeExt, CutOffTime.NoCutoff()) extracted shouldBe State.empty } } case class TimeBasedTest(time: Long, real: Double, string: String, map: Map[String, String])
Example 134
Source File: RichGenericRecordTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.utils.avro import com.salesforce.op.test.{TestCommon, TestSparkContext} import com.salesforce.op.utils.io.avro.AvroInOut import org.apache.avro.generic.GenericRecord import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import org.scalatest.{FlatSpec, Matchers} @RunWith(classOf[JUnitRunner]) class RichGenericRecordTest extends FlatSpec with Matchers with TestSparkContext with TestCommon { import com.salesforce.op.utils.avro.RichGenericRecord._ val dataPath = resourceFile(parent = "../test-data", name = s"PassengerData.avro").getPath val passengerData = AvroInOut.read[GenericRecord](dataPath).getOrElse(throw new Exception("Couldn't read data")) val firstRow = passengerData.sortBy(_.get("passengerId").toString.toInt).first Spec[RichGenericRecord] should "get value of Int" in { val id = firstRow.getValue[Int]("passengerId") id shouldBe Some(1) } it should "get value of Double" in { val survived = firstRow.getValue[Double]("survived") survived shouldBe Some(0.0) } it should "get value of Long" in { val height = firstRow.getValue[Long]("height") height shouldBe Some(168L) } it should "get value of String" in { val gender = firstRow.getValue[String]("gender") gender shouldBe Some("Female") } it should "get value of Char" in { val gender = firstRow.getValue[Char]("gender") gender shouldBe Some("Female") } it should "get value of Float" in { val age = firstRow.getValue[Float]("age") age shouldBe Some(32.0) } it should "get value of Short" in { val weight = firstRow.getValue[Short]("weight") weight shouldBe Some(67) } it should "throw error for invalid field" in { val error = intercept[IllegalArgumentException](firstRow.getValue[Short]("invalidField")) error.getMessage shouldBe "requirement failed: invalidField is not found in Avro schema!" } }
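RichGenericRecord is only used above, not defined. A hypothetical sketch of such an enrichment, assuming a schema check plus a cast (a production version would also convert Avro-specific runtime types such as Utf8 before casting), could be:

import org.apache.avro.generic.GenericRecord

object RichGenericRecordSketch {
  implicit class RichRecord(val record: GenericRecord) extends AnyVal {
    // Option-returning accessor over a GenericRecord field; fails fast if the field
    // is absent from the Avro schema, as the last test above expects.
    def getValue[T](field: String): Option[T] = {
      require(record.getSchema.getField(field) != null, s"$field is not found in Avro schema!")
      Option(record.get(field)).map(_.asInstanceOf[T])
    }
  }
}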
Example 135
Source File: SpecialDoubleSerializerTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.utils.json import com.salesforce.op.test.TestCommon import org.json4s.jackson.JsonMethods._ import org.json4s.{DefaultFormats, Extraction, Formats} import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class SpecialDoubleSerializerTest extends FlatSpec with TestCommon { val data = Map( "normal" -> Seq(-1.1, 0.0, 2.3), "infs" -> Seq(Double.NegativeInfinity, Double.PositiveInfinity), "minMax" -> Seq(Double.MinValue, Double.MaxValue), "nan" -> Seq(Double.NaN) ) Spec[SpecialDoubleSerializer] should behave like readWriteDoubleValues(data)( json = """{"normal":[-1.1,0.0,2.3],"infs":["-Infinity","Infinity"],"minMax":[-1.7976931348623157E308,1.7976931348623157E308],"nan":["NaN"]}""" // scalastyle:off )(DefaultFormats + new SpecialDoubleSerializer) Spec[SpecialDoubleSerializer] + " (with big decimal)" should behave like readWriteDoubleValues(data)( json = """{"normal":[-1.1,0.0,2.3],"infs":["-Infinity","Infinity"],"minMax":[-1.7976931348623157E+308,1.7976931348623157E+308],"nan":["NaN"]}""" // scalastyle:off )(DefaultFormats.withBigDecimal + new SpecialDoubleSerializer) def readWriteDoubleValues(input: Map[String, Seq[Double]])(json: String)(implicit formats: Formats): Unit = { it should "write double entries" in { compact(Extraction.decompose(input)) shouldBe json } it should "read double entries" in { val parsed = parse(json).extract[Map[String, Seq[Double]]] parsed.keys shouldBe input.keys parsed zip input foreach { case (("nan", a), ("nan", b)) => a.foreach(_.isNaN shouldBe true) case ((_, a), (_, b)) => a should contain theSameElementsAs b } } } }
Example 136
Source File: RichTupleTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.utils.tuples

import com.salesforce.op.test.TestCommon
import org.junit.runner.RunWith
import org.scalatest.FlatSpec
import org.scalatest.junit.JUnitRunner
import com.salesforce.op.utils.tuples.RichTuple._

@RunWith(classOf[JUnitRunner])
class RichTupleTest extends FlatSpec with TestCommon {

  Spec(RichTuple.getClass) should "map a function to provided elements in tuples" in {
    val res = (Some(1), Some(2)).map((x, y) => x + y)
    res.get shouldBe 3
  }

  it should "map on empty tuples" in {
    val none: (Option[String], Option[String]) = None -> None
    none.map((x, y) => x + y) shouldBe None
  }

  it should "map the function with no effect for left param alone" in {
    val res = (Some(1), None).map((x, y) => x + y)
    res shouldBe Some(1)
  }

  it should "map the function with no effect for right param alone" in {
    val res = (None, Some(1)).map((x, y) => x + y)
    res shouldBe Some(1)
  }
}
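The behaviour checked above fully determines one possible implementation of the tuple enrichment: apply the function when both sides are defined, otherwise fall back to whichever side is present. A self-contained sketch (names assumed):

object RichOptionTuple {
  implicit class RichTuple2[A](t: (Option[A], Option[A])) {
    // Combine both values with f when present; otherwise keep the defined side, if any.
    def map(f: (A, A) => A): Option[A] = t match {
      case (Some(a), Some(b)) => Some(f(a, b))
      case (Some(a), None)    => Some(a)
      case (None, Some(b))    => Some(b)
      case (None, None)       => None
    }
  }
}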
Example 137
Source File: SequenceAggregatorsTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.utils.spark import com.salesforce.op.test.TestSparkContext import com.salesforce.op.utils.spark.SequenceAggregators.{SeqMapDouble, SeqMapLong} import org.apache.spark.sql.Row import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class SequenceAggregatorsTest extends FlatSpec with TestSparkContext { import spark.implicits._ val meanSeqMapDouble = SequenceAggregators.MeanSeqMapDouble(2) val modeSeqMapLong = SequenceAggregators.ModeSeqMapLong(2) val modeSeqNullInt = SequenceAggregators.ModeSeqNullInt(2) implicit val encMeanMapAgg = meanSeqMapDouble.outputEncoder implicit val encModeMapAgg = modeSeqMapLong.outputEncoder Spec(SequenceAggregators.getClass) should "correctly compute the mean by key of maps" in { val data = Seq( (Map("a" -> 1.0, "b" -> 5.0), Map("z" -> 10.0)), (Map("c" -> 11.0), Map("y" -> 3.0, "x" -> 0.0)), (Map.empty[String, Double], Map.empty[String, Double]) ).toDF("f1", "f2").map(Helper.toSeqMapDouble) val res = data.select(meanSeqMapDouble.toColumn).first() res shouldBe Seq(Map("a" -> 1.0, "c" -> 11.0, "b" -> 5.0), Map("z" -> 10.0, "y" -> 3.0, "x" -> 0.0)) } it should "correctly compute the mean by key of maps again" in { val meanData = Seq( (Map("a" -> 1.0, "b" -> 5.0), Map("y" -> 4.0, "x" -> 0.0, "z" -> 10.0)), (Map("a" -> -3.0, "b" -> 3.0, "c" -> 11.0), Map("y" -> 3.0, "x" -> 0.0)), (Map("a" -> 1.0, "b" -> 5.0), Map("y" -> 1.0, "x" -> 0.0, "z" -> 5.0)) ).toDF("f1", "f2").map(Helper.toSeqMapDouble) val res = meanData.select(meanSeqMapDouble.toColumn).first() res shouldBe Seq(Map("a" -> -1.0 / 3, "c" -> 11.0, "b" -> 13.0 / 3), Map("z" -> 7.5, "y" -> 8.0 / 3, "x" -> 0.0)) } it should "correctly compute the mode by key of maps" in { val data = Seq( (Map("a" -> 1L, "b" -> 5L), Map("z" -> 10L)), (Map("c" -> 11L), Map("y" -> 3L, "x" -> 0L)), (Map.empty[String, Long], Map.empty[String, Long]) ).toDF("f1", "f2").map(Helper.toSeqMapLong) val res = data.select(modeSeqMapLong.toColumn).first() res shouldBe Seq(Map("a" -> 1L, "b" -> 5L, "c" -> 11L), Map("x" -> 0L, "y" -> 3L, "z" -> 10L)) } it should "correctly compute the mode by key of maps again" in { val modeData = Seq( (Map("a" -> 1L, "b" -> 5L), Map("y" -> 4L, "x" -> 0L, "z" -> 10L)), (Map("a" -> -3L, "b" -> 3L, "c" -> 11L), Map("y" -> 3L, "x" -> 0L)), (Map("a" -> 1L, "b" -> 5L), Map("y" -> 1L, "x" -> 0L, "z" -> 5L)) ).toDF("f1", "f2").map(Helper.toSeqMapLong) val res = modeData.select(modeSeqMapLong.toColumn).first() res shouldBe Seq(Map("a" -> 1L, "b" -> 5L, "c" -> 11L), Map("x" -> 0L, "y" -> 1L, "z" -> 5L)) } it should "correctly compute the mode" in { val data = Seq( (Some(3L), None), (Some(3L), Some(2L)), (Some(1L), Some(5L)) ).toDF("f1", "f2").map(r => Seq(if (r.isNullAt(0)) None else Option(r.getLong(0)), if (r.isNullAt(1)) None else Option(r.getLong(1)))) val res = data.select(modeSeqNullInt.toColumn).first() res shouldBe Seq(3L, 2L) } } private object Helper { def toSeqMapDouble(r: Row): SeqMapDouble = Seq(r.getMap[String, Double](0).toMap, r.getMap[String, Double](1).toMap) def toSeqMapLong(r: Row): SeqMapLong = Seq(r.getMap[String, Long](0).toMap, r.getMap[String, Long](1).toMap) }
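Outside of Spark, the "mean by key of maps" semantics verified above reduces to a small fold over key/value pairs; a plain-Scala sketch of that aggregation logic (illustrative only, not the Spark Aggregator itself):

// Mean of the values observed for each key across a sequence of maps; keys absent
// from a given row simply contribute nothing to that key's mean.
def meanByKey(rows: Seq[Map[String, Double]]): Map[String, Double] =
  rows.flatten.groupBy(_._1).map { case (key, kvs) =>
    val values = kvs.map(_._2)
    key -> values.sum / values.size
  }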
Example 138
Source File: TextUtilsTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.utils.text

import com.salesforce.op.test.TestCommon
import org.junit.runner.RunWith
import org.scalatest.FlatSpec
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class TextUtilsTest extends FlatSpec with TestCommon {

  Spec(TextUtils.getClass) should "concat strings" in {
    TextUtils.concat("Left", "Right", ",") shouldBe "Left,Right"
  }

  it should "concat with no effect for right half alone" in {
    TextUtils.concat("", "Right", ",") shouldBe "Right"
  }

  it should "concat with no effect for left half alone" in {
    TextUtils.concat("Left", "", ",") shouldBe "Left"
  }

  it should "concat empty strings" in {
    TextUtils.concat("", "", ",") shouldBe ""
  }

  it should "clean a string with special chars" in {
    TextUtils.cleanString("A string wit#h %bad pun&ctuation mark<=>s") shouldBe "AStringWitHBadPunCtuationMarkS"
  }

  it should "clean an Option(string) with special chars" in {
    val testString: Option[String] = Some("A string wit#h %bad pun&ctuation mark<=>s")
    TextUtils.cleanOptString(testString) shouldBe Some("AStringWitHBadPunCtuationMarkS")
  }
}
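The concat assertions above pin down the separator behaviour exactly: the separator is only inserted when both halves are non-empty. A minimal sketch consistent with those tests (the real TextUtils also provides the string-cleaning helpers):

object SimpleTextUtils {
  // Join two strings with a separator, omitting the separator when either side is empty.
  def concat(left: String, right: String, separator: String): String =
    if (left.isEmpty) right
    else if (right.isEmpty) left
    else s"$left$separator$right"
}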
Example 139
Source File: DateTimeUtilsTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.utils.date import com.salesforce.op.test.TestCommon import org.joda.time.{DateTime, DateTimeZone} import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class DateTimeUtilsTest extends FlatSpec with TestCommon { val dateStr = "2017-03-29T14:00:07.000Z" val date = DateTime.parse(dateStr) val now = DateTime.now(DateTimeZone.UTC) Spec(DateTimeUtils.getClass) should "parse date in Iso format" in { DateTimeUtils.parse(dateStr) shouldBe date.getMillis } it should "parse date in yyyy-MM-dd HH:mm:ss.SSS format" in { val formattedStr = "2017-03-29 14:00:07.000" DateTimeUtils.parse(formattedStr) shouldBe date.getMillis } it should "parse Unix" in { DateTimeUtils.parseUnix(now.getMillis) shouldBe now.toString("YYYY/MM/dd") } it should "get range between two dates" in { val numberOfDays = 500 val diff = DateTimeUtils.getRange( date.minusDays(numberOfDays).toString("YYYY/MM/dd"), date.toString("YYYY/MM/dd") ) diff.length shouldBe numberOfDays + 1 } it should "get date difference days from start date" in { val datePlusDays = DateTimeUtils.getDatePlusDays(now.toString("YYYY/MM/dd"), 31) datePlusDays shouldBe now.plusDays(31).toString("YYYY/MM/dd") } }
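A hypothetical sketch of the two-format parsing behaviour exercised above (ISO 8601 first, then a space-separated pattern interpreted as UTC); the real DateTimeUtils may support more formats:

import org.joda.time.DateTime
import org.joda.time.format.DateTimeFormat

// Try ISO 8601 first, then fall back to "yyyy-MM-dd HH:mm:ss.SSS" in UTC; returns epoch millis.
def parseToMillis(s: String): Long =
  try DateTime.parse(s).getMillis
  catch {
    case _: IllegalArgumentException =>
      DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS").withZoneUTC().parseDateTime(s).getMillis
  }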
Example 140
Source File: AvroInOutTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.utils.io.avro import java.io.{File, FileNotFoundException, FileWriter} import java.nio.file.Paths import com.salesforce.op.test.TestSparkContext import com.salesforce.op.utils.io.avro.AvroInOut._ import org.apache.avro.generic.GenericRecord import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.rdd.RDD import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class AvroInOutTest extends FlatSpec with TestSparkContext { val avroSchemaPath = s"$testDataDir/PassengerDataAll.avsc" val avroFilePath = s"$testDataDir/PassengerDataAll.avro" val avroFileRecordCount = 891 val hdfs: FileSystem = FileSystem.get(sc.hadoopConfiguration) lazy val avroTemp: String = tempDir + "/avro-inout-test" Spec(AvroInOut.getClass) should "creates RDD from an avro file" in { val res = readPathSeq(avroFilePath, withCount = true, deepCopy = true, persist = false) res shouldBe a[RDD[_]] res.count shouldBe avroFileRecordCount } it should "creates RDD from a sequence of avro files" in { val res = readPathSeq(s"$avroFilePath,$avroFilePath") res.count shouldBe avroFileRecordCount*2 } it should "create RDD from a mixed sequence of valid and invalid avro files" in { val res = readPathSeq(s"badfile/path1,$avroFilePath,badfile/path2,$avroFilePath,badfile/path3") res.count shouldBe avroFileRecordCount*2 } it should "throw an error if passed in avro files are invalid" in { val error = intercept[IllegalArgumentException](readPathSeq("badfile/path1,badfile/path2")) error.getMessage shouldBe "No valid directory found in path 'badfile/path1,badfile/path2'" } it should "creates Some(RDD) from an avro file" in { val res = read(avroFilePath) res.size shouldBe 1 res.get shouldBe an[RDD[_]] res.get.count shouldBe avroFileRecordCount } it should "create None from an invalid avro file" in { val res = read("badfile/path") res shouldBe None } Spec[AvroWriter[_]] should "writeAvro to filesystem" in { val avroData = readPathSeq(avroFilePath).asInstanceOf[RDD[GenericRecord]] val avroSchema = loadFile(avroSchemaPath) val error = intercept[FileNotFoundException](hdfs.listStatus(new Path(avroTemp))) error.getMessage shouldBe s"File $avroTemp does not exist" AvroWriter(avroData).writeAvro(avroTemp, avroSchema) val hdfsFiles = hdfs.listStatus(new Path(avroTemp)) filter (x => x.getPath.getName.contains("part")) val res = readPathSeq((for { x <- hdfsFiles } yield avroTemp + "/" + x.getPath.getName).mkString(",")) res.count shouldBe avroFileRecordCount } it should "checkPathsExist" in { val tmpDir = Paths.get(File.separator, "tmp").toFile val f1 = new File(tmpDir, "avroinouttest") f1.delete() val w = new FileWriter(f1) w.write("just checking") w.close() val f2 = new File(tmpDir, "thisfilecannotexist") f2.delete() val f3 = new File(tmpDir, "this file cannot exist") f3.delete() assume(f1.exists && !f2.exists && !f3.exists) // check for one dir being invalid in the path amongst two selectExistingPaths(s"$f1,$f2") shouldBe f1.toString // check if all dirs in the path are invalid then we get an exception intercept[IllegalArgumentException] { selectExistingPaths(f2.toString) } // also, check if all dirs in the path are invalid ( in a different way ) then we get an exception intercept[IllegalArgumentException] { selectExistingPaths(f3.toString) } // check for one dir being invalid ( in a different way ) in the path amongst the two dirs in it selectExistingPaths(s"$f1,$f3") shouldBe f1.toString // check for paths order insensitivity 
selectExistingPaths(s"$f3,$f1") shouldBe f1.toString // check for an exception if the path is an empty string intercept[IllegalArgumentException] { selectExistingPaths("") } } }
Example 141
Source File: CSVToAvroTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.utils.io.csv import com.salesforce.op.test.{Passenger, TestSparkContext} import org.apache.spark.SparkException import org.apache.spark.rdd.RDD import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class CSVToAvroTest extends FlatSpec with TestSparkContext { val avroSchema: String = loadFile(s"$resourceDir/PassengerSchemaModifiedDataTypes.avsc") val csvReader: CSVInOut = new CSVInOut(CSVOptions(header = true)) lazy val csvRDD: RDD[Seq[String]] = csvReader.readRDD(s"$resourceDir/PassengerDataModifiedDataTypes.csv") lazy val csvFileRecordCount: Long = csvRDD.count Spec(CSVToAvro.getClass) should "convert RDD[Seq[String]] to RDD[GenericRecord]" in { val res = CSVToAvro.toAvro(csvRDD, avroSchema) res shouldBe a[RDD[_]] res.count shouldBe csvFileRecordCount } it should "convert RDD[Seq[String]] to RDD[T]" in { val res = CSVToAvro.toAvroTyped[Passenger](csvRDD, avroSchema) res shouldBe a[RDD[_]] res.count shouldBe csvFileRecordCount } it should "throw an error for nested schema" in { val invalidAvroSchema = loadFile(s"$resourceDir/PassengerSchemaNestedTypeCSV.avsc") val exceptionMsg = "CSV should be a flat file and not have nested records (unsupported column(Sex schemaType=ENUM)" val error = intercept[SparkException](CSVToAvro.toAvro(csvRDD, invalidAvroSchema).count()) error.getCause.getMessage shouldBe exceptionMsg } it should "throw an error for mis-matching schema fields" in { val invalidAvroSchema = loadFile(s"$resourceDir/PassengerSchemaInvalidField.avsc") val error = intercept[SparkException](CSVToAvro.toAvro(csvRDD, invalidAvroSchema).count()) error.getCause.getMessage shouldBe "Mismatch number of fields in csv record and avro schema" } it should "throw an error for bad data" in { val invalidDataRDD = csvReader.readRDD(s"$resourceDir/PassengerDataContentTypeMisMatch.csv") val error = intercept[SparkException](CSVToAvro.toAvro(invalidDataRDD, avroSchema).count()) error.getCause.getMessage shouldBe "Boolean column not actually a boolean. Invalid value: 'fail'" } }
Example 142
Source File: CSVInOutTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.utils.io.csv

import com.salesforce.op.test.TestSparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{AnalysisException, DataFrame}
import org.junit.runner.RunWith
import org.scalatest.FlatSpec
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class CSVInOutTest extends FlatSpec with TestSparkContext {
  private val csvReader = new CSVInOut(CSVOptions(header = true))
  private val csvFile = s"$testDataDir/PassengerDataAllWithHeader.csv"

  Spec[CSVInOut] should "throw error for bad file paths with DataFrame" in {
    val error = intercept[AnalysisException](csvReader.readDataFrame("/bad/file/path/read/dataframe"))
    error.getMessage should endWith ("Path does not exist: file:/bad/file/path/read/dataframe;")
  }

  it should "throw error for bad file paths with RDD" in {
    val error = intercept[AnalysisException](csvReader.readRDD("/bad/file/path/read/rdd"))
    error.getMessage should endWith ("Path does not exist: file:/bad/file/path/read/rdd;")
  }

  it should "read a CSV file to DataFrame" in {
    val res = csvReader.readDataFrame(csvFile)
    res shouldBe a[DataFrame]
    res.count shouldBe 891
  }

  it should "read a CSV file to RDD" in {
    val res = csvReader.readRDD(csvFile)
    res shouldBe a[RDD[_]]
    res.count shouldBe 891
  }
}
Example 143
Source File: UIDTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op import com.salesforce.op.test.TestCommon import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class UIDTest extends FlatSpec with TestCommon { Spec(UID.getClass) should "generate UIDs" in { (1 to 100000).map(_ => UID[UIDTest]).toSet.size shouldBe 100000 } it should "allow counting UIDs" in { val start = UID.count() (1 to 100).foreach(_ => UID[UIDTest]) val end = UID.count() end - start shouldBe 100 } it should "allow reset UIDs to a specific count" in { val count = UID.count() val first = (1 to 100).map(_ => UID[UIDTest]) UID.reset(count) val second = (1 to 100).map(_ => UID[UIDTest]) first should contain theSameElementsAs second UID.reset()[UIDTest] shouldBe "UIDTest_000000000001" } it should "allow reset UIDs" in { UID.reset() val first = (1 to 100).map(_ => UID[UIDTest]) UID.reset() val second = (1 to 100).map(_ => UID[UIDTest]) first should contain theSameElementsAs second } it should "parse from string" in { UID.reset().fromString(UID[UIDTest]) shouldBe ("UIDTest", "000000000001") } it should "error on invalid string" in { intercept[IllegalArgumentException](UID.fromString("foo")).getMessage shouldBe "Invalid UID: foo" } }
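The UID behaviour asserted above (monotonic counter, count/reset, zero-padded suffix) can be sketched with a simple counter; the name SimpleUID and the 12-digit padding are assumptions inferred from the expected "UIDTest_000000000001" value:

object SimpleUID {
  private var counter = 0L

  def count(): Long = counter

  // Reset the counter (to zero by default) and return this object for chaining.
  def reset(to: Long = 0L): SimpleUID.type = { counter = to; this }

  // Next id for a prefix, e.g. SimpleUID("UIDTest") == "UIDTest_000000000001" right after reset().
  def apply(prefix: String): String = {
    counter += 1
    f"${prefix}_$counter%012d"
  }
}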
Example 144
Source File: FeatureHistoryTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op import com.salesforce.op.FeatureHistory.{OriginFeatureKey, StagesKey} import com.salesforce.op.test.TestCommon import org.apache.spark.sql.types.MetadataBuilder import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class FeatureHistoryTest extends FlatSpec with TestCommon { val feature1 = "feature1" val feature2 = "feature2" val stage1 = "stage1" val stage2 = "stage2" Spec[FeatureHistory] should "convert a feature history to metadata" in { val featureHistory = FeatureHistory(originFeatures = Seq(feature1, feature2), stages = Seq(stage1, stage2)) val featureHistoryMetadata = featureHistory.toMetadata featureHistoryMetadata.contains(OriginFeatureKey) shouldBe true featureHistoryMetadata.contains(StagesKey) shouldBe true featureHistoryMetadata.getStringArray(OriginFeatureKey) shouldBe Array(feature1, feature2) featureHistoryMetadata.getStringArray(StagesKey) shouldBe Array(stage1, stage2) } it should "merge two instances" in { val featureHistory1 = FeatureHistory(originFeatures = Seq(feature1), stages = Seq(stage1)) val featureHistory2 = FeatureHistory(originFeatures = Seq(feature2), stages = Seq(stage2)) val featureHistoryCombined = featureHistory1.merge(featureHistory2) featureHistoryCombined.originFeatures shouldBe Seq(feature1, feature2) featureHistoryCombined.stages shouldBe Seq(stage1, stage2) } it should "create a metadata for a map" in { val featureHistory1 = FeatureHistory(originFeatures = Seq(feature1), stages = Seq(stage1)) val featureHistory2 = FeatureHistory(originFeatures = Seq(feature2), stages = Seq(stage2)) val map = Map(("1" -> featureHistory1), ("2" -> featureHistory2)) val featureHistoryMetadata = FeatureHistory.toMetadata(map) featureHistoryMetadata.contains("1") shouldBe true featureHistoryMetadata.contains("2") shouldBe true val f1 = featureHistoryMetadata.getMetadata("1") f1.contains(OriginFeatureKey) shouldBe true f1.contains(StagesKey) shouldBe true f1.getStringArray(OriginFeatureKey) shouldBe Array(feature1) f1.getStringArray(StagesKey) shouldBe Array(stage1) val f2 = featureHistoryMetadata.getMetadata("2") f2.contains(OriginFeatureKey) shouldBe true f2.contains(StagesKey) shouldBe true f2.getStringArray(OriginFeatureKey) shouldBe Array(feature2) f2.getStringArray(StagesKey) shouldBe Array(stage2) } it should "create a map from metadata" in { val featureHistory1 = FeatureHistory(originFeatures = Seq(feature1), stages = Seq(stage1)) val featureHistory2 = FeatureHistory(originFeatures = Seq(feature2), stages = Seq(stage2)) val featureHistoryMapMetadata = new MetadataBuilder() .putMetadata("1", featureHistory1.toMetadata) .putMetadata("2", featureHistory2.toMetadata) .build() val featureHistoryMap = FeatureHistory.fromMetadataMap(featureHistoryMapMetadata) featureHistoryMap.contains("1") shouldBe true featureHistoryMap("1") shouldBe featureHistory1 featureHistoryMap.contains("2") shouldBe true featureHistoryMap("2") shouldBe featureHistory2 } }
Example 145
Source File: JoinedReadersTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.readers import com.salesforce.op.aggregators.CutOffTime import com.salesforce.op.test._ import org.joda.time.{DateTimeConstants, Duration} import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import org.scalatest.{FlatSpec, Matchers} @RunWith(classOf[JUnitRunner]) class JoinedReadersTest extends FlatSpec with PassengerSparkFixtureTest { val sparkReader = DataReaders.Aggregate.csv[SparkExample]( path = Some("../test-data/SparkExample.csv"), schema = SparkExample.getClassSchema.toString, key = _.getLabel.toString, aggregateParams = AggregateParams(None, CutOffTime.NoCutoff()) ) val passengerReader = DataReaders.Conditional.avro[Passenger]( path = Some(passengerAvroPath), // Path should be optional so can also pass in as a parameter key = _.getPassengerId.toString, // Entity to score conditionalParams = ConditionalParams( timeStampFn = _.getRecordDate.toLong, // Record field which defines the date for the rest of the columns targetCondition = _.getBoarded >= 1471046600, // Function to figure out if target event has occurred responseWindow = None, // How many days after target event to include in response aggregation predictorWindow = None, // How many days before target event to include in predictor aggregation timeStampToKeep = TimeStampToKeep.Min ) ) Spec[JoinedReader[_, _]] should "take any kind of reader as the leftmost input" in { profileReader.innerJoin(sparkReader) shouldBe a[JoinedDataReader[_, _]] dataReader.outerJoin(sparkReader) shouldBe a[JoinedDataReader[_, _]] passengerReader.leftOuterJoin(sparkReader) shouldBe a[JoinedDataReader[_, _]] } it should "allow simple readers for right inputs" in { sparkReader.innerJoin(profileReader).joinType shouldBe JoinTypes.Inner sparkReader.outerJoin(profileReader).joinType shouldBe JoinTypes.Outer sparkReader.leftOuterJoin(profileReader).joinType shouldBe JoinTypes.LeftOuter } it should "have all subreaders correctly ordered" in { val joinedReader = profileReader.innerJoin(sparkReader).outerJoin(dataReader) joinedReader.subReaders should contain theSameElementsAs Seq(profileReader, sparkReader, dataReader) } it should "correctly set leftKey in left outer and inner joins" in { dataReader.leftOuterJoin(sparkReader, joinKeys = JoinKeys(leftKey = "id")).joinKeys.leftKey shouldBe "id" dataReader.innerJoin(sparkReader, joinKeys = JoinKeys(leftKey = "id")).joinKeys.leftKey shouldBe "id" } it should "throw an error if you try to perform a self join" in { a[IllegalArgumentException] should be thrownBy { dataReader.innerJoin(dataReader) } } it should "throw an error if you try to use the same reader twice" in { a[IllegalArgumentException] should be thrownBy { dataReader.innerJoin(sparkReader).innerJoin(dataReader) } } it should "throw an error if you try to read the same data type twice with different readers" in { a[IllegalArgumentException] should be thrownBy { passengerReader.innerJoin(sparkReader).outerJoin(dataReader) } } it should "throw an error if you try to use an invalid key combination" in { a[RuntimeException] should be thrownBy { dataReader.innerJoin(sparkReader, joinKeys = JoinKeys(resultKey = DataFrameFieldNames.KeyFieldName)) .generateDataFrame(Array.empty) } } it should "produce a JoinedAggregateDataReader when withSecondaryAggregation is called" in { val joinedReader = profileReader.innerJoin(sparkReader) val timeFilter = TimeBasedFilter( condition = new TimeColumn(boardedTime), primary = new TimeColumn(boardedTime), timeWindow = Duration.standardDays(DateTimeConstants.DAYS_PER_WEEK) ) 
joinedReader.withSecondaryAggregation(timeFilter) shouldBe a[JoinedAggregateDataReader[_, _]] } }
Example 146
Source File: ParquetProductReaderTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.readers import com.salesforce.op.features.FeatureBuilder import com.salesforce.op.features.types._ import com.salesforce.op.test.{TestCommon, TestSparkContext} import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner // Need this case class to be external to (not nested in) ParquetProductReaderTest for spark sql to work correctly. // Fields in the case class are case-sensitive and should exactly match the parquet column names. case class PassengerType ( PassengerId: Int, Survived: Int, Pclass: Option[Int], Name: Option[String], Sex: String, Age: Option[Double], SibSp: Option[Int], Parch: Option[Int], Ticket: String, Fare: Double, Cabin: Option[String], Embarked: Option[String] ) @RunWith(classOf[JUnitRunner]) class ParquetProductReaderTest extends FlatSpec with TestSparkContext with TestCommon { def passengerFilePath: String = s"$testDataDir/PassengerDataAll.parquet" val parquetRecordCount = 891 import spark.implicits._ val dataReader = new ParquetProductReader[PassengerType]( readPath = Some(passengerFilePath), key = _.PassengerId.toString ) Spec[ParquetProductReader[_]] should "read in data correctly" in { val data = dataReader.readDataset().collect() data.foreach(_ shouldBe a[PassengerType]) data.length shouldBe parquetRecordCount } it should "read in byte arrays as valid strings" in { val caseReader = DataReaders.Simple.parquetCase[PassengerType]( path = Some(passengerFilePath), key = _.PassengerId.toString ) val records = caseReader.readDataset().collect() records.collect { case r if r.PassengerId == 1 => r.Ticket } shouldBe Array("A/5 21171") } it should "map the columns of data to types defined in schema" in { val caseReader = DataReaders.Simple.parquetCase[PassengerType]( path = Some(passengerFilePath), key = _.PassengerId.toString ) val records = caseReader.readDataset().collect() records(0).Survived shouldBe a[java.lang.Integer] records(0).Fare shouldBe a[java.lang.Double] records(0).Ticket shouldBe a[java.lang.String] records.collect { case r if r.PassengerId == 1 => r.Age } shouldBe Array(Some(22.0)) } it should "generate a dataframe" in { val tokens = FeatureBuilder.TextList[PassengerType] .extract(p => p.Name.map(_.split(" ")).toSeq.flatten.toTextList) .asPredictor val data = dataReader.generateDataFrame(rawFeatures = Array(tokens)).collect() data.collect { case r if r.get(0) == "3" => r.get(1) } shouldBe Array(Array("Heikkinen,", "Miss.", "Laina")) data.length shouldBe parquetRecordCount } }
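Stripped of the reader abstraction, the parquet test above boils down to Spark's built-in parquet support plus a product encoder; a minimal sketch reusing the PassengerType case class defined above, assuming a SparkSession is in scope:

import org.apache.spark.sql.{Dataset, SparkSession}

// Read a parquet file and map it onto the PassengerType case class via spark.implicits.
def readPassengers(spark: SparkSession, path: String): Dataset[PassengerType] = {
  import spark.implicits._
  spark.read.parquet(path).as[PassengerType]
}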
Example 147
Source File: StreamingReadersTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.readers

import org.apache.hadoop.fs.Path
import org.junit.runner.RunWith
import org.scalatest.FlatSpec
import org.scalatest.junit.JUnitRunner
import com.salesforce.op.test.TestCommon

@RunWith(classOf[JUnitRunner])
class StreamingReadersTest extends FlatSpec with TestCommon {
  val dotPath = new Path(".part.avro")
  val underScorePath = new Path("_part.avro")
  val goodPath = new Path("part.avro")
  val readerCls = StreamingReaders.getClass

  Spec[StreamingReaders.type] should "ignore hidden files" in {
    assert(!StreamingReaders.Simple.defaultPathFiler(dotPath))
    assert(!StreamingReaders.Simple.defaultPathFiler(underScorePath))
  }

  it should "accept non-hidden files" in {
    assert(StreamingReaders.Simple.defaultPathFiler(goodPath))
  }
}
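The filter behaviour asserted above follows the usual Hadoop convention that file names starting with "." or "_" are hidden; as a standalone predicate that is simply:

import org.apache.hadoop.fs.Path

// True for visible files, false for Hadoop-style hidden files (names starting with "." or "_").
def isVisiblePath(path: Path): Boolean = {
  val name = path.getName
  !name.startsWith(".") && !name.startsWith("_")
}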
Example 148
Source File: CSVProductReadersTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.readers import com.salesforce.op.features.FeatureBuilder import com.salesforce.op.features.types._ import com.salesforce.op.test.{TestCommon, TestSparkContext} import com.salesforce.op.utils.io.csv.CSVOptions import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner // need this to be external to (not nested in) CSVProductReaderTest for spark sql to work correctly case class PassengerCaseClass ( passengerId: Int, age: Option[Int], gender: Option[String], height: Option[Int], weight: Option[scala.math.BigInt], description: Option[String], boarded: Option[Long], recordDate: Option[Long], survived: Option[Boolean], randomTime: Option[java.sql.Timestamp], randomFloating: Option[Double] ) @RunWith(classOf[JUnitRunner]) class CSVProductReadersTest extends FlatSpec with TestSparkContext with TestCommon { def csvWithoutHeaderPath: String = s"$testDataDir/BigPassenger.csv" def csvWithHeaderPath: String = s"$testDataDir/BigPassengerWithHeader.csv" import spark.implicits._ Spec[CSVProductReader[_]] should "read in data correctly with header" in { val dataReader = new CSVProductReader[PassengerCaseClass]( readPath = Some(csvWithHeaderPath), key = _.passengerId.toString, options = CSVOptions(header = true) ) val data = dataReader.readDataset().collect() data.foreach(_ shouldBe a[PassengerCaseClass]) data.length shouldBe 8 } it should "read in data correctly without header" in { val dataReader = DataReaders.Simple.csvCase[PassengerCaseClass]( path = Some(csvWithoutHeaderPath), key = _.passengerId.toString ) val data = dataReader.readDataset().collect() data.foreach(_ shouldBe a[PassengerCaseClass]) data.length shouldBe 8 } it should "generate a dataframe" in { val dataReader = new CSVProductReader[PassengerCaseClass]( readPath = Some(csvWithHeaderPath), key = _.passengerId.toString, options = CSVOptions(header = true) ) val tokens = FeatureBuilder.TextList[PassengerCaseClass] .extract(p => p.description.map(_.split(" ")).toSeq.flatten.toTextList).asPredictor val data = dataReader.generateDataFrame(rawFeatures = Array(tokens)).collect() data.collect { case r if r.get(0) == "3" => r.get(1) } shouldBe Array(Array("this", "is", "a", "description")) data.length shouldBe 8 } }
Example 149
Source File: CSVAutoReadersTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.readers import com.salesforce.op.test.PassengerSparkFixtureTest import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner import scala.collection.JavaConverters._ @RunWith(classOf[JUnitRunner]) class CSVAutoReadersTest extends FlatSpec with PassengerSparkFixtureTest { private val expectedSchema = new Schema.Parser().parse(resourceFile(name = "PassengerAuto.avsc")) private val allFields = expectedSchema.getFields.asScala.map(_.name()) private val keyField: String = allFields.head Spec[CSVAutoReader[_]] should "read in data correctly and infer schema" in { val dataReader = DataReaders.Simple.csvAuto[GenericRecord]( path = Some(passengerCsvWithHeaderPath), key = _.get(keyField).toString ) val data = dataReader.readRDD().collect() data.foreach(_ shouldBe a[GenericRecord]) data.length shouldBe 8 val inferredSchema = data.head.getSchema inferredSchema shouldBe expectedSchema } it should "read in data correctly and infer schema based with headers provided" in { val dataReader = DataReaders.Simple.csvAuto[GenericRecord]( path = Some(passengerCsvPath), key = _.get(keyField).toString, headers = allFields ) val data = dataReader.readRDD().collect() data.foreach(_ shouldBe a[GenericRecord]) data.length shouldBe 8 val inferredSchema = data.head.getSchema inferredSchema shouldBe expectedSchema } }
Example 150
Source File: AvroFieldTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.cli.gen import com.salesforce.op.cli.gen.AvroField._ import com.salesforce.op.test.TestCommon import org.apache.avro.Schema import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import org.scalatest.{Assertions, FlatSpec} import scala.collection.JavaConverters._ import scala.language.postfixOps @RunWith(classOf[JUnitRunner]) class AvroFieldTest extends FlatSpec with TestCommon with Assertions { Spec[AvroField] should "do from" in { val types = List( Schema.Type.STRING, // Schema.Type.BYTES, // somehow this avro type is not covered (yet) Schema.Type.INT, Schema.Type.LONG, Schema.Type.FLOAT, Schema.Type.DOUBLE, Schema.Type.BOOLEAN ) val simpleSchemas = types map Schema.create val unions = List( Schema.createUnion((Schema.Type.NULL::Schema.Type.INT::Nil) map Schema.create asJava), Schema.createUnion((Schema.Type.INT::Schema.Type.NULL::Nil) map Schema.create asJava) ) val enum = Schema.createEnum("Aliens", "undocumented", "outer", List("Edgar_the_Bug", "Boris_the_Animal", "Laura_Vasquez") asJava) val allSchemas = (enum::unions)++simpleSchemas // NULL does not work val fields = allSchemas.zipWithIndex map { case (s, i) => new Schema.Field("x" + i, s, "Who", null: Object) } val expected = List( AEnum(fields(0), isNullable = false), AInt(fields(1), isNullable = true), AInt(fields(2), isNullable = true), AString(fields(3), isNullable = false), AInt(fields(4), isNullable = false), ALong(fields(5), isNullable = false), AFloat(fields(6), isNullable = false), ADouble(fields(7), isNullable = false), ABoolean(fields(8), isNullable = false) ) an[IllegalArgumentException] should be thrownBy { val nullSchema = Schema.create(Schema.Type.NULL) val nullField = new Schema.Field("xxx", null, "Nobody", null: Object) AvroField from nullField } fields.size shouldBe expected.size for { (field, expected) <- fields zip expected } { val actual = AvroField from field actual shouldBe expected } } }
Example 151
Source File: OpsTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.cli.gen import java.io.File import java.nio.file.Paths import com.salesforce.op.cli.{AvroSchemaFromFile, CliParameters, GeneratorConfig} import com.salesforce.op.test.TestCommon import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import org.scalatest.{Assertions, FlatSpec} import scala.io.Source @RunWith(classOf[JUnitRunner]) class OpsTest extends FlatSpec with TestCommon with Assertions { val tempFolder = new File(System.getProperty("java.io.tmpdir")) val projectFolder = new File(tempFolder, "cli_test") projectFolder.deleteOnExit() val testParams = CliParameters( location = tempFolder, projName = "cli_test", inputFile = Some(Paths.get("templates", "simple", "src", "main", "resources", "PassengerData.csv").toFile), response = Some("survived"), idField = Some("passengerId"), schemaSource = Some( AvroSchemaFromFile(Paths.get("..", "utils", "src", "main", "avro", "PassengerCSV.avsc").toFile) ), answersFile = Some(new File("passengers.answers")), overwrite = true).values Spec[Ops] should "generate project files" in { testParams match { case None => fail("Could not create config, I wonder why") case Some(conf: GeneratorConfig) => val ops = Ops(conf) ops.run() val buildFile = new File(projectFolder, "build.gradle") buildFile should exist val buildFileContent = Source.fromFile(buildFile).mkString buildFileContent should include("mainClassName = 'com.salesforce.app.cli_test'") val scalaSourcesFolder = Paths.get(projectFolder.toString, "src", "main", "scala", "com", "salesforce", "app") val featuresFile = Source.fromFile(new File(scalaSourcesFolder.toFile, "Features.scala")).getLines val heightLine = featuresFile.find(_ contains "description") map (_.trim) heightLine shouldBe Some( "val description = FB.Text[PassengerCSV].extract(_.getDescription.toText).asPredictor" ) } } }
Example 152
Source File: UserIOTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.cli.gen import com.salesforce.op.test.TestCommon import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import org.scalatest.{Assertions, FlatSpec} @RunWith(classOf[JUnitRunner]) class UserIOTest extends FlatSpec with TestCommon with Assertions { private case class Oracle(answers: String*) extends UserIO { private var i = -1 var question = "---" override def readLine(q: String): Option[String] = { question = q i += 1 if (i < answers.length) Some(answers(i)) else throw new IllegalStateException(s"Out of answers, q=$q") } } Spec[UserIO] should "do qna" in { // @see https://www.urbandictionary.com/define.php?term=aks def aksme(q: String, answers: String*): Option[String] = { Oracle(answers: _*).qna(q, _.length == 1, Map("2*3" -> "6", "3+2" -> "5")) } aksme("2+2", "11", "22", "?") shouldBe Some("?") aksme("2+2", "4", "5", "?") shouldBe Some("4") aksme("2+3", "44", "", "?") shouldBe Some("?") aksme("2*3", "4", "?") shouldBe Some("6") aksme("3+2", "4", "?") shouldBe Some("5") } it should "ask" in { // @see https://www.urbandictionary.com/define.php?term=aks def aksme[Int](q: String, opts: Map[Int, List[String]], answers: String*): (String, Int) = { val console = Oracle(answers: _*) val answer = console.ask(q, opts) getOrElse fail(s"A problem answering question $q") (console.question, answer) } an[IllegalStateException] should be thrownBy aksme("what is your name?", Map(1 -> List("one", "uno")), "11", "1", "?") aksme("what is your name?", Map( 1 -> List("Nessuno", "Nobody"), 2 -> List("Ishmael", "Gantenbein")), "5", "1", "?") shouldBe("what is your name? [0] Nessuno [1] Ishmael: ", 2) } }
Example 153
Source File: OpTransformerWrapperTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.sparkwrappers.specific

import com.salesforce.op.features.types._
import com.salesforce.op.test.{TestFeatureBuilder, TestSparkContext}
import com.salesforce.op.utils.spark.RichDataset._
import org.apache.spark.ml.feature.{Normalizer, StopWordsRemover}
import org.apache.spark.ml.linalg.{Vector, Vectors}
import org.apache.spark.sql._
import org.apache.spark.sql.functions._
import org.junit.runner.RunWith
import org.scalatest.FlatSpec
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class OpTransformerWrapperTest extends FlatSpec with TestSparkContext {

  val (testData, featureVector) = TestFeatureBuilder(
    Seq[MultiPickList](
      Set("I", "saw", "the", "red", "balloon").toMultiPickList,
      Set("Mary", "had", "a", "little", "lamb").toMultiPickList
    )
  )

  val (testDataNorm, _, _) = TestFeatureBuilder("label", "features",
    Seq[(Real, OPVector)](
      0.0.toReal -> Vectors.dense(1.0, 0.5, -1.0).toOPVector,
      1.0.toReal -> Vectors.dense(2.0, 1.0, 1.0).toOPVector,
      2.0.toReal -> Vectors.dense(4.0, 10.0, 2.0).toOPVector
    )
  )
  val (targetDataNorm, targetLabelNorm, featureVectorNorm) = TestFeatureBuilder("label", "features",
    Seq[(Real, OPVector)](
      0.0.toReal -> Vectors.dense(0.4, 0.2, -0.4).toOPVector,
      1.0.toReal -> Vectors.dense(0.5, 0.25, 0.25).toOPVector,
      2.0.toReal -> Vectors.dense(0.25, 0.625, 0.125).toOPVector
    )
  )

  Spec[OpTransformerWrapper[_, _, _]] should "remove stop words with caseSensitivity=true" in {
    val remover = new StopWordsRemover().setCaseSensitive(true)
    val swFilter =
      new OpTransformerWrapper[MultiPickList, MultiPickList, StopWordsRemover](remover).setInput(featureVector)
    val output = swFilter.transform(testData)

    output.collect(swFilter.getOutput()) shouldBe Array(
      Seq("I", "saw", "red", "balloon").toMultiPickList,
      Seq("Mary", "little", "lamb").toMultiPickList
    )
  }

  it should "properly normalize each feature vector instance with non-default norm of 1" in {
    val baseNormalizer = new Normalizer().setP(1.0)
    val normalizer =
      new OpTransformerWrapper[OPVector, OPVector, Normalizer](baseNormalizer).setInput(featureVectorNorm)
    val output = normalizer.transform(testDataNorm)

    val sumSqDist = validateDataframeDoubleColumn(output, normalizer.getOutput().name, targetDataNorm, "features")
    assert(sumSqDist <= 1E-6, "==> the sum of squared distances between actual and expected should be below tolerance.")
  }

  def validateDataframeDoubleColumn(
    normalizedFeatureDF: DataFrame,
    normalizedFeatureName: String,
    targetFeatureDF: DataFrame,
    targetColumnName: String
  ): Double = {
    val sqDistUdf = udf { (leftColVec: Vector, rightColVec: Vector) => Vectors.sqdist(leftColVec, rightColVec) }

    val targetColRename = "targetFeatures"
    val renamedTargetDF = targetFeatureDF.withColumnRenamed(targetColumnName, targetColRename)
    val joinedDF = normalizedFeatureDF.join(renamedTargetDF, Seq("label"))

    // compute sum of squared distances between expected and actual
    val finalDF = joinedDF.withColumn("sqDist", sqDistUdf(joinedDF(normalizedFeatureName), joinedDF(targetColRename)))
    val sumSqDist: Double = finalDF.agg(sum(finalDF("sqDist"))).first().getDouble(0)
    sumSqDist
  }
}
Example 154
Source File: OpEstimatorWrapperTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.sparkwrappers.specific

import com.salesforce.op.features.types._
import com.salesforce.op.test.{PrestigeData, TestFeatureBuilder, _}
import org.apache.spark.ml.feature.{MinMaxScaler, MinMaxScalerModel}
import org.apache.spark.ml.linalg.Vectors
import org.junit.runner.RunWith
import org.scalatest.FlatSpec
import org.scalatest.junit.JUnitRunner
import org.slf4j.LoggerFactory

@RunWith(classOf[JUnitRunner])
class OpEstimatorWrapperTest extends FlatSpec with TestSparkContext with PrestigeData {

  val log = LoggerFactory.getLogger(this.getClass)

  val (ds, education, income, women, prestige) =
    TestFeatureBuilder[OPVector, OPVector, OPVector, OPVector]("education", "income", "women", "prestige",
      prestigeSeq.map(p =>
        (Vectors.dense(p.prestige).toOPVector, Vectors.dense(p.education).toOPVector,
          Vectors.dense(p.income).toOPVector, Vectors.dense(p.women).toOPVector)
      )
    )

  Spec[OpEstimatorWrapper[_, _, _, _]] should "scale variables properly with default min/max params" in {
    val baseScaler = new MinMaxScaler()
    val scalerModel: MinMaxScalerModel = fitScalerModel(baseScaler)

    (scalerModel.getMax - 1.0).abs should be < 1E-6
  }

  it should "scale variables properly with custom min/max params" in {
    val maxParam = 100
    val baseScaler = new MinMaxScaler().setMax(maxParam)
    val scalerModel: MinMaxScalerModel = fitScalerModel(baseScaler)

    (scalerModel.getMax - maxParam).abs should be < 1E-6
  }

  it should "have the expected feature name" in {
    val wrappedEstimator =
      new OpEstimatorWrapper[OPVector, OPVector, MinMaxScaler, MinMaxScalerModel](new MinMaxScaler()).setInput(income)
    wrappedEstimator.getOutput().name shouldBe wrappedEstimator.getOutputFeatureName
  }

  private def fitScalerModel(baseScaler: MinMaxScaler): MinMaxScalerModel = {
    val scaler =
      new OpEstimatorWrapper[OPVector, OPVector, MinMaxScaler, MinMaxScalerModel](baseScaler).setInput(income)

    val model = scaler.fit(ds)
    val scalerModel = model.getSparkMlStage().get

    if (log.isInfoEnabled) {
      val output = scalerModel.transform(ds)
      output.show(false)
    }
    scalerModel
  }
}
Example 155
Source File: OpPredictorWrapperTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.sparkwrappers.specific

import com.salesforce.op.features.types._
import com.salesforce.op.stages.sparkwrappers.generic.SparkWrapperParams
import com.salesforce.op.test.{PrestigeData, TestFeatureBuilder, TestSparkContext}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.regression.{LinearRegression, LinearRegressionModel}
import org.junit.runner.RunWith
import org.scalatest.FlatSpec
import org.scalatest.junit.JUnitRunner
import org.slf4j.LoggerFactory

@RunWith(classOf[JUnitRunner])
class OpPredictorWrapperTest extends FlatSpec with TestSparkContext with PrestigeData {

  val log = LoggerFactory.getLogger(this.getClass)

  val (ds, targetLabel, featureVector) = TestFeatureBuilder[RealNN, OPVector](
    prestigeSeq.map(p => p.prestige.toRealNN -> Vectors.dense(p.education, p.income, p.women).toOPVector)
  )

  Spec[OpPredictorWrapper[_, _]] should "be able to run a simple linear regression model (fitIntercept=true)" in {
    val lrModel: LinearRegressionModel = fitLinRegModel(fitIntercept = true)
    lrModel.intercept.abs should be > 1E-6
  }

  it should "be able to run a simple linear regression model (fitIntercept=false)" in {
    val lrModel: LinearRegressionModel = fitLinRegModel(fitIntercept = false)
    lrModel.intercept.abs should be < Double.MinPositiveValue
  }

  private def fitLinRegModel(fitIntercept: Boolean): LinearRegressionModel = {
    val lrBase =
      new LinearRegression()
        .setMaxIter(10)
        .setRegParam(0.3)
        .setElasticNetParam(0.8)
        .setFitIntercept(fitIntercept)

    val lr = new OpPredictorWrapper[LinearRegression, LinearRegressionModel](lrBase)
      .setInput(targetLabel, featureVector)

    // Fit the model
    val model = lr.fit(ds).asInstanceOf[SparkWrapperParams[LinearRegressionModel]]
    val lrModel = model.getSparkMlStage().get

    // Print the coefficients and intercept for linear regression
    log.info(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")

    // Summarize the model over the training set and print out some metrics
    val trainingSummary = lrModel.summary
    log.info(s"numIterations: ${trainingSummary.totalIterations}")
    log.info(s"objectiveHistory: [${trainingSummary.objectiveHistory.mkString(",")}]")
    if (log.isInfoEnabled) trainingSummary.residuals.show()
    log.info(s"RMSE: ${trainingSummary.rootMeanSquaredError}")
    log.info(s"r2: ${trainingSummary.r2}")
    // checking r2 as a cheap way to make sure things are running as intended.
    assert(trainingSummary.r2 > 0.9)

    if (log.isInfoEnabled) {
      val output = lrModel.transform(ds)
      output.show(false)
    }

    lrModel
  }
}
Example 156
Source File: SparkWrapperParamsTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.sparkwrappers.generic

import com.salesforce.op.features.types._
import com.salesforce.op.test.TestCommon
import org.apache.spark.ml.feature.{StandardScaler, StandardScalerModel}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.{BeforeAndAfterEach, FlatSpec}

@RunWith(classOf[JUnitRunner])
class SparkWrapperParamsTest extends FlatSpec with BeforeAndAfterEach with TestCommon {

  private def estimator(sparkMlStageIn: Option[StandardScaler] = None) = {
    new SwUnaryEstimator[Real, Real, StandardScalerModel, StandardScaler](
      inputParamName = "in",
      outputParamName = "out",
      operationName = "test-op",
      sparkMlStageIn = sparkMlStageIn
    )
  }

  Spec[SparkWrapperParams[_]] should "have proper default values for path and stage" in {
    val stage = estimator()
    stage.getStageSavePath() shouldBe None
    stage.getSparkMlStage() shouldBe None
  }

  it should "also set the path on the stage param when setting the save path" in {
    val stage = estimator()
    stage.setStageSavePath("/test/path")
    stage.getStageSavePath() shouldBe Some("/test/path")
  }

  it should "allow set/get spark params on a wrapped stage" in {
    val sparkStage = new StandardScaler()
    val stage = estimator(sparkMlStageIn = Some(sparkStage))
    stage.getSparkMlStage() shouldBe Some(sparkStage)
    for {
      sparkStage <- stage.getSparkMlStage()
      withMean = sparkStage.getOrDefault(sparkStage.withMean)
    } {
      withMean shouldBe false
      sparkStage.set[Boolean](sparkStage.withMean, true)
      sparkStage.get(sparkStage.withMean) shouldBe Some(true)
    }
  }
}
Example 157
Source File: OPLogLossTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.evaluator import com.salesforce.op.features.types._ import com.salesforce.op.test.{TestFeatureBuilder, TestSparkContext} import org.apache.spark.ml.linalg.Vectors import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class OPLogLossTest extends FlatSpec with TestSparkContext { val (ds, rawLabel, pred) = TestFeatureBuilder[RealNN, Prediction]( Seq( (1.0, Vectors.dense(8, 1, 1), Vectors.dense(0.8, 0.1, 0.1), 0.0), (0.0, Vectors.dense(1.0, 0.0, 0.0), Vectors.dense(1.0, 0.0, 0.0), 0.0), (0.0, Vectors.dense(1.0, 0.8, 0.2), Vectors.dense(0.5, 0.4, 0.1), 0.0), (1.0, Vectors.dense(10.0, 80.0, 10.0), Vectors.dense(0.1, 0.8, 0.1), 1.0), (2.0, Vectors.dense(0.0, 0.0, 14.0), Vectors.dense(0.0, 0.0, 1.0), 2.0), (2.0, Vectors.dense(0.0, 0.0, 13.0), Vectors.dense(0.0, 0.0, 1.0), 2.0), (1.0, Vectors.dense(0.1, 0.4, 0.5), Vectors.dense(0.1, 0.4, 0.5), 2.0), (0.0, Vectors.dense(0.1, 0.6, 0.3), Vectors.dense(0.1, 0.6, 0.3), 1.0), (1.0, Vectors.dense(1.0, 0.8, 0.2), Vectors.dense(0.5, 0.4, 0.1), 0.0), (2.0, Vectors.dense(1.0, 0.8, 0.2), Vectors.dense(0.5, 0.4, 0.1), 0.0) ).map(v => (v._1.toRealNN, Prediction(v._4, v._2, v._3))) ) val label = rawLabel.copy(isResponse = true) val expected: Double = -math.log(0.1 * 0.5 * 0.8 * 0.4 * 0.1 * 0.4 * 0.1) / 10.0 val (dsEmpty, rawLabelEmpty, predEmpty) = TestFeatureBuilder[RealNN, Prediction]( Seq() ) val labelEmpty = rawLabel.copy(isResponse = true) val logLoss = LogLoss.multiLogLoss Spec(LogLoss.getClass) should "compute logarithmic loss metric" in { val metric = logLoss.setLabelCol(label).setPredictionCol(pred).evaluate(ds) metric shouldBe expected } it should "throw an error when the dataset is empty" in { the[IllegalArgumentException] thrownBy { logLoss.setLabelCol(labelEmpty).setPredictionCol(predEmpty).evaluate(dsEmpty) } should have message "requirement failed: Dataset is empty, log loss cannot be calculated" } }
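The `expected` value above is simply the mean of -log(probability assigned to the true label) over the ten rows; rows whose true-label probability is 1.0 contribute zero. A minimal sketch of that multi-class log loss computation (a hypothetical helper, not the evaluator's actual code):

object LogLossSketch {
  // labels are class indices; probs(i) is the probability vector predicted for row i
  def multiLogLoss(labels: Seq[Int], probs: Seq[Array[Double]]): Double = {
    require(labels.nonEmpty, "Dataset is empty, log loss cannot be calculated")
    labels.zip(probs).map { case (label, p) => -math.log(p(label)) }.sum / labels.size
  }
}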
Example 158
Source File: TextVectorizerTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.feature import com.salesforce.op._ import com.salesforce.op.features.types._ import com.salesforce.op.test.{TestFeatureBuilder, TestSparkContext} import com.salesforce.op.utils.spark.RichDataset._ import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class TextVectorizerTest extends FlatSpec with TestSparkContext with AttributeAsserts { // scalastyle:off lazy val (data, f1, f2) = TestFeatureBuilder( Seq[(Text, Text)]( (Text("Hamlet: To be or not to be - that is the question."), Text("Enter Hamlet")), (Text("Гамлет: Быть или не быть - вот в чём вопрос."), Text("Входит Гамлет")), (Text("המלט: להיות או לא להיות - זאת השאלה."), Text("נככס המלט")) ) ) // scalastyle:on "TextVectorizer" should "work correctly out of the box" in { val vectorized = f1.vectorize(numHashes = TransmogrifierDefaults.DefaultNumOfFeatures, autoDetectLanguage = TextTokenizer.AutoDetectLanguage, minTokenLength = TextTokenizer.MinTokenLength, toLowercase = TextTokenizer.ToLowercase ) vectorized.originStage shouldBe a[VectorsCombiner] vectorized.parents.head.originStage shouldBe a[OPCollectionHashingVectorizer[_]] val hasher = vectorized.parents.head.originStage.asInstanceOf[OPCollectionHashingVectorizer[_]].hashingTF() val transformed = new OpWorkflow().setResultFeatures(vectorized).transform(data) val result = transformed.collect(vectorized) val f1NameHash = hasher.indexOf(vectorized.parents.head.originStage.getInputFeatures().head.name) val field = transformed.schema(vectorized.name) assertNominal(field, Array.fill(result.head.value.size - 1)(false) :+ true, result) // scalastyle:off result(0).value(hasher.indexOf(s"${f1NameHash}_" + "hamlet")) should be >= 1.0 result(0).value(hasher.indexOf(s"${f1NameHash}_" + "question")) should be >= 1.0 result(1).value(hasher.indexOf(s"${f1NameHash}_" + "гамлет")) should be >= 1.0 result(1).value(hasher.indexOf(s"${f1NameHash}_" + "вопрос")) should be >= 1.0 result(1).value(hasher.indexOf(s"${f1NameHash}_" + "быть")) should be >= 2.0 result(2).value(hasher.indexOf(s"${f1NameHash}_" + "המלט")) should be >= 1.0 result(2).value(hasher.indexOf(s"${f1NameHash}_" + "להיות")) should be >= 2.0 // scalastyle:on } it should "allow forcing hashing into a shared hash space" in { val vectorized = f1.vectorize(numHashes = TransmogrifierDefaults.DefaultNumOfFeatures, autoDetectLanguage = TextTokenizer.AutoDetectLanguage, minTokenLength = TextTokenizer.MinTokenLength, toLowercase = TextTokenizer.ToLowercase, binaryFreq = true, others = Array(f2)) val hasher = vectorized.parents.head.originStage.asInstanceOf[OPCollectionHashingVectorizer[_]].hashingTF() val transformed = new OpWorkflow().setResultFeatures(vectorized).transform(data) val result = transformed.collect(vectorized) val f1NameHash = hasher.indexOf(vectorized.parents.head.originStage.getInputFeatures().head.name) val field = transformed.schema(vectorized.name) assertNominal(field, Array.fill(result.head.value.size - 2)(false) ++ Array(true, true), result) // scalastyle:off result(0).value(hasher.indexOf(s"${f1NameHash}_" + "hamlet")) shouldBe 1.0 result(0).value(hasher.indexOf(s"${f1NameHash}_" + "hamlet")) shouldBe 1.0 result(1).value(hasher.indexOf(s"${f1NameHash}_" + "гамлет")) shouldBe 1.0 result(1).value(hasher.indexOf(s"${f1NameHash}_" + "гамлет")) shouldBe 1.0 result(2).value(hasher.indexOf(s"${f1NameHash}_" + "המלט")) shouldBe 1.0 result(2).value(hasher.indexOf(s"${f1NameHash}_" + "המלט")) shouldBe 1.0 // scalastyle:on 
} }
Example 159
Source File: ScalerMetadataTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.feature import com.salesforce.op.test.TestSparkContext import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner import com.salesforce.op.utils.json.JsonUtils import org.apache.spark.sql.types.MetadataBuilder import scala.util.{Failure, Success} @RunWith(classOf[JUnitRunner]) class ScalerMetadataTest extends FlatSpec with TestSparkContext { val linearArgs = LinearScalerArgs(slope = 2.0, intercept = 1.0) Spec[ScalerMetadata] should "properly construct ScalerMetadata for a LinearScaler" in { val metadata = ScalerMetadata(scalingType = ScalingType.Linear, scalingArgs = linearArgs).toMetadata() metadata.getString(ScalerMetadata.scalingTypeName) shouldBe ScalingType.Linear.entryName val args = JsonUtils.fromString[LinearScalerArgs](metadata.getString(ScalerMetadata.scalingArgsName)) args match { case Failure(err) => fail(err) case Success(x) => x shouldBe linearArgs } } it should "properly construct ScalerMetaData for a LogScaler" in { val metadata = ScalerMetadata(scalingType = ScalingType.Logarithmic, scalingArgs = EmptyScalerArgs()).toMetadata() metadata.getString(ScalerMetadata.scalingTypeName) shouldBe ScalingType.Logarithmic.entryName metadata.getString(ScalerMetadata.scalingArgsName) shouldBe "{}" } it should "use apply to properly convert metadata to ScalerMetadata" in { val metadata = new MetadataBuilder().putString(ScalerMetadata.scalingTypeName, ScalingType.Linear.entryName) .putString(ScalerMetadata.scalingArgsName, linearArgs.toJson(pretty = false)).build() ScalerMetadata.apply(metadata) match { case Failure(err) => fail(err) case Success(x) => x shouldBe ScalerMetadata(ScalingType.Linear, linearArgs) } } it should "error when apply is given an invalid scaling type" in { val invalidMetaData = new MetadataBuilder().putString(ScalerMetadata.scalingTypeName, "unsupportedScaling") .putString(ScalerMetadata.scalingArgsName, linearArgs.toJson(pretty = false)).build() val err = intercept[NoSuchElementException] ( ScalerMetadata.apply(invalidMetaData).get ) err.getMessage shouldBe "unsupportedScaling is not a member of Enum (Linear, Logarithmic)" } }
Example 160
Source File: OpLDATest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.feature import com.salesforce.op._ import com.salesforce.op.features.types._ import com.salesforce.op.test.{TestFeatureBuilder, TestSparkContext} import com.salesforce.op.utils.spark.RichDataset._ import org.apache.spark.ml.clustering.LDA import org.apache.spark.ml.linalg.{Vector, Vectors} import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import org.scalatest.{Assertions, FlatSpec, Matchers} @RunWith(classOf[JUnitRunner]) class OpLDATest extends FlatSpec with TestSparkContext { val inputData = Seq( (0.0, Vectors.sparse(11, Array(0, 1, 2, 4, 5, 6, 7, 10), Array(1.0, 2.0, 6.0, 2.0, 3.0, 1.0, 1.0, 3.0))), (1.0, Vectors.sparse(11, Array(0, 1, 3, 4, 7, 10), Array(1.0, 3.0, 1.0, 3.0, 2.0, 1.0))), (2.0, Vectors.sparse(11, Array(0, 1, 2, 5, 6, 8, 9), Array(1.0, 4.0, 1.0, 4.0, 9.0, 1.0, 2.0))), (3.0, Vectors.sparse(11, Array(0, 1, 3, 6, 8, 9, 10), Array(2.0, 1.0, 3.0, 5.0, 2.0, 3.0, 9.0))), (4.0, Vectors.sparse(11, Array(0, 1, 2, 3, 4, 6, 9, 10), Array(3.0, 1.0, 1.0, 9.0, 3.0, 2.0, 1.0, 3.0))), (5.0, Vectors.sparse(11, Array(0, 1, 3, 4, 5, 6, 7, 8, 9), Array(4.0, 2.0, 3.0, 4.0, 5.0, 1.0, 1.0, 1.0, 4.0))), (6.0, Vectors.sparse(11, Array(0, 1, 3, 6, 8, 9, 10), Array(2.0, 1.0, 3.0, 5.0, 2.0, 2.0, 9.0))), (7.0, Vectors.sparse(11, Array(0, 1, 2, 3, 4, 5, 6, 9, 10), Array(1.0, 1.0, 1.0, 9.0, 2.0, 1.0, 2.0, 1.0, 3.0))), (8.0, Vectors.sparse(11, Array(0, 1, 3, 4, 5, 6, 7), Array(4.0, 4.0, 3.0, 4.0, 2.0, 1.0, 3.0))), (9.0, Vectors.sparse(11, Array(0, 1, 2, 4, 6, 8, 9, 10), Array(2.0, 8.0, 2.0, 3.0, 2.0, 2.0, 7.0, 2.0))), (10.0, Vectors.sparse(11, Array(0, 1, 2, 3, 5, 6, 9, 10), Array(1.0, 1.0, 1.0, 9.0, 2.0, 2.0, 3.0, 3.0))), (11.0, Vectors.sparse(11, Array(0, 1, 4, 5, 6, 7, 9), Array(4.0, 1.0, 4.0, 5.0, 1.0, 3.0, 1.0))) ).map(v => v._1.toReal -> v._2.toOPVector) lazy val (ds, f1, f2) = TestFeatureBuilder(inputData) lazy val inputDS = ds.persist() val seed = 1234567890L val k = 3 val maxIter = 100 lazy val expected = new LDA() .setFeaturesCol(f2.name) .setK(k) .setSeed(seed) .fit(inputDS) .transform(inputDS) .select("topicDistribution") .collect() .toSeq .map(_.getAs[Vector](0)) Spec[OpLDA] should "convert document term vectors into topic vectors" in { val f2Vec = new OpLDA().setInput(f2).setK(k).setSeed(seed).setMaxIter(maxIter) val testTransformedData = f2Vec.fit(inputDS).transform(inputDS) val output = f2Vec.getOutput() val estimate = testTransformedData.collect(output) val mse = computeMeanSqError(estimate, expected) val expectedMse = 0.5 withClue(s"Computed mse $mse (expected $expectedMse)") { mse should be < expectedMse } } it should "convert document term vectors into topic vectors (shortcut version)" in { val output = f2.lda(k = k, seed = seed, maxIter = maxIter) val f2Vec = output.originStage.asInstanceOf[OpLDA] val testTransformedData = f2Vec.fit(inputDS).transform(inputDS) val estimate = testTransformedData.collect(output) val mse = computeMeanSqError(estimate, expected) val expectedMse = 0.5 withClue(s"Computed mse $mse (expected $expectedMse)") { mse should be < expectedMse } } private def computeMeanSqError(estimate: Seq[OPVector], expected: Seq[Vector]): Double = { val n = estimate.length.toDouble estimate.zip(expected).map { case (est, exp) => Vectors.sqdist(est.value, exp) }.sum / n } }
Example 161
Source File: EmailParserTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.feature import com.salesforce.op._ import com.salesforce.op.features.types._ import com.salesforce.op.stages.base.unary.UnaryLambdaTransformer import com.salesforce.op.test.{TestFeatureBuilder, _} import com.salesforce.op.utils.spark.RichDataset._ import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class EmailParserTest extends FlatSpec with TestCommon with TestSparkContext { val (df, email) = TestFeatureBuilder("email", Seq( Email("[email protected]"), Email("@example.com"), Email("test@"), Email("@"), Email(""), Email("notanemail"), Email.empty, Email("[email protected]") )) "Email Extraction" should "extract prefix from simple email addresses" in { val prefix = email.toEmailPrefix val result = prefix.originStage.asInstanceOf[UnaryLambdaTransformer[Email, Text]].transform(df) result.collect(prefix) should contain theSameElementsInOrderAs Seq(Text("test"), Text.empty, Text.empty, Text.empty, Text.empty, Text.empty, Text.empty, Text("first.last")) } it should "extract domain from simple email addresses" in { val domain = email.toEmailDomain val result = domain.originStage.asInstanceOf[UnaryLambdaTransformer[Email, Text]].transform(df) result.collect(domain) should contain theSameElementsInOrderAs Seq(Text("example.com"), Text.empty, Text.empty, Text.empty, Text.empty, Text.empty, Text.empty, Text("example.com")) } }
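All of the expectations above follow one rule: an address yields a prefix and a domain only when both sides of a single `@` are non-empty. A small standalone sketch of that split (the helper name is illustrative, not the library's API):

object EmailSplitSketch {
  // Returns Some(prefix -> domain) only when both parts around '@' are non-empty.
  def split(email: String): Option[(String, String)] =
    email.split("@", -1) match {
      case Array(prefix, domain) if prefix.nonEmpty && domain.nonEmpty => Some(prefix -> domain)
      case _ => None
    }
}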
Example 162
Source File: FillMissingWithMeanTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.feature import com.salesforce.op._ import com.salesforce.op.features.FeatureLike import com.salesforce.op.features.types._ import com.salesforce.op.stages.base.unary.{UnaryEstimator, UnaryModel} import com.salesforce.op.test.{TestFeatureBuilder, TestSparkContext} import com.salesforce.op.utils.spark.RichDataset._ import org.apache.spark.sql.DataFrame import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner import scala.reflect.ClassTag @RunWith(classOf[JUnitRunner]) class FillMissingWithMeanTest extends FlatSpec with TestSparkContext { val data = Seq[Real](Real(4.0), Real(2.0), Real.empty, Real(6.0)) val dataNull = List.fill(7)(Real.empty) val binData = Seq[Binary](true.toBinary, false.toBinary, Binary.empty) lazy val (ds, f) = TestFeatureBuilder(data = data, f1name = "f") lazy val (dsi, fi) = TestFeatureBuilder(data = data.map(_.value.map(_.toLong).toIntegral), f1name = "fi") lazy val (dsNull, fNull) = TestFeatureBuilder(data = dataNull, f1name = "fNull") lazy val (dsb, fb) = TestFeatureBuilder(data = binData, f1name = "fb") Spec[FillMissingWithMean[_, _]] should "fill missing values with mean" in { assertUnaryEstimator[Real, RealNN]( output = new FillMissingWithMean[Double, Real]().setInput(f).getOutput(), data = ds, expected = Array(4.0, 2.0, 4.0, 6.0).map(_.toRealNN) ) } it should "fill missing values with mean from a shortcut on Real feature" in { assertUnaryEstimator[Real, RealNN]( output = f.fillMissingWithMean(), data = ds, expected = Array(4.0, 2.0, 4.0, 6.0).map(_.toRealNN) ) } it should "fill missing values with mean from a shortcut on Integral feature" in { assertUnaryEstimator[Real, RealNN]( output = fi.fillMissingWithMean(), data = dsi, expected = Array(4.0, 2.0, 4.0, 6.0).map(_.toRealNN) ) } it should "fill missing values with mean from a shortcut on Binary feature" in { assertUnaryEstimator[Real, RealNN]( output = fb.fillMissingWithMean(), data = dsb, expected = Array(1.0, 0.0, 0.5).map(_.toRealNN) ) } it should "fill a feature of only nulls with default value" in { val default = 3.14159 assertUnaryEstimator[Real, RealNN]( output = fNull.fillMissingWithMean(default = default), data = dsNull, expected = List.fill(dataNull.length)(default).map(_.toRealNN) ) } // TODO move this assert to testkit private def assertUnaryEstimator[I <: FeatureType, O <: FeatureType : FeatureTypeSparkConverter : ClassTag] ( output: FeatureLike[O], data: DataFrame, expected: Seq[O] ): Unit = { output.originStage shouldBe a[UnaryEstimator[_, _]] val model = output.originStage.asInstanceOf[UnaryEstimator[I, O]].fit(data) model shouldBe a[UnaryModel[_, _]] val transformed = model.asInstanceOf[UnaryModel[I, O]].transform(data) val results = transformed.collect(output) results should contain theSameElementsAs expected } }
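The behaviour asserted above boils down to: average the non-missing values (Binary counts as 0/1), substitute that mean for the gaps, and fall back to the supplied default when every value is missing. A minimal sketch of that logic under those assumptions (not the estimator's actual implementation):

object FillMissingWithMeanSketch {
  def fill(values: Seq[Option[Double]], default: Double = 0.0): Seq[Double] = {
    val present = values.flatten
    val mean = if (present.nonEmpty) present.sum / present.size else default
    values.map(_.getOrElse(mean))
  }
}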
Example 163
Source File: IsotonicRegressionCalibratorTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.feature import com.salesforce.op._ import com.salesforce.op.features.types._ import com.salesforce.op.features.{Feature, FeatureLike} import com.salesforce.op.stages.impl.regression.IsotonicRegressionCalibrator import com.salesforce.op.stages.sparkwrappers.specific.OpBinaryEstimatorWrapper import com.salesforce.op.test.{TestFeatureBuilder, TestSparkContext} import org.apache.spark.ml.Transformer import org.apache.spark.ml.regression.{IsotonicRegression, IsotonicRegressionModel} import org.apache.spark.sql._ import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import org.scalatest.{FlatSpec, Matchers} @RunWith(classOf[JUnitRunner]) class IsotonicRegressionCalibratorTest extends FlatSpec with TestSparkContext { val isoExpectedPredictions = Array(1, 2, 2, 2, 6, 16.5, 16.5, 17, 18) val isoExpectedModelBoundaries = Array(0, 1, 3, 4, 5, 6, 7, 8) val isoExpectedModelPredictions = Array(1, 2, 2, 6, 16.5, 16.5, 17.0, 18.0) val isoDataLabels = Seq(1, 2, 3, 1, 6, 17, 16, 17, 18) val isoTestData = isoDataLabels.zipWithIndex.map { case (label, i) => label.toRealNN -> i.toRealNN } val (isoScoresDF, isoLabels, isoScores): (DataFrame, Feature[RealNN], Feature[RealNN]) = TestFeatureBuilder(isoTestData) val antiExpectedPredictions = Array(7.0, 5.0, 4.0, 4.0, 1.0) val antiExpectedModelBoundaries = Array(0, 1, 2, 3, 4) val antiExpectedModelPredictions = Array(7.0, 5.0, 4.0, 4.0, 1.0) val antiDataLabels = Seq(7, 5, 3, 5, 1) val antiTestData = antiDataLabels.zipWithIndex.map { case (label, i) => label.toRealNN -> i.toRealNN } val (antiScoresDF, antiLabels, antiScores): (DataFrame, Feature[RealNN], Feature[RealNN]) = TestFeatureBuilder(antiTestData) Spec[IsotonicRegressionCalibrator] should "isotonically calibrate scores using shortcut" in { val calibratedScores = isoScores.toIsotonicCalibrated(isoLabels) val estimator = calibratedScores.originStage .asInstanceOf[OpBinaryEstimatorWrapper[RealNN, RealNN, RealNN, IsotonicRegression, IsotonicRegressionModel]] val model = estimator.fit(isoScoresDF).getSparkMlStage().get val predictionsDF = model.asInstanceOf[Transformer] .transform(isoScoresDF) validateOutput(calibratedScores, model, predictionsDF, true, isoExpectedPredictions, isoExpectedModelBoundaries, isoExpectedModelPredictions) } it should "isotonically calibrate scores" in { val isotonicCalibrator = new IsotonicRegressionCalibrator().setInput(isoLabels, isoScores) val calibratedScores = isotonicCalibrator.getOutput() val model = isotonicCalibrator.fit(isoScoresDF).getSparkMlStage().get val predictionsDF = model.asInstanceOf[Transformer] .transform(isoScoresDF) validateOutput(calibratedScores, model, predictionsDF, true, isoExpectedPredictions, isoExpectedModelBoundaries, isoExpectedModelPredictions) } it should "antitonically calibrate scores" in { val isIsotonic: Boolean = false val isotonicCalibrator = new IsotonicRegressionCalibrator().setIsotonic(isIsotonic).setInput(isoLabels, isoScores) val calibratedScores = isotonicCalibrator.getOutput() val model = isotonicCalibrator.fit(antiScoresDF).getSparkMlStage().get val predictionsDF = model.asInstanceOf[Transformer] .transform(antiScoresDF) validateOutput(calibratedScores, model, predictionsDF, isIsotonic, antiExpectedPredictions, antiExpectedModelBoundaries, antiExpectedModelPredictions) } def validateOutput(calibratedScores: FeatureLike[RealNN], model: IsotonicRegressionModel, predictionsDF: DataFrame, expectedIsIsotonic: Boolean, expectedPredictions: Array[Double], expectedModelBoundaries: Array[Int], 
expectedModelPredictions: Array[Double]): Unit = { val predictions = predictionsDF.select(calibratedScores.name).rdd.map { case Row(pred) => pred }.collect() val isIsotonic = model.getIsotonic isIsotonic should be(expectedIsIsotonic) predictions should contain theSameElementsInOrderAs expectedPredictions model.boundaries.toArray should contain theSameElementsInOrderAs expectedModelBoundaries model.predictions.toArray should contain theSameElementsInOrderAs expectedModelPredictions } }
Example 164
Source File: OpWord2VecTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.feature import com.salesforce.op._ import com.salesforce.op.features.types._ import com.salesforce.op.test.{TestFeatureBuilder, TestSparkContext} import com.salesforce.op.utils.spark.RichDataset._ import org.apache.spark.ml.linalg.Vectors import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import org.scalatest.{Assertions, FlatSpec, Matchers} @RunWith(classOf[JUnitRunner]) class OpWord2VecTest extends FlatSpec with TestSparkContext { val data = Seq( "I I I like like Spark".split(" "), "Hi I heard about Spark".split(" "), "I wish Java could use case classes".split(" "), "Logistic regression models are neat".split(" ") ).map(_.toSeq.toTextList) lazy val (inputData, f1) = TestFeatureBuilder(Seq(data.head)) lazy val (testData, _) = TestFeatureBuilder(data.tail) lazy val expected = data.tail.zip(Seq( Vectors.dense(-0.029884086549282075, -0.055613189935684204, 0.04186216294765473).toOPVector, Vectors.dense(-0.0026281912411962234, -0.016138136386871338, 0.010740748473576136).toOPVector, Vectors.dense(0.0, 0.0, 0.0).toOPVector )).toArray Spec[OpWord2VecTest] should "convert array of strings into a vector" in { val f1Vec = new OpWord2Vec().setInput(f1).setMinCount(0).setVectorSize(3).setSeed(1234567890L) val output = f1Vec.getOutput() val testTransformedData = f1Vec.fit(inputData).transform(testData) testTransformedData.orderBy(f1.name).collect(f1, output) shouldBe expected } it should "convert array of strings into a vector (shortcut version)" in { val output = f1.word2vec(minCount = 0, vectorSize = 3) val f1Vec = output.originStage.asInstanceOf[OpWord2Vec].setSeed(1234567890L) val testTransformedData = f1Vec.fit(inputData).transform(testData) testTransformedData.orderBy(f1.name).collect(f1, output) shouldBe expected } }
Example 165
Source File: OpStringIndexerTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.feature import com.salesforce.op.features.types._ import com.salesforce.op.features.types.Text import com.salesforce.op.test.{TestFeatureBuilder, TestSparkContext} import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner import com.salesforce.op.utils.spark.RichDataset._ @RunWith(classOf[JUnitRunner]) class OpStringIndexerTest extends FlatSpec with TestSparkContext { val txtData = Seq("a", "b", "c", "a", "a", "c").map(_.toText) val (ds, txtF) = TestFeatureBuilder(txtData) val expected = Array(0.0, 2.0, 1.0, 0.0, 0.0, 1.0).map(_.toRealNN) Spec[OpStringIndexer[_]] should "correctly set the wrapped spark stage params" in { val indexer = new OpStringIndexer[Text]() indexer.setHandleInvalid(StringIndexerHandleInvalid.Skip) indexer.getSparkMlStage().get.getHandleInvalid shouldBe StringIndexerHandleInvalid.Skip.entryName.toLowerCase indexer.setHandleInvalid(StringIndexerHandleInvalid.Error) indexer.getSparkMlStage().get.getHandleInvalid shouldBe StringIndexerHandleInvalid.Error.entryName.toLowerCase indexer.setHandleInvalid(StringIndexerHandleInvalid.Keep) indexer.getSparkMlStage().get.getHandleInvalid shouldBe StringIndexerHandleInvalid.Keep.entryName.toLowerCase } it should "throw an error if you try to set noFilter as the indexer" in { val indexer = new OpStringIndexer[Text]() intercept[IllegalArgumentException](indexer.setHandleInvalid(StringIndexerHandleInvalid.NoFilter)) } it should "correctly index a text column" in { val stringIndexer = new OpStringIndexer[Text]().setInput(txtF) val indices = stringIndexer.fit(ds).transform(ds).collect(stringIndexer.getOutput()) indices shouldBe expected } it should "correctly deindex a numeric column" in { val indexedStage = new OpStringIndexer[Text]().setInput(txtF) val indexed = indexedStage.getOutput() val indices = indexedStage.fit(ds).transform(ds) val deindexedStage = new OpIndexToString().setInput(indexed) val deindexed = deindexedStage.getOutput() val deindexedData = deindexedStage.transform(indices).collect(deindexed) deindexedData shouldBe txtData } }
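The expected indices come from frequency-based indexing: the most frequent value gets index 0 ("a" occurs three times, "c" twice, "b" once). A rough sketch of that ordering, assuming ties break alphabetically:

object StringIndexSketch {
  // indexByFrequency(Seq("a", "b", "c", "a", "a", "c")) == Map("a" -> 0.0, "c" -> 1.0, "b" -> 2.0)
  def indexByFrequency(values: Seq[String]): Map[String, Double] =
    values.groupBy(identity).toSeq
      .sortBy { case (value, occurrences) => (-occurrences.size, value) }
      .zipWithIndex
      .map { case ((value, _), index) => value -> index.toDouble }
      .toMap
}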
Example 166
Source File: LinearScalerTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.feature

import com.salesforce.op.test.TestSparkContext
import org.junit.runner.RunWith
import org.scalatest.FlatSpec
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class LinearScalerTest extends FlatSpec with TestSparkContext {

  Spec[LinearScaler] should "error on construction of a non-invertible transformation" in {
    val error = intercept[java.lang.IllegalArgumentException](
      LinearScaler(LinearScalerArgs(slope = 0.0, intercept = 1.0))
    )
    error.getMessage shouldBe "requirement failed: LinearScaler must have a non-zero slope to be invertible"
  }

  it should "correctly construct the linear scaling and inverse scaling function" in {
    val sampleData = Seq(0.0, 1.0, 2.0, 3.0, 4.0)
    val scaler = LinearScaler(LinearScalerArgs(slope = 2.0, intercept = 1.0))
    sampleData.map(x => scaler.scale(x)) shouldEqual sampleData.map(x => 2.0 * x + 1.0)
    sampleData.map(x => scaler.descale(x)) shouldEqual sampleData.map(x => 0.5 * x - 0.5)
  }
}
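The two tests pin down the whole contract: scaling is y = slope * x + intercept, descaling is its inverse, and a zero slope is rejected because the map would not be invertible. A compact standalone sketch of that pair (a hypothetical class, not the library type):

case class LinearScalerSketch(slope: Double, intercept: Double) {
  require(slope != 0.0, "LinearScaler must have a non-zero slope to be invertible")
  def scale(x: Double): Double = slope * x + intercept // forward map
  def descale(y: Double): Double = (y - intercept) / slope // inverse map
}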
Example 167
Source File: IDFTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.feature import com.salesforce.op._ import com.salesforce.op.features.types._ import com.salesforce.op.test.{TestFeatureBuilder, TestSparkContext} import com.salesforce.op.utils.spark.RichDataset._ import org.apache.spark.ml.feature.IDF import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.ml.{Estimator, Transformer} import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import org.scalatest.{Assertions, FlatSpec, Matchers} @RunWith(classOf[JUnitRunner]) class IDFTest extends FlatSpec with TestSparkContext { val data = Seq( Vectors.sparse(4, Array(1, 3), Array(1.0, 2.0)), Vectors.dense(0.0, 1.0, 2.0, 3.0), Vectors.sparse(4, Array(1), Array(1.0)) ) lazy val (ds, f1) = TestFeatureBuilder(data.map(_.toOPVector)) Spec[IDF] should "compute inverted document frequency" in { val idf = f1.idf() val model = idf.originStage.asInstanceOf[Estimator[_]].fit(ds) val transformedData = model.asInstanceOf[Transformer].transform(ds) val results = transformedData.select(idf.name).collect(idf) idf.name shouldBe idf.originStage.getOutputFeatureName val expectedIdf = Vectors.dense(Array(0, 3, 1, 2).map { x => math.log((data.length + 1.0) / (x + 1.0)) }) val expected = scaleDataWithIDF(data, expectedIdf) for { (res, exp) <- results.zip(expected) (x, y) <- res.value.toArray.zip(exp.toArray) } assert(math.abs(x - y) <= 1e-5) } it should "compute inverted document frequency when minDocFreq is 1" in { val idf = f1.idf(minDocFreq = 1) val model = idf.originStage.asInstanceOf[Estimator[_]].fit(ds) val transformedData = model.asInstanceOf[Transformer].transform(ds) val results = transformedData.select(idf.name).collect(idf) idf.name shouldBe idf.originStage.getOutputFeatureName val expectedIdf = Vectors.dense(Array(0, 3, 1, 2).map { x => if (x > 0) math.log((data.length + 1.0) / (x + 1.0)) else 0 }) val expected = scaleDataWithIDF(data, expectedIdf) for { (res, exp) <- results.zip(expected) (x, y) <- res.value.toArray.zip(exp.toArray) } assert(math.abs(x - y) <= 1e-5) } private def scaleDataWithIDF(dataSet: Seq[Vector], model: Vector): Seq[Vector] = { dataSet.map { case data: DenseVector => val res = data.toArray.zip(model.toArray).map { case (x, y) => x * y } Vectors.dense(res) case data: SparseVector => val res = data.indices.zip(data.values).map { case (id, value) => (id, value * model(id)) } Vectors.sparse(data.size, res) } } }
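Both tests build the expected result from the same smoothed IDF formula, log((numDocs + 1) / (docFreq + 1)), zeroing out terms whose document frequency falls below minDocFreq. A one-function sketch of that weighting, as the assertions above assume it:

object IdfSketch {
  // Terms appearing in fewer than minDocFreq documents get a weight of zero.
  def idf(numDocs: Long, docFreq: Long, minDocFreq: Int = 0): Double =
    if (docFreq >= minDocFreq) math.log((numDocs + 1.0) / (docFreq + 1.0)) else 0.0
}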
Example 168
Source File: OpCountVectorizerTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.feature

import com.salesforce.op._
import com.salesforce.op.features.types._
import com.salesforce.op.test.TestOpVectorColumnType.{IndCol, IndVal}
import com.salesforce.op.test.{TestFeatureBuilder, TestOpVectorMetadataBuilder, TestSparkContext}
import com.salesforce.op.utils.spark.OpVectorMetadata
import com.salesforce.op.utils.spark.RichDataset._
import org.apache.spark.ml.linalg.Vectors
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.{Assertions, FlatSpec, Matchers}

@RunWith(classOf[JUnitRunner])
class OpCountVectorizerTest extends FlatSpec with TestSparkContext {

  val data = Seq[(Real, TextList)](
    (Real(0), Seq("a", "b", "c").toTextList),
    (Real(1), Seq("a", "b", "b", "b", "a", "c").toTextList)
  )

  lazy val (ds, f1, f2) = TestFeatureBuilder(data)

  lazy val expected = Array[(Real, OPVector)](
    (Real(0), Vectors.sparse(3, Array(0, 1, 2), Array(1.0, 1.0, 1.0)).toOPVector),
    (Real(1), Vectors.sparse(3, Array(0, 1, 2), Array(3.0, 2.0, 1.0)).toOPVector)
  )

  val f2vec = new OpCountVectorizer().setInput(f2).setVocabSize(3).setMinDF(2)

  Spec[OpCountVectorizerTest] should "convert array of strings into count vector" in {
    val transformedData = f2vec.fit(ds).transform(ds)
    val output = f2vec.getOutput()
    transformedData.orderBy(f1.name).collect(f1, output) should contain theSameElementsInOrderAs expected
  }

  it should "return a fitted vectorizer with the correct parameters" in {
    val fitted = f2vec.fit(ds)
    val vectorMetadata = fitted.getMetadata()
    val expectedMeta = TestOpVectorMetadataBuilder(
      f2vec,
      f2 -> List(IndVal(Some("b")), IndVal(Some("a")), IndVal(Some("c")))
    )
    // cannot just do equals because fitting is nondeterministic
    OpVectorMetadata(f2vec.getOutputFeatureName, vectorMetadata).columns should contain theSameElementsAs expectedMeta.columns
  }

  it should "convert array of strings into count vector (shortcut version)" in {
    val output = f2.countVec(minDF = 2, vocabSize = 3)
    val f2vec = output.originStage.asInstanceOf[OpCountVectorizer]
    val transformedData = f2vec.fit(ds).transform(ds)
    transformedData.orderBy(f1.name).collect(f1, output) should contain theSameElementsInOrderAs expected
  }
}
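The expected vectors follow directly from counting each vocabulary token per row, with the vocabulary itself limited by vocabSize and minDF. A tiny sketch of just the per-row counting step (the vocabulary order used here is the one the metadata test reports):

object CountVecSketch {
  // countVector(Seq("a", "b", "b", "b", "a", "c"), Seq("b", "a", "c")) == Array(3.0, 2.0, 1.0)
  def countVector(tokens: Seq[String], vocab: Seq[String]): Array[Double] =
    vocab.map(term => tokens.count(_ == term).toDouble).toArray
}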
Example 169
Source File: ScalerTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.feature

import com.salesforce.op.test.TestSparkContext
import org.junit.runner.RunWith
import org.scalatest.FlatSpec
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class ScalerTest extends FlatSpec with TestSparkContext {

  Spec[Scaler] should "error on invalid data" in {
    val error = intercept[IllegalArgumentException](
      Scaler.apply(scalingType = ScalingType.Linear, args = EmptyScalerArgs())
    )
    error.getMessage shouldBe s"Invalid combination of scaling type '${ScalingType.Linear}' " +
      s"and args type '${EmptyScalerArgs().getClass.getSimpleName}'"
  }

  it should "correctly construct a LinearScaler" in {
    val linearScaler = Scaler.apply(scalingType = ScalingType.Linear,
      args = LinearScalerArgs(slope = 1.0, intercept = 2.0))
    linearScaler shouldBe a[LinearScaler]
    linearScaler.scalingType shouldBe ScalingType.Linear
  }

  it should "correctly construct a LogScaler" in {
    val logScaler = Scaler.apply(scalingType = ScalingType.Logarithmic, args = EmptyScalerArgs())
    logScaler shouldBe a[LogScaler]
    logScaler.scalingType shouldBe ScalingType.Logarithmic
  }
}
Example 170
Source File: TransmogrifierTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.feature

import com.salesforce.op.features._
import com.salesforce.op.features.types._
import com.salesforce.op.test.TestOpVectorColumnType._
import com.salesforce.op.test.{PassengerSparkFixtureTest, TestOpVectorMetadataBuilder}
import com.salesforce.op.utils.spark.RichDataset._
import com.salesforce.op.utils.spark.RichStructType._
import com.salesforce.op._
import org.junit.runner.RunWith
import org.scalatest.FlatSpec
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class TransmogrifierTest extends FlatSpec with PassengerSparkFixtureTest with AttributeAsserts {

  val inputFeatures = Array[OPFeature](heightNoWindow, weight, gender)

  Spec(Transmogrifier.getClass) should "return a single output feature of type vector with the correct name" in {
    val feature = inputFeatures.transmogrify()
    feature.name.contains("gender-heightNoWindow-weight_3-stagesApplied_OPVector") shouldBe true
  }

  it should "return a model when fitted" in {
    val feature = inputFeatures.transmogrify()
    val model = new OpWorkflow().setResultFeatures(feature).setReader(dataReader).train()

    model.getResultFeatures() should contain theSameElementsAs Array(feature)
    val name = model.getResultFeatures().map(_.name).head
    name.contains("gender-heightNoWindow-weight_3-stagesApplied_OPVector") shouldBe true
  }

  it should "correctly transform the data and store the feature names in metadata" in {
    val feature = inputFeatures.toSeq.transmogrify()
    val model = new OpWorkflow().setResultFeatures(feature).setReader(dataReader).train()
    val transformed = model.score(keepRawFeatures = true, keepIntermediateFeatures = true)
    val hist = feature.parents.flatMap { f =>
      val h = f.history()
      h.originFeatures.map(o => o -> FeatureHistory(Seq(o), h.stages))
    }.toMap
    transformed.schema.toOpVectorMetadata(feature.name) shouldEqual
      TestOpVectorMetadataBuilder.withOpNamesAndHist(
        feature.originStage,
        hist,
        (gender, "vecSet", List(IndCol(Some("OTHER")), IndCol(Some(TransmogrifierDefaults.NullString)))),
        (heightNoWindow, "vecReal",
          List(RootCol, IndColWithGroup(Some(TransmogrifierDefaults.NullString), heightNoWindow.name))),
        (weight, "vecReal",
          List(RootCol, IndColWithGroup(Some(TransmogrifierDefaults.NullString), weight.name)))
      )

    transformed.schema.findFields("heightNoWindow-weight_1-stagesApplied_OPVector").nonEmpty shouldBe true

    val collected = transformed.collect(feature)

    collected.head.v.size shouldEqual 6
    collected.map(_.v.toArray.toList).toSet shouldEqual
      Set(
        List(0.0, 1.0, 211.4, 1.0, 96.0, 1.0),
        List(1.0, 0.0, 172.0, 0.0, 78.0, 0.0),
        List(1.0, 0.0, 168.0, 0.0, 67.0, 0.0),
        List(1.0, 0.0, 363.0, 0.0, 172.0, 0.0),
        List(1.0, 0.0, 186.0, 0.0, 96.0, 0.0)
      )
    val field = transformed.schema(feature.name)
    assertNominal(field, Array(false, true, false, true, false, true), collected)
  }
}
Example 171
Source File: Base64VectorizerTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.feature import com.salesforce.op.OpWorkflow import com.salesforce.op.features.FeatureLike import com.salesforce.op.features.types._ import com.salesforce.op.test.TestSparkContext import com.salesforce.op.utils.spark.RichDataset._ import org.apache.spark.ml.linalg.Vectors import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class Base64VectorizerTest extends FlatSpec with TestSparkContext with Base64TestData with AttributeAsserts { "Base64Vectorizer" should "vectorize random binary data" in { val vec = randomBase64.vectorize(topK = 10, minSupport = 0, cleanText = true, trackNulls = false) val result = new OpWorkflow().setResultFeatures(vec).transform(randomData) result.collect(vec) should contain theSameElementsInOrderAs OPVector(Vectors.dense(0.0, 0.0)) +: Array.fill(expectedRandom.length - 1)(OPVector(Vectors.dense(1.0, 0.0))) } it should "vectorize some real binary content" in { val vec = realBase64.vectorize(topK = 10, minSupport = 0, cleanText = true) assertVectorizer(vec, expectedMime) } it should "vectorize some real binary content with a type hint" in { val vec = realBase64.vectorize(topK = 10, minSupport = 0, cleanText = true, typeHint = Some("application/json")) assertVectorizer(vec, expectedMimeJson) } def assertVectorizer(vec: FeatureLike[OPVector], expected: Seq[Text]): Unit = { val result = new OpWorkflow().setResultFeatures(vec).transform(realData) val vectors = result.collect(vec) val schema = result.schema(vec.name) assertNominal(schema, Array.fill(vectors.head.value.size)(true), vectors) vectors.length shouldBe expected.length // TODO add a more robust check } }
Example 172
Source File: DataSplitterTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.tuning import com.salesforce.op.test.TestSparkContext import org.apache.spark.ml.linalg.Vectors import org.apache.spark.mllib.random.RandomRDDs import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class DataSplitterTest extends FlatSpec with TestSparkContext with SplitterSummaryAsserts { import spark.implicits._ val seed = 1234L val dataCount = 1000 val trainingLimitDefault = 1E6.toLong val data = RandomRDDs.normalVectorRDD(sc, 1000, 3, seed = seed) .map(v => (1.0, Vectors.dense(v.toArray), "A")).toDF() val dataSplitter = DataSplitter(seed = seed) Spec[DataSplitter] should "split the data in the appropriate proportion - 0.0" in { val (train, test) = dataSplitter.setReserveTestFraction(0.0).split(data) test.count() shouldBe 0 train.count() shouldBe dataCount } it should "down-sample when the data count is above the default training limit" in { val numRows = trainingLimitDefault * 2 val data = RandomRDDs.normalVectorRDD(sc, numRows, 3, seed = seed) .map(v => (1.0, Vectors.dense(v.toArray), "A")).toDF() dataSplitter.preValidationPrepare(data) val dataBalanced = dataSplitter.validationPrepare(data) // validationPrepare calls the data sample method that samples the data to a target ratio but there is an epsilon // to how precise this function is which is why we need to check around that epsilon val samplingErrorEpsilon = (0.1 * trainingLimitDefault).toLong dataBalanced.count() shouldBe trainingLimitDefault +- samplingErrorEpsilon } it should "set and get all data splitter params" in { val maxRows = dataCount / 2 val downSampleFraction = maxRows / dataCount.toDouble val dataSplitter = DataSplitter() .setReserveTestFraction(0.0) .setSeed(seed) .setMaxTrainingSample(maxRows) .setDownSampleFraction(downSampleFraction) dataSplitter.getReserveTestFraction shouldBe 0.0 dataSplitter.getDownSampleFraction shouldBe downSampleFraction dataSplitter.getSeed shouldBe seed dataSplitter.getMaxTrainingSample shouldBe maxRows } it should "split the data in the appropriate proportion - 0.2" in { val (train, test) = dataSplitter.setReserveTestFraction(0.2).split(data) math.abs(test.count() - 200) < 30 shouldBe true math.abs(train.count() - 800) < 30 shouldBe true } it should "split the data in the appropriate proportion - 0.6" in { val (train, test) = dataSplitter.setReserveTestFraction(0.6).split(data) math.abs(test.count() - 600) < 30 shouldBe true math.abs(train.count() - 400) < 30 shouldBe true } it should "keep the data unchanged when prepare is called" in { val dataCount = data.count() val summary = dataSplitter.preValidationPrepare(data) val train = dataSplitter.validationPrepare(data) val sampleF = trainingLimitDefault / dataCount.toDouble val downSampleFraction = math.min(sampleF, 1.0) train.collect().zip(data.collect()).foreach { case (a, b) => a shouldBe b } assertDataSplitterSummary(summary.summaryOpt) { s => s shouldBe DataSplitterSummary(dataCount, downSampleFraction) } } }
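The last test re-derives the splitter's down-sampling fraction directly: cap the training set at maxTrainingSample rows by sampling with fraction min(limit / rowCount, 1.0). As a standalone sketch of that arithmetic (a hypothetical helper, not the library API):

object DownSampleSketch {
  // fraction(2000000L, 1000000L) == 0.5; fraction(500L, 1000000L) == 1.0
  def fraction(rowCount: Long, maxTrainingSample: Long): Double =
    math.min(maxTrainingSample / rowCount.toDouble, 1.0)
}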
Example 173
Source File: RandomParamBuilderTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.selector

import com.salesforce.op.stages.impl.classification.{OpLogisticRegression, OpRandomForestClassifier, OpXGBoostClassifier}
import com.salesforce.op.test.TestSparkContext
import org.junit.runner.RunWith
import org.scalatest.FlatSpec
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class RandomParamBuilderTest extends FlatSpec with TestSparkContext {

  private val lr = new OpLogisticRegression()
  private val rf = new OpRandomForestClassifier()
  private val xgb = new OpXGBoostClassifier()

  Spec[RandomParamBuilder] should "build a param grid of the desired length with one param variable" in {
    val min = 0.00001
    val max = 10
    val lrParams = new RandomParamBuilder()
      .uniform(lr.regParam, min, max)
      .build(5)
    lrParams.length shouldBe 5
    lrParams.foreach(_.toSeq.length shouldBe 1)
    lrParams.foreach(_.toSeq.foreach(p =>
      (p.value.asInstanceOf[Double] < max && p.value.asInstanceOf[Double] > min) shouldBe true))
    lrParams.foreach(_.toSeq.map(_.param).toSet shouldBe Set(lr.regParam))

    val lrParams2 = new RandomParamBuilder()
      .exponential(lr.regParam, min, max)
      .build(20)
    lrParams2.length shouldBe 20
    lrParams2.foreach(_.toSeq.length shouldBe 1)
    lrParams2.foreach(_.toSeq.foreach(p =>
      (p.value.asInstanceOf[Double] < max && p.value.asInstanceOf[Double] > min) shouldBe true))
    lrParams2.foreach(_.toSeq.map(_.param).toSet shouldBe Set(lr.regParam))
  }

  it should "build a param grid of the desired length with many param variables" in {
    val lrParams = new RandomParamBuilder()
      .exponential(lr.regParam, .000001, 10)
      .subset(lr.family, Seq("auto", "binomial", "multinomial"))
      .uniform(lr.maxIter, 2, 50)
      .build(23)
    lrParams.length shouldBe 23
    lrParams.foreach(_.toSeq.length shouldBe 3)
    lrParams.foreach(_.toSeq.map(_.param).toSet shouldBe Set(lr.regParam, lr.family, lr.maxIter))
  }

  it should "work for all param types" in {
    val xgbParams = new RandomParamBuilder()
      .subset(xgb.checkpointPath, Seq("a", "b")) // string
      .uniform(xgb.alpha, 0, 1) // double
      .uniform(xgb.missing, 0, 100) // float
      .uniform(xgb.checkpointInterval, 2, 5) // int
      .uniform(xgb.seed, 5, 1000) // long
      .uniform(xgb.useExternalMemory) // boolean
      .exponential(xgb.baseScore, 0.0001, 1) // double
      .exponential(xgb.missing, 0.000001F, 1) // float - overwrites first call
      .build(2)

    xgbParams.length shouldBe 2
    xgbParams.foreach(_.toSeq.length shouldBe 7)
    xgbParams.foreach(_.toSeq.map(_.param).toSet shouldBe Set(xgb.checkpointPath, xgb.alpha, xgb.missing,
      xgb.checkpointInterval, xgb.seed, xgb.useExternalMemory, xgb.baseScore))
  }

  it should "throw a requirement error if an improper min value is passed in for exponential scale" in {
    intercept[IllegalArgumentException](new RandomParamBuilder()
      .exponential(xgb.baseScore, 0, 1)).getMessage() shouldBe
      "requirement failed: Min value must be greater than zero for exponential distribution to work"
  }

  it should "throw a requirement error if min is not less than max" in {
    intercept[IllegalArgumentException](new RandomParamBuilder()
      .uniform(xgb.baseScore, 1, 0)).getMessage() shouldBe
      "requirement failed: Min must be less than max"
  }
}
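The `exponential` builder presumably samples on a log scale, which is why a non-positive minimum is rejected, while `uniform` samples linearly between min and max. A rough, assumption-based sketch of those two draws (not the builder's actual implementation):

import scala.util.Random

object RandomParamSketch {
  def uniform(min: Double, max: Double, rng: Random): Double = {
    require(min < max, "Min must be less than max")
    min + rng.nextDouble() * (max - min)
  }
  // "exponential scale": draw uniformly in log space, then exponentiate
  def exponential(min: Double, max: Double, rng: Random): Double = {
    require(min > 0, "Min value must be greater than zero for exponential distribution to work")
    math.exp(uniform(math.log(min), math.log(max), rng))
  }
}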
Example 174
Source File: PredictionDeIndexerTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.preparators

import com.salesforce.op.utils.spark.RichDataset._
import com.salesforce.op.features.types._
import com.salesforce.op.stages.base.unary.UnaryLambdaTransformer
import com.salesforce.op.stages.impl.feature.OpStringIndexerNoFilter
import com.salesforce.op.test.{TestFeatureBuilder, TestSparkContext}
import org.junit.runner.RunWith
import org.scalatest.FlatSpec
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class PredictionDeIndexerTest extends FlatSpec with TestSparkContext {

  val data = Seq(("a", 0.0), ("b", 1.0), ("c", 2.0)).map { case (txt, num) => (txt.toText, num.toRealNN) }
  val (ds, txtF, numF) = TestFeatureBuilder(data)

  val response = txtF.indexed()
  val indexedData = response.originStage.asInstanceOf[OpStringIndexerNoFilter[_]].fit(ds).transform(ds)

  val permutation = new UnaryLambdaTransformer[RealNN, RealNN](
    operationName = "modulo",
    transformFn = v => ((v.value.get + 1).toInt % 3).toRealNN
  ).setInput(response)
  val pred = permutation.getOutput()
  val permutedData = permutation.transform(indexedData)

  val expected = Array("b", "c", "a").map(_.toText)

  Spec[PredictionDeIndexer] should "deindex the feature correctly" in {
    val predDeIndexer = new PredictionDeIndexer().setInput(response, pred)
    val deIndexed = predDeIndexer.getOutput()

    val results = predDeIndexer.fit(permutedData).transform(permutedData).collect(deIndexed)
    results shouldBe expected
  }

  it should "throw a nice error when there is no metadata" in {
    val predDeIndexer = new PredictionDeIndexer().setInput(numF, pred)
    the[Error] thrownBy {
      predDeIndexer.fit(permutedData).transform(permutedData)
    } should have message s"The feature ${numF.name} does not contain any label/index mapping in its metadata"
  }
}
Example 175
Source File: MinVarianceFilterMetadataTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.preparators import com.salesforce.op.stages.impl.preparators.MinVarianceSummary.statisticsFromMetadata import com.salesforce.op.test.TestSparkContext import com.salesforce.op.utils.spark.RichMetadata._ import org.apache.spark.sql.types.Metadata import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class MinVarianceFilterMetadataTest extends FlatSpec with TestSparkContext { val summary = MinVarianceSummary( dropped = Seq("f1"), featuresStatistics = SummaryStatistics(3, 0.01, Seq(0.1, 0.2, 0.3), Seq(0.1, 0.2, 0.3), Seq(0.1, 0.2, 0.3), Seq(0.1, 0.2, 0.3)), names = Seq("f1", "f2", "f3") ) Spec[MinVarianceSummary] should "convert to and from metadata correctly" in { val meta = summary.toMetadata() meta.isInstanceOf[Metadata] shouldBe true val retrieved = MinVarianceSummary.fromMetadata(meta) retrieved.isInstanceOf[MinVarianceSummary] shouldBe true retrieved.dropped should contain theSameElementsAs summary.dropped retrieved.featuresStatistics.count shouldBe summary.featuresStatistics.count retrieved.featuresStatistics.max should contain theSameElementsAs summary.featuresStatistics.max retrieved.featuresStatistics.min should contain theSameElementsAs summary.featuresStatistics.min retrieved.featuresStatistics.mean should contain theSameElementsAs summary.featuresStatistics.mean retrieved.featuresStatistics.variance should contain theSameElementsAs summary.featuresStatistics.variance retrieved.names should contain theSameElementsAs summary.names } it should "convert to and from JSON and give the same values" in { val meta = summary.toMetadata() val json = meta.wrapped.prettyJson val recovered = Metadata.fromJson(json).wrapped val dropped = recovered.getArray[String](MinVarianceNames.Dropped).toSeq val stats = statisticsFromMetadata(recovered.get[Metadata](MinVarianceNames.FeaturesStatistics)) val names = recovered.getArray[String](MinVarianceNames.Names).toSeq dropped should contain theSameElementsAs summary.dropped stats.count shouldBe summary.featuresStatistics.count stats.max should contain theSameElementsAs summary.featuresStatistics.max stats.min should contain theSameElementsAs summary.featuresStatistics.min stats.mean should contain theSameElementsAs summary.featuresStatistics.mean stats.variance should contain theSameElementsAs summary.featuresStatistics.variance names should contain theSameElementsAs summary.names } }
Example 176
Source File: OpRegressionModelTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.regression import com.salesforce.op.features.types.{Prediction, RealNN} import com.salesforce.op.stages.sparkwrappers.specific.SparkModelConverter.toOP import com.salesforce.op.test._ import com.salesforce.op.testkit._ import ml.dmlc.xgboost4j.scala.spark.{OpXGBoost, OpXGBoostQuietLogging, XGBoostRegressor} import org.apache.spark.ml.regression._ import org.apache.spark.sql.DataFrame import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class OpRegressionModelTest extends FlatSpec with TestSparkContext with OpXGBoostQuietLogging { private val label = RandomIntegral.integrals(0, 2).limit(1000) .map{ v => RealNN(v.value.map(_.toDouble).getOrElse(0.0)) } private val fv = RandomVector.binary(10, 0.3).limit(1000) private val data = label.zip(fv) private val (rawDF, labelF, featureV) = TestFeatureBuilder("label", "features", data) Spec[OpDecisionTreeRegressionModel] should "produce the same values as the spark version" in { val spk = new DecisionTreeRegressor() .setFeaturesCol(featureV.name) .setLabelCol(labelF.name) .fit(rawDF) val op = toOP(spk, spk.uid).setInput(labelF, featureV) compareOutputs(spk.transform(rawDF), op.transform(rawDF)) } Spec[OpLinearRegressionModel] should "produce the same values as the spark version" in { val spk = new LinearRegression() .setFeaturesCol(featureV.name) .setLabelCol(labelF.name) .fit(rawDF) val op = toOP(spk, spk.uid).setInput(labelF, featureV) compareOutputs(spk.transform(rawDF), op.transform(rawDF)) } Spec[OpGBTRegressionModel] should "produce the same values as the spark version" in { val spk = new GBTRegressor() .setFeaturesCol(featureV.name) .setLabelCol(labelF.name) .fit(rawDF) val op = toOP(spk, spk.uid).setInput(labelF, featureV) compareOutputs(spk.transform(rawDF), op.transform(rawDF)) } Spec[OpRandomForestRegressionModel] should "produce the same values as the spark version" in { val spk = new RandomForestRegressor() .setFeaturesCol(featureV.name) .setLabelCol(labelF.name) .fit(rawDF) val op = toOP(spk, spk.uid).setInput(labelF, featureV) compareOutputs(spk.transform(rawDF), op.transform(rawDF)) } Spec[OpGeneralizedLinearRegressionModel] should "produce the same values as the spark version" in { val spk = new GeneralizedLinearRegression() .setFeaturesCol(featureV.name) .setLabelCol(labelF.name) .fit(rawDF) val op = toOP(spk, spk.uid).setInput(labelF, featureV) compareOutputs(spk.transform(rawDF), op.transform(rawDF)) } Spec[OpXGBoostRegressionModel] should "produce the same values as the spark version" in { val reg = new XGBoostRegressor() reg.set(reg.trackerConf, OpXGBoost.DefaultTrackerConf) .setFeaturesCol(featureV.name) .setLabelCol(labelF.name) val spk = reg.fit(rawDF) val op = toOP(spk, spk.uid).setInput(labelF, featureV) compareOutputs(spk.transform(rawDF), op.transform(rawDF)) } def compareOutputs(df1: DataFrame, df2: DataFrame): Unit = { val sorted1 = df1.collect().sortBy(_.getAs[Double](2)) val sorted2 = df2.collect().sortBy(_.getAs[Map[String, Double]](2)(Prediction.Keys.PredictionName)) sorted1.zip(sorted2).foreach{ case (r1, r2) => val map = r2.getAs[Map[String, Double]](2) r1.getAs[Double](2) shouldEqual map(Prediction.Keys.PredictionName) } } }
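A minimal sketch of the wrapping pattern this test relies on: fit a plain Spark regressor, then convert the fitted model into a TransmogrifAI stage with toOP. Here rawDF, labelF and featureV are placeholders standing in for the training DataFrame and the label/feature handles built above.

import com.salesforce.op.stages.sparkwrappers.specific.SparkModelConverter.toOP
import org.apache.spark.ml.regression.LinearRegression

// rawDF, labelF, featureV: assumed to exist, e.g. built with TestFeatureBuilder as in the test.
val sparkModel = new LinearRegression()
  .setFeaturesCol(featureV.name)
  .setLabelCol(labelF.name)
  .fit(rawDF)
val opModel = toOP(sparkModel, sparkModel.uid).setInput(labelF, featureV)
val scored = opModel.transform(rawDF) // predictions land in a Prediction map column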
Example 177
Source File: OpPipelineStageReaderWriterTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages import com.salesforce.op.features._ import com.salesforce.op.features.types._ import com.salesforce.op.stages.OpPipelineStageReaderWriter._ import com.salesforce.op.test.PassengerSparkFixtureTest import com.salesforce.op.utils.reflection.ReflectionUtils import com.salesforce.op.utils.spark.RichDataset._ import org.apache.spark.ml.{Model, Transformer} import org.apache.spark.sql.types.{DataType, Metadata, MetadataBuilder} import org.json4s.JsonAST.JValue import org.json4s.jackson.JsonMethods.{compact, parse, pretty, render} import org.json4s.{JArray, JObject} import org.scalatest.FlatSpec import org.slf4j.LoggerFactory // TODO: consider adding a read/write test for a spark wrapped stage as well private[stages] abstract class OpPipelineStageReaderWriterTest extends FlatSpec with PassengerSparkFixtureTest { val meta = new MetadataBuilder().putString("foo", "bar").build() val expectedFeaturesLength = 1 def stage: OpPipelineStageBase with Transformer val expected: Array[Real] val hasOutputName = true private val log = LoggerFactory.getLogger(this.getClass) private lazy val savePath = tempDir + "/" + this.getClass.getSimpleName + "-" + System.currentTimeMillis() private lazy val writer = new OpPipelineStageWriter(stage) private lazy val stageJsonString: String = writer.writeToJsonString(savePath) private lazy val stageJson: JValue = parse(stageJsonString) private lazy val isModel = stage.isInstanceOf[Model[_]] private val FN = FieldNames Spec(this.getClass) should "write stage uid" in { log.info(pretty(stageJson)) (stageJson \ FN.Uid.entryName).extract[String] shouldBe stage.uid } it should "write class name" in { (stageJson \ FN.Class.entryName).extract[String] shouldBe stage.getClass.getName } it should "write params map" in { val params = extractParams(stageJson).extract[Map[String, Any]] if (hasOutputName) { params should have size 4 params.keys shouldBe Set("inputFeatures", "outputMetadata", "inputSchema", "outputFeatureName") } else { params should have size 3 params.keys shouldBe Set("inputFeatures", "outputMetadata", "inputSchema") } } it should "write outputMetadata" in { val params = extractParams(stageJson) val metadataStr = compact(render(extractParams(stageJson) \ "outputMetadata")) val metadata = Metadata.fromJson(metadataStr) metadata shouldBe stage.getMetadata() } it should "write inputSchema" in { val schemaStr = compact(render(extractParams(stageJson) \ "inputSchema")) val schema = DataType.fromJson(schemaStr) schema shouldBe stage.getInputSchema() } it should "write input features" in { val jArray = (extractParams(stageJson) \ "inputFeatures").extract[JArray] jArray.values should have length expectedFeaturesLength val obj = jArray(0).extract[JObject] obj.values.keys shouldBe Set("name", "isResponse", "isRaw", "uid", "typeName", "stages", "originFeatures") } it should "write model ctor args" in { if (stage.isInstanceOf[Model[_]]) { val ctorArgs = (stageJson \ FN.CtorArgs.entryName).extract[JObject] val (_, args) = ReflectionUtils.bestCtorWithArgs(stage) ctorArgs.values.keys shouldBe args.map(_._1).toSet } } it should "load stage correctly" in { val reader = new OpPipelineStageReader(stage) val stageLoaded = reader.loadFromJsonString(stageJsonString, path = savePath) stageLoaded shouldBe a[OpPipelineStageBase] stageLoaded shouldBe a[Transformer] stageLoaded.getOutput() shouldBe a[FeatureLike[_]] val _ = stage.asInstanceOf[Transformer].transform(passengersDataSet) val transformed = stageLoaded.asInstanceOf[Transformer].transform(passengersDataSet) 
transformed.collect(stageLoaded.getOutput().asInstanceOf[FeatureLike[Real]]) shouldBe expected stageLoaded.uid shouldBe stage.uid stageLoaded.operationName shouldBe stage.operationName stageLoaded.getInputFeatures() shouldBe stage.getInputFeatures() stageLoaded.getInputSchema() shouldBe stage.getInputSchema() } private def extractParams(stageJson: JValue): JValue = { val defaultParamsMap = stageJson \ FN.DefaultParamMap.entryName val paramsMap = stageJson \ FN.ParamMap.entryName defaultParamsMap.merge(paramsMap) } }
Example 178
Source File: FeatureJsonHelperTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.features import com.salesforce.op._ import com.salesforce.op.test.{PassengerFeaturesTest, TestCommon} import org.json4s.MappingException import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class FeatureJsonHelperTest extends FlatSpec with PassengerFeaturesTest with TestCommon { trait DifferentParents { val feature = height + weight val stages = Map(feature.originStage.uid -> feature.originStage) val features = Map(height.uid -> height, weight.uid -> weight) } trait SameParents { val feature = height + height val stages = Map(feature.originStage.uid -> feature.originStage) val features = Map(height.uid -> height, height.uid -> height) } Spec(FeatureJsonHelper.getClass) should "serialize/deserialize a feature properly" in new DifferentParents { val json = feature.toJson() val parsedFeature = FeatureJsonHelper.fromJsonString(json, stages, features) if (parsedFeature.isFailure) fail(s"Failed to deserialize from json: $json", parsedFeature.failed.get) val res = parsedFeature.get res shouldBe a[Feature[_]] res.equals(feature) shouldBe true res.uid shouldBe feature.uid res.wtt.tpe =:= feature.wtt.tpe shouldBe true } it should "deserialize a set of parent features from one reference" in new SameParents { val json = feature.toJson() val parsedFeature = FeatureJsonHelper.fromJsonString(feature.toJson(), stages, features) if (parsedFeature.isFailure) fail(s"Failed to deserialize from json: $json", parsedFeature.failed.get) val res = parsedFeature.get res.equals(feature) shouldBe true res.wtt.tpe =:= feature.wtt.tpe shouldBe true } it should "fail to deserialize invalid json" in new DifferentParents { val res = FeatureJsonHelper.fromJsonString("{}", stages, features) res.isFailure shouldBe true res.failed.get shouldBe a[MappingException] } it should "fail when origin stage is not found" in new DifferentParents { val res = FeatureJsonHelper.fromJsonString(feature.toJson(), stages = Map.empty, features) res.isFailure shouldBe true res.failed.get shouldBe a[RuntimeException] } it should "fail when not all parents are found" in new DifferentParents { val res = FeatureJsonHelper.fromJsonString(feature.toJson(), stages, features = Map.empty) res.isFailure shouldBe true res.failed.get shouldBe a[RuntimeException] } }
Example 179
Source File: JobGroupUtilTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.utils.spark import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner import com.salesforce.op.test.{TestCommon, TestSparkContext} @RunWith(classOf[JUnitRunner]) class JobGroupUtilTest extends FlatSpec with TestCommon with TestSparkContext { Spec(JobGroupUtil.getClass) should "be able to set a job group ID around a code block" in { JobGroupUtil.withJobGroup(OpStep.DataReadingAndFiltering) { spark.sparkContext.parallelize(Seq(1, 2, 3, 4, 5)).collect() } spark.sparkContext.statusTracker.getJobIdsForGroup("DataReadingAndFiltering") should not be empty } it should "reset the job group ID after a code block" in { JobGroupUtil.withJobGroup(OpStep.DataReadingAndFiltering) { spark.sparkContext.parallelize(Seq(1, 2, 3, 4, 5)).collect() } spark.sparkContext.parallelize(Seq(1, 2, 3, 4, 5)).collect() // Ensure that the last `.collect()` was not tagged with "DataReadingAndFiltering" spark.sparkContext.statusTracker.getJobIdsForGroup(null) should not be empty } }
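A small usage sketch, assuming a SparkSession named spark is in scope and that OpStep lives in the same package as the utility, as the test's unqualified reference suggests.

import com.salesforce.op.utils.spark.{JobGroupUtil, OpStep}

// Every Spark job triggered inside the block is tagged with the given step,
// so it can be looked up afterwards via the status tracker or in the Spark UI.
JobGroupUtil.withJobGroup(OpStep.DataReadingAndFiltering) {
  spark.sparkContext.parallelize(1 to 100).count()
}
val jobIds = spark.sparkContext.statusTracker.getJobIdsForGroup("DataReadingAndFiltering")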
Example 180
Source File: OpenNLPSentenceSplitterTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.utils.text import com.salesforce.op.features.types._ import com.salesforce.op.stages.impl.feature.TextTokenizer import com.salesforce.op.stages.impl.feature.TextTokenizer.TextTokenizerResult import com.salesforce.op.test.TestCommon import com.salesforce.op.utils.text.Language._ import opennlp.tools.sentdetect.SentenceModel import opennlp.tools.tokenize.TokenizerModel import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class OpenNLPSentenceSplitterTest extends FlatSpec with TestCommon { val splitter = new OpenNLPSentenceSplitter() Spec[OpenNLPSentenceSplitter] should "split an English paragraph into sentences" in { val input = "Pierre Vinken, 61 years old, will join the board as a nonexecutive director Nov 29. " + "Mr Vinken is chairman of Elsevier N.V., the Dutch publishing group. Rudolph Agnew, 55 years old and " + "former chairman of Consolidated Gold Fields PLC, was named a director of this British industrial conglomerate." splitter.getSentences(input, language = English) shouldEqual Seq( "Pierre Vinken, 61 years old, will join the board as a nonexecutive director Nov 29.", "Mr Vinken is chairman of Elsevier N.V., the Dutch publishing group.", "Rudolph Agnew, 55 years old and former chairman of Consolidated Gold Fields PLC, " + "was named a director of this British industrial conglomerate." ) TextTokenizer.tokenize(input.toText, sentenceSplitter = Option(splitter), defaultLanguage = English) shouldEqual TextTokenizerResult(English, Seq( Seq("pierr", "vinken", "61", "year", "old", "will", "join", "board", "nonexecut", "director", "nov", "29").toTextList, Seq("mr", "vinken", "chairman", "elsevi", "n.v", "dutch", "publish", "group").toTextList, Seq("rudolph", "agnew", "55", "year", "old", "former", "chairman", "consolid", "gold", "field", "plc", "name", "director", "british", "industri", "conglomer").toTextList)) TextTokenizer.tokenize(input.toText, analyzer = new OpenNLPAnalyzer(), sentenceSplitter = Option(splitter), defaultLanguage = English) shouldEqual TextTokenizerResult( English, Seq( Seq("pierre", "vinken", ",", "61", "years", "old", ",", "will", "join", "the", "board", "as", "a", "nonexecutive", "director", "nov", "29", ".").toTextList, Seq("mr", "vinken", "is", "chairman", "of", "elsevier", "n", ".v.", ",", "the", "dutch", "publishing", "group", ".").toTextList, Seq("rudolph", "agnew", ",", "55", "years", "old", "and", "former", "chairman", "of", "consolidated", "gold", "fields", "plc", ",", "was", "named", "a", "director", "of", "this", "british", "industrial", "conglomerate", ".").toTextList)) } it should "split a Portuguese text into sentences" in { // scalastyle:off val input = "Depois de Guimarães, o North Music Festival estaciona este ano no Porto. A partir de sexta-feira, " + "a Alfândega do Porto recebe a segunda edição deste festival de dois dias. No cartaz há nomes como os " + "portugueses Linda Martini e Mão Morta, mas também Guano Apes ou os DJ’s portugueses Rich e Mendes." splitter.getSentences(input, language = Portuguese) shouldEqual Seq( "Depois de Guimarães, o North Music Festival estaciona este ano no Porto.", "A partir de sexta-feira, a Alfândega do Porto recebe a segunda edição deste festival de dois dias.", "No cartaz há nomes como os portugueses Linda Martini e Mão Morta, mas também Guano Apes ou os DJ’s " + "portugueses Rich e Mendes." 
) // scalastyle:on } it should "load a sentence detection and tokenizer model for a language if they exist" in { val languages = Seq(Danish, Portuguese, English, Dutch, German, Sami) languages.map { language => OpenNLPModels.getSentenceModel(language).exists(_.isInstanceOf[SentenceModel]) shouldBe true OpenNLPModels.getTokenizerModel(language).exists(_.isInstanceOf[TokenizerModel]) shouldBe true } } it should "not load a sentence detection and tokenizer model for a language if they do not exist" in { val languages = Seq(Japanese, Czech) languages.map { language => OpenNLPModels.getSentenceModel(language) shouldEqual None OpenNLPModels.getTokenizerModel(language) shouldEqual None } } it should "return non-preprocessed input if no such sentence detection model exists" in { // scalastyle:off val input = "ピエール・ヴィンケン(61歳)は、11月29日に臨時理事に就任します。" + "ヴィンケン氏は、オランダの出版グループであるエルゼビアN.V.の会長です。 " + "55歳のルドルフ・アグニュー(Rudolph Agnew、元コネチカットゴールドフィールドPLC)会長は、" + "この英国の産業大企業の取締役に任命されました。" // scalastyle:on splitter.getSentences(input, language = Language.Japanese) shouldEqual Seq(input) } }
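A short sketch of the two entry points exercised above: direct sentence splitting and passing the splitter into TextTokenizer. The sample paragraph is arbitrary, and the splitter is assumed to live alongside Language in com.salesforce.op.utils.text, as the test's package suggests.

import com.salesforce.op.features.types._
import com.salesforce.op.stages.impl.feature.TextTokenizer
import com.salesforce.op.utils.text.Language.English
import com.salesforce.op.utils.text.OpenNLPSentenceSplitter

val splitter = new OpenNLPSentenceSplitter()
val paragraph = "Mr Vinken is chairman of Elsevier N.V., the Dutch publishing group. He is 61 years old."
// Plain sentence splitting.
val sentences = splitter.getSentences(paragraph, language = English)
// Sentence-aware tokenization: tokens come back grouped per detected sentence.
val result = TextTokenizer.tokenize(paragraph.toText, sentenceSplitter = Option(splitter), defaultLanguage = English)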
Example 181
Source File: RawFeatureFilterResultsComparison.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.filters import com.salesforce.op.DoubleEquality import org.scalatest.{FlatSpec, Matchers} object RawFeatureFilterResultsComparison extends FlatSpec with Matchers with DoubleEquality{ def compareConfig(c1: RawFeatureFilterConfig, c2: RawFeatureFilterConfig): Unit = { c1.minFill shouldBe c2.minFill c1.maxFillDifference shouldBe c2.maxFillDifference c1.maxFillRatioDiff shouldBe c2.maxFillRatioDiff c1.maxJSDivergence shouldBe c2.maxJSDivergence c1.maxCorrelation shouldBe c2.maxCorrelation c1.correlationType shouldBe c2.correlationType c1.jsDivergenceProtectedFeatures shouldBe c2.jsDivergenceProtectedFeatures c1.protectedFeatures shouldBe c2.protectedFeatures } def compareDistributions(d1: FeatureDistribution, d2: FeatureDistribution): Unit = { d1.name shouldEqual d2.name d1.key shouldEqual d2.key d1.count shouldEqual d2.count d1.nulls shouldEqual d2.nulls d1.distribution shouldEqual d2.distribution d1.summaryInfo shouldEqual d2.summaryInfo } def compareSeqDistributions(d1: Seq[FeatureDistribution], d2: Seq[FeatureDistribution]): Unit = { d1.zip(d2).foreach { case (a, b) => compareDistributions(a, b) } } def compareMetrics(m1: RawFeatureFilterMetrics, m2: RawFeatureFilterMetrics): Unit = { m1.name shouldBe m2.name m1.key shouldBe m2.key m1.trainingFillRate shouldBe m2.trainingFillRate m1.trainingNullLabelAbsoluteCorr shouldEqual m2.trainingNullLabelAbsoluteCorr m1.scoringFillRate shouldEqual m2.scoringFillRate m1.jsDivergence shouldEqual m2.jsDivergence m1.fillRateDiff shouldEqual m2.fillRateDiff m1.fillRatioDiff shouldEqual m2.fillRatioDiff } def compareSeqMetrics(m1: Seq[RawFeatureFilterMetrics], m2: Seq[RawFeatureFilterMetrics]): Unit = { m1.zip(m2).foreach { case (a, b) => compareMetrics(a, b) } } def compareExclusionReasons(er1: ExclusionReasons, er2: ExclusionReasons): Unit = { er1.name shouldBe er2.name er1.key shouldBe er2.key er1.trainingUnfilledState shouldBe er2.trainingUnfilledState er1.trainingNullLabelLeaker shouldBe er2.trainingNullLabelLeaker er1.scoringUnfilledState shouldBe er2.scoringUnfilledState er1.jsDivergenceMismatch shouldBe er2.jsDivergenceMismatch er1.fillRateDiffMismatch shouldBe er2.fillRateDiffMismatch er1.fillRatioDiffMismatch shouldBe er2.fillRatioDiffMismatch er1.excluded shouldBe er2.excluded } def compareSeqExclusionReasons(er1: Seq[ExclusionReasons], er2: Seq[ExclusionReasons]): Unit = { er1.zip(er2).foreach { case (a, b) => compareExclusionReasons(a, b) } } def compare(rff1: RawFeatureFilterResults, rff2: RawFeatureFilterResults): Unit = { compareConfig(rff1.rawFeatureFilterConfig, rff2.rawFeatureFilterConfig) compareSeqDistributions(rff1.rawFeatureDistributions, rff2.rawFeatureDistributions) compareSeqMetrics(rff1.rawFeatureFilterMetrics, rff2.rawFeatureFilterMetrics) compareSeqExclusionReasons(rff1.exclusionReasons, rff2.exclusionReasons) } }
Example 182
Source File: SummaryTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.filters import com.salesforce.op.test.TestCommon import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class SummaryTest extends FlatSpec with TestCommon { Spec[Summary] should "be correctly created from a sequence of features" in { val f1 = Left(Seq("a", "b", "c")) val f2 = Right(Seq(0.5, 1.0)) val f1s = Summary(f1) val f2s = Summary(f2) f1s.min shouldBe 3 f1s.max shouldBe 3 f1s.sum shouldBe 3 f1s.count shouldBe 1 f2s.min shouldBe 0.5 f2s.max shouldBe 1.0 f2s.sum shouldBe 1.5 f2s.count shouldBe 2 } }
Example 183
Source File: OpRegressionEvaluatorTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.evaluators import com.salesforce.op.features.types._ import com.salesforce.op.stages.impl.classification.OpLogisticRegression import com.salesforce.op.stages.impl.regression.{OpLinearRegression, RegressionModelSelector} import com.salesforce.op.stages.impl.selector.ModelSelectorNames.EstimatorType import com.salesforce.op.test.{TestFeatureBuilder, TestSparkContext} import org.apache.spark.ml.linalg.Vectors import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.tuning.ParamGridBuilder import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class OpRegressionEvaluatorTest extends FlatSpec with TestSparkContext { val (ds, rawLabel, features) = TestFeatureBuilder[RealNN, OPVector]( Seq( (10.0, Vectors.dense(1.0, 4.3, 1.3)), (20.0, Vectors.dense(2.0, 0.3, 0.1)), (30.0, Vectors.dense(3.0, 3.9, 4.3)), (40.0, Vectors.dense(4.0, 1.3, 0.9)), (50.0, Vectors.dense(5.0, 4.7, 1.3)), (10.0, Vectors.dense(1.0, 4.3, 1.3)), (20.0, Vectors.dense(2.0, 0.3, 0.1)), (30.0, Vectors.dense(3.0, 3.9, 4.3)), (40.0, Vectors.dense(4.0, 1.3, 0.9)), (50.0, Vectors.dense(5.0, 4.7, 1.3)) ).map(v => v._1.toRealNN -> v._2.toOPVector) ) val label = rawLabel.copy(isResponse = true) val lr = new OpLogisticRegression() val lrParams = new ParamGridBuilder().addGrid(lr.regParam, Array(0.0)).build() val testEstimator = RegressionModelSelector.withTrainValidationSplit(dataSplitter = None, trainRatio = 0.5, modelsAndParameters = Seq(lr -> lrParams)) .setInput(label, features) val prediction = testEstimator.getOutput() val testEvaluator = new OpRegressionEvaluator().setLabelCol(label).setPredictionCol(prediction) val testEstimator2 = new OpLinearRegression().setInput(label, features) val prediction2 = testEstimator2.getOutput() val testEvaluator2 = new OpRegressionEvaluator().setLabelCol(label).setPredictionCol(prediction2) Spec[OpRegressionEvaluator] should "copy" in { val testEvaluatorCopy = testEvaluator.copy(ParamMap()) testEvaluatorCopy.uid shouldBe testEvaluator.uid } it should "evaluate the metrics from a model selector" in { val model = testEstimator.fit(ds) val transformedData = model.setInput(label, features).transform(ds) val metrics = testEvaluator.evaluateAll(transformedData).toMetadata() assert(metrics.getDouble(RegressionEvalMetrics.RootMeanSquaredError.toString) <= 1E-12, "rmse should be close to 0") assert(metrics.getDouble(RegressionEvalMetrics.MeanSquaredError.toString) <= 1E-24, "mse should be close to 0") assert(metrics.getDouble(RegressionEvalMetrics.R2.toString) == 1.0, "R2 should equal 1.0") assert(metrics.getDouble(RegressionEvalMetrics.MeanAbsoluteError.toString) <= 1E-12, "mae should be close to 0") } it should "evaluate the metrics from a single model" in { val model = testEstimator2.fit(ds) val transformedData = model.setInput(label, features).transform(ds) val metrics = testEvaluator2.evaluateAll(transformedData).toMetadata() assert(metrics.getDouble(RegressionEvalMetrics.RootMeanSquaredError.toString) <= 1E-12, "rmse should be close to 0") assert(metrics.getDouble(RegressionEvalMetrics.MeanSquaredError.toString) <= 1E-24, "mse should be close to 0") assert(metrics.getDouble(RegressionEvalMetrics.R2.toString) == 1.0, "R2 should equal 1.0") assert(metrics.getDouble(RegressionEvalMetrics.MeanAbsoluteError.toString) <= 1E-12, "mae should be close to 0") } }
Example 184
Source File: ScalaStyleValidationTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import org.scalatest.{Assertions, FlatSpec, Matchers} @RunWith(classOf[JUnitRunner]) class ScalaStyleValidationTest extends FlatSpec with Matchers with Assertions { import scala.Throwable private def +(x: Int, y: Int) = x + y private def -(x: Int, y: Int) = x - y private def *(x: Int, y: Int) = x * y private def /(x: Int, y: Int) = x / y private def +-(x: Int, y: Int) = x + (-y) private def xx_=(y: Int) = println(s"setting xx to $y") "bad names" should "never happen" in { "def _=abc = ???" shouldNot compile true shouldBe true } "non-ascii" should "not be allowed" in { // "def ⇒ = ???" shouldNot compile // it does not even compile as a string } }
Example 185
Source File: RandomListTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.testkit import java.text.SimpleDateFormat import com.salesforce.op.features.types._ import com.salesforce.op.test.TestCommon import com.salesforce.op.testkit.RandomList.{NormalGeolocation, UniformGeolocation} import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import org.scalatest.{Assertions, FlatSpec} import scala.language.postfixOps @RunWith(classOf[JUnitRunner]) class RandomListTest extends FlatSpec with TestCommon with Assertions { private val numTries = 10000 private val rngSeed = 314159214142136L private def check[D, T <: OPList[D]]( g: RandomList[D, T], minLen: Int, maxLen: Int, predicate: (D => Boolean) = (_: D) => true ) = { g reset rngSeed def segment = g limit numTries segment count (_.value.length < minLen) shouldBe 0 segment count (_.value.length > maxLen) shouldBe 0 segment foreach (list => list.value foreach { x => predicate(x) shouldBe true }) } private val df = new SimpleDateFormat("dd/MM/yy") Spec[Text, RandomList[String, TextList]] should "generate lists of strings" in { val sut = RandomList.ofTexts(RandomText.countries, 0, 4) check[String, TextList](sut, 0, 4, _.length > 0) (sut limit 7 map (_.value.toList)) shouldBe List( List("Madagascar", "Gondal", "Zephyria"), List("Holy Alliance"), List("North American Union"), List("Guatemala", "Estonia", "Kolechia"), List(), List("Myanmar", "Bhutan"), List("Equatorial Guinea") ) } Spec[Date, RandomList[Long, DateList]] should "generate lists of dates" in { val dates = RandomIntegral.dates(df.parse("01/01/2017"), 1000, 1000000) val sut = RandomList.ofDates(dates, 11, 22) var d0 = 0L check[Long, DateList](sut, 11, 22, d => { val d1 = d0 d0 = d d > d1 }) } Spec[DateTimeList, RandomList[Long, DateTimeList]] should "generate lists of datetimes" in { val datetimes = RandomIntegral.datetimes(df.parse("01/01/2017"), 1000, 1000000) val sut = RandomList.ofDateTimes(datetimes, 11, 22) var d0 = 0L check[Long, DateTimeList](sut, 11, 22, d => { val d1 = d0 d0 = d d > d1 }) } Spec[UniformGeolocation] should "generate uniformly distributed geolocations" in { val sut = RandomList.ofGeolocations val segment = sut limit numTries segment foreach (_.value.length shouldBe 3) } Spec[NormalGeolocation] should "generate geolocations around given point" in { for {accuracy <- GeolocationAccuracy.values} { val geolocation = RandomList.ofGeolocationsNear(37.444136, 122.163160, accuracy) val segment = geolocation limit numTries segment foreach (_.value.length shouldBe 3) } } }
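A minimal sketch of the list generators shown above; reset makes the stream reproducible and limit materializes a finite sample.

import com.salesforce.op.testkit.{RandomList, RandomText}

// Text lists holding 0 to 4 random country names; the same seed always yields the same sample.
val countryLists = RandomList.ofTexts(RandomText.countries, 0, 4)
countryLists reset 42L
val sample = countryLists limit 5 map (_.value)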
Example 186
Source File: RandomRealTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.testkit import com.salesforce.op.features.types.{Currency, Percent, Real, RealNN} import com.salesforce.op.test.TestCommon import com.salesforce.op.testkit.RandomReal._ import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class RandomRealTest extends FlatSpec with TestCommon { val numTries = 1000000 // ignore should "cast to default data type" in { // check(uniform(1.0, 2.0), probabilityOfEmpty = 0.5, range = (1.0, 2.0)) // } Spec[RandomReal[Real]] should "Give Normal distribution with mean 1 sigma 0.1, 10% nulls" in { val normalReals = normal[Real](1.0, 0.2) check(normalReals, probabilityOfEmpty = 0.1, range = (-2.0, 4.0)) } Spec[RandomReal[Real]] should "Give Uniform distribution on 1..2, half nulls" in { check(uniform[Real](1.0, 2.0), probabilityOfEmpty = 0.5, range = (1.0, 2.0)) } it should "Give Poisson distribution with mean 4, 20% nulls" in { check(poisson[Real](4.0), probabilityOfEmpty = 0.2, range = (0.0, 15.0)) } it should "Give Exponential distribution with mean 1, 1% nulls" in { check(exponential[Real](1.0), probabilityOfEmpty = 0.01, range = (0.0, 15.0)) } it should "Give Gamma distribution with mean 5, 0% nulls" in { check(gamma[Real](5.0), probabilityOfEmpty = 0.0, range = (0.0, 25.0)) } it should "Give LogNormal distribution with mean 0.25, 20% nulls" in { check(logNormal[Real](0.25, 0.001), probabilityOfEmpty = 0.7, range = (0.1, 15.0)) } it should "Weibull distribution (4.0, 5.0), 20% nulls" in { check(weibull[Real](4.0, 5.0), probabilityOfEmpty = 0.2, range = (0.0, 15.0)) } Spec[RandomReal[RealNN]] should "give no nulls" in { check(normal[RealNN](1.0, 0.2), probabilityOfEmpty = 0.0, range = (-2.0, 4.0)) } Spec[RandomReal[Currency]] should "distribute money normally" in { check(normal[Currency](1.0, 0.2), probabilityOfEmpty = 0.5, range = (-2.0, 4.0)) } Spec[RandomReal[Percent]] should "distribute percentage evenly" in { check(uniform[Percent](1.0, 2.0), probabilityOfEmpty = 0.5, range = (0.0, 2.0)) } private val rngSeed = 7688721 private def check[T <: Real]( src: RandomReal[T], probabilityOfEmpty: Double, range: (Double, Double)) = { val sut = src withProbabilityOfEmpty probabilityOfEmpty sut reset rngSeed val found = sut.next sut reset rngSeed val foundAfterReseed = sut.next if (foundAfterReseed != found) { sut.reset(rngSeed) } withClue(s"generator reset did not work for $sut") { foundAfterReseed shouldBe found } sut reset rngSeed val numberOfNulls = sut limit numTries count (_.isEmpty) val expectedNumberOfNulls = probabilityOfEmpty * numTries withClue(s"numNulls = $numberOfNulls, expected $expectedNumberOfNulls") { math.abs(numberOfNulls - expectedNumberOfNulls) < numTries / 100 shouldBe true } val numberOfOutliers = sut limit numTries count (xOpt => xOpt.value.exists(x => x < range._1 || x > range._2)) numberOfOutliers should be < (numTries / 1000) } }
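A minimal sketch of the real-valued generators, mirroring the calls the check helper above makes; the seed and sample size are arbitrary.

import com.salesforce.op.features.types.Real
import com.salesforce.op.testkit.RandomReal._

// Normally distributed Real values with roughly 10% empties.
val gen = normal[Real](1.0, 0.2) withProbabilityOfEmpty 0.1
gen reset 12345L
val sample = gen limit 1000
val empties = sample count (_.isEmpty) // expected to land near 100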
Example 187
Source File: InfiniteStreamTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.testkit import com.salesforce.op.test.TestCommon import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner import scala.language.postfixOps @RunWith(classOf[JUnitRunner]) class InfiniteStreamTest extends FlatSpec with TestCommon { Spec[InfiniteStream[_]] should "map" in { var i = 0 val src = new InfiniteStream[Int] { override def next: Int = { i += 1; i } } val sut = src map (5 +) while (i < 10) sut.next shouldBe (i + 5) } }
Example 188
Source File: RandomIntegralTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.testkit import java.text.SimpleDateFormat import com.salesforce.op.features.types._ import com.salesforce.op.test.TestCommon import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import org.scalatest.{Assertions, FlatSpec} import scala.language.postfixOps @RunWith(classOf[JUnitRunner]) class RandomIntegralTest extends FlatSpec with TestCommon with Assertions { private val numTries = 10000 private val rngSeed = 314159214142135L private def check[T <: Integral]( g: RandomIntegral[T], predicate: Long => Boolean = _ => true ) = { g reset rngSeed def segment = g limit numTries val numberOfEmpties = segment count (_.isEmpty) val expectedNumberOfEmpties = g.probabilityOfEmpty * numTries withClue(s"numEmpties = $numberOfEmpties, expected $expectedNumberOfEmpties") { math.abs(numberOfEmpties - expectedNumberOfEmpties) < 2 * math.sqrt(numTries) shouldBe true } val maybeValues = segment filterNot (_.isEmpty) map (_.value) val values = maybeValues collect { case Some(s) => s } values foreach (x => predicate(x) shouldBe true) withClue(s"number of distinct values = ${values.size}, expected:") { math.abs(maybeValues.size - values.toSet.size) < maybeValues.size / 20 shouldBe true } } private val df = new SimpleDateFormat("dd/MM/yy") Spec[RandomIntegral[Integral]] should "generate empties and distinct numbers" in { val sut0 = RandomIntegral.integrals val sut = sut0.withProbabilityOfEmpty(0.3) check(sut) sut.probabilityOfEmpty shouldBe 0.3 } Spec[RandomIntegral[Integral]] should "generate empties and distinct numbers in some range" in { val sut0 = RandomIntegral.integrals(100, 200) val sut = sut0.withProbabilityOfEmpty(0.3) check(sut, i => i >= 100 && i < 200) sut.probabilityOfEmpty shouldBe 0.3 } Spec[RandomIntegral[Date]] should "generate dates" in { val sut = RandomIntegral.dates(df.parse("01/01/2017"), 1000, 1000000) var d0 = 0L check(sut withProbabilityOfEmpty 0.01, d => { val d1 = d0 d0 = d d0 > d1 }) } Spec[RandomIntegral[DateTime]] should "generate dates with times" in { val sut = RandomIntegral.datetimes(df.parse("24/08/2017"), 1000, 1000000) var d0 = 0L check(sut withProbabilityOfEmpty 0.001, d => { val d1 = d0 d0 = d d0 > d1 }) } }
Example 189
Source File: RandomBinaryTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.testkit import com.salesforce.op.features.types.Binary import com.salesforce.op.test.TestCommon import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner import scala.language.postfixOps @RunWith(classOf[JUnitRunner]) class RandomBinaryTest extends FlatSpec with TestCommon { val numTries = 1000000 val rngSeed = 12345 private def truthWithProbability(probabilityOfTrue: Double) = { RandomBinary(probabilityOfTrue) } Spec[RandomBinary] should "generate empties, truths and falses" in { check(truthWithProbability(0.5) withProbabilityOfEmpty 0.5) check(truthWithProbability(0.3) withProbabilityOfEmpty 0.65) check(truthWithProbability(0.0) withProbabilityOfEmpty 0.1) check(truthWithProbability(1.0) withProbabilityOfEmpty 0.0) } private def check(g: RandomBinary) = { g reset rngSeed val numberOfEmpties = g limit numTries count (_.isEmpty) val expectedNumberOfEmpties = g.probabilityOfEmpty * numTries withClue(s"numEmpties = $numberOfEmpties, expected $expectedNumberOfEmpties") { math.abs(numberOfEmpties - expectedNumberOfEmpties) < numTries / 100 shouldBe true } val expectedNumberOfTruths = g.probabilityOfSuccess * (1 - g.probabilityOfEmpty) * numTries val numberOfTruths = g limit numTries count (Binary(true) ==) withClue(s"numTruths = $numberOfTruths, expected $expectedNumberOfTruths") { math.abs(numberOfTruths - expectedNumberOfTruths) < numTries / 100 shouldBe true } } }
Example 190
Source File: RandomSetTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.testkit import com.salesforce.op.features.types._ import com.salesforce.op.test.TestCommon import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import org.scalatest.{Assertions, FlatSpec} import scala.language.postfixOps @RunWith(classOf[JUnitRunner]) class RandomSetTest extends FlatSpec with TestCommon with Assertions { private val numTries = 10000 private val rngSeed = 314159214142136L private def check[D, T <: OPSet[D]]( g: RandomSet[D, T], minLen: Int, maxLen: Int, predicate: (D => Boolean) = (_: D) => true ) = { g reset rngSeed def segment = g limit numTries segment count (_.value.size < minLen) shouldBe 0 segment count (_.value.size > maxLen) shouldBe 0 segment foreach (Set => Set.value foreach { x => predicate(x) shouldBe true }) } Spec[MultiPickList] should "generate multipicklists" in { val sut = RandomMultiPickList.of(RandomText.countries, maxLen = 5) check[String, MultiPickList](sut, 0, 5, _.nonEmpty) val expected = List( Set(), Set("Aldorria", "Palau", "Glubbdubdrib"), Set(), Set(), Set("Sweden", "Wuhu Islands", "Tuvalu") ) {sut reset 42; sut limit 5 map (_.value)} shouldBe expected {sut reset 42; sut limit 5 map (_.value)} shouldBe expected } }
Example 191
Source File: TypeConversionTest.scala From spark-hbase-connector with Apache License 2.0 | 5 votes |
package com.user.integration import java.util.UUID import it.nerdammer.spark.hbase._ import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} class TypeConversionTest extends FlatSpec with Matchers with BeforeAndAfterAll { val tables = Seq(UUID.randomUUID().toString, UUID.randomUUID().toString) val columnFamilies = Seq("cfconv", "cfconv2") override def beforeAll() = { (tables zip columnFamilies) foreach (t => { IntegrationUtils.createTable(t._1, t._2) }) } override def afterAll() = { tables.foreach(table => IntegrationUtils.dropTable(table)) } "type conversion" should "work" in { val sc = IntegrationUtils.sparkContext sc.parallelize(1 to 100) .map(i => (i.toString, i, i.toShort, i.toLong, i % 2 == 0, i.toDouble, i.toFloat, BigDecimal(i), i.toString)) .toHBaseTable(tables(0)).toColumns("col-int", "col-sho", "col-lon", "col-boo", "col-dou", "col-flo", "col-big", "col-str") .inColumnFamily(columnFamilies(0)) .save() val retrieved = sc.hbaseTable[(String, Int, Short, Long, Boolean, Double, Float, BigDecimal, String)](tables(0)) .select("col-int", "col-sho", "col-lon", "col-boo", "col-dou", "col-flo", "col-big", "col-str") .inColumnFamily(columnFamilies(0)) .sortBy(_._1.toInt) .collect() val cmp = (1 to 100) zip retrieved cmp.foreach(p => { p._1 should be(p._2._2) p._1.toShort should be(p._2._3) p._1.toLong should be(p._2._4) (p._1 % 2 == 0) should be(p._2._5) p._1.toDouble should be(p._2._6) p._1.toFloat should be(p._2._7) BigDecimal(p._1) should be(p._2._8) p._1.toString should be(p._2._9) }) } "type conversion" should "support empty values" in { val sc = IntegrationUtils.sparkContext sc.parallelize(1 to 100) .map(i => (i.toString, i, None.asInstanceOf[Option[Short]])) .toHBaseTable(tables(1)) .inColumnFamily(columnFamilies(1)) .toColumns("myint", "myshort") .save() val chk = sc.hbaseTable[(String, Option[Int], Option[Short], Option[Long], Option[Boolean], Option[Double], Option[Float], Option[BigDecimal], Option[String])](tables(1)) .inColumnFamily(columnFamilies(1)) .select("myint", "myshort", "mynonexistentlong", "mynonexistentbool", "mynonexistentdouble", "mynonexistentfloat", "mynonexistentbigd", "mynonexistentstr") .filter(r => r match { case (s, Some(i), None, None, None, None, None, None, None) => true case _ => false }) .count chk should be (100) } }
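A condensed sketch of the write/read round trip performed above; the table and column-family names are placeholders and sc is an existing SparkContext with the connector's implicits in scope.

import it.nerdammer.spark.hbase._

// Write: the first tuple element becomes the row key, the rest map onto the listed columns.
sc.parallelize(1 to 10)
  .map(i => (i.toString, i, i.toDouble))
  .toHBaseTable("my_table")
  .toColumns("col-int", "col-dou")
  .inColumnFamily("cf")
  .save()

// Read back with typed columns; Option types can be used for cells that may be missing.
val rows = sc.hbaseTable[(String, Int, Double)]("my_table")
  .select("col-int", "col-dou")
  .inColumnFamily("cf")
  .collect()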
Example 192
Source File: HostAndPortSpec.scala From akka-persistence-redis with Apache License 2.0 | 5 votes |
package akka.persistence.utils import org.scalatest.{ FlatSpec, Matchers } class HostAndPortSpec extends FlatSpec with Matchers { "A HostAndPort" should "have localhost as host and 8080 as port" in { val hostAndPort = HostAndPort("localhost:8080") assert(hostAndPort.host == "localhost") assert(hostAndPort.port == 8080) } it should "return a tuple (String, Int)" in { val hostAndPort = HostAndPort("my.host.name:10001") assert(hostAndPort.asTuple == ("my.host.name", 10001)) } it should "return 8080 instead of 9090" in { val hostAndPort = HostAndPort("localhost:8080") assert(hostAndPort.portOrDefault(9090) == 8080) } }
Example 193
Source File: RedisUtilsSpec.scala From akka-persistence-redis with Apache License 2.0 | 5 votes |
package akka.persistence.redis import com.typesafe.config.ConfigFactory import org.scalatest.{ FlatSpec, Matchers } class RedisUtilsSpec extends FlatSpec with Matchers { "A RedisClient" should "use redis.sentinels as config if sentinel-list is also present" in { val config = ConfigFactory.parseString(s""" |redis { | mode = sentinel | master = foo | database = 0 | sentinel-list = "host1:1234,host2:1235" | sentinels = [ | { | host = "host3" | port = 1236 | }, | { | host = "host4" | port = 1237 | } | ] |}""".stripMargin) val sentinels = RedisUtils.sentinels(config) sentinels should be(List(("host3", 1236), ("host4", 1237))) } it should "use redis.sentinel-list if redis.sentinels is not present" in { val config = ConfigFactory.parseString(s""" |redis { | mode = sentinel | master = foo | database = 0 | sentinel-list = "host1:1234,host2:1235" |}""".stripMargin) val sentinels = RedisUtils.sentinels(config) sentinels should be(List(("host1", 1234), ("host2", 1235))) } }
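A minimal sketch of resolving the sentinel list from Typesafe config, as the test does; the host names and ports are placeholders, and RedisUtils is assumed to sit in the akka.persistence.redis package as the test's unqualified reference suggests.

import akka.persistence.redis.RedisUtils
import com.typesafe.config.ConfigFactory

val config = ConfigFactory.parseString(
  """
    |redis {
    |  mode = sentinel
    |  master = mymaster
    |  database = 0
    |  sentinel-list = "host1:26379,host2:26379"
    |}
  """.stripMargin)

// With no redis.sentinels block present, this yields List(("host1", 26379), ("host2", 26379)).
val sentinels = RedisUtils.sentinels(config)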
Example 194
Source File: FilterUDFSpec.scala From jgit-spark-connector with Apache License 2.0 | 5 votes |
package tech.sourced.engine import org.scalatest.{FlatSpec, Matchers} class FilterUDFSpec extends FlatSpec with Matchers with BaseSivaSpec with BaseSparkSpec { var engine: Engine = _ override protected def beforeAll(): Unit = { super.beforeAll() engine = Engine(ss, resourcePath, "siva") } "Filter by language" should "work properly" in { val langDf = engine .getRepositories .getReferences .getCommits .getBlobs .classifyLanguages val filteredLang = langDf.select("repository_id", "path", "lang").where("lang='Python'") filteredLang.count() should be(6) } override protected def afterAll(): Unit = { super.afterAll() engine = null } }
Example 195
Source File: RepositoryRDDProviderSpec.scala From jgit-spark-connector with Apache License 2.0 | 5 votes |
package tech.sourced.engine.provider import java.nio.file.{Path, Paths} import java.util.UUID import org.apache.commons.io.FileUtils import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers} import tech.sourced.engine.util.RepoUtils import tech.sourced.engine.{BaseSivaSpec, BaseSparkSpec} class RepositoryRDDProviderSpec extends FlatSpec with Matchers with BeforeAndAfterEach with BaseSparkSpec with BaseSivaSpec { private var provider: RepositoryRDDProvider = _ private var tmpPath: Path = _ override def beforeEach(): Unit = { super.beforeEach() provider = RepositoryRDDProvider(ss.sparkContext) tmpPath = Paths.get( System.getProperty("java.io.tmpdir"), UUID.randomUUID().toString ) } override def afterEach(): Unit = { super.afterEach() FileUtils.deleteQuietly(tmpPath.toFile) } "RepositoryRDDProvider" should "retrieve bucketized raw repositories" in { tmpPath.resolve("a").toFile.mkdir() createRepo(tmpPath.resolve("a").resolve("repo")) tmpPath.resolve("b").toFile.mkdir() createRepo(tmpPath.resolve("b").resolve("repo")) createRepo(tmpPath.resolve("repo")) val repos = provider.get(tmpPath.toString, "standard").collect() repos.length should be(3) } it should "retrieve non-bucketized raw repositories" in { tmpPath.resolve("a").toFile.mkdir() createRepo(tmpPath.resolve("repo")) tmpPath.resolve("b").toFile.mkdir() createRepo(tmpPath.resolve("repo2")) val repos = provider.get(tmpPath.toString, "standard").collect() repos.length should be(2) } it should "retrieve bucketized siva repositories" in { val repos = provider.get(resourcePath, "siva").collect() repos.length should be(3) } it should "retrieve non-bucketized siva repositories" in { val repos = provider.get(Paths.get(resourcePath, "ff").toString, "siva").collect() repos.length should be(1) } private def createRepo(path: Path) = { val repo = RepoUtils.createRepo(path) RepoUtils.commitFile(repo, "file.txt", "something something", "some commit") } }
Example 196
Source File: StorageLevelSpec.scala From jgit-spark-connector with Apache License 2.0 | 5 votes |
package tech.sourced.engine import org.scalatest.{FlatSpec, Matchers} class StorageLevelSpec extends FlatSpec with Matchers with BaseSivaSpec with BaseSparkSpec { var engine: Engine = _ override protected def beforeAll(): Unit = { super.beforeAll() engine = Engine(ss, resourcePath, "siva") } "A Dataframe" should "work with all storage levels" in { import org.apache.spark.storage.StorageLevel._ val storageLevels = List( DISK_ONLY, DISK_ONLY_2, MEMORY_AND_DISK, MEMORY_AND_DISK_2, MEMORY_AND_DISK_SER, MEMORY_AND_DISK_SER_2, MEMORY_ONLY, MEMORY_ONLY_2, MEMORY_ONLY_SER, MEMORY_ONLY_SER_2, NONE, OFF_HEAP ) storageLevels.foreach(level => { val df = engine.getRepositories.persist(level) df.count() df.unpersist() }) } }
Example 197
Source File: FilterSpec.scala From jgit-spark-connector with Apache License 2.0 | 5 votes |
package tech.sourced.engine.util import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.types.StringType import org.scalatest.{FlatSpec, Matchers} class FilterSpec extends FlatSpec with Matchers { "CompiledFilters" should "filter properly depending of his type" in { val eq = EqualFilter(Attr("test", ""), "a") eq.eval("a") should be(true) eq.eval("b") should be(false) val notEq = NotFilter(EqualFilter(Attr("test", ""), "a")) notEq.eval("a") should be(false) notEq.eval("b") should be(true) val in = InFilter(Attr("test", ""), Array("a", "b", "c")) in.eval("a") should be(true) in.eval("b") should be(true) in.eval("c") should be(true) in.eval("d") should be(false) val gt = GreaterThanFilter(Attr("test", ""), 5) gt.eval(4) should be(false) gt.eval(5) should be(false) gt.eval(6) should be(true) val gte = GreaterThanOrEqualFilter(Attr("test", ""), 5) gte.eval(4) should be(false) gte.eval(5) should be(true) gte.eval(6) should be(true) val lt = LessThanFilter(Attr("test", ""), 5) lt.eval(4) should be(true) lt.eval(5) should be(false) lt.eval(6) should be(false) val lte = LessThanOrEqualFilter(Attr("test", ""), 5) lte.eval(4) should be(true) lte.eval(5) should be(true) lte.eval(6) should be(false) } "ColumnFilter" should "process correctly columns" in { // test = 'val' AND test IS NOT NULL AND test2 = 'val2' AND test3 IN ('a', 'b') val f = Filter.compile(And( And( And( EqualTo(AttributeReference("test", StringType)(), Literal("val")), IsNotNull(AttributeReference("test", StringType)()) ), EqualTo(AttributeReference("test2", StringType)(), Literal("val2")) ), In(AttributeReference("test3", StringType)(), Seq(Literal("a"), Literal("b"))) )) f.length should be(4) val filters = Filters(f) filters.matches(Seq("test"), "val") should be(true) filters.matches(Seq("test2"), "val") should be(false) filters.matches(Seq("test3"), "b") should be(true) } "ColumnFilter" should "handle correctly unsupported filters" in { val f = Filter.compile(StartsWith(AttributeReference("test", StringType)(), Literal("a"))) f.length should be(0) } }
Example 198
Source File: ReferenceIteratorSpec.scala From jgit-spark-connector with Apache License 2.0 | 5 votes |
package tech.sourced.engine.iterator import org.scalatest.FlatSpec import tech.sourced.engine.util.{Attr, EqualFilter} class ReferenceIteratorSpec extends FlatSpec with BaseChainableIterator { "ReferenceIterator" should "return all references from all repositories into a siva file" in { testIterator( new ReferenceIterator(Array("repository_id", "name", "hash"), _, null, Seq(), false), { case (0, row) => row.getString(0) should be("github.com/xiyou-linuxer/faq-xiyoulinux") row.getString(1) should be("refs/heads/HEAD") row.getString(2) should be("fff7062de8474d10a67d417ccea87ba6f58ca81d") case (1, row) => row.getString(0) should be("github.com/mawag/faq-xiyoulinux") row.getString(1) should be("refs/heads/HEAD") row.getString(2) should be("fff7062de8474d10a67d417ccea87ba6f58ca81d") case (2, row) => row.getString(0) should be("github.com/xiyou-linuxer/faq-xiyoulinux") row.getString(1) should be("refs/heads/develop") row.getString(2) should be("880653c14945dbbc915f1145561ed3df3ebaf168") case _ => }, total = 43, columnsCount = 3 ) } it should "return only specified columns" in { testIterator( new ReferenceIterator(Array("repository_id", "name"), _, null, Seq(), false), { case (0, row) => row.getString(0) should be("github.com/xiyou-linuxer/faq-xiyoulinux") row.getString(1) should be("refs/heads/HEAD") case (1, row) => row.getString(0) should be("github.com/mawag/faq-xiyoulinux") row.getString(1) should be("refs/heads/HEAD") case (2, row) => row.getString(0) should be("github.com/xiyou-linuxer/faq-xiyoulinux") row.getString(1) should be("refs/heads/develop") case _ => }, total = 43, columnsCount = 2 ) } it should "apply passed filters" in { testIterator( new ReferenceIterator( Array("repository_id", "name"), _, null, Seq(EqualFilter(Attr("name", "references"), "refs/heads/develop")), false ), { case (0, row) => row.getString(0) should be("github.com/xiyou-linuxer/faq-xiyoulinux") row.getString(1) should be("refs/heads/develop") case (1, row) => row.getString(0) should be("github.com/mawag/faq-xiyoulinux") row.getString(1) should be("refs/heads/develop") }, total = 2, columnsCount = 2 ) } it should "use previously passed iterator" in { testIterator(repo => new ReferenceIterator( Array("repository_id", "name"), repo, new RepositoryIterator( "/foo/bar", Array("id"), repo, Seq(EqualFilter(Attr("id", "repository"), "github.com/xiyou-linuxer/faq-xiyoulinux")), false ), Seq(EqualFilter(Attr("name", "references"), "refs/heads/develop")), false ), { case (0, row) => row.getString(0) should be("github.com/xiyou-linuxer/faq-xiyoulinux") row.getString(1) should be("refs/heads/develop") }, total = 1, columnsCount = 2 ) } }
Example 199
Source File: MetadataIteratorSpec.scala From jgit-spark-connector with Apache License 2.0 | 5 votes |
package tech.sourced.engine.iterator import java.nio.file.Paths import java.util.{Properties, UUID} import org.apache.commons.io.FileUtils import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.types.{Metadata, StringType, StructType} import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} import tech.sourced.engine.{BaseSparkSpec, Schema} class JDBCQueryIteratorSpec extends FlatSpec with Matchers with BeforeAndAfterAll with BaseSparkSpec { private val tmpPath = Paths.get( System.getProperty("java.io.tmpdir"), UUID.randomUUID.toString ) private val dbPath = tmpPath.resolve("test.db") override def beforeAll(): Unit = { super.beforeAll() tmpPath.toFile.mkdir() val rdd = ss.sparkContext.parallelize(Seq( Row("id1"), Row("id2"), Row("id3") )) val properties = new Properties() properties.put("driver", "org.sqlite.JDBC") val df = ss.createDataFrame(rdd, StructType(Seq(Schema.repositories.head))) df.write.jdbc(s"jdbc:sqlite:${dbPath.toString}", "repositories", properties) } override def afterAll(): Unit = { super.afterAll() FileUtils.deleteQuietly(tmpPath.toFile) } "JDBCQueryIterator" should "return all rows for the query" in { val iter = new JDBCQueryIterator( Seq(attr("id")), dbPath.toString, "SELECT id FROM repositories ORDER BY id" ) // calling hasNext more than one time does not cause rows to be lost iter.hasNext iter.hasNext val rows = (for (row <- iter) yield row).toArray rows.length should be(3) rows(0).length should be(1) rows(0)(0).toString should be("id1") rows(1)(0).toString should be("id2") rows(2)(0).toString should be("id3") } private def attr(name: String): Attribute = AttributeReference( name, StringType, nullable = false, Metadata.empty )() }
Example 200
Source File: ExamplesTest.scala From json-schema-codegen with Apache License 2.0 | 5 votes |
import java.net.{Inet6Address, InetAddress, Inet4Address, URI} import java.util.Date import argonaut.Argonaut._ import argonaut._ import org.scalatest.{FlatSpec, Matchers} import product.vox.shop._ class ExamplesTest extends FlatSpec with Matchers { "AdditionalPropertiesJson" should "encode and decode" in { import additional.Codecs._ test(additional.Properties("bvalue", Some(Map("p1" -> additional.PropertiesAdditional(1))))) } "AdditionalPropertiesOnlyJson" should "encode and decode" in { import additional.properties.Codecs._ test(additional.properties.Only(Some(Map("p1" -> additional.properties.OnlyAdditional(1))))) } "EnumStrings" should "encode and decode" in { import Codecs._ test(Strings.a) test(Strings.b) } "EnumIntegers" should "encode and decode" in { import Codecs._ test(Integers.v10) test(Integers.v20) } "Formats" should "encode and decode" in { import Codecs._ test(Formats( new URI("http://uri/address"), InetAddress.getByName("127.0.0.1").asInstanceOf[Inet4Address], InetAddress.getByName("FE80:0000:0000:0000:0202:B3FF:FE1E:8329").asInstanceOf[Inet6Address], new Date() )) } "Product" should "decode from string and encode to string" in { import product.vox.shop.Codecs._ val js = """{"name":"Recharge Cards (5 PIN)","prices":[{"cost":0.0187,"currency":"USD","moq":200000}],"eid":"iso-card-5-pin","description":"<p>ISO card, 5 PINs, printed 4 colour front and back</p>\n<p>Every card option shown below meets Tier 1 operator quality standards, at a competitive pricing including freight to your country that’s always openly visible, with streamlined fulfillment and support included, creating what we believe is the best overall value at the lowest total cost of ownership in the industry.</p>\n<p>Material: Cardboard 300 GSM, UV varnish both sides</p>\n<p>Scratch panel: Silver/Black Ink with black overprint</p> \n<p>Individually plastic wrapped in chain of 50 cards</p>\n<p>Small boxes of 500 cards, Master Carton of 5000 cards</p>\n<p>Alternate names: Scratch cards, RCV, top-up cards</p>\n","properties":[{"name":"Overscratch Protection","options":[{"name":"No protection"},{"name":"Protective measures against over scratching","prices":[{"cost":0.0253,"currency":"USD","moq":200000},{"cost":0.021,"currency":"USD","moq":500000},{"cost":0.02,"currency":"USD","moq":1000000},{"cost":0.0188,"currency":"USD","moq":5000000,"leadtime":21},{"cost":0.0173,"currency":"USD","moq":10000000},{"cost":0.0171,"currency":"USD","moq":50000000,"leadtime":28}]}]},{"name":"Payment terms","options":[{"name":"Payment on shipment readiness"},{"name":"Net 30 (subject to approval)"}]},{"name":"Order Timing","options":[{"name":"Ship order when ready"},{"name":"Pre-order for shipment in 3 months"}]}],"client":"112","sample":{"price":{"cost":250,"currency":"USD"}},"category":"recharge_cards","leadtime":14,"imageUrl":["https://d2w2n7dk76p3lq.cloudfront.net/product_image/recharge_cards/iso-5pin.png"],"types":[{"name":"Recharge Cards (5 PIN)","prices":[{"cost":0.0187,"currency":"USD","moq":200000},{"cost":0.0175,"currency":"USD","moq":500000},{"cost":0.0162,"currency":"USD","moq":1000000},{"cost":0.0153,"currency":"USD","moq":5000000,"leadtime":21},{"cost":0.0138,"currency":"USD","moq":10000000,"leadtime":28},{"cost":0.0137,"currency":"USD","moq":50000000,"leadtime":28}]}],"presentation":1000}""" val po = js.decodeValidation[Product] println(po) po.isSuccess shouldBe true test(po.toOption.get) } def test[T: CodecJson](value: T) = { val json = value.asJson println(json) json.jdecode[T] shouldBe DecodeResult.ok(value) } }