java.sql.Timestamp Scala Examples
The following examples show how to use java.sql.Timestamp in Scala. Each example is taken from an open-source project; the source file and license are noted above each listing.
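Before the project examples, here is a minimal standalone sketch of the java.sql.Timestamp operations the listings below lean on most often: constructing a Timestamp from epoch milliseconds or from a JDBC-style string, reading the millisecond value back, and converting to and from java.time.Instant. It is not taken from any of the projects below; the object name and sample values are illustrative.

import java.sql.Timestamp
import java.time.Instant

object TimestampBasics extends App {
  // construct from epoch milliseconds
  val fromMillis: Timestamp = new Timestamp(System.currentTimeMillis())

  // construct from a JDBC-style literal (yyyy-[m]m-[d]d hh:mm:ss[.f...]), as in several tests below
  val fromString: Timestamp = Timestamp.valueOf("2018-11-12 09:42:00")

  // back to epoch milliseconds, the form many examples below pass around via timestamp.getTime
  val millis: Long = fromString.getTime

  // conversions to and from java.time
  val instant: Instant = fromString.toInstant
  val roundTrip: Timestamp = Timestamp.from(instant)

  println(s"$fromString -> $millis ms since the epoch; round-trips via Instant: ${roundTrip == fromString}")
}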
Example 1
Source File: SparkRandomGenDataIngress.scala From pipelines-examples with Apache License 2.0 | 5 votes |
package pipelines.example

import java.sql.Timestamp

import scala.util.Random

import pipelines.streamlets.{ DurationConfigParameter, IntegerConfigParameter, StreamletShape }
import pipelines.streamlets.avro._
import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic }
import org.apache.spark.sql.Dataset
import org.apache.spark.sql.streaming.{ OutputMode, Trigger }
import pipelines.spark.sql.SQLImplicits._

case class Rate(timestamp: Timestamp, value: Long)

class SparkRandomGenDataIngress extends SparkStreamlet {
  val out   = AvroOutlet[Data]("out", d ⇒ d.src)
  val shape = StreamletShape(out)

  val RecordsPerSecond = IntegerConfigParameter(
    "records-per-second",
    "Records per second to produce.",
    Some(50))

  val RampUpTime = DurationConfigParameter(
    "ramp-up-time",
    "Time to reach max records per second.",
    Some("0 seconds"))

  override def configParameters = Vector(RecordsPerSecond, RampUpTime)

  override def createLogic() = new SparkStreamletLogic {
    override def buildStreamingQueries = {
      writeStream(process, out, OutputMode.Append).toQueryExecution
    }

    private def process: Dataset[Data] = {
      val recordsPerSecond = context.streamletConfig.getInt(RecordsPerSecond.key)
      val rampUpTime = context.streamletConfig.getDuration(RampUpTime.key, java.util.concurrent.TimeUnit.SECONDS)
      println(s"Using rampup time of $rampUpTime seconds")

      val gaugeGen: () ⇒ String = () ⇒ if (Random.nextDouble() < 0.5) "oil" else "gas"

      val rateStream = session.readStream
        .format("rate")
        .option("rowsPerSecond", recordsPerSecond)
        .option("rampUpTime", s"${rampUpTime}s")
        .load()
        .as[Rate]

      rateStream.map {
        case Rate(timestamp, value) ⇒ Data(s"src-${value % 1000}", timestamp.getTime, None, None, gaugeGen(), value)
      }
    }
  }
}
Example 2
Source File: TransactionsFlowUnitTest.scala From kafka-examples with Apache License 2.0 | 5 votes |
package com.cloudera.streaming.refapp

import java.sql.Timestamp

import org.scalatest.BeforeAndAfter

import org.apache.spark.sql.execution.streaming.MemoryStream

class TransactionsFlowUnitTest extends UnitTestBase with BeforeAndAfter {
  import testImplicits._

  var transactionsFromStream: MemoryStream[Transaction] = _
  var transactionsFlow: TransactionsFlow = _

  before {
    transactionsFromStream = MemoryStream[Transaction]
    transactionsFlow = new TransactionsFlow(
      spark,
      statesFromCluster,
      customersFromCluster,
      vendorsFromCluster,
      transactionsFromStream = transactionsFromStream
        .toDF.withColumn("timestamp", $"event_timestamp".cast("timestamp")))
  }

  test("Valid records are written to the validTransactions output") {
    val validTransaction = Transaction(
      transaction_id = "1",
      customer_id = Some(1),
      vendor_id = Some(1),
      event_state = Some("CREATED"),
      event_timestamp = Timestamp.valueOf("2018-11-12 09:42:00"),
      price = Some("100"),
      card_type = Some("Credit"))

    testStream(transactionsFlow.validTransactions.select('transaction_id, 'customer_id, 'vendor_id, 'event_state, 'event_timestamp, 'price, 'card_type)) (
      AddData(transactionsFromStream, validTransaction),
      CheckAnswer(validTransaction)
    )
  }

  test("Invalid records are written to the invalidTransactions output") {
    // Note: transactionsFlow.validTransactions and invalidTransactions contain the fields that we used for internal calculations, e.g. for validation.
    // It enables us to check the internal calculations.
    testStream(transactionsFlow.invalidTransactions.select('transaction_id, 'valid_card_type)) (
      AddData(transactionsFromStream, Transaction(
        transaction_id = "2",
        customer_id = Some(1),
        vendor_id = Some(1),
        event_state = Some("CREATED"),
        event_timestamp = Timestamp.valueOf("2018-11-12 09:42:00"),
        price = Some("100"),
        card_type = Some("Invalid"))),
      CheckAnswer(("2", false))
    )
  }
}
Example 3
Source File: LocalIntegrationTest.scala From kafka-examples with Apache License 2.0 | 5 votes |
package com.cloudera.streaming.refapp

import java.sql.Timestamp

import org.scalatest.Matchers._
import org.scalatest.concurrent.Eventually._
import org.scalatest.time.{Seconds, Span}

import org.apache.spark.sql.Encoders

class LocalIntegrationTest extends IntegrationTestBase {

  test("Integration test with one kafka and one spark instance embedded in the same JVM") {
    val inputDir = "src/test/resources/samples"

    val spark = EmbeddedSpark.sparkSession
    val fileSource = new FileSources(spark, inputDir)
    val kafkaConfig = EmbeddedKafkaBroker.defaultKafkaConfig
    val kafkaSource = new KafkaSource(spark, kafkaConfig)

    val application = new Application(
      spark,
      Sources(
        statesFromCluster = fileSource.jsonFile("states"),
        customersFromCluster = fileSource.jsonFile("customers"),
        vendorsFromCluster = fileSource.jsonFile("vendors"),
        customersFromStream = kafkaSource.jsonStreamWithKafkaTimestamp("customer"),
        vendorsFromStream = kafkaSource.jsonStreamWithTimestampFromMessage("vendor", "update_timestamp"),
        transactionsFromStream = kafkaSource.jsonStreamWithTimestampFromMessage("transaction", "event_timestamp")
      ),
      Sinks(
        invalidTransactions = Memory.memorySink("invalidTransactions"),
        validTransactions = Memory.memorySink("validTransactions"),
        customerOrphans = Memory.memorySink("customerOrphans"),
        vendorOrphans = Memory.memorySink("vendorOrphans"),
        customers = Memory.memorySink("customers"),
        vendors = Memory.memorySink("vendors"),
        transactionsOperationalMetadata = Memory.memorySink("transactionsOperationalMetadata")
      ))

    application.start()

    eventually(timeout(Span(20, Seconds)), interval(Span(5, Seconds))) {
      EmbeddedKafkaBroker.publishStringMessageToKafka(
        "transaction",
        """{ "transaction_id": "1", "customer_id": 1, "vendor_id": 1, "event_state": "CREATED", "event_timestamp": "2018-11-12 09:42:00", "price": "100", "card_type": "Credit"}""")
      EmbeddedKafkaBroker.publishStringMessageToKafka(
        "transaction",
        """{ "transaction_id": "21", "customer_id": 100, "vendor_id": 2, "event_state": "SWIPED", "event_timestamp": "2018-11-13 09:45:01", "price": "100", "card_type": "Debit"}""")

      val validTransactionsQuery = application.streamingQueries.validTransactions
      validTransactionsQuery.processAllAvailable()

      val currentContent = spark.table("validTransactions").as[Transaction](Encoders.product).collect()

      currentContent.shouldBe(
        Array(
          Transaction(
            transaction_id = "1",
            customer_id = Some(1),
            vendor_id = Some(1),
            event_state = Some("CREATED"),
            event_timestamp = Timestamp.valueOf("2018-11-12 09:42:00"),
            price = Some("100"),
            card_type = Some("Credit")),
          Transaction(
            transaction_id = "21",
            customer_id = Some(100),
            vendor_id = Some(2),
            event_state = Some("SWIPED"),
            event_timestamp = Timestamp.valueOf("2018-11-13 09:45:01"),
            price = Some("100"),
            card_type = Some("Debit"))
        ))
    }
  }
}
Example 4
Source File: TypeCast.scala From spark-google-spreadsheets with Apache License 2.0 | 5 votes |
package com.github.potix2.spark.google.spreadsheets.util

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.NumberFormat
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try

object TypeCast {
  private[spreadsheets] def castTo(
      datum: String,
      castType: DataType,
      nullable: Boolean = true
  ): Any = {
    castType match {
      case _: ByteType => datum.toByte
      case _: ShortType => datum.toShort
      case _: IntegerType => datum.toInt
      case _: LongType => datum.toLong
      case _: FloatType => Try(datum.toFloat)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).floatValue())
      case _: DoubleType => Try(datum.toDouble) // parse at full double precision
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).doubleValue())
      case _: BooleanType => datum.toBoolean
      case _: DecimalType => new BigDecimal(datum.replaceAll(",", ""))
      case _: TimestampType => Timestamp.valueOf(datum)
      case _: DateType => Date.valueOf(datum)
      case _: StringType => datum
      case _ => throw new RuntimeException(s"Unsupported type: ${castType.typeName}")
    }
  }
}
Example 5
Source File: ProcessMarshaller.scala From sundial with MIT License | 5 votes |
package dao.postgres.marshalling

import java.sql.{Connection, PreparedStatement, ResultSet, Timestamp}
import java.util.UUID
import dao.postgres.common.ProcessTable
import model.{Process, ProcessStatus}
import util.JdbcUtil._

object ProcessMarshaller {

  def unmarshalProcess(rs: ResultSet): Process = {
    import ProcessTable._
    Process(
      id = rs.getObject(COL_ID).asInstanceOf[UUID],
      processDefinitionName = rs.getString(COL_DEF_NAME),
      startedAt = javaDate(rs.getTimestamp(COL_STARTED)),
      status = rs.getString(COL_STATUS) match {
        case STATUS_SUCCEEDED => ProcessStatus.Succeeded(javaDate(rs.getTimestamp(COL_ENDED_AT)))
        case STATUS_FAILED    => ProcessStatus.Failed(javaDate(rs.getTimestamp(COL_ENDED_AT)))
        case STATUS_RUNNING   => ProcessStatus.Running()
      },
      taskFilter = getStringArray(rs, COL_TASK_FILTER)
    )
  }

  def marshalProcess(process: Process,
                     stmt: PreparedStatement,
                     columns: Seq[String],
                     startIndex: Int = 1)(implicit conn: Connection) = {
    import ProcessTable._
    var index = startIndex
    columns.foreach { col =>
      col match {
        case COL_ID       => stmt.setObject(index, process.id)
        case COL_DEF_NAME => stmt.setString(index, process.processDefinitionName)
        case COL_STARTED  => stmt.setTimestamp(index, new Timestamp(process.startedAt.getTime()))
        case COL_ENDED_AT => stmt.setTimestamp(index, process.endedAt.getOrElse(null))
        case COL_STATUS =>
          stmt.setString(
            index,
            process.status match {
              case ProcessStatus.Succeeded(_) => STATUS_SUCCEEDED
              case ProcessStatus.Failed(_)    => STATUS_FAILED
              case ProcessStatus.Running()    => STATUS_RUNNING
            }
          )
        case COL_TASK_FILTER =>
          stmt.setArray(index, process.taskFilter.map(makeStringArray).getOrElse(null))
      }
      index += 1
    }
  }
}
Example 6
Source File: JdbcUtil.scala From sundial with MIT License | 5 votes |
package util

import java.sql.{Connection, Timestamp, ResultSet}
import java.util.Date
import scala.language.implicitConversions

object JdbcUtil {

  implicit def resultSetItr(resultSet: ResultSet): Stream[ResultSet] = {
    new Iterator[ResultSet] {
      def hasNext = resultSet.next()
      def next() = resultSet
    }.toStream
  }

  implicit def javaDate(ts: Timestamp): Date = {
    new Date(ts.getTime())
  }

  implicit def dateToTimestamp(date: Date) = {
    if (date != null)
      new Timestamp(date.getTime())
    else
      null
  }

  private def getNullable[T](rs: ResultSet, f: ResultSet => T): Option[T] = {
    val obj = f(rs)
    if (rs.wasNull()) {
      Option.empty
    } else {
      Some(obj)
    }
  }

  def getIntOption(rs: ResultSet, col: String) =
    getNullable(rs, rs => rs.getInt(col))

  def makeStringArray(seq: Seq[String])(implicit conn: Connection) = {
    conn.createArrayOf("varchar", seq.toArray[AnyRef])
  }

  def getStringArray(rs: ResultSet, col: String) = {
    Option(rs.getArray(col))
      .map(_.getArray().asInstanceOf[Array[String]].toList)
  }
}
Example 7
Source File: SchedulerDataManager.scala From cave with MIT License | 5 votes |
package com.cave.metrics.data.postgresql

import java.sql.Timestamp

import com.cave.metrics.data.AwsConfig
import com.cave.metrics.data.postgresql.Tables._
import org.joda.time.format.DateTimeFormat
import org.joda.time.DateTime

import scala.slick.jdbc.{GetResult, StaticQuery => Q}
import scala.slick.driver.PostgresDriver.simple._

class SchedulerDataManager(awsConfig: AwsConfig) extends DatabaseConnection(awsConfig) {

  def leadershipTermTimeoutSeconds = awsConfig.leadershipTermTimeoutSeconds
  def leadershipTermLengthSeconds = awsConfig.leadershipTermLengthSeconds

  def DBDateTimeFormatter = DateTimeFormat.forPattern("YYYY-MM-dd HH:mm:ss Z")

  implicit val getSchedulersResult = GetResult(r => SchedulersRow(r.<<, r.<<, r.<<))

  def takeLeadership(hostname: String): Boolean = {
    db.withTransaction { implicit session =>
      val termTimeout = new DateTime().minusSeconds(leadershipTermTimeoutSeconds)
      val timeoutSql = DBDateTimeFormatter.print(termTimeout)

      val sql = s"BEGIN; SELECT * FROM schedulers WHERE created_at < '$timeoutSql' FOR UPDATE"
      val query = Q.queryNA[SchedulersRow](sql)

      def updateTimestamp(): Boolean =
        Schedulers.filter(_.createdAt < new Timestamp(termTimeout.getMillis))
          .map(s => (s.name, s.createdAt)).update(hostname, new Timestamp(System.currentTimeMillis())) == 1

      try {
        query.list.length == 1 && (updateTimestamp() || {
          session.rollback()
          false
        })
      } catch {
        case e: Exception =>
          log.error(e)
          session.rollback()
          false
      }
    }
  }
}
Example 8
Source File: SchedulerDataManagerSpec.scala From cave with MIT License | 5 votes |
package com.cave.metrics.data.postgresql

import java.sql.Timestamp

import com.cave.metrics.data.postgresql.Tables._
import org.joda.time.format.DateTimeFormat
import org.scalatest.BeforeAndAfter

import scala.slick.driver.H2Driver.simple._
import scala.slick.jdbc.StaticQuery

class SchedulerDataManagerSpec extends AbstractDataManagerSpec with BeforeAndAfter {

  val hostname_1 = "host1"
  val hostname_2 = "host2"
  val hostname_3 = "host3"

  var dm: SchedulerDataManager = _

  before {
    dm = new SchedulerDataManager(awsConfig) {
      override def DBDateTimeFormatter = DateTimeFormat.forPattern("YYYY-MM-dd HH:mm:ss")
      override def leadershipTermTimeoutSeconds = 30
    }
    Schedulers += SchedulersRow(1, "initialValue", new Timestamp(System.currentTimeMillis() - 1000 * 60))
  }

  "Scheduler Data Manager" should "update Schedulers table" in {
    Schedulers.list.head.name should be("initialValue")
    assert(dm.takeLeadership(hostname_1), "Expected success")
    Schedulers.list.head.name should be(hostname_1)

    assert(!dm.takeLeadership(hostname_3), "Expected failure")
    Schedulers.list.head.name should be(hostname_1)

    assert(!dm.extendLeadership(hostname_2), "Expected failure")
    Schedulers.list.head.name should be(hostname_1)

    Thread.sleep(1500)
    assert(dm.extendLeadership(hostname_1), "A-hostname was not able to extend its leadership")
    Schedulers.list.head.name should be(hostname_1)
  }

  it should "not update the leader if one is active" in {
    StaticQuery.queryNA("truncate table SCHEDULERS").execute
    Schedulers += SchedulersRow(1, hostname_1, new Timestamp(System.currentTimeMillis() - 1000 * 20))
    Schedulers.list.length should be(1)

    assert(!dm.takeLeadership(hostname_2), "Expected failure")
    Schedulers.list.head.name should be(hostname_1)

    Thread.sleep(100)
    assert(dm.extendLeadership(hostname_1), "Expected success")
    Schedulers.list.head.name should be(hostname_1)
  }

  it should "not give leadership to host3 when host2 is the leader" in {
    StaticQuery.queryNA("truncate table SCHEDULERS").execute
    Schedulers += SchedulersRow(1, hostname_1, new Timestamp(System.currentTimeMillis() - 1000 * 31))
    Schedulers.list.length should be(1)

    assert(dm.takeLeadership(hostname_2), "Expected success")
    Schedulers.list.head.name should be(hostname_2)

    assert(!dm.takeLeadership(hostname_3), "Expected failure")
    Schedulers.list.head.name should be(hostname_2)

    assert(!dm.takeLeadership(hostname_1), "Expected failure")
    Schedulers.list.head.name should be(hostname_2)
  }

  it should "be thread safe" in {
    StaticQuery.queryNA("truncate table SCHEDULERS").execute
    Schedulers.list.length should be(0)
    Schedulers += SchedulersRow(1, hostname_1, new Timestamp(System.currentTimeMillis() - 1000 * 360))
    Schedulers.list.length should be(1)
    Schedulers.list.head.name should be(hostname_1)

    import scala.slick.jdbc.{GetResult, StaticQuery => Q}
    val sql = s"BEGIN; select * from SCHEDULERS FOR UPDATE"
    val query = Q.queryNA[SchedulersRow](sql)
    query.list.length should be(1)

    assert(!dm.takeLeadership(hostname_1), "Expected failure")
    assert(!dm.takeLeadership(hostname_2), "Expected failure")
    assert(!dm.takeLeadership(hostname_3), "Expected failure")

    assert(!dm.extendLeadership(hostname_1), "Expected failure")
    assert(!dm.extendLeadership(hostname_2), "Expected failure")
    assert(!dm.extendLeadership(hostname_3), "Expected failure")

    Schedulers.list.head.name should be(hostname_1)
  }
}
Example 9
Source File: TimeColumnBuffer.scala From spark-vector with Apache License 2.0 | 5 votes |
package com.actian.spark_vector.colbuffer.time

import java.nio.ByteBuffer
import java.sql.Timestamp
import java.util.{ Calendar, TimeZone }

import org.apache.spark.sql.catalyst.util.DateTimeUtils

import com.actian.spark_vector.ComposePartial
import com.actian.spark_vector.colbuffer._
import com.actian.spark_vector.colbuffer.util._
import com.actian.spark_vector.vector.VectorDataType

private case class TimeColumnBufferParams(cbParams: ColumnBufferBuildParams,
  converter: TimeConversion.TimeConverter,
  adjustToUTC: Boolean = false)

private[colbuffer] abstract class TimeColumnBuffer(p: TimeColumnBufferParams, valueWidth: Int)
    extends ColumnBuffer[Timestamp, Long](p.cbParams.name, p.cbParams.maxValueCount, valueWidth, valueWidth, p.cbParams.nullable) {
  private val ts = new Timestamp(System.currentTimeMillis())
  private val cal = Calendar.getInstance

  override def put(source: Timestamp, buffer: ByteBuffer): Unit = {
    if (p.adjustToUTC) {
      TimeConversion.convertLocalTimestampToUTC(source, cal)
    }
    val convertedSource = p.converter.convert(TimeConversion.normalizeTime(source), p.cbParams.scale)
    putConverted(convertedSource, buffer)
  }

  protected def putConverted(converted: Long, buffer: ByteBuffer): Unit

  override def get(buffer: ByteBuffer): Long = {
    val deconvertedSource = p.converter.deconvert(getConverted(buffer), p.cbParams.scale)
    ts.setTime(TimeConversion.scaleNanos(deconvertedSource, MillisecondsScale))
    ts.setNanos((deconvertedSource % PowersOfTen(NanosecondsScale)).toInt)
    if (p.adjustToUTC) {
      TimeConversion.convertUTCToLocalTimestamp(ts, cal)
    }
    DateTimeUtils.fromJavaTimestamp(ts)
  }

  protected def getConverted(buffer: ByteBuffer): Long
}

private class TimeIntColumnBuffer(p: TimeColumnBufferParams) extends TimeColumnBuffer(p, IntSize) {
  override protected def putConverted(converted: Long, buffer: ByteBuffer): Unit = buffer.putInt(converted.toInt)

  override protected def getConverted(buffer: ByteBuffer): Long = buffer.getInt()
}

private class TimeLongColumnBuffer(p: TimeColumnBufferParams) extends TimeColumnBuffer(p, LongSize) {
  override protected def putConverted(converted: Long, buffer: ByteBuffer): Unit = buffer.putLong(converted)

  override protected def getConverted(buffer: ByteBuffer): Long = buffer.getLong()
}

private class TimeNZLZConverter extends TimeConversion.TimeConverter {
  override def convert(unscaledNanos: Long, scale: Int): Long = TimeConversion.scaleNanos(unscaledNanos, scale)

  override def deconvert(scaledNanos: Long, scale: Int): Long = TimeConversion.unscaleNanos(scaledNanos, scale)
}

private class TimeTZConverter extends TimeConversion.TimeConverter {
  override def convert(unscaledNanos: Long, scale: Int): Long =
    (TimeConversion.scaleNanos(unscaledNanos, scale) << TimeMaskSize)

  override def deconvert(scaledNanos: Long, scale: Int): Long =
    TimeConversion.unscaleNanos(scaledNanos >> TimeMaskSize, scale)
}

private[colbuffer] object TimeColumnBuffer extends ColumnBufferBuilder {
  private final val (nzlzIntScaleBounds, nzlzLongScaleBounds) = ((0, 4), (5, 9))
  private final val (tzIntScaleBounds, tzLongScaleBounds) = ((0, 1), (2, 9))

  private val calIsNotUTC = Calendar.getInstance.getTimeZone != TimeZone.getTimeZone("UTC")

  private val buildNZPartial: PartialFunction[ColumnBufferBuildParams, TimeColumnBufferParams] =
    ofDataType(VectorDataType.TimeType) andThen { TimeColumnBufferParams(_, new TimeNZLZConverter(), calIsNotUTC) }

  private val buildLZPartial: PartialFunction[ColumnBufferBuildParams, TimeColumnBufferParams] =
    ofDataType(VectorDataType.TimeLTZType) andThen { TimeColumnBufferParams(_, new TimeNZLZConverter()) }

  private val buildNZLZ: PartialFunction[ColumnBufferBuildParams, ColumnBuffer[_, _]] = (buildNZPartial orElse buildLZPartial) andThenPartial {
    case nzlz if isInBounds(nzlz.cbParams.scale, nzlzIntScaleBounds) => new TimeIntColumnBuffer(nzlz)
    case nzlz if isInBounds(nzlz.cbParams.scale, nzlzLongScaleBounds) => new TimeLongColumnBuffer(nzlz)
  }

  private val buildTZPartial: PartialFunction[ColumnBufferBuildParams, TimeColumnBufferParams] =
    ofDataType(VectorDataType.TimeTZType) andThen { TimeColumnBufferParams(_, new TimeTZConverter()) }

  private val buildTZ: PartialFunction[ColumnBufferBuildParams, ColumnBuffer[_, _]] = buildTZPartial andThenPartial {
    case tz if isInBounds(tz.cbParams.scale, tzIntScaleBounds) => new TimeIntColumnBuffer(tz)
    case tz if isInBounds(tz.cbParams.scale, tzLongScaleBounds) => new TimeLongColumnBuffer(tz)
  }

  override private[colbuffer] val build: PartialFunction[ColumnBufferBuildParams, ColumnBuffer[_, _]] = buildNZLZ orElse buildTZ
}
Example 10
Source File: package.scala From spark-vector with Apache License 2.0 | 5 votes |
package com.actian.spark_vector.colbuffer

import java.sql.Timestamp

package object util {
  // scalastyle:off magic.number
  final val PowersOfTen = Seq(1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000)
  final val SecondsBeforeEpoch = 62167219200L
  final val TimeMaskSize = 11
  final val SecondsInMinute = 60
  final val MinutesInHour = 60
  final val HoursInDay = 24
  final val SecondsInDay = SecondsInMinute * MinutesInHour * HoursInDay
  final val MillisecondsScale = 3
  final val MillisecondsInMinute = SecondsInMinute * PowersOfTen(MillisecondsScale)
  final val MillisecondsInHour = MinutesInHour * MillisecondsInMinute
  final val MillisecondsInDay = HoursInDay * MillisecondsInHour
  final val NanosecondsScale = 9
  final val NanosecondsInMinute = (MillisecondsInMinute.toLong * PowersOfTen(NanosecondsScale - MillisecondsScale))
  final val NanosecondsInHour = MinutesInHour * NanosecondsInMinute
  final val NanosecondsInDay = HoursInDay * NanosecondsInHour
  // scalastyle:on magic.number

  def floorDiv(x: Long, y: Long): Long = {
    val ret = x / y
    if (ret >= 0 || ret * y == x) ret else ret - 1
  }
}
Example 11
Source File: PackageSpec.scala From sparkpipe-core with Apache License 2.0 | 5 votes |
package software.uncharted.sparkpipe.ops.core.dataframe.temporal

import org.scalatest._
import software.uncharted.sparkpipe.Spark
import software.uncharted.sparkpipe.ops.core.rdd.toDF
import java.text.SimpleDateFormat
import java.sql.Timestamp

class PackageSpec extends FunSpec {
  describe("ops.core.dataframe.temporal") {
    val rdd = Spark.sc.parallelize(Seq(
      (new Timestamp(new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-18").getTime), "2015-11-18", 1),
      (new Timestamp(new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-19").getTime), "2015-11-19", 2),
      (new Timestamp(new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-20").getTime), "2015-11-20", 3),
      (new Timestamp(new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-21").getTime), "2015-11-21", 4)
    ))
    val df = toDF(Spark.sparkSession)(rdd)

    describe("#dateFilter()") {
      it("should support filtering rows in an input DataFrame with a String timestamp column, based on a date range") {
        val df2 = dateFilter(
          new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-19"),
          new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-21"),
          "yyyy-MM-dd",
          "_2"
        )(df)
        assert(df2.count == 3)
      }

      it("should support filtering rows in an input DataFrame with a String timestamp column, based on a date range, specified using strings") {
        val df2 = dateFilter(
          "2015-11-19",
          "2015-11-20",
          "yyyy-MM-dd",
          "_2"
        )(df)
        assert(df2.count == 2)
      }

      it("should support filtering rows in an input DataFrame with a Timestamp timestamp column, based on a date range") {
        val df2 = dateFilter(
          new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-17"),
          new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-18"),
          "_1"
        )(df)
        assert(df2.count == 1)
      }
    }

    describe("#parseDate()") {
      it("should facilitate converting a string timestamp column into a TimestampType and adding it as a new column") {
        val df2 = parseDate("_2", "new", "yyyy-MM-dd")(df)
        assert(df2.filter("new = _1").count == df.count)
        assert(df2.schema.size == df.schema.size+1)
      }
    }

    describe("#dateField()") {
      it("should facilitate extracting a single field from a Timestamp column, and placing it in a new column") {
        val df2 = dateField("_1", "new", java.util.Calendar.YEAR)(df)
        assert(df2.filter("new = 2015").count == df.count)
        assert(df2.schema.size == df.schema.size+1)
      }
    }
  }
}
Example 12
Source File: KafkaStructuredStreamingDemo.scala From MaxCompute-Spark with Apache License 2.0 | 5 votes |
package com.aliyun.odps.spark.examples.structuredStreaming.kafka

import java.sql.Timestamp

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.window

object KafkaStructuredStreamingDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("KafkaStreamingDemo")
      .getOrCreate()

    import spark.implicits._

    val df = spark
      .readStream
      .format("kafka")
      .option("kafka.bootstrap.servers", "localhost:9092")
      .option("subscribe", "topic")
      .load()

    // Split each Kafka message value into (word, timestamp) pairs.
    // This step is missing from the listing as captured here; it is reconstructed following the
    // standard structured-streaming word-count pattern that the windowed query below expects.
    val wordsWithTimestamp = df
      .selectExpr("CAST(value AS STRING)", "timestamp")
      .as[(String, Timestamp)]
      .flatMap { case (line, ts) => line.split(" ").map(word => (word, ts)) }
      .toDF("word", "timestamp")

    // Use OSS as the checkpoint storage
    val checkpointLocation3 = "oss://bucket/checkpoint3/"

    val windowedCountsWithWatermark = wordsWithTimestamp
      .withWatermark("timestamp", "5 seconds")
      .groupBy(
        window($"timestamp", "6 seconds", "3 seconds"),
        $"word"
      ).count()

    val query3 = windowedCountsWithWatermark.writeStream
      .outputMode("append")
      .format("console")
      .option("checkpointLocation", checkpointLocation3)
      .start()

    query3.awaitTermination()
  }
}
Example 13
Source File: TimestampVectorWriter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.orc.vectors

import java.sql.Timestamp

import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector

object TimestampVectorWriter extends OrcVectorWriter[TimestampColumnVector, Timestamp] {
  override def write(vector: TimestampColumnVector, offset: Int, value: Option[Timestamp]): Unit = {
    value match {
      case Some(ts) => vector.set(offset, ts)
      case _ =>
        vector.setNullValue(offset)
        vector.noNulls = false
        vector.isNull(offset) = true
    }
  }
}
Example 14
Source File: CallRecordGeneratorIngress.scala From pipelines-examples with Apache License 2.0 | 5 votes |
package pipelines.examples.carly.aggregator

import java.sql.Timestamp

import scala.util.Random
import scala.concurrent.duration._

import org.apache.spark.sql.{ Dataset, SparkSession }
import org.apache.spark.sql.streaming.OutputMode
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.LongType

import pipelines.streamlets._
import pipelines.streamlets.avro._
import pipelines.spark.sql.SQLImplicits._
import pipelines.examples.carly.data.CallRecord
import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic }
import org.apache.log4j.{ Level, Logger }

case class Rate(timestamp: Timestamp, value: Long)

class CallRecordGeneratorIngress extends SparkStreamlet {

  val rootLogger = Logger.getRootLogger()
  rootLogger.setLevel(Level.ERROR)

  val RecordsPerSecond = IntegerConfigParameter(
    "records-per-second",
    "Records per second to process.",
    Some(50))

  override def configParameters = Vector(RecordsPerSecond)

  val out = AvroOutlet[CallRecord]("out", _.user)
  val shape = StreamletShape(out)

  override def createLogic() = new SparkStreamletLogic {
    val recordsPerSecond = context.streamletConfig.getInt(RecordsPerSecond.key)
    override def buildStreamingQueries = {
      val outStream = DataGenerator.mkData(super.session, recordsPerSecond)
      writeStream(outStream, out, OutputMode.Append).toQueryExecution
    }
  }
}

object DataGenerator {
  def mkData(session: SparkSession, recordsPerSecond: Int): Dataset[CallRecord] = {
    // do we need to expose this through configuration?
    val MaxTime = 2.hours.toMillis
    val MaxUsers = 100000
    val TS0 = new java.sql.Timestamp(0)
    val ZeroTimestampProb = 0.05 // error rate

    // Random Data Generator
    val usersUdf = udf(() ⇒ "user-" + Random.nextInt(MaxUsers))
    val directionUdf = udf(() ⇒ if (Random.nextDouble() < 0.5) "incoming" else "outgoing")

    // Time-biased randomized filter - 1/2 hour cycles
    val sinTime: Long ⇒ Double = t ⇒ Math.sin((t / 1000 % 1800) * 1.0 / 1800 * Math.PI)
    val timeBoundFilter: Long ⇒ Double ⇒ Boolean = t ⇒ prob ⇒ (sinTime(t) + 0.5) > prob
    val timeFilterUdf = udf((ts: java.sql.Timestamp, rng: Double) ⇒ timeBoundFilter(ts.getTime)(rng))
    val zeroTimestampUdf = udf((ts: java.sql.Timestamp, rng: Double) ⇒ {
      if (rng < ZeroTimestampProb) {
        TS0
      } else {
        ts
      }
    })

    val rateStream = session.readStream
      .format("rate")
      .option("rowsPerSecond", recordsPerSecond)
      .load()
      .as[Rate]

    val randomDataset = rateStream.withColumn("rng", rand()).withColumn("tsRng", rand())
    val sampledData = randomDataset.where(timeFilterUdf($"timestamp", $"rng"))
      .withColumn("user", usersUdf())
      .withColumn("other", usersUdf())
      .withColumn("direction", directionUdf())
      .withColumn("duration", (round(abs(rand()) * MaxTime)).cast(LongType))
      .withColumn("updatedTimestamp", zeroTimestampUdf($"timestamp", $"tsRng"))
      .select($"user", $"other", $"direction", $"duration", $"updatedTimestamp" as "timestamp")
      .as[CallRecord]
    sampledData
  }
}
Example 15
Source File: SparkRandomGenDataIngress.scala From pipelines-examples with Apache License 2.0 | 5 votes |
package pipelines.example

import java.sql.Timestamp

import scala.util.Random

import pipelines.streamlets.{ IntegerConfigParameter, StreamletShape }
import pipelines.streamlets.avro._
import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic }
import org.apache.spark.sql.Dataset
import org.apache.spark.sql.streaming.OutputMode
import pipelines.spark.sql.SQLImplicits._

case class Rate(timestamp: Timestamp, value: Long)

class SparkRandomGenDataIngress extends SparkStreamlet {
  val out   = AvroOutlet[Data]("out", d ⇒ d.src)
  val shape = StreamletShape(out)

  val RecordsPerSecond = IntegerConfigParameter(
    "records-per-second",
    "Records per second to produce.",
    Some(50))

  override def configParameters = Vector(RecordsPerSecond)

  override def createLogic() = new SparkStreamletLogic {
    override def buildStreamingQueries = {
      writeStream(process, out, OutputMode.Append).toQueryExecution
    }

    private def process: Dataset[Data] = {
      val recordsPerSecond = context.streamletConfig.getInt(RecordsPerSecond.key)

      val gaugeGen: () ⇒ String = () ⇒ if (Random.nextDouble() < 0.5) "oil" else "gas"

      val rateStream = session.readStream
        .format("rate")
        .option("rowsPerSecond", recordsPerSecond)
        .load()
        .as[Rate]

      rateStream.map {
        case Rate(timestamp, value) ⇒ Data(s"src-${value % 100}", timestamp.getTime, gaugeGen(), Random.nextDouble() * value)
      }
    }
  }
}
Example 16
Source File: PSetAny.scala From yoda-orm with MIT License | 5 votes |
package in.norbor.yoda.orm

import java.sql.{Blob, Timestamp}

import org.joda.time.DateTime

trait PSetAny {

  def set(p: PStatement, v: Any): PStatement = v match {
    case _: Boolean => p.setBoolean(v.asInstanceOf[Boolean])
    case _: Int => p.setInt(v.asInstanceOf[Int])
    case _: Long => p.setLong(v.asInstanceOf[Long])
    case _: Float => p.setDouble(v.asInstanceOf[Float].toDouble) // widen the Float; casting the boxed Float straight to Double fails at runtime
    case _: Double => p.setDouble(v.asInstanceOf[Double])
    case _: String => p.setString(v.asInstanceOf[String])
    case _: Timestamp => p.setTimestamp(v.asInstanceOf[Timestamp])
    case _: DateTime => p.setDateTime(v.asInstanceOf[DateTime])
    case _: Blob => p.setBlob(v.asInstanceOf[Blob])
    case _: Array[Byte] => p.setBytes(v.asInstanceOf[Array[Byte]])
    case _ => p
  }
}
Example 17
Source File: PStatementTest.scala From yoda-orm with MIT License | 5 votes |
package in.norbor.yoda.orm

import java.sql.{Connection, DriverManager, ResultSet, Timestamp}

import com.typesafe.scalalogging.LazyLogging
import in.norbor.yoda.implicits.JavaSqlImprovement._
import mocks.People
import org.joda.time.DateTime
import org.scalatest.funsuite.AnyFunSuite

class PStatementTest extends AnyFunSuite {

  Class.forName("org.h2.Driver")

  private implicit val conn: Connection = DriverManager.getConnection("jdbc:h2:~/test", "sa", "")

  test("0) apply") {
    val ps = PStatement("SELECT 1")(conn)
    assert(ps !== null)

    ps.equals(null)
    ps.canEqual(null)
    ps.hashCode
    ps.toString
    ps.productPrefix
    ps.productArity
    ps.productElement(0)
    ps.productIterator
    ps.copy()
  }

  test("0) update") {
    PStatement("DROP TABLE IF EXISTS yoda_sql; CREATE TABLE yoda_sql (id INTEGER);")
      .update
  }

  test("0) query") {
    val rs = PStatement("""select 1""")
      .query

    assert(rs !== null)
  }

  test("0) queryOne with non index parameter") {
    val result = PStatement("""select ?, ?, ?, ?, ?, ?, ?, ?""")
      .setBoolean(true)
      .setInt(1)
      .setLong(1L)
      .setDouble(1)
      .setString("YO")
      .setDateTime(DateTime.now)
      .setTimestamp(new Timestamp(System.currentTimeMillis))
      .setTimestamp(null)
      .queryOne(parse)

    assert(result.head._1 === true)
  }

  test("3) queryList with parse method") {
    val peoples = PStatement("""select 1 as id, 'Peerapat' as name, now() as born;""")
      .queryList(parsePeople)

    assert(peoples.head.id === 1)
    assert(peoples.head.name === "Peerapat")
    assert(peoples.head.born.getMillis <= DateTime.now.getMillis)
  }

  test("5) batch") {
    val insert = PStatement("INSERT INTO yoda_sql VALUES(?)")
      .setInt(1)
      .addBatch()
      .setInt(2)
      .addBatch()
      .executeBatch

    assert(insert.length === 2)
  }

  private def parse(rs: ResultSet): (Boolean, Int, Long, Double, String, DateTime, Timestamp) = (rs.getBoolean(1)
    , rs.getInt(2)
    , rs.getLong(3)
    , rs.getDouble(4)
    , rs.getString(5)
    , rs.getDateTime(6)
    , rs.getTimestamp(7)
  )

  private def parsePeople(rs: ResultSet): People = People(id = rs.getLong("id")
    , name = rs.getString("name")
    , born = rs.getDateTime("born")
  )
}
Example 18
Source File: DateTimeConverter.scala From seahorse-workflow-executor with Apache License 2.0 | 5 votes |
package io.deepsense.commons.datetime

import java.sql.Timestamp

import org.joda.time.format.{DateTimeFormatter, ISODateTimeFormat}
import org.joda.time.{DateTime, DateTimeZone}

trait DateTimeConverter {
  val zone: DateTimeZone = DateTimeZone.getDefault
  val dateTimeFormatter: DateTimeFormatter = ISODateTimeFormat.dateTime()

  def toString(dateTime: DateTime): String = dateTime.toString(dateTimeFormatter)

  def parseDateTime(s: String): DateTime = dateTimeFormatter.parseDateTime(s).withZone(zone)

  def parseTimestamp(s: String): Timestamp = new Timestamp(parseDateTime(s).getMillis)

  def now: DateTime = new DateTime(zone)

  def fromMillis(millis: Long): DateTime = new DateTime(zone).withMillis(millis)

  def dateTime(
      year: Int,
      monthOfyear: Int,
      dayOfMonth: Int,
      hourOfDay: Int = 0,
      minutesOfHour: Int = 0,
      secondsOfMinute: Int = 0): DateTime =
    new DateTime(year, monthOfyear, dayOfMonth, hourOfDay, minutesOfHour, secondsOfMinute, zone)

  def dateTimeFromUTC(
      year: Int,
      monthOfyear: Int,
      dayOfMonth: Int,
      hourOfDay: Int = 0,
      minutesOfHour: Int = 0,
      secondsOfMinute: Int = 0): DateTime =
    new DateTime(
      year,
      monthOfyear,
      dayOfMonth,
      hourOfDay,
      minutesOfHour,
      secondsOfMinute,
      DateTimeZone.UTC).withZone(DateTimeConverter.zone)
}

object DateTimeConverter extends DateTimeConverter
Example 19
Source File: CsvSchemaStringifierBeforeCsvWriting.scala From seahorse-workflow-executor with Apache License 2.0 | 5 votes |
package io.deepsense.deeplang.doperations.readwritedataframe.filestorage.csv

import java.sql.Timestamp

import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

import io.deepsense.commons.datetime.DateTimeConverter
import io.deepsense.deeplang.ExecutionContext
import io.deepsense.deeplang.doperables.dataframe.DataFrame
import io.deepsense.deeplang.doperations.exceptions.UnsupportedColumnTypeException

object CsvSchemaStringifierBeforeCsvWriting {

  def preprocess(dataFrame: DataFrame)(implicit context: ExecutionContext): DataFrame = {
    requireNoComplexTypes(dataFrame)

    val schema = dataFrame.sparkDataFrame.schema
    def stringifySelectedTypes(schema: StructType): StructType = {
      StructType(
        schema.map {
          case field: StructField => field.copy(dataType = StringType)
        }
      )
    }

    context.dataFrameBuilder.buildDataFrame(
      stringifySelectedTypes(schema),
      dataFrame.sparkDataFrame.rdd.map(stringifySelectedCells(schema)))
  }

  private def requireNoComplexTypes(dataFrame: DataFrame): Unit = {
    dataFrame.sparkDataFrame.schema.fields.map(structField =>
      (structField.dataType, structField.name)
    ).foreach {
      case (dataType, columnName) =>
        dataType match {
          case _: ArrayType | _: MapType | _: StructType =>
            throw UnsupportedColumnTypeException(columnName, dataType)
          case _ => ()
        }
    }
  }

  private def stringifySelectedCells(originalSchema: StructType)(row: Row): Row = {
    Row.fromSeq(
      row.toSeq.zipWithIndex map { case (value, index) =>
        (value, originalSchema(index).dataType) match {
          case (null, _) => ""
          case (_, BooleanType) =>
            if (value.asInstanceOf[Boolean]) "1" else "0"
          case (_, TimestampType) =>
            DateTimeConverter.toString(
              DateTimeConverter.fromMillis(value.asInstanceOf[Timestamp].getTime))
          case (x, _) => value.toString
        }
      })
  }
}
Example 20
Source File: WriteReadDataFrameWithDriverFilesIntegSpec.scala From seahorse-workflow-executor with Apache License 2.0 | 5 votes |
package io.deepsense.deeplang.doperations

import java.sql.Timestamp

import org.apache.spark.sql.Row
import org.apache.spark.sql.types._
import org.scalatest.BeforeAndAfter

import io.deepsense.deeplang.{TestFiles, DeeplangIntegTestSupport}
import io.deepsense.deeplang.doperables.dataframe.DataFrame
import io.deepsense.deeplang.doperations.inout._

class WriteReadDataFrameWithDriverFilesIntegSpec
  extends DeeplangIntegTestSupport
  with BeforeAndAfter
  with TestFiles {

  import DeeplangIntegTestSupport._

  val schema: StructType = StructType(Seq(
    StructField("boolean", BooleanType),
    StructField("double", DoubleType),
    StructField("string", StringType)
  ))

  val rows = {
    val base = Seq(
      Row(true, 0.45, "3.14"),
      Row(false, null, "\"testing...\""),
      Row(false, 3.14159, "Hello, world!"),
      // in case of CSV, an empty string is the same as null - no way around it
      Row(null, null, "")
    )
    val repeatedFewTimes = (1 to 10).flatMap(_ => base)
    repeatedFewTimes
  }

  lazy val dataFrame = createDataFrame(rows, schema)

  "WriteDataFrame and ReadDataFrame" should {
    "write and read CSV file" in {
      val wdf = new WriteDataFrame()
        .setStorageType(
          new OutputStorageTypeChoice.File()
            .setOutputFile(absoluteTestsDirPath.fullPath + "/test_files")
            .setFileFormat(
              new OutputFileFormatChoice.Csv()
                .setCsvColumnSeparator(CsvParameters.ColumnSeparatorChoice.Tab())
                .setNamesIncluded(true)))
      wdf.executeUntyped(Vector(dataFrame))(executionContext)

      val rdf = new ReadDataFrame()
        .setStorageType(
          new InputStorageTypeChoice.File()
            .setSourceFile(absoluteTestsDirPath.fullPath + "/test_files")
            .setFileFormat(new InputFileFormatChoice.Csv()
              .setCsvColumnSeparator(CsvParameters.ColumnSeparatorChoice.Tab())
              .setNamesIncluded(true)
              .setShouldConvertToBoolean(true)))
      val loadedDataFrame = rdf.executeUntyped(Vector())(executionContext).head.asInstanceOf[DataFrame]

      assertDataFramesEqual(loadedDataFrame, dataFrame, checkRowOrder = false)
    }

    "write and read JSON file" in {
      val wdf = new WriteDataFrame()
        .setStorageType(new OutputStorageTypeChoice.File()
          .setOutputFile(absoluteTestsDirPath.fullPath + "json")
          .setFileFormat(new OutputFileFormatChoice.Json()))

      wdf.executeUntyped(Vector(dataFrame))(executionContext)

      val rdf = new ReadDataFrame()
        .setStorageType(new InputStorageTypeChoice.File()
          .setSourceFile(absoluteTestsDirPath.fullPath + "json")
          .setFileFormat(new InputFileFormatChoice.Json()))
      val loadedDataFrame = rdf.executeUntyped(Vector())(executionContext).head.asInstanceOf[DataFrame]

      assertDataFramesEqual(loadedDataFrame, dataFrame, checkRowOrder = false)
    }
  }
}
Example 21
Source File: DataFrameReportPerformanceSpec.scala From seahorse-workflow-executor with Apache License 2.0 | 5 votes |
package io.deepsense.deeplang.doperables.dataframe

import java.sql.Timestamp
import java.text.{DateFormat, SimpleDateFormat}
import java.util.TimeZone

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{DoubleType, StructField, StructType, TimestampType}
import org.scalatest.{BeforeAndAfter, Ignore}

import io.deepsense.commons.utils.{DoubleUtils, Logging}
import io.deepsense.deeplang.{TestFiles, DeeplangIntegTestSupport}

// It's ignored because it does not have assertions; it only prints report generation time.
@Ignore
class DataFrameReportPerformanceSpec
  extends DeeplangIntegTestSupport
  with BeforeAndAfter
  with TestFiles
  with Logging {

  val testFile = absoluteTestsDirPath.pathWithoutScheme + "/demand_without_header.csv"

  "DataFrame" should {
    "generate report" when {
      "DataFrame has 17K of rows" in {
        val numberOfTries = 10
        var results: Seq[Double] = Seq()
        for (i <- 1 to numberOfTries) {
          val dataFrame: DataFrame = demandDataFrame()
          val start = System.nanoTime()
          val report = dataFrame.report
          val end = System.nanoTime()
          val time1: Double = (end - start).toDouble / 1000000000.0
          results = results :+ time1
          logger.debug("Report generation time: {}", DoubleUtils.double2String(time1))
        }
        logger.debug(
          "Mean report generation time: {}",
          DoubleUtils.double2String(results.fold(0D)(_ + _) / numberOfTries.toDouble))
      }
    }
  }

  private def demandDataFrame(): DataFrame = {
    val rddString: RDD[String] = executionContext.sparkContext.textFile(testFile)
    val data: RDD[Row] = rddString.map(DataFrameHelpers.demandString2Row)
    executionContext.dataFrameBuilder.buildDataFrame(demandSchema, data)
  }

  private def demandSchema: StructType = StructType(Seq(
    StructField("datetime", TimestampType),
    StructField("log_count", DoubleType),
    StructField("workingday", DoubleType),
    StructField("holiday", DoubleType),
    StructField("season2", DoubleType),
    StructField("season3", DoubleType),
    StructField("season4", DoubleType)))

  private def timestamp(s: String): Timestamp = {
    val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    format.setTimeZone(TimeZone.getTimeZone("UTC"))
    new Timestamp(format.parse(s).getTime)
  }
}

private object DataFrameHelpers {
  def demandString2Row(s: String): Row = {
    val split = s.split(",")
    Row(
      timestamp(split(0)),
      split(1).toDouble,
      split(2).toDouble,
      split(3).toDouble,
      split(4).toDouble,
      split(5).toDouble,
      split(6).toDouble
    )
  }

  private def timestamp(s: String): Timestamp = {
    val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    format.setTimeZone(TimeZone.getTimeZone("UTC"))
    new Timestamp(format.parse(s).getTime)
  }
}
Example 22
Source File: StatisticsForContinuousIntegSpec.scala From seahorse-workflow-executor with Apache License 2.0 | 5 votes |
package io.deepsense.deeplang.doperables.dataframe.report.distribution

import java.sql.Timestamp

import org.apache.spark.rdd.RDD
import org.apache.spark.sql
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

import io.deepsense.commons.datetime.DateTimeConverter
import io.deepsense.deeplang.DeeplangIntegTestSupport
import io.deepsense.deeplang.doperables.dataframe.{DataFrame, DataFrameTestFactory}
import io.deepsense.reportlib.model._

class StatisticsForContinuousIntegSpec extends DeeplangIntegTestSupport with DataFrameTestFactory {

  "Statistics (Min, max and mean values)" should {
    "be calculated for each continuous column in distribution" when {
      "data is of type int" in {
        val distribution = distributionForInt(1, 2, 3, 4, 5)
        distribution.statistics.min shouldEqual Some("1")
        distribution.statistics.max shouldEqual Some("5")
        distribution.statistics.mean shouldEqual Some("3")
      }
      "data is of type Timestamp" in {
        val distribution =
          distributionForTimestamps(new Timestamp(1000), new Timestamp(2000), new Timestamp(3000))
        distribution.statistics.min shouldEqual Some(formatDate(1000))
        distribution.statistics.max shouldEqual Some(formatDate(3000))
        distribution.statistics.mean shouldEqual Some(formatDate(2000))
      }
    }
  }

  "Null value in data" should {
    val distribution = distributionForDouble(1, 2, 3, 4, Double.NaN, 5)

    "not be skipped in calculating min and max" in {
      distribution.statistics.min shouldEqual Some("1")
      distribution.statistics.max shouldEqual Some("5")
    }
    "result in mean value NaN" in {
      distribution.statistics.mean shouldEqual Some("NaN")
    }
  }

  lazy val columnName = "column_name"

  private def distributionForDouble(data: Double*): ContinuousDistribution = {
    distributionFor(data, DoubleType)
  }

  private def distributionForInt(data: Int*): ContinuousDistribution = {
    distributionFor(data, IntegerType)
  }

  private def distributionForTimestamps(data: Timestamp*): ContinuousDistribution = {
    distributionFor(data, TimestampType)
  }

  private def distributionFor(data: Seq[Any], dataType: DataType): ContinuousDistribution = {
    val schema = StructType(Array(
      StructField(columnName, dataType)
    ))
    val rows = data.map(v => Row(v))
    val dataFrame = createDataFrame(rows, schema)

    val report = dataFrame.report
    report.content.distributions(columnName).asInstanceOf[ContinuousDistribution]
  }

  def buildDataFrame(schema: StructType, data: RDD[Row]): DataFrame = {
    val dataFrame: sql.DataFrame = sparkSQLSession.createDataFrame(data, schema)
    DataFrame.fromSparkDataFrame(dataFrame)
  }

  def formatDate(millis: Long): String = {
    DateTimeConverter.toString(DateTimeConverter.fromMillis(millis))
  }
}
Example 23
Source File: ParameterConversions.scala From scruid with Apache License 2.0 | 5 votes |
package ing.wbaa.druid.sql

import java.sql.Timestamp
import java.time.{ Instant, LocalDate, LocalDateTime }

import scala.language.implicitConversions

import ing.wbaa.druid.{ DruidConfig, SQLQueryParameter, SQLQueryParameterType }

trait ParameterConversions {
  implicit def char2Param(v: Char): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Char, v.toString)

  implicit def string2Param(v: String): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Varchar, v)

  implicit def byte2Param(v: Byte): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Tinyint, v.toString)

  implicit def short2Param(v: Short): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Smallint, v.toString)

  implicit def int2Param(v: Int): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Integer, v.toString)

  implicit def long2Param(v: Long): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Bigint, v.toString)

  implicit def float2Param(v: Float): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Float, v.toString)

  implicit def double2Param(v: Double): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Double, v.toString)

  implicit def boolean2Param(v: Boolean): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Boolean, v.toString)

  implicit def localDate2Param(v: LocalDate)(implicit config: DruidConfig = DruidConfig.DefaultConfig): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Date, v.format(config.FormatterDate))

  implicit def localDateTime2Param(
      v: LocalDateTime
  )(implicit config: DruidConfig = DruidConfig.DefaultConfig): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Timestamp, v.format(config.FormatterDateTime))

  implicit def timestamp2Param(v: Timestamp)(implicit config: DruidConfig = DruidConfig.DefaultConfig): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Timestamp, config.FormatterDateTime.format(v.toInstant))

  implicit def instant2Param(
      v: Instant
  )(implicit config: DruidConfig = DruidConfig.DefaultConfig): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Timestamp, config.FormatterDateTime.format(v))
}
Example 24
Source File: CreateOps.scala From recogito2 with Apache License 2.0 | 5 votes |
package services.folder.create

import java.util.{Date, UUID}
import java.sql.Timestamp
import org.jooq.DSLContext
import scala.concurrent.Future
import services.{PublicAccess, SharingLevel}
import services.folder.FolderService
import services.generated.Tables.{FOLDER, FOLDER_ASSOCIATION, SHARING_POLICY}
import services.generated.tables.records.{FolderRecord, FolderAssociationRecord, SharingPolicyRecord}

trait CreateOps { self: FolderService =>

  def createFolder(owner: String, title: String, parent: Option[UUID]): Future[FolderRecord] =
    db.withTransaction { sql =>
      val folder = new FolderRecord(UUID.randomUUID, owner, title, optUUID(parent), null, PublicAccess.PRIVATE.toString, null)
      sql.insertInto(FOLDER).set(folder).execute()
      folder
    }

  private def insertAssociation(documentId: String, folderId: UUID, sql: DSLContext) = {
    val association = new FolderAssociationRecord(folderId, documentId)
    sql.insertInto(FOLDER_ASSOCIATION).set(association).execute()
    association
  }

  def moveDocumentToFolder(documentId: String, folderId: UUID) =
    db.withTransaction { sql =>
      sql.deleteFrom(FOLDER_ASSOCIATION)
        .where(FOLDER_ASSOCIATION.DOCUMENT_ID.equal(documentId))
        .execute

      insertAssociation(documentId, folderId, sql)
    }

  def moveDocumentToRoot(documentId: String) =
    db.withTransaction { sql =>
      sql.deleteFrom(FOLDER_ASSOCIATION)
        .where(FOLDER_ASSOCIATION.DOCUMENT_ID.equal(documentId))
        .execute == 1
    }

  def addCollaborator(folderId: UUID, sharedBy: String, sharedWith: String, level: SharingLevel) =
    db.query { sql =>
      val existing = sql.selectFrom(SHARING_POLICY)
        .where(SHARING_POLICY.FOLDER_ID.equal(folderId)
          .and(SHARING_POLICY.SHARED_WITH.equal(sharedWith))).fetchOne

      val policy = Option(existing) match {
        case Some(policy) =>
          policy.setSharedBy(sharedBy)
          policy.setSharedAt(new Timestamp(new Date().getTime))
          policy.setAccessLevel(level.toString)
          policy

        case None =>
          val policy = new SharingPolicyRecord(
            null, // auto-inc id
            folderId,
            null, // document_id
            sharedBy,
            sharedWith,
            new Timestamp(new Date().getTime),
            level.toString)

          policy.changed(SHARING_POLICY.ID, false)
          sql.attach(policy)
          policy
      }

      policy.store() == 1
    }
}
Example 25
Source File: A_1_WindowOperation.scala From wow-spark with MIT License | 5 votes |
package com.sev7e0.wow.structured_streaming

import java.sql.Timestamp

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._
import org.apache.spark.sql.streaming.OutputMode

object A_1_WindowOperation {

  def main(args: Array[String]): Unit = {
    if (args.length < 3) {
      println(s" Usage: StructuredNetworkWordCountWindowed <hostname> <port>" +
        " <window duration in seconds> [<slide duration in seconds>]")
      System.exit(1)
    }

    val host = args(0)
    val port = args(1).toInt
    val windowSize = args(2).toInt
    val slideSize = if (args.length == 3) windowSize else args(3).toInt
    if (slideSize > windowSize) {
      System.err.println("<slide duration> must be less than or equal to <window duration>")
    }
    val windowDuration = s"$windowSize seconds"
    val slideDuration = s"$slideSize seconds"

    val spark = SparkSession.builder()
      .master("local")
      .appName(A_1_WindowOperation.getClass.getName)
      .getOrCreate()

    val lines = spark.readStream
      .format("socket")
      .option("host", host)
      .option("port", port)
      .option("includeTimestamp", true) // attach an event timestamp to each line so it can be windowed below
      .load()

    import spark.implicits._
    val words = lines.as[(String, Timestamp)]
      .flatMap(line => line._1.split(" ").map(word => (word, line._2)))
      .toDF("word", "timestamp")

    val windowCount = words.groupBy(
      window($"timestamp", windowDuration, slideDuration), $"word").count().orderBy("window")

    val query = windowCount.writeStream
      .outputMode(OutputMode.Complete())
      .format("console")
      .option("truncate", "false")
      .start()

    query.awaitTermination()
  }
}
Example 26
Source File: A_1_BasicOperation.scala From wow-spark with MIT License | 5 votes |
package com.sev7e0.wow.structured_streaming

import java.sql.Timestamp

import org.apache.spark.sql.types.{BooleanType, StringType, StructType, TimestampType}
import org.apache.spark.sql.{Dataset, SparkSession}

object A_1_BasicOperation {

  // For timestamp fields the case class must use java.sql.Timestamp, which Catalyst maps to TimestampType
  case class DeviceData(device: String, deviceType: String, signal: Double, time: Timestamp)

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName(A_1_BasicOperation.getClass.getName)
      .master("local")
      .getOrCreate()

    val timeStructType = new StructType().add("device", StringType)
      .add("deviceType", StringType)
      .add("signal", BooleanType)
      .add("time", TimestampType)

    val dataFrame = spark.read.json("src/main/resources/sparkresource/device.json")
    import spark.implicits._
    val ds: Dataset[DeviceData] = dataFrame.as[DeviceData]

    // untyped (SQL-like) query
    dataFrame.select("device").where("signal>10").show()
    // typed query
    ds.filter(_.signal > 10).map(_.device).show()

    // untyped groupBy with a count per device type
    dataFrame.groupBy("deviceType").count().show()

    import org.apache.spark.sql.expressions.scalalang.typed
    // typed aggregation: average signal value per device type
    ds.groupByKey(_.deviceType).agg(typed.avg(_.signal)).show()

    // alternatively, register a temporary view and query it with SQL
    dataFrame.createOrReplaceTempView("device")
    spark.sql("select * from device").show()

    // isStreaming tells whether the DataFrame carries streaming data
    println(dataFrame.isStreaming)
  }
}
Example 27
Source File: SparkDataGenerator.scala From cloudflow with Apache License 2.0 | 5 votes |
package swissknife.spark

import java.sql.Timestamp

import cloudflow.streamlets.{ IntegerConfigParameter, StreamletShape }
import cloudflow.streamlets.avro._
import cloudflow.spark.{ SparkStreamlet, SparkStreamletLogic }
import org.apache.spark.sql.Dataset
import org.apache.spark.sql.streaming.OutputMode
import org.apache.spark.sql.functions._
import cloudflow.spark.sql.SQLImplicits._

import swissknife.data.Data

case class Rate(timestamp: Timestamp, value: Long)

class SparkDataGenerator extends SparkStreamlet {

  val out   = AvroOutlet[Data]("out", d ⇒ d.src)
  val shape = StreamletShape(out)

  val RecordsPerSecond = IntegerConfigParameter("records-per-second", "Records per second to produce.", Some(1))

  override def configParameters = Vector(RecordsPerSecond)

  override def createLogic() = new SparkStreamletLogic {
    override def buildStreamingQueries =
      writeStream(process, out, OutputMode.Append).toQueryExecution

    private def process: Dataset[Data] = {
      val recordsPerSecond = RecordsPerSecond.value
      session.readStream
        .format("rate")
        .option("rowsPerSecond", recordsPerSecond)
        .load()
        .select(lit("origin").as("src"), $"timestamp", lit("").as("payload"), $"value".as("count"))
        .as[Data]
    }
  }
}
Example 28
Source File: CallRecordGeneratorIngress.scala From cloudflow with Apache License 2.0 | 5 votes |
package carly.aggregator

import java.sql.Timestamp

import scala.util.Random
import scala.concurrent.duration._

import org.apache.spark.sql.{ Dataset, SparkSession }
import org.apache.spark.sql.streaming.OutputMode
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.LongType

import cloudflow.streamlets._
import cloudflow.streamlets.avro._
import cloudflow.spark.sql.SQLImplicits._
import carly.data.CallRecord
import cloudflow.spark.{ SparkStreamlet, SparkStreamletLogic }
import org.apache.log4j.{ Level, Logger }

case class Rate(timestamp: Timestamp, value: Long)

class CallRecordGeneratorIngress extends SparkStreamlet {

  val rootLogger = Logger.getRootLogger()
  rootLogger.setLevel(Level.ERROR)

  val RecordsPerSecond = IntegerConfigParameter("records-per-second", "Records per second to process.", Some(50))

  override def configParameters = Vector(RecordsPerSecond)

  val out = AvroOutlet[CallRecord]("out", _.user)
  val shape = StreamletShape(out)

  override def createLogic() = new SparkStreamletLogic {
    val recordsPerSecond = RecordsPerSecond.value
    override def buildStreamingQueries = {
      val outStream = DataGenerator.mkData(super.session, recordsPerSecond)
      writeStream(outStream, out, OutputMode.Append).toQueryExecution
    }
  }
}

object DataGenerator {
  def mkData(session: SparkSession, recordsPerSecond: Int): Dataset[CallRecord] = {
    // do we need to expose this through configuration?
    val MaxTime = 2.hours.toMillis
    val MaxUsers = 100000
    val TS0 = new java.sql.Timestamp(0)
    val ZeroTimestampProb = 0.05 // error rate

    // Random Data Generator
    val usersUdf = udf(() ⇒ "user-" + Random.nextInt(MaxUsers))
    val directionUdf = udf(() ⇒ if (Random.nextDouble() < 0.5) "incoming" else "outgoing")

    // Time-biased randomized filter - 1/2 hour cycles
    val sinTime: Long ⇒ Double = t ⇒ Math.sin((t / 1000 % 1800) * 1.0 / 1800 * Math.PI)
    val timeBoundFilter: Long ⇒ Double ⇒ Boolean = t ⇒ prob ⇒ (sinTime(t) + 0.5) > prob
    val timeFilterUdf = udf((ts: java.sql.Timestamp, rng: Double) ⇒ timeBoundFilter(ts.getTime)(rng))
    val zeroTimestampUdf = udf { (ts: java.sql.Timestamp, rng: Double) ⇒
      if (rng < ZeroTimestampProb) {
        TS0
      } else {
        ts
      }
    }

    val rateStream = session.readStream
      .format("rate")
      .option("rowsPerSecond", recordsPerSecond)
      .load()
      .as[Rate]

    val randomDataset = rateStream.withColumn("rng", rand()).withColumn("tsRng", rand())
    val sampledData = randomDataset
      .where(timeFilterUdf($"timestamp", $"rng"))
      .withColumn("user", usersUdf())
      .withColumn("other", usersUdf())
      .withColumn("direction", directionUdf())
      .withColumn("duration", (round(abs(rand()) * MaxTime)).cast(LongType))
      .withColumn("updatedTimestamp", zeroTimestampUdf($"timestamp", $"tsRng"))
      .select($"user", $"other", $"direction", $"duration", $"updatedTimestamp".as("timestamp"))
      .as[CallRecord]
    sampledData
  }
}
Example 29
Source File: SparkRandomGenIngress.scala From cloudflow with Apache License 2.0 | 5 votes |
package cloudflow.sparkdoc

import scala.util.Random

import cloudflow.spark._
import cloudflow.streamlets._
import cloudflow.streamlets.avro._
import cloudflow.spark.sql.SQLImplicits._

import org.apache.spark.sql.Dataset
import org.apache.spark.sql.streaming.OutputMode

import java.sql.Timestamp

class SparkRandomGenDataIngress extends SparkStreamlet {
  val out = AvroOutlet[Data]("out", d ⇒ d.key)
  val shape = StreamletShape(out)

  case class Rate(timestamp: Timestamp, value: Long)

  override def createLogic() = new SparkStreamletLogic {
    override def buildStreamingQueries =
      writeStream(process, out, OutputMode.Append).toQueryExecution

    private def process: Dataset[Data] = {
      val recordsPerSecond = 10

      val keyGen: () ⇒ String = () ⇒ if (Random.nextDouble() < 0.5) "keyOne" else "keyTwo"

      val rateStream = session.readStream
        .format("rate")
        .option("rowsPerSecond", recordsPerSecond)
        .load()
        .as[Rate]

      rateStream.map {
        case Rate(_, value) ⇒ Data(keyGen(), value.toInt)
      }
    }
  }
}
Example 30
Source File: InsertMysqlDemo.scala From spark_mysql with Apache License 2.0 | 5 votes |
import java.sql.{Date, Timestamp} import InsertMysqlDemo.CardMember import org.apache.spark.sql.SQLContext import org.apache.spark.{SparkConf, SparkContext} import utils.MySQLUtils /** * Created with IntelliJ IDEA. * Author: [email protected] * Description: Save DataFrame data into MySQL * Date: Created in 2018-11-17 12:39 */ object InsertMysqlDemo { case class CardMember(m_id: String, card_type: String, expire: Timestamp, duration: Int, is_sale: Boolean, date: Date, user: Long, salary: Float) def main(args: Array[String]): Unit = { val conf = new SparkConf().setMaster("local[*]").setAppName(getClass.getSimpleName).set("spark.testing.memory", "3147480000") val sparkContext = new SparkContext(conf) val hiveContext = new SQLContext(sparkContext) import hiveContext.implicits._ val memberSeq = Seq( CardMember("member_2", "月卡", new Timestamp(System.currentTimeMillis()), 31, false, new Date(System.currentTimeMillis()), 123223, 0.32f), CardMember("member_1", "季卡", new Timestamp(System.currentTimeMillis()), 93, false, new Date(System.currentTimeMillis()), 124224, 0.362f) ) val memberDF = memberSeq.toDF() MySQLUtils.saveDFtoDBCreateTableIfNotExist("member_test", memberDF) MySQLUtils.insertOrUpdateDFtoDBUsePool("member_test", memberDF, Array("user", "salary")) MySQLUtils.getDFFromMysql(hiveContext, "", null) sparkContext.stop() } }
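The demo rows above rely on the two millisecond constructors from java.sql. A short, MySQL-free reminder of what each type carries (the printed values are only indicative):

import java.sql.{Date, Timestamp}

object SqlTypesSketch extends App {
  val now = System.currentTimeMillis()
  val ts  = new Timestamp(now)   // date + time-of-day (+ nanos); maps to DATETIME/TIMESTAMP columns
  val d   = new Date(now)        // calendar date only when rendered; maps to DATE columns

  println(ts)                    // e.g. 2018-11-17 12:39:05.123
  println(d)                     // e.g. 2018-11-17
}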
Example 31
Source File: SchemaData.scala From pulsar-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.pulsar import java.sql.Timestamp import java.util import java.util.Calendar import scala.beans.BeanProperty import scala.collection.JavaConverters._ object SchemaData { val booleanSeq = Seq(true, false, true, true, false) val bytesSeq = 1.to(5).map(_.toString.getBytes) val cal = Calendar.getInstance() cal.clear() val dateSeq = (1 to 5).map { i => cal.set(2019, 0, i) cal.getTime } cal.clear() val timestampSeq = (1 to 5).map { i => cal.set(2019, 0, i, 20, 35, 40) new Timestamp(cal.getTimeInMillis) } val stringSeq = 1.to(5).map(_.toString) val int8Seq = 1.to(5).map(_.toByte) val doubleSeq = 1.to(5).map(_.toDouble) val floatSeq = 1.to(5).map(_.toFloat) val int32Seq = 1.to(5) val int64Seq = 1.to(5).map(_.toLong) val int16Seq = 1.to(5).map(_.toShort) case class Foo(@BeanProperty i: Int, @BeanProperty f: Float, @BeanProperty bar: Bar) case class Bar(@BeanProperty b: Boolean, @BeanProperty s: String) case class F1(@BeanProperty baz: Baz) case class Baz( @BeanProperty f: Float, @BeanProperty d: Double, @BeanProperty mp: util.Map[String, Bar], @BeanProperty arr: Array[Bar]) val fooSeq: Seq[Foo] = Foo(1, 1.0.toFloat, Bar(true, "a")) :: Foo(2, 2.0.toFloat, Bar(false, "b")) :: Foo(3, 0, null) :: Nil val f1Seq: Seq[F1] = F1( Baz( Float.NaN, Double.NaN, Map("1" -> Bar(true, "1"), "2" -> Bar(false, "2")).asJava, Array(Bar(true, "1"), Bar(true, "2")))) :: F1( Baz( Float.NegativeInfinity, Double.NegativeInfinity, Map("" -> Bar(true, "1")).asJava, null)) :: F1(Baz(Float.PositiveInfinity, Double.PositiveInfinity, null, null)) :: F1(Baz(1.0.toFloat, 2.0, null, null)) :: Nil val f1Results = f1Seq.map(f1 => (f1.baz.f, f1.baz.d, if (f1.baz.mp == null) null else f1.baz.mp.asScala, f1.baz.arr)) }
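The fixture above derives its timestamps from a shared Calendar; keep in mind that cal.set(2019, 0, i, 20, 35, 40) takes a zero-based month (0 = January) and that cal.clear() zeroes the fields set does not touch, such as milliseconds. The pattern in isolation:

import java.sql.Timestamp
import java.util.Calendar

object CalendarTimestampSketch extends App {
  val cal = Calendar.getInstance()
  cal.clear()                          // otherwise leftover millis leak into the Timestamp
  cal.set(2019, 0, 3, 20, 35, 40)      // 2019-01-03 20:35:40 in the JVM default zone
  println(new Timestamp(cal.getTimeInMillis))
}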
Example 32
Source File: Executor.scala From neo4j-spark-connector with Apache License 2.0 | 5 votes |
package org.neo4j.spark import java.time.{LocalDate, LocalDateTime, OffsetTime, ZoneOffset, ZonedDateTime} import java.util import java.sql.Timestamp import org.apache.spark.SparkContext import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.types.StructType import org.neo4j.spark.dataframe.CypherTypes import org.neo4j.spark.utils.{Neo4jSessionAwareIterator, Neo4jUtils} import scala.collection.JavaConverters._ object Executor { def convert(value: AnyRef): Any = value match { case it: util.Collection[_] => it.toArray() case m: java.util.Map[_,_] => m.asScala case _ => Neo4jUtils.convert(value) } def toJava(parameters: Map[String, Any]): java.util.Map[String, Object] = { parameters.mapValues(toJava).asJava } private def toJava(x: Any): AnyRef = x match { case y: Seq[_] => y.asJava case _ => x.asInstanceOf[AnyRef] } val EMPTY = Array.empty[Any] val EMPTY_RESULT = new CypherResult(new StructType(), Iterator.empty) class CypherResult(val schema: StructType, val rows: Iterator[Array[Any]]) { def sparkRows: Iterator[Row] = rows.map(row => new GenericRowWithSchema(row, schema)) def fields = schema.fieldNames } def execute(sc: SparkContext, query: String, parameters: Map[String, AnyRef]): CypherResult = { execute(Neo4jConfig(sc.getConf), query, parameters) } private def rows(result: Iterator[_]) = { var i = 0 while (result.hasNext) i = i + 1 i } def execute(config: Neo4jConfig, query: String, parameters: Map[String, Any], write: Boolean = false): CypherResult = { val result = new Neo4jSessionAwareIterator(config, query, toJava(parameters), write) if (!result.hasNext) { return EMPTY_RESULT } val peek = result.peek() val keyCount = peek.size() if (keyCount == 0) { return new CypherResult(new StructType(), Array.fill[Array[Any]](rows(result))(EMPTY).toIterator) } val keys = peek.keys().asScala val fields = keys.map(k => (k, peek.get(k).`type`())).map(keyType => CypherTypes.field(keyType)) val schema = StructType(fields) val it = result.map(record => { val row = new Array[Any](keyCount) var i = 0 while (i < keyCount) { val value = convert(record.get(i).asObject()) row.update(i, value) i = i + 1 } row }) new CypherResult(schema, it) } }
Example 33
Source File: Neo4jUtils.scala From neo4j-spark-connector with Apache License 2.0 | 5 votes |
package org.neo4j.spark.utils import java.sql.Timestamp import java.time._ import java.util.concurrent.Callable import java.util.function import io.github.resilience4j.retry.{Retry, RetryConfig} import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.neo4j.driver.exceptions.{ServiceUnavailableException, SessionExpiredException, TransientException} import org.neo4j.driver.{Driver, Result, Session, Transaction} import org.neo4j.spark.Neo4jConfig import org.slf4j.LoggerFactory class Neo4jUtils object Neo4jUtils { private val logger = LoggerFactory.getLogger(classOf[Neo4jUtils]) def close(driver: Driver, session: Session): Unit = { try { if (session != null && session.isOpen) { closeSafety(session) } } finally { if (driver != null) { closeSafety(driver) } } } private def closeSafety(closable: AutoCloseable): Unit = { try { closable.close() } catch { case e: Throwable => { logger.error("Exception while trying to close an AutoCloseable, because of the following exception", e) } } } private val retryConfig = RetryConfig.custom.retryExceptions( classOf[SessionExpiredException], classOf[ServiceUnavailableException] // retry on the same exceptions the driver does [1] ) .retryOnException(new function.Predicate[Throwable] { override def test(exception: Throwable): Boolean = exception match { case t: TransientException => { val code = t.code() !("Neo.TransientError.Transaction.Terminated" == code) && !("Neo.TransientError.Transaction.LockClientStopped" == code) } case _ => false } }) .maxAttempts(3) .build def executeTxWithRetries[T](neo4jConfig: Neo4jConfig, query: String, params: java.util.Map[String, AnyRef], write: Boolean): (Driver, Session, Transaction, Result) = { val driver: Driver = neo4jConfig.driver() val session: Session = driver.session(neo4jConfig.sessionConfig(write)) Retry.decorateCallable( Retry.of("neo4jTransactionRetryPool", retryConfig), new Callable[(Driver, Session, Transaction, Result)] { override def call(): (Driver, Session, Transaction, Result) = { val transaction = session.beginTransaction() val result = transaction.run(query, params) (driver, session, transaction, result) } } ) .call() } def convert(value: AnyRef): AnyRef = value match { case m: ZonedDateTime => new Timestamp(DateTimeUtils.fromUTCTime(m.toInstant.toEpochMilli, m.getZone.getId)) case m: LocalDateTime => new Timestamp(DateTimeUtils.fromUTCTime(m.toInstant(ZoneOffset.UTC).toEpochMilli,"UTC")) case m: LocalDate => java.sql.Date.valueOf(m) case m: OffsetTime => new Timestamp(m.atDate(LocalDate.ofEpochDay(0)).toInstant.toEpochMilli) case _ => value } }
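The convert method above maps Neo4j temporal values to Timestamp through Spark's internal DateTimeUtils, which applies its own zone shifting. With plain java.time the closest standard conversions look roughly like this; this is a sketch of the general API, not a drop-in replacement for the connector's exact semantics:

import java.sql.Timestamp
import java.time.{LocalDate, LocalDateTime, ZonedDateTime, ZoneOffset}

object JavaTimeToSqlSketch extends App {
  val zdt = ZonedDateTime.parse("2018-11-12T09:42:00+01:00[Europe/Berlin]")
  val ldt = LocalDateTime.of(2018, 11, 12, 9, 42)

  val fromZoned    = Timestamp.from(zdt.toInstant)                   // instant-preserving
  val fromLocal    = Timestamp.valueOf(ldt)                          // read in the JVM default zone
  val fromLocalUtc = Timestamp.from(ldt.toInstant(ZoneOffset.UTC))   // read as UTC instead
  val sqlDate      = java.sql.Date.valueOf(LocalDate.of(2018, 11, 12))

  println(Seq(fromZoned, fromLocal, fromLocalUtc, sqlDate).mkString(" | "))
}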
Example 34
Source File: AnnouncementService.scala From recogito2 with Apache License 2.0 | 5 votes |
package services.announcement import java.sql.Timestamp import java.util.Date import javax.inject.{Inject, Singleton} import scala.concurrent.{ExecutionContext, Future} import services.BaseService import services.generated.Tables.SERVICE_ANNOUNCEMENT import services.generated.tables.records.ServiceAnnouncementRecord import storage.db.DB import services.user.UserService import java.util.UUID @Singleton class AnnouncementService @Inject() (val db: DB, users: UserService, implicit val ctx: ExecutionContext) extends BaseService { def findLatestUnread(username: String): Future[Option[ServiceAnnouncementRecord]] = db.query { sql => Option(sql.selectFrom(SERVICE_ANNOUNCEMENT) .where(SERVICE_ANNOUNCEMENT.FOR_USER.equal(username) .and(SERVICE_ANNOUNCEMENT.RESPONSE.isNull)) .orderBy(SERVICE_ANNOUNCEMENT.CREATED_AT.desc()) .fetchOne()) } def confirm(uuid: UUID, username: String, response: String): Future[Boolean] = db.query { sql => val result = sql.update(SERVICE_ANNOUNCEMENT) .set(SERVICE_ANNOUNCEMENT.VIEWED_AT, new Timestamp(new Date().getTime)) .set(SERVICE_ANNOUNCEMENT.RESPONSE, response) .where(SERVICE_ANNOUNCEMENT.ID.equal(uuid).and(SERVICE_ANNOUNCEMENT.FOR_USER.equal(username))) .execute() result == 1 } def clearAll(): Future[Boolean] = db.query { sql => sql.deleteFrom(SERVICE_ANNOUNCEMENT).execute() true } recover { case t: Throwable => t.printStackTrace() false } def deleteForUser(username: String) = db.query { sql => sql.deleteFrom(SERVICE_ANNOUNCEMENT) .where(SERVICE_ANNOUNCEMENT.FOR_USER.equal(username)).execute() } def insertBroadcastAnnouncement(content: String): Future[Boolean] = { val BATCH_SIZE = 200 def insertOneBatch(users: Seq[String]): Future[_] = db.query { sql => sql.batch(users.map { user => sql.insertInto(SERVICE_ANNOUNCEMENT, SERVICE_ANNOUNCEMENT.ID, SERVICE_ANNOUNCEMENT.FOR_USER, SERVICE_ANNOUNCEMENT.CONTENT, SERVICE_ANNOUNCEMENT.CREATED_AT, SERVICE_ANNOUNCEMENT.VIEWED_AT, SERVICE_ANNOUNCEMENT.RESPONSE ).values( UUID.randomUUID(), user, content, new Timestamp(new Date().getTime), null, null) }:_*).execute() } def insertBatchesRecursive(offset: Int, numUsers: Int): Future[Boolean] = users.listUsers(offset, BATCH_SIZE, None, None).flatMap { users => insertOneBatch(users.items.map(_._1.getUsername)) } flatMap { _ => if (offset + BATCH_SIZE >= numUsers) Future.successful(true) else insertBatchesRecursive(offset + BATCH_SIZE, numUsers) } val f = for { numUsers <- users.countUsers() success <- insertBatchesRecursive(0, numUsers) } yield (success) f.recover { case t: Throwable => play.api.Logger.info(t.getMessage) t.printStackTrace() false } } }
Example 35
Source File: MimirUDF.scala From mimir with Apache License 2.0 | 5 votes |
package mimir.exec.spark.udf import java.sql.{ Timestamp, Date } import org.apache.spark.sql.types.{ DataType, StructType, StructField } import mimir.algebra._ import mimir.exec.spark._ import mimir.util.SparkUtils class MimirUDF { def getPrimitive(t:Type, value:Any) = value match { case null => NullPrimitive() case _ => t match { //case TInt() => IntPrimitive(value.asInstanceOf[Long]) case TInt() => IntPrimitive(value.asInstanceOf[Long]) case TFloat() => FloatPrimitive(value.asInstanceOf[Double]) case TDate() => SparkUtils.convertDate(value.asInstanceOf[Date]) case TTimestamp() => SparkUtils.convertTimestamp(value.asInstanceOf[Timestamp]) case TString() => StringPrimitive(value.asInstanceOf[String]) case TBool() => BoolPrimitive(value.asInstanceOf[Boolean]) case TRowId() => RowIdPrimitive(value.asInstanceOf[String]) case TType() => TypePrimitive(Type.fromString(value.asInstanceOf[String])) //case TAny() => NullPrimitive() //case TUser(name) => name.toLowerCase //case TInterval() => Primitive(value.asInstanceOf[Long]) case _ => StringPrimitive(value.asInstanceOf[String]) } } def getNative(primitive : PrimitiveValue) : AnyRef = primitive match { case NullPrimitive() => null case RowIdPrimitive(s) => s case StringPrimitive(s) => s case IntPrimitive(i) => new java.lang.Long(i) case FloatPrimitive(f) => new java.lang.Double(f) case BoolPrimitive(b) => new java.lang.Boolean(b) case ts@TimestampPrimitive(y,m,d,h,mm,s,ms) => SparkUtils.convertTimestamp(ts) case dt@DatePrimitive(y,m,d) => SparkUtils.convertDate(dt) case x => x.asString } def getStructType(datatypes:Seq[DataType]): StructType = { StructType(datatypes.map(dti => StructField("", RAToSpark.getInternalSparkType(dti), true))) } }
Example 36
Source File: TaskRecordAggregate.scala From recogito2 with Apache License 2.0 | 5 votes |
package services.task import java.sql.Timestamp import java.util.UUID import services.generated.tables.records.TaskRecord import play.api.libs.json._ import play.api.libs.json.Reads._ import play.api.libs.functional.syntax._ case class TaskRecordAggregate(taskRecords: Seq[TaskRecord]) { private def getDistinctField[T](filter: TaskRecord => T, errorMessage: String): T = { val fields = taskRecords.map(filter).distinct if (fields.size != 1) throw new RuntimeException("Invalid task record aggregation: " + errorMessage + " (" + fields.mkString(", ") + ")") fields.head } lazy val taskType = TaskType(getDistinctField[String](_.getTaskType, "different task types")) lazy val className = getDistinctField[String](_.getClassName, "different class names") lazy val documentId = getDistinctField[String](_.getDocumentId, "different document IDs") lazy val spawnedBy = getDistinctField[String](_.getSpawnedBy, "different values for spawned_by") lazy val spawnedAt = taskRecords.sortBy(_.getSpawnedAt.getTime).head.getSpawnedAt lazy val stoppedAt = { val stoppedAtByTask = taskRecords.map(task => Option(task.getStoppedAt)) if (stoppedAtByTask.exists(_.isEmpty)) // At least one sub-task is unfinished - report aggregate task as unfinished None else /// All stopped - use latest stop time Some(stoppedAtByTask.flatten.sortBy(_.getTime).reverse.head) } lazy val stoppedWith = taskRecords.flatMap(task => Option(task.getStoppedWith)) lazy val status = taskRecords.map(task => TaskStatus.withName(task.getStatus)) match { case statusByTask if statusByTask.exists(_ == TaskStatus.FAILED) => // Any task that failed? TaskStatus.FAILED case statusByTask if statusByTask.forall(_ == TaskStatus.COMPLETED) => // All complete? TaskStatus.COMPLETED case statusByTask if statusByTask.forall(_ == TaskStatus.PENDING) => // All pending? TaskStatus.PENDING case _ => TaskStatus.RUNNING } lazy val progress = taskRecords.map(_.getProgress.toInt).sum / taskRecords.size } object TaskRecordAggregate { implicit val taskRecordWrites: Writes[TaskRecord] = ( (JsPath \ "task_type").write[String] and (JsPath \ "filepart_id").write[UUID] and (JsPath \ "status").write[String] and (JsPath \ "progress").write[Int] )(r => ( r.getTaskType, r.getFilepartId, r.getStatus, r.getProgress )) implicit val aggregateTaskRecordWrites: Writes[TaskRecordAggregate] = ( (JsPath \ "document_id").write[String] and (JsPath \ "status").write[String] and (JsPath \ "progress").write[Int] and (JsPath \ "subtasks").write[Seq[TaskRecord]] )(r => ( r.documentId, r.status.toString, r.progress, r.taskRecords )) }
Example 37
Source File: NetworkOps.scala From recogito2 with Apache License 2.0 | 5 votes |
package services.document.network import java.sql.Timestamp import scala.concurrent.{ExecutionContext, Future} import services.document.DocumentService trait NetworkOps { self: DocumentService => def getNetwork(docId: String)(implicit ctx: ExecutionContext): Future[Option[AncestryTree]] = { val f = for { maybeRoot <- getNetworkRoot(docId) descendants <- maybeRoot.map(rootNode => getDescendants(rootNode.id)) .getOrElse(Future.successful(Seq.empty[TreeRecord])) } yield (maybeRoot, descendants) f.map { case (maybeRoot, descendants) => maybeRoot.map(rootNode => AncestryTree(rootNode, descendants)) } } }
Example 38
Source File: AncestryTree.scala From recogito2 with Apache License 2.0 | 5 votes |
package services.document.network import java.sql.Timestamp case class AncestryTree(private val root: TreeRecord, private[network] val descendants: Seq[TreeRecord]) { val rootNode = AncestryTreeNode( root.id, root.owner, root.clonedFrom, root.clonedAt, // Should ALWAYS be None this) } case class AncestryTreeNode( id: String, owner: String, clonedFrom: Option[String], clonedAt: Option[Timestamp], private val tree: AncestryTree ) { lazy val children: Seq[AncestryTreeNode] = tree.descendants .filter(_.clonedFrom == Some(id)) .map(r => AncestryTreeNode(r.id, r.owner, r.clonedFrom, r.clonedAt, tree)) }
Example 39
Source File: PublicAccountInfo.scala From recogito2 with Apache License 2.0 | 5 votes |
package controllers.my.account import java.sql.Timestamp import org.joda.time.DateTime import play.api.libs.json._ import play.api.libs.functional.syntax._ import services.HasDate import services.contribution.stats.ContributorActivity import services.document.read.AccessibleDocumentsCount import services.user.User case class PublicAccountInfo( user: User, accessibleDocuments: AccessibleDocumentsCount, stats: ContributorActivity) object PublicAccountInfo extends HasDate { implicit val accessibleDocumentsWrites: Writes[AccessibleDocumentsCount] = ( (JsPath \ "public").write[Long] and (JsPath \ "shared_with_me").writeNullable[Long] )(d => (d.public, d.shared)) implicit val visitedAccountInfoWrites: Writes[PublicAccountInfo] = ( (JsPath \ "username").write[String] and (JsPath \ "real_name").writeNullable[String] and (JsPath \ "member_since").write[DateTime] and (JsPath \ "bio").writeNullable[String] and (JsPath \ "website").writeNullable[String] and (JsPath \ "documents").write[AccessibleDocumentsCount] and (JsPath \ "stats").write[ContributorActivity] )(v => ( v.user.username, v.user.realName, new DateTime(v.user.memberSince.getTime), v.user.bio, v.user.website, v.accessibleDocuments, v.stats )) }
Example 40
Source File: PrivateAccountInfo.scala From recogito2 with Apache License 2.0 | 5 votes |
package controllers.my.account import java.sql.Timestamp import org.joda.time.DateTime import play.api.libs.json._ import play.api.libs.functional.syntax._ import services.{HasDate, HasNullableSeq} import services.contribution.stats.ContributorActivity import services.user.User case class PrivateAccountInfo( user: User, myDocumentsCount: Long, sharedWithMeCount: Long, stats: ContributorActivity, usedMb: Double) object PrivateAccountInfo extends HasDate with HasNullableSeq { implicit val personalAccountInfoWrites: Writes[PrivateAccountInfo] = ( (JsPath \ "username").write[String] and (JsPath \ "real_name").writeNullable[String] and (JsPath \ "member_since").write[DateTime] and (JsPath \ "bio").writeNullable[String] and (JsPath \ "website").writeNullable[String] and (JsPath \ "feature_toggles").writeNullable[Seq[String]] and (JsPath \ "documents").write[JsObject] and (JsPath \ "storage").write[JsObject] and (JsPath \ "stats").write[ContributorActivity] )(p => ( p.user.username, p.user.realName, new DateTime(p.user.memberSince.getTime), p.user.bio, p.user.website, toOptSeq(p.user.featureToggles), Json.obj( "my_documents" -> p.myDocumentsCount, "shared_with_me" -> p.sharedWithMeCount ), Json.obj( "quota_mb" -> p.user.quotaMb.toInt, "used_mb" -> p.usedMb ), p.stats )) }
Example 41
Source File: TwitterBatchTimely.scala From Mastering-Spark-for-Data-Science with MIT License | 5 votes |
package io.gzet.timeseries import java.sql.Timestamp import com.cloudera.sparkts.{DateTimeIndex, TimeSeriesRDD} import io.gzet.timeseries.timely.MetricImplicits._ import io.gzet.timeseries.timely.TimelyImplicits._ import io.gzet.timeseries.twitter.Twitter._ import io.gzet.utils.spark.accumulo.AccumuloConfig import org.apache.spark.sql.SQLContext import org.apache.spark.{SparkConf, SparkContext} import org.joda.time.{DateTime, Minutes, Period} object TwitterBatchTimely extends SimpleConfig { case class Observation( hashtag: String, time: Timestamp, count: Double ) def main(args: Array[String]) = { val sparkConf = new SparkConf().setAppName("Twitter Extractor") val sc = new SparkContext(sparkConf) val sqlContext = new SQLContext(sc) import sqlContext.implicits._ val twitterJsonRDD = sc.textFile("file:///Users/antoine/CHAPTER/twitter-trump", 500) val tweetRDD = twitterJsonRDD mapPartitions analyzeJson cache() // Publish metrics to Timely tweetRDD.count() tweetRDD.countByState.publish() tweetRDD.sentimentByState.publish() // Read metrics from Timely val conf = AccumuloConfig("GZET", "alice", "alice", "localhost:2181") val metricsRDD = sc.timely(conf, Some("io.gzet.count")) val minDate = metricsRDD.map(_.time).min() val maxDate = metricsRDD.map(_.time).max() class TwitterFrequency(val minutes: Int) extends com.cloudera.sparkts.PeriodFrequency(Period.minutes(minutes)) { def difference(dt1: DateTime, dt2: DateTime): Int = Minutes.minutesBetween(dt1, dt2).getMinutes / minutes override def toString: String = s"minutes $minutes" } val dtIndex = DateTimeIndex.uniform(minDate, maxDate, new TwitterFrequency(1)) val metricsDF = metricsRDD.filter({ metric => metric.tags.keys.toSet.contains("tag") }).flatMap({ metric => metric.tags map { case (k, v) => ((v, roundFloorMinute(metric.time, 1)), metric.value) } }).reduceByKey(_+_).map({ case ((metric, time), sentiment) => Observation(metric, new Timestamp(time), sentiment) }).toDF() val tsRDD = TimeSeriesRDD.timeSeriesRDDFromObservations(dtIndex, metricsDF, "time", "hashtag", "count").filter(_._2.toArray.exists(!_.isNaN)) } def roundFloorMinute(time: Long, windowMinutes: Int) = { val dt = new DateTime(time) dt.withMinuteOfHour((dt.getMinuteOfHour / windowMinutes) * windowMinutes).minuteOfDay().roundFloorCopy().toDate.getTime } }
Example 42
Source File: FieldDateTime.scala From spark-gdb with Apache License 2.0 | 5 votes |
package com.esri.gdb import java.nio.ByteBuffer import java.sql.Timestamp import org.apache.spark.sql.types.{Metadata, TimestampType} class FieldDateTime(name: String, nullValueAllowed: Boolean, metadata:Metadata) extends Field(name, TimestampType, nullValueAllowed, metadata) { override def readValue(byteBuffer: ByteBuffer, oid: Int) = { val numDays = byteBuffer.getDouble // convert days since 12/30/1899 to 1/1/1970 val unixDays = numDays - 25569 val millis = (unixDays * 1000 * 60 * 60 * 24).ceil.toLong new Timestamp(millis) } }
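The constant 25569 above is the number of days between the Esri/Excel epoch (1899-12-30) and the Unix epoch (1970-01-01). A quick check of that offset and of the conversion, assuming the same ceil-to-millis rounding as the reader above:

import java.sql.Timestamp
import java.time.LocalDate
import java.time.temporal.ChronoUnit

object GdbEpochSketch extends App {
  val offset = ChronoUnit.DAYS.between(LocalDate.of(1899, 12, 30), LocalDate.of(1970, 1, 1))
  println(offset)                        // 25569

  def gdbDaysToTimestamp(numDays: Double): Timestamp = {
    val unixDays = numDays - offset
    new Timestamp((unixDays * 24 * 60 * 60 * 1000).ceil.toLong)
  }

  println(gdbDaysToTimestamp(25569.5))   // noon UTC on 1970-01-01, rendered in the local zone
}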
Example 43
Source File: package.scala From modelmatrix with Apache License 2.0 | 5 votes |
package com.collective.modelmatrix import java.sql.Timestamp import java.time.Instant import org.apache.spark.sql.types._ import scodec.bits.ByteVector import slick.driver.PostgresDriver.api._ package object catalog { implicit val instantColumnType = MappedColumnType.base[Instant, java.sql.Timestamp]( instant => Timestamp.from(instant), _.toInstant ) implicit val dataTypeColumnType = MappedColumnType.base[DataType, String]({ case ShortType => "short" case IntegerType => "integer" case LongType => "long" case DoubleType => "double" case StringType => "string" }, { case "short" => ShortType case "integer" => IntegerType case "long" => LongType case "double" => DoubleType case "string" => StringType }) implicit val byteVectorColumnType = MappedColumnType.base[ByteVector, Array[Byte]]( _.toArray, ByteVector.apply ) }
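The Slick column mapping above is just the Timestamp.from / toInstant pair; the round trip is lossless down to nanoseconds and needs no Slick to demonstrate:

import java.sql.Timestamp
import java.time.Instant

object InstantRoundTripSketch extends App {
  val instant = Instant.parse("2018-11-17T12:39:00.123456789Z")
  val ts      = Timestamp.from(instant)   // Instant -> java.sql.Timestamp
  val back    = ts.toInstant              // and back, nanosecond precision preserved

  assert(back == instant)
  println(ts)
}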
Example 44
Source File: Utils.scala From lemon-schedule with GNU General Public License v2.0 | 5 votes |
package com.gabry.job.utils import java.sql.Timestamp import java.text.SimpleDateFormat import java.util.Date import java.util.concurrent.TimeUnit import scala.collection.mutable.ArrayBuffer def getLoadedClass(classLoader: ClassLoader):Array[Class[_]] = { val loadedClass = ArrayBuffer.empty[Class[_]] val loadedClassEnum = classesField.get(classLoader).asInstanceOf[java.util.Vector[Class[_]]].elements() while(loadedClassEnum.hasMoreElements){ val nextElement = loadedClassEnum.nextElement() loadedClass.append(nextElement) } loadedClass.toArray } }
Example 45
Source File: Utils.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.datasources import java.sql.{Date, Timestamp} import org.apache.hadoop.hbase.spark.AvroSerdes import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String import org.apache.yetus.audience.InterfaceAudience; @InterfaceAudience.Private object Utils { def hbaseFieldToScalaType( f: Field, src: Array[Byte], offset: Int, length: Int): Any = { if (f.exeSchema.isDefined) { // If we have avro schema defined, use it to get record, and then convert them to catalyst data type val m = AvroSerdes.deserialize(src, f.exeSchema.get) val n = f.avroToCatalyst.map(_(m)) n.get } else { // Fall back to atomic type f.dt match { case BooleanType => src(offset) != 0 case ByteType => src(offset) case ShortType => Bytes.toShort(src, offset) case IntegerType => Bytes.toInt(src, offset) case LongType => Bytes.toLong(src, offset) case FloatType => Bytes.toFloat(src, offset) case DoubleType => Bytes.toDouble(src, offset) case DateType => new Date(Bytes.toLong(src, offset)) case TimestampType => new Timestamp(Bytes.toLong(src, offset)) case StringType => UTF8String.fromBytes(src, offset, length) case BinaryType => val newArray = new Array[Byte](length) System.arraycopy(src, offset, newArray, 0, length) newArray // TODO: SparkSqlSerializer.deserialize[Any](src) case _ => throw new Exception(s"unsupported data type ${f.dt}") } } } // convert input to data type def toBytes(input: Any, field: Field): Array[Byte] = { if (field.schema.isDefined) { // Here we assume the top level type is structType val record = field.catalystToAvro(input) AvroSerdes.serialize(record, field.schema.get) } else { field.dt match { case BooleanType => Bytes.toBytes(input.asInstanceOf[Boolean]) case ByteType => Array(input.asInstanceOf[Number].byteValue) case ShortType => Bytes.toBytes(input.asInstanceOf[Number].shortValue) case IntegerType => Bytes.toBytes(input.asInstanceOf[Number].intValue) case LongType => Bytes.toBytes(input.asInstanceOf[Number].longValue) case FloatType => Bytes.toBytes(input.asInstanceOf[Number].floatValue) case DoubleType => Bytes.toBytes(input.asInstanceOf[Number].doubleValue) case DateType | TimestampType => Bytes.toBytes(input.asInstanceOf[java.util.Date].getTime) case StringType => Bytes.toBytes(input.toString) case BinaryType => input.asInstanceOf[Array[Byte]] case _ => throw new Exception(s"unsupported data type ${field.dt}") } } } }
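In the HBase mapping above a TimestampType cell is simply the 8-byte epoch-millis long (Bytes.toBytes on the getTime value when writing, new Timestamp(Bytes.toLong(...)) when reading). The same round trip sketched with a plain ByteBuffer standing in for HBase's Bytes helper, which also encodes longs big-endian:

import java.nio.ByteBuffer
import java.sql.Timestamp

object TimestampBytesSketch extends App {
  val ts    = new Timestamp(1542458340000L)   // arbitrary instant, millisecond precision only
  val bytes = ByteBuffer.allocate(8).putLong(ts.getTime).array()
  val back  = new Timestamp(ByteBuffer.wrap(bytes).getLong)

  assert(back == ts)
  println(back)
}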
Example 46
Source File: RDBDataTypeConverter.scala From ohara with Apache License 2.0 | 5 votes |
package oharastream.ohara.connector.jdbc.datatype import java.sql.{Date, ResultSet, Time, Timestamp} import java.util.Optional import oharastream.ohara.client.configurator.InspectApi.RdbColumn import oharastream.ohara.connector.jdbc.util.DateTimeUtils trait RDBDataTypeConverter { def converterValue(resultSet: ResultSet, column: RdbColumn): Any = { val columnName = column.name val typeName = column.dataType.toUpperCase val dataType: DataTypeEnum = converterDataType(column) dataType match { case DataTypeEnum.INTEGER => java.lang.Integer.valueOf(resultSet.getInt(columnName)) case DataTypeEnum.LONG => java.lang.Long.valueOf(resultSet.getLong(columnName)) case DataTypeEnum.BOOLEAN => java.lang.Boolean.valueOf(resultSet.getBoolean(columnName)) case DataTypeEnum.FLOAT => java.lang.Float.valueOf(resultSet.getFloat(columnName)) case DataTypeEnum.DOUBLE => java.lang.Double.valueOf(resultSet.getDouble(columnName)) case DataTypeEnum.BIGDECIMAL => Optional.ofNullable(resultSet.getBigDecimal(columnName)).orElseGet(() => new java.math.BigDecimal(0L)) case DataTypeEnum.STRING => Optional.ofNullable(resultSet.getString(columnName)).orElseGet(() => "null") case DataTypeEnum.DATE => Optional.ofNullable(resultSet.getDate(columnName, DateTimeUtils.CALENDAR)).orElseGet(() => new Date(0)) case DataTypeEnum.TIME => Optional.ofNullable(resultSet.getTime(columnName, DateTimeUtils.CALENDAR)).orElseGet(() => new Time(0)) case DataTypeEnum.TIMESTAMP => Optional .ofNullable(resultSet.getTimestamp(columnName, DateTimeUtils.CALENDAR)) .orElseGet(() => new Timestamp(0)) case DataTypeEnum.BYTES => Optional.ofNullable(resultSet.getBytes(columnName)).orElseGet(() => Array()) case _ => throw new UnsupportedOperationException( s"JDBC Source Connector not support ${typeName} data type in ${columnName} column for ${dataBaseProductName} implement." ) } } protected[datatype] def dataBaseProductName: String protected[datatype] def converterDataType(column: RdbColumn): DataTypeEnum }
Example 47
Source File: SnowflakeWriter.scala From spark-snowflake with Apache License 2.0 | 5 votes |
package net.snowflake.spark.snowflake import java.sql.{Date, Timestamp} import net.snowflake.client.jdbc.internal.apache.commons.codec.binary.Base64 import net.snowflake.spark.snowflake.Parameters.MergedParameters import net.snowflake.spark.snowflake.io.SupportedFormat import net.snowflake.spark.snowflake.io.SupportedFormat.SupportedFormat import org.apache.spark.rdd.RDD import org.apache.spark.sql.types._ import org.apache.spark.sql._ private def removeUselessColumns(dataFrame: DataFrame, params: MergedParameters): DataFrame = params.columnMap match { case Some(map) => // Enclose column name with backtick(`) if dot(.) exists in column name val names = map.keys.toSeq.map(name => if (name.contains(".")) { s"`$name`" } else { name }) try { dataFrame.select(names.head, names.tail: _*) } catch { case e: AnalysisException => throw new IllegalArgumentException( "Incorrect column name when column mapping: " + e.toString ) } case _ => dataFrame } // Prepare a set of conversion functions, based on the schema def genConversionFunctions(schema: StructType): Array[Any => Any] = schema.fields.map { field => field.dataType match { case DateType => (v: Any) => v match { case null => "" case t: Timestamp => Conversions.formatTimestamp(t) case d: Date => Conversions.formatDate(d) } case TimestampType => (v: Any) => { if (v == null) "" else Conversions.formatTimestamp(v.asInstanceOf[Timestamp]) } case StringType => (v: Any) => { if (v == null) "" else Conversions.formatString(v.asInstanceOf[String]) } case BinaryType => (v: Any) => v match { case null => "" case bytes: Array[Byte] => Base64.encodeBase64String(bytes) } case _ => (v: Any) => Conversions.formatAny(v) } } } object DefaultSnowflakeWriter extends SnowflakeWriter(DefaultJDBCWrapper)
Example 48
Source File: CustomMatchers.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.extractor.services import java.sql.Timestamp import org.scalatest._ import matchers._ import scala.concurrent.duration._ trait CustomMatchers { class SqlTimestampMoreOrLessEquals(expected: Timestamp, tolerance: Duration) extends Matcher[Timestamp] with Matchers { def apply(left: Timestamp) = { MatchResult( left.getTime === (expected.getTime +- tolerance.toMillis), s"""Timestamp $left was not within ${tolerance} to "$expected"""", s"""Timestamp $left was within ${tolerance} to "$expected"""", ) } } def beWithin5Minutes(expected: Timestamp) = new SqlTimestampMoreOrLessEquals(expected, 5.minutes) def beWithin(duration: Duration)(expected: Timestamp) = new SqlTimestampMoreOrLessEquals(expected, duration) def beWithinSeconds(seconds: Long)(expected: Timestamp) = new SqlTimestampMoreOrLessEquals(expected, seconds.seconds) def beWithinMillis(millis: Long)(expected: Timestamp) = new SqlTimestampMoreOrLessEquals(expected, millis.millis) } object CustomMatchers extends CustomMatchers
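A hypothetical usage of those matchers in a ScalaTest suite; the spec name and values are invented, and the class is assumed to live where it can see CustomMatchers:

import java.sql.Timestamp
import org.scalatest.{Matchers, WordSpec}

class TimestampToleranceSpec extends WordSpec with Matchers with CustomMatchers {
  "a freshly created timestamp" should {
    "sit within the configured tolerance of a reference value" in {
      val expected = new Timestamp(System.currentTimeMillis())
      val actual   = new Timestamp(System.currentTimeMillis() + 1500L)

      actual should beWithinSeconds(5)(expected)
      actual should beWithin5Minutes(expected)
    }
  }
}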
Example 49
Source File: Transaction.scala From Scala-Programming-Projects with MIT License | 5 votes |
package coinyser import java.sql.{Date, Timestamp} import java.time.ZoneOffset case class Transaction(timestamp: Timestamp, date: Date, tid: Int, price: Double, sell: Boolean, amount: Double) object Transaction { def apply(timestamp: Timestamp, tid: Int, price: Double, sell: Boolean, amount: Double) = new Transaction( timestamp = timestamp, date = Date.valueOf( timestamp.toInstant.atOffset(ZoneOffset.UTC).toLocalDate), tid = tid, price = price, sell = sell, amount = amount) }
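The secondary constructor above derives the date column from the timestamp by going through java.time in UTC, which keeps the downstream partitioning stable across machines. The conversion on its own:

import java.sql.{Date, Timestamp}
import java.time.ZoneOffset

object TimestampToDateSketch extends App {
  val ts   = new Timestamp(1532365695000L)                                  // an instant on 2018-07-23 (UTC)
  val date = Date.valueOf(ts.toInstant.atOffset(ZoneOffset.UTC).toLocalDate)
  println(date)                                                             // 2018-07-23
}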
Example 50
Source File: StreamingProducerSpec.scala From Scala-Programming-Projects with MIT License | 5 votes |
package coinyser import java.sql.Timestamp import coinyser.StreamingProducerSpec._ import org.scalactic.TypeCheckedTripleEquals import org.scalatest.{Matchers, WordSpec} class StreamingProducerSpec extends WordSpec with Matchers with TypeCheckedTripleEquals { "StreamingProducer.deserializeWebsocketTransaction" should { "deserialize a valid String to a WebsocketTransaction" in { val str = """{"amount": 0.045318270000000001, "buy_order_id": 1969499130, |"sell_order_id": 1969495276, "amount_str": "0.04531827", |"price_str": "6339.73", "timestamp": "1533797395", |"price": 6339.7299999999996, "type": 0, "id": 71826763}""".stripMargin StreamingProducer.deserializeWebsocketTransaction(str) should ===(SampleWebsocketTransaction) } } "StreamingProducer.convertWsTransaction" should { "convert a WebSocketTransaction to a Transaction" in { StreamingProducer.convertWsTransaction(SampleWebsocketTransaction) should ===(SampleTransaction) } } "StreamingProducer.serializeTransaction" should { "serialize a Transaction to a String" in { StreamingProducer.serializeTransaction(SampleTransaction) should ===(SampleJsonTransaction) } } "StreamingProducer.subscribe" should { "register a callback that receives live trades" in { val pusher = new FakePusher(Vector("a", "b", "c")) var receivedTrades = Vector.empty[String] val io = StreamingProducer.subscribe(pusher) { trade => receivedTrades = receivedTrades :+ trade } io.unsafeRunSync() receivedTrades should ===(Vector("a", "b", "c")) } } } object StreamingProducerSpec { val SampleWebsocketTransaction = WebsocketTransaction( amount = 0.04531827, buy_order_id = 1969499130, sell_order_id = 1969495276, amount_str = "0.04531827", price_str = "6339.73", timestamp = "1533797395", price = 6339.73, `type` = 0, id = 71826763) val SampleTransaction = Transaction( timestamp = new Timestamp(1533797395000L), tid = 71826763, price = 6339.73, sell = false, amount = 0.04531827) val SampleJsonTransaction = """{"timestamp":"2018-08-09 06:49:55", |"date":"2018-08-09","tid":71826763,"price":6339.73,"sell":false, |"amount":0.04531827}""".stripMargin }
Example 51
Source File: BatchProducerSpec.scala From Scala-Programming-Projects with MIT License | 5 votes |
package coinyser import java.io.{BufferedOutputStream, StringReader} import java.nio.CharBuffer import java.sql.Timestamp import cats.effect.IO import org.apache.spark.sql._ import org.apache.spark.sql.test.SharedSparkSession import org.scalatest.{Matchers, WordSpec} class BatchProducerSpec extends WordSpec with Matchers with SharedSparkSession { val httpTransaction1 = HttpTransaction("1532365695", "70683282", "7740.00", "0", "0.10041719") val httpTransaction2 = HttpTransaction("1532365693", "70683281", "7739.99", "0", "0.00148564") "BatchProducer.jsonToHttpTransaction" should { "create a Dataset[HttpTransaction] from a Json string" in { val json = """[{"date": "1532365695", "tid": "70683282", "price": "7740.00", "type": "0", "amount": "0.10041719"}, |{"date": "1532365693", "tid": "70683281", "price": "7739.99", "type": "0", "amount": "0.00148564"}]""".stripMargin val ds: Dataset[HttpTransaction] = BatchProducer.jsonToHttpTransactions(json) ds.collect() should contain theSameElementsAs Seq(httpTransaction1, httpTransaction2) } } "BatchProducer.httpToDomainTransactions" should { "transform a Dataset[HttpTransaction] into a Dataset[Transaction]" in { import testImplicits._ val source: Dataset[HttpTransaction] = Seq(httpTransaction1, httpTransaction2).toDS() val target: Dataset[Transaction] = BatchProducer.httpToDomainTransactions(source) val transaction1 = Transaction(timestamp = new Timestamp(1532365695000L), tid = 70683282, price = 7740.00, sell = false, amount = 0.10041719) val transaction2 = Transaction(timestamp = new Timestamp(1532365693000L), tid = 70683281, price = 7739.99, sell = false, amount = 0.00148564) target.collect() should contain theSameElementsAs Seq(transaction1, transaction2) } } }
Example 52
Source File: BatchProducerIT.scala From Scala-Programming-Projects with MIT License | 5 votes |
package coinyser import java.sql.Timestamp import java.time.Instant import java.util.concurrent.TimeUnit import cats.effect.{IO, Timer} import org.apache.spark.sql.test.SharedSparkSession import org.scalatest.{Matchers, WordSpec} import scala.concurrent.duration.FiniteDuration class BatchProducerIT extends WordSpec with Matchers with SharedSparkSession { import testImplicits._ "BatchProducer.save" should { "save a Dataset[Transaction] to parquet" in withTempDir { tmpDir => val transaction1 = Transaction(timestamp = new Timestamp(1532365695000L), tid = 70683282, price = 7740.00, sell = false, amount = 0.10041719) val transaction2 = Transaction(timestamp = new Timestamp(1532365693000L), tid = 70683281, price = 7739.99, sell = false, amount = 0.00148564) val sourceDS = Seq(transaction1, transaction2).toDS() val uri = tmpDir.toURI BatchProducer.save(sourceDS, uri).unsafeRunSync() tmpDir.list() should contain("date=2018-07-23") val readDS = spark.read.parquet(uri.toString).as[Transaction] spark.read.parquet(uri + "/date=2018-07-23").show() sourceDS.collect() should contain theSameElementsAs readDS.collect() } } "BatchProducer.processOneBatch" should { "filter and save a batch of transaction, wait 59 mn, fetch the next batch" in withTempDir { tmpDir => implicit object FakeTimer extends Timer[IO] { private var clockRealTimeInMillis: Long = Instant.parse("2018-08-02T01:00:00Z").toEpochMilli def clockRealTime(unit: TimeUnit): IO[Long] = IO(unit.convert(clockRealTimeInMillis, TimeUnit.MILLISECONDS)) def sleep(duration: FiniteDuration): IO[Unit] = IO { clockRealTimeInMillis = clockRealTimeInMillis + duration.toMillis } def shift: IO[Unit] = ??? def clockMonotonic(unit: TimeUnit): IO[Long] = ??? } implicit val appContext: AppContext = new AppContext(transactionStorePath = tmpDir.toURI) implicit def toTimestamp(str: String): Timestamp = Timestamp.from(Instant.parse(str)) val tx1 = Transaction("2018-08-01T23:00:00Z", 1, 7657.58, true, 0.021762) val tx2 = Transaction("2018-08-02T01:00:00Z", 2, 7663.85, false, 0.01385517) val tx3 = Transaction("2018-08-02T01:58:30Z", 3, 7663.85, false, 0.03782426) val tx4 = Transaction("2018-08-02T01:58:59Z", 4, 7663.86, false, 0.15750809) val tx5 = Transaction("2018-08-02T02:30:00Z", 5, 7661.49, true, 0.1) // Start at 01:00, tx 2 ignored (too soon) val txs0 = Seq(tx1) // Fetch at 01:59, get nb 2 and 3, but will miss nb 4 because of Api lag val txs1 = Seq(tx2, tx3) // Fetch at 02:58, get nb 3, 4, 5 val txs2 = Seq(tx3, tx4, tx5) // Fetch at 03:57, get nothing val txs3 = Seq.empty[Transaction] val start0 = Instant.parse("2018-08-02T00:00:00Z") val end0 = Instant.parse("2018-08-02T00:59:55Z") val threeBatchesIO = for { tuple1 <- BatchProducer.processOneBatch(IO(txs1.toDS()), txs0.toDS(), start0, end0) // end - Api lag (ds1, start1, end1) = tuple1 tuple2 <- BatchProducer.processOneBatch(IO(txs2.toDS()), ds1, start1, end1) (ds2, start2, end2) = tuple2 _ <- BatchProducer.processOneBatch(IO(txs3.toDS()), ds2, start2, end2) } yield (ds1, start1, end1, ds2, start2, end2) val (ds1, start1, end1, ds2, start2, end2) = threeBatchesIO.unsafeRunSync() ds1.collect() should contain theSameElementsAs txs1 start1 should ===(end0) end1 should ===(Instant.parse("2018-08-02T01:58:55Z")) // initialClock + 1mn - 15s - 5s ds2.collect() should contain theSameElementsAs txs2 start2 should ===(end1) end2 should ===(Instant.parse("2018-08-02T02:57:55Z")) // initialClock + 1mn -15s + 1mn -15s -5s = end1 + 45s val lastClock = Instant.ofEpochMilli( 
FakeTimer.clockRealTime(TimeUnit.MILLISECONDS).unsafeRunSync()) lastClock should === (Instant.parse("2018-08-02T03:57:00Z")) val savedTransactions = spark.read.parquet(tmpDir.toString).as[Transaction].collect() val expectedTxs = Seq(tx2, tx3, tx4, tx5) savedTransactions should contain theSameElementsAs expectedTxs } } }
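The integration test above hides its string-to-Timestamp step behind a small implicit; spelled out, it is only Timestamp.from(Instant.parse(...)) for ISO-8601 input:

import java.sql.Timestamp
import java.time.Instant

object IsoTimestampSketch extends App {
  def isoToTimestamp(iso: String): Timestamp = Timestamp.from(Instant.parse(iso))

  println(isoToTimestamp("2018-08-02T01:58:30Z"))   // printed in the JVM default zone
}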
Example 53
Source File: StructuredNetworkWordCountWindowed.scala From drizzle-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples.sql.streaming import java.sql.Timestamp import org.apache.spark.sql.SparkSession import org.apache.spark.sql.functions._ object StructuredNetworkWordCountWindowed { def main(args: Array[String]) { if (args.length < 3) { System.err.println("Usage: StructuredNetworkWordCountWindowed <hostname> <port>" + " <window duration in seconds> [<slide duration in seconds>]") System.exit(1) } val host = args(0) val port = args(1).toInt val windowSize = args(2).toInt val slideSize = if (args.length == 3) windowSize else args(3).toInt if (slideSize > windowSize) { System.err.println("<slide duration> must be less than or equal to <window duration>") } val windowDuration = s"$windowSize seconds" val slideDuration = s"$slideSize seconds" val spark = SparkSession .builder .appName("StructuredNetworkWordCountWindowed") .getOrCreate() import spark.implicits._ // Create DataFrame representing the stream of input lines from connection to host:port val lines = spark.readStream .format("socket") .option("host", host) .option("port", port) .option("includeTimestamp", true) .load() // Split the lines into words, retaining timestamps val words = lines.as[(String, Timestamp)].flatMap(line => line._1.split(" ").map(word => (word, line._2)) ).toDF("word", "timestamp") // Group the data by window and word and compute the count of each group val windowedCounts = words.groupBy( window($"timestamp", windowDuration, slideDuration), $"word" ).count().orderBy("window") // Start running the query that prints the windowed word counts to the console val query = windowedCounts.writeStream .outputMode("complete") .format("console") .option("truncate", "false") .start() query.awaitTermination() } } // scalastyle:on println
Example 54
Source File: LiteralGenerator.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} import org.scalacheck.{Arbitrary, Gen} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.CalendarInterval object LiteralGenerator { lazy val byteLiteralGen: Gen[Literal] = for { b <- Arbitrary.arbByte.arbitrary } yield Literal.create(b, ByteType) lazy val shortLiteralGen: Gen[Literal] = for { s <- Arbitrary.arbShort.arbitrary } yield Literal.create(s, ShortType) lazy val integerLiteralGen: Gen[Literal] = for { i <- Arbitrary.arbInt.arbitrary } yield Literal.create(i, IntegerType) lazy val longLiteralGen: Gen[Literal] = for { l <- Arbitrary.arbLong.arbitrary } yield Literal.create(l, LongType) lazy val floatLiteralGen: Gen[Literal] = for { f <- Gen.chooseNum(Float.MinValue / 2, Float.MaxValue / 2, Float.NaN, Float.PositiveInfinity, Float.NegativeInfinity) } yield Literal.create(f, FloatType) lazy val doubleLiteralGen: Gen[Literal] = for { f <- Gen.chooseNum(Double.MinValue / 2, Double.MaxValue / 2, Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity) } yield Literal.create(f, DoubleType) // TODO cache the generated data def decimalLiteralGen(precision: Int, scale: Int): Gen[Literal] = { assert(scale >= 0) assert(precision >= scale) Arbitrary.arbBigInt.arbitrary.map { s => val a = (s % BigInt(10).pow(precision - scale)).toString() val b = (s % BigInt(10).pow(scale)).abs.toString() Literal.create( Decimal(BigDecimal(s"$a.$b"), precision, scale), DecimalType(precision, scale)) } } lazy val stringLiteralGen: Gen[Literal] = for { s <- Arbitrary.arbString.arbitrary } yield Literal.create(s, StringType) lazy val binaryLiteralGen: Gen[Literal] = for { ab <- Gen.listOf[Byte](Arbitrary.arbByte.arbitrary) } yield Literal.create(ab.toArray, BinaryType) lazy val booleanLiteralGen: Gen[Literal] = for { b <- Arbitrary.arbBool.arbitrary } yield Literal.create(b, BooleanType) lazy val dateLiteralGen: Gen[Literal] = for { d <- Arbitrary.arbInt.arbitrary } yield Literal.create(new Date(d), DateType) lazy val timestampLiteralGen: Gen[Literal] = for { t <- Arbitrary.arbLong.arbitrary } yield Literal.create(new Timestamp(t), TimestampType) lazy val calendarIntervalLiterGen: Gen[Literal] = for { m <- Arbitrary.arbInt.arbitrary; s <- Arbitrary.arbLong.arbitrary} yield Literal.create(new CalendarInterval(m, s), CalendarIntervalType) // Sometimes, it would be quite expensive when unlimited value is used, // for example, the `times` arguments for StringRepeat would hang the test 'forever' // if it's tested against Int.MaxValue by ScalaCheck, therefore, use values from a limited // range is more reasonable lazy val limitedIntegerLiteralGen: Gen[Literal] = for { i <- Gen.choose(-100, 100) } yield Literal.create(i, IntegerType) def randomGen(dt: DataType): Gen[Literal] = { dt match { case ByteType => byteLiteralGen case ShortType => shortLiteralGen case IntegerType => integerLiteralGen case LongType => longLiteralGen case DoubleType => doubleLiteralGen case FloatType => floatLiteralGen case DateType => dateLiteralGen case TimestampType => timestampLiteralGen case BooleanType => booleanLiteralGen case StringType => stringLiteralGen case BinaryType => binaryLiteralGen case CalendarIntervalType => calendarIntervalLiterGen case DecimalType.Fixed(precision, scale) => decimalLiteralGen(precision, scale) case dt => throw new IllegalArgumentException(s"not supported type $dt") } } }
Example 55
Source File: SQLCompatibilityFunctionSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql import java.math.BigDecimal import java.sql.Timestamp import org.apache.spark.sql.test.SharedSQLContext class SQLCompatibilityFunctionSuite extends QueryTest with SharedSQLContext { test("ifnull") { checkAnswer( sql("SELECT ifnull(null, 'x'), ifnull('y', 'x'), ifnull(null, null)"), Row("x", "y", null)) // Type coercion checkAnswer( sql("SELECT ifnull(1, 2.1d), ifnull(null, 2.1d)"), Row(1.0, 2.1)) } test("nullif") { checkAnswer( sql("SELECT nullif('x', 'x'), nullif('x', 'y')"), Row(null, "x")) // Type coercion checkAnswer( sql("SELECT nullif(1, 2.1d), nullif(1, 1.0d)"), Row(1.0, null)) } test("nvl") { checkAnswer( sql("SELECT nvl(null, 'x'), nvl('y', 'x'), nvl(null, null)"), Row("x", "y", null)) // Type coercion checkAnswer( sql("SELECT nvl(1, 2.1d), nvl(null, 2.1d)"), Row(1.0, 2.1)) } test("nvl2") { checkAnswer( sql("SELECT nvl2(null, 'x', 'y'), nvl2('n', 'x', 'y'), nvl2(null, null, null)"), Row("y", "x", null)) // Type coercion checkAnswer( sql("SELECT nvl2(null, 1, 2.1d), nvl2('n', 1, 2.1d)"), Row(2.1, 1.0)) } test("SPARK-16730 cast alias functions for Hive compatibility") { checkAnswer( sql("SELECT boolean(1), tinyint(1), smallint(1), int(1), bigint(1)"), Row(true, 1.toByte, 1.toShort, 1, 1L)) checkAnswer( sql("SELECT float(1), double(1), decimal(1)"), Row(1.toFloat, 1.0, new BigDecimal(1))) checkAnswer( sql("SELECT date(\"2014-04-04\"), timestamp(date(\"2014-04-04\"))"), Row(new java.util.Date(114, 3, 4), new Timestamp(114, 3, 4, 0, 0, 0, 0))) checkAnswer( sql("SELECT string(1)"), Row("1")) // Error handling: only one argument val errorMsg = intercept[AnalysisException](sql("SELECT string(1, 2)")).getMessage assert(errorMsg.contains("Function string accepts only one argument")) } }
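The Hive-compatibility test builds its expected value with the deprecated Timestamp(int, int, ...) constructor, where the year is offset from 1900 and the month is zero-based. A quick sketch showing it agrees with the non-deprecated Timestamp.valueOf (both interpret the fields in the JVM default zone):

import java.sql.Timestamp

object LegacyTimestampCtorSketch extends App {
  val legacy = new Timestamp(114, 3, 4, 0, 0, 0, 0)     // 2014 (114 + 1900), April (3 + 1), 4th
  val modern = Timestamp.valueOf("2014-04-04 00:00:00")

  assert(legacy == modern)
  println(modern)
}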
Example 56
Source File: DataConverter.scala From spark-cdm with MIT License | 5 votes |
package com.microsoft.cdm.utils import java.text.SimpleDateFormat import java.util.{Locale, TimeZone} import java.sql.Timestamp import org.apache.commons.lang.time.DateUtils import org.apache.spark.sql.catalyst.util.TimestampFormatter import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String class DataConverter() extends Serializable { val dateFormatter = new SimpleDateFormat(Constants.SINGLE_DATE_FORMAT) val timestampFormatter = TimestampFormatter(Constants.TIMESTAMP_FORMAT, TimeZone.getTimeZone("UTC")) val toSparkType: Map[CDMDataType.Value, DataType] = Map( CDMDataType.int64 -> LongType, CDMDataType.dateTime -> DateType, CDMDataType.string -> StringType, CDMDataType.double -> DoubleType, CDMDataType.decimal -> DecimalType(Constants.DECIMAL_PRECISION,0), CDMDataType.boolean -> BooleanType, CDMDataType.dateTimeOffset -> TimestampType ) def jsonToData(dt: DataType, value: String): Any = { return dt match { case LongType => value.toLong case DoubleType => value.toDouble case DecimalType() => Decimal(value) case BooleanType => value.toBoolean case DateType => dateFormatter.parse(value) case TimestampType => timestampFormatter.parse(value) case _ => UTF8String.fromString(value) } } def toCdmType(dt: DataType): CDMDataType.Value = { return dt match { case IntegerType => CDMDataType.int64 case LongType => CDMDataType.int64 case DateType => CDMDataType.dateTime case StringType => CDMDataType.string case DoubleType => CDMDataType.double case DecimalType() => CDMDataType.decimal case BooleanType => CDMDataType.boolean case TimestampType => CDMDataType.dateTimeOffset } } def dataToString(data: Any, dataType: DataType): String = { (dataType, data) match { case (_, null) => null case (DateType, _) => dateFormatter.format(data) case (TimestampType, v: Number) => timestampFormatter.format(data.asInstanceOf[Long]) case _ => data.toString } } }
Example 57
Source File: MessageSink.scala From parquet4s with MIT License | 5 votes |
package com.github.mjakubowski84.parquet4s.indefinite import java.sql.Timestamp import java.util.UUID import akka.Done import akka.kafka.CommitterSettings import akka.kafka.ConsumerMessage.CommittableOffsetBatch import akka.kafka.scaladsl.Committer import akka.stream.scaladsl.{Flow, Keep, Sink} import com.github.mjakubowski84.parquet4s.{ChunkPathBuilder, ParquetStreams, ParquetWriter} import com.google.common.io.Files import org.apache.hadoop.fs.Path import org.apache.parquet.hadoop.metadata.CompressionCodecName import scala.concurrent.Future import scala.concurrent.duration._ object MessageSink { case class Data(timestamp: Timestamp, word: String) val MaxChunkSize: Int = 128 val ChunkWriteTimeWindow: FiniteDuration = 10.seconds val WriteDirectoryName: String = "messages" } trait MessageSink { this: Akka => import MessageSink._ import MessageSource._ protected val baseWritePath: String = new Path(Files.createTempDir().getAbsolutePath, WriteDirectoryName).toString private val writerOptions = ParquetWriter.Options(compressionCodecName = CompressionCodecName.SNAPPY) private lazy val committerSink = Flow.apply[Seq[Message]].map { messages => CommittableOffsetBatch(messages.map(_.committableOffset)) }.toMat(Committer.sink(CommitterSettings(system)))(Keep.right) def chunkPath: ChunkPathBuilder[Message] = { case (basePath, chunk) => val lastElementDateTime = new Timestamp(chunk.last.record.timestamp()).toLocalDateTime val year = lastElementDateTime.getYear val month = lastElementDateTime.getMonthValue val day = lastElementDateTime.getDayOfMonth val uuid = UUID.randomUUID() basePath.suffix(s"/$year/$month/$day/part-$uuid.parquet") } lazy val messageSink: Sink[Message, Future[Done]] = ParquetStreams.toParquetIndefinite( path = baseWritePath, maxChunkSize = MaxChunkSize, chunkWriteTimeWindow = ChunkWriteTimeWindow, buildChunkPath = chunkPath, preWriteTransformation = { message: Message => Data( timestamp = new Timestamp(message.record.timestamp()), word = message.record.value() ) }, postWriteSink = committerSink, options = writerOptions ) }
Example 58
Source File: JdbcResultSpec.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package daf.dataset.query.jdbc import java.sql.Timestamp import java.time.{ LocalDateTime, OffsetDateTime } import org.scalatest.{ MustMatchers, WordSpec } import play.api.libs.json._ class JdbcResultSpec extends WordSpec with MustMatchers { "A JDBC Result container" must { "convert to CSV" in { JdbcResults.flat.toCsv.toList must be { List( """"int", "string", "bool", "timestamp"""", """1, "str1", true, "2018-06-25T09:00:00Z"""", """2, "str2", false, "2018-06-25T09:30:00Z"""", """<null>, <null>, false, <null>""" ) } } "convert to json" in { JdbcResults.flat.toJson.toList must be { Seq( JsObject { Seq( "int" -> JsNumber(1), "string" -> JsString("str1"), "bool" -> JsBoolean(true), "timestamp" -> JsString("2018-06-25T09:00:00Z") ) }, JsObject { Seq( "int" -> JsNumber(2), "string" -> JsString("str2"), "bool" -> JsBoolean(false), "timestamp" -> JsString("2018-06-25T09:30:00Z") ) }, JsObject { Seq( "int" -> JsNull, "string" -> JsNull, "bool" -> JsBoolean(false), "timestamp" -> JsNull ) } ) } } } } object JdbcResults { private val offset = OffsetDateTime.now().getOffset private def timestamp(dateTime: LocalDateTime) = Timestamp.from { dateTime.toInstant(offset) } val flat = JdbcResult( header = Seq("int", "string", "bool", "timestamp"), rows = Vector( List( Int.box(1), "str1", Boolean.box(true), timestamp { LocalDateTime.of(2018, 6, 25, 9, 0) } ), List( Int.box(2), "str2", Boolean.box(false), timestamp { LocalDateTime.of(2018, 6, 25, 9, 30) } ), List( null, null, Boolean.box(false), null ) ) ) }
Example 59
Source File: UJESSQLTypeParser.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.ujes.jdbc import java.sql.{Timestamp, Types} object UJESSQLTypeParser { def parserFromName(typeName: String): Int = { typeName match { case null => throw new UJESSQLException(UJESSQLErrorCode.METADATA_EMPTY) case "string" => Types.CHAR case "short" => Types.SMALLINT case "int" => Types.INTEGER case "long" => Types.BIGINT case "float" => Types.FLOAT case "double" => Types.DOUBLE case "boolean" => Types.BOOLEAN case "byte" => Types.TINYINT case "char" => Types.CHAR case "timestamp" => Types.TIMESTAMP case _ => throw new UJESSQLException(UJESSQLErrorCode.PREPARESTATEMENT_TYPEERROR) } } def parserFromVal(obj: Any): Int ={ obj match { case _: String => Types.CHAR case _: Short => Types.SMALLINT case _: Int => Types.INTEGER case _: Long => Types.BIGINT case _: Float => Types.FLOAT case _: Double => Types.DOUBLE case _: Boolean => Types.BOOLEAN case _: Byte => Types.TINYINT case _: Char => Types.CHAR case _: Timestamp => Types.TIMESTAMP case _ => throw new UJESSQLException(UJESSQLErrorCode.PREPARESTATEMENT_TYPEERROR) } } def parserFromMetaData(dataType: Int): String = { dataType match { case Types.CHAR => "string" case Types.SMALLINT => "short" case Types.INTEGER => "int" case Types.BIGINT => "long" case Types.FLOAT => "float" case Types.DOUBLE => "double" case Types.BOOLEAN => "boolean" case Types.TINYINT => "byte" case Types.CHAR => "char" case Types.TIMESTAMP => "timestamp" case _ => throw new UJESSQLException(UJESSQLErrorCode.PREPARESTATEMENT_TYPEERROR) } } }
Example 60
Source File: FilterPushdown.scala From spark-select with Apache License 2.0 | 5 votes |
package io.minio.spark.select import java.sql.{Date, Timestamp} import org.apache.spark.sql.sources._ import org.apache.spark.sql.types._ private def getTypeForAttribute(schema: StructType, attribute: String): Option[DataType] = { if (schema.fieldNames.contains(attribute)) { Some(schema(attribute).dataType) } else { None } } def queryFromSchema(schema: StructType, filters: Array[Filter]): String = { var columnList = schema.fields.map(x => s"s."+s""""${x.name}"""").mkString(",") if (columnList.length == 0) { columnList = "*" } val whereClause = buildWhereClause(schema, filters) if (whereClause.length == 0) { s"select $columnList from S3Object s" } else { s"select $columnList from S3Object s $whereClause" } } }
Example 61
Source File: TypeCast.scala From spark-select with Apache License 2.0 | 5 votes |
package io.minio.spark.select.util import java.math.BigDecimal import java.sql.{Date, Timestamp} import java.text.{SimpleDateFormat, NumberFormat} import java.util.Locale import org.apache.spark.sql.types._ import scala.util.Try @throws[IllegalArgumentException] private[select] def toChar(str: String): Char = { if (str.charAt(0) == '\\') { str.charAt(1) match { case 't' => '\t' case 'r' => '\r' case 'b' => '\b' case 'f' => '\f' case '\"' => '\"' // In case user changes quote char and uses \" as delimiter in options case '\'' => '\'' case 'u' if str == """\u0000""" => '\u0000' case _ => throw new IllegalArgumentException(s"Unsupported special character for delimiter: $str") } } else if (str.length == 1) { str.charAt(0) } else { throw new IllegalArgumentException(s"Delimiter cannot be more than one character: $str") } } }
Example 62
Source File: SlickJdbcScheduledMessagesRepository.scala From reliable-http-client with Apache License 2.0 | 5 votes |
package rhttpc.transport.amqpjdbc.slick import java.sql.Timestamp import rhttpc.transport.amqpjdbc.{MessageToSchedule, ScheduledMessage, ScheduledMessagesRepository} import slick.jdbc.{JdbcBackend, JdbcProfile} import scala.concurrent.{ExecutionContext, Future} private[amqpjdbc] class SlickJdbcScheduledMessagesRepository(profile: JdbcProfile, db: JdbcBackend.Database) (implicit ec: ExecutionContext) extends ScheduledMessagesRepository { class V1_001__AddingPropertiesToScheduledMessagesMigration extends AddingPropertiesToScheduledMessagesMigration { override protected val profile: JdbcProfile = SlickJdbcScheduledMessagesRepository.this.profile } val messagesMigration = new V1_001__AddingPropertiesToScheduledMessagesMigration import messagesMigration._ import profile.api._ override def save(msg: MessageToSchedule): Future[Unit] = { import msg._ val action = for { currentTimestamp <- sql"select current_timestamp".as[Timestamp].head plannedRun = new Timestamp(currentTimestamp.getTime + msg.delay.toMillis) messageToAdd = ScheduledMessage(None, queueName, content, properties, plannedRun) insertResult <- scheduledMessages += messageToAdd } yield () db.run(action.transactionally) } override def fetchMessagesShouldByRun(queueName: String, batchSize: Int) (onMessages: (Seq[ScheduledMessage]) => Future[Any]): Future[Int] = { def drain(): Future[Int] = { val fetchAction = for { currentTimestamp <- sql"select current_timestamp".as[Timestamp].head fetched <- scheduledMessages.filter { msg => msg.queueName === queueName && msg.plannedRun <= currentTimestamp }.sortBy(_.plannedRun desc).take(batchSize).result } yield fetched def consumeAction(fetched: Seq[ScheduledMessage]) = { val fetchedIds = fetched.flatMap(_.id) for { deleted <- scheduledMessages.filter(_.id inSet fetchedIds).delete _ <- { if (deleted != fetched.size) { DBIO.failed(ConcurrentFetchException) } else { DBIO.successful(Unit) } } _ <- DBIO.from(onMessages(fetched)) } yield fetched.size } val consumedFuture = for { fetched <- db.run(fetchAction.transactionally) consumed <- db.run(consumeAction(fetched).transactionally) } yield consumed val consumedRecovered = consumedFuture.recover { case ConcurrentFetchException => 0 } for { consumed <- consumedRecovered consumedNext <- { if (consumed == batchSize) drain() else Future.successful(0) } } yield consumed + consumedNext } drain() } override def queuesStats(names: Set[String]): Future[Map[String, Int]] = { val action = scheduledMessages .filter(_.queueName inSet names) .groupBy(_.queueName).map { case (queueName, msgs) => (queueName, msgs.size) }.result db.run(action).map(_.toMap) } } case object ConcurrentFetchException extends Exception(s"Concurrent fetch detected")
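In the save method above, the planned run time is the database's current timestamp shifted by the message delay. The same arithmetic in isolation; the object name is illustrative:

import java.sql.Timestamp
import scala.concurrent.duration._

object PlannedRun {
  // Shift a base timestamp forward by a finite delay.
  def apply(currentTimestamp: Timestamp, delay: FiniteDuration): Timestamp =
    new Timestamp(currentTimestamp.getTime + delay.toMillis)

  def main(args: Array[String]): Unit =
    println(PlannedRun(new Timestamp(System.currentTimeMillis()), 30.seconds))
}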
Example 63
Source File: ReadParquetEEL.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels import java.sql.Timestamp import io.eels.component.parquet.{ParquetSink, ParquetSource} import io.eels.datastream.DataStream import io.eels.schema.{ArrayType, DecimalType, Field, IntType, Precision, Scale, StringType, StructType, TimestampMillisType} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} object ReadParquetEEL extends App { def readParquet(path: Path): Unit = { implicit val hadoopConfiguration = new Configuration() implicit val hadoopFileSystem = FileSystem.get(hadoopConfiguration) val rows = ParquetSource(parquetFilePath).toDataStream().collect rows.foreach(row => println(row)) } val parquetFilePath = new Path("file:///home/sam/development/person2.parquet") implicit val hadoopConfiguration = new Configuration() implicit val hadoopFileSystem = FileSystem.get(hadoopConfiguration) val friendStruct = Field.createStructField("FRIEND", Seq( Field("NAME", StringType), Field("AGE", IntType.Signed) ) ) val personDetailsStruct = Field.createStructField("PERSON_DETAILS", Seq( Field("NAME", StringType), Field("AGE", IntType.Signed), Field("SALARY", DecimalType(Precision(38), Scale(5))), Field("CREATION_TIME", TimestampMillisType) ) ) val friendType = StructType(friendStruct) val schema = StructType(personDetailsStruct, Field("FRIENDS", ArrayType(friendType), nullable = false)) val friends = Vector( Vector(Vector("John", 25)), Vector(Vector("Adam", 26)), Vector(Vector("Steven", 27)) ) val rows = Vector( Vector(Vector("Fred", 50, BigDecimal("50000.99000"), new Timestamp(System.currentTimeMillis())), friends) ) try { DataStream.fromValues(schema, rows).to(ParquetSink(parquetFilePath).withOverwrite(true)) } catch { case e: Exception => e.printStackTrace() } try { readParquet(parquetFilePath) } catch { case e: Exception => e.printStackTrace() } }
Example 64
Source File: SchemaFn.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.schema import java.sql.Timestamp object SchemaFn { def toDataType(clz: Class[_]): DataType = { val intClass = classOf[Int] val floatClass = classOf[Float] val stringClass = classOf[String] val charClass = classOf[Char] val bigIntClass = classOf[BigInt] val booleanClass = classOf[Boolean] val doubleClass = classOf[Double] val bigdecimalClass = classOf[BigDecimal] val longClass = classOf[Long] val byteClass = classOf[Byte] val shortClass = classOf[Short] val timestampClass = classOf[Timestamp] clz match { case `bigdecimalClass` => DecimalType(Precision(22), Scale(5)) case `bigIntClass` => BigIntType case `booleanClass` => BooleanType case `byteClass` => ByteType.Signed case `charClass` => CharType(1) case `doubleClass` => DoubleType case `intClass` => IntType.Signed case `floatClass` => FloatType case `longClass` => LongType.Signed case `stringClass` => StringType case `shortClass` => ShortType.Signed case `timestampClass` => TimestampMillisType case _ => sys.error(s"Can not map $clz to data type") } } }
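A usage sketch, assuming the eel-sdk classes above are on the classpath: classOf[java.sql.Timestamp] maps to TimestampMillisType.

import java.sql.Timestamp
import io.eels.schema.{SchemaFn, TimestampMillisType}

object SchemaFnUsage extends App {
  // java.sql.Timestamp is mapped to eel's millisecond-precision timestamp type.
  assert(SchemaFn.toDataType(classOf[Timestamp]) == TimestampMillisType)
}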
Example 65
Source File: TimestampCastSpec.scala From flint with Apache License 2.0 | 5 votes |
package org.apache.spark.sql import java.sql.Timestamp import java.time.Instant import scala.reflect.runtime.universe.TypeTag import com.twosigma.flint.timeseries.TimeSeriesSuite import org.apache.spark.sql.functions._ class TimestampCastSpec extends TimeSeriesSuite { import TimestampCastSpec._ import testImplicits._ behavior of "TimestampToNanos" testEvalAndCodegen("retain up to microsecond precision", nanosToTimestamp(expectedNanos)){ df => val actual = df.select(TimestampToNanos(col("time")).as("long")) .collect() .map(_.getAs[Long]("long")) assert(actual === expectedNanos) } behavior of "LongToTimestamp" testEvalAndCodegen("retain up to microsecond precision", expectedNanos) { df => val actual = df.select(NanosToTimestamp(col("time")).as("timestamp")) .collect() .map(_.getAs[Timestamp]("timestamp")) val expectedTimestamps = expectedNanos.map { nanos => Timestamp.from(Instant.ofEpochSecond(0, nanos)) } assert(actual === expectedTimestamps) } private def asExternalRDD[T: TypeTag](input: Seq[T]): DataFrame = { sc.range(0, input.size.toLong).map { i => Tuple1(input(i.toInt)) }.toDF("time") } } object TimestampCastSpec { val expectedNanos = Seq[Long]( 0L, Long.MaxValue - (Long.MaxValue % 1000), // clip to microsecond precision 946684800000000000L, // 2001-01-01 1262304000000000000L, // 2010-01-01 1893456000000000000L // 2030-01-01 ) def nanosToTimestamp(input: Seq[Long]): Seq[Timestamp] = input.map { v => Timestamp.from(Instant.ofEpochSecond(0, v)) } }
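The nanosecond/Timestamp conversions used in the spec above, as a standalone sketch (names are illustrative):

import java.sql.Timestamp
import java.time.Instant

object NanosAndTimestamps {
  // Nanoseconds since the epoch -> Timestamp, preserving sub-millisecond precision.
  def nanosToTimestamp(nanos: Long): Timestamp = Timestamp.from(Instant.ofEpochSecond(0, nanos))

  // Timestamp -> nanoseconds since the epoch.
  def timestampToNanos(ts: Timestamp): Long = {
    val i = ts.toInstant
    i.getEpochSecond * 1000000000L + i.getNano
  }

  def main(args: Array[String]): Unit = {
    val ts = nanosToTimestamp(1262304000000000000L) // 2010-01-01T00:00:00Z
    println(ts + " -> " + timestampToNanos(ts))
  }
}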
Example 66
Source File: JsonSupport.scala From akka-http-slick-sample with MIT License | 5 votes |
package net.softler.data.model import java.sql.Timestamp import java.time.Instant import java.util.UUID import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport import spray.json.{DefaultJsonProtocol, JsNumber, JsString, JsValue, JsonFormat, RootJsonFormat} trait BaseJsonProtocol extends DefaultJsonProtocol { implicit val timestampFormat: JsonFormat[Timestamp] = new JsonFormat[Timestamp] { override def write(obj: Timestamp): JsValue = JsNumber(obj.getTime) override def read(json: JsValue): Timestamp = json match { case JsNumber(x) => Timestamp.from(Instant.ofEpochMilli(x.toLong)) case _ => throw new IllegalArgumentException( s"Can not parse json value [$json] to a timestamp object") } } implicit val uuidJsonFormat: JsonFormat[UUID] = new JsonFormat[UUID] { override def write(x: UUID): JsValue = JsString(x.toString) override def read(value: JsValue): UUID = value match { case JsString(x) => UUID.fromString(x) case x => throw new IllegalArgumentException("Expected UUID as JsString, but got " + x.getClass) } } } trait JsonProtocol extends SprayJsonSupport with BaseJsonProtocol { implicit val userFormat: RootJsonFormat[User] = jsonFormat10(User) }
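A round-trip usage sketch for the timestampFormat above, assuming the BaseJsonProtocol trait and spray-json are on the classpath:

import java.sql.Timestamp
import net.softler.data.model.BaseJsonProtocol

object TimestampJsonRoundTrip extends App with BaseJsonProtocol {
  val ts = new Timestamp(1541999720000L)
  val js = timestampFormat.write(ts)   // JsNumber(1541999720000)
  val back = timestampFormat.read(js)  // Timestamp equal to ts
  assert(back == ts)
  println(js)
}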
Example 67
Source File: entities.scala From akka-http-slick-sample with MIT License | 5 votes |
package net.softler.data.model import java.sql.Timestamp import java.util.UUID trait Entity { def id: UUID def created: Timestamp def updated: Option[Timestamp] def deleted: Option[Timestamp] } case class User(id: UUID, login: String, password: String, email: String, firstName: Option[String], lastName: Option[String], readOnly: Boolean, created: Timestamp, updated: Option[Timestamp], deleted: Option[Timestamp]) extends Entity case class Role(id: UUID, name: String, readOnly: Boolean, created: Timestamp, updated: Option[Timestamp], deleted: Option[Timestamp]) extends Entity case class UserRole(user: User, role: Role)
Example 68
Source File: StationSuite.scala From gihyo-spark-book-example with Apache License 2.0 | 5 votes |
package jp.gihyo.spark.ch05 import java.sql.Timestamp import java.text.SimpleDateFormat import org.scalatest.FunSuite class StationSuite extends FunSuite { test("should be parse") { val line = "2,San Jose Diridon Caltrain Station,37.329732,-121.901782,27,San Jose,8/6/2013" val station = Station.parse(line) val dateFormat = new SimpleDateFormat("MM/dd/yyy") assert(station.id === 2) assert(station.name === "San Jose Diridon Caltrain Station") assert(station.lat === 37.329732) assert(station.lon === -121.901782) assert(station.dockcount === 27) assert(station.landmark === "San Jose") assert(station.installation === new Timestamp(dateFormat.parse("8/6/2013").getTime)) } }
Example 69
Source File: QueryPartitionSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive import java.io.File import java.sql.Timestamp import com.google.common.io.Files import org.apache.hadoop.fs.FileSystem import org.apache.spark.internal.config._ import org.apache.spark.sql._ import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.util.Utils class QueryPartitionSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { import spark.implicits._ private def queryWhenPathNotExist(): Unit = { withTempView("testData") { withTable("table_with_partition", "createAndInsertTest") { withTempDir { tmpDir => val testData = sparkContext.parallelize( (1 to 10).map(i => TestData(i, i.toString))).toDF() testData.createOrReplaceTempView("testData") // create the table for test sql(s"CREATE TABLE table_with_partition(key int,value string) " + s"PARTITIONED by (ds string) location '${tmpDir.toURI}' ") sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='1') " + "SELECT key,value FROM testData") sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='2') " + "SELECT key,value FROM testData") sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='3') " + "SELECT key,value FROM testData") sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='4') " + "SELECT key,value FROM testData") // test for the exist path checkAnswer(sql("select key,value from table_with_partition"), testData.union(testData).union(testData).union(testData)) // delete the path of one partition tmpDir.listFiles .find { f => f.isDirectory && f.getName().startsWith("ds=") } .foreach { f => Utils.deleteRecursively(f) } // test for after delete the path checkAnswer(sql("select key,value from table_with_partition"), testData.union(testData).union(testData)) } } } } test("SPARK-5068: query data when path doesn't exist") { withSQLConf(SQLConf.HIVE_VERIFY_PARTITION_PATH.key -> "true") { queryWhenPathNotExist() } } test("Replace spark.sql.hive.verifyPartitionPath by spark.files.ignoreMissingFiles") { withSQLConf(SQLConf.HIVE_VERIFY_PARTITION_PATH.key -> "false") { sparkContext.conf.set(IGNORE_MISSING_FILES.key, "true") queryWhenPathNotExist() } } test("SPARK-21739: Cast expression should initialize timezoneId") { withTable("table_with_timestamp_partition") { sql("CREATE TABLE table_with_timestamp_partition(value int) PARTITIONED BY (ts TIMESTAMP)") sql("INSERT OVERWRITE TABLE table_with_timestamp_partition " + "PARTITION (ts = '2010-01-01 00:00:00.000') VALUES (1)") // test for Cast expression in TableReader checkAnswer(sql("SELECT * FROM table_with_timestamp_partition"), Seq(Row(1, Timestamp.valueOf("2010-01-01 00:00:00.000")))) // test for Cast expression in HiveTableScanExec checkAnswer(sql("SELECT value FROM table_with_timestamp_partition " + "WHERE ts = '2010-01-01 00:00:00.000'"), Row(1)) } } }
Example 70
Source File: LiteralGenerator.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} import org.scalacheck.{Arbitrary, Gen} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.CalendarInterval object LiteralGenerator { lazy val byteLiteralGen: Gen[Literal] = for { b <- Arbitrary.arbByte.arbitrary } yield Literal.create(b, ByteType) lazy val shortLiteralGen: Gen[Literal] = for { s <- Arbitrary.arbShort.arbitrary } yield Literal.create(s, ShortType) lazy val integerLiteralGen: Gen[Literal] = for { i <- Arbitrary.arbInt.arbitrary } yield Literal.create(i, IntegerType) lazy val longLiteralGen: Gen[Literal] = for { l <- Arbitrary.arbLong.arbitrary } yield Literal.create(l, LongType) lazy val floatLiteralGen: Gen[Literal] = for { f <- Gen.chooseNum(Float.MinValue / 2, Float.MaxValue / 2, Float.NaN, Float.PositiveInfinity, Float.NegativeInfinity) } yield Literal.create(f, FloatType) lazy val doubleLiteralGen: Gen[Literal] = for { f <- Gen.chooseNum(Double.MinValue / 2, Double.MaxValue / 2, Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity) } yield Literal.create(f, DoubleType) // TODO cache the generated data def decimalLiteralGen(precision: Int, scale: Int): Gen[Literal] = { assert(scale >= 0) assert(precision >= scale) Arbitrary.arbBigInt.arbitrary.map { s => val a = (s % BigInt(10).pow(precision - scale)).toString() val b = (s % BigInt(10).pow(scale)).abs.toString() Literal.create( Decimal(BigDecimal(s"$a.$b"), precision, scale), DecimalType(precision, scale)) } } lazy val stringLiteralGen: Gen[Literal] = for { s <- Arbitrary.arbString.arbitrary } yield Literal.create(s, StringType) lazy val binaryLiteralGen: Gen[Literal] = for { ab <- Gen.listOf[Byte](Arbitrary.arbByte.arbitrary) } yield Literal.create(ab.toArray, BinaryType) lazy val booleanLiteralGen: Gen[Literal] = for { b <- Arbitrary.arbBool.arbitrary } yield Literal.create(b, BooleanType) lazy val dateLiteralGen: Gen[Literal] = for { d <- Arbitrary.arbInt.arbitrary } yield Literal.create(new Date(d), DateType) lazy val timestampLiteralGen: Gen[Literal] = for { t <- Arbitrary.arbLong.arbitrary } yield Literal.create(new Timestamp(t), TimestampType) lazy val calendarIntervalLiterGen: Gen[Literal] = for { m <- Arbitrary.arbInt.arbitrary; s <- Arbitrary.arbLong.arbitrary} yield Literal.create(new CalendarInterval(m, s), CalendarIntervalType) // Sometimes, it would be quite expensive when unlimited value is used, // for example, the `times` arguments for StringRepeat would hang the test 'forever' // if it's tested against Int.MaxValue by ScalaCheck, therefore, use values from a limited // range is more reasonable lazy val limitedIntegerLiteralGen: Gen[Literal] = for { i <- Gen.choose(-100, 100) } yield Literal.create(i, IntegerType) def randomGen(dt: DataType): Gen[Literal] = { dt match { case ByteType => byteLiteralGen case ShortType => shortLiteralGen case IntegerType => integerLiteralGen case LongType => longLiteralGen case DoubleType => doubleLiteralGen case FloatType => floatLiteralGen case DateType => dateLiteralGen case TimestampType => timestampLiteralGen case BooleanType => booleanLiteralGen case StringType => stringLiteralGen case BinaryType => binaryLiteralGen case CalendarIntervalType => calendarIntervalLiterGen case DecimalType.Fixed(precision, scale) => decimalLiteralGen(precision, scale) case dt => throw new IllegalArgumentException(s"not supported type $dt") } } }
Example 71
Source File: CallMethodViaReflectionSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions import java.sql.Timestamp import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.TypeCheckFailure import org.apache.spark.sql.types.{IntegerType, StringType} class CallMethodViaReflectionSuite extends SparkFunSuite with ExpressionEvalHelper { import CallMethodViaReflection._ // Get rid of the $ so we are getting the companion object's name. private val staticClassName = ReflectStaticClass.getClass.getName.stripSuffix("$") private val dynamicClassName = classOf[ReflectDynamicClass].getName test("findMethod via reflection for static methods") { assert(findMethod(staticClassName, "method1", Seq.empty).exists(_.getName == "method1")) assert(findMethod(staticClassName, "method2", Seq(IntegerType)).isDefined) assert(findMethod(staticClassName, "method3", Seq(IntegerType)).isDefined) assert(findMethod(staticClassName, "method4", Seq(IntegerType, StringType)).isDefined) } test("findMethod for a JDK library") { assert(findMethod(classOf[java.util.UUID].getName, "randomUUID", Seq.empty).isDefined) } test("class not found") { val ret = createExpr("some-random-class", "method").checkInputDataTypes() assert(ret.isFailure) val errorMsg = ret.asInstanceOf[TypeCheckFailure].message assert(errorMsg.contains("not found") && errorMsg.contains("class")) } test("method not found because name does not match") { val ret = createExpr(staticClassName, "notfoundmethod").checkInputDataTypes() assert(ret.isFailure) val errorMsg = ret.asInstanceOf[TypeCheckFailure].message assert(errorMsg.contains("cannot find a static method")) } test("method not found because there is no static method") { val ret = createExpr(dynamicClassName, "method1").checkInputDataTypes() assert(ret.isFailure) val errorMsg = ret.asInstanceOf[TypeCheckFailure].message assert(errorMsg.contains("cannot find a static method")) } test("input type checking") { assert(CallMethodViaReflection(Seq.empty).checkInputDataTypes().isFailure) assert(CallMethodViaReflection(Seq(Literal(staticClassName))).checkInputDataTypes().isFailure) assert(CallMethodViaReflection( Seq(Literal(staticClassName), Literal(1))).checkInputDataTypes().isFailure) assert(createExpr(staticClassName, "method1").checkInputDataTypes().isSuccess) } test("unsupported type checking") { val ret = createExpr(staticClassName, "method1", new Timestamp(1)).checkInputDataTypes() assert(ret.isFailure) val errorMsg = ret.asInstanceOf[TypeCheckFailure].message assert(errorMsg.contains("arguments from the third require boolean, byte, short")) } test("invoking methods using acceptable types") { checkEvaluation(createExpr(staticClassName, "method1"), "m1") checkEvaluation(createExpr(staticClassName, "method2", 2), "m2") checkEvaluation(createExpr(staticClassName, "method3", 3), "m3") checkEvaluation(createExpr(staticClassName, "method4", 4, "four"), "m4four") } private def createExpr(className: String, methodName: String, args: Any*) = { CallMethodViaReflection( Literal.create(className, StringType) +: Literal.create(methodName, StringType) +: args.map(Literal.apply) ) } }
Example 72
Source File: SortOrderExpressionsSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} import java.util.TimeZone import org.apache.spark.SparkFunSuite import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.collection.unsafe.sort.PrefixComparators._ class SortOrderExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("SortPrefix") { val b1 = Literal.create(false, BooleanType) val b2 = Literal.create(true, BooleanType) val i1 = Literal.create(20132983, IntegerType) val i2 = Literal.create(-20132983, IntegerType) val l1 = Literal.create(20132983, LongType) val l2 = Literal.create(-20132983, LongType) val millis = 1524954911000L; // Explicitly choose a time zone, since Date objects can create different values depending on // local time zone of the machine on which the test is running val oldDefaultTZ = TimeZone.getDefault val d1 = try { TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles")) Literal.create(new java.sql.Date(millis), DateType) } finally { TimeZone.setDefault(oldDefaultTZ) } val t1 = Literal.create(new Timestamp(millis), TimestampType) val f1 = Literal.create(0.7788229f, FloatType) val f2 = Literal.create(-0.7788229f, FloatType) val db1 = Literal.create(0.7788229d, DoubleType) val db2 = Literal.create(-0.7788229d, DoubleType) val s1 = Literal.create("T", StringType) val s2 = Literal.create("This is longer than 8 characters", StringType) val bin1 = Literal.create(Array[Byte](12), BinaryType) val bin2 = Literal.create(Array[Byte](12, 17, 99, 0, 0, 0, 2, 3, 0xf4.asInstanceOf[Byte]), BinaryType) val dec1 = Literal(Decimal(20132983L, 10, 2)) val dec2 = Literal(Decimal(20132983L, 19, 2)) val dec3 = Literal(Decimal(20132983L, 21, 2)) val list1 = Literal(List(1, 2), ArrayType(IntegerType)) val nullVal = Literal.create(null, IntegerType) checkEvaluation(SortPrefix(SortOrder(b1, Ascending)), 0L) checkEvaluation(SortPrefix(SortOrder(b2, Ascending)), 1L) checkEvaluation(SortPrefix(SortOrder(i1, Ascending)), 20132983L) checkEvaluation(SortPrefix(SortOrder(i2, Ascending)), -20132983L) checkEvaluation(SortPrefix(SortOrder(l1, Ascending)), 20132983L) checkEvaluation(SortPrefix(SortOrder(l2, Ascending)), -20132983L) // For some reason, the Literal.create code gives us the number of days since the epoch checkEvaluation(SortPrefix(SortOrder(d1, Ascending)), 17649L) checkEvaluation(SortPrefix(SortOrder(t1, Ascending)), millis * 1000) checkEvaluation(SortPrefix(SortOrder(f1, Ascending)), DoublePrefixComparator.computePrefix(f1.value.asInstanceOf[Float].toDouble)) checkEvaluation(SortPrefix(SortOrder(f2, Ascending)), DoublePrefixComparator.computePrefix(f2.value.asInstanceOf[Float].toDouble)) checkEvaluation(SortPrefix(SortOrder(db1, Ascending)), DoublePrefixComparator.computePrefix(db1.value.asInstanceOf[Double])) checkEvaluation(SortPrefix(SortOrder(db2, Ascending)), DoublePrefixComparator.computePrefix(db2.value.asInstanceOf[Double])) checkEvaluation(SortPrefix(SortOrder(s1, Ascending)), StringPrefixComparator.computePrefix(s1.value.asInstanceOf[UTF8String])) checkEvaluation(SortPrefix(SortOrder(s2, Ascending)), StringPrefixComparator.computePrefix(s2.value.asInstanceOf[UTF8String])) checkEvaluation(SortPrefix(SortOrder(bin1, Ascending)), BinaryPrefixComparator.computePrefix(bin1.value.asInstanceOf[Array[Byte]])) checkEvaluation(SortPrefix(SortOrder(bin2, Ascending)), BinaryPrefixComparator.computePrefix(bin2.value.asInstanceOf[Array[Byte]])) checkEvaluation(SortPrefix(SortOrder(dec1, Ascending)), 20132983L) 
checkEvaluation(SortPrefix(SortOrder(dec2, Ascending)), 2013298L) checkEvaluation(SortPrefix(SortOrder(dec3, Ascending)), DoublePrefixComparator.computePrefix(201329.83d)) checkEvaluation(SortPrefix(SortOrder(list1, Ascending)), 0L) checkEvaluation(SortPrefix(SortOrder(nullVal, Ascending)), null) } }
Example 73
Source File: ApplicationMonitor.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.monitor.application

import java.sql.{Connection, Timestamp}
import java.text.SimpleDateFormat
import java.util.Date
import java.util.concurrent.TimeUnit

import scala.concurrent.duration.Duration

import org.apache.spark.alarm.AlertMessage
import org.apache.spark.alarm.AlertType._
import org.apache.spark.monitor.Monitor
import org.apache.spark.monitor.MonitorItem.MonitorItem

abstract class ApplicationMonitor extends Monitor {
  override val alertType = Seq(Application)
}

class ApplicationInfo(
    title: MonitorItem,
    appName: String,
    appId: String,
    md5: String,
    startTime: Date,
    duration: Long,
    appUiUrl: String,
    historyUrl: String,
    eventLogDir: String,
    minExecutor: Int,
    maxExecutor: Int,
    executorCore: Int,
    executorMemoryMB: Long,
    executorAccu: Double,
    user: String)
  extends AlertMessage(title) {

  override def toCsv(): String = {
    s"${user},${appId}," +
      s"${new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(startTime)}," +
      s"${Duration(duration, TimeUnit.MILLISECONDS).toSeconds}," +
      s"${executorMemoryMB},${executorCore},${executorAccu.formatted("%.2f")},${appName}"
  }

  // scalastyle:off
  override def toHtml(): String = {
    val html = <h1>Job finished! </h1>
        <h2>Job information </h2>
        <ul>
          <li>Application name: {appName}</li>
          <li>Application ID: {appId}</li>
          <li>Start time: {new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(startTime)}</li>
          <li>Elapsed time: {Duration(duration, TimeUnit.MILLISECONDS).toSeconds} s</li>
        </ul>
        <h2>Resource usage</h2>
        <ul>
          <li>Executor count: {minExecutor}~{maxExecutor}</li>
          <li>Executor memory: {executorMemoryMB} MB</li>
          <li>Executor cores: {executorCore}</li>
          <li>Cumulative executor usage: {executorAccu.formatted("%.2f")} executor*min</li>
        </ul>
        <h2>Debug information</h2>
        <ul>
          <li>History link 1: <a href={appUiUrl.split(",").head}>{appUiUrl.split(",").head}</a></li>
          <li>History link 2: <a href={historyUrl}>{historyUrl}</a></li>
          <li>Event log directory: {eventLogDir}</li>
        </ul>
    html.mkString
  }

  override def toJdbc(conn: Connection, appId: String): Unit = {
    val query = "INSERT INTO `xsql_monitor`.`spark_history`(" +
      "`user`, `md5`, `appId`, `startTime`, `duration`, " +
      "`yarnURL`, `sparkHistoryURL`, `eventLogDir`, `coresPerExecutor`, `memoryPerExecutorMB`," +
      " `executorAcc`, `appName`, `minExecutors`, `maxExecutors`)" +
      " SELECT ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? FROM DUAL" +
      " WHERE NOT EXISTS (SELECT * FROM `xsql_monitor`.`spark_history` WHERE `appId` = ?);"

    val preparedStmt = conn.prepareStatement(query)
    preparedStmt.setString(1, user)
    preparedStmt.setString(2, md5)
    preparedStmt.setString(3, appId)
    preparedStmt.setTimestamp(4, new Timestamp(startTime.getTime))
    preparedStmt.setLong(5, Duration(duration, TimeUnit.MILLISECONDS).toSeconds)
    preparedStmt.setString(6, appUiUrl)
    preparedStmt.setString(7, historyUrl)
    preparedStmt.setString(8, eventLogDir)
    preparedStmt.setInt(9, executorCore)
    preparedStmt.setLong(10, executorMemoryMB)
    preparedStmt.setDouble(11, executorAccu)
    preparedStmt.setString(12, appName)
    preparedStmt.setInt(13, minExecutor)
    preparedStmt.setInt(14, maxExecutor)
    preparedStmt.setString(15, appId)
    preparedStmt.execute
  }
}
Example 74
Source File: TimestampExpressionSuite.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions import java.sql.Timestamp import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.types.{DateType, IntegerType} import org.scalatest.FunSuite class TimestampExpressionSuite extends FunSuite with ExpressionEvalHelper { test("add_seconds") { // scalastyle:off magic.number checkEvaluation(AddSeconds(Literal(Timestamp.valueOf("2015-01-01 00:11:33")), Literal(28)), DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf("2015-01-01 00:12:01"))) checkEvaluation(AddSeconds(Literal(Timestamp.valueOf("2015-01-02 00:00:00")), Literal(-1)), DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf("2015-01-01 23:59:59"))) checkEvaluation(AddSeconds(Literal(Timestamp.valueOf("2015-01-01 00:00:00")), Literal(-1)), DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf("2014-12-31 23:59:59"))) checkEvaluation(AddSeconds(Literal(Timestamp.valueOf("2015-01-02 00:00:00")), Literal.create(null, IntegerType)), null) checkEvaluation(AddSeconds(Literal.create(null, DateType), Literal(1)), null) checkEvaluation(AddSeconds(Literal.create(null, DateType), Literal.create(null, IntegerType)), null) } }
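The ADD_SECONDS semantics exercised above have a simple plain-JVM equivalent for java.sql.Timestamp values (a sketch, not the Spark expression itself):

import java.sql.Timestamp

object AddSecondsSketch {
  // Shift a Timestamp by a number of seconds (negative values move backwards).
  def addSeconds(ts: Timestamp, seconds: Long): Timestamp =
    Timestamp.from(ts.toInstant.plusSeconds(seconds))

  def main(args: Array[String]): Unit = {
    println(addSeconds(Timestamp.valueOf("2015-01-01 00:11:33"), 28))  // 2015-01-01 00:12:01.0
    println(addSeconds(Timestamp.valueOf("2015-01-02 00:00:00"), -1))  // 2015-01-01 23:59:59.0
  }
}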
Example 75
Source File: AddSecondsSuite.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions import java.sql.Timestamp import org.apache.spark.sql.{GlobalSapSQLContext, Row} import org.scalatest.FunSuite class AddSecondsSuite extends FunSuite with GlobalSapSQLContext { val rowA = TimestampRow("AAA", Timestamp.valueOf("2015-01-01 12:12:04")) val rowB = TimestampRow("BBB", Timestamp.valueOf("2015-01-01 00:00:00")) val rowC = TimestampRow("CCC", Timestamp.valueOf("2015-12-31 23:59:58")) val rowD = TimestampRow("DDD", Timestamp.valueOf("2012-01-01 23:30:45")) val dataWithTimestamps = Seq(rowA, rowB, rowC, rowD) test("add_seconds") { val rdd = sc.parallelize(dataWithTimestamps) val dSrc = sqlContext.createDataFrame(rdd).cache() dSrc.registerTempTable("src") val result1 = sqlContext.sql("SELECT name, ADD_SECONDS(t, 5) FROM src").collect() assertResult( Row(rowA.name, Timestamp.valueOf("2015-01-01 12:12:09")) :: Row(rowB.name, Timestamp.valueOf("2015-01-01 00:00:05")) :: Row(rowC.name, Timestamp.valueOf("2016-01-01 00:00:03")) :: Row(rowD.name, Timestamp.valueOf("2012-01-01 23:30:50")) :: Nil)(result1) val result2 = sqlContext.sql("SELECT name, ADD_SECONDS(t, -5) FROM src").collect() assertResult( Row(rowA.name, Timestamp.valueOf("2015-01-01 12:11:59")) :: Row(rowB.name, Timestamp.valueOf("2014-12-31 23:59:55")) :: Row(rowC.name, Timestamp.valueOf("2015-12-31 23:59:53")) :: Row(rowD.name, Timestamp.valueOf("2012-01-01 23:30:40")) :: Nil)(result2) // example from SAP HANA documentation at // http://help.sap.com/hana/SAP_HANA_SQL_and_System_Views_Reference_en.pdf val result3 = sqlContext.sql("SELECT name, ADD_SECONDS(t, 60*30) FROM src").collect() assertResult( Row(rowA.name, Timestamp.valueOf("2015-01-01 12:42:04")) :: Row(rowB.name, Timestamp.valueOf("2015-01-01 00:30:00")) :: Row(rowC.name, Timestamp.valueOf("2016-01-01 00:29:58")) :: Row(rowD.name, Timestamp.valueOf("2012-01-02 00:00:45")) :: Nil)(result3) } }
Example 76
Source File: ExcelOutputWriter.scala From spark-hadoopoffice-ds with Apache License 2.0 | 5 votes |
package org.zuinnote.spark.office.excel import java.math.BigDecimal import java.sql.Date import java.sql.Timestamp import java.text.DateFormat import java.text.SimpleDateFormat import java.util.Calendar import org.apache.hadoop.conf.Configuration import org.apache.hadoop.io.NullWritable import org.apache.hadoop.io.ArrayWritable import org.apache.hadoop.mapreduce.RecordWriter import org.apache.hadoop.mapreduce.TaskAttemptContext import org.apache.hadoop.fs.Path import org.apache.spark.sql.catalyst.{ CatalystTypeConverters, InternalRow } import org.apache.spark.sql.Row import org.apache.spark.sql.execution.datasources.OutputWriter import org.apache.spark.sql.types._ import org.zuinnote.hadoop.office.format.common.dao.SpreadSheetCellDAO import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration import org.zuinnote.hadoop.office.format.common.util.msexcel.MSExcelUtil import org.zuinnote.hadoop.office.format.mapreduce._ import org.apache.commons.logging.LogFactory import org.apache.commons.logging.Log import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration import java.util.Locale import java.text.DecimalFormat import org.zuinnote.hadoop.office.format.common.converter.ExcelConverterSimpleSpreadSheetCellDAO import java.text.NumberFormat // NOTE: This class is instantiated and used on executor side only, no need to be serializable. private[excel] class ExcelOutputWriter( path: String, dataSchema: StructType, context: TaskAttemptContext, options: Map[String, String]) extends OutputWriter { def write(row: Row): Unit = { // check useHeader if (useHeader) { val headers = row.schema.fieldNames var i = 0 for (x <- headers) { val headerColumnSCD = new SpreadSheetCellDAO(x, "", "", MSExcelUtil.getCellAddressA1Format(currentRowNum, i), defaultSheetName) recordWriter.write(NullWritable.get(), headerColumnSCD) i += 1 } currentRowNum += 1 useHeader = false } // for each value in the row if (row.size>0) { var currentColumnNum = 0; val simpleObject = new Array[AnyRef](row.size) for (i <- 0 to row.size - 1) { // for each element of the row val obj = row.get(i) if ((obj.isInstanceOf[Seq[String]]) && (obj.asInstanceOf[Seq[String]].length==5)) { val formattedValue = obj.asInstanceOf[Seq[String]](0) val comment = obj.asInstanceOf[Seq[String]](1) val formula = obj.asInstanceOf[Seq[String]](2) val address = obj.asInstanceOf[Seq[String]](3) val sheetName = obj.asInstanceOf[Seq[String]](4) simpleObject(i) = new SpreadSheetCellDAO(formattedValue,comment,formula,address,sheetName) } else { simpleObject(i)=obj.asInstanceOf[AnyRef] } } // convert row to spreadsheetcellDAO val spreadSheetCellDAORow = simpleConverter.getSpreadSheetCellDAOfromSimpleDataType(simpleObject, defaultSheetName, currentRowNum) // write it for (x<- spreadSheetCellDAORow) { recordWriter.write(NullWritable.get(), x) } } currentRowNum += 1 } override def close(): Unit = { recordWriter.close(context) currentRowNum = 0; } }
Example 77
Source File: SpecificPrimitivesSpec.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
import test._ import org.specs2.mutable.Specification import java.sql.{Date, Timestamp} import java.util.UUID class SpecificPrimitivesSpec extends Specification { "A case class with an `Int` field" should { "deserialize correctly" in { val record1 = AvroTypeProviderTest00(1) val record2 = AvroTypeProviderTest00(2) val records = List(record1, record2) SpecificTestUtil.verifyWriteAndRead(records) } } "A case class with an `Float` field" should { "deserialize correctly" in { val record1 = AvroTypeProviderTest01(1F) val record2 = AvroTypeProviderTest01(2F) val records = List(record1, record2) SpecificTestUtil.verifyWriteAndRead(records) } } "A case class with an `Long` field" should { "deserialize correctly" in { val record1 = AvroTypeProviderTest02(1L) val record2 = AvroTypeProviderTest02(2L) val records = List(record1, record2) SpecificTestUtil.verifyWriteAndRead(records) } } "A case class with an `Double` field" should { "deserialize correctly" in { val record1 = AvroTypeProviderTest03(1D) val record2 = AvroTypeProviderTest03(2D) val records = List(record1, record2) SpecificTestUtil.verifyWriteAndRead(records) } } "A case class with an `Boolean` field" should { "deserialize correctly" in { val record1 = AvroTypeProviderTest04(true) val record2 = AvroTypeProviderTest04(false) val records = List(record1, record2) SpecificTestUtil.verifyWriteAndRead(records) } } "A case class with an `String` field" should { "deserialize correctly" in { val record1 = AvroTypeProviderTest05("hello world") val record2 = AvroTypeProviderTest05("hello galaxy") val records = List(record1, record2) SpecificTestUtil.verifyWriteAndRead(records) } } "A case class with an `Null` field" should { "deserialize correctly" in { val record1 = AvroTypeProviderTest06(null) val record2 = AvroTypeProviderTest06(null) val records = List(record1, record2) SpecificTestUtil.verifyWriteAndRead(records) } } "A case class with an `Array[Bytes]` field" should { "deserialize correctly" in { val record1 = AvroTypeProviderTest69("hello world".getBytes) val record2 = AvroTypeProviderTest69("hello galaxy".getBytes) val records = List(record1, record2) SpecificTestUtil.verifyWriteAndRead(records) } } "A case class with a `logicalType` fields from .avsc" should { "deserialize correctly" in { val t1 = System.currentTimeMillis() val t2 = System.currentTimeMillis() val record1 = LogicalSc(BigDecimal(10.0).setScale(8), new Timestamp(Long.MaxValue), new Date(t1), UUID.randomUUID()) val record2 = LogicalSc(BigDecimal(10.0).setScale(8), new Timestamp(Long.MaxValue), new Date(t2), UUID.randomUUID()) val records = List(record1, record2) SpecificTestUtil.verifyWriteAndRead(records) } } }
Example 78
Source File: ActionsHandler.scala From spark-http-stream with BSD 2-Clause "Simplified" License | 5 votes |
package org.apache.spark.sql.execution.streaming.http

import java.util.Properties
import scala.collection.mutable.ArrayBuffer
import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.spark.internal.Logging
import org.apache.spark.sql.Row
import java.sql.Timestamp
import org.apache.spark.sql.types.StructType
import java.util.concurrent.atomic.AtomicInteger

trait ActionsHandler {
  // ActionHandlerEntries (defined outside this excerpt) appears to behave like a partial function
  // from action name to response map, as the NullActionsHandler implementation below suggests.
  def listActionHandlerEntries(requestBody: Map[String, Any]): ActionHandlerEntries;
  def destroy();
}

trait ActionsHandlerFactory {
  def createInstance(params: Params): ActionsHandler;
}

abstract class AbstractActionsHandler extends ActionsHandler {
  def getRequiredParam(requestBody: Map[String, Any], key: String): Any = {
    val opt = requestBody.get(key);
    if (opt.isEmpty) {
      throw new MissingRequiredRequestParameterException(key);
    }
    opt.get;
  }

  override def destroy() = {
  }
}

class NullActionsHandler extends AbstractActionsHandler {
  override def listActionHandlerEntries(requestBody: Map[String, Any]): ActionHandlerEntries =
    new ActionHandlerEntries() {
      def apply(action: String) = Map[String, Any](); //yes, do nothing
      def isDefinedAt(action: String) = false;
    };
}

//rich row with extra info: id, time stamp, ...
case class RowEx(originalRow: Row, batchId: Long, offsetInBatch: Long, timestamp: Timestamp) {
  def withTimestamp(): Row = Row.fromSeq(originalRow.toSeq :+ timestamp);
  def withId(): Row = Row.fromSeq(originalRow.toSeq :+ s"$batchId-$offsetInBatch");
  def extra: (Long, Long, Timestamp) = { (batchId, offsetInBatch, timestamp) };
}

trait SendStreamActionSupport {
  def onReceiveStream(topic: String, rows: Array[RowEx]);
  def getRequiredParam(requestBody: Map[String, Any], key: String): Any;

  val listeners = ArrayBuffer[StreamListener]();

  def addListener(listener: StreamListener): this.type = {
    listeners += listener;
    this;
  }

  protected def notifyListeners(topic: String, data: Array[RowEx]) {
    listeners.foreach { _.onArrive(topic, data); }
  }

  def handleSendStream(requestBody: Map[String, Any]): Map[String, Any] = {
    val topic = getRequiredParam(requestBody, "topic").asInstanceOf[String];
    val batchId = getRequiredParam(requestBody, "batchId").asInstanceOf[Long];
    val rows = getRequiredParam(requestBody, "rows").asInstanceOf[Array[Row]];
    val ts = new Timestamp(System.currentTimeMillis());

    var index = -1;
    val rows2 = rows.map { row ⇒
      index += 1;
      RowEx(Row.fromSeq(row.toSeq), batchId, index, ts)
    }

    onReceiveStream(topic, rows2);
    notifyListeners(topic, rows2);
    Map("rowsCount" -> rows.size);
  }
}
Example 79
Source File: SSHOrder.scala From Aton with GNU General Public License v3.0 | 5 votes |
package model import java.sql.Timestamp case class SSHOrder( id: Long, sentDatetime: Timestamp, superUser: Boolean, interrupt: Boolean, command: String, webUser: String) { def this(sentDatetime: Timestamp, superUser: Boolean, interrupt: Boolean, command: String, username: String) = this(0, sentDatetime, superUser, interrupt, command, username) def this(sentDatetime: Timestamp, superUser: Boolean, command: String) = this(0, sentDatetime, superUser, false, command, "") def this(sentDatetime: Timestamp, command: String, webUser: String) = this(0, sentDatetime, false, false, command, webUser) }
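A usage sketch for the auxiliary constructors above, assuming the model package is on the classpath: the id defaults to 0 and the send time is the current timestamp.

import java.sql.Timestamp
import model.SSHOrder

object SSHOrderUsage extends App {
  // Resolves to the (Timestamp, Boolean, String) constructor: interrupt=false, webUser="".
  val order = new SSHOrder(new Timestamp(System.currentTimeMillis()), false, "uptime")
  println(s"${order.command} queued at ${order.sentDatetime} (superUser=${order.superUser})")
}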
Example 80
Source File: ConnectedUserTable.scala From Aton with GNU General Public License v3.0 | 5 votes |
package model.table import java.sql.Timestamp import model.{ComputerState, ConnectedUser} import slick.driver.H2Driver.api._ import slick.lifted.{ForeignKeyQuery, ProvenShape} class ConnectedUserTable(tag: Tag) extends Table[ConnectedUser](tag, "CONNECTED_USER") { // Primary key def id: Rep[Int] = column[Int]("ID", O.PrimaryKey, O.AutoInc) // Date maps to java.sql.TimeStamp. // Ver: http://stackoverflow.com/questions/31351361/storing-date-and-time-into-mysql-using-slick-scala def computerStateRegisteredDate: Rep[Timestamp] = column[Timestamp]("COMPUTER_STATE_REGISTERED_DATE") // Other columns/attributes def computerStateComputerIp: Rep[String] = column[String]("COMPUTER_STATE_COMPUTER_IP") // Foreign key to Computer def computer: ForeignKeyQuery[ComputerStateTable, ComputerState] = foreignKey("CONNECTEC_USER_COMPUTER_STATE", (computerStateComputerIp, computerStateRegisteredDate), TableQuery[ComputerStateTable])(x => (x.computerIp, x.registeredDate), onUpdate = ForeignKeyAction.Restrict, onDelete = ForeignKeyAction.Cascade) // All tables need the * method with the type that it was created the table with. override def * : ProvenShape[ConnectedUser] = (id, username, computerStateComputerIp, computerStateRegisteredDate) <> (ConnectedUser.tupled, ConnectedUser.unapply) def username: Rep[String] = column[String]("USERNAME") }
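Slick maps column[Timestamp] to the database's TIMESTAMP type out of the box. A minimal table sketch using the same pattern as above; the table and column names are illustrative:

import java.sql.Timestamp
import slick.driver.H2Driver.api._

class EventTable(tag: Tag) extends Table[(Int, Timestamp)](tag, "EVENT") {
  def id: Rep[Int] = column[Int]("ID", O.PrimaryKey, O.AutoInc)
  def occurredAt: Rep[Timestamp] = column[Timestamp]("OCCURRED_AT")
  def * = (id, occurredAt)
}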
Example 81
Source File: SuggestionController.scala From Aton with GNU General Public License v3.0 | 5 votes |
package controllers import java.sql.Timestamp import java.util.Calendar import com.google.inject.Inject import model.form.SuggestionForm import model.{Role, Suggestion} import play.api.Environment import play.api.i18n.MessagesApi import services.{SuggestionService, UserService, state} import views.html._ import scala.concurrent.{ExecutionContext, Future} class SuggestionController @Inject()(suggestionService: SuggestionService, val messagesApi: MessagesApi)(implicit userService: UserService, executionContext: ExecutionContext, environment: Environment) extends ControllerWithNoAuthRequired { def home = AsyncStack { implicit request => implicit val (username: Option[String], isAdmin: Boolean) = loggedIn match { case Some(user) => (Some(user.username), user.role == Role.Administrator) case _ => (None, false) } if (isAdmin) { suggestionService.listAll.map { suggestions => Ok//(index(messagesApi("suggestion"), suggestionHome(SuggestionForm.form, suggestions))) } } else { Future.successful(Ok)//(index(messagesApi("suggestion"), suggestionHome(SuggestionForm.form, Seq.empty[Suggestion])))) } } def add = AsyncStack() { implicit request => implicit val (username: Option[String], isAdmin: Boolean) = loggedIn match { case Some(user) => (Some(user.username), user.role == Role.Administrator) case _ => (None, false) } SuggestionForm.form.bindFromRequest().fold( errorForm => Future.successful(Ok(errorForm.toString)), data => { val text = data.suggestion val suggestion = Suggestion(0, text, now, username) suggestionService.add(suggestion).map { case state.ActionCompleted => Redirect(routes.SuggestionController.home()) case _ => BadRequest } } ) } private def now = new Timestamp(Calendar.getInstance().getTime.getTime) }
Example 82
Source File: RowReaderTest.scala From filo with Apache License 2.0 | 5 votes |
package org.velvia.filo import org.joda.time.DateTime import java.sql.Timestamp import org.scalatest.FunSpec import org.scalatest.Matchers class RowReaderTest extends FunSpec with Matchers { val schema = Seq( VectorInfo("name", classOf[String]), VectorInfo("age", classOf[Int]), VectorInfo("timestamp", classOf[Timestamp]) ) val rows = Seq( (Some("Matthew Perry"), Some(18), Some(new Timestamp(10000L))), (Some("Michelle Pfeiffer"), None, Some(new Timestamp(10010L))), (Some("George C"), Some(59), None), (Some("Rich Sherman"), Some(26), Some(new Timestamp(10000L))) ) val csvRows = Seq( "Matthew Perry,18,1973-01-25T00Z", "Michelle Pfeiffer,,1970-07-08T00Z", "George C,59,", "Rich Sherman,26,1991-10-12T00Z" ).map(str => (str.split(',') :+ "").take(3)) def readValues[T](r: FastFiloRowReader, len: Int)(f: FiloRowReader => T): Seq[T] = { (0 until len).map { i => r.rowNo = i f(r) } } it("should extract from columns back to rows") { val columnData = RowToVectorBuilder.buildFromRows(rows.map(TupleRowReader).toIterator, schema, BuilderEncoder.SimpleEncoding) val chunks = Array(columnData("name"), columnData("age"), columnData("timestamp")) val types = schema.map(_.dataType) val reader = new FastFiloRowReader(chunks, types.toArray) readValues(reader, 4)(_.getString(0)) should equal ( Seq("Matthew Perry", "Michelle Pfeiffer", "George C", "Rich Sherman")) reader.rowNo = 1 reader.notNull(1) should equal (false) reader.as[Timestamp](2) should equal (new Timestamp(10010L)) } it("should write to columns from ArrayStringRowReader and read back properly") { val columnData = RowToVectorBuilder.buildFromRows(csvRows.map(ArrayStringRowReader).toIterator, schema, BuilderEncoder.SimpleEncoding) val chunks = Array(columnData("name"), columnData("age"), columnData("timestamp")) val types = schema.map(_.dataType) val reader = new FastFiloRowReader(chunks, types.toArray) readValues(reader, 4)(_.getString(0)) should equal ( Seq("Matthew Perry", "Michelle Pfeiffer", "George C", "Rich Sherman")) reader.rowNo = 1 reader.notNull(1) should equal (false) reader.as[Timestamp](2) should equal (new Timestamp(DateTime.parse("1970-07-08T00Z").getMillis)) } it("should read longs from timestamp strings from ArrayStringRowReader") { ArrayStringRowReader(csvRows.head).getLong(2) should equal (96768000000L) } import org.velvia.filo.{vectors => bv} it("should append to BinaryAppendableVector from Readers with RowReaderAppender") { val readers = rows.map(TupleRowReader) val appenders = Seq( new IntReaderAppender(bv.IntBinaryVector.appendingVector(10), 1), new LongReaderAppender(bv.LongBinaryVector.appendingVector(10), 2) ) readers.foreach { r => appenders.foreach(_.append(r)) } val bufs = appenders.map(_.appender.optimize().toFiloBuffer).toArray val reader = new FastFiloRowReader(bufs, Array(classOf[Int], classOf[Long])) readValues(reader, 4)(_.getInt(0)) should equal (Seq(18, 0, 59, 26)) reader.rowNo = 1 reader.notNull(0) should equal (false) } import RowReader._ it("should compare RowReaders using TypedFieldExtractor") { val readers = rows.map(TupleRowReader) StringFieldExtractor.compare(readers(1), readers(2), 0) should be > (0) IntFieldExtractor.compare(readers(0), readers(2), 1) should be < (0) TimestampFieldExtractor.compare(readers(0), readers(3), 2) should equal (0) // Ok, we should be able to compare the reader with the NA / None too IntFieldExtractor.compare(readers(1), readers(2), 1) should be < (0) } }
Example 83
Source File: FastFiloRowReaderBenchmark.scala From filo with Apache License 2.0 | 5 votes |
package org.velvia.filo import java.sql.Timestamp import org.openjdk.jmh.annotations.Benchmark import org.openjdk.jmh.annotations.BenchmarkMode import org.openjdk.jmh.annotations.{Mode, State, Scope} import org.openjdk.jmh.annotations.OutputTimeUnit import scalaxy.loops._ import scala.language.postfixOps import java.util.concurrent.TimeUnit @State(Scope.Thread) class FastFiloRowReaderBenchmark { import VectorReader._ // Ok, create an IntColumn and benchmark it. val numValues = 10000 val randomInts = (0 until numValues).map(i => util.Random.nextInt) val randomLongs = randomInts.map(_.toLong) val randomTs = randomLongs.map(l => new Timestamp(l)) val chunks = Array(VectorBuilder(randomInts).toFiloBuffer, VectorBuilder(randomLongs).toFiloBuffer, VectorBuilder(randomTs).toFiloBuffer) val clazzes = Array[Class[_]](classOf[Int], classOf[Long], classOf[Timestamp]) // According to @ktosopl, be sure to return some value if possible so that JVM won't // optimize out the method body. However JMH is apparently very good at avoiding this. // fastest loop possible using FiloVectorApply method @Benchmark @BenchmarkMode(Array(Mode.AverageTime)) @OutputTimeUnit(TimeUnit.MICROSECONDS) def createFastFiloRowReader(): RowReader = { new FastFiloRowReader(chunks, clazzes) } val fastReader = new FastFiloRowReader(chunks, clazzes) @Benchmark @BenchmarkMode(Array(Mode.Throughput)) @OutputTimeUnit(TimeUnit.SECONDS) def fastFiloRowReaderReadOne(): Int = { fastReader.setRowNo(0) if (fastReader.notNull(0)) fastReader.getInt(0) + 1 else 0 } }
Example 84
Source File: KustoResponseDeserializer.scala From azure-kusto-spark with Apache License 2.0 | 5 votes |
package com.microsoft.kusto.spark.datasource import java.sql.Timestamp import java.util import com.microsoft.azure.kusto.data.{KustoResultColumn, KustoResultSetTable, Results} import com.microsoft.kusto.spark.utils.DataTypeMapping import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.types.{StructType, _} import org.joda.time.DateTime import scala.collection.JavaConverters._ import scala.collection.mutable import scala.collection.mutable.ArrayBuffer object KustoResponseDeserializer { def apply(kustoResult: KustoResultSetTable): KustoResponseDeserializer = new KustoResponseDeserializer(kustoResult) } // Timespan columns are casted to strings in kusto side. A simple test to compare the translation to a Duration string // in the format of timespan resulted in less performance. One way was using a new expression that extends UnaryExpression, // second was by a udf function, both were less performant. case class KustoSchema(sparkSchema: StructType, toStringCastedColumns: Set[String]) class KustoResponseDeserializer(val kustoResult: KustoResultSetTable) { val schema: KustoSchema = getSchemaFromKustoResult private def getValueTransformer(valueType: String): Any => Any = { valueType.toLowerCase() match { case "string" => value: Any => value case "int64" => value: Any => value case "datetime" => value: Any => new Timestamp(new DateTime(value).getMillis) case "timespan" => value: Any => value case "sbyte" => value: Any => value case "long" => value: Any => value match { case i: Int => i.toLong case _ => value.asInstanceOf[Long] } case "double" => value: Any => value case "decimal" => value: Any => BigDecimal(value.asInstanceOf[String]) case "int" => value: Any => value case "int32" => value: Any => value case "bool" => value: Any => value case "real" => value: Any => value case _ => value: Any => value.toString } } private def getSchemaFromKustoResult: KustoSchema = { if (kustoResult.getColumns.isEmpty) { KustoSchema(StructType(List()), Set()) } else { val columns = kustoResult.getColumns KustoSchema(StructType(columns.map(col => StructField(col.getColumnName, DataTypeMapping.kustoTypeToSparkTypeMap.getOrElse(col.getColumnType.toLowerCase, StringType)))), columns.filter(c => c.getColumnType.equalsIgnoreCase("TimeSpan")).map(c => c.getColumnName).toSet) } } def getSchema: KustoSchema = { schema } def toRows: java.util.List[Row] = { val columnInOrder = kustoResult.getColumns val value: util.ArrayList[Row] = new util.ArrayList[Row](kustoResult.count()) // Calculate the transformer function for each column to use later by order val valueTransformers: mutable.Seq[Any => Any] = columnInOrder.map(col => getValueTransformer(col.getColumnType)) kustoResult.getData.asScala.foreach(row => { val genericRow = row.toArray().zipWithIndex.map( column => { if (column._1 == null) null else valueTransformers(column._2)(column._1) }) value.add(new GenericRowWithSchema(genericRow, schema.sparkSchema)) }) value } // private def getOrderedColumnName = { // val columnInOrder = ArrayBuffer.fill(kustoResult.getColumnNameToIndex.size()){ "" } // kustoResult.getColumns.foreach((columnIndexPair: KustoResultColumn) => columnInOrder(columnIndexPair.) = columnIndexPair._1) // columnInOrder // } }
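The datetime branch of getValueTransformer above, in isolation: any value joda-time can parse becomes a java.sql.Timestamp (assumes joda-time on the classpath; names are illustrative):

import java.sql.Timestamp
import org.joda.time.DateTime

object KustoDatetime {
  // Same conversion as the "datetime" case above.
  def toTimestamp(value: Any): Timestamp = new Timestamp(new DateTime(value).getMillis)

  def main(args: Array[String]): Unit =
    println(toTimestamp("2018-06-25T09:00:00Z"))
}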
Example 85
Source File: FileOutputIT.scala From sparta with Apache License 2.0 | 5 votes |
package com.stratio.sparta import java.sql.Timestamp import java.util.UUID import com.github.nscala_time.time.Imports._ import com.stratio.sparta.sdk.pipeline.output.{Output, OutputFormatEnum, SaveModeEnum} import org.apache.log4j.{Level, Logger} import org.apache.spark.sql.SQLContext import org.apache.spark.{SparkConf, SparkContext} import org.scalatest._ import scala.reflect.io.File class FileOutputIT extends FlatSpec with ShouldMatchers with BeforeAndAfterAll { self: FlatSpec => @transient var sc: SparkContext = _ override def beforeAll { Logger.getRootLogger.setLevel(Level.ERROR) sc = FileOutputIT.getNewLocalSparkContext(1, "test") } override def afterAll { sc.stop() System.clearProperty("spark.driver.port") } trait CommonValues { val sqlContext = SQLContext.getOrCreate(sc) import sqlContext.implicits._ val time = new Timestamp(DateTime.now.getMillis) val data = sc.parallelize(Seq(Person("Kevin", 18, time), Person("Kira", 21, time), Person("Ariadne", 26, time))).toDF val tmpPath: String = s"/tmp/sparta-test/${UUID.randomUUID().toString}" } trait WithEventData extends CommonValues { val properties = Map("path" -> tmpPath, "createDifferentFiles" -> "false") val output = new FileOutput("file-test", properties) } "FileOutputIT" should "save a dataframe" in new WithEventData { output.save(data, SaveModeEnum.Append, Map(Output.TimeDimensionKey -> "minute", Output.TableNameKey -> "person")) val source = new java.io.File(tmpPath).listFiles() val read = sqlContext.read.json(tmpPath).toDF read.count shouldBe(3) File("/tmp/sparta-test").deleteRecursively } } object FileOutputIT { def getNewLocalSparkContext(numExecutors: Int = 1, title: String): SparkContext = { val conf = new SparkConf().setMaster(s"local[$numExecutors]").setAppName(title) SparkContext.getOrCreate(conf) } } case class Person(name: String, age: Int, minute: Timestamp) extends Serializable
Example 86
Source File: CubeWriterHelper.scala From sparta with Apache License 2.0 | 5 votes |
package com.stratio.sparta.driver.writer import java.sql.{Date, Timestamp} import akka.event.slf4j.SLF4JLogging import com.stratio.sparta.driver.factory.SparkContextFactory import com.stratio.sparta.driver.step.Cube import com.stratio.sparta.sdk.pipeline.aggregation.cube.{DimensionValue, DimensionValuesTime, MeasuresValues} import com.stratio.sparta.sdk.pipeline.output.Output import com.stratio.sparta.sdk.pipeline.schema.TypeOp import org.apache.spark.sql._ import org.apache.spark.streaming.dstream.DStream object CubeWriterHelper extends SLF4JLogging { def writeCube(cube: Cube, outputs: Seq[Output], stream: DStream[(DimensionValuesTime, MeasuresValues)]): Unit = { stream.map { case (dimensionValuesTime, measuresValues) => toRow(cube, dimensionValuesTime, measuresValues) }.foreachRDD(rdd => { if (!rdd.isEmpty()) { val sparkSession = SparkContextFactory.sparkSessionInstance val cubeDf = sparkSession.createDataFrame(rdd, cube.schema) val extraOptions = Map(Output.TableNameKey -> cube.name) val cubeAutoCalculatedFieldsDf = WriterHelper.write(cubeDf, cube.writerOptions, extraOptions, outputs) TriggerWriterHelper.writeTriggers(cubeAutoCalculatedFieldsDf, cube.triggers, cube.name, outputs) } else log.debug("Empty event received") }) } private[driver] def toRow(cube: Cube, dimensionValuesT: DimensionValuesTime, measures: MeasuresValues): Row = { val measuresSorted = measuresValuesSorted(measures.values) val rowValues = dimensionValuesT.timeConfig match { case None => val dimensionValues = dimensionsValuesSorted(dimensionValuesT.dimensionValues) dimensionValues ++ measuresSorted case Some(timeConfig) => val timeValue = Seq(timeFromDateType(timeConfig.eventTime, cube.dateType)) val dimFilteredByTime = filterDimensionsByTime(dimensionValuesT.dimensionValues, timeConfig.timeDimension) val dimensionValues = dimensionsValuesSorted(dimFilteredByTime) ++ timeValue val measuresValuesWithTime = measuresSorted dimensionValues ++ measuresValuesWithTime } Row.fromSeq(rowValues) } private[driver] def dimensionsValuesSorted(dimensionValues: Seq[DimensionValue]): Seq[Any] = dimensionValues.sorted.map(dimVal => dimVal.value) private[driver] def measuresValuesSorted(measures: Map[String, Option[Any]]): Seq[Any] = measures.toSeq.sortWith(_._1 < _._1).map(measure => measure._2.getOrElse(null)) private[driver] def filterDimensionsByTime(dimensionValues: Seq[DimensionValue], timeDimension: String): Seq[DimensionValue] = dimensionValues.filter(dimensionValue => dimensionValue.dimension.name != timeDimension) private[driver] def timeFromDateType(time: Long, dateType: TypeOp.Value): Any = { dateType match { case TypeOp.Date | TypeOp.DateTime => new Date(time) case TypeOp.Long => time case TypeOp.Timestamp => new Timestamp(time) case _ => time.toString } } }
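timeFromDateType above turns an epoch-millis Long into the configured output type; for the Timestamp case that is simply new Timestamp(time). A tiny standalone equivalent (names are illustrative):

import java.sql.{Date, Timestamp}

object TimeAs {
  def timestamp(millis: Long): Timestamp = new Timestamp(millis)
  def date(millis: Long): Date = new Date(millis)

  def main(args: Array[String]): Unit =
    println(s"${timestamp(0L)} / ${date(0L)}")  // the epoch rendered as Timestamp and Date
}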
Example 87
Source File: CubeMakerTest.scala From sparta with Apache License 2.0 | 5 votes |
package com.stratio.sparta.driver.test.cube import java.sql.Timestamp import com.github.nscala_time.time.Imports._ import com.stratio.sparta.driver.step.{Cube, CubeOperations, Trigger} import com.stratio.sparta.driver.writer.WriterOptions import com.stratio.sparta.plugin.default.DefaultField import com.stratio.sparta.plugin.cube.field.datetime.DateTimeField import com.stratio.sparta.plugin.cube.operator.count.CountOperator import com.stratio.sparta.sdk.pipeline.aggregation.cube.{Dimension, DimensionValue, DimensionValuesTime, InputFields} import com.stratio.sparta.sdk.pipeline.schema.TypeOp import com.stratio.sparta.sdk.utils.AggregationTime import org.apache.spark.sql.Row import org.apache.spark.sql.types.{LongType, StringType, StructField, StructType, TimestampType} import org.apache.spark.streaming.TestSuiteBase import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class CubeMakerTest extends TestSuiteBase { val PreserverOrder = false def getEventOutput(timestamp: Timestamp, millis: Long): Seq[Seq[(DimensionValuesTime, InputFields)]] = { val dimensionString = Dimension("dim1", "eventKey", "identity", new DefaultField) val dimensionTime = Dimension("minute", "minute", "minute", new DateTimeField) val dimensionValueString1 = DimensionValue(dimensionString, "value1") val dimensionValueString2 = dimensionValueString1.copy(value = "value2") val dimensionValueString3 = dimensionValueString1.copy(value = "value3") val dimensionValueTs = DimensionValue(dimensionTime, timestamp) val tsMap = Row(timestamp) val valuesMap1 = InputFields(Row("value1", timestamp), 1) val valuesMap2 = InputFields(Row("value2", timestamp), 1) val valuesMap3 = InputFields(Row("value3", timestamp), 1) Seq(Seq( (DimensionValuesTime("cubeName", Seq(dimensionValueString1, dimensionValueTs)), valuesMap1), (DimensionValuesTime("cubeName", Seq(dimensionValueString2, dimensionValueTs)), valuesMap2), (DimensionValuesTime("cubeName", Seq(dimensionValueString3, dimensionValueTs)), valuesMap3) )) } }
Example 88
Source File: AvroOutputIT.scala From sparta with Apache License 2.0 | 5 votes |
package com.stratio.sparta.plugin.output.avro import java.sql.Timestamp import java.time.Instant import com.databricks.spark.avro._ import com.stratio.sparta.plugin.TemporalSparkContext import com.stratio.sparta.sdk.pipeline.output.{Output, SaveModeEnum} import org.apache.spark.sql.types._ import org.apache.spark.sql.{Row, SparkSession} import org.junit.runner.RunWith import org.scalatest._ import org.scalatest.junit.JUnitRunner import scala.reflect.io.File import scala.util.Random @RunWith(classOf[JUnitRunner]) class AvroOutputIT extends TemporalSparkContext with Matchers { trait CommonValues { val tmpPath: String = File.makeTemp().name val sparkSession = SparkSession.builder().config(sc.getConf).getOrCreate() val schema = StructType(Seq( StructField("name", StringType), StructField("age", IntegerType), StructField("minute", LongType) )) val data = sparkSession.createDataFrame(sc.parallelize(Seq( Row("Kevin", Random.nextInt, Timestamp.from(Instant.now).getTime), Row("Kira", Random.nextInt, Timestamp.from(Instant.now).getTime), Row("Ariadne", Random.nextInt, Timestamp.from(Instant.now).getTime) )), schema) } trait WithEventData extends CommonValues { val properties = Map("path" -> tmpPath) val output = new AvroOutput("avro-test", properties) } "AvroOutput" should "throw an exception when path is not present" in { an[Exception] should be thrownBy new AvroOutput("avro-test", Map.empty) } it should "throw an exception when empty path " in { an[Exception] should be thrownBy new AvroOutput("avro-test", Map("path" -> " ")) } it should "save a dataframe " in new WithEventData { output.save(data, SaveModeEnum.Append, Map(Output.TableNameKey -> "person")) val read = sparkSession.read.avro(s"$tmpPath/person") read.count should be(3) read should be eq data File(tmpPath).deleteRecursively File("spark-warehouse").deleteRecursively } }
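The rows above carry the current time as epoch milliseconds in a LongType column, produced with Timestamp.from(Instant.now).getTime. A small sketch of the Instant/Timestamp round trip this relies on (plain JVM code, no Spark needed):

import java.sql.Timestamp
import java.time.Instant

object InstantTimestampRoundTrip {
  def main(args: Array[String]): Unit = {
    val instant: Instant = Instant.now()
    val ts: Timestamp    = Timestamp.from(instant) // java.time -> JDBC type, keeps nanoseconds
    val millis: Long     = ts.getTime              // epoch milliseconds, as stored in the test rows
    val back: Instant    = ts.toInstant            // JDBC type -> java.time
    println(s"$instant -> $millis -> $back")
  }
}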
Example 89
Source File: CsvOutputIT.scala From sparta with Apache License 2.0 | 5 votes |
package com.stratio.sparta.plugin.output.csv import java.sql.Timestamp import java.time.Instant import com.databricks.spark.avro._ import com.stratio.sparta.plugin.TemporalSparkContext import com.stratio.sparta.sdk.pipeline.output.{Output, SaveModeEnum} import org.apache.spark.sql.types._ import org.apache.spark.sql.{Row, SparkSession} import org.junit.runner.RunWith import org.scalatest._ import org.scalatest.junit.JUnitRunner import scala.reflect.io.File import scala.util.Random @RunWith(classOf[JUnitRunner]) class CsvOutputIT extends TemporalSparkContext with Matchers { trait CommonValues { val tmpPath: String = File.makeTemp().name val sparkSession = SparkSession.builder().config(sc.getConf).getOrCreate() val schema = StructType(Seq( StructField("name", StringType), StructField("age", IntegerType), StructField("minute", LongType) )) val data = sparkSession.createDataFrame(sc.parallelize(Seq( Row("Kevin", Random.nextInt, Timestamp.from(Instant.now).getTime), Row("Kira", Random.nextInt, Timestamp.from(Instant.now).getTime), Row("Ariadne", Random.nextInt, Timestamp.from(Instant.now).getTime) )), schema) } trait WithEventData extends CommonValues { val properties = Map("path" -> tmpPath) val output = new CsvOutput("csv-test", properties) } "CsvOutput" should "throw an exception when path is not present" in { an[Exception] should be thrownBy new CsvOutput("csv-test", Map.empty) } it should "throw an exception when empty path " in { an[Exception] should be thrownBy new CsvOutput("csv-test", Map("path" -> " ")) } it should "save a dataframe " in new WithEventData { output.save(data, SaveModeEnum.Append, Map(Output.TableNameKey -> "person")) val read = sparkSession.read.csv(s"$tmpPath/person.csv") read.count should be(3) read should be eq data File(tmpPath).deleteRecursively File("spark-warehouse").deleteRecursively } }
Example 90
Source File: TestJodaTimeVersionedEntityRepository.scala From slick-repo with MIT License | 5 votes |
package com.byteslounge.slickrepo.repository import java.sql.Timestamp import com.byteslounge.slickrepo.meta.{Versioned, VersionedEntity} import org.joda.time.Instant import slick.ast.BaseTypedType import com.byteslounge.slickrepo.scalaversion.JdbcProfile import com.byteslounge.slickrepo.version.JodaTimeVersionImplicits.instantVersionGenerator case class TestJodaTimeVersionedEntity(override val id: Option[Int], price: Double, override val version: Option[Instant]) extends VersionedEntity[TestJodaTimeVersionedEntity, Int, Instant] { def withId(id: Int): TestJodaTimeVersionedEntity = this.copy(id = Some(id)) def withVersion(version: Instant): TestJodaTimeVersionedEntity = this.copy(version = Some(version)) } class TestJodaTimeVersionedEntityRepository(override val driver: JdbcProfile) extends VersionedRepository[TestJodaTimeVersionedEntity, Int, Instant](driver) { import driver.api._ implicit val jodaTimeInstantToSqlTimestampMapper = MappedColumnType.base[Instant, Timestamp]( { instant => new java.sql.Timestamp(instant.getMillis) }, { sqlTimestamp => new Instant(sqlTimestamp.getTime) }) val pkType = implicitly[BaseTypedType[Int]] val versionType = implicitly[BaseTypedType[Instant]] val tableQuery = TableQuery[TestJodaTimeVersionedEntities] type TableType = TestJodaTimeVersionedEntities class TestJodaTimeVersionedEntities(tag: slick.lifted.Tag) extends Table[TestJodaTimeVersionedEntity](tag, "TJTV_ENTITY") with Versioned[Int, Instant] { def id = column[Int]("ID", O.PrimaryKey) def price = column[Double]("PRICE") def version = column[Instant]("VERSION") def * = (id.?, price, version.?) <> ((TestJodaTimeVersionedEntity.apply _).tupled, TestJodaTimeVersionedEntity.unapply) } }
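The repository stores its Joda Instant version column as a java.sql.Timestamp through a Slick MappedColumnType. Stripped of Slick, the two conversion functions it needs look like this (joda-time on the classpath is the only assumption):

import java.sql.Timestamp
import org.joda.time.Instant

object JodaTimestampConversions {
  // Joda Instant -> JDBC Timestamp (direction used when writing the VERSION column)
  def toSqlTimestamp(instant: Instant): Timestamp = new Timestamp(instant.getMillis)

  // JDBC Timestamp -> Joda Instant (direction used when reading it back)
  def toJodaInstant(ts: Timestamp): Instant = new Instant(ts.getTime)

  def main(args: Array[String]): Unit = {
    val original = Instant.now()
    val roundTripped = toJodaInstant(toSqlTimestamp(original))
    assert(roundTripped == original) // millisecond precision survives the round trip
    println(roundTripped)
  }
}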
Example 91
Source File: PredicatePushdownSuite.scala From spark-exasol-connector with Apache License 2.0 | 5 votes |
package com.exasol.spark import java.sql.Timestamp import org.apache.spark.sql.functions.col import com.holdenkarau.spark.testing.DataFrameSuiteBase import org.scalatest.funsuite.AnyFunSuite class PredicatePushdownSuite extends AnyFunSuite with BaseDockerSuite with DataFrameSuiteBase { test("with where clause build from filters: filter") { createDummyTable() import spark.implicits._ val df = spark.read .format("exasol") .option("host", container.host) .option("port", s"${container.port}") .option("query", s"SELECT * FROM $EXA_SCHEMA.$EXA_TABLE") .load() .filter($"id" < 3) .filter(col("city").like("Ber%")) .select("id", "city") val result = df.collect().map(x => (x.getLong(0), x.getString(1))).toSet assert(result.size === 1) assert(result === Set((1, "Berlin"))) } test("with where clause build from filters: createTempView and spark.sql") { createDummyTable() val df = spark.read .format("exasol") .option("host", container.host) .option("port", s"${container.port}") .option("query", s"SELECT * FROM $EXA_SCHEMA.$EXA_TABLE") .load() df.createOrReplaceTempView("myTable") val myDF = spark .sql("SELECT id, city FROM myTable WHERE id BETWEEN 1 AND 3 AND name < 'Japan'") val result = myDF.collect().map(x => (x.getLong(0), x.getString(1))).toSet assert(result.size === 2) assert(result === Set((1, "Berlin"), (2, "Paris"))) } test("date and timestamp should be read and filtered correctly") { import java.sql.Date createDummyTable() val df = spark.read .format("exasol") .option("host", container.host) .option("port", s"${container.port}") .option("query", s"SELECT date_info, updated_at FROM $EXA_SCHEMA.$EXA_TABLE") .load() val minTimestamp = Timestamp.valueOf("2017-12-30 00:00:00.0000") val testDate = Date.valueOf("2017-12-31") val resultDate = df.collect().map(_.getDate(0)) assert(resultDate.contains(testDate)) val resultTimestamp = df.collect().map(_.getTimestamp(1)).map(x => x.after(minTimestamp)) assert(!resultTimestamp.contains(false)) val filteredByDateDF = df.filter(col("date_info") === testDate) assert(filteredByDateDF.count() === 1) val filteredByTimestampDF = df.filter(col("updated_at") < minTimestamp) assert(filteredByTimestampDF.count() === 0) } test("count should be performed successfully") { createDummyTable() val df = spark.read .format("exasol") .option("host", container.host) .option("port", s"${container.port}") .option("query", s"SELECT * FROM $EXA_SCHEMA.$EXA_TABLE") .load() val result = df.count() assert(result === 3) } }
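The date and timestamp test builds its boundary values with Timestamp.valueOf and Date.valueOf, which only accept the JDBC escape formats yyyy-[m]m-[d]d hh:mm:ss[.f...] and yyyy-[m]m-[d]d respectively. A quick standalone illustration of those constructors and the after comparison used above:

import java.sql.{Date, Timestamp}

object JdbcLiteralFormats {
  def main(args: Array[String]): Unit = {
    val minTimestamp = Timestamp.valueOf("2017-12-30 00:00:00.0000") // fractional seconds optional
    val testDate     = Date.valueOf("2017-12-31")

    val later = Timestamp.valueOf("2018-01-01 12:00:00")
    println(later.after(minTimestamp)) // true, same check the test applies per row
    println(testDate)                  // 2017-12-31

    // A string in any other format throws IllegalArgumentException instead of returning null:
    // Timestamp.valueOf("30/12/2017")
  }
}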
Example 92
Source File: StructuredNetworkWordCountWindowed.scala From sparkoscope with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples.sql.streaming import java.sql.Timestamp import org.apache.spark.sql.SparkSession import org.apache.spark.sql.functions._ object StructuredNetworkWordCountWindowed { def main(args: Array[String]) { if (args.length < 3) { System.err.println("Usage: StructuredNetworkWordCountWindowed <hostname> <port>" + " <window duration in seconds> [<slide duration in seconds>]") System.exit(1) } val host = args(0) val port = args(1).toInt val windowSize = args(2).toInt val slideSize = if (args.length == 3) windowSize else args(3).toInt if (slideSize > windowSize) { System.err.println("<slide duration> must be less than or equal to <window duration>") } val windowDuration = s"$windowSize seconds" val slideDuration = s"$slideSize seconds" val spark = SparkSession .builder .appName("StructuredNetworkWordCountWindowed") .getOrCreate() import spark.implicits._ // Create DataFrame representing the stream of input lines from connection to host:port val lines = spark.readStream .format("socket") .option("host", host) .option("port", port) .option("includeTimestamp", true) .load() // Split the lines into words, retaining timestamps val words = lines.as[(String, Timestamp)].flatMap(line => line._1.split(" ").map(word => (word, line._2)) ).toDF("word", "timestamp") // Group the data by window and word and compute the count of each group val windowedCounts = words.groupBy( window($"timestamp", windowDuration, slideDuration), $"word" ).count().orderBy("window") // Start running the query that prints the windowed word counts to the console val query = windowedCounts.writeStream .outputMode("complete") .format("console") .option("truncate", "false") .start() query.awaitTermination() } } // scalastyle:on println
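The Timestamp-relevant part of the query above is the groupBy on window($"timestamp", windowDuration, slideDuration), which buckets each row by its event-time column. The same function can be exercised in batch mode on a few hand-made Timestamps, which makes the windowing easier to inspect (a local SparkSession is assumed):

import java.sql.Timestamp

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.window

object WindowOnTimestampSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder
      .appName("WindowOnTimestampSketch")
      .master("local[2]")
      .getOrCreate()
    import spark.implicits._

    val words = Seq(
      ("apache", Timestamp.valueOf("2018-11-12 09:42:05")),
      ("spark",  Timestamp.valueOf("2018-11-12 09:42:25")),
      ("apache", Timestamp.valueOf("2018-11-12 09:42:45"))
    ).toDF("word", "timestamp")

    // 30-second windows sliding every 15 seconds, the same shape as the streaming query
    words.groupBy(window($"timestamp", "30 seconds", "15 seconds"), $"word")
      .count()
      .orderBy("window")
      .show(truncate = false)

    spark.stop()
  }
}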
Example 93
Source File: XGBoostBigModelTimeSeries.scala From uberdata with Apache License 2.0 | 5 votes |
package org.apache.spark.ml import java.sql.Timestamp import eleflow.uberdata.IUberdataForecastUtil import eleflow.uberdata.core.data.DataTransformer import eleflow.uberdata.enums.SupportedAlgorithm import ml.dmlc.xgboost4j.scala.spark.XGBoostModel import org.apache.spark.annotation.DeveloperApi import org.apache.spark.ml.linalg.{VectorUDT, Vector => SparkVector} import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.param.shared.HasTimeCol import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.Dataset import org.apache.spark.sql.types.{StructField, _} class XGBoostBigModelTimeSeries[I](override val uid: String, override val models: Seq[(ParamMap, XGBoostModel)]) extends XGBoostBigModel[I](uid, models) with HasTimeCol{ def setTimecol(time: String): this.type = set(timeCol, Some(time)) override def transform(dataSet: Dataset[_]): DataFrame = { val prediction = predict(dataSet) val rows = dataSet.rdd .map { case (row: Row) => (DataTransformer.toFloat(row.getAs($(idCol))), (row.getAs[SparkVector](IUberdataForecastUtil.FEATURES_COL_NAME), row.getAs[java.sql.Timestamp]($(timeCol).get))) } .join(prediction) .map { case (id, ((features, time), predictValue)) => Row(id, features, time, SupportedAlgorithm.XGBoostAlgorithm.toString, predictValue) } dataSet.sqlContext.createDataFrame(rows, transformSchema(dataSet.schema)) } @DeveloperApi override def transformSchema(schema: StructType): StructType = StructType(Array( StructField($(idCol), FloatType), StructField(IUberdataForecastUtil.FEATURES_COL_NAME, new VectorUDT), StructField($(timeCol).get, TimestampType), StructField(IUberdataForecastUtil.ALGORITHM, StringType), StructField("prediction", FloatType) ) ) }
Example 94
Source File: MergeProjection.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command.mutation.merge import java.sql.{Date, Timestamp} import org.apache.spark.sql.{CarbonDatasourceHadoopRelation, Dataset, Row, SparkSession} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, GenericInternalRow, GenericRowWithSchema, InterpretedMutableProjection, Projection} import org.apache.spark.sql.catalyst.util.DateTimeUtils case class MergeProjection( @transient tableCols: Seq[String], @transient statusCol : String, @transient ds: Dataset[Row], @transient rltn: CarbonDatasourceHadoopRelation, @transient sparkSession: SparkSession, @transient mergeAction: MergeAction) { private val cutOffDate = Integer.MAX_VALUE >> 1 val isUpdate = mergeAction.isInstanceOf[UpdateAction] val isDelete = mergeAction.isInstanceOf[DeleteAction] def apply(row: GenericRowWithSchema): InternalRow = { // TODO we can avoid these multiple conversions if this is added as a SparkPlan node. val values = row.values.map { case s: String => org.apache.spark.unsafe.types.UTF8String.fromString(s) case d: java.math.BigDecimal => org.apache.spark.sql.types.Decimal.apply(d) case b: Array[Byte] => org.apache.spark.unsafe.types.UTF8String.fromBytes(b) case d: Date => DateTimeUtils.fromJavaDate(d) case t: Timestamp => DateTimeUtils.fromJavaTimestamp(t) case value => value } projection(new GenericInternalRow(values)).asInstanceOf[GenericInternalRow] } val (projection, output) = generateProjection private def generateProjection: (Projection, Array[Expression]) = { val existingDsOutput = rltn.carbonRelation.schema.toAttributes val colsMap = mergeAction match { case UpdateAction(updateMap) => updateMap case InsertAction(insertMap) => insertMap case _ => null } if (colsMap != null) { val output = new Array[Expression](tableCols.length) val expecOutput = new Array[Expression](tableCols.length) colsMap.foreach { case (k, v) => val tableIndex = tableCols.indexOf(k.toString().toLowerCase) if (tableIndex < 0) { throw new CarbonMergeDataSetException(s"Mapping is wrong $colsMap") } output(tableIndex) = v.expr.transform { case a: Attribute if !a.resolved => ds.queryExecution.analyzed.resolveQuoted(a.name, sparkSession.sessionState.analyzer.resolver).get } expecOutput(tableIndex) = existingDsOutput.find(_.name.equalsIgnoreCase(tableCols(tableIndex))).get } if (output.contains(null)) { throw new CarbonMergeDataSetException(s"Not all columns are mapped") } (new InterpretedMutableProjection(output++Seq( ds.queryExecution.analyzed.resolveQuoted(statusCol, sparkSession.sessionState.analyzer.resolver).get), ds.queryExecution.analyzed.output), expecOutput) } else { (null, null) } } }
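Before the values reach the InternalRow, the projection rewrites java.sql.Date and java.sql.Timestamp with DateTimeUtils, because Spark's internal row format stores dates as days since the epoch and timestamps as microseconds since the epoch. A minimal illustration of those two conversions (DateTimeUtils is a catalyst-internal utility, so the exact signatures can shift between Spark versions):

import java.sql.{Date, Timestamp}

import org.apache.spark.sql.catalyst.util.DateTimeUtils

object InternalRepresentationSketch {
  def main(args: Array[String]): Unit = {
    val ts = Timestamp.valueOf("2018-11-12 09:42:00")
    val d  = Date.valueOf("2018-11-12")

    val micros: Long = DateTimeUtils.fromJavaTimestamp(ts) // microseconds since the epoch
    val days: Int    = DateTimeUtils.fromJavaDate(d)       // days since the epoch

    println(s"$ts -> $micros microseconds")
    println(s"$d -> $days days")
  }
}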
Example 95
Source File: TimestampDataTypeNullDataTest.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.spark.testsuite.directdictionary import java.io.File import java.sql.Timestamp import org.apache.spark.sql.Row import org.apache.spark.sql.hive.HiveContext import org.scalatest.BeforeAndAfterAll import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.keygenerator.directdictionary.timestamp.TimeStampGranularityConstants import org.apache.carbondata.core.util.CarbonProperties import org.apache.spark.sql.test.util.QueryTest class TimestampDataTypeNullDataTest extends QueryTest with BeforeAndAfterAll { var hiveContext: HiveContext = _ override def beforeAll { try { CarbonProperties.getInstance() .addProperty(TimeStampGranularityConstants.CARBON_CUTOFF_TIMESTAMP, "2000-12-13 02:10.00.0") CarbonProperties.getInstance() .addProperty(TimeStampGranularityConstants.CARBON_TIME_GRANULARITY, TimeStampGranularityConstants.TIME_GRAN_SEC.toString ) sql( """CREATE TABLE IF NOT EXISTS timestampTyeNullData (ID Int, dateField Timestamp, country String, name String, phonetype String, serialname String, salary Int) STORED AS carbondata""" ) CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd") val csvFilePath = s"$resourcesPath/datasamplenull.csv" sql("LOAD DATA LOCAL INPATH '" + csvFilePath + "' INTO TABLE timestampTyeNullData").collect(); } catch { case x: Throwable => CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT) } } test("SELECT max(dateField) FROM timestampTyeNullData where dateField is not null") { checkAnswer( sql("SELECT max(dateField) FROM timestampTyeNullData where dateField is not null"), Seq(Row(Timestamp.valueOf("2015-07-23 00:00:00.0")) ) ) } test("SELECT * FROM timestampTyeNullData where dateField is null") { checkAnswer( sql("SELECT dateField FROM timestampTyeNullData where dateField is null"), Seq(Row(null) )) } override def afterAll { sql("drop table timestampTyeNullData") CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT) CarbonProperties.getInstance().addProperty("carbon.direct.dictionary", "false") } }
Example 96
Source File: TimestampDataTypeDirectDictionaryWithNoDictTestCase.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.spark.testsuite.directdictionary import java.sql.Timestamp import org.apache.spark.sql.Row import org.apache.spark.sql.hive.HiveContext import org.scalatest.BeforeAndAfterAll import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.keygenerator.directdictionary.timestamp.TimeStampGranularityConstants import org.apache.carbondata.core.util.CarbonProperties import org.apache.spark.sql.test.util.QueryTest class TimestampDataTypeDirectDictionaryWithNoDictTestCase extends QueryTest with BeforeAndAfterAll { var hiveContext: HiveContext = _ override def beforeAll { CarbonProperties.getInstance() .addProperty(TimeStampGranularityConstants.CARBON_CUTOFF_TIMESTAMP, "2000-12-13 02:10.00.0") CarbonProperties.getInstance() .addProperty(TimeStampGranularityConstants.CARBON_TIME_GRANULARITY, TimeStampGranularityConstants.TIME_GRAN_SEC.toString ) CarbonProperties.getInstance().addProperty("carbon.direct.dictionary", "true") sql( """ CREATE TABLE IF NOT EXISTS directDictionaryTable (empno String, doj Timestamp, salary Int) STORED AS carbondata""" ) val csvFilePath = s"$resourcesPath/datasample.csv" sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE directDictionaryTable OPTIONS" + "('DELIMITER'= ',', 'QUOTECHAR'= '\"')") } test("select doj from directDictionaryTable") { checkAnswer( sql("select doj from directDictionaryTable"), Seq(Row(Timestamp.valueOf("2016-03-14 15:00:09.0")), Row(Timestamp.valueOf("2016-04-14 15:00:09.0")), Row(null) ) ) } test("select doj from directDictionaryTable with equals filter") { checkAnswer( sql("select doj from directDictionaryTable where doj='2016-03-14 15:00:09'"), Seq(Row(Timestamp.valueOf("2016-03-14 15:00:09"))) ) } test("select doj from directDictionaryTable with greater than filter") { checkAnswer( sql("select doj from directDictionaryTable where doj>'2016-03-14 15:00:09'"), Seq(Row(Timestamp.valueOf("2016-04-14 15:00:09"))) ) } override def afterAll { sql("drop table directDictionaryTable") CarbonProperties.getInstance().addProperty("carbon.direct.dictionary", "false") } }
Example 97
Source File: TimestampNoDictionaryColumnTestCase.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.spark.testsuite.directdictionary import java.sql.Timestamp import org.apache.spark.sql.Row import org.scalatest.BeforeAndAfterAll import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.util.CarbonProperties import org.apache.spark.sql.test.util.QueryTest class TimestampNoDictionaryColumnTestCase extends QueryTest with BeforeAndAfterAll { override def beforeAll { CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy") sql("drop table if exists timestamp_nodictionary") sql( """ CREATE TABLE IF NOT EXISTS timestamp_nodictionary (empno int, empname String, designation String, doj Timestamp, workgroupcategory int, workgroupcategoryname String, projectcode int, projectjoindate Timestamp, projectenddate Timestamp, attendance int, utilization int, salary Int) STORED AS carbondata""" ) val csvFilePath = s"$resourcesPath/data_beyond68yrs.csv" sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE timestamp_nodictionary OPTIONS" + "('DELIMITER'= ',', 'QUOTECHAR'= '\"')") } test("select projectjoindate, projectenddate from timestamp_nodictionary") { checkAnswer( sql("select projectjoindate, projectenddate from timestamp_nodictionary"), Seq(Row(Timestamp.valueOf("2000-01-29 00:00:00.0"), Timestamp.valueOf("2016-06-29 00:00:00.0")), Row(Timestamp.valueOf("1800-02-17 00:00:00.0"), Timestamp.valueOf("1900-11-29 00:00:00.0")), Row(null, Timestamp.valueOf("2016-05-29 00:00:00.0")), Row(null, Timestamp.valueOf("2016-11-30 00:00:00.0")), Row(Timestamp.valueOf("3000-10-22 00:00:00.0"), Timestamp.valueOf("3002-11-15 00:00:00.0")), Row(Timestamp.valueOf("1802-06-29 00:00:00.0"), Timestamp.valueOf("1902-12-30 00:00:00.0")), Row(null, Timestamp.valueOf("2016-12-30 00:00:00.0")), Row(Timestamp.valueOf("2038-11-14 00:00:00.0"), Timestamp.valueOf("2041-12-29 00:00:00.0")), Row(null, null), Row(Timestamp.valueOf("2014-09-15 00:00:00.0"), Timestamp.valueOf("2016-05-29 00:00:00.0")) ) ) } test("select projectjoindate, projectenddate from timestamp_nodictionary where in filter") { checkAnswer( sql("select projectjoindate, projectenddate from timestamp_nodictionary where projectjoindate in" + "('1800-02-17 00:00:00','3000-10-22 00:00:00') or projectenddate in ('1900-11-29 00:00:00'," + "'3002-11-15 00:00:00','2041-12-29 00:00:00')"), Seq(Row(Timestamp.valueOf("1800-02-17 00:00:00.0"), Timestamp.valueOf("1900-11-29 00:00:00.0")), Row(Timestamp.valueOf("3000-10-22 00:00:00.0"), Timestamp.valueOf("3002-11-15 00:00:00.0")), Row(Timestamp.valueOf("2038-11-14 00:00:00.0"), Timestamp.valueOf("2041-12-29 00:00:00.0"))) ) } override def afterAll { CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT) sql("drop table timestamp_nodictionary") } }
Example 98
Source File: Commons.scala From spark-structured-streaming with MIT License | 5 votes |
package com.kafkaToSparkToCass import java.sql.Timestamp import java.text.{DateFormat, SimpleDateFormat} object Commons { case class UserEvent(user_id: String, time: Timestamp, event: String) extends Serializable def getTimeStamp(timeStr: String): Timestamp = { val dateFormat1: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") val dateFormat2: DateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss") val date: Option[Timestamp] = { try { Some(new Timestamp(dateFormat1.parse(timeStr).getTime)) } catch { case e: java.text.ParseException => Some(new Timestamp(dateFormat2.parse(timeStr).getTime)) } } date.getOrElse(Timestamp.valueOf(timeStr)) } }
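getTimeStamp above tries two SimpleDateFormat patterns and only then falls back to Timestamp.valueOf, using the catch block of the first parse to drive the second. An equivalent chain can be expressed with scala.util.Try so that each pattern is attempted in turn and the Timestamp.valueOf fallback is genuinely the last resort (an alternative sketch, not the project's code):

import java.sql.Timestamp
import java.text.SimpleDateFormat

import scala.util.{Success, Try}

object TimestampParsingSketch {
  private val patterns = Seq("yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd'T'HH:mm:ss")

  def parse(timeStr: String): Timestamp =
    patterns.view
      .map(p => Try(new Timestamp(new SimpleDateFormat(p).parse(timeStr).getTime)))
      .collectFirst { case Success(ts) => ts }
      .getOrElse(Timestamp.valueOf(timeStr)) // last resort: JDBC escape format

  def main(args: Array[String]): Unit = {
    println(parse("2018-11-12 09:42:00"))
    println(parse("2018-11-12T09:42:00"))
  }
}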
Example 99
Source File: Statements.scala From spark-structured-streaming with MIT License | 5 votes |
package com.kafkaToSparkToCass import java.sql.Timestamp import com.datastax.driver.core.Session object Statements extends Serializable { def cql(id: String, time: Timestamp, ename: String): String = s""" insert into my_keyspace.test_table (user_id,time,event) values('$id', '$time', '$ename event')""" def createKeySpaceAndTable(session: Session, dropTable: Boolean = false) = { session.execute( """CREATE KEYSPACE if not exists my_keyspace WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 };""") if (dropTable) session.execute("""drop table if exists my_keyspace.test_table""") session.execute( """create table if not exists my_keyspace.test_table ( user_id text, time timestamp, event text, primary key((user_id), time) ) WITH CLUSTERING ORDER BY (time DESC)""") } }
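The statement above interpolates the Timestamp directly into the CQL string, so it depends on Timestamp.toString producing a literal Cassandra will accept. With the same DataStax 3.x Session the value can instead be bound as a typed parameter through a prepared statement; java.sql.Timestamp extends java.util.Date, which is how the 3.x driver represents CQL timestamps, so it can be passed to bind as-is. A sketch under those assumptions, reusing the keyspace and table from the example:

import java.sql.Timestamp

import com.datastax.driver.core.{PreparedStatement, Session}

object PreparedInsertSketch {
  // In real code the statement would be prepared once and the BoundStatement reused.
  def insertEvent(session: Session, id: String, time: Timestamp, ename: String): Unit = {
    val prepared: PreparedStatement = session.prepare(
      "insert into my_keyspace.test_table (user_id, time, event) values (?, ?, ?)")
    session.execute(prepared.bind(id, time, s"$ename event"))
  }
}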
Example 100
Source File: database.scala From franklin with Apache License 2.0 | 5 votes |
package com.azavea.franklin import cats.implicits._ import com.azavea.stac4s.TemporalExtent import doobie.implicits.javasql._ import doobie.util.meta.Meta import doobie.util.{Read, Write} import io.circe.{Decoder, Encoder} import java.sql.Timestamp import java.time.Instant package object database extends CirceJsonbMeta with GeotrellisWktMeta with Filterables { implicit val instantMeta: Meta[Instant] = Meta[Timestamp].imap(_.toInstant)(Timestamp.from) implicit val instantRead: Read[Instant] = Read[Timestamp].imap(_.toInstant)(Timestamp.from) implicit val instantWrite: Write[Instant] = Write[Timestamp].imap(_.toInstant)(Timestamp.from) def stringToInstant: String => Either[Throwable, Instant] = (s: String) => Either.catchNonFatal(Instant.parse(s)) def temporalExtentToString(te: TemporalExtent): String = { te.value match { case Some(start) :: Some(end) :: _ if start != end => s"${start.toString}/${end.toString}" case Some(start) :: Some(end) :: _ if start == end => s"${start.toString}" case Some(start) :: None :: _ => s"${start.toString}/.." case None :: Some(end) :: _ => s"../${end.toString}" } } def temporalExtentFromString(str: String): Either[String, TemporalExtent] = { str.split("/").toList match { case ".." :: endString :: _ => val parsedEnd: Either[Throwable, Instant] = stringToInstant(endString) parsedEnd match { case Left(_) => Left(s"Could not decode instant: $str") case Right(end: Instant) => Right(TemporalExtent(None, end)) } case startString :: ".." :: _ => val parsedStart: Either[Throwable, Instant] = stringToInstant(startString) parsedStart match { case Left(_) => Left(s"Could not decode instant: $str") case Right(start: Instant) => Right(TemporalExtent(start, None)) } case startString :: endString :: _ => val parsedStart: Either[Throwable, Instant] = stringToInstant(startString) val parsedEnd: Either[Throwable, Instant] = stringToInstant(endString) (parsedStart, parsedEnd).tupled match { case Left(_) => Left(s"Could not decode instant: $str") case Right((start: Instant, end: Instant)) => Right(TemporalExtent(start, end)) } case _ => Either.catchNonFatal(Instant.parse(str)) match { case Left(_) => Left(s"Could not decode instant: $str") case Right(t: Instant) => Right(TemporalExtent(t, t)) } } } implicit val encoderTemporalExtent: Encoder[TemporalExtent] = Encoder.encodeString.contramap[TemporalExtent] { extent => temporalExtentToString(extent) } implicit val decoderTemporalExtent: Decoder[TemporalExtent] = Decoder.decodeString.emap { str => temporalExtentFromString(str) } }
Example 101
Source File: MQTTStreamWordCount.scala From bahir with Apache License 2.0 | 5 votes |
package org.apache.bahir.examples.sql.streaming.mqtt import java.sql.Timestamp import org.apache.spark.sql.SparkSession object MQTTStreamWordCount { def main(args: Array[String]) { if (args.length < 2) { System.err.println("Usage: MQTTStreamWordCount <brokerUrl> <topic>") // scalastyle:off println System.exit(1) } val brokerUrl = args(0) val topic = args(1) val spark = SparkSession .builder .appName("MQTTStreamWordCount") .master("local[4]") .getOrCreate() import spark.implicits._ // Create DataFrame representing the stream of input lines from connection to mqtt server val lines = spark.readStream .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider") .option("topic", topic).option("persistence", "memory") .load(brokerUrl).selectExpr("CAST(payload AS STRING)").as[String] // Split the lines into words val words = lines.flatMap(_.split(" ")) // Generate running word count val wordCounts = words.groupBy("value").count() // Start running the query that prints the running counts to the console val query = wordCounts.writeStream .outputMode("complete") .format("console") .start() query.awaitTermination() } }
Example 102
Source File: AkkaStreamWordCount.scala From bahir with Apache License 2.0 | 5 votes |
package org.apache.bahir.examples.sql.streaming.akka import java.sql.Timestamp import org.apache.spark.sql.SparkSession object AkkaStreamWordCount { def main(args: Array[String]): Unit = { if (args.length < 1) { System.err.println("Usage: AkkaStreamWordCount <urlOfPublisher>") // scalastyle:off println System.exit(1) } val urlOfPublisher = args(0) val spark = SparkSession .builder() .appName("AkkaStreamWordCount") .master("local[4]") .getOrCreate() import spark.implicits._ // Create DataFrame representing the stream of input lines from connection // to publisher or feeder actor val lines = spark.readStream .format("org.apache.bahir.sql.streaming.akka.AkkaStreamSourceProvider") .option("urlOfPublisher", urlOfPublisher) .load().as[(String, Timestamp)] // Split the lines into words val words = lines.map(_._1).flatMap(_.split(" ")) // Generate running word count val wordCounts = words.groupBy("value").count() // Start running the query that prints the running counts to the console val query = wordCounts.writeStream .outputMode("complete") .format("console") .start() query.awaitTermination() } }
Example 103
Source File: NetezzaFilters.scala From spark-netezza with Apache License 2.0 | 5 votes |
package com.ibm.spark.netezza import java.sql.{Date, Timestamp} import org.apache.commons.lang3.StringUtils import org.apache.spark.sql.sources._ def generateFilterExpr(f: Filter): Option[String] = { Option(f match { case EqualTo(attr, value) => s"$attr = ${quoteValue(value)}" case EqualNullSafe(attr, value) => s"(NOT ($attr != ${quoteValue(value)} OR $attr IS NULL OR " + s"${quoteValue(value)} IS NULL) OR ($attr IS NULL AND ${quoteValue(value)} IS NULL))" case LessThan(attr, value) => s"$attr < ${quoteValue(value)}" case GreaterThan(attr, value) => s"$attr > ${quoteValue(value)}" case LessThanOrEqual(attr, value) => s"$attr <= ${quoteValue(value)}" case GreaterThanOrEqual(attr, value) => s"$attr >= ${quoteValue(value)}" case IsNull(attr) => s"$attr IS NULL" case IsNotNull(attr) => s"$attr IS NOT NULL" case StringStartsWith(attr, value) => s"${attr} LIKE '${value}%'" case StringEndsWith(attr, value) => s"${attr} LIKE '%${value}'" case StringContains(attr, value) => s"${attr} LIKE '%${value}%'" case In(attr, value) => s"$attr IN (${quoteValue(value)})" case Not(f) => generateFilterExpr(f).map(p => s"(NOT ($p))").getOrElse(null) case Or(f1, f2) => val or = Seq(f1, f2).flatMap(generateFilterExpr(_)) if (or.size == 2) { or.map(p => s"($p)").mkString(" OR ") } else { null } case And(f1, f2) => val and = Seq(f1, f2).flatMap(generateFilterExpr(_)) if (and.size == 2) { and.map(p => s"($p)").mkString(" AND ") } else { null } case _ => null }) } }
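generateFilterExpr leans on a quoteValue helper that is not part of the snippet shown; its job is to render each pushed-down filter value as a SQL literal, quoting strings, dates and timestamps while leaving numbers bare. A plausible sketch of such a helper, written as an assumption for illustration rather than the connector's actual implementation:

import java.sql.{Date, Timestamp}

object QuoteValueSketch {
  // Render a pushed-down filter value as a SQL literal.
  def quoteValue(value: Any): String = value match {
    case null         => "NULL"
    case s: String    => s"'${s.replace("'", "''")}'"       // escape embedded quotes
    case t: Timestamp => s"'$t'"                            // yyyy-mm-dd hh:mm:ss.fffffffff
    case d: Date      => s"'$d'"                            // yyyy-mm-dd
    case xs: Array[_] => xs.map(quoteValue).mkString(", ")  // for IN (...)
    case other        => other.toString                     // numbers, booleans, ...
  }

  def main(args: Array[String]): Unit = {
    println(quoteValue("Berlin"))                                 // 'Berlin'
    println(quoteValue(Timestamp.valueOf("2017-12-30 00:00:00"))) // '2017-12-30 00:00:00.0'
    println(quoteValue(42))                                       // 42
  }
}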
Example 104
Source File: DefaultDatabaseOperationsTest.scala From Conseil with Apache License 2.0 | 5 votes |
package tech.cryptonomic.conseil.api.sql import java.sql.Timestamp import java.time.LocalDateTime import org.scalatest.concurrent.ScalaFutures import org.scalatest.{Matchers, WordSpec} import slick.jdbc.PostgresProfile.api._ import tech.cryptonomic.conseil.api.TezosInMemoryDatabaseSetup import tech.cryptonomic.conseil.api.sql.DefaultDatabaseOperations._ import tech.cryptonomic.conseil.common.testkit.InMemoryDatabase import tech.cryptonomic.conseil.common.tezos.Tables import tech.cryptonomic.conseil.common.tezos.Tables.FeesRow import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration._ import scala.language.postfixOps class DefaultDatabaseOperationsTest extends WordSpec with Matchers with InMemoryDatabase with TezosInMemoryDatabaseSetup with ScalaFutures { "The default database operations" should { val fees: List[FeesRow] = List.tabulate(5) { i => FeesRow( 1 + i, 3 + i, 5 + i, Timestamp.valueOf(LocalDateTime.of(2018, 11, 22, 12, 30)), s"$i-example", None, None ) } "count distinct elements in column properly" in { dbHandler.run(Tables.Fees ++= fees).isReadyWithin(5 seconds) shouldBe true dbHandler.run(countDistinct("tezos", "fees", "timestamp")).futureValue shouldBe 1 dbHandler.run(countDistinct("tezos", "fees", "low")).futureValue shouldBe 5 } "select distinct elements from column properly" in { dbHandler.run(Tables.Fees ++= fees).isReadyWithin(5 seconds) shouldBe true dbHandler.run(selectDistinct("tezos", "fees", "timestamp")).futureValue shouldBe List( "2018-11-22 12:30:00" ) dbHandler.run(selectDistinct("tezos", "fees", "low")).futureValue should contain theSameElementsAs List( "1", "2", "3", "4", "5" ) } "select distinct elements from column with 'like' properly" in { dbHandler.run(Tables.Fees ++= fees).isReadyWithin(5 seconds) shouldBe true dbHandler.run(selectDistinctLike("tezos", "fees", "kind", "1-")).futureValue shouldBe List( "1-example" ) } } }
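The fee rows fix their timestamp with Timestamp.valueOf(LocalDateTime.of(...)), a conversion that interprets the LocalDateTime in the JVM's default time zone. A tiny standalone illustration of that constructor and its inverse:

import java.sql.Timestamp
import java.time.LocalDateTime

object LocalDateTimeTimestampSketch {
  def main(args: Array[String]): Unit = {
    val ldt = LocalDateTime.of(2018, 11, 22, 12, 30)
    val ts: Timestamp       = Timestamp.valueOf(ldt) // wall-clock value, default JVM zone
    val back: LocalDateTime = ts.toLocalDateTime     // inverse conversion
    println(ts)   // 2018-11-22 12:30:00.0
    println(back) // 2018-11-22T12:30
  }
}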
Example 105
Source File: DefaultDatabaseOperationsTest.scala From Conseil with Apache License 2.0 | 5 votes |
package tech.cryptonomic.conseil.indexer.sql import java.sql.Timestamp import java.time.LocalDateTime import org.scalatest.concurrent.ScalaFutures import org.scalatest.{Matchers, WordSpec} import slick.jdbc.PostgresProfile.api._ import tech.cryptonomic.conseil.common.testkit.InMemoryDatabase import tech.cryptonomic.conseil.common.tezos.Tables import tech.cryptonomic.conseil.common.tezos.Tables.{Fees, FeesRow} import tech.cryptonomic.conseil.indexer.sql.DefaultDatabaseOperations._ import tech.cryptonomic.conseil.indexer.tezos.TezosInMemoryDatabaseSetup import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration._ import scala.language.postfixOps class DefaultDatabaseOperationsTest extends WordSpec with Matchers with InMemoryDatabase with TezosInMemoryDatabaseSetup with ScalaFutures { "The default database operations" should { val fees: List[FeesRow] = List.tabulate(5) { i => FeesRow( 1 + i, 3 + i, 5 + i, Timestamp.valueOf(LocalDateTime.of(2018, 11, 22, 12, 30)), s"$i-example", None, None ) } "insert data when table is empty" in { dbHandler.run(insertWhenEmpty[Fees](Tables.Fees, fees)).futureValue shouldBe Some(5) } "do not insert data when table is not empty" in { dbHandler.run(Tables.Fees ++= fees).isReadyWithin(5 seconds) shouldBe true dbHandler.run(insertWhenEmpty[Fees](Tables.Fees, fees)).futureValue.value shouldBe Some(0) } } }
Example 106
Source File: CustomerSerializers.scala From quiz-management-service with Apache License 2.0 | 5 votes |
package com.danielasfregola.quiz.management.serializers import java.sql.Timestamp import org.json4s.CustomSerializer import org.json4s.JsonAST.{JInt, JNull} object CustomSerializers { val all = List(CustomTimestampSerializer) } case object CustomTimestampSerializer extends CustomSerializer[Timestamp](format => ({ case JInt(x) => new Timestamp(x.longValue * 1000) case JNull => null }, { case date: Timestamp => JInt(date.getTime / 1000) }))
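The serializer above maps JSON integers holding epoch seconds to java.sql.Timestamp and back. A small usage sketch with json4s native serialization, assuming json4s-native is on the classpath and reusing the CustomSerializers object from the example; the Customer case class is hypothetical, added only to exercise the serializer:

import java.sql.Timestamp

import com.danielasfregola.quiz.management.serializers.CustomSerializers
import org.json4s.{DefaultFormats, Formats}
import org.json4s.native.Serialization

object TimestampSerializerUsage {
  case class Customer(name: String, createdAt: Timestamp) // hypothetical payload type

  def main(args: Array[String]): Unit = {
    implicit val formats: Formats = DefaultFormats ++ CustomSerializers.all

    val customer = Customer("Daniela", new Timestamp(1542015720L * 1000))
    val json = Serialization.write(customer)
    println(json) // e.g. {"name":"Daniela","createdAt":1542015720}

    val back = Serialization.read[Customer](json)
    println(back.createdAt) // the epoch second restored as a Timestamp
  }
}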
Example 109
Source File: DataFrameExtensions.scala From spark-powerbi-connector with Apache License 2.0 | 5 votes |
package com.microsoft.azure.powerbi.extensions import java.sql.Timestamp import java.util.Date import scala.collection.mutable.ListBuffer import com.microsoft.azure.powerbi.authentication.PowerBIAuthentication import com.microsoft.azure.powerbi.common.PowerBIUtils import com.microsoft.azure.powerbi.models.{table, PowerBIDatasetDetails} import org.apache.spark.sql.DataFrame object DataFrameExtensions { implicit def PowerBIDataFrame(dataFrame: DataFrame): PowerBIDataFrame = new PowerBIDataFrame(dataFrame: DataFrame) class PowerBIDataFrame(dataFrame: DataFrame) extends Serializable{ def toPowerBI(powerbiDatasetDetails: PowerBIDatasetDetails, powerbiTable: table, powerBIAuthentication: PowerBIAuthentication): Unit = { var authenticationToken: String = powerBIAuthentication.getAccessToken dataFrame.foreachPartition { partition => // PowerBI row limit in single request is 10,000. We limit it to 1000. partition.grouped(1000).foreach { group => { val powerbiRowListBuffer: ListBuffer[Map[String, Any]] = ListBuffer[Map[String, Any]]() group.foreach { record => { var powerbiRow: Map[String, Any] = Map[String, Any]() for (i <- 0 until record.length) { powerbiRow += (powerbiTable.columns(i).name -> record(i)) } powerbiRowListBuffer += powerbiRow } var attemptCount = 0 var pushSuccessful = false while (!pushSuccessful && attemptCount < this.retryCount) { try { PowerBIUtils.addMultipleRows(powerbiDatasetDetails, powerbiTable, powerbiRowListBuffer, authenticationToken) pushSuccessful = true } catch { case e: Exception => println(f"Exception inserting multiple rows: ${e.getMessage}") Thread.sleep(secondsBetweenRetry * 1000) attemptCount += 1 authenticationToken = powerBIAuthentication.refreshAccessToken } } } } } } } def countTimelineToPowerBI(powerbiDatasetDetails: PowerBIDatasetDetails, powerbiTable: table, powerBIAuthentication: PowerBIAuthentication): Unit = { var authenticationToken: String = powerBIAuthentication.getAccessToken val currentTimestamp = new Timestamp(new Date().getTime) val powerbiRow = Map(powerbiTable.columns.head.name -> currentTimestamp, powerbiTable.columns(1).name -> dataFrame.count()) var attemptCount = 0 var pushSuccessful = false while (!pushSuccessful && attemptCount < this.retryCount) { try { PowerBIUtils.addRow(powerbiDatasetDetails, powerbiTable, powerbiRow, authenticationToken) pushSuccessful = true } catch { case e: Exception => println("Exception inserting row: " + e.getMessage) Thread.sleep(secondsBetweenRetry * 1000) attemptCount += 1 authenticationToken = powerBIAuthentication.refreshAccessToken } } } private val retryCount: Int = 3 private val secondsBetweenRetry: Int = 1 } }
Example 110
Source File: RepositoryMetadata.scala From spark-nlp with Apache License 2.0 | 5 votes |
package com.johnsnowlabs.nlp.pretrained import java.sql.Timestamp case class RepositoryMetadata ( // Path to repository metadata file metadataFile: String, // Path to repository folder repoFolder: String, // Aws file metadata.json version version: String, // Last time metadata was downloaded lastMetadataDownloaded: Timestamp, // List of all available resources in repository metadata: List[ResourceMetadata] )
Example 111
Source File: TrainingHelper.scala From spark-nlp with Apache License 2.0 | 5 votes |
package com.johnsnowlabs.util import java.io.File import java.nio.file.{Files, Paths, StandardCopyOption} import java.sql.Timestamp import java.util.Date import com.johnsnowlabs.nlp.pretrained.ResourceType.ResourceType import com.johnsnowlabs.nlp.pretrained.{ResourceMetadata, ResourceType} import org.apache.commons.io.FileUtils import org.apache.spark.ml.util.MLWriter object TrainingHelper { def saveModel(name: String, language: Option[String], libVersion: Option[Version], sparkVersion: Option[Version], modelWriter: MLWriter, folder: String, category: Option[ResourceType] = Some(ResourceType.NOT_DEFINED) ): Unit = { // 1. Get current timestamp val timestamp = new Timestamp(new Date().getTime) // 2. Save model to file val file = Paths.get(folder, timestamp.toString).toString.replaceAllLiterally("\\", "/") modelWriter.save(file) // 3. Zip file val tempzipFile = Paths.get(folder, timestamp + ".zip") ZipArchiveUtil.zip(file, tempzipFile.toString) // 4. Set checksum val checksum = FileHelper.generateChecksum(tempzipFile.toString) // 5. Create resource metadata val meta = new ResourceMetadata(name, language, libVersion, sparkVersion, true, timestamp, true, category = category, checksum) val zipfile = Paths.get(meta.fileName) // 6. Move the zip Files.move(tempzipFile, zipfile, StandardCopyOption.REPLACE_EXISTING) // 7. Remove original file try { FileUtils.deleteDirectory(new File(file)) } catch { case _: java.io.IOException => //file lock may prevent deletion, ignore and continue } // 6. Add to metadata.json info about resource val metadataFile = Paths.get(folder, "metadata.json").toString ResourceMetadata.addMetadataToFile(metadataFile, meta) } }
Example 112
Source File: CloudTestResources.scala From spark-nlp with Apache License 2.0 | 5 votes |
package com.johnsnowlabs.nlp.pretrained import java.sql.Timestamp import com.johnsnowlabs.util.Version object CloudTestResources { val name_en_123_345_new = new ResourceMetadata( "name", Some("en"), Some(Version(1, 2, 3)), Some(Version(3, 4, 5)), true, new Timestamp(50) ) val name_en_12_34_old = new ResourceMetadata( "name", Some("en"), Some(Version(1, 2)), Some(Version(3, 4)), true, new Timestamp(1) ) val name_en_old = new ResourceMetadata( "name", Some("en"), None, None, true, new Timestamp(1) ) val name_en_new_disabled = new ResourceMetadata( "name", Some("en"), None, None, false, new Timestamp(1) ) val name_de = new ResourceMetadata( "name", Some("de"), None, None, true, new Timestamp(1) ) val all = List(name_en_123_345_new, name_en_12_34_old, name_en_old, name_en_new_disabled, name_de) }
Example 113
Source File: ResourceDownloaderSpec.scala From spark-nlp with Apache License 2.0 | 5 votes |
package com.johnsnowlabs.nlp.pretrained import java.sql.Timestamp import com.johnsnowlabs.util.Version import org.scalatest.FlatSpec class ResourceDownloaderSpec extends FlatSpec { val b = CloudTestResources "CloudResourceMetadata" should "serialize and deserialize correctly" in { val resource = new ResourceMetadata("name", Some("en"), Some(Version(1,2,3)), Some(Version(5,4,3)), true, new Timestamp(123213)) val json = ResourceMetadata.toJson(resource) val deserialized = ResourceMetadata.parseJson(json) assert(deserialized == resource) } "CloudResourceDownloader" should "choose the newest versions" in { val found = ResourceMetadata.resolveResource(b.all, ResourceRequest("name", Some("en"), "", Version(1, 2, 3), Version(3, 4, 5))) assert(found.isDefined) assert(found.get == b.name_en_123_345_new) } "CloudResourceDownloader" should "filter disabled resources" in { val found = ResourceMetadata.resolveResource(List(b.name_en_new_disabled), ResourceRequest("name", Some("en"), "", Version(1, 2, 3), Version(3, 4, 5))) assert(found.isEmpty) } "CloudResourceDownloader" should "filter language and allow empty versions" in { val found = ResourceMetadata.resolveResource(List(b.name_en_old, b.name_de), ResourceRequest("name", Some("en"), "", Version(1, 2, 3), Version(3, 4, 5))) assert(found.isDefined) assert(found.get == b.name_en_old) } }
Example 114
Source File: TimeBasedDataService.scala From kafka-jdbc-connector with Apache License 2.0 | 5 votes |
package com.agoda.kafka.connector.jdbc.services import java.sql.{Connection, PreparedStatement, ResultSet, Timestamp} import java.util.{Date, GregorianCalendar, TimeZone} import com.agoda.kafka.connector.jdbc.JdbcSourceConnectorConstants import com.agoda.kafka.connector.jdbc.models.DatabaseProduct import com.agoda.kafka.connector.jdbc.models.DatabaseProduct.{MsSQL, MySQL} import com.agoda.kafka.connector.jdbc.models.Mode.TimestampMode import com.agoda.kafka.connector.jdbc.utils.DataConverter import org.apache.kafka.connect.data.Schema import org.apache.kafka.connect.source.SourceRecord import scala.collection.JavaConverters._ import scala.collection.mutable.ListBuffer import scala.util.Try case class TimeBasedDataService(databaseProduct: DatabaseProduct, storedProcedureName: String, batchSize: Int, batchSizeVariableName: String, timestampVariableName: String, var timestampOffset: Long, timestampFieldName: String, topic: String, keyFieldOpt: Option[String], dataConverter: DataConverter, calendar: GregorianCalendar = new GregorianCalendar(TimeZone.getTimeZone("UTC")) ) extends DataService { override def createPreparedStatement(connection: Connection): Try[PreparedStatement] = Try { val preparedStatement = databaseProduct match { case MsSQL => connection.prepareStatement(s"EXECUTE $storedProcedureName @$timestampVariableName = ?, @$batchSizeVariableName = ?") case MySQL => connection.prepareStatement(s"CALL $storedProcedureName (@$timestampVariableName := ?, @$batchSizeVariableName := ?)") } preparedStatement.setTimestamp(1, new Timestamp(timestampOffset), calendar) preparedStatement.setObject(2, batchSize) preparedStatement } override def extractRecords(resultSet: ResultSet, schema: Schema): Try[Seq[SourceRecord]] = Try { val sourceRecords = ListBuffer.empty[SourceRecord] var max = timestampOffset while (resultSet.next()) { dataConverter.convertRecord(schema, resultSet) map { record => val time = record.get(timestampFieldName).asInstanceOf[Date].getTime max = if(time > max) { keyFieldOpt match { case Some(keyField) => sourceRecords += new SourceRecord( Map(JdbcSourceConnectorConstants.STORED_PROCEDURE_NAME_KEY -> storedProcedureName).asJava, Map(TimestampMode.entryName -> time).asJava, topic, null, schema, record.get(keyField), schema, record ) case None => sourceRecords += new SourceRecord( Map(JdbcSourceConnectorConstants.STORED_PROCEDURE_NAME_KEY -> storedProcedureName).asJava, Map(TimestampMode.entryName -> time).asJava, topic, schema, record ) } time } else max } } timestampOffset = max sourceRecords } override def toString: String = { s""" |{ | "name" : "${this.getClass.getSimpleName}" | "mode" : "${TimestampMode.entryName}" | "stored-procedure.name" : "$storedProcedureName" |} """.stripMargin } }
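The service above binds its offset with the three-argument setTimestamp(index, timestamp, calendar), so the driver interprets the value against the supplied UTC calendar rather than the JVM's default time zone. A minimal JDBC sketch of that call, assuming only that the H2 driver is on the classpath for the in-memory database:

import java.sql.{DriverManager, Timestamp}
import java.util.{GregorianCalendar, TimeZone}

object SetTimestampWithCalendarSketch {
  def main(args: Array[String]): Unit = {
    val connection = DriverManager.getConnection("jdbc:h2:mem:demo")
    try {
      connection.createStatement().execute(
        "CREATE TABLE events(id INT, updated_at TIMESTAMP)")

      val stmt = connection.prepareStatement("SELECT id FROM events WHERE updated_at > ?")
      val utc  = new GregorianCalendar(TimeZone.getTimeZone("UTC"))
      // The Calendar tells the driver which zone the Timestamp should be interpreted in;
      // the two-argument setTimestamp(index, value) would use the JVM default zone instead.
      stmt.setTimestamp(1, new Timestamp(0L), utc)

      val rs = stmt.executeQuery()
      println(rs.next()) // false: the table is empty, the binding itself is the point
    } finally {
      connection.close()
    }
  }
}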
Example 115
Source File: OAuthAuthorizationTokensDal.scala From slick-akka-http-oauth2 with Apache License 2.0 | 5 votes |
package persistence.dals import java.security.SecureRandom import java.sql.Timestamp import org.joda.time.DateTime import persistence.entities.SlickTables.OauthAccessTokenTable import persistence.entities.{Account, OAuthAccessToken, OAuthClient} import slick.driver.H2Driver.api._ import slick.driver.JdbcProfile import utils.{Configuration, PersistenceModule} import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.Future import scala.util.Random trait OAuthAccessTokensDal extends BaseDalImpl[OauthAccessTokenTable,OAuthAccessToken]{ def create(account: Account, client: OAuthClient): Future[OAuthAccessToken] def delete(account: Account, client: OAuthClient): Future[Int] def refresh(account: Account, client: OAuthClient): Future[OAuthAccessToken] def findByAccessToken(accessToken: String): Future[Option[OAuthAccessToken]] def findByAuthorized(account: Account, clientId: String): Future[Option[OAuthAccessToken]] def findByRefreshToken(refreshToken: String): Future[Option[OAuthAccessToken]] } class OAuthAccessTokensDalImpl (modules: Configuration with PersistenceModule)(implicit override val db: JdbcProfile#Backend#Database) extends OAuthAccessTokensDal { override def create(account: Account, client: OAuthClient): Future[OAuthAccessToken] = { def randomString(length: Int) = new Random(new SecureRandom()).alphanumeric.take(length).mkString val accessToken = randomString(40) val refreshToken = randomString(40) val createdAt = new Timestamp(new DateTime().getMillis) val oauthAccessToken = new OAuthAccessToken( id = 0, accountId = account.id, oauthClientId = client.id, accessToken = accessToken, refreshToken = refreshToken, createdAt = createdAt ) insert(oauthAccessToken).map(id => oauthAccessToken.copy(id = id)) } override def delete(account: Account, client: OAuthClient): Future[Int] = { deleteByFilter( oauthToken => oauthToken.accountId === account.id && oauthToken.oauthClientId === client.id) } override def refresh(account: Account, client: OAuthClient): Future[OAuthAccessToken] = { delete(account, client) create(account, client) } override def findByAuthorized(account: Account, clientId: String): Future[Option[OAuthAccessToken]] = { val query = for { oauthClient <- modules.oauthClientsDal.tableQ token <- tableQ if oauthClient.id === token.oauthClientId && oauthClient.clientId === clientId && token.accountId === account.id } yield token db.run(query.result).map(_.headOption) } override def findByAccessToken(accessToken: String): Future[Option[OAuthAccessToken]] = { findByFilter(_.accessToken === accessToken).map(_.headOption) } override def findByRefreshToken(refreshToken: String): Future[Option[OAuthAccessToken]] = { val expireAt = new Timestamp(new DateTime().minusMonths(1).getMillis) findByFilter( token => token.refreshToken === refreshToken && token.createdAt > expireAt).map(_.headOption) } }
Example 116
Source File: OAuthAuthorizationCodesDal.scala From slick-akka-http-oauth2 with Apache License 2.0 | 5 votes |
package persistence.dals import java.sql.Timestamp import org.joda.time.DateTime import persistence.entities.OAuthAuthorizationCode import persistence.entities.SlickTables.OauthAuthorizationCodeTable import slick.driver.H2Driver.api._ import slick.driver.JdbcProfile import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.Future trait OAuthAuthorizationCodesDal extends BaseDalImpl[OauthAuthorizationCodeTable,OAuthAuthorizationCode]{ def findByCode(code: String): Future[Option[OAuthAuthorizationCode]] def delete(code: String): Future[Int] } class OAuthAuthorizationCodesDalImpl()(implicit override val db: JdbcProfile#Backend#Database) extends OAuthAuthorizationCodesDal { override def findByCode(code: String): Future[Option[OAuthAuthorizationCode]] = { val expireAt = new Timestamp(new DateTime().minusMinutes(30).getMillis) findByFilter(authCode => authCode.code === code && authCode.createdAt > expireAt).map(_.headOption) } override def delete(code: String): Future[Int] = deleteByFilter(_.code === code) }
Example 117
Source File: Boot.scala From slick-akka-http-oauth2 with Apache License 2.0 | 5 votes |
import java.sql.Timestamp import akka.http.scaladsl.Http import akka.http.scaladsl.server.RouteConcatenation import akka.stream.ActorMaterializer import org.joda.time.DateTime import persistence.entities.{Account, OAuthClient} import rest.OAuthRoutes import utils._ object Main extends App with RouteConcatenation { // configuring modules for application, cake pattern for DI val modules = new ConfigurationModuleImpl with ActorModuleImpl with PersistenceModuleImpl implicit val system = modules.system implicit val materializer = ActorMaterializer() implicit val ec = modules.system.dispatcher modules.generateDDL() for { createAccounts <- modules.accountsDal.insert(Seq( Account(0, "[email protected]", "48181acd22b3edaebc8a447868a7df7ce629920a", new Timestamp(new DateTime().getMillis)) // password:bob )) createOauthClients <- modules.oauthClientsDal.insert(Seq( OAuthClient(0, 1, "client_credentials", "bob_client_id", "bob_client_secret", Some("redirectUrl"), new Timestamp(new DateTime().getMillis)))) } yield { println(s"Database initialized with default values for bob and alice") } val bindingFuture = Http().bindAndHandle( new OAuthRoutes(modules).routes, "localhost", 8080) println(s"Server online at http://localhost:8080/") }
Example 118
Source File: Schema.scala From osmesa with Apache License 2.0 | 5 votes |
package osmesa.analytics.updater import java.sql.Timestamp import java.time.Instant import geotrellis.vectortile.Layer import org.apache.log4j.Logger import osmesa.analytics.updater.Implicits._ trait Schema { val layer: Layer val features: Map[String, (Option[AugmentedDiffFeature], AugmentedDiffFeature)] val newFeatures: Seq[VTFeature] lazy val replacementFeatures: Seq[VTFeature] = Seq.empty[VTFeature] lazy val retainedFeatures: Seq[VTFeature] = Seq.empty[VTFeature] protected lazy val logger: Logger = Logger.getLogger(getClass) protected lazy val touchedFeatures: Map[String, Seq[VTFeature]] = Map.empty[String, Seq[VTFeature]] protected lazy val versionInfo: Map[String, (Int, Int, Timestamp)] = touchedFeatures .mapValues(_.last) .mapValues( f => ( f.data("__version").toInt, f.data("__minorVersion").toInt, Timestamp.from(Instant.ofEpochMilli(f.data("__updated"))) )) protected lazy val minorVersions: Map[String, Int] = features .mapValues { case (_, curr) => curr.data } .map { case (id, f) => versionInfo.get(id) match { case Some((prevVersion, _, _)) if prevVersion < f.version => (id, 0) case Some((prevVersion, prevMinorVersion, _)) if prevVersion == f.version => (id, prevMinorVersion + 1) case _ => (id, 0) } } } trait SchemaBuilder { val layerName: String def apply(layer: Layer, features: Map[String, (Option[AugmentedDiffFeature], AugmentedDiffFeature)]): Schema }
Example 119
Source File: PostgresBookingViewRepository.scala From ticket-booking-aecor with Apache License 2.0 | 5 votes |
package ru.pavkin.booking.booking.view import java.sql.Timestamp import java.time.Instant import cats.Monad import cats.implicits._ import doobie._ import doobie.implicits._ import doobie.util.transactor.Transactor import io.circe.{ Decoder, Encoder, Json } import io.circe.parser._ import org.postgresql.util.PGobject import ru.pavkin.booking.common.models._ class PostgresBookingViewRepository[F[_]: Monad](transactor: Transactor[F], tableName: String = "bookings") extends BookingViewRepository[F] { implicit val jsonMeta: Meta[Json] = Meta.Advanced .other[PGobject]("json") .timap[Json](a => parse(a.getValue).leftMap[Json](e => throw e).merge)(a => { val o = new PGobject o.setType("json") o.setValue(a.noSpaces) o }) implicit val seatsMeta: Meta[List[Seat]] = jsonMeta.timap( j => Decoder[List[Seat]].decodeJson(j).right.get )(s => Encoder[List[Seat]].apply(s)) implicit val ticketsMeta: Meta[List[Ticket]] = jsonMeta.timap( j => Decoder[List[Ticket]].decodeJson(j).right.get )(s => Encoder[List[Ticket]].apply(s)) implicit val instantMeta: Meta[Instant] = Meta[Timestamp].timap(_.toInstant)(Timestamp.from) implicit val bookingStatusMeta: Meta[BookingStatus] = Meta[String].timap(BookingStatus.withName)(_.entryName) def get(bookingId: BookingKey): F[Option[BookingView]] = queryView(bookingId).option.transact(transactor) def byClient(clientId: ClientId): F[List[BookingView]] = queryForClient(clientId).to[List].transact(transactor) def set(view: BookingView): F[Unit] = Update[BookingView](setViewQuery).run(view).transact(transactor).void def expired(now: Instant): fs2.Stream[F, BookingKey] = queryExpired(now).stream.transact(transactor) def createTable: F[Unit] = createTableQuery.transact(transactor).void private val setViewQuery = s"""INSERT INTO $tableName (booking_id, client_id, concert_id, seats, tickets, status, confirmed_at, expires_at, version) VALUES (?,?,?,?,?,?,?,?,?) ON CONFLICT (booking_id) DO UPDATE SET tickets = EXCLUDED.tickets, status = EXCLUDED.status, confirmed_at = EXCLUDED.confirmed_at, expires_at = EXCLUDED.expires_at, version = EXCLUDED.version;""" private def queryView(bookingId: BookingKey) = (fr"SELECT * FROM " ++ Fragment.const(tableName) ++ fr"WHERE booking_id = $bookingId;") .query[BookingView] private def queryExpired(now: Instant) = (fr"SELECT booking_id FROM " ++ Fragment.const(tableName) ++ fr"WHERE status = ${BookingStatus.Confirmed: BookingStatus} AND expires_at < $now;") .query[BookingKey] private def queryForClient(clientId: ClientId) = (fr"SELECT * FROM " ++ Fragment.const(tableName) ++ fr"WHERE client_id = $clientId;") .query[BookingView] private val createTableQuery = (fr""" CREATE TABLE IF NOT EXISTS """ ++ Fragment.const(tableName) ++ fr""" ( booking_id text NOT NULL PRIMARY KEY, client_id text NOT NULL, concert_id text NOT NULL, seats json NOT NULL, tickets json NOT NULL, status text NOT NULL, confirmed_at timestamptz, expires_at timestamptz, version bigint NOT NULL ); """).update.run }
Example 120
Source File: SetDifferenceAndFilter.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.analytics.util import java.sql.Timestamp import org.apache.spark.sql.{Dataset, SparkSession} object SetDifferenceAndFilter { def apply(uuids1: Dataset[KeyFields], uuids2: Dataset[KeyFields], consistencyThreshold: Long, filterOutMeta: Boolean = false) (implicit spark: SparkSession): Dataset[KeyFields] = { import spark.implicits._ // The original setDifference implementation used the SQL except function, but that ignores any pre-partitioning. // The next implementation used a left-anti join, but that created a weird execution plan that caused poor performance. // The current implementation uses a outer join - which uses an efficient sort-merge join. def setDifference(uuids1: Dataset[KeyFields], uuids2: Dataset[KeyFields]): Dataset[KeyFields] = uuids1.join(uuids2, uuids1("uuid") === uuids2("uuid"), "left_outer") .filter(uuids2("uuid").isNull) .select(uuids1("*")) .as[KeyFields] // Calculate the set difference between the two sets of uuids. // The anti-join produces just the left side, and only the ones that are not in the right side. val positives = setDifference(uuids1, uuids2) val timeToConsistencyFilter = positives("lastModified") < new Timestamp(consistencyThreshold) val overallFilter = if (filterOutMeta) timeToConsistencyFilter && (positives("path") =!= "/" && positives("path") =!= "/meta" && !positives("path").startsWith("/meta/")) else timeToConsistencyFilter // Filter out any positives that occurred after the current threshold positives.filter(overallFilter) } }
Example 121
Source File: RowComparer.scala From spark-fast-tests with MIT License | 5 votes |
package com.github.mrpowers.spark.fast.tests import org.apache.spark.sql.Row import java.sql.Timestamp import scala.math.abs object RowComparer { def areRowsEqual(r1: Row, r2: Row, tol: Double): Boolean = { if (r1.length != r2.length) { return false } else { (0 until r1.length).foreach(idx => { if (r1.isNullAt(idx) != r2.isNullAt(idx)) { return false } if (!r1.isNullAt(idx)) { val o1 = r1.get(idx) val o2 = r2.get(idx) o1 match { case b1: Array[Byte] => if (!java.util.Arrays.equals( b1, o2.asInstanceOf[Array[Byte]] )) { return false } case f1: Float => if (java.lang.Float.isNaN(f1) != java.lang.Float.isNaN(o2.asInstanceOf[Float])) { return false } if (abs(f1 - o2.asInstanceOf[Float]) > tol) { return false } case d1: Double => if (java.lang.Double.isNaN(d1) != java.lang.Double.isNaN(o2.asInstanceOf[Double])) { return false } if (abs(d1 - o2.asInstanceOf[Double]) > tol) { return false } case d1: java.math.BigDecimal => if (d1.compareTo(o2.asInstanceOf[java.math.BigDecimal]) != 0) { return false } case t1: Timestamp => if (abs(t1.getTime - o2.asInstanceOf[Timestamp].getTime) > tol) { return false } case _ => if (o1 != o2) return false } } }) } true } }
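For Timestamp cells, areRowsEqual treats two values as equal when their millisecond epoch values differ by at most tol. The rule in isolation, as a small sketch:

import java.sql.Timestamp
import scala.math.abs

object TimestampToleranceCompare extends App {
  def equalWithin(t1: Timestamp, t2: Timestamp, tolMillis: Double): Boolean =
    abs(t1.getTime - t2.getTime) <= tolMillis

  val a = Timestamp.valueOf("2019-05-01 12:00:00.000")
  val b = Timestamp.valueOf("2019-05-01 12:00:00.004")

  println(equalWithin(a, b, tolMillis = 5)) // true, the values are 4 ms apart
  println(equalWithin(a, b, tolMillis = 2)) // false
}

Since getTime truncates the nanosecond field to milliseconds, sub-millisecond differences are invisible to this comparison.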
Example 122
Source File: BigQueryUtilsSpec.scala From comet-data-pipeline with Apache License 2.0 | 5 votes |
package com.ebiznext.comet.utils.conversion import java.sql.{Date, Timestamp} import com.ebiznext.comet.TestHelper import com.ebiznext.comet.config.SparkEnv import com.ebiznext.comet.utils.conversion.BigQueryUtils._ import com.ebiznext.comet.utils.conversion.syntax._ import org.apache.spark.sql.SparkSession import com.google.cloud.bigquery.{Field, StandardSQLTypeName, Schema => BQSchema} class BigQueryUtilsSpec extends TestHelper { new WithSettings() { val sparkEnv: SparkEnv = new SparkEnv("test") val session: SparkSession = sparkEnv.session import session.implicits._ "Spark Types" should "be converted to corresponding BQ Types" in { val res: BQSchema = List( ( 1, true, 2.5, "hello", 'x'.asInstanceOf[Byte], new Date(System.currentTimeMillis()), new Timestamp(System.currentTimeMillis()) ) ).toDF().to[BQSchema] //Schema{fields=[Field{name=value, type=INTEGER, mode=NULLABLE, description=, policyTags=null}]} val fields = List( Field .newBuilder("_1", StandardSQLTypeName.INT64) .setDescription("") .setMode(Field.Mode.NULLABLE) .build(), Field .newBuilder("_2", StandardSQLTypeName.BOOL) .setDescription("") .setMode(Field.Mode.NULLABLE) .build(), Field .newBuilder("_3", StandardSQLTypeName.FLOAT64) .setDescription("") .setMode(Field.Mode.NULLABLE) .build(), Field .newBuilder("_4", StandardSQLTypeName.STRING) .setDescription("") .setMode(Field.Mode.NULLABLE) .build(), Field .newBuilder("_5", StandardSQLTypeName.INT64) .setDescription("") .setMode(Field.Mode.NULLABLE) .build(), Field .newBuilder("_6", StandardSQLTypeName.DATE) .setDescription("") .setMode(Field.Mode.NULLABLE) .build(), Field .newBuilder("_7", StandardSQLTypeName.TIMESTAMP) .setDescription("") .setMode(Field.Mode.NULLABLE) .build() ) res.getFields should contain theSameElementsInOrderAs fields } } }
Example 123
Source File: StructuredNetworkWordCountWindowed.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples.sql.streaming import java.sql.Timestamp import org.apache.spark.sql.SparkSession import org.apache.spark.sql.functions._ object StructuredNetworkWordCountWindowed { def main(args: Array[String]) { if (args.length < 3) { System.err.println("Usage: StructuredNetworkWordCountWindowed <hostname> <port>" + " <window duration in seconds> [<slide duration in seconds>]") System.exit(1) } val host = args(0) val port = args(1).toInt val windowSize = args(2).toInt val slideSize = if (args.length == 3) windowSize else args(3).toInt if (slideSize > windowSize) { System.err.println("<slide duration> must be less than or equal to <window duration>") } val windowDuration = s"$windowSize seconds" val slideDuration = s"$slideSize seconds" val spark = SparkSession .builder .appName("StructuredNetworkWordCountWindowed") .getOrCreate() import spark.implicits._ // Create DataFrame representing the stream of input lines from connection to host:port val lines = spark.readStream .format("socket") .option("host", host) .option("port", port) .option("includeTimestamp", true) .load() // Split the lines into words, retaining timestamps val words = lines.as[(String, Timestamp)].flatMap(line => line._1.split(" ").map(word => (word, line._2)) ).toDF("word", "timestamp") // Group the data by window and word and compute the count of each group val windowedCounts = words.groupBy( window($"timestamp", windowDuration, slideDuration), $"word" ).count().orderBy("window") // Start running the query that prints the windowed word counts to the console val query = windowedCounts.writeStream .outputMode("complete") .format("console") .option("truncate", "false") .start() query.awaitTermination() } } // scalastyle:on println
Example 124
Source File: SchemaColumnSelection.scala From data-faker with MIT License | 5 votes |
package com.dunnhumby.datafaker.schema.table.columns import scala.reflect.runtime.universe.TypeTag import java.sql.{Date, Timestamp} import com.dunnhumby.datafaker.YamlParser.YamlParserProtocol import org.apache.spark.sql.Column import org.apache.spark.sql.functions.{rand, udf} case class SchemaColumnSelection[T](override val name: String, values: List[T])(implicit tag: TypeTag[T]) extends SchemaColumn { override def column(rowID: Option[Column] = None): Column = { val intToSelectionUDF = udf((index: Int) => { values(index) }) intToSelectionUDF(rand() * values.length % values.length) } } object SchemaColumnSelectionProtocol extends SchemaColumnSelectionProtocol trait SchemaColumnSelectionProtocol extends YamlParserProtocol { import net.jcazevedo.moultingyaml._ implicit object SchemaColumnSelectionFormat extends YamlFormat[SchemaColumnSelection[_]] { override def read(yaml: YamlValue): SchemaColumnSelection[_] = { val fields = yaml.asYamlObject.fields val YamlString(dataType) = fields.getOrElse(YamlString("data_type"), deserializationError("data_type not set")) val YamlString(name) = fields.getOrElse(YamlString("name"), deserializationError("name not set")) val values = fields.getOrElse(YamlString("values"), deserializationError("selection values not set")) dataType match { case SchemaColumnDataType.Int => SchemaColumnSelection(name, values.convertTo[List[Int]]) case SchemaColumnDataType.Long => SchemaColumnSelection(name, values.convertTo[List[Long]]) case SchemaColumnDataType.Float => SchemaColumnSelection(name, values.convertTo[List[Float]]) case SchemaColumnDataType.Double => SchemaColumnSelection(name, values.convertTo[List[Double]]) case SchemaColumnDataType.Date => SchemaColumnSelection(name, values.convertTo[List[Date]]) case SchemaColumnDataType.Timestamp => SchemaColumnSelection(name, values.convertTo[List[Timestamp]]) case SchemaColumnDataType.String => SchemaColumnSelection(name, values.convertTo[List[String]]) case _ => deserializationError(s"unsupported data_type: $dataType for ${SchemaColumnType.Selection}") } } override def write(obj: SchemaColumnSelection[_]): YamlValue = ??? } }
Example 125
Source File: SchemaColumnRandom.scala From data-faker with MIT License | 5 votes |
package com.dunnhumby.datafaker.schema.table.columns import java.sql.{Date, Timestamp} import com.dunnhumby.datafaker.YamlParser.YamlParserProtocol import org.apache.spark.sql.Column import org.apache.spark.sql.functions.{to_utc_timestamp, round, rand, from_unixtime, to_date} import org.apache.spark.sql.types.{IntegerType, LongType} trait SchemaColumnRandom[T] extends SchemaColumn object SchemaColumnRandom { val FloatDP = 3 val DoubleDP = 3 def apply(name: String, min: Int, max: Int): SchemaColumn = SchemaColumnRandomNumeric(name, min, max) def apply(name: String, min: Long, max: Long): SchemaColumn = SchemaColumnRandomNumeric(name, min, max) def apply(name: String, min: Float, max: Float): SchemaColumn = SchemaColumnRandomNumeric(name, min, max) def apply(name: String, min: Double, max: Double): SchemaColumn = SchemaColumnRandomNumeric(name, min, max) def apply(name: String, min: Date, max: Date): SchemaColumn = SchemaColumnRandomDate(name, min, max) def apply(name: String, min: Timestamp, max: Timestamp): SchemaColumn = SchemaColumnRandomTimestamp(name, min, max) def apply(name: String): SchemaColumn = SchemaColumnRandomBoolean(name) } private case class SchemaColumnRandomNumeric[T: Numeric](override val name: String, min: T, max: T) extends SchemaColumnRandom[T] { override def column(rowID: Option[Column] = None): Column = { import Numeric.Implicits._ (min, max) match { case (_: Int, _: Int) => round(rand() * (max - min) + min, 0).cast(IntegerType) case (_: Long, _: Long) => round(rand() * (max - min) + min, 0).cast(LongType) case (_: Float, _: Float) => round(rand() * (max - min) + min, SchemaColumnRandom.FloatDP) case (_: Double, _: Double) => round(rand() * (max - min) + min, SchemaColumnRandom.DoubleDP) } } } private case class SchemaColumnRandomTimestamp(override val name: String, min: Timestamp, max: Timestamp) extends SchemaColumnRandom[Timestamp] { override def column(rowID: Option[Column] = None): Column = { val minTime = min.getTime / 1000 val maxTime = max.getTime / 1000 to_utc_timestamp(from_unixtime(rand() * (maxTime - minTime) + minTime), "UTC") } } private case class SchemaColumnRandomDate(override val name: String, min: Date, max: Date) extends SchemaColumnRandom[Date] { val timestamp = SchemaColumnRandomTimestamp(name, new Timestamp(min.getTime), new Timestamp(max.getTime + 86400000)) override def column(rowID: Option[Column] = None): Column = to_date(timestamp.column()) } private case class SchemaColumnRandomBoolean(override val name: String) extends SchemaColumnRandom[Boolean] { override def column(rowID: Option[Column] = None): Column = rand() < 0.5f } object SchemaColumnRandomProtocol extends SchemaColumnRandomProtocol trait SchemaColumnRandomProtocol extends YamlParserProtocol { import net.jcazevedo.moultingyaml._ implicit object SchemaColumnRandomFormat extends YamlFormat[SchemaColumnRandom[_]] { override def read(yaml: YamlValue): SchemaColumnRandom[_] = { val fields = yaml.asYamlObject.fields val YamlString(name) = fields.getOrElse(YamlString("name"), deserializationError("name not set")) val YamlString(dataType) = fields.getOrElse(YamlString("data_type"), deserializationError(s"data_type not set for $name")) if (dataType == SchemaColumnDataType.Boolean) { SchemaColumnRandomBoolean(name) } else { val min = fields.getOrElse(YamlString("min"), deserializationError(s"min not set for $name")) val max = fields.getOrElse(YamlString("max"), deserializationError(s"max not set for $name")) dataType match { case SchemaColumnDataType.Int => SchemaColumnRandomNumeric(name, 
min.convertTo[Int], max.convertTo[Int]) case SchemaColumnDataType.Long => SchemaColumnRandomNumeric(name, min.convertTo[Long], max.convertTo[Long]) case SchemaColumnDataType.Float => SchemaColumnRandomNumeric(name, min.convertTo[Float], max.convertTo[Float]) case SchemaColumnDataType.Double => SchemaColumnRandomNumeric(name, min.convertTo[Double], max.convertTo[Double]) case SchemaColumnDataType.Date => SchemaColumnRandomDate(name, min.convertTo[Date], max.convertTo[Date]) case SchemaColumnDataType.Timestamp => SchemaColumnRandomTimestamp(name, min.convertTo[Timestamp], max.convertTo[Timestamp]) case _ => deserializationError(s"unsupported data_type: $dataType for ${SchemaColumnType.Random}") } } } override def write(obj: SchemaColumnRandom[_]): YamlValue = ??? } }
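SchemaColumnRandomTimestamp draws a uniform number of seconds between min.getTime / 1000 and max.getTime / 1000 inside a Spark expression. The same idea on the driver, with plain JDK types and second precision, might look like this rough sketch (the bounds are illustrative):

import java.sql.Timestamp
import scala.util.Random

object RandomTimestampSketch extends App {
  // Illustrative bounds; the real column reads min and max from YAML
  val min = Timestamp.valueOf("2018-01-01 00:00:00")
  val max = Timestamp.valueOf("2018-12-31 23:59:59")

  def randomTimestampBetween(min: Timestamp, max: Timestamp): Timestamp = {
    val minSecs = min.getTime / 1000
    val maxSecs = max.getTime / 1000
    val secs = minSecs + (Random.nextDouble() * (maxSecs - minSecs)).toLong
    new Timestamp(secs * 1000)
  }

  (1 to 3).foreach(_ => println(randomTimestampBetween(min, max)))
}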
Example 126
Source File: SchemaColumnSequential.scala From data-faker with MIT License | 5 votes |
package com.dunnhumby.datafaker.schema.table.columns import java.sql.{Date, Timestamp} import com.dunnhumby.datafaker.YamlParser.YamlParserProtocol import org.apache.spark.sql.Column import org.apache.spark.sql.functions.{to_utc_timestamp, from_unixtime, monotonically_increasing_id, to_date} trait SchemaColumnSequential[T] extends SchemaColumn object SchemaColumnSequential { def apply(name: String, start: Int, step: Int): SchemaColumn = SchemaColumnSequentialNumeric(name, start, step) def apply(name: String, start: Long, step: Long): SchemaColumn = SchemaColumnSequentialNumeric(name, start, step) def apply(name: String, start: Float, step: Float): SchemaColumn = SchemaColumnSequentialNumeric(name, start, step) def apply(name: String, start: Double, step: Double): SchemaColumn = SchemaColumnSequentialNumeric(name, start, step) def apply(name: String, start: Date, step: Int): SchemaColumn = SchemaColumnSequentialDate(name, start, step) def apply(name: String, start: Timestamp, step: Int): SchemaColumn = SchemaColumnSequentialTimestamp(name, start, step) } private case class SchemaColumnSequentialNumeric[T: Numeric](override val name: String, start: T, step: T) extends SchemaColumnSequential[T] { override def column(rowID: Option[Column] = Some(monotonically_increasing_id)): Column = (rowID.get * step) + start } private case class SchemaColumnSequentialTimestamp(override val name: String, start: Timestamp, stepSeconds: Int) extends SchemaColumnSequential[Timestamp] { override def column(rowID: Option[Column] = Some(monotonically_increasing_id)): Column = { val startTime = start.getTime / 1000 to_utc_timestamp(from_unixtime(rowID.get * stepSeconds + startTime), "UTC") } } private case class SchemaColumnSequentialDate(override val name: String, start: Date, stepDays: Int) extends SchemaColumnSequential[Date] { val timestamp = SchemaColumnSequentialTimestamp(name, new Timestamp(start.getTime), stepDays * 86400) override def column(rowID: Option[Column]): Column = to_date(timestamp.column()) } object SchemaColumnSequentialProtocol extends SchemaColumnSequentialProtocol trait SchemaColumnSequentialProtocol extends YamlParserProtocol { import net.jcazevedo.moultingyaml._ implicit object SchemaColumnSequentialFormat extends YamlFormat[SchemaColumnSequential[_]] { override def read(yaml: YamlValue): SchemaColumnSequential[_] = { val fields = yaml.asYamlObject.fields val YamlString(dataType) = fields.getOrElse(YamlString("data_type"), deserializationError("data_type not set")) val YamlString(name) = fields.getOrElse(YamlString("name"), deserializationError("name not set")) val start = fields.getOrElse(YamlString("start"), deserializationError("start not set")) val step = fields.getOrElse(YamlString("step"), deserializationError("step not set")) dataType match { case "Int" => SchemaColumnSequentialNumeric(name, start.convertTo[Int], step.convertTo[Int]) case "Long" => SchemaColumnSequentialNumeric(name, start.convertTo[Long], step.convertTo[Long]) case "Float" => SchemaColumnSequentialNumeric(name, start.convertTo[Float], step.convertTo[Float]) case "Double" => SchemaColumnSequentialNumeric(name, start.convertTo[Double], step.convertTo[Double]) case "Date" => SchemaColumnSequentialDate(name, start.convertTo[Date], step.convertTo[Int]) case "Timestamp" => SchemaColumnSequentialTimestamp(name, start.convertTo[Timestamp], step.convertTo[Int]) case _ => deserializationError(s"unsupported data_type: $dataType for ${SchemaColumnType.Sequential}") } } override def write(obj: SchemaColumnSequential[_]): 
YamlValue = ??? } }
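SchemaColumnSequentialTimestamp adds rowID * stepSeconds to the start time, working in whole seconds. Stripped of the Spark column expressions, the generated sequence is equivalent to this sketch (start and step are illustrative):

import java.sql.Timestamp

object SequentialTimestampSketch extends App {
  // Illustrative start and step; the real column reads them from YAML
  val start = Timestamp.valueOf("1998-06-03 01:23:45")
  val stepSeconds = 60L

  def sequentialTimestamp(rowId: Long): Timestamp =
    new Timestamp((start.getTime / 1000 + rowId * stepSeconds) * 1000)

  (0L to 3L).foreach(id => println(s"row $id -> ${sequentialTimestamp(id)}"))
}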
Example 127
Source File: SchemaColumnFixed.scala From data-faker with MIT License | 5 votes |
package com.dunnhumby.datafaker.schema.table.columns import java.sql.{Date, Timestamp} import com.dunnhumby.datafaker.YamlParser.YamlParserProtocol import org.apache.spark.sql.Column import org.apache.spark.sql.functions.lit case class SchemaColumnFixed[T](override val name: String, value: T) extends SchemaColumn { override def column(rowID: Option[Column] = None): Column = lit(value) } object SchemaColumnFixedProtocol extends SchemaColumnFixedProtocol trait SchemaColumnFixedProtocol extends YamlParserProtocol { import net.jcazevedo.moultingyaml._ implicit object SchemaColumnFixedFormat extends YamlFormat[SchemaColumnFixed[_]] { override def read(yaml: YamlValue): SchemaColumnFixed[_] = { val fields = yaml.asYamlObject.fields val YamlString(name) = fields.getOrElse(YamlString("name"), deserializationError("name not set")) val YamlString(dataType) = fields.getOrElse(YamlString("data_type"), deserializationError(s"data_type not set for $name")) val value = fields.getOrElse(YamlString("value"), deserializationError(s"value not set for $name")) dataType match { case SchemaColumnDataType.Int => SchemaColumnFixed(name, value.convertTo[Int]) case SchemaColumnDataType.Long => SchemaColumnFixed(name, value.convertTo[Long]) case SchemaColumnDataType.Float => SchemaColumnFixed(name, value.convertTo[Float]) case SchemaColumnDataType.Double => SchemaColumnFixed(name, value.convertTo[Double]) case SchemaColumnDataType.Date => SchemaColumnFixed(name, value.convertTo[Date]) case SchemaColumnDataType.Timestamp => SchemaColumnFixed(name, value.convertTo[Timestamp]) case SchemaColumnDataType.String => SchemaColumnFixed(name, value.convertTo[String]) case SchemaColumnDataType.Boolean => SchemaColumnFixed(name, value.convertTo[Boolean]) case _ => deserializationError(s"unsupported data_type: $dataType for ${SchemaColumnType.Fixed}") } } override def write(obj: SchemaColumnFixed[_]): YamlValue = ??? } }
Example 128
Source File: YamlParserTest.scala From data-faker with MIT License | 5 votes |
package com.dunnhumby.datafaker import java.sql.{Date, Timestamp} import org.scalatest.{MustMatchers, WordSpec} class YamlParserTest extends WordSpec with MustMatchers { import com.dunnhumby.datafaker.YamlParser.YamlParserProtocol._ import net.jcazevedo.moultingyaml._ "YamlParser" must { "convert a YamlDate to java.sql.Date" in { val date = "1998-06-03" val string = s"""$date""".stripMargin string.parseYaml.convertTo[Date] mustBe Date.valueOf(date) } "convert a YamlDate to java.sql.Timestamp" in { val timestamp = "1998-06-03 01:23:45" val string = s"""$timestamp""".stripMargin string.parseYaml.convertTo[Timestamp] mustBe Timestamp.valueOf(timestamp) } } }
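Both assertions rely on the literal formats that the java.sql parsers accept: Date.valueOf expects yyyy-MM-dd and Timestamp.valueOf expects yyyy-MM-dd HH:mm:ss with an optional fractional part. In isolation:

import java.sql.{Date, Timestamp}

object SqlLiteralParsing extends App {
  val date: Date = Date.valueOf("1998-06-03")
  val timestamp: Timestamp = Timestamp.valueOf("1998-06-03 01:23:45")

  println(date)      // 1998-06-03
  println(timestamp) // 1998-06-03 01:23:45.0
}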
Example 129
Source File: SchemaColumnFixedTest.scala From data-faker with MIT License | 5 votes |
package com.dunnhumby.datafaker.schema.table.columns import java.sql.{Date, Timestamp} import org.scalatest.{MustMatchers, WordSpec} class SchemaColumnFixedTest extends WordSpec with MustMatchers { import com.dunnhumby.datafaker.schema.table.columns.SchemaColumnFixedProtocol._ import net.jcazevedo.moultingyaml._ val name = "test" val column_type = "Fixed" val baseString = s"""name: $name |column_type: $column_type """.stripMargin "SchemaColumnFixed" must { "read an Int column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Int} |value: 1 """.stripMargin string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, 1) } "read a Long column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Long} |value: 1 """.stripMargin string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, 1l) } "read a Float column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Float} |value: 1.0 """.stripMargin string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, 1f) } "read a Double column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Double} |value: 1.0 """.stripMargin string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, 1d) } "read a Date column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Date} |value: 1998-06-03 """.stripMargin string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, Date.valueOf("1998-06-03")) } "read a Timestamp column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Timestamp} |value: 1998-06-03 01:23:45 """.stripMargin string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, Timestamp.valueOf("1998-06-03 01:23:45")) } "read a String column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.String} |value: test """.stripMargin string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, "test") } "read a Boolean column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Boolean} |value: true """.stripMargin string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, true) } } }
Example 130
Source File: SchemaColumnSequentialTest.scala From data-faker with MIT License | 5 votes |
package com.dunnhumby.datafaker.schema.table.columns import java.sql.{Date, Timestamp} import org.scalatest.{MustMatchers, WordSpec} class SchemaColumnSequentialTest extends WordSpec with MustMatchers { import com.dunnhumby.datafaker.schema.table.columns.SchemaColumnSequentialProtocol._ import net.jcazevedo.moultingyaml._ val name = "test" val column_type = "Sequential" val baseString = s"""name: $name |column_type: $column_type """.stripMargin "SchemaColumnSequential" must { "read an Int column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Int} |start: 1 |step: 1 """.stripMargin string.parseYaml.convertTo[SchemaColumnSequential[_]] mustBe SchemaColumnSequential(name, 1, 1) } "read a Long column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Long} |start: 1 |step: 1 """.stripMargin string.parseYaml.convertTo[SchemaColumnSequential[_]] mustBe SchemaColumnSequential(name, 1l, 1l) } "read a Float column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Float} |start: 1.0 |step: 1.0 """.stripMargin string.parseYaml.convertTo[SchemaColumnSequential[_]] mustBe SchemaColumnSequential(name, 1f, 1f) } "read a Double column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Double} |start: 1.0 |step: 1.0 """.stripMargin string.parseYaml.convertTo[SchemaColumnSequential[_]] mustBe SchemaColumnSequential(name, 1d, 1d) } "read a Date column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Date} |start: 1998-06-03 |step: 1 """.stripMargin string.parseYaml.convertTo[SchemaColumnSequential[_]] mustBe SchemaColumnSequential(name, Date.valueOf("1998-06-03"), 1) } "read a Timestamp column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Timestamp} |start: 1998-06-03 01:23:45 |step: 1 """.stripMargin string.parseYaml.convertTo[SchemaColumnSequential[_]] mustBe SchemaColumnSequential(name, Timestamp.valueOf("1998-06-03 01:23:45"), 1) } } }
Example 131
Source File: ArgsParserTest.scala From data-faker with MIT License | 5 votes |
package com.dunnhumby.datafaker import java.sql.{Date, Timestamp} import org.scalatest.{MustMatchers, WordSpec} class ArgsParserTest extends WordSpec with MustMatchers { import com.dunnhumby.datafaker.YamlParser.YamlParserProtocol._ import net.jcazevedo.moultingyaml._ "ArgsParser" must { "accepts --file arg" in { ArgsParser.parseArgs(List("--file", "test")) mustBe Map("file" -> "test") } "accepts --database arg" in { ArgsParser.parseArgs(List("--database", "test")) mustBe Map("database" -> "test") } } }
Example 132
Source File: literals.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} import org.apache.spark.sql.catalyst.CatalystTypeConverters import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.types._ object Literal { def apply(v: Any): Literal = v match { case i: Int => Literal(i, IntegerType) case l: Long => Literal(l, LongType) case d: Double => Literal(d, DoubleType) case f: Float => Literal(f, FloatType) case b: Byte => Literal(b, ByteType) case s: Short => Literal(s, ShortType) case s: String => Literal(UTF8String(s), StringType) case b: Boolean => Literal(b, BooleanType) case d: BigDecimal => Literal(Decimal(d), DecimalType.Unlimited) case d: java.math.BigDecimal => Literal(Decimal(d), DecimalType.Unlimited) case d: Decimal => Literal(d, DecimalType.Unlimited) case t: Timestamp => Literal(t, TimestampType) case d: Date => Literal(DateUtils.fromJavaDate(d), DateType) case a: Array[Byte] => Literal(a, BinaryType) case null => Literal(null, NullType) case _ => throw new RuntimeException("Unsupported literal type " + v.getClass + " " + v) } def create(v: Any, dataType: DataType): Literal = { Literal(CatalystTypeConverters.convertToCatalyst(v), dataType) } } case class Literal protected (value: Any, dataType: DataType) extends LeafExpression { override def foldable: Boolean = true override def nullable: Boolean = value == null override def toString: String = if (value != null) value.toString else "null" type EvaluatedType = Any override def eval(input: Row): Any = value } // TODO: Specialize case class MutableLiteral(var value: Any, dataType: DataType, nullable: Boolean = true) extends LeafExpression { type EvaluatedType = Any def update(expression: Expression, input: Row): Unit = { value = expression.eval(input) } override def eval(input: Row): Any = value }
Example 133
Source File: DataFrameDateSuite.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql import java.sql.{Date, Timestamp} class DataFrameDateTimeSuite extends QueryTest { private lazy val ctx = org.apache.spark.sql.test.TestSQLContext import ctx.implicits._ test("timestamp comparison with date strings") { val df = Seq( (1, Timestamp.valueOf("2015-01-01 00:00:00")), (2, Timestamp.valueOf("2014-01-01 00:00:00"))).toDF("i", "t") checkAnswer( df.select("t").filter($"t" <= "2014-06-01"), Row(Timestamp.valueOf("2014-01-01 00:00:00")) :: Nil) checkAnswer( df.select("t").filter($"t" >= "2014-06-01"), Row(Timestamp.valueOf("2015-01-01 00:00:00")) :: Nil) } test("date comparison with date strings") { val df = Seq( (1, Date.valueOf("2015-01-01")), (2, Date.valueOf("2014-01-01"))).toDF("i", "t") checkAnswer( df.select("t").filter($"t" <= "2014-06-01"), Row(Date.valueOf("2014-01-01")) :: Nil) checkAnswer( df.select("t").filter($"t" >= "2015"), Row(Date.valueOf("2015-01-01")) :: Nil) } }
Example 134
Source File: ColumnarTestUtils.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.columnar import java.sql.Timestamp import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.GenericMutableRow import org.apache.spark.sql.types.{UTF8String, DataType, Decimal, AtomicType} object ColumnarTestUtils { def makeNullRow(length: Int): GenericMutableRow = { val row = new GenericMutableRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[T <: DataType, JvmType](columnType: ColumnType[T, JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case FIXED_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case STRING => UTF8String(Random.nextString(Random.nextInt(32))) case BOOLEAN => Random.nextBoolean() case BINARY => randomBytes(Random.nextInt(32)) case DATE => Random.nextInt() case TIMESTAMP => val timestamp = new Timestamp(Random.nextLong()) timestamp.setNanos(Random.nextInt(999999999)) timestamp case _ => // Using a random one-element map instead of an arbitrary object Map(Random.nextInt() -> Random.nextString(Random.nextInt(32))) }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_ <: DataType, _], tail: ColumnType[_ <: DataType, _]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_ <: DataType, _]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[T <: DataType, JvmType]( columnType: ColumnType[T, JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_ <: DataType, _], tail: ColumnType[_ <: DataType, _]*): Row = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_ <: DataType, _]]): Row = { val row = new GenericMutableRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericMutableRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericMutableRow(1) row(0) = value row } (values, rows) } }
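The TIMESTAMP branch above is the one place that exercises the nanosecond field: it builds a Timestamp from a random long and then overwrites the fractional seconds with setNanos. A small sketch of how that field behaves:

import java.sql.Timestamp

object TimestampNanosSketch extends App {
  val ts = Timestamp.valueOf("2018-09-10 11:58:59")
  ts.setNanos(123456789) // fractional seconds, 0 to 999999999, stored separately from the millis

  println(ts)          // 2018-09-10 11:58:59.123456789
  println(ts.getNanos) // 123456789
  // getTime still reports only millisecond precision: the nanos contribute 123 ms here
  println(ts.getTime % 1000) // 123
}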
Example 135
Source File: ArrayEncoders.scala From quill with Apache License 2.0 | 5 votes |
package io.getquill.context.jasync import java.sql.Timestamp import java.time.LocalDate import java.util.Date import io.getquill.PostgresJAsyncContext import io.getquill.context.sql.encoding.ArrayEncoding import org.joda.time.{ DateTime => JodaDateTime, LocalDate => JodaLocalDate, LocalDateTime => JodaLocalDateTime } trait ArrayEncoders extends ArrayEncoding { self: PostgresJAsyncContext[_] => implicit def arrayStringEncoder[Col <: Seq[String]]: Encoder[Col] = arrayRawEncoder[String, Col] implicit def arrayBigDecimalEncoder[Col <: Seq[BigDecimal]]: Encoder[Col] = arrayRawEncoder[BigDecimal, Col] implicit def arrayBooleanEncoder[Col <: Seq[Boolean]]: Encoder[Col] = arrayRawEncoder[Boolean, Col] implicit def arrayByteEncoder[Col <: Seq[Byte]]: Encoder[Col] = arrayRawEncoder[Byte, Col] implicit def arrayShortEncoder[Col <: Seq[Short]]: Encoder[Col] = arrayRawEncoder[Short, Col] implicit def arrayIntEncoder[Col <: Seq[Index]]: Encoder[Col] = arrayRawEncoder[Index, Col] implicit def arrayLongEncoder[Col <: Seq[Long]]: Encoder[Col] = arrayRawEncoder[Long, Col] implicit def arrayFloatEncoder[Col <: Seq[Float]]: Encoder[Col] = arrayRawEncoder[Float, Col] implicit def arrayDoubleEncoder[Col <: Seq[Double]]: Encoder[Col] = arrayRawEncoder[Double, Col] implicit def arrayDateEncoder[Col <: Seq[Date]]: Encoder[Col] = arrayEncoder[Date, Col](d => Timestamp.from(d.toInstant)) implicit def arrayJodaDateTimeEncoder[Col <: Seq[JodaDateTime]]: Encoder[Col] = arrayEncoder[JodaDateTime, Col](_.toLocalDateTime) implicit def arrayJodaLocalDateTimeEncoder[Col <: Seq[JodaLocalDateTime]]: Encoder[Col] = arrayRawEncoder[JodaLocalDateTime, Col] implicit def arrayJodaLocalDateEncoder[Col <: Seq[JodaLocalDate]]: Encoder[Col] = arrayRawEncoder[JodaLocalDate, Col] implicit def arrayLocalDateEncoder[Col <: Seq[LocalDate]]: Encoder[Col] = arrayEncoder[LocalDate, Col](encodeLocalDate.f) def arrayEncoder[T, Col <: Seq[T]](mapper: T => Any): Encoder[Col] = encoder[Col]((col: Col) => col.toIndexedSeq.map(mapper).mkString("{", ",", "}"), SqlTypes.ARRAY) def arrayRawEncoder[T, Col <: Seq[T]]: Encoder[Col] = arrayEncoder[T, Col](identity) }
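arrayDateEncoder above converts each java.util.Date element with Timestamp.from(d.toInstant) before rendering the array literal. That conversion, and its inverse, in isolation:

import java.sql.Timestamp
import java.util.Date

object DateTimestampConversion extends App {
  val date = new Date()

  // java.util.Date -> java.sql.Timestamp, as the encoder does
  val ts: Timestamp = Timestamp.from(date.toInstant)

  // and back again; Timestamp extends Date, but converting explicitly
  // avoids leaking the nanosecond field where only a Date is expected
  val back: Date = Date.from(ts.toInstant)

  println(s"date=$date ts=$ts back=$back")
}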
Example 136
Source File: FinagleMysqlEncoders.scala From quill with Apache License 2.0 | 5 votes |
package io.getquill.context.finagle.mysql import java.sql.Timestamp import java.time.{ LocalDate, LocalDateTime } import java.util.{ Date, UUID } import com.twitter.finagle.mysql.CanBeParameter._ import com.twitter.finagle.mysql.Parameter.wrap import com.twitter.finagle.mysql._ import io.getquill.FinagleMysqlContext trait FinagleMysqlEncoders { this: FinagleMysqlContext[_] => type Encoder[T] = FinagleMySqlEncoder[T] case class FinagleMySqlEncoder[T](encoder: BaseEncoder[T]) extends BaseEncoder[T] { override def apply(index: Index, value: T, row: PrepareRow) = encoder(index, value, row) } def encoder[T](f: T => Parameter): Encoder[T] = FinagleMySqlEncoder((index, value, row) => row :+ f(value)) def encoder[T](implicit cbp: CanBeParameter[T]): Encoder[T] = encoder[T]((v: T) => v: Parameter) private[this] val nullEncoder = encoder((_: Null) => Parameter.NullParameter) implicit def optionEncoder[T](implicit e: Encoder[T]): Encoder[Option[T]] = FinagleMySqlEncoder { (index, value, row) => value match { case None => nullEncoder.encoder(index, null, row) case Some(v) => e.encoder(index, v, row) } } implicit def mappedEncoder[I, O](implicit mapped: MappedEncoding[I, O], e: Encoder[O]): Encoder[I] = FinagleMySqlEncoder(mappedBaseEncoder(mapped, e.encoder)) implicit val stringEncoder: Encoder[String] = encoder[String] implicit val bigDecimalEncoder: Encoder[BigDecimal] = encoder[BigDecimal] { (value: BigDecimal) => BigDecimalValue(value): Parameter } implicit val booleanEncoder: Encoder[Boolean] = encoder[Boolean] implicit val byteEncoder: Encoder[Byte] = encoder[Byte] implicit val shortEncoder: Encoder[Short] = encoder[Short] implicit val intEncoder: Encoder[Int] = encoder[Int] implicit val longEncoder: Encoder[Long] = encoder[Long] implicit val floatEncoder: Encoder[Float] = encoder[Float] implicit val doubleEncoder: Encoder[Double] = encoder[Double] implicit val byteArrayEncoder: Encoder[Array[Byte]] = encoder[Array[Byte]] implicit val dateEncoder: Encoder[Date] = encoder[Date] { (value: Date) => timestampValue(new Timestamp(value.getTime)): Parameter } implicit val localDateEncoder: Encoder[LocalDate] = encoder[LocalDate] { (d: LocalDate) => DateValue(java.sql.Date.valueOf(d)): Parameter } implicit val localDateTimeEncoder: Encoder[LocalDateTime] = encoder[LocalDateTime] { (d: LocalDateTime) => timestampValue(new Timestamp(d.atZone(injectionTimeZone.toZoneId).toInstant.toEpochMilli)): Parameter } implicit val uuidEncoder: Encoder[UUID] = mappedEncoder(MappedEncoding(_.toString), stringEncoder) }
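localDateTimeEncoder above turns a LocalDateTime into a Timestamp by first resolving it in the context's injectionTimeZone. The zone matters because a LocalDateTime carries no offset of its own; a sketch of the same conversion with an explicit zone (UTC is used here only as an example):

import java.sql.Timestamp
import java.time.{LocalDateTime, ZoneId}

object LocalDateTimeToTimestamp extends App {
  val ldt = LocalDateTime.of(2018, 11, 12, 9, 42, 0)
  val zone = ZoneId.of("UTC")

  val ts = new Timestamp(ldt.atZone(zone).toInstant.toEpochMilli)

  println(ts.getTime) // 1542015720000, the epoch millis of 2018-11-12T09:42:00Z
}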
Example 137
Source File: ArrayDecoders.scala From quill with Apache License 2.0 | 5 votes |
package io.getquill.context.jdbc import java.sql.Timestamp import java.time.LocalDate import java.util.Date import java.sql.{ Date => SqlDate } import java.math.{ BigDecimal => JBigDecimal } import io.getquill.context.sql.encoding.ArrayEncoding import io.getquill.util.Messages.fail import scala.collection.compat._ import scala.reflect.ClassTag trait ArrayDecoders extends ArrayEncoding { self: JdbcContextBase[_, _] => implicit def arrayStringDecoder[Col <: Seq[String]](implicit bf: CBF[String, Col]): Decoder[Col] = arrayRawDecoder[String, Col] implicit def arrayBigDecimalDecoder[Col <: Seq[BigDecimal]](implicit bf: CBF[BigDecimal, Col]): Decoder[Col] = arrayDecoder[JBigDecimal, BigDecimal, Col](BigDecimal.apply) implicit def arrayBooleanDecoder[Col <: Seq[Boolean]](implicit bf: CBF[Boolean, Col]): Decoder[Col] = arrayRawDecoder[Boolean, Col] implicit def arrayByteDecoder[Col <: Seq[Byte]](implicit bf: CBF[Byte, Col]): Decoder[Col] = arrayRawDecoder[Byte, Col] implicit def arrayShortDecoder[Col <: Seq[Short]](implicit bf: CBF[Short, Col]): Decoder[Col] = arrayRawDecoder[Short, Col] implicit def arrayIntDecoder[Col <: Seq[Int]](implicit bf: CBF[Int, Col]): Decoder[Col] = arrayRawDecoder[Int, Col] implicit def arrayLongDecoder[Col <: Seq[Long]](implicit bf: CBF[Long, Col]): Decoder[Col] = arrayRawDecoder[Long, Col] implicit def arrayFloatDecoder[Col <: Seq[Float]](implicit bf: CBF[Float, Col]): Decoder[Col] = arrayRawDecoder[Float, Col] implicit def arrayDoubleDecoder[Col <: Seq[Double]](implicit bf: CBF[Double, Col]): Decoder[Col] = arrayRawDecoder[Double, Col] implicit def arrayDateDecoder[Col <: Seq[Date]](implicit bf: CBF[Date, Col]): Decoder[Col] = arrayRawDecoder[Date, Col] implicit def arrayTimestampDecoder[Col <: Seq[Timestamp]](implicit bf: CBF[Timestamp, Col]): Decoder[Col] = arrayRawDecoder[Timestamp, Col] implicit def arrayLocalDateDecoder[Col <: Seq[LocalDate]](implicit bf: CBF[LocalDate, Col]): Decoder[Col] = arrayDecoder[SqlDate, LocalDate, Col](_.toLocalDate) def arrayRawDecoder[T: ClassTag, Col <: Seq[T]](implicit bf: CBF[T, Col]): Decoder[Col] = arrayDecoder[T, T, Col](identity) }
Example 138
Source File: Encoders.scala From quill with Apache License 2.0 | 5 votes |
package io.getquill.context.jdbc import java.sql.{ Date, Timestamp, Types } import java.time.{ LocalDate, LocalDateTime } import java.util.{ Calendar, TimeZone } import java.{ sql, util } trait Encoders { this: JdbcContextBase[_, _] => type Encoder[T] = JdbcEncoder[T] protected val dateTimeZone = TimeZone.getDefault case class JdbcEncoder[T](sqlType: Int, encoder: BaseEncoder[T]) extends BaseEncoder[T] { override def apply(index: Index, value: T, row: PrepareRow) = encoder(index + 1, value, row) } def encoder[T](sqlType: Int, f: (Index, T, PrepareRow) => Unit): Encoder[T] = JdbcEncoder(sqlType, (index: Index, value: T, row: PrepareRow) => { f(index, value, row) row }) def encoder[T](sqlType: Int, f: PrepareRow => (Index, T) => Unit): Encoder[T] = encoder(sqlType, (index: Index, value: T, row: PrepareRow) => f(row)(index, value)) implicit def mappedEncoder[I, O](implicit mapped: MappedEncoding[I, O], e: Encoder[O]): Encoder[I] = JdbcEncoder(e.sqlType, mappedBaseEncoder(mapped, e.encoder)) private[this] val nullEncoder: Encoder[Int] = encoder(Types.INTEGER, _.setNull) implicit def optionEncoder[T](implicit d: Encoder[T]): Encoder[Option[T]] = JdbcEncoder( d.sqlType, (index, value, row) => value match { case Some(v) => d.encoder(index, v, row) case None => nullEncoder.encoder(index, d.sqlType, row) } ) implicit val stringEncoder: Encoder[String] = encoder(Types.VARCHAR, _.setString) implicit val bigDecimalEncoder: Encoder[BigDecimal] = encoder(Types.NUMERIC, (index, value, row) => row.setBigDecimal(index, value.bigDecimal)) implicit val byteEncoder: Encoder[Byte] = encoder(Types.TINYINT, _.setByte) implicit val shortEncoder: Encoder[Short] = encoder(Types.SMALLINT, _.setShort) implicit val intEncoder: Encoder[Int] = encoder(Types.INTEGER, _.setInt) implicit val longEncoder: Encoder[Long] = encoder(Types.BIGINT, _.setLong) implicit val floatEncoder: Encoder[Float] = encoder(Types.FLOAT, _.setFloat) implicit val doubleEncoder: Encoder[Double] = encoder(Types.DOUBLE, _.setDouble) implicit val byteArrayEncoder: Encoder[Array[Byte]] = encoder(Types.VARBINARY, _.setBytes) implicit val dateEncoder: Encoder[util.Date] = encoder(Types.TIMESTAMP, (index, value, row) => row.setTimestamp(index, new sql.Timestamp(value.getTime), Calendar.getInstance(dateTimeZone))) implicit val localDateEncoder: Encoder[LocalDate] = encoder(Types.DATE, (index, value, row) => row.setDate(index, Date.valueOf(value), Calendar.getInstance(dateTimeZone))) implicit val localDateTimeEncoder: Encoder[LocalDateTime] = encoder(Types.TIMESTAMP, (index, value, row) => row.setTimestamp(index, Timestamp.valueOf(value), Calendar.getInstance(dateTimeZone))) }
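localDateTimeEncoder here relies on Timestamp.valueOf(LocalDateTime), which copies the wall-clock fields of the LocalDateTime one-to-one. In isolation that pair of conversions is a lossless round trip:

import java.sql.Timestamp
import java.time.LocalDateTime

object TimestampLocalDateTimeRoundTrip extends App {
  val ldt = LocalDateTime.of(2019, 11, 2, 15, 5, 6, 376000000)

  val ts: Timestamp = Timestamp.valueOf(ldt) // wall-clock fields copied, nanos included
  val back: LocalDateTime = ts.toLocalDateTime

  assert(back == ldt)
  println(ts) // 2019-11-02 15:05:06.376
}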
Example 139
Source File: ArrayJdbcEncodingSpec.scala From quill with Apache License 2.0 | 5 votes |
package io.getquill.context.jdbc.postgres import java.sql.Timestamp import java.time.LocalDate import java.util.UUID import io.getquill.context.sql.encoding.ArrayEncodingBaseSpec import io.getquill.{ Literal, PostgresJdbcContext } class ArrayJdbcEncodingSpec extends ArrayEncodingBaseSpec { val ctx = testContext import ctx._ val q = quote(query[ArraysTestEntity]) val corrected = e.copy(timestamps = e.timestamps.map(d => new Timestamp(d.getTime))) "Support all sql base types and `Seq` implementers" in { ctx.run(q.insert(lift(corrected))) val actual = ctx.run(q).head actual mustEqual corrected baseEntityDeepCheck(actual, corrected) } "Support Seq encoding basing on MappedEncoding" in { val wrapQ = quote(querySchema[WrapEntity]("ArraysTestEntity")) ctx.run(wrapQ.insert(lift(wrapE))) ctx.run(wrapQ).head.texts mustBe wrapE.texts } "Timestamps" in { case class Timestamps(timestamps: List[Timestamp]) val tE = Timestamps(List(new Timestamp(System.currentTimeMillis()))) val tQ = quote(querySchema[Timestamps]("ArraysTestEntity")) ctx.run(tQ.insert(lift(tE))) ctx.run(tQ).head.timestamps mustBe tE.timestamps } "Catch invalid decoders" in { val newCtx = new PostgresJdbcContext(Literal, "testPostgresDB") { // avoid transforming from java.sql.Date to java.time.LocalDate override implicit def arrayLocalDateDecoder[Col <: Seq[LocalDate]](implicit bf: CBF[LocalDate, Col]): Decoder[Col] = arrayDecoder[LocalDate, LocalDate, Col](identity) } import newCtx._ newCtx.run(query[ArraysTestEntity].insert(lift(corrected))) intercept[IllegalStateException] { newCtx.run(query[ArraysTestEntity]).head mustBe corrected } newCtx.close() } "Custom decoders/encoders" in { case class Entity(uuids: List[UUID]) val e = Entity(List(UUID.randomUUID(), UUID.randomUUID())) val q = quote(querySchema[Entity]("ArraysTestEntity")) implicit def arrayUUIDEncoder[Col <: Seq[UUID]]: Encoder[Col] = arrayRawEncoder[UUID, Col]("uuid") implicit def arrayUUIDDecoder[Col <: Seq[UUID]](implicit bf: CBF[UUID, Col]): Decoder[Col] = arrayRawDecoder[UUID, Col] ctx.run(q.insert(lift(e))) ctx.run(q).head.uuids mustBe e.uuids } "Arrays in where clause" in { ctx.run(q.insert(lift(corrected))) val actual1 = ctx.run(q.filter(_.texts == lift(List("test")))) val actual2 = ctx.run(q.filter(_.texts == lift(List("test2")))) actual1 mustEqual List(corrected) actual2 mustEqual List() } "empty array on found null" in { case class ArraysTestEntity(texts: Option[List[String]]) ctx.run(query[ArraysTestEntity].insert(lift(ArraysTestEntity(None)))) case class E(texts: List[String]) ctx.run(querySchema[E]("ArraysTestEntity")).headOption.map(_.texts) mustBe Some(Nil) } override protected def beforeEach(): Unit = { ctx.run(q.delete) () } }
Example 140
Source File: DateTimeConverter.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.commons.datetime import java.sql.Timestamp import org.joda.time.format.{DateTimeFormatter, ISODateTimeFormat} import org.joda.time.{DateTime, DateTimeZone} trait DateTimeConverter { val zone: DateTimeZone = DateTimeZone.getDefault val dateTimeFormatter: DateTimeFormatter = ISODateTimeFormat.dateTime() def toString(dateTime: DateTime): String = dateTime.toString(dateTimeFormatter) def parseDateTime(s: String): DateTime = dateTimeFormatter.parseDateTime(s).withZone(zone) def parseTimestamp(s: String): Timestamp = new Timestamp(parseDateTime(s).getMillis) def now: DateTime = new DateTime(zone) def fromMillis(millis: Long): DateTime = new DateTime(zone).withMillis(millis) def dateTime( year: Int, monthOfyear: Int, dayOfMonth: Int, hourOfDay: Int = 0, minutesOfHour: Int = 0, secondsOfMinute: Int = 0): DateTime = new DateTime(year, monthOfyear, dayOfMonth, hourOfDay, minutesOfHour, secondsOfMinute, zone) def dateTimeFromUTC( year: Int, monthOfyear: Int, dayOfMonth: Int, hourOfDay: Int = 0, minutesOfHour: Int = 0, secondsOfMinute: Int = 0): DateTime = new DateTime( year, monthOfyear, dayOfMonth, hourOfDay, minutesOfHour, secondsOfMinute, DateTimeZone.UTC).withZone(DateTimeConverter.zone) } object DateTimeConverter extends DateTimeConverter
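parseTimestamp above goes through Joda-Time: the ISO string is parsed to a DateTime and its millis are wrapped in a Timestamp. As a point of comparison only (this is not the project's code), the same conversion with java.time would be roughly:

import java.sql.Timestamp
import java.time.OffsetDateTime

object ParseIsoTimestamp extends App {
  val s = "2018-11-12T09:42:00.000+01:00"

  // Parse the ISO-8601 string and keep the instant it denotes
  val ts = Timestamp.from(OffsetDateTime.parse(s).toInstant)

  println(ts.getTime) // epoch millis of the parsed instant
}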
Example 141
Source File: CsvSchemaStringifierBeforeCsvWriting.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.deeplang.doperations.readwritedataframe.filestorage.csv import java.sql.Timestamp import org.apache.spark.sql.Row import org.apache.spark.sql.types._ import ai.deepsense.commons.datetime.DateTimeConverter import ai.deepsense.deeplang.ExecutionContext import ai.deepsense.deeplang.doperables.dataframe.DataFrame import ai.deepsense.deeplang.doperations.exceptions.UnsupportedColumnTypeException object CsvSchemaStringifierBeforeCsvWriting { def preprocess(dataFrame: DataFrame) (implicit context: ExecutionContext): DataFrame = { requireNoComplexTypes(dataFrame) val schema = dataFrame.sparkDataFrame.schema def stringifySelectedTypes(schema: StructType): StructType = { StructType( schema.map { case field: StructField => field.copy(dataType = StringType) } ) } context.dataFrameBuilder.buildDataFrame( stringifySelectedTypes(schema), dataFrame.sparkDataFrame.rdd.map(stringifySelectedCells(schema))) } private def requireNoComplexTypes(dataFrame: DataFrame): Unit = { dataFrame.sparkDataFrame.schema.fields.map(structField => (structField.dataType, structField.name) ).foreach { case (dataType, columnName) => dataType match { case _: ArrayType | _: MapType | _: StructType => throw UnsupportedColumnTypeException(columnName, dataType) case _ => () } } } private def stringifySelectedCells(originalSchema: StructType)(row: Row): Row = { Row.fromSeq( row.toSeq.zipWithIndex map { case (value, index) => (value, originalSchema(index).dataType) match { case (null, _) => "" case (_, BooleanType) => if (value.asInstanceOf[Boolean]) "1" else "0" case (_, TimestampType) => DateTimeConverter.toString( DateTimeConverter.fromMillis(value.asInstanceOf[Timestamp].getTime)) case (x, _) => value.toString } }) } }
Example 142
Source File: WriteReadDataFrameWithDriverFilesIntegSpec.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.deeplang.doperations import java.sql.Timestamp import org.apache.spark.sql.Row import org.apache.spark.sql.types._ import org.scalatest.BeforeAndAfter import ai.deepsense.deeplang.{TestFiles, DeeplangIntegTestSupport} import ai.deepsense.deeplang.doperables.dataframe.DataFrame import ai.deepsense.deeplang.doperations.inout._ class WriteReadDataFrameWithDriverFilesIntegSpec extends DeeplangIntegTestSupport with BeforeAndAfter with TestFiles { import DeeplangIntegTestSupport._ val schema: StructType = StructType(Seq( StructField("boolean", BooleanType), StructField("double", DoubleType), StructField("string", StringType) )) val rows = { val base = Seq( Row(true, 0.45, "3.14"), Row(false, null, "\"testing...\""), Row(false, 3.14159, "Hello, world!"), // in case of CSV, an empty string is the same as null - no way around it Row(null, null, "") ) val repeatedFewTimes = (1 to 10).flatMap(_ => base) repeatedFewTimes } lazy val dataFrame = createDataFrame(rows, schema) "WriteDataFrame and ReadDataFrame" should { "write and read CSV file" in { val wdf = new WriteDataFrame() .setStorageType( new OutputStorageTypeChoice.File() .setOutputFile(absoluteTestsDirPath.fullPath + "/test_files") .setFileFormat( new OutputFileFormatChoice.Csv() .setCsvColumnSeparator(CsvParameters.ColumnSeparatorChoice.Comma()) .setNamesIncluded(true))) wdf.executeUntyped(Vector(dataFrame))(executionContext) val rdf = new ReadDataFrame() .setStorageType( new InputStorageTypeChoice.File() .setSourceFile(absoluteTestsDirPath.fullPath + "/test_files") .setFileFormat(new InputFileFormatChoice.Csv() .setCsvColumnSeparator(CsvParameters.ColumnSeparatorChoice.Comma()) .setNamesIncluded(true) .setShouldConvertToBoolean(true))) val loadedDataFrame = rdf.executeUntyped(Vector())(executionContext).head.asInstanceOf[DataFrame] assertDataFramesEqual(loadedDataFrame, dataFrame, checkRowOrder = false) } "write and read JSON file" in { val wdf = new WriteDataFrame() .setStorageType(new OutputStorageTypeChoice.File() .setOutputFile(absoluteTestsDirPath.fullPath + "json") .setFileFormat(new OutputFileFormatChoice.Json())) wdf.executeUntyped(Vector(dataFrame))(executionContext) val rdf = new ReadDataFrame() .setStorageType(new InputStorageTypeChoice.File() .setSourceFile(absoluteTestsDirPath.fullPath + "json") .setFileFormat(new InputFileFormatChoice.Json())) val loadedDataFrame = rdf.executeUntyped(Vector())(executionContext).head.asInstanceOf[DataFrame] assertDataFramesEqual(loadedDataFrame, dataFrame, checkRowOrder = false) } } }
Example 143
Source File: DataFrameReportPerformanceSpec.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.deeplang.doperables.dataframe import java.sql.Timestamp import java.text.{DateFormat, SimpleDateFormat} import java.util.TimeZone import org.apache.spark.rdd.RDD import org.apache.spark.sql.Row import org.apache.spark.sql.types.{DoubleType, StructField, StructType, TimestampType} import org.scalatest.{BeforeAndAfter, Ignore} import ai.deepsense.commons.utils.{DoubleUtils, Logging} import ai.deepsense.deeplang.{TestFiles, DeeplangIntegTestSupport} // It's ignored because it does not have assertions; it only prints report generation time. @Ignore class DataFrameReportPerformanceSpec extends DeeplangIntegTestSupport with BeforeAndAfter with TestFiles with Logging { val testFile = absoluteTestsDirPath.pathWithoutScheme + "/demand_without_header.csv" "DataFrame" should { "generate report" when { "DataFrame has 17K of rows" in { val numberOfTries = 10 var results: Seq[Double] = Seq() for (i <- 1 to numberOfTries) { val dataFrame: DataFrame = demandDataFrame() val start = System.nanoTime() val report = dataFrame.report() val end = System.nanoTime() val time1: Double = (end - start).toDouble / 1000000000.0 results = results :+ time1 logger.debug("Report generation time: {}", DoubleUtils.double2String(time1)) } logger.debug( "Mean report generation time: {}", DoubleUtils.double2String(results.fold(0D)(_ + _) / numberOfTries.toDouble)) } } } private def demandDataFrame(): DataFrame = { val rddString: RDD[String] = executionContext.sparkContext.textFile(testFile) val data: RDD[Row] = rddString.map(DataFrameHelpers.demandString2Row) executionContext.dataFrameBuilder.buildDataFrame(demandSchema, data) } private def demandSchema: StructType = StructType(Seq( StructField("datetime", TimestampType), StructField("log_count", DoubleType), StructField("workingday", DoubleType), StructField("holiday", DoubleType), StructField("season2", DoubleType), StructField("season3", DoubleType), StructField("season4", DoubleType))) private def timestamp(s: String): Timestamp = { val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") format.setTimeZone(TimeZone.getTimeZone("UTC")) new Timestamp(format.parse(s).getTime) } } private object DataFrameHelpers { def demandString2Row(s: String): Row = { val split = s.split(",") Row( timestamp(split(0)), split(1).toDouble, split(2).toDouble, split(3).toDouble, split(4).toDouble, split(5).toDouble, split(6).toDouble ) } private def timestamp(s: String): Timestamp = { val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") format.setTimeZone(TimeZone.getTimeZone("UTC")) new Timestamp(format.parse(s).getTime) } }
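The timestamp helpers above parse values from the demand CSV with a SimpleDateFormat pinned to UTC, so the resulting Timestamp does not depend on the JVM's default zone. The same helper on its own:

import java.sql.Timestamp
import java.text.SimpleDateFormat
import java.util.TimeZone

object ParseUtcTimestamp extends App {
  val format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
  format.setTimeZone(TimeZone.getTimeZone("UTC"))

  val ts = new Timestamp(format.parse("2011-01-20 00:00:00").getTime)
  println(ts.getTime) // 1295481600000, i.e. 2011-01-20T00:00:00Z
}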
Example 144
Source File: StatisticsForContinuousIntegSpec.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.deeplang.doperables.dataframe.report.distribution import java.sql.Timestamp import org.apache.spark.rdd.RDD import org.apache.spark.sql import org.apache.spark.sql.Row import org.apache.spark.sql.types._ import ai.deepsense.commons.datetime.DateTimeConverter import ai.deepsense.deeplang.DeeplangIntegTestSupport import ai.deepsense.deeplang.doperables.dataframe.{DataFrame, DataFrameTestFactory} import ai.deepsense.reportlib.model._ class StatisticsForContinuousIntegSpec extends DeeplangIntegTestSupport with DataFrameTestFactory { "Statistics (Min, max and mean values)" should { "be calculated for each continuous column in distribution" when { "data is of type int" in { val distribution = distributionForInt(1, 2, 3, 4, 5) distribution.statistics.min shouldEqual Some("1") distribution.statistics.max shouldEqual Some("5") distribution.statistics.mean shouldEqual Some("3") } "data is of type Timestamp" in { val distribution = distributionForTimestamps(new Timestamp(1000), new Timestamp(2000), new Timestamp(3000)) distribution.statistics.min shouldEqual Some(formatDate(1000)) distribution.statistics.max shouldEqual Some(formatDate(3000)) distribution.statistics.mean shouldEqual Some(formatDate(2000)) } } } "Null value in data" should { val distribution = distributionForDouble(1, 2, 3, 4, Double.NaN, 5) "not be skipped in calculating min and max" in { distribution.statistics.min shouldEqual Some("1") distribution.statistics.max shouldEqual Some("5") } "result in mean value NaN" in { distribution.statistics.mean shouldEqual Some("NaN") } } lazy val columnName = "column_name" private def distributionForDouble(data: Double*): ContinuousDistribution = { distributionFor(data, DoubleType) } private def distributionForInt(data: Int*): ContinuousDistribution = { distributionFor(data, IntegerType) } private def distributionForTimestamps(data: Timestamp*): ContinuousDistribution = { distributionFor(data, TimestampType) } private def distributionFor(data: Seq[Any], dataType: DataType): ContinuousDistribution = { val schema = StructType(Array( StructField(columnName, dataType) )) val rows = data.map(v => Row(v)) val dataFrame = createDataFrame(rows, schema) val report = dataFrame.report() report.content.distributions(columnName).asInstanceOf[ContinuousDistribution] } def buildDataFrame(schema: StructType, data: RDD[Row]): DataFrame = { val dataFrame: sql.DataFrame = sparkSQLSession.createDataFrame(data, schema) DataFrame.fromSparkDataFrame(dataFrame) } def formatDate(millis: Long): String = { DateTimeConverter.toString(DateTimeConverter.fromMillis(millis)) } }
Example 145
Source File: DateEncoderTest.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.record.encoder import java.sql.{Date, Timestamp} import java.time.{Instant, LocalDate, LocalDateTime, LocalTime} import com.sksamuel.avro4s.{AvroSchema, DefaultFieldMapper, Encoder, ImmutableRecord} import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.should.Matchers //noinspection ScalaDeprecation class DateEncoderTest extends AnyFunSuite with Matchers { test("encode LocalTime as TIME-MILLIS") { case class Foo(s: LocalTime) val schema = AvroSchema[Foo] Encoder[Foo].encode(Foo(LocalTime.of(12, 50, 45))) shouldBe ImmutableRecord(schema, Vector(java.lang.Long.valueOf(46245000000L))) } test("encode LocalDate as DATE") { case class Foo(s: LocalDate) val schema = AvroSchema[Foo] Encoder[Foo].encode(Foo(LocalDate.of(2018, 9, 10))) shouldBe ImmutableRecord(schema, Vector(java.lang.Integer.valueOf(17784))) } test("encode java.sql.Date as DATE") { case class Foo(s: Date) val schema = AvroSchema[Foo] Encoder[Foo].encode(Foo(Date.valueOf(LocalDate.of(2018, 9, 10)))) shouldBe ImmutableRecord(schema, Vector(java.lang.Integer.valueOf(17784))) } test("encode LocalDateTime as timestamp-nanos") { case class Foo(s: LocalDateTime) val schema = AvroSchema[Foo] Encoder[Foo].encode(Foo(LocalDateTime.of(2018, 9, 10, 11, 58, 59, 123))) shouldBe ImmutableRecord(schema, Vector(java.lang.Long.valueOf(1536580739000000123L))) Encoder[Foo].encode(Foo(LocalDateTime.of(2018, 9, 10, 11, 58, 59, 123009))) shouldBe ImmutableRecord(schema, Vector(java.lang.Long.valueOf(1536580739000123009L))) Encoder[Foo].encode(Foo(LocalDateTime.of(2018, 9, 10, 11, 58, 59, 328187943))) shouldBe ImmutableRecord(schema, Vector(java.lang.Long.valueOf(1536580739328187943L))) } test("encode Timestamp as TIMESTAMP-MILLIS") { case class Foo(s: Timestamp) val schema = AvroSchema[Foo] Encoder[Foo].encode(Foo(Timestamp.from(Instant.ofEpochMilli(1538312231000L)))) shouldBe ImmutableRecord(schema, Vector(java.lang.Long.valueOf(1538312231000L))) } test("encode Instant as TIMESTAMP-MILLIS") { case class Foo(s: Instant) val schema = AvroSchema[Foo] Encoder[Foo].encode(Foo(Instant.ofEpochMilli(1538312231000L))) shouldBe ImmutableRecord(schema, Vector(java.lang.Long.valueOf(1538312231000L))) } }
Example 146
Source File: DateDecoderTest.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.record.decoder import java.sql.{Date, Timestamp} import java.time.{Instant, LocalDate, LocalDateTime, LocalTime} import com.sksamuel.avro4s.SchemaFor.TimestampNanosLogicalType import com.sksamuel.avro4s.{AvroSchema, Decoder, SchemaFor} import org.apache.avro.generic.GenericData import org.apache.avro.{LogicalTypes, SchemaBuilder} import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.should.Matchers //noinspection ScalaDeprecation class DateDecoderTest extends AnyFunSuite with Matchers { case class WithLocalTime(z: LocalTime) case class WithLocalDate(z: LocalDate) case class WithDate(z: Date) case class WithLocalDateTime(z: LocalDateTime) case class WithTimestamp(z: Timestamp) case class WithInstant(z: Instant) test("decode int to LocalTime") { val schema = AvroSchema[WithLocalTime] val record = new GenericData.Record(schema) record.put("z", 46245000000L) Decoder[WithLocalTime].decode(record) shouldBe WithLocalTime(LocalTime.of(12, 50, 45)) } test("decode int to LocalDate") { val schema = AvroSchema[WithLocalDate] val record = new GenericData.Record(schema) record.put("z", 17784) Decoder[WithLocalDate].decode(record) shouldBe WithLocalDate(LocalDate.of(2018, 9, 10)) } test("decode int to java.sql.Date") { val schema = AvroSchema[WithDate] val record = new GenericData.Record(schema) record.put("z", 17784) Decoder[WithDate].decode(record) shouldBe WithDate(Date.valueOf(LocalDate.of(2018, 9, 10))) } test("decode timestamp-millis to LocalDateTime") { val dateSchema = LogicalTypes.timestampMillis().addToSchema(SchemaBuilder.builder.longType) val schema = SchemaBuilder.record("foo").fields().name("z").`type`(dateSchema).noDefault().endRecord() val record = new GenericData.Record(schema) record.put("z", 1572707106376L) Decoder[WithLocalDateTime].withSchema(SchemaFor(schema)).decode(record) shouldBe WithLocalDateTime( LocalDateTime.of(2019, 11, 2, 15, 5, 6, 376000000)) } test("decode timestamp-micros to LocalDateTime") { val dateSchema = LogicalTypes.timestampMicros().addToSchema(SchemaBuilder.builder.longType) val schema = SchemaBuilder.record("foo").fields().name("z").`type`(dateSchema).noDefault().endRecord() val record = new GenericData.Record(schema) record.put("z", 1572707106376001L) Decoder[WithLocalDateTime].withSchema(SchemaFor(schema)).decode(record) shouldBe WithLocalDateTime( LocalDateTime.of(2019, 11, 2, 15, 5, 6, 376001000)) } test("decode timestamp-nanos to LocalDateTime") { val dateSchema = TimestampNanosLogicalType.addToSchema(SchemaBuilder.builder.longType) val schema = SchemaBuilder.record("foo").fields().name("z").`type`(dateSchema).noDefault().endRecord() val record = new GenericData.Record(schema) record.put("z", 1572707106376000002L) Decoder[WithLocalDateTime].decode(record) shouldBe WithLocalDateTime( LocalDateTime.of(2019, 11, 2, 15, 5, 6, 376000002)) } test("decode long to Timestamp") { val schema = AvroSchema[WithTimestamp] val record = new GenericData.Record(schema) record.put("z", 1538312231000L) Decoder[WithTimestamp].decode(record) shouldBe WithTimestamp(new Timestamp(1538312231000L)) } test("decode long to Instant") { val schema = AvroSchema[WithInstant] val record = new GenericData.Record(schema) record.put("z", 1538312231000L) Decoder[WithInstant].decode(record) shouldBe WithInstant(Instant.ofEpochMilli(1538312231000L)) } }
Example 147
Source File: DateSchemaTest.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.schema

import java.sql.{Date, Timestamp}
import java.time.{Instant, LocalDate, LocalDateTime, LocalTime}

import com.sksamuel.avro4s.AvroSchema
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

class DateSchemaTest extends AnyFunSuite with Matchers {

  test("generate date logical type for LocalDate") {
    case class LocalDateTest(date: LocalDate)
    val expected = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream("/localdate.json"))
    val schema = AvroSchema[LocalDateTest]
    schema.toString(true) shouldBe expected.toString(true)
  }

  test("generate date logical type for Date") {
    case class DateTest(date: Date)
    val expected = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream("/date.json"))
    val schema = AvroSchema[DateTest]
    schema.toString(true) shouldBe expected.toString(true)
  }

  test("generate time logical type for LocalTime") {
    case class LocalTimeTest(time: LocalTime)
    val expected = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream("/localtime.json"))
    val schema = AvroSchema[LocalTimeTest]
    schema.toString(true) shouldBe expected.toString(true)
  }

  test("generate timestamp-nanos for LocalDateTime") {
    case class LocalDateTimeTest(time: LocalDateTime)
    val expected = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream("/localdatetime.json"))
    val schema = AvroSchema[LocalDateTimeTest]
    schema.toString(true) shouldBe expected.toString(true)
  }

  test("generate timestamp-millis logical type for Instant") {
    case class InstantTest(instant: Instant)
    val expected = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream("/instant.json"))
    val schema = AvroSchema[InstantTest]
    schema.toString(true) shouldBe expected.toString(true)
  }

  test("generate timestamp-millis logical type for Timestamp") {
    case class TimestampTest(ts: Timestamp)
    val expected = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream("/timestamp.json"))
    val schema = AvroSchema[TimestampTest]
    schema.toString(true) shouldBe expected.toString(true)
  }
}
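A minimal sketch of generating and inspecting one of these schemas outside a test (REPL-friendly; the case class name here is illustrative, not from the project):

import java.sql.Timestamp
import com.sksamuel.avro4s.AvroSchema

case class TimestampExample(ts: Timestamp)

// Prints an Avro record whose "ts" field is a long carrying the
// timestamp-millis logical type.
println(AvroSchema[TimestampExample].toString(true))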
Example 148
Source File: SparkUtilities.scala From spark-practice with MIT License | 5 votes |
package utilities

import java.sql.Timestamp

import org.apache.spark.sql.SparkSession
import org.apache.spark.{SparkConf, SparkContext}
import probelms.customerInsights.CIConstants

object SparkUtilities {

  def getSparkContext(appName: String): SparkContext = {
    val conf = new SparkConf().setAppName(appName).setMaster("local")
    // .set("spark.serializer","spark.kryo.registrator")
    val sc = new SparkContext(conf)
    sc
  }

  def getSparkSession(appName: String): SparkSession = {
    val spark = SparkSession.builder()
      .appName(appName)
      .master("local")
      // .config("spark.serializer","spark.kryo.registrator")
      .getOrCreate()
    spark
  }

  def convertCurrencyToDouble(currency: String): Double = {
    currency.stripPrefix("$").trim.toDouble
  }

  def getDate(date: String): Timestamp = {
    new java.sql.Timestamp(CIConstants.formatter.parseDateTime(date).getMillis)
  }
}
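getDate above relies on a Joda-Time formatter defined elsewhere (CIConstants.formatter). A hedged standalone sketch of the same Joda-millis-to-Timestamp conversion; the pattern below is purely illustrative, not the project's actual format:

import java.sql.Timestamp
import org.joda.time.format.DateTimeFormat
import utilities.SparkUtilities

object DateConversionDemo {
  def main(args: Array[String]): Unit = {
    // Illustrative pattern; the original project keeps its formatter in CIConstants.
    val formatter = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss")
    val ts = new Timestamp(formatter.parseDateTime("2018-11-12 09:42:00").getMillis)
    println(ts)

    println(SparkUtilities.convertCurrencyToDouble("$12.99")) // 12.99
  }
}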
Example 149
Source File: StructuredNetworkWordCountWindowed.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.sql.streaming

import java.sql.Timestamp

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._

object StructuredNetworkWordCountWindowed {

  def main(args: Array[String]) {
    if (args.length < 3) {
      System.err.println("Usage: StructuredNetworkWordCountWindowed <hostname> <port>" +
        " <window duration in seconds> [<slide duration in seconds>]")
      System.exit(1)
    }

    val host = args(0)
    val port = args(1).toInt
    val windowSize = args(2).toInt
    val slideSize = if (args.length == 3) windowSize else args(3).toInt
    if (slideSize > windowSize) {
      System.err.println("<slide duration> must be less than or equal to <window duration>")
    }
    val windowDuration = s"$windowSize seconds"
    val slideDuration = s"$slideSize seconds"

    val spark = SparkSession
      .builder
      .appName("StructuredNetworkWordCountWindowed")
      .getOrCreate()

    import spark.implicits._

    // Create DataFrame representing the stream of input lines from connection to host:port
    val lines = spark.readStream
      .format("socket")
      .option("host", host)
      .option("port", port)
      .option("includeTimestamp", true)
      .load()

    // Split the lines into words, retaining timestamps
    val words = lines.as[(String, Timestamp)].flatMap(line =>
      line._1.split(" ").map(word => (word, line._2))
    ).toDF("word", "timestamp")

    // Group the data by window and word and compute the count of each group
    val windowedCounts = words.groupBy(
      window($"timestamp", windowDuration, slideDuration), $"word"
    ).count().orderBy("window")

    // Start running the query that prints the windowed word counts to the console
    val query = windowedCounts.writeStream
      .outputMode("complete")
      .format("console")
      .option("truncate", "false")
      .start()

    query.awaitTermination()
  }
}
// scalastyle:on println
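To exercise the same windowed aggregation without a network socket, a hedged sketch using MemoryStream, mirroring the testing pattern in the other examples; the object name and sample data are illustrative:

import java.sql.Timestamp

import org.apache.spark.sql.{SQLContext, SparkSession}
import org.apache.spark.sql.execution.streaming.MemoryStream
import org.apache.spark.sql.functions._

object WindowedCountLocalDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder.master("local[*]").appName("windowed-count-demo").getOrCreate()
    import spark.implicits._
    implicit val sqlContext: SQLContext = spark.sqlContext

    // Feed (word, timestamp) pairs from memory instead of reading from a socket.
    val input = MemoryStream[(String, Timestamp)]
    input.addData(("spark", new Timestamp(0L)), ("scala", new Timestamp(30000L)))

    val counts = input.toDS().toDF("word", "timestamp")
      .groupBy(window($"timestamp", "60 seconds", "30 seconds"), $"word")
      .count()

    val query = counts.writeStream
      .outputMode("complete")
      .format("console")
      .option("truncate", "false")
      .start()

    input.addData(("spark", new Timestamp(60000L)))
    query.processAllAvailable()
    query.stop()
    spark.stop()
  }
}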
Example 150
Source File: QueryPartitionSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive

import java.io.File
import java.sql.Timestamp

import com.google.common.io.Files
import org.apache.hadoop.fs.FileSystem

import org.apache.spark.sql._
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.util.Utils

class QueryPartitionSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
  import spark.implicits._

  test("SPARK-5068: query data when path doesn't exist") {
    withSQLConf((SQLConf.HIVE_VERIFY_PARTITION_PATH.key, "true")) {
      val testData = sparkContext.parallelize(
        (1 to 10).map(i => TestData(i, i.toString))).toDF()
      testData.createOrReplaceTempView("testData")

      val tmpDir = Files.createTempDir()
      // create the table for test
      sql(s"CREATE TABLE table_with_partition(key int,value string) " +
        s"PARTITIONED by (ds string) location '${tmpDir.toURI}' ")
      sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='1') " +
        "SELECT key,value FROM testData")
      sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='2') " +
        "SELECT key,value FROM testData")
      sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='3') " +
        "SELECT key,value FROM testData")
      sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='4') " +
        "SELECT key,value FROM testData")

      // test for the exist path
      checkAnswer(sql("select key,value from table_with_partition"),
        testData.toDF.collect ++ testData.toDF.collect ++
          testData.toDF.collect ++ testData.toDF.collect)

      // delete the path of one partition
      tmpDir.listFiles
        .find { f => f.isDirectory && f.getName().startsWith("ds=") }
        .foreach { f => Utils.deleteRecursively(f) }

      // test for after delete the path
      checkAnswer(sql("select key,value from table_with_partition"),
        testData.toDF.collect ++ testData.toDF.collect ++ testData.toDF.collect)

      sql("DROP TABLE IF EXISTS table_with_partition")
      sql("DROP TABLE IF EXISTS createAndInsertTest")
    }
  }

  test("SPARK-21739: Cast expression should initialize timezoneId") {
    withTable("table_with_timestamp_partition") {
      sql("CREATE TABLE table_with_timestamp_partition(value int) PARTITIONED BY (ts TIMESTAMP)")
      sql("INSERT OVERWRITE TABLE table_with_timestamp_partition " +
        "PARTITION (ts = '2010-01-01 00:00:00.000') VALUES (1)")

      // test for Cast expression in TableReader
      checkAnswer(sql("SELECT * FROM table_with_timestamp_partition"),
        Seq(Row(1, Timestamp.valueOf("2010-01-01 00:00:00.000"))))

      // test for Cast expression in HiveTableScanExec
      checkAnswer(sql("SELECT value FROM table_with_timestamp_partition " +
        "WHERE ts = '2010-01-01 00:00:00.000'"), Row(1))
    }
  }
}
Example 151
Source File: TypeCast.scala From mimir with Apache License 2.0 | 5 votes |
package mimir.exec.spark.datasource.google.spreadsheet

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.NumberFormat
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try

object TypeCast {

  private[spreadsheet] def castTo(
      datum: String,
      castType: DataType,
      nullable: Boolean = true
  ): Any = {
    castType match {
      case _: ByteType => datum.toByte
      case _: ShortType => datum.toShort
      case _: IntegerType => datum.toInt
      case _: LongType => datum.toLong
      case _: FloatType => Try(datum.toFloat)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).floatValue())
      // Fixed: the original fast path used datum.toFloat here, which silently lost double precision.
      case _: DoubleType => Try(datum.toDouble)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).doubleValue())
      case _: BooleanType => datum.toBoolean
      case _: DecimalType => new BigDecimal(datum.replaceAll(",", ""))
      case _: TimestampType => Timestamp.valueOf(datum)
      case _: DateType => Date.valueOf(datum)
      case _: StringType => datum
      case _ => throw new RuntimeException(s"Unsupported type: ${castType.typeName}")
    }
  }
}
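A hedged sketch of how castTo might be exercised; since the method is private[spreadsheet], this assumes a caller inside the same package, and the demo object name is hypothetical:

package mimir.exec.spark.datasource.google.spreadsheet

import org.apache.spark.sql.types.{DoubleType, TimestampType}

object TypeCastDemo {
  def main(args: Array[String]): Unit = {
    // TimestampType goes through java.sql.Timestamp.valueOf, so the string
    // must use the JDBC escape format yyyy-[m]m-[d]d hh:mm:ss[.f...].
    val ts = TypeCast.castTo("2018-11-12 09:42:00", TimestampType)
    val price = TypeCast.castTo("1234.56", DoubleType)
    println(s"$ts / $price")
  }
}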
Example 152
Source File: TestResultSetDataConverter.scala From ohara with Apache License 2.0 | 4 votes |
package oharastream.ohara.connector.jdbc.source

import java.sql.{ResultSet, Time, Timestamp}

import oharastream.ohara.client.configurator.InspectApi.RdbColumn
import oharastream.ohara.common.rule.OharaTest
import oharastream.ohara.connector.jdbc.datatype.{MySQLDataTypeConverter, RDBDataTypeConverter}
import oharastream.ohara.connector.jdbc.util.{ColumnInfo, DateTimeUtils}
import org.junit.Test
import org.mockito.Mockito
import org.mockito.Mockito._
import org.scalatest.matchers.should.Matchers._

class TestResultSetDataConverter extends OharaTest {
  private[this] val VARCHAR: String = "VARCHAR"
  private[this] val TIMESTAMP: String = "TIMESTAMP"
  private[this] val INT: String = "INT"
  private[this] val DATE: String = "DATE"
  private[this] val TIME: String = "TIME"

  @Test
  def testConverterRecord(): Unit = {
    val resultSet: ResultSet = Mockito.mock(classOf[ResultSet])
    when(resultSet.getTimestamp("column1", DateTimeUtils.CALENDAR)).thenReturn(new Timestamp(0L))
    when(resultSet.getString("column2")).thenReturn("aaa")
    when(resultSet.getInt("column3")).thenReturn(10)

    val columnList = Seq(
      RdbColumn("column1", TIMESTAMP, true),
      RdbColumn("column2", VARCHAR, false),
      RdbColumn("column3", INT, false)
    )

    val dataTypeConverter: RDBDataTypeConverter = new MySQLDataTypeConverter()
    val result: Seq[ColumnInfo[_]] = ResultSetDataConverter.converterRecord(dataTypeConverter, resultSet, columnList)
    result.head.columnName shouldBe "column1"
    result.head.columnType shouldBe TIMESTAMP
    result.head.value.toString shouldBe "1970-01-01 08:00:00.0"

    result(1).columnName shouldBe "column2"
    result(1).columnType shouldBe VARCHAR
    result(1).value shouldBe "aaa"

    result(2).columnName shouldBe "column3"
    result(2).columnType shouldBe INT
    result(2).value shouldBe 10
  }

  @Test
  def testNullValue(): Unit = {
    val resultSet: ResultSet = Mockito.mock(classOf[ResultSet])
    when(resultSet.getTimestamp("column1", DateTimeUtils.CALENDAR)).thenReturn(new Timestamp(0L))
    when(resultSet.getString("column2")).thenReturn(null)
    when(resultSet.getDate("column3")).thenReturn(null)
    when(resultSet.getTime("column4")).thenReturn(null)

    val columnList = Seq(
      RdbColumn("column1", TIMESTAMP, true),
      RdbColumn("column2", VARCHAR, false),
      RdbColumn("column3", DATE, false),
      RdbColumn("column4", TIME, false)
    )

    val dataTypeConverter: RDBDataTypeConverter = new MySQLDataTypeConverter()
    val result: Seq[ColumnInfo[_]] = ResultSetDataConverter.converterRecord(dataTypeConverter, resultSet, columnList)
    result(1).columnName shouldBe "column2"
    result(1).columnType shouldBe VARCHAR
    result(1).value shouldBe "null"

    result(2).columnName shouldBe "column3"
    result(2).columnType shouldBe DATE
    result(2).value.toString shouldBe "1970-01-01"

    result(3).columnName shouldBe "column4"
    result(3).columnType shouldBe TIME
    result(3).value.toString shouldBe new Time(0).toString
  }
}
Example 153
Source File: StreamingProducer.scala From Scala-Programming-Projects with MIT License | 4 votes |
package coinyser

import java.sql.Timestamp
import java.text.SimpleDateFormat
import java.util.TimeZone

import cats.effect.IO
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.pusher.client.Client
import com.pusher.client.channel.SubscriptionEventListener
import com.typesafe.scalalogging.StrictLogging

object StreamingProducer extends StrictLogging {

  def subscribe(pusher: Client)(onTradeReceived: String => Unit): IO[Unit] =
    for {
      _ <- IO(pusher.connect())
      channel <- IO(pusher.subscribe("live_trades"))
      _ <- IO(channel.bind("trade", new SubscriptionEventListener() {
        override def onEvent(channel: String, event: String, data: String): Unit = {
          logger.info(s"Received event: $event with data: $data")
          onTradeReceived(data)
        }
      }))
    } yield ()

  val mapper: ObjectMapper = {
    val m = new ObjectMapper()
    m.registerModule(DefaultScalaModule)
    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    // Very important: the storage must be in UTC
    sdf.setTimeZone(TimeZone.getTimeZone("UTC"))
    m.setDateFormat(sdf)
  }

  def deserializeWebsocketTransaction(s: String): WebsocketTransaction =
    mapper.readValue(s, classOf[WebsocketTransaction])

  def convertWsTransaction(wsTx: WebsocketTransaction): Transaction =
    Transaction(
      timestamp = new Timestamp(wsTx.timestamp.toLong * 1000),
      tid = wsTx.id,
      price = wsTx.price,
      sell = wsTx.`type` == 1,
      amount = wsTx.amount)

  def serializeTransaction(tx: Transaction): String =
    mapper.writeValueAsString(tx)
}
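The java.sql.Timestamp detail in convertWsTransaction is that the websocket payload carries epoch seconds as a string, hence the multiplication by 1000. A minimal standalone illustration; the case class below is an assumed stand-in for the project's WebsocketTransaction, reduced to the field used in the conversion:

import java.sql.Timestamp

// Assumed stand-in: only the field used for the time conversion is shown.
case class WsTx(timestamp: String)

// Epoch seconds (as sent on the wire) -> milliseconds -> java.sql.Timestamp.
def toSqlTimestamp(wsTx: WsTx): Timestamp =
  new Timestamp(wsTx.timestamp.toLong * 1000)

println(toSqlTimestamp(WsTx("1538312231"))) // prints the instant in the local time zone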
Example 154
Source File: StreamingPredictionsSpec.scala From odsc-east-realish-predictions with Apache License 2.0 | 4 votes |
package com.twilio.open.odsc.realish

import java.sql.Timestamp
import java.time.Instant
import java.util.{Random, UUID}

import org.apache.spark.SparkConf
import org.apache.spark.sql.{Encoders, SQLContext, SparkSession}
import org.scalatest.{FunSuite, Matchers}
import org.apache.spark.sql.execution.streaming.MemoryStream
import org.apache.spark.sql.functions._
import org.apache.spark.sql.streaming.{OutputMode, Trigger}

import scala.concurrent.duration._

class StreamingPredictionsSpec extends FunSuite with Matchers with SharedSparkSql {

  override def conf: SparkConf = {
    new SparkConf()
      .setMaster("local[*]")
      .setAppName("odsc-spark-utils")
      .set("spark.ui.enabled", "false")
      .set("spark.app.id", appID)
      .set("spark.driver.host", "localhost")
      .set("spark.sql.session.timeZone", "UTC")
  }

  final val notRandomRandom = {
    val generator = new Random
    generator.setSeed(100L)
    generator
  }

  test("should stream in some mock data for fun") {
    implicit val spark: SparkSession = sparkSql
    import spark.implicits._
    implicit val sqlContext: SQLContext = spark.sqlContext

    implicit val metricEncoder = Encoders.product[Metric]
    val metricData = MemoryStream[Metric]

    val startingInstant = Instant.now()

    val backingData = (1 to 10000).map(offset => {
      val metric = if (offset % 2 == 0) "loss_percentage" else "connect_duration"
      val nextLoss = notRandomRandom.nextDouble() * notRandomRandom.nextInt(100)
      Metric(
        Timestamp.from(startingInstant.minusSeconds(offset)),
        UUID.randomUUID().toString,
        metric,
        value = if (metric == "loss_percentage") nextLoss else notRandomRandom.nextDouble() * notRandomRandom.nextInt(240),
        countryCode = if (offset % 8 == 0) "US" else "BR",
        callDirection = if (metric == "loss_percentage") "inbound" else "outbound"
      )
    })
    val processingTimeTrigger = Trigger.ProcessingTime(2.seconds)

    val streamingQuery = metricData.toDF()
      .withWatermark("timestamp", "2 hours")
      .groupBy(col("metric"), col("countryCode"), window($"timestamp", "5 minutes"))
      .agg(
        min("value") as "min",
        avg("value") as "mean",
        max("value") as "max",
        count("*") as "total"
      )
      .writeStream
      .format("memory")
      .queryName("datastream")
      .outputMode(OutputMode.Append())
      .trigger(processingTimeTrigger)
      .start()

    metricData.addData(backingData)
    streamingQuery.processAllAvailable()

    spark.sql("select * from datastream").show(20, false)

    val checkChange = spark.sql("select * from datastream")
      .groupBy("metric", "countryCode")
      .agg(
        sum("total") as "total",
        avg("mean") as "mean"
      )

    checkChange.show(20, false)

    // now can do interesting things with minor back tracking...

    streamingQuery.stop()
  }
}