java.sql.Timestamp Scala Examples

The following examples show how to use java.sql.Timestamp in Scala. Each example is taken from an open-source project; the source file, project, and license are noted above each listing.
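As a quick orientation, here is a minimal, self-contained sketch (not taken from any of the projects below) of the basic java.sql.Timestamp operations the examples rely on: constructing a Timestamp from epoch milliseconds, parsing the JDBC escape format, and converting back to milliseconds.

import java.sql.Timestamp

object TimestampBasics extends App {
  // Construct a Timestamp from the current epoch milliseconds
  val now = new Timestamp(System.currentTimeMillis())

  // Parse the JDBC escape format "yyyy-[m]m-[d]d hh:mm:ss[.f...]"
  val parsed = Timestamp.valueOf("2018-11-12 09:42:00")

  // Convert back to epoch milliseconds, e.g. for Spark rows or JDBC parameters
  println(s"now=${now.getTime}, parsed=${parsed.getTime}")
}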
Example 1
Source File: SparkRandomGenDataIngress.scala    From pipelines-examples   with Apache License 2.0
package pipelines.example

import java.sql.Timestamp

import scala.util.Random

import pipelines.streamlets.{ DurationConfigParameter, IntegerConfigParameter, StreamletShape }
import pipelines.streamlets.avro._
import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic }
import org.apache.spark.sql.Dataset
import org.apache.spark.sql.streaming.{ OutputMode, Trigger }

import pipelines.spark.sql.SQLImplicits._

case class Rate(timestamp: Timestamp, value: Long)

class SparkRandomGenDataIngress extends SparkStreamlet {
  val out = AvroOutlet[Data]("out", d ⇒ d.src)
  val shape = StreamletShape(out)

  val RecordsPerSecond = IntegerConfigParameter(
    "records-per-second",
    "Records per second to produce.",
    Some(50))

  val RampUpTime = DurationConfigParameter(
    "ramp-up-time",
    "Time to reach max records per second.",
    Some("0 seconds"))

  override def configParameters = Vector(RecordsPerSecond, RampUpTime)

  override def createLogic() = new SparkStreamletLogic {

    override def buildStreamingQueries = {
      writeStream(process, out, OutputMode.Append).toQueryExecution
    }

    private def process: Dataset[Data] = {

      val recordsPerSecond = context.streamletConfig.getInt(RecordsPerSecond.key)
      val rampUpTime = context.streamletConfig.getDuration(RampUpTime.key, java.util.concurrent.TimeUnit.SECONDS)
      println(s"Using rampup time of $rampUpTime seconds")

      val gaugeGen: () ⇒ String = () ⇒ if (Random.nextDouble() < 0.5) "oil" else "gas"

      val rateStream = session.readStream
        .format("rate")
        .option("rowsPerSecond", recordsPerSecond)
        .option("rampUpTime", s"${rampUpTime}s")
        .load()
        .as[Rate]

      rateStream.map {
        case Rate(timestamp, value) ⇒ Data(s"src-${value % 1000}", timestamp.getTime, None, None, gaugeGen(), value)
      }
    }
  }
} 
Example 2
Source File: TransactionsFlowUnitTest.scala    From kafka-examples   with Apache License 2.0
package com.cloudera.streaming.refapp

import java.sql.Timestamp

import org.scalatest.BeforeAndAfter

import org.apache.spark.sql.execution.streaming.MemoryStream

class TransactionsFlowUnitTest extends UnitTestBase with BeforeAndAfter {
  import testImplicits._

  var transactionsFromStream: MemoryStream[Transaction] = _
  var transactionsFlow: TransactionsFlow = _

  before {
    transactionsFromStream = MemoryStream[Transaction]
    transactionsFlow = new TransactionsFlow(
      spark,
      statesFromCluster,
      customersFromCluster,
      vendorsFromCluster,
      transactionsFromStream = transactionsFromStream
        .toDF.withColumn("timestamp", $"event_timestamp".cast("timestamp")))
  }

  test("Valid records are written to the validTransactions output") {

    val validTransaction = Transaction(
      transaction_id = "1",
      customer_id = Some(1),
      vendor_id = Some(1),
      event_state = Some("CREATED"),
      event_timestamp = Timestamp.valueOf("2018-11-12 09:42:00"),
      price = Some("100"),
      card_type = Some("Credit"))

    testStream(transactionsFlow.validTransactions.select('transaction_id, 'customer_id, 'vendor_id, 'event_state, 'event_timestamp, 'price, 'card_type)) (
      AddData(transactionsFromStream, validTransaction),
      CheckAnswer(validTransaction)
    )
  }

  test("Invalid records are written to the invalidTransactions output") {
    // Note: transactionsFlow.validTransactions and invalidTransactions retain the fields used for internal calculations (e.g. validation),
    // which lets us assert on those intermediate values
    testStream(transactionsFlow.invalidTransactions.select('transaction_id, 'valid_card_type)) (
      AddData(transactionsFromStream,
        Transaction(
          transaction_id = "2",
          customer_id = Some(1),
          vendor_id = Some(1),
          event_state = Some("CREATED"),
          event_timestamp = Timestamp.valueOf("2018-11-12 09:42:00"),
          price = Some("100"),
          card_type = Some("Invalid"))),
      CheckAnswer(("2", false))
    )
  }

} 
Example 3
Source File: LocalIntegrationTest.scala    From kafka-examples   with Apache License 2.0
package com.cloudera.streaming.refapp

import java.sql.Timestamp

import org.scalatest.Matchers._
import org.scalatest.concurrent.Eventually._
import org.scalatest.time.{Seconds, Span}

import org.apache.spark.sql.Encoders

class LocalIntegrationTest extends IntegrationTestBase {

  test("Integration test with one kafka and one spark instance embedded in the same JVM") {

    val inputDir = "src/test/resources/samples"

    val spark = EmbeddedSpark.sparkSession

    val fileSource = new FileSources(spark, inputDir)
    val kafkaConfig = EmbeddedKafkaBroker.defaultKafkaConfig
    val kafkaSource = new KafkaSource(spark, kafkaConfig)

    val application = new Application(
      spark,
      Sources(
        statesFromCluster = fileSource.jsonFile("states"),
        customersFromCluster = fileSource.jsonFile("customers"),
        vendorsFromCluster = fileSource.jsonFile("vendors"),
        customersFromStream = kafkaSource.jsonStreamWithKafkaTimestamp("customer"),
        vendorsFromStream = kafkaSource.jsonStreamWithTimestampFromMessage("vendor", "update_timestamp"),
        transactionsFromStream = kafkaSource.jsonStreamWithTimestampFromMessage("transaction", "event_timestamp")
      ),
      Sinks(
        invalidTransactions = Memory.memorySink("invalidTransactions"),
        validTransactions = Memory.memorySink("validTransactions"),
        customerOrphans = Memory.memorySink("customerOrphans"),
        vendorOrphans = Memory.memorySink("vendorOrphans"),
        customers = Memory.memorySink("customers"),
        vendors = Memory.memorySink("vendors"),
        transactionsOperationalMetadata = Memory.memorySink("transactionsOperationalMetadata")
      ))

    application.start()

    eventually(timeout(Span(20, Seconds)), interval(Span(5, Seconds))) {
      EmbeddedKafkaBroker.publishStringMessageToKafka(
        "transaction",
        """{
          "transaction_id": "1",
          "customer_id": 1,
          "vendor_id": 1,
          "event_state": "CREATED",
          "event_timestamp": "2018-11-12 09:42:00",
          "price": "100",
          "card_type": "Credit"}""")
      EmbeddedKafkaBroker.publishStringMessageToKafka(
        "transaction",
        """{
          "transaction_id": "21",
          "customer_id": 100,
          "vendor_id": 2,
          "event_state": "SWIPED",
          "event_timestamp": "2018-11-13 09:45:01",
          "price": "100",
          "card_type": "Debit"}""")

      val validTransactionsQuery = application.streamingQueries.validTransactions
      validTransactionsQuery.processAllAvailable()
      val currentContent = spark.table("validTransactions").as[Transaction](Encoders.product).collect()

      currentContent.shouldBe(
        Array(
          Transaction(
            transaction_id = "1",
            customer_id = Some(1),
            vendor_id = Some(1),
            event_state = Some("CREATED"),
            event_timestamp = Timestamp.valueOf("2018-11-12 09:42:00"),
            price = Some("100"),
            card_type = Some("Credit")),
          Transaction(
            transaction_id = "21",
            customer_id = Some(100),
            vendor_id = Some(2),
            event_state = Some("SWIPED"),
            event_timestamp = Timestamp.valueOf("2018-11-13 09:45:01"),
            price = Some("100"),
            card_type = Some("Debit"))
        ))
    }
  }
} 
Example 4
Source File: TypeCast.scala    From spark-google-spreadsheets   with Apache License 2.0
package com.github.potix2.spark.google.spreadsheets.util

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.NumberFormat
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try

object TypeCast {

  private[spreadsheets] def castTo(
                                   datum: String,
                                   castType: DataType,
                                   nullable: Boolean = true
                                 ): Any = {
    castType match {
      case _: ByteType => datum.toByte
      case _: ShortType => datum.toShort
      case _: IntegerType => datum.toInt
      case _: LongType => datum.toLong
      case _: FloatType => Try(datum.toFloat)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).floatValue())
      case _: DoubleType => Try(datum.toDouble)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).doubleValue())
      case _: BooleanType => datum.toBoolean
      case _: DecimalType => new BigDecimal(datum.replaceAll(",", ""))
      case _: TimestampType => Timestamp.valueOf(datum)
      case _: DateType => Date.valueOf(datum)
      case _: StringType => datum
      case _ => throw new RuntimeException(s"Unsupported type: ${castType.typeName}")

    }
  }
} 
Example 5
Source File: ProcessMarshaller.scala    From sundial   with MIT License
package dao.postgres.marshalling

import java.sql.{Connection, PreparedStatement, ResultSet, Timestamp}
import java.util.UUID
import dao.postgres.common.ProcessTable
import model.{Process, ProcessStatus}
import util.JdbcUtil._

object ProcessMarshaller {

  def unmarshalProcess(rs: ResultSet): Process = {
    import ProcessTable._
    Process(
      id = rs.getObject(COL_ID).asInstanceOf[UUID],
      processDefinitionName = rs.getString(COL_DEF_NAME),
      startedAt = javaDate(rs.getTimestamp(COL_STARTED)),
      status = rs.getString(COL_STATUS) match {
        case STATUS_SUCCEEDED =>
          ProcessStatus.Succeeded(javaDate(rs.getTimestamp(COL_ENDED_AT)))
        case STATUS_FAILED =>
          ProcessStatus.Failed(javaDate(rs.getTimestamp(COL_ENDED_AT)))
        case STATUS_RUNNING => ProcessStatus.Running()
      },
      taskFilter = getStringArray(rs, COL_TASK_FILTER)
    )
  }

  def marshalProcess(process: Process,
                     stmt: PreparedStatement,
                     columns: Seq[String],
                     startIndex: Int = 1)(implicit conn: Connection) = {
    import ProcessTable._
    var index = startIndex
    columns.foreach { col =>
      col match {
        case COL_ID => stmt.setObject(index, process.id)
        case COL_DEF_NAME =>
          stmt.setString(index, process.processDefinitionName)
        case COL_STARTED =>
          stmt.setTimestamp(index, new Timestamp(process.startedAt.getTime()))
        case COL_ENDED_AT =>
          stmt.setTimestamp(index, process.endedAt.getOrElse(null))
        case COL_STATUS =>
          stmt.setString(
            index,
            process.status match {
              case ProcessStatus.Succeeded(_) => STATUS_SUCCEEDED
              case ProcessStatus.Failed(_)    => STATUS_FAILED
              case ProcessStatus.Running()    => STATUS_RUNNING
            }
          )
        case COL_TASK_FILTER =>
          stmt.setArray(index,
                        process.taskFilter.map(makeStringArray).getOrElse(null))
      }
      index += 1
    }
  }

} 
Example 6
Source File: JdbcUtil.scala    From sundial   with MIT License
package util

import java.sql.{Connection, Timestamp, ResultSet}
import java.util.Date
import scala.language.implicitConversions

object JdbcUtil {

  implicit def resultSetItr(resultSet: ResultSet): Stream[ResultSet] = {
    new Iterator[ResultSet] {
      def hasNext = resultSet.next()
      def next() = resultSet
    }.toStream
  }

  implicit def javaDate(ts: Timestamp): Date = {
    new Date(ts.getTime())
  }

  implicit def dateToTimestamp(date: Date) = {
    if (date != null)
      new Timestamp(date.getTime())
    else
      null
  }

  private def getNullable[T](rs: ResultSet, f: ResultSet => T): Option[T] = {
    val obj = f(rs)
    if (rs.wasNull()) {
      Option.empty
    } else {
      Some(obj)
    }
  }

  def getIntOption(rs: ResultSet, col: String) =
    getNullable(rs, rs => rs.getInt(col))

  def makeStringArray(seq: Seq[String])(implicit conn: Connection) = {
    conn.createArrayOf("varchar", seq.toArray[AnyRef])
  }

  def getStringArray(rs: ResultSet, col: String) = {
    Option(rs.getArray(col))
      .map(_.getArray().asInstanceOf[Array[String]].toList)
  }

} 
Example 7
Source File: SchedulerDataManager.scala    From cave   with MIT License
package com.cave.metrics.data.postgresql

import java.sql.Timestamp

import com.cave.metrics.data.AwsConfig
import com.cave.metrics.data.postgresql.Tables._
import org.joda.time.format.DateTimeFormat
import org.joda.time.DateTime

import scala.slick.jdbc.{GetResult, StaticQuery => Q}
import scala.slick.driver.PostgresDriver.simple._

class SchedulerDataManager(awsConfig: AwsConfig) extends DatabaseConnection(awsConfig) {

  def leadershipTermTimeoutSeconds = awsConfig.leadershipTermTimeoutSeconds
  def leadershipTermLengthSeconds = awsConfig.leadershipTermLengthSeconds

  def DBDateTimeFormatter = DateTimeFormat.forPattern("YYYY-MM-dd HH:mm:ss Z")

  implicit val getSchedulersResult = GetResult(r => SchedulersRow(r.<<, r.<<, r.<<))

  
  def takeLeadership(hostname: String): Boolean = {
    db.withTransaction { implicit session =>
      val termTimeout = new DateTime().minusSeconds(leadershipTermTimeoutSeconds)
      val timeoutSql = DBDateTimeFormatter.print(termTimeout)
      val sql = s"BEGIN; SELECT * FROM schedulers WHERE created_at < '$timeoutSql' FOR UPDATE"
      val query = Q.queryNA[SchedulersRow](sql)

      def updateTimestamp(): Boolean = Schedulers.filter(_.createdAt < new Timestamp(termTimeout.getMillis))
        .map(s => (s.name, s.createdAt)).update(hostname, new Timestamp(System.currentTimeMillis())) == 1

      try {
        query.list.length == 1 &&
          (updateTimestamp() || {
            session.rollback()
            false
          })
      } catch {
        case e: Exception =>
          log.error(e)
          session.rollback()
          false
      }
    }
  }
} 
Example 8
Source File: SchedulerDataManagerSpec.scala    From cave   with MIT License
package com.cave.metrics.data.postgresql

import java.sql.Timestamp

import com.cave.metrics.data.postgresql.Tables._
import org.joda.time.format.DateTimeFormat
import org.scalatest.BeforeAndAfter
import scala.slick.driver.H2Driver.simple._
import scala.slick.jdbc.StaticQuery

class SchedulerDataManagerSpec extends AbstractDataManagerSpec with BeforeAndAfter {
  val hostname_1 = "host1"
  val hostname_2 = "host2"
  val hostname_3 = "host3"

  var dm: SchedulerDataManager = _

  before {
    dm = new SchedulerDataManager(awsConfig) {
      override def DBDateTimeFormatter = DateTimeFormat.forPattern("YYYY-MM-dd HH:mm:ss")

      override def leadershipTermTimeoutSeconds = 30
    }
    Schedulers += SchedulersRow(1, "initialValue", new Timestamp(System.currentTimeMillis() - 1000 * 60))
  }

  "Scheduler Data Manager" should "update Schedulers table" in {
    Schedulers.list.head.name should be("initialValue")

    assert(dm.takeLeadership(hostname_1), "Expected success")
    Schedulers.list.head.name should be(hostname_1)

    assert(!dm.takeLeadership(hostname_3), "Expected failure")
    Schedulers.list.head.name should be(hostname_1)

    assert(!dm.extendLeadership(hostname_2), "Expected failure")
    Schedulers.list.head.name should be(hostname_1)

    Thread.sleep(1500)
    assert(dm.extendLeadership(hostname_1), "Expected host1 to extend its leadership")
    Schedulers.list.head.name should be(hostname_1)
  }

  it should "not update the leader if one is active" in {
    StaticQuery.queryNA("truncate table SCHEDULERS").execute
    Schedulers += SchedulersRow(1, hostname_1, new Timestamp(System.currentTimeMillis() - 1000 * 20))
    Schedulers.list.length should be(1)
    assert(!dm.takeLeadership(hostname_2), "Expected failure")
    Schedulers.list.head.name should be(hostname_1)

    Thread.sleep(100)
    assert(dm.extendLeadership(hostname_1), "Expected success")
    Schedulers.list.head.name should be(hostname_1)

  }

  it should "not give leadership to host3 when host2 is the leader" in {
    StaticQuery.queryNA("truncate table SCHEDULERS").execute
    Schedulers += SchedulersRow(1, hostname_1, new Timestamp(System.currentTimeMillis() - 1000 * 31))
    Schedulers.list.length should be(1)
    assert(dm.takeLeadership(hostname_2), "Expected success")
    Schedulers.list.head.name should be(hostname_2)


    assert(!dm.takeLeadership(hostname_3), "Expected failure")
    Schedulers.list.head.name should be(hostname_2)

    assert(!dm.takeLeadership(hostname_1), "Expected failure")
    Schedulers.list.head.name should be(hostname_2)
  }

  it should "be thread safe" in {
    StaticQuery.queryNA("truncate table SCHEDULERS").execute
    Schedulers.list.length should be(0)
    Schedulers += SchedulersRow(1, hostname_1, new Timestamp(System.currentTimeMillis() - 1000 * 360))
    Schedulers.list.length should be(1)
    Schedulers.list.head.name should be(hostname_1)

    import scala.slick.jdbc.{GetResult, StaticQuery => Q}

    
    val sql = s"BEGIN; select * from SCHEDULERS FOR UPDATE"
    val query = Q.queryNA[SchedulersRow](sql)
    query.list.length should be(1)


    assert(!dm.takeLeadership(hostname_1), "Expected failure")
    assert(!dm.takeLeadership(hostname_2), "Expected failure")
    assert(!dm.takeLeadership(hostname_3), "Expected failure")
    assert(!dm.extendLeadership(hostname_1), "Expected failure")
    assert(!dm.extendLeadership(hostname_2), "Expected failure")
    assert(!dm.extendLeadership(hostname_3), "Expected failure")

    Schedulers.list.head.name should be(hostname_1)
  }
} 
Example 9
Source File: TimeColumnBuffer.scala    From spark-vector   with Apache License 2.0
package com.actian.spark_vector.colbuffer.time

import java.nio.ByteBuffer
import java.sql.Timestamp
import java.util.{ Calendar, TimeZone }

import org.apache.spark.sql.catalyst.util.DateTimeUtils

import com.actian.spark_vector.ComposePartial
import com.actian.spark_vector.colbuffer._
import com.actian.spark_vector.colbuffer.util._
import com.actian.spark_vector.vector.VectorDataType

private case class TimeColumnBufferParams(cbParams: ColumnBufferBuildParams,
  converter: TimeConversion.TimeConverter,
  adjustToUTC: Boolean = false)

private[colbuffer] abstract class TimeColumnBuffer(p: TimeColumnBufferParams, valueWidth: Int)
    extends ColumnBuffer[Timestamp, Long](p.cbParams.name, p.cbParams.maxValueCount, valueWidth, valueWidth, p.cbParams.nullable) {
  private val ts = new Timestamp(System.currentTimeMillis())
  private val cal = Calendar.getInstance

  override def put(source: Timestamp, buffer: ByteBuffer): Unit = {
    if (p.adjustToUTC) {
      TimeConversion.convertLocalTimestampToUTC(source, cal)
    }
    val convertedSource = p.converter.convert(TimeConversion.normalizeTime(source), p.cbParams.scale)
    putConverted(convertedSource, buffer)
  }

  protected def putConverted(converted: Long, buffer: ByteBuffer): Unit

  override def get(buffer: ByteBuffer): Long = {
    val deconvertedSource = p.converter.deconvert(getConverted(buffer), p.cbParams.scale)
    ts.setTime(TimeConversion.scaleNanos(deconvertedSource, MillisecondsScale))
    ts.setNanos((deconvertedSource % PowersOfTen(NanosecondsScale)).toInt)
    if (p.adjustToUTC) {
      TimeConversion.convertUTCToLocalTimestamp(ts, cal)
    }
    DateTimeUtils.fromJavaTimestamp(ts)
  }

  protected def getConverted(buffer: ByteBuffer): Long
}

private class TimeIntColumnBuffer(p: TimeColumnBufferParams) extends TimeColumnBuffer(p, IntSize) {
  override protected def putConverted(converted: Long, buffer: ByteBuffer): Unit = buffer.putInt(converted.toInt)

  override protected def getConverted(buffer: ByteBuffer): Long = buffer.getInt()
}

private class TimeLongColumnBuffer(p: TimeColumnBufferParams) extends TimeColumnBuffer(p, LongSize) {
  override protected def putConverted(converted: Long, buffer: ByteBuffer): Unit = buffer.putLong(converted)

  override protected def getConverted(buffer: ByteBuffer): Long = buffer.getLong()
}

private class TimeNZLZConverter extends TimeConversion.TimeConverter {
  override def convert(unscaledNanos: Long, scale: Int): Long = TimeConversion.scaleNanos(unscaledNanos, scale)

  override def deconvert(scaledNanos: Long, scale: Int): Long = TimeConversion.unscaleNanos(scaledNanos, scale)
}

private class TimeTZConverter extends TimeConversion.TimeConverter {
  override def convert(unscaledNanos: Long, scale: Int): Long =
    (TimeConversion.scaleNanos(unscaledNanos, scale) << TimeMaskSize)

  override def deconvert(scaledNanos: Long, scale: Int): Long =
    TimeConversion.unscaleNanos(scaledNanos >> TimeMaskSize, scale)
}


private[colbuffer] object TimeColumnBuffer extends ColumnBufferBuilder {
  private final val (nzlzIntScaleBounds, nzlzLongScaleBounds) = ((0, 4), (5, 9))
  private final val (tzIntScaleBounds, tzLongScaleBounds) = ((0, 1), (2, 9))
  private val calIsNotUTC = Calendar.getInstance.getTimeZone != TimeZone.getTimeZone("UTC")

  private val buildNZPartial: PartialFunction[ColumnBufferBuildParams, TimeColumnBufferParams] =
    ofDataType(VectorDataType.TimeType) andThen { TimeColumnBufferParams(_, new TimeNZLZConverter(), calIsNotUTC) }

  private val buildLZPartial: PartialFunction[ColumnBufferBuildParams, TimeColumnBufferParams] =
    ofDataType(VectorDataType.TimeLTZType) andThen { TimeColumnBufferParams(_, new TimeNZLZConverter()) }

  private val buildNZLZ: PartialFunction[ColumnBufferBuildParams, ColumnBuffer[_, _]] = (buildNZPartial orElse buildLZPartial) andThenPartial {
    case nzlz if isInBounds(nzlz.cbParams.scale, nzlzIntScaleBounds) => new TimeIntColumnBuffer(nzlz)
    case nzlz if isInBounds(nzlz.cbParams.scale, nzlzLongScaleBounds) => new TimeLongColumnBuffer(nzlz)
  }

  private val buildTZPartial: PartialFunction[ColumnBufferBuildParams, TimeColumnBufferParams] =
    ofDataType(VectorDataType.TimeTZType) andThen { TimeColumnBufferParams(_, new TimeTZConverter()) }

  private val buildTZ: PartialFunction[ColumnBufferBuildParams, ColumnBuffer[_, _]] = buildTZPartial andThenPartial {
    case tz if isInBounds(tz.cbParams.scale, tzIntScaleBounds) => new TimeIntColumnBuffer(tz)
    case tz if isInBounds(tz.cbParams.scale, tzLongScaleBounds) => new TimeLongColumnBuffer(tz)
  }

  override private[colbuffer] val build: PartialFunction[ColumnBufferBuildParams, ColumnBuffer[_, _]] = buildNZLZ orElse buildTZ
} 
Example 10
Source File: package.scala    From spark-vector   with Apache License 2.0
package com.actian.spark_vector.colbuffer

import java.sql.Timestamp


package object util {
  // scalastyle:off magic.number
  final val PowersOfTen = Seq(1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000)
  final val SecondsBeforeEpoch = 62167219200L
  final val TimeMaskSize = 11
  final val SecondsInMinute = 60
  final val MinutesInHour = 60
  final val HoursInDay = 24
  final val SecondsInDay = SecondsInMinute * MinutesInHour * HoursInDay
  final val MillisecondsScale = 3
  final val MillisecondsInMinute = SecondsInMinute * PowersOfTen(MillisecondsScale)
  final val MillisecondsInHour = MinutesInHour * MillisecondsInMinute
  final val MillisecondsInDay = HoursInDay * MillisecondsInHour
  final val NanosecondsScale = 9
  final val NanosecondsInMinute = (MillisecondsInMinute.toLong * PowersOfTen(NanosecondsScale - MillisecondsScale))
  final val NanosecondsInHour = MinutesInHour * NanosecondsInMinute
  final val NanosecondsInDay = HoursInDay * NanosecondsInHour
  // scalastyle:on magic.number

  def floorDiv(x: Long, y: Long): Long = {
    val ret = x / y
    if (ret >= 0 || ret * y == x) ret else ret - 1
  }
} 
Example 11
Source File: PackageSpec.scala    From sparkpipe-core   with Apache License 2.0
package software.uncharted.sparkpipe.ops.core.dataframe.temporal

import org.scalatest._
import software.uncharted.sparkpipe.Spark
import software.uncharted.sparkpipe.ops.core.rdd.toDF

import java.text.SimpleDateFormat
import java.sql.Timestamp

class PackageSpec extends FunSpec {
  describe("ops.core.dataframe.temporal") {
    val rdd = Spark.sc.parallelize(Seq(
      (new Timestamp(new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-18").getTime), "2015-11-18", 1),
      (new Timestamp(new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-19").getTime), "2015-11-19", 2),
      (new Timestamp(new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-20").getTime), "2015-11-20", 3),
      (new Timestamp(new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-21").getTime), "2015-11-21", 4)
    ))
    val df = toDF(Spark.sparkSession)(rdd)

    describe("#dateFilter()") {
      it("should support filtering rows in an input DataFrame with a String timetamp column, based on a date range") {
        val df2 = dateFilter(
          new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-19"),
          new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-21"),
          "yyyy-MM-dd",
          "_2"
        )(df)
        assert(df2.count == 3)
      }

      it("should support filtering rows in an input DataFrame with a String timetamp column, based on a date range, specified using strings") {
        val df2 = dateFilter(
          "2015-11-19",
          "2015-11-20",
          "yyyy-MM-dd",
          "_2"
        )(df)
        assert(df2.count == 2)
      }

      it("should support filtering rows in an input DataFrame with a Timestamp timestamp column, based on a date range") {
        val df2 = dateFilter(
          new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-17"),
          new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-18"),
          "_1"
        )(df)
        assert(df2.count == 1)
      }
    }

    describe("#parseDate()") {
      it("should facilitate converting a string timestamp column into a TimestampType and adding it as a new column") {
        val df2 = parseDate("_2", "new", "yyyy-MM-dd")(df)
        assert(df2.filter("new = _1").count == df.count)
        assert(df2.schema.size == df.schema.size+1)
      }
    }

    describe("#dateField()") {
      it("should facilitate extracting a single field from a Timestamp column, and placing it a new column") {
        val df2 = dateField("_1", "new", java.util.Calendar.YEAR)(df)
        assert(df2.filter("new = 2015").count == df.count)
        assert(df2.schema.size == df.schema.size+1)
      }
    }
  }
} 
Example 12
Source File: KafkaStructuredStreamingDemo.scala    From MaxCompute-Spark   with Apache License 2.0
package com.aliyun.odps.spark.examples.structuredStreaming.kafka

import java.sql.Timestamp

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.window

object KafkaStructuredStreamingDemo{
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("KafkaStreamingDemo")
      .getOrCreate()

    import spark.implicits._

    val df = spark
      .readStream
      .format("kafka")
      .option("kafka.bootstrap.servers", "localhost:9092")
      .option("subscribe", "topic")
      .load()

    
    // Reconstructed from the elided section: split each Kafka record into words,
    // keeping the source timestamp so it can be used for the window below.
    val wordsWithTimestamp = df
      .selectExpr("CAST(value AS STRING)", "timestamp")
      .as[(String, Timestamp)]
      .flatMap { case (line, ts) => line.split(" ").map(word => (word, ts)) }
      .toDF("word", "timestamp")

    // Use OSS as the checkpoint storage location
    val checkpointLocation3 = "oss://bucket/checkpoint3/"

    val windowedCountsWithWatermark = wordsWithTimestamp
      .withWatermark("timestamp", "5 seconds")
      .groupBy(
        window($"timestamp", "6 seconds", "3 seconds"),
        $"word"
      ).count()

    val query3 = windowedCountsWithWatermark.writeStream
      .outputMode("append")
      .format("console")
      .option("checkpointLocation", checkpointLocation3)
      .start()

    query3.awaitTermination()
  }
} 
Example 13
Source File: TimestampVectorWriter.scala    From stream-reactor   with Apache License 2.0
package com.landoop.streamreactor.connect.hive.orc.vectors

import java.sql.Timestamp

import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector

object TimestampVectorWriter extends OrcVectorWriter[TimestampColumnVector, Timestamp] {
  override def write(vector: TimestampColumnVector, offset: Int, value: Option[Timestamp]): Unit = {
    value match {
      case Some(ts) =>
        vector.set(offset, ts)
      case _ =>
        vector.setNullValue(offset)
        vector.noNulls = false
        vector.isNull(offset) = true
    }
  }
} 
Example 14
Source File: CallRecordGeneratorIngress.scala    From pipelines-examples   with Apache License 2.0
package pipelines.examples.carly.aggregator

import java.sql.Timestamp

import scala.util.Random
import scala.concurrent.duration._

import org.apache.spark.sql.{ Dataset, SparkSession }
import org.apache.spark.sql.streaming.OutputMode

import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.LongType

import pipelines.streamlets._
import pipelines.streamlets.avro._
import pipelines.spark.sql.SQLImplicits._
import pipelines.examples.carly.data.CallRecord
import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic }
import org.apache.log4j.{ Level, Logger }

case class Rate(timestamp: Timestamp, value: Long)

class CallRecordGeneratorIngress extends SparkStreamlet {

  val rootLogger = Logger.getRootLogger()
  rootLogger.setLevel(Level.ERROR)

  val RecordsPerSecond = IntegerConfigParameter(
    "records-per-second",
    "Records per second to process.",
    Some(50))

  override def configParameters = Vector(RecordsPerSecond)

  val out = AvroOutlet[CallRecord]("out", _.user)
  val shape = StreamletShape(out)

  override def createLogic() = new SparkStreamletLogic {
    val recordsPerSecond = context.streamletConfig.getInt(RecordsPerSecond.key)
    override def buildStreamingQueries = {
      val outStream = DataGenerator.mkData(super.session, recordsPerSecond)
      writeStream(outStream, out, OutputMode.Append).toQueryExecution
    }
  }
}

object DataGenerator {
  def mkData(session: SparkSession, recordsPerSecond: Int): Dataset[CallRecord] = {
    // do we need to expose this through configuration?

    val MaxTime = 2.hours.toMillis
    val MaxUsers = 100000
    val TS0 = new java.sql.Timestamp(0)
    val ZeroTimestampProb = 0.05 // error rate

    // Random Data Generator
    val usersUdf = udf(() ⇒ "user-" + Random.nextInt(MaxUsers))
    val directionUdf = udf(() ⇒ if (Random.nextDouble() < 0.5) "incoming" else "outgoing")

    // Time-biased randomized filter - 1/2 hour cycles
    val sinTime: Long ⇒ Double = t ⇒ Math.sin((t / 1000 % 1800) * 1.0 / 1800 * Math.PI)
    val timeBoundFilter: Long ⇒ Double ⇒ Boolean = t ⇒ prob ⇒ (sinTime(t) + 0.5) > prob
    val timeFilterUdf = udf((ts: java.sql.Timestamp, rng: Double) ⇒ timeBoundFilter(ts.getTime)(rng))
    val zeroTimestampUdf = udf((ts: java.sql.Timestamp, rng: Double) ⇒ {
      if (rng < ZeroTimestampProb) {
        TS0
      } else {
        ts
      }
    })

    val rateStream = session.readStream
      .format("rate")
      .option("rowsPerSecond", recordsPerSecond)
      .load()
      .as[Rate]

    val randomDataset = rateStream.withColumn("rng", rand()).withColumn("tsRng", rand())
    val sampledData = randomDataset.where(timeFilterUdf($"timestamp", $"rng"))
      .withColumn("user", usersUdf())
      .withColumn("other", usersUdf())
      .withColumn("direction", directionUdf())
      .withColumn("duration", (round(abs(rand()) * MaxTime)).cast(LongType))
      .withColumn("updatedTimestamp", zeroTimestampUdf($"timestamp", $"tsRng"))
      .select($"user", $"other", $"direction", $"duration", $"updatedTimestamp" as "timestamp")
      .as[CallRecord]
    sampledData
  }
} 
Example 15
Source File: SparkRandomGenDataIngress.scala    From pipelines-examples   with Apache License 2.0
package pipelines.example

import java.sql.Timestamp

import scala.util.Random

import pipelines.streamlets.{ IntegerConfigParameter, StreamletShape }
import pipelines.streamlets.avro._
import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic }
import org.apache.spark.sql.Dataset
import org.apache.spark.sql.streaming.OutputMode

import pipelines.spark.sql.SQLImplicits._

case class Rate(timestamp: Timestamp, value: Long)

class SparkRandomGenDataIngress extends SparkStreamlet {
  val out = AvroOutlet[Data]("out", d ⇒ d.src)
  val shape = StreamletShape(out)

  val RecordsPerSecond = IntegerConfigParameter(
    "records-per-second",
    "Records per second to produce.",
    Some(50))

  override def configParameters = Vector(RecordsPerSecond)

  override def createLogic() = new SparkStreamletLogic {

    override def buildStreamingQueries = {
      writeStream(process, out, OutputMode.Append).toQueryExecution
    }

    private def process: Dataset[Data] = {

      val recordsPerSecond = context.streamletConfig.getInt(RecordsPerSecond.key)

      val gaugeGen: () ⇒ String = () ⇒ if (Random.nextDouble() < 0.5) "oil" else "gas"

      val rateStream = session.readStream
        .format("rate")
        .option("rowsPerSecond", recordsPerSecond)
        .load()
        .as[Rate]

      rateStream.map {
        case Rate(timestamp, value) ⇒ Data(s"src-${value % 100}", timestamp.getTime, gaugeGen(), Random.nextDouble() * value)
      }
    }
  }
} 
Example 16
Source File: PSetAny.scala    From yoda-orm   with MIT License
package in.norbor.yoda.orm

import java.sql.{Blob, Timestamp}

import org.joda.time.DateTime


trait PSetAny {

  def set(p: PStatement, v: Any): PStatement = v match {
    case _: Boolean => p.setBoolean(v.asInstanceOf[Boolean])
    case _: Int => p.setInt(v.asInstanceOf[Int])
    case _: Long => p.setLong(v.asInstanceOf[Long])
    case _: Float => p.setDouble(v.asInstanceOf[Float].toDouble)
    case _: Double => p.setDouble(v.asInstanceOf[Double])
    case _: String => p.setString(v.asInstanceOf[String])
    case _: Timestamp => p.setTimestamp(v.asInstanceOf[Timestamp])
    case _: DateTime => p.setDateTime(v.asInstanceOf[DateTime])
    case _: Blob => p.setBlob(v.asInstanceOf[Blob])
    case _: Array[Byte] => p.setBytes(v.asInstanceOf[Array[Byte]])
    case _ => p
  }

} 
Example 17
Source File: PStatementTest.scala    From yoda-orm   with MIT License
package in.norbor.yoda.orm

import java.sql.{Connection, DriverManager, ResultSet, Timestamp}

import com.typesafe.scalalogging.LazyLogging
import in.norbor.yoda.implicits.JavaSqlImprovement._
import mocks.People
import org.joda.time.DateTime
import org.scalatest.funsuite.AnyFunSuite


class PStatementTest extends AnyFunSuite {

  Class.forName("org.h2.Driver")

  private implicit val conn: Connection = DriverManager.getConnection("jdbc:h2:~/test", "sa", "")

  test("0) apply") {

    val ps = PStatement("SELECT 1")(conn)
    assert(ps !== null)

    ps.equals(null)
    ps.canEqual(null)
    ps.hashCode
    ps.toString
    ps.productPrefix
    ps.productArity
    ps.productElement(0)
    ps.productIterator
    ps.copy()
  }

  test("0) query") {

    PStatement("DROP TABLE IF EXISTS yoda_sql; CREATE TABLE yoda_sql (id INTEGER);")
      .update
  }

  test("0) update") {

    val rs = PStatement("""select 1""")
      .query

    assert(rs !== null)
  }

  test("0) queryOne with non index parameter") {

    val result = PStatement("""select ?, ?, ?, ?, ?, ?, ?, ?""")
      .setBoolean(true)
      .setInt(1)
      .setLong(1L)
      .setDouble(1)
      .setString("YO")
      .setDateTime(DateTime.now)
      .setTimestamp(new Timestamp(System.currentTimeMillis))
      .setTimestamp(null)
      .queryOne(parse)

    assert(result.head._1 === true)
  }

  test("3) queryList with parse method") {

    val peoples = PStatement("""select 1 as id, 'Peerapat' as name, now() as born;""")
      .queryList(parsePeople)

    assert(peoples.head.id === 1)
    assert(peoples.head.name === "Peerapat")
    assert(peoples.head.born.getMillis <= DateTime.now.getMillis)
  }

  test("5) batch") {

    val insert = PStatement("INSERT INTO yoda_sql VALUES(?)")
      .setInt(1)
      .addBatch()
      .setInt(2)
      .addBatch()
      .executeBatch

    assert(insert.length === 2)
  }


  private def parse(rs: ResultSet): (Boolean, Int, Long, Double, String, DateTime, Timestamp) = (rs.getBoolean(1)
    , rs.getInt(2)
    , rs.getLong(3)
    , rs.getDouble(4)
    , rs.getString(5)
    , rs.getDateTime(6)
    , rs.getTimestamp(7)
  )

  private def parsePeople(rs: ResultSet): People = People(id = rs.getLong("id")
    , name = rs.getString("name")
    , born = rs.getDateTime("born")
  )

} 
Example 18
Source File: DateTimeConverter.scala    From seahorse-workflow-executor   with Apache License 2.0
package io.deepsense.commons.datetime

import java.sql.Timestamp

import org.joda.time.format.{DateTimeFormatter, ISODateTimeFormat}
import org.joda.time.{DateTime, DateTimeZone}

trait DateTimeConverter {
  val zone: DateTimeZone = DateTimeZone.getDefault
  val dateTimeFormatter: DateTimeFormatter = ISODateTimeFormat.dateTime()
  def toString(dateTime: DateTime): String = dateTime.toString(dateTimeFormatter)
  def parseDateTime(s: String): DateTime = dateTimeFormatter.parseDateTime(s).withZone(zone)
  def parseTimestamp(s: String): Timestamp = new Timestamp(parseDateTime(s).getMillis)
  def now: DateTime = new DateTime(zone)
  def fromMillis(millis: Long): DateTime = new DateTime(zone).withMillis(millis)
  def dateTime(
      year: Int,
      monthOfyear: Int,
      dayOfMonth: Int,
      hourOfDay: Int = 0,
      minutesOfHour: Int = 0,
      secondsOfMinute: Int = 0): DateTime =
    new DateTime(year, monthOfyear, dayOfMonth, hourOfDay, minutesOfHour, secondsOfMinute, zone)
  def dateTimeFromUTC(
      year: Int,
      monthOfyear: Int,
      dayOfMonth: Int,
      hourOfDay: Int = 0,
      minutesOfHour: Int = 0,
      secondsOfMinute: Int = 0): DateTime =
    new DateTime(
      year,
      monthOfyear,
      dayOfMonth,
      hourOfDay,
      minutesOfHour,
      secondsOfMinute,
      DateTimeZone.UTC).withZone(DateTimeConverter.zone)
}

object DateTimeConverter extends DateTimeConverter 
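For reference, the converter above can be exercised as follows; this usage sketch is not part of the original source and only calls the methods defined in the trait.

import java.sql.Timestamp
import io.deepsense.commons.datetime.DateTimeConverter

object DateTimeConverterUsage extends App {
  // parseTimestamp parses an ISO-8601 string and wraps the resulting millis in a Timestamp
  val ts: Timestamp = DateTimeConverter.parseTimestamp("2015-11-18T09:42:00.000Z")

  // Round-trip: millis -> DateTime in the default zone -> ISO-8601 string
  println(DateTimeConverter.toString(DateTimeConverter.fromMillis(ts.getTime)))
}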
Example 19
Source File: CsvSchemaStringifierBeforeCsvWriting.scala    From seahorse-workflow-executor   with Apache License 2.0
package io.deepsense.deeplang.doperations.readwritedataframe.filestorage.csv

import java.sql.Timestamp

import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

import io.deepsense.commons.datetime.DateTimeConverter
import io.deepsense.deeplang.ExecutionContext
import io.deepsense.deeplang.doperables.dataframe.DataFrame
import io.deepsense.deeplang.doperations.exceptions.UnsupportedColumnTypeException


object CsvSchemaStringifierBeforeCsvWriting {

  def preprocess(dataFrame: DataFrame)
                (implicit context: ExecutionContext): DataFrame = {
    requireNoComplexTypes(dataFrame)

    val schema = dataFrame.sparkDataFrame.schema
    def stringifySelectedTypes(schema: StructType): StructType = {
      StructType(
        schema.map {
          case field: StructField => field.copy(dataType = StringType)
        }
      )
    }

    context.dataFrameBuilder.buildDataFrame(
      stringifySelectedTypes(schema),
      dataFrame.sparkDataFrame.rdd.map(stringifySelectedCells(schema)))
  }

  private def requireNoComplexTypes(dataFrame: DataFrame): Unit = {
    dataFrame.sparkDataFrame.schema.fields.map(structField =>
      (structField.dataType, structField.name)
    ).foreach {
      case (dataType, columnName) => dataType match {
        case _: ArrayType | _: MapType | _: StructType =>
          throw UnsupportedColumnTypeException(columnName, dataType)
        case _ => ()
      }
    }

  }

  private def stringifySelectedCells(originalSchema: StructType)(row: Row): Row = {
    Row.fromSeq(
      row.toSeq.zipWithIndex map { case (value, index) =>
        (value, originalSchema(index).dataType) match {
          case (null, _) => ""
          case (_, BooleanType) =>
            if (value.asInstanceOf[Boolean]) "1" else "0"
          case (_, TimestampType) =>
            DateTimeConverter.toString(
              DateTimeConverter.fromMillis(value.asInstanceOf[Timestamp].getTime))
          case _ => value.toString
        }
      })
  }

} 
Example 20
Source File: WriteReadDataFrameWithDriverFilesIntegSpec.scala    From seahorse-workflow-executor   with Apache License 2.0
package io.deepsense.deeplang.doperations

import java.sql.Timestamp

import org.apache.spark.sql.Row
import org.apache.spark.sql.types._
import org.scalatest.BeforeAndAfter

import io.deepsense.deeplang.{TestFiles, DeeplangIntegTestSupport}
import io.deepsense.deeplang.doperables.dataframe.DataFrame
import io.deepsense.deeplang.doperations.inout._

class WriteReadDataFrameWithDriverFilesIntegSpec
  extends DeeplangIntegTestSupport
  with BeforeAndAfter with TestFiles {

  import DeeplangIntegTestSupport._

  val schema: StructType =
    StructType(Seq(
      StructField("boolean", BooleanType),
      StructField("double", DoubleType),
      StructField("string", StringType)
    ))

  val rows = {
    val base = Seq(
      Row(true, 0.45, "3.14"),
      Row(false, null, "\"testing...\""),
      Row(false, 3.14159, "Hello, world!"),
      // in case of CSV, an empty string is the same as null - no way around it
      Row(null, null, "")
    )
    val repeatedFewTimes = (1 to 10).flatMap(_ => base)
    repeatedFewTimes
  }

  lazy val dataFrame = createDataFrame(rows, schema)

  "WriteDataFrame and ReadDataFrame" should {
    "write and read CSV file" in {
      val wdf =
        new WriteDataFrame()
          .setStorageType(
            new OutputStorageTypeChoice.File()
              .setOutputFile(absoluteTestsDirPath.fullPath + "/test_files")
              .setFileFormat(
                new OutputFileFormatChoice.Csv()
                  .setCsvColumnSeparator(CsvParameters.ColumnSeparatorChoice.Tab())
                  .setNamesIncluded(true)))
      wdf.executeUntyped(Vector(dataFrame))(executionContext)

      val rdf =
        new ReadDataFrame()
          .setStorageType(
            new InputStorageTypeChoice.File()
              .setSourceFile(absoluteTestsDirPath.fullPath + "/test_files")
              .setFileFormat(new InputFileFormatChoice.Csv()
                .setCsvColumnSeparator(CsvParameters.ColumnSeparatorChoice.Tab())
                .setNamesIncluded(true)
                .setShouldConvertToBoolean(true)))
      val loadedDataFrame = rdf.executeUntyped(Vector())(executionContext).head.asInstanceOf[DataFrame]

      assertDataFramesEqual(loadedDataFrame, dataFrame, checkRowOrder = false)
    }

    "write and read JSON file" in {
      val wdf =
        new WriteDataFrame()
          .setStorageType(new OutputStorageTypeChoice.File()
            .setOutputFile(absoluteTestsDirPath.fullPath + "json")
            .setFileFormat(new OutputFileFormatChoice.Json()))

      wdf.executeUntyped(Vector(dataFrame))(executionContext)

      val rdf =
        new ReadDataFrame()
          .setStorageType(new InputStorageTypeChoice.File()
            .setSourceFile(absoluteTestsDirPath.fullPath + "json")
            .setFileFormat(new InputFileFormatChoice.Json()))
      val loadedDataFrame = rdf.executeUntyped(Vector())(executionContext).head.asInstanceOf[DataFrame]

      assertDataFramesEqual(loadedDataFrame, dataFrame, checkRowOrder = false)
    }
  }
} 
Example 21
Source File: DataFrameReportPerformanceSpec.scala    From seahorse-workflow-executor   with Apache License 2.0
package io.deepsense.deeplang.doperables.dataframe

import java.sql.Timestamp
import java.text.{DateFormat, SimpleDateFormat}
import java.util.TimeZone

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{DoubleType, StructField, StructType, TimestampType}
import org.scalatest.{BeforeAndAfter, Ignore}

import io.deepsense.commons.utils.{DoubleUtils, Logging}
import io.deepsense.deeplang.{TestFiles, DeeplangIntegTestSupport}

// Ignored because it has no assertions; it only prints report generation time.
@Ignore
class DataFrameReportPerformanceSpec
    extends DeeplangIntegTestSupport
    with BeforeAndAfter
    with TestFiles
    with Logging {
  val testFile = absoluteTestsDirPath.pathWithoutScheme + "/demand_without_header.csv"

  "DataFrame" should {
    "generate report" when {
      "DataFrame has 17K of rows" in {
        val numberOfTries = 10
        var results: Seq[Double] = Seq()
        for (i <- 1 to numberOfTries) {
          val dataFrame: DataFrame = demandDataFrame()
          val start = System.nanoTime()
          val report = dataFrame.report
          val end = System.nanoTime()
          val time1: Double = (end - start).toDouble / 1000000000.0
          results = results :+ time1
          logger.debug("Report generation time: {}", DoubleUtils.double2String(time1))
        }
        logger.debug(
          "Mean report generation time: {}",
          DoubleUtils.double2String(results.fold(0D)(_ + _) / numberOfTries.toDouble))
      }
    }
  }

  private def demandDataFrame(): DataFrame = {
    val rddString: RDD[String] = executionContext.sparkContext.textFile(testFile)
    val data: RDD[Row] = rddString.map(DataFrameHelpers.demandString2Row)
    executionContext.dataFrameBuilder.buildDataFrame(demandSchema, data)
  }

  private def demandSchema: StructType = StructType(Seq(
    StructField("datetime", TimestampType),
    StructField("log_count", DoubleType),
    StructField("workingday", DoubleType),
    StructField("holiday", DoubleType),
    StructField("season2", DoubleType),
    StructField("season3", DoubleType),
    StructField("season4", DoubleType)))

  private def timestamp(s: String): Timestamp = {
    val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    format.setTimeZone(TimeZone.getTimeZone("UTC"))
    new Timestamp(format.parse(s).getTime)
  }
}

private object DataFrameHelpers {
  def demandString2Row(s: String): Row = {
    val split = s.split(",")
    Row(
      timestamp(split(0)),
      split(1).toDouble,
      split(2).toDouble,
      split(3).toDouble,
      split(4).toDouble,
      split(5).toDouble,
      split(6).toDouble
    )
  }

  private def timestamp(s: String): Timestamp = {
    val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    format.setTimeZone(TimeZone.getTimeZone("UTC"))
    new Timestamp(format.parse(s).getTime)
  }
} 
Example 22
Source File: StatisticsForContinuousIntegSpec.scala    From seahorse-workflow-executor   with Apache License 2.0
package io.deepsense.deeplang.doperables.dataframe.report.distribution

import java.sql.Timestamp

import org.apache.spark.rdd.RDD
import org.apache.spark.sql
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

import io.deepsense.commons.datetime.DateTimeConverter
import io.deepsense.deeplang.DeeplangIntegTestSupport
import io.deepsense.deeplang.doperables.dataframe.{DataFrame, DataFrameTestFactory}
import io.deepsense.reportlib.model._

class StatisticsForContinuousIntegSpec extends DeeplangIntegTestSupport with DataFrameTestFactory {

  "Statistics (Min, max and mean values)" should {
    "be calculated for each continuous column in distribution" when {
      "data is of type int" in {
        val distribution = distributionForInt(1, 2, 3, 4, 5)
        distribution.statistics.min shouldEqual Some("1")
        distribution.statistics.max shouldEqual Some("5")
        distribution.statistics.mean shouldEqual Some("3")
      }
      "data is of type Timestamp" in {
        val distribution =
          distributionForTimestamps(new Timestamp(1000), new Timestamp(2000), new Timestamp(3000))
        distribution.statistics.min shouldEqual Some(formatDate(1000))
        distribution.statistics.max shouldEqual Some(formatDate(3000))
        distribution.statistics.mean shouldEqual Some(formatDate(2000))
      }
    }
  }
  "Null value in data" should {
    val distribution = distributionForDouble(1, 2, 3, 4, Double.NaN, 5)
    "not be skipped in calculating min and max" in {
      distribution.statistics.min shouldEqual Some("1")
      distribution.statistics.max shouldEqual Some("5")
    }
    "result in mean value NaN" in {
      distribution.statistics.mean shouldEqual Some("NaN")
    }
  }

  lazy val columnName = "column_name"

  private def distributionForDouble(data: Double*): ContinuousDistribution = {
    distributionFor(data, DoubleType)
  }

  private def distributionForInt(data: Int*): ContinuousDistribution = {
    distributionFor(data, IntegerType)
  }

  private def distributionForTimestamps(data: Timestamp*): ContinuousDistribution = {
    distributionFor(data, TimestampType)
  }

  private def distributionFor(data: Seq[Any], dataType: DataType): ContinuousDistribution = {
    val schema = StructType(Array(
      StructField(columnName, dataType)
    ))

    val rows = data.map(v => Row(v))
    val dataFrame = createDataFrame(rows, schema)

    val report = dataFrame.report
    report.content.distributions(columnName).asInstanceOf[ContinuousDistribution]
  }

  def buildDataFrame(schema: StructType, data: RDD[Row]): DataFrame = {
    val dataFrame: sql.DataFrame = sparkSQLSession.createDataFrame(data, schema)
    DataFrame.fromSparkDataFrame(dataFrame)
  }

  def formatDate(millis: Long): String = {
    DateTimeConverter.toString(DateTimeConverter.fromMillis(millis))
  }

} 
Example 23
Source File: ParameterConversions.scala    From scruid   with Apache License 2.0
package ing.wbaa.druid.sql

import java.sql.Timestamp
import java.time.{ Instant, LocalDate, LocalDateTime }

import scala.language.implicitConversions

import ing.wbaa.druid.{ DruidConfig, SQLQueryParameter, SQLQueryParameterType }

trait ParameterConversions {
  implicit def char2Param(v: Char): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Char, v.toString)

  implicit def string2Param(v: String): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Varchar, v)

  implicit def byte2Param(v: Byte): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Tinyint, v.toString)

  implicit def short2Param(v: Short): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Smallint, v.toString)

  implicit def int2Param(v: Int): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Integer, v.toString)

  implicit def long2Param(v: Long): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Bigint, v.toString)

  implicit def float2Param(v: Float): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Float, v.toString)

  implicit def double2Param(v: Double): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Double, v.toString)

  implicit def boolean2Param(v: Boolean): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Boolean, v.toString)

  implicit def localDate2Param(v: LocalDate)(implicit config: DruidConfig =
                                               DruidConfig.DefaultConfig): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Date, v.format(config.FormatterDate))

  implicit def localDateTime2Param(
      v: LocalDateTime
  )(implicit config: DruidConfig = DruidConfig.DefaultConfig): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Timestamp, v.format(config.FormatterDateTime))

  implicit def timestamp2Param(v: Timestamp)(implicit config: DruidConfig =
                                               DruidConfig.DefaultConfig): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Timestamp, config.FormatterDateTime.format(v.toInstant))

  implicit def instant2Param(
      v: Instant
  )(implicit config: DruidConfig = DruidConfig.DefaultConfig): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Timestamp, config.FormatterDateTime.format(v))
} 
Example 24
Source File: CreateOps.scala    From recogito2   with Apache License 2.0
package services.folder.create

import java.util.{Date, UUID}
import java.sql.Timestamp
import org.jooq.DSLContext
import scala.concurrent.Future
import services.{PublicAccess, SharingLevel}
import services.folder.FolderService
import services.generated.Tables.{FOLDER, FOLDER_ASSOCIATION, SHARING_POLICY}
import services.generated.tables.records.{FolderRecord, FolderAssociationRecord, SharingPolicyRecord}

trait CreateOps { self: FolderService => 

  def createFolder(owner: String, title: String, parent: Option[UUID]): Future[FolderRecord] = 
    db.withTransaction { sql => 
      val folder = new FolderRecord(UUID.randomUUID, owner, title, optUUID(parent), null, PublicAccess.PRIVATE.toString, null)
      sql.insertInto(FOLDER).set(folder).execute()
      folder
    }

  private def insertAssociation(documentId: String, folderId: UUID, sql: DSLContext) = {
    val association = new FolderAssociationRecord(folderId, documentId)
    sql.insertInto(FOLDER_ASSOCIATION).set(association).execute()
    association
  }

  
  def moveDocumentToFolder(documentId: String, folderId: UUID) =
    db.withTransaction { sql => 
      sql.deleteFrom(FOLDER_ASSOCIATION)
         .where(FOLDER_ASSOCIATION.DOCUMENT_ID.equal(documentId))
         .execute

      insertAssociation(documentId, folderId, sql)
    }

  def moveDocumentToRoot(documentId: String) = db.withTransaction { sql => 
    sql.deleteFrom(FOLDER_ASSOCIATION)
       .where(FOLDER_ASSOCIATION.DOCUMENT_ID.equal(documentId))
       .execute == 1
  }

  def addCollaborator(folderId: UUID, sharedBy: String, sharedWith: String, level: SharingLevel) = 
    db.query { sql => 
      val existing = sql.selectFrom(SHARING_POLICY)
        .where(SHARING_POLICY.FOLDER_ID.equal(folderId)
          .and(SHARING_POLICY.SHARED_WITH.equal(sharedWith))).fetchOne 

      val policy = Option(existing) match {
        case Some(policy) =>
          policy.setSharedBy(sharedBy)
          policy.setSharedAt(new Timestamp(new Date().getTime))
          policy.setAccessLevel(level.toString)
          policy

        case None => 
          val policy = new SharingPolicyRecord(
            null, // auto-inc id
            folderId,
            null, // document_id
            sharedBy,
            sharedWith,
            new Timestamp(new Date().getTime),
            level.toString)

          policy.changed(SHARING_POLICY.ID, false)     
          sql.attach(policy)
          policy
      }
      
      policy.store() == 1
    }

} 
Example 25
Source File: A_1_WindowOperation.scala    From wow-spark   with MIT License
package com.sev7e0.wow.structured_streaming

import java.sql.Timestamp

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._
import org.apache.spark.sql.streaming.OutputMode

object A_1_WindowOperation {

  def main(args: Array[String]): Unit = {

    if (args.length < 3) {
      println(s" Usage: StructuredNetworkWordCountWindowed <hostname> <port>" +
        " <window duration in seconds> [<slide duration in seconds>]")
      System.exit(1)
    }

    val host = args(0)
    val port = args(1).toInt
    val windowSize = args(2).toInt
    val slideSize = if (args.length == 3) windowSize else args(3).toInt
    if (slideSize > windowSize) {
      System.err.println("<slide duration> must be less than or equal to <window duration>")
    }

    val windowDuration = s"$windowSize seconds"
    val slideDuration = s"$slideSize seconds"

    val spark = SparkSession.builder()
      .master("local")
      .appName(A_1_WindowOperation.getClass.getName)
      .getOrCreate()
    val lines = spark.readStream
      .format("socket")
      .option("host", host)
      .option("port", port)
      .load()
    import spark.implicits._

    val words = lines.as[(String, Timestamp)]
      .flatMap(line => line._1.split(" ").map(word => (word, line._2))).toDF()

    val windowCount = words.groupBy(
      window($"timestamp", windowDuration, slideDuration)
      , $"word").count().orderBy("window")

    val query = windowCount.writeStream
      .outputMode(OutputMode.Complete())
      .format("console")
      .option("truncate", "false")
      .start()

    query.awaitTermination()


  }
} 
Example 26
Source File: A_1_BasicOperation.scala    From wow-spark   with MIT License
package com.sev7e0.wow.structured_streaming

import java.sql.Timestamp

import org.apache.spark.sql.types.{DoubleType, StringType, StructType, TimestampType}
import org.apache.spark.sql.{Dataset, SparkSession}

object A_1_BasicOperation {

  // For date/time fields the case class must use java.sql.Timestamp, which Catalyst treats as TimestampType
  case class DeviceData(device: String, deviceType: String, signal: Double, time: Timestamp)

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName(A_1_BasicOperation.getClass.getName)
      .master("local")
      .getOrCreate()
    val timeStructType = new StructType().add("device", StringType)
      .add("deviceType", StringType)
      .add("signal", DoubleType) // signal is numeric in DeviceData
      .add("time", TimestampType)

    val dataFrame = spark.read.json("src/main/resources/sparkresource/device.json")
    import spark.implicits._
    val ds: Dataset[DeviceData] = dataFrame.as[DeviceData]

    // Untyped (DataFrame) query, SQL-like
    dataFrame.select("device").where("signal>10").show()
    // Typed (Dataset) query
    ds.filter(_.signal > 10).map(_.device).show()

    // Untyped groupBy with a count aggregation
    dataFrame.groupBy("deviceType").count().show()


    import org.apache.spark.sql.expressions.scalalang.typed
    // Typed aggregation: average signal per device type
    ds.groupByKey(_.deviceType).agg(typed.avg(_.signal)).show()

    // Alternatively, register a temporary view and query it with SQL
    dataFrame.createOrReplaceTempView("device")
    spark.sql("select * from device").show()

    // isStreaming reports whether the Dataset is backed by a streaming source
    println(dataFrame.isStreaming)
  }
} 
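The schema defined above is declared but never applied; if the JSON should be read with `time` as a proper `TimestampType` (rather than relying on inference), a sketch like the following would do it. The file path is taken from the listing; everything else is illustrative:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types._

object ReadDevicesWithSchema extends App {
  val spark = SparkSession.builder().master("local").appName("schema-demo").getOrCreate()

  val deviceSchema = new StructType()
    .add("device", StringType)
    .add("deviceType", StringType)
    .add("signal", DoubleType)
    .add("time", TimestampType)

  // With an explicit schema there is no inference pass, and "time" is parsed straight into TimestampType
  val devices = spark.read.schema(deviceSchema).json("src/main/resources/sparkresource/device.json")
  devices.printSchema()

  spark.stop()
}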
Example 27
Source File: SparkDataGenerator.scala    From cloudflow   with Apache License 2.0 5 votes vote down vote up
package swissknife.spark

import java.sql.Timestamp

import cloudflow.streamlets.{ IntegerConfigParameter, StreamletShape }
import cloudflow.streamlets.avro._
import cloudflow.spark.{ SparkStreamlet, SparkStreamletLogic }
import org.apache.spark.sql.Dataset
import org.apache.spark.sql.streaming.OutputMode
import org.apache.spark.sql.functions._

import cloudflow.spark.sql.SQLImplicits._

import swissknife.data.Data

case class Rate(timestamp: Timestamp, value: Long)

class SparkDataGenerator extends SparkStreamlet {
  val out   = AvroOutlet[Data]("out", d ⇒ d.src)
  val shape = StreamletShape(out)

  val RecordsPerSecond = IntegerConfigParameter("records-per-second", "Records per second to produce.", Some(1))

  override def configParameters = Vector(RecordsPerSecond)

  override def createLogic() = new SparkStreamletLogic {

    override def buildStreamingQueries =
      writeStream(process, out, OutputMode.Append).toQueryExecution

    private def process: Dataset[Data] = {
      val recordsPerSecond = RecordsPerSecond.value
      session.readStream
        .format("rate")
        .option("rowsPerSecond", recordsPerSecond)
        .load()
        .select(lit("origin").as("src"), $"timestamp", lit("").as("payload"), $"value".as("count"))
        .as[Data]
    }
  }
} 
Example 28
Source File: CallRecordGeneratorIngress.scala    From cloudflow   with Apache License 2.0 5 votes vote down vote up
package carly.aggregator

import java.sql.Timestamp

import scala.util.Random
import scala.concurrent.duration._

import org.apache.spark.sql.{ Dataset, SparkSession }
import org.apache.spark.sql.streaming.OutputMode

import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.LongType

import cloudflow.streamlets._
import cloudflow.streamlets.avro._
import cloudflow.spark.sql.SQLImplicits._
import carly.data.CallRecord
import cloudflow.spark.{ SparkStreamlet, SparkStreamletLogic }
import org.apache.log4j.{ Level, Logger }

case class Rate(timestamp: Timestamp, value: Long)

class CallRecordGeneratorIngress extends SparkStreamlet {

  val rootLogger = Logger.getRootLogger()
  rootLogger.setLevel(Level.ERROR)

  val RecordsPerSecond = IntegerConfigParameter("records-per-second", "Records per second to process.", Some(50))

  override def configParameters = Vector(RecordsPerSecond)

  val out   = AvroOutlet[CallRecord]("out", _.user)
  val shape = StreamletShape(out)

  override def createLogic() = new SparkStreamletLogic {
    val recordsPerSecond = RecordsPerSecond.value
    override def buildStreamingQueries = {
      val outStream = DataGenerator.mkData(super.session, recordsPerSecond)
      writeStream(outStream, out, OutputMode.Append).toQueryExecution
    }
  }
}

object DataGenerator {
  def mkData(session: SparkSession, recordsPerSecond: Int): Dataset[CallRecord] = {
    // do we need to expose this through configuration?

    val MaxTime           = 2.hours.toMillis
    val MaxUsers          = 100000
    val TS0               = new java.sql.Timestamp(0)
    val ZeroTimestampProb = 0.05 // error rate

    // Random Data Generator
    val usersUdf     = udf(() ⇒ "user-" + Random.nextInt(MaxUsers))
    val directionUdf = udf(() ⇒ if (Random.nextDouble() < 0.5) "incoming" else "outgoing")

    // Time-biased randomized filter - 1/2 hour cycles
    val sinTime: Long ⇒ Double                   = t ⇒ Math.sin((t / 1000 % 1800) * 1.0 / 1800 * Math.PI)
    val timeBoundFilter: Long ⇒ Double ⇒ Boolean = t ⇒ prob ⇒ (sinTime(t) + 0.5) > prob
    val timeFilterUdf                            = udf((ts: java.sql.Timestamp, rng: Double) ⇒ timeBoundFilter(ts.getTime)(rng))
    val zeroTimestampUdf = udf { (ts: java.sql.Timestamp, rng: Double) ⇒
      if (rng < ZeroTimestampProb) {
        TS0
      } else {
        ts
      }
    }

    val rateStream = session.readStream
      .format("rate")
      .option("rowsPerSecond", recordsPerSecond)
      .load()
      .as[Rate]

    val randomDataset = rateStream.withColumn("rng", rand()).withColumn("tsRng", rand())
    val sampledData = randomDataset
      .where(timeFilterUdf($"timestamp", $"rng"))
      .withColumn("user", usersUdf())
      .withColumn("other", usersUdf())
      .withColumn("direction", directionUdf())
      .withColumn("duration", (round(abs(rand()) * MaxTime)).cast(LongType))
      .withColumn("updatedTimestamp", zeroTimestampUdf($"timestamp", $"tsRng"))
      .select($"user", $"other", $"direction", $"duration", $"updatedTimestamp".as("timestamp"))
      .as[CallRecord]
    sampledData
  }
} 
Example 29
Source File: SparkRandomGenIngress.scala    From cloudflow   with Apache License 2.0 5 votes vote down vote up
package cloudflow.sparkdoc

import scala.util.Random

import cloudflow.spark._
import cloudflow.streamlets._
import cloudflow.streamlets.avro._
import cloudflow.spark.sql.SQLImplicits._

import org.apache.spark.sql.Dataset
import org.apache.spark.sql.streaming.OutputMode
import java.sql.Timestamp

class SparkRandomGenDataIngress extends SparkStreamlet {
  val out   = AvroOutlet[Data]("out", d ⇒ d.key)
  val shape = StreamletShape(out)

  case class Rate(timestamp: Timestamp, value: Long)

  override def createLogic() = new SparkStreamletLogic {

    override def buildStreamingQueries =
      writeStream(process, out, OutputMode.Append).toQueryExecution

    private def process: Dataset[Data] = {

      val recordsPerSecond = 10

      val keyGen: () ⇒ String = () ⇒ if (Random.nextDouble() < 0.5) "keyOne" else "keyTwo"

      val rateStream = session.readStream
        .format("rate")
        .option("rowsPerSecond", recordsPerSecond)
        .load()
        .as[Rate]

      rateStream.map {
        case Rate(_, value) ⇒ Data(keyGen(), value.toInt)
      }
    }
  }
} 
Example 30
Source File: InsertMysqlDemo.scala    From spark_mysql   with Apache License 2.0 5 votes vote down vote up
import java.sql.{Date, Timestamp}

import InsertMysqlDemo.CardMember
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}
import utils.MySQLUtils

/**
  * Created with IntelliJ IDEA.
  * Author: [email protected]
  * Description: write DataFrame data into MySQL
  * Date: Created in 2018-11-17 12:39
  */
object InsertMysqlDemo {

  case class CardMember(m_id: String, card_type: String, expire: Timestamp, duration: Int, is_sale: Boolean, date: Date, user: Long, salary: Float)

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName(getClass.getSimpleName).set("spark.testing.memory", "3147480000")
    val sparkContext = new SparkContext(conf)
    val hiveContext = new SQLContext(sparkContext)
    import hiveContext.implicits._
    val memberSeq = Seq(
      CardMember("member_2", "月卡", new Timestamp(System.currentTimeMillis()), 31, false, new Date(System.currentTimeMillis()), 123223, 0.32f),
      CardMember("member_1", "季卡", new Timestamp(System.currentTimeMillis()), 93, false, new Date(System.currentTimeMillis()), 124224, 0.362f)
    )
    val memberDF = memberSeq.toDF()
    MySQLUtils.saveDFtoDBCreateTableIfNotExist("member_test", memberDF)
    MySQLUtils.insertOrUpdateDFtoDBUsePool("member_test", memberDF, Array("user", "salary"))
    MySQLUtils.getDFFromMysql(hiveContext, "", null)


    sparkContext.stop()
  }
} 
Example 31
Source File: SchemaData.scala    From pulsar-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.pulsar

import java.sql.Timestamp
import java.util
import java.util.Calendar

import scala.beans.BeanProperty
import scala.collection.JavaConverters._

object SchemaData {

  val booleanSeq = Seq(true, false, true, true, false)
  val bytesSeq = 1.to(5).map(_.toString.getBytes)

  val cal = Calendar.getInstance()
  cal.clear()
  val dateSeq = (1 to 5).map { i =>
    cal.set(2019, 0, i)
    cal.getTime
  }

  cal.clear()
  val timestampSeq = (1 to 5).map { i =>
    cal.set(2019, 0, i, 20, 35, 40)
    new Timestamp(cal.getTimeInMillis)
  }

  val stringSeq = 1.to(5).map(_.toString)
  val int8Seq = 1.to(5).map(_.toByte)
  val doubleSeq = 1.to(5).map(_.toDouble)
  val floatSeq = 1.to(5).map(_.toFloat)
  val int32Seq = 1.to(5)
  val int64Seq = 1.to(5).map(_.toLong)
  val int16Seq = 1.to(5).map(_.toShort)

  case class Foo(@BeanProperty i: Int, @BeanProperty f: Float, @BeanProperty bar: Bar)
  case class Bar(@BeanProperty b: Boolean, @BeanProperty s: String)

  case class F1(@BeanProperty baz: Baz)

  case class Baz(
      @BeanProperty f: Float,
      @BeanProperty d: Double,
      @BeanProperty mp: util.Map[String, Bar],
      @BeanProperty arr: Array[Bar])

  val fooSeq: Seq[Foo] =
    Foo(1, 1.0.toFloat, Bar(true, "a")) :: Foo(2, 2.0.toFloat, Bar(false, "b")) :: Foo(3, 0, null) :: Nil

  val f1Seq: Seq[F1] =
    F1(
      Baz(
        Float.NaN,
        Double.NaN,
        Map("1" -> Bar(true, "1"), "2" -> Bar(false, "2")).asJava,
        Array(Bar(true, "1"), Bar(true, "2")))) ::
    F1(
      Baz(
        Float.NegativeInfinity,
        Double.NegativeInfinity,
        Map("" -> Bar(true, "1")).asJava,
        null)) ::
    F1(Baz(Float.PositiveInfinity, Double.PositiveInfinity, null, null)) ::
    F1(Baz(1.0.toFloat, 2.0, null, null)) :: Nil

  val f1Results = f1Seq.map(f1 =>
    (f1.baz.f, f1.baz.d, if (f1.baz.mp == null) null else f1.baz.mp.asScala, f1.baz.arr))
} 
Example 32
Source File: Executor.scala    From neo4j-spark-connector   with Apache License 2.0 5 votes vote down vote up
package org.neo4j.spark

import java.time.{LocalDate, LocalDateTime, OffsetTime, ZoneOffset, ZonedDateTime}
import java.util
import java.sql.Timestamp

import org.apache.spark.SparkContext
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.types.StructType
import org.neo4j.spark.dataframe.CypherTypes
import org.neo4j.spark.utils.{Neo4jSessionAwareIterator, Neo4jUtils}

import scala.collection.JavaConverters._


object Executor {

  def convert(value: AnyRef): Any = value match {
    case it: util.Collection[_] => it.toArray()
    case m: java.util.Map[_,_] => m.asScala
    case _ => Neo4jUtils.convert(value)
  }

  def toJava(parameters: Map[String, Any]): java.util.Map[String, Object] = {
    parameters.mapValues(toJava).asJava
  }

  private def toJava(x: Any): AnyRef = x match {
    case y: Seq[_] => y.asJava
    case _ => x.asInstanceOf[AnyRef]
  }

  val EMPTY = Array.empty[Any]

  val EMPTY_RESULT = new CypherResult(new StructType(), Iterator.empty)

  class CypherResult(val schema: StructType, val rows: Iterator[Array[Any]]) {
    def sparkRows: Iterator[Row] = rows.map(row => new GenericRowWithSchema(row, schema))

    def fields = schema.fieldNames
  }

  def execute(sc: SparkContext, query: String, parameters: Map[String, AnyRef]): CypherResult = {
    execute(Neo4jConfig(sc.getConf), query, parameters)
  }

  // Count the remaining records, consuming the iterator as we go
  private def rows(result: Iterator[_]) = {
    var i = 0
    while (result.hasNext) {
      result.next()
      i = i + 1
    }
    i
  }

  def execute(config: Neo4jConfig, query: String, parameters: Map[String, Any], write: Boolean = false): CypherResult = {
    val result = new Neo4jSessionAwareIterator(config, query, toJava(parameters), write)
    if (!result.hasNext) {
      return EMPTY_RESULT
    }
    val peek = result.peek()
    val keyCount = peek.size()
    if (keyCount == 0) {
      return new CypherResult(new StructType(), Array.fill[Array[Any]](rows(result))(EMPTY).toIterator)
    }
    val keys = peek.keys().asScala
    val fields = keys.map(k => (k, peek.get(k).`type`())).map(keyType => CypherTypes.field(keyType))
    val schema = StructType(fields)
    val it = result.map(record => {
      val row = new Array[Any](keyCount)
      var i = 0
      while (i < keyCount) {
        val value = convert(record.get(i).asObject())
        row.update(i, value)
        i = i + 1
      }
      row
    })
    new CypherResult(schema, it)
  }
} 
Example 33
Source File: Neo4jUtils.scala    From neo4j-spark-connector   with Apache License 2.0 5 votes vote down vote up
package org.neo4j.spark.utils
import java.sql.Timestamp
import java.time._
import java.util.concurrent.Callable
import java.util.function

import io.github.resilience4j.retry.{Retry, RetryConfig}
import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.neo4j.driver.exceptions.{ServiceUnavailableException, SessionExpiredException, TransientException}
import org.neo4j.driver.{Driver, Result, Session, Transaction}
import org.neo4j.spark.Neo4jConfig
import org.slf4j.LoggerFactory

class Neo4jUtils

object Neo4jUtils {

  private val logger = LoggerFactory.getLogger(classOf[Neo4jUtils])

  def close(driver: Driver, session: Session): Unit = {
    try {
      if (session != null && session.isOpen) {
        closeSafety(session)
      }
    } finally {
      if (driver != null) {
        closeSafety(driver)
      }
    }
  }

  private def closeSafety(closable: AutoCloseable): Unit = {
    try {
      closable.close()
    } catch {
      case e: Throwable => {
        logger.error("Exception while trying to close an AutoCloseable, because of the following exception", e)
      }
    }
  }

  private val retryConfig = RetryConfig.custom.retryExceptions(
      classOf[SessionExpiredException], classOf[ServiceUnavailableException] // retry on the same exceptions the driver does [1]
    )
    .retryOnException(new function.Predicate[Throwable] {
      override def test(exception: Throwable): Boolean = exception match {
        case t: TransientException => {
          val code = t.code()
          !("Neo.TransientError.Transaction.Terminated" == code) && !("Neo.TransientError.Transaction.LockClientStopped" == code)
        }
        case _ => false
      }
    })
    .maxAttempts(3)
    .build

  def executeTxWithRetries[T](neo4jConfig: Neo4jConfig,
                              query: String,
                              params: java.util.Map[String, AnyRef],
                              write: Boolean): (Driver, Session, Transaction, Result) = {
    val driver: Driver = neo4jConfig.driver()
    val session: Session = driver.session(neo4jConfig.sessionConfig(write))
    Retry.decorateCallable(
        Retry.of("neo4jTransactionRetryPool", retryConfig),
        new Callable[(Driver, Session, Transaction, Result)] {
          override def call(): (Driver, Session, Transaction, Result) = {
            val transaction = session.beginTransaction()
            val result = transaction.run(query, params)
            (driver, session, transaction, result)
          }
        }
      )
      .call()
  }

  def convert(value: AnyRef): AnyRef = value match {
    case m: ZonedDateTime => new Timestamp(DateTimeUtils.fromUTCTime(m.toInstant.toEpochMilli, m.getZone.getId))
    case m: LocalDateTime => new Timestamp(DateTimeUtils.fromUTCTime(m.toInstant(ZoneOffset.UTC).toEpochMilli,"UTC"))
    case m: LocalDate => java.sql.Date.valueOf(m)
    case m: OffsetTime => new Timestamp(m.atDate(LocalDate.ofEpochDay(0)).toInstant.toEpochMilli)
    case _ => value
  }

} 
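The converter above routes java.time values through Spark's internal `DateTimeUtils`; for reference, a standalone sketch of the plain JDK conversions from `java.time` values to `java.sql.Timestamp` (no Spark assumed, object name illustrative):

import java.sql.Timestamp
import java.time.{LocalDate, LocalDateTime, ZonedDateTime, ZoneOffset}

object TemporalToTimestamp extends App {
  val zoned = ZonedDateTime.now()
  val local = LocalDateTime.now()

  val fromZoned    = Timestamp.from(zoned.toInstant)                 // zone-aware instant
  val fromLocalJvm = Timestamp.valueOf(local)                        // local time in the JVM default zone
  val fromLocalUtc = Timestamp.from(local.toInstant(ZoneOffset.UTC)) // local time interpreted as UTC
  val sqlDate      = java.sql.Date.valueOf(LocalDate.now())          // date-only counterpart

  println(Seq(fromZoned, fromLocalJvm, fromLocalUtc, sqlDate).mkString("\n"))
}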
Example 34
Source File: AnnouncementService.scala    From recogito2   with Apache License 2.0 5 votes vote down vote up
package services.announcement

import java.sql.Timestamp
import java.util.Date
import javax.inject.{Inject, Singleton}
import scala.concurrent.{ExecutionContext, Future}
import services.BaseService
import services.generated.Tables.SERVICE_ANNOUNCEMENT
import services.generated.tables.records.ServiceAnnouncementRecord
import storage.db.DB
import services.user.UserService
import java.util.UUID

@Singleton
class AnnouncementService @Inject() (val db: DB, users: UserService, implicit val ctx: ExecutionContext) extends BaseService {
  
  def findLatestUnread(username: String): Future[Option[ServiceAnnouncementRecord]] = db.query { sql =>
    Option(sql.selectFrom(SERVICE_ANNOUNCEMENT)
              .where(SERVICE_ANNOUNCEMENT.FOR_USER.equal(username)
                .and(SERVICE_ANNOUNCEMENT.RESPONSE.isNull))
              .orderBy(SERVICE_ANNOUNCEMENT.CREATED_AT.desc())
              .fetchOne())
  }
  
  def confirm(uuid: UUID, username: String, response: String): Future[Boolean] = db.query { sql =>
    val result =
      sql.update(SERVICE_ANNOUNCEMENT)
         .set(SERVICE_ANNOUNCEMENT.VIEWED_AT, new Timestamp(new Date().getTime))
         .set(SERVICE_ANNOUNCEMENT.RESPONSE, response)
         .where(SERVICE_ANNOUNCEMENT.ID.equal(uuid).and(SERVICE_ANNOUNCEMENT.FOR_USER.equal(username)))
         .execute()
         
    result == 1
  }
  
  def clearAll(): Future[Boolean] = db.query { sql =>
    sql.deleteFrom(SERVICE_ANNOUNCEMENT).execute()
    true
  } recover { case t: Throwable =>
    t.printStackTrace()
    false
  }
  
  def deleteForUser(username: String) = db.query { sql =>
    sql.deleteFrom(SERVICE_ANNOUNCEMENT)
       .where(SERVICE_ANNOUNCEMENT.FOR_USER.equal(username)).execute()
  }
  
  def insertBroadcastAnnouncement(content: String): Future[Boolean] =  {
    val BATCH_SIZE = 200
    
    def insertOneBatch(users: Seq[String]): Future[_] = db.query { sql =>
      sql.batch(users.map { user =>
        sql.insertInto(SERVICE_ANNOUNCEMENT,
          SERVICE_ANNOUNCEMENT.ID, 
          SERVICE_ANNOUNCEMENT.FOR_USER,
          SERVICE_ANNOUNCEMENT.CONTENT,
          SERVICE_ANNOUNCEMENT.CREATED_AT,
          SERVICE_ANNOUNCEMENT.VIEWED_AT,
          SERVICE_ANNOUNCEMENT.RESPONSE
        ).values(
          UUID.randomUUID(),
          user,
          content,
          new Timestamp(new Date().getTime),
          null,
          null)
      }:_*).execute()
    }
    
    def insertBatchesRecursive(offset: Int, numUsers: Int): Future[Boolean] =
      users.listUsers(offset, BATCH_SIZE, None, None).flatMap { users =>
        insertOneBatch(users.items.map(_._1.getUsername))
      } flatMap { _ =>
        if (offset + BATCH_SIZE >= numUsers)
          Future.successful(true)
        else
          insertBatchesRecursive(offset + BATCH_SIZE, numUsers)
      }
      
    val f = for {
      numUsers <- users.countUsers()
      success <- insertBatchesRecursive(0, numUsers)
    } yield (success)
    
    f.recover { case t: Throwable =>
      play.api.Logger.info(t.getMessage)
      t.printStackTrace()
      false
    }
  }
  
} 
Example 35
Source File: MimirUDF.scala    From mimir   with Apache License 2.0 5 votes vote down vote up
package mimir.exec.spark.udf

import java.sql.{ Timestamp, Date }

import org.apache.spark.sql.types.{ DataType, StructType, StructField }

import mimir.algebra._
import mimir.exec.spark._
import mimir.util.SparkUtils

class MimirUDF {
  def getPrimitive(t:Type, value:Any) = value match {
    case null => NullPrimitive()
    case _ => t match {
      //case TInt() => IntPrimitive(value.asInstanceOf[Long])
      case TInt() => IntPrimitive(value.asInstanceOf[Long])
      case TFloat() => FloatPrimitive(value.asInstanceOf[Double])
      case TDate() => SparkUtils.convertDate(value.asInstanceOf[Date])
      case TTimestamp() => SparkUtils.convertTimestamp(value.asInstanceOf[Timestamp])
      case TString() => StringPrimitive(value.asInstanceOf[String])
      case TBool() => BoolPrimitive(value.asInstanceOf[Boolean])
      case TRowId() => RowIdPrimitive(value.asInstanceOf[String])
      case TType() => TypePrimitive(Type.fromString(value.asInstanceOf[String]))
      //case TAny() => NullPrimitive()
      //case TUser(name) => name.toLowerCase
      //case TInterval() => Primitive(value.asInstanceOf[Long])
      case _ => StringPrimitive(value.asInstanceOf[String])
    }
  }
  def getNative(primitive : PrimitiveValue) : AnyRef = 
    primitive match {
      case NullPrimitive() => null
      case RowIdPrimitive(s) => s
      case StringPrimitive(s) => s
      case IntPrimitive(i) => new java.lang.Long(i)
      case FloatPrimitive(f) => new java.lang.Double(f)
      case BoolPrimitive(b) => new java.lang.Boolean(b)
      case ts@TimestampPrimitive(y,m,d,h,mm,s,ms) => SparkUtils.convertTimestamp(ts)
      case dt@DatePrimitive(y,m,d) => SparkUtils.convertDate(dt)
      case x =>  x.asString
    }
  def getStructType(datatypes:Seq[DataType]): StructType = {
    StructType(datatypes.map(dti => StructField("", RAToSpark.getInternalSparkType(dti), true)))
  }
} 
Example 36
Source File: TaskRecordAggregate.scala    From recogito2   with Apache License 2.0 5 votes vote down vote up
package services.task

import java.sql.Timestamp
import java.util.UUID
import services.generated.tables.records.TaskRecord
import play.api.libs.json._
import play.api.libs.json.Reads._
import play.api.libs.functional.syntax._

case class TaskRecordAggregate(taskRecords: Seq[TaskRecord]) {
  
  private def getDistinctField[T](filter: TaskRecord => T, errorMessage: String): T = {
    val fields = taskRecords.map(filter).distinct
    if (fields.size != 1)
      throw new RuntimeException("Invalid task record aggregation: " + errorMessage + " (" + fields.mkString(", ") + ")")
    fields.head
  }

  lazy val taskType = TaskType(getDistinctField[String](_.getTaskType, "different task types"))

  lazy val className = getDistinctField[String](_.getClassName, "different class names")
  
  lazy val documentId = getDistinctField[String](_.getDocumentId, "different document IDs")
  
  lazy val spawnedBy = getDistinctField[String](_.getSpawnedBy, "different values for spawned_by")
  
  lazy val spawnedAt = taskRecords.sortBy(_.getSpawnedAt.getTime).head.getSpawnedAt
  
  lazy val stoppedAt = {
    val stoppedAtByTask = taskRecords.map(task => Option(task.getStoppedAt))
    if (stoppedAtByTask.exists(_.isEmpty))
      // At least one sub-task is unfinished - report aggregate task as unfinished 
      None
    else
      // All stopped - use latest stop time
      Some(stoppedAtByTask.flatten.sortBy(_.getTime).reverse.head)
  }
  
  lazy val stoppedWith =
    taskRecords.flatMap(task => Option(task.getStoppedWith))

  lazy val status =
    taskRecords.map(task => TaskStatus.withName(task.getStatus)) match {
    
      case statusByTask if statusByTask.exists(_ == TaskStatus.FAILED) =>
        // Any task that failed?
        TaskStatus.FAILED
        
      case statusByTask if statusByTask.forall(_ == TaskStatus.COMPLETED) =>
        // All complete?
        TaskStatus.COMPLETED
        
      case statusByTask if statusByTask.forall(_ == TaskStatus.PENDING) =>
        // All pending?
        TaskStatus.PENDING
        
      case _ => TaskStatus.RUNNING
        
    }
  
  lazy val progress =
    taskRecords.map(_.getProgress.toInt).sum / taskRecords.size
  
}

object TaskRecordAggregate {
  
  implicit val taskRecordWrites: Writes[TaskRecord] = (
    (JsPath \ "task_type").write[String] and
    (JsPath \ "filepart_id").write[UUID] and
    (JsPath \ "status").write[String] and
    (JsPath \ "progress").write[Int]
  )(r => (
     r.getTaskType,
     r.getFilepartId,
     r.getStatus,
     r.getProgress
  ))
  
  implicit val aggregateTaskRecordWrites: Writes[TaskRecordAggregate] = (
    (JsPath \ "document_id").write[String] and
    (JsPath \ "status").write[String] and
    (JsPath \ "progress").write[Int] and
    (JsPath \ "subtasks").write[Seq[TaskRecord]]
  )(r => (
      r.documentId,
      r.status.toString,
      r.progress,
      r.taskRecords
  ))  
  
} 
Example 37
Source File: NetworkOps.scala    From recogito2   with Apache License 2.0 5 votes vote down vote up
package services.document.network

import java.sql.Timestamp
import scala.concurrent.{ExecutionContext, Future}
import services.document.DocumentService

trait NetworkOps { self: DocumentService => 

  
  def getNetwork(docId: String)(implicit ctx: ExecutionContext): Future[Option[AncestryTree]] = {
    val f = for {
      maybeRoot <- getNetworkRoot(docId)
      descendants <- maybeRoot.map(rootNode => getDescendants(rootNode.id))
                       .getOrElse(Future.successful(Seq.empty[TreeRecord]))
    } yield (maybeRoot, descendants)

    f.map { case (maybeRoot, descendants) => 
      maybeRoot.map(rootNode => AncestryTree(rootNode, descendants))
    }
  }

} 
Example 38
Source File: AncestryTree.scala    From recogito2   with Apache License 2.0 5 votes vote down vote up
package services.document.network

import java.sql.Timestamp


case class AncestryTree(private val root: TreeRecord, private[network] val descendants: Seq[TreeRecord]) {

  val rootNode = AncestryTreeNode(
    root.id, 
    root.owner, 
    root.clonedFrom, root.clonedAt, // Should ALWAYS be None
    this)

}

case class AncestryTreeNode(
  id: String, 
  owner: String, 
  clonedFrom: Option[String], 
  clonedAt: Option[Timestamp], 
  private val tree: AncestryTree
) {

  lazy val children: Seq[AncestryTreeNode] = 
    tree.descendants
      .filter(_.clonedFrom  == Some(id))
      .map(r => AncestryTreeNode(r.id, r.owner, r.clonedFrom, r.clonedAt, tree))
      
} 
Example 39
Source File: PublicAccountInfo.scala    From recogito2   with Apache License 2.0 5 votes vote down vote up
package controllers.my.account

import java.sql.Timestamp
import org.joda.time.DateTime
import play.api.libs.json._
import play.api.libs.functional.syntax._
import services.HasDate
import services.contribution.stats.ContributorActivity
import services.document.read.AccessibleDocumentsCount
import services.user.User


case class PublicAccountInfo(
  user: User, 
  accessibleDocuments: AccessibleDocumentsCount,
  stats: ContributorActivity)

object PublicAccountInfo extends HasDate {

  implicit val accessibleDocumentsWrites: Writes[AccessibleDocumentsCount] = (
    (JsPath \ "public").write[Long] and
    (JsPath \ "shared_with_me").writeNullable[Long]
  )(d => (d.public, d.shared))  

  implicit val visitedAccountInfoWrites: Writes[PublicAccountInfo] = (
    (JsPath \ "username").write[String] and
    (JsPath \ "real_name").writeNullable[String] and
    (JsPath \ "member_since").write[DateTime] and
    (JsPath \ "bio").writeNullable[String] and
    (JsPath \ "website").writeNullable[String] and
    (JsPath \ "documents").write[AccessibleDocumentsCount] and
    (JsPath \ "stats").write[ContributorActivity]
  )(v => (
      v.user.username,
      v.user.realName,
      new DateTime(v.user.memberSince.getTime),
      v.user.bio,
      v.user.website,
      v.accessibleDocuments,
      v.stats
  ))  
  
} 
Example 40
Source File: PrivateAccountInfo.scala    From recogito2   with Apache License 2.0 5 votes vote down vote up
package controllers.my.account

import java.sql.Timestamp
import org.joda.time.DateTime
import play.api.libs.json._
import play.api.libs.functional.syntax._
import services.{HasDate, HasNullableSeq}
import services.contribution.stats.ContributorActivity
import services.user.User


case class PrivateAccountInfo(
  user: User, 
  myDocumentsCount: Long, 
  sharedWithMeCount: Long,
  stats: ContributorActivity,
  usedMb: Double)

object PrivateAccountInfo extends HasDate with HasNullableSeq {

  implicit val personalAccountInfoWrites: Writes[PrivateAccountInfo] = (
    (JsPath \ "username").write[String] and
    (JsPath \ "real_name").writeNullable[String] and
    (JsPath \ "member_since").write[DateTime] and
    (JsPath \ "bio").writeNullable[String] and
    (JsPath \ "website").writeNullable[String] and
    (JsPath \ "feature_toggles").writeNullable[Seq[String]] and
    (JsPath \ "documents").write[JsObject] and
    (JsPath \ "storage").write[JsObject] and
    (JsPath \ "stats").write[ContributorActivity]
  )(p => (
      p.user.username,
      p.user.realName,
      new DateTime(p.user.memberSince.getTime),
      p.user.bio,
      p.user.website,
      toOptSeq(p.user.featureToggles),
      Json.obj(
        "my_documents" -> p.myDocumentsCount,
        "shared_with_me" -> p.sharedWithMeCount
      ),
      Json.obj(
        "quota_mb" -> p.user.quotaMb.toInt,
        "used_mb" -> p.usedMb
      ),
      p.stats
  ))

} 
Example 41
Source File: TwitterBatchTimely.scala    From Mastering-Spark-for-Data-Science   with MIT License 5 votes vote down vote up
package io.gzet.timeseries

import java.sql.Timestamp

import com.cloudera.sparkts.{DateTimeIndex, TimeSeriesRDD}
import io.gzet.timeseries.timely.MetricImplicits._
import io.gzet.timeseries.timely.TimelyImplicits._
import io.gzet.timeseries.twitter.Twitter._
import io.gzet.utils.spark.accumulo.AccumuloConfig
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}
import org.joda.time.{DateTime, Minutes, Period}

object TwitterBatchTimely extends SimpleConfig {

  case class Observation(
                          hashtag: String,
                          time: Timestamp,
                          count: Double
                        )

  def main(args: Array[String]) = {

    val sparkConf = new SparkConf().setAppName("Twitter Extractor")
    val sc = new SparkContext(sparkConf)
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    val twitterJsonRDD = sc.textFile("file:///Users/antoine/CHAPTER/twitter-trump", 500)
    val tweetRDD = twitterJsonRDD mapPartitions analyzeJson cache()

    // Publish metrics to Timely
    tweetRDD.count()
    tweetRDD.countByState.publish()
    tweetRDD.sentimentByState.publish()

    // Read metrics from Timely
    val conf = AccumuloConfig("GZET", "alice", "alice", "localhost:2181")
    val metricsRDD = sc.timely(conf, Some("io.gzet.count"))

    val minDate = metricsRDD.map(_.time).min()
    val maxDate = metricsRDD.map(_.time).max()

    class TwitterFrequency(val minutes: Int) extends com.cloudera.sparkts.PeriodFrequency(Period.minutes(minutes)) {
      def difference(dt1: DateTime, dt2: DateTime): Int = Minutes.minutesBetween(dt1, dt2).getMinutes / minutes
      override def toString: String = s"minutes $minutes"
    }

    val dtIndex = DateTimeIndex.uniform(minDate, maxDate, new TwitterFrequency(1))

    val metricsDF = metricsRDD.filter({
      metric =>
        metric.tags.keys.toSet.contains("tag")
    }).flatMap({
      metric =>
        metric.tags map {
          case (k, v) =>
            ((v, roundFloorMinute(metric.time, 1)), metric.value)
        }
    }).reduceByKey(_+_).map({
      case ((metric, time), sentiment) =>
        Observation(metric, new Timestamp(time), sentiment)
    }).toDF()

    val tsRDD = TimeSeriesRDD.timeSeriesRDDFromObservations(dtIndex, metricsDF, "time", "hashtag", "count").filter(_._2.toArray.exists(!_.isNaN))

  }

  def roundFloorMinute(time: Long, windowMinutes: Int) = {
    val dt = new DateTime(time)
    dt.withMinuteOfHour((dt.getMinuteOfHour / windowMinutes) * windowMinutes).minuteOfDay().roundFloorCopy().toDate.getTime
  }

} 
Example 42
Source File: FieldDateTime.scala    From spark-gdb   with Apache License 2.0 5 votes vote down vote up
package com.esri.gdb

import java.nio.ByteBuffer
import java.sql.Timestamp

import org.apache.spark.sql.types.{Metadata, TimestampType}


class FieldDateTime(name: String, nullValueAllowed: Boolean, metadata:Metadata)
  extends Field(name, TimestampType, nullValueAllowed, metadata) {

  override def readValue(byteBuffer: ByteBuffer, oid: Int) = {
    val numDays = byteBuffer.getDouble
    // convert days since 12/30/1899 to 1/1/1970
    val unixDays = numDays - 25569
    val millis = (unixDays * 1000 * 60 * 60 * 24).ceil.toLong
    new Timestamp(millis)
  }
} 
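The constant 25569 above is the number of days between the epoch used here (1899-12-30) and the Unix epoch (1970-01-01). A tiny sketch that verifies it:

import java.time.LocalDate
import java.time.temporal.ChronoUnit

object EpochOffsetCheck extends App {
  val offsetDays = ChronoUnit.DAYS.between(LocalDate.of(1899, 12, 30), LocalDate.of(1970, 1, 1))
  println(offsetDays) // 25569
}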
Example 43
Source File: package.scala    From modelmatrix   with Apache License 2.0 5 votes vote down vote up
package com.collective.modelmatrix

import java.sql.Timestamp
import java.time.Instant

import org.apache.spark.sql.types._
import scodec.bits.ByteVector
import slick.driver.PostgresDriver.api._

package object catalog {

  implicit val instantColumnType =
    MappedColumnType.base[Instant, java.sql.Timestamp](
      instant => Timestamp.from(instant),
      _.toInstant
    )

  implicit val dataTypeColumnType =
    MappedColumnType.base[DataType, String]({
      case ShortType => "short"
      case IntegerType => "integer"
      case LongType => "long"
      case DoubleType => "double"
      case StringType => "string"
    }, {
      case "short" => ShortType
      case "integer" => IntegerType
      case "long" => LongType
      case "double" => DoubleType
      case "string" => StringType
    })

  implicit val byteVectorColumnType =
    MappedColumnType.base[ByteVector, Array[Byte]](
      _.toArray,
      ByteVector.apply
    )
} 
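A standalone check of the `Instant` / `Timestamp` round trip that the Slick column mapping above relies on:

import java.sql.Timestamp
import java.time.Instant

object InstantTimestampRoundTrip extends App {
  val now = Instant.now()

  val bound: Timestamp = Timestamp.from(now) // direction used when binding to the database
  val read: Instant    = bound.toInstant     // direction used when reading back

  // Timestamp keeps nanosecond precision, so the round trip is lossless
  assert(read == now)
  println(s"$now -> $bound -> $read")
}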
Example 44
Source File: Utils.scala    From lemon-schedule   with GNU General Public License v2.0 5 votes vote down vote up
package com.gabry.job.utils

import java.sql.Timestamp
import java.text.SimpleDateFormat
import java.util.Date
import java.util.concurrent.TimeUnit

import scala.collection.mutable.ArrayBuffer


object Utils {

  // Assumed helper: reflective handle to ClassLoader's private "classes" field,
  // i.e. the java.util.Vector of classes this loader has defined (a JVM implementation detail).
  private val classesField = {
    val field = classOf[ClassLoader].getDeclaredField("classes")
    field.setAccessible(true)
    field
  }

  def getLoadedClass(classLoader: ClassLoader): Array[Class[_]] = {
    val loadedClass = ArrayBuffer.empty[Class[_]]

    val loadedClassEnum = classesField.get(classLoader).asInstanceOf[java.util.Vector[Class[_]]].elements()

    while(loadedClassEnum.hasMoreElements){
      val nextElement = loadedClassEnum.nextElement()
      loadedClass.append(nextElement)
    }
    loadedClass.toArray
  }
} 
Example 45
Source File: Utils.scala    From hbase-connectors   with Apache License 2.0 5 votes vote down vote up
package org.apache.hadoop.hbase.spark.datasources

import java.sql.{Date, Timestamp}

import org.apache.hadoop.hbase.spark.AvroSerdes
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
import org.apache.yetus.audience.InterfaceAudience;

@InterfaceAudience.Private
object Utils {

  
  def hbaseFieldToScalaType(
      f: Field,
      src: Array[Byte],
      offset: Int,
      length: Int): Any = {
    if (f.exeSchema.isDefined) {
      // If we have avro schema defined, use it to get record, and then convert them to catalyst data type
      val m = AvroSerdes.deserialize(src, f.exeSchema.get)
      val n = f.avroToCatalyst.map(_(m))
      n.get
    } else  {
      // Fall back to atomic type
      f.dt match {
        case BooleanType => src(offset) != 0
        case ByteType => src(offset)
        case ShortType => Bytes.toShort(src, offset)
        case IntegerType => Bytes.toInt(src, offset)
        case LongType => Bytes.toLong(src, offset)
        case FloatType => Bytes.toFloat(src, offset)
        case DoubleType => Bytes.toDouble(src, offset)
        case DateType => new Date(Bytes.toLong(src, offset))
        case TimestampType => new Timestamp(Bytes.toLong(src, offset))
        case StringType => UTF8String.fromBytes(src, offset, length)
        case BinaryType =>
          val newArray = new Array[Byte](length)
          System.arraycopy(src, offset, newArray, 0, length)
          newArray
        // TODO: SparkSqlSerializer.deserialize[Any](src)
        case _ => throw new Exception(s"unsupported data type ${f.dt}")
      }
    }
  }

  // convert input to data type
  def toBytes(input: Any, field: Field): Array[Byte] = {
    if (field.schema.isDefined) {
      // Here we assume the top level type is structType
      val record = field.catalystToAvro(input)
      AvroSerdes.serialize(record, field.schema.get)
    } else {
      field.dt match {
        case BooleanType => Bytes.toBytes(input.asInstanceOf[Boolean])
        case ByteType => Array(input.asInstanceOf[Number].byteValue)
        case ShortType => Bytes.toBytes(input.asInstanceOf[Number].shortValue)
        case IntegerType => Bytes.toBytes(input.asInstanceOf[Number].intValue)
        case LongType => Bytes.toBytes(input.asInstanceOf[Number].longValue)
        case FloatType => Bytes.toBytes(input.asInstanceOf[Number].floatValue)
        case DoubleType => Bytes.toBytes(input.asInstanceOf[Number].doubleValue)
        case DateType | TimestampType => Bytes.toBytes(input.asInstanceOf[java.util.Date].getTime)
        case StringType => Bytes.toBytes(input.toString)
        case BinaryType => input.asInstanceOf[Array[Byte]]
        case _ => throw new Exception(s"unsupported data type ${field.dt}")
      }
    }
  }
} 
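As the converter shows, `TimestampType` values travel through HBase as the epoch-millisecond `long`. A small round-trip sketch (assuming `hbase-common` is on the classpath for `Bytes`; object name illustrative):

import java.sql.Timestamp

import org.apache.hadoop.hbase.util.Bytes

object TimestampBytesRoundTrip extends App {
  val ts = new Timestamp(System.currentTimeMillis())

  val encoded: Array[Byte] = Bytes.toBytes(ts.getTime)  // write path: long -> 8 bytes
  val decoded = new Timestamp(Bytes.toLong(encoded, 0)) // read path: bytes -> long -> Timestamp

  assert(decoded == ts) // precision beyond milliseconds is not carried, matching the converter above
  println(decoded)
}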
Example 46
Source File: RDBDataTypeConverter.scala    From ohara   with Apache License 2.0 5 votes vote down vote up
package oharastream.ohara.connector.jdbc.datatype

import java.sql.{Date, ResultSet, Time, Timestamp}
import java.util.Optional

import oharastream.ohara.client.configurator.InspectApi.RdbColumn
import oharastream.ohara.connector.jdbc.util.DateTimeUtils

trait RDBDataTypeConverter {
  
  def converterValue(resultSet: ResultSet, column: RdbColumn): Any = {
    val columnName             = column.name
    val typeName               = column.dataType.toUpperCase
    val dataType: DataTypeEnum = converterDataType(column)
    dataType match {
      case DataTypeEnum.INTEGER =>
        java.lang.Integer.valueOf(resultSet.getInt(columnName))
      case DataTypeEnum.LONG =>
        java.lang.Long.valueOf(resultSet.getLong(columnName))
      case DataTypeEnum.BOOLEAN =>
        java.lang.Boolean.valueOf(resultSet.getBoolean(columnName))
      case DataTypeEnum.FLOAT =>
        java.lang.Float.valueOf(resultSet.getFloat(columnName))
      case DataTypeEnum.DOUBLE =>
        java.lang.Double.valueOf(resultSet.getDouble(columnName))
      case DataTypeEnum.BIGDECIMAL =>
        Optional.ofNullable(resultSet.getBigDecimal(columnName)).orElseGet(() => new java.math.BigDecimal(0L))
      case DataTypeEnum.STRING =>
        Optional.ofNullable(resultSet.getString(columnName)).orElseGet(() => "null")
      case DataTypeEnum.DATE =>
        Optional.ofNullable(resultSet.getDate(columnName, DateTimeUtils.CALENDAR)).orElseGet(() => new Date(0))
      case DataTypeEnum.TIME =>
        Optional.ofNullable(resultSet.getTime(columnName, DateTimeUtils.CALENDAR)).orElseGet(() => new Time(0))
      case DataTypeEnum.TIMESTAMP =>
        Optional
          .ofNullable(resultSet.getTimestamp(columnName, DateTimeUtils.CALENDAR))
          .orElseGet(() => new Timestamp(0))
      case DataTypeEnum.BYTES =>
        Optional.ofNullable(resultSet.getBytes(columnName)).orElseGet(() => Array())
      case _ =>
        throw new UnsupportedOperationException(
          s"JDBC Source Connector not support ${typeName} data type in ${columnName} column for ${dataBaseProductName} implement."
        )
    }
  }
  protected[datatype] def dataBaseProductName: String

  protected[datatype] def converterDataType(column: RdbColumn): DataTypeEnum
} 
Example 47
Source File: SnowflakeWriter.scala    From spark-snowflake   with Apache License 2.0 5 votes vote down vote up
package net.snowflake.spark.snowflake

import java.sql.{Date, Timestamp}

import net.snowflake.client.jdbc.internal.apache.commons.codec.binary.Base64
import net.snowflake.spark.snowflake.Parameters.MergedParameters
import net.snowflake.spark.snowflake.io.SupportedFormat
import net.snowflake.spark.snowflake.io.SupportedFormat.SupportedFormat
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types._
import org.apache.spark.sql._


class SnowflakeWriter(jdbcWrapper: JDBCWrapper) { // constructor shape assumed from DefaultSnowflakeWriter below

  private def removeUselessColumns(dataFrame: DataFrame,
                                   params: MergedParameters): DataFrame =
    params.columnMap match {
      case Some(map) =>
        // Enclose column name with backtick(`) if dot(.) exists in column name
        val names = map.keys.toSeq.map(name =>
          if (name.contains(".")) {
            s"`$name`"
          } else {
            name
          })
        try {
          dataFrame.select(names.head, names.tail: _*)
        } catch {
          case e: AnalysisException =>
            throw new IllegalArgumentException(
              "Incorrect column name when column mapping: " + e.toString
            )
        }
      case _ => dataFrame
    }

  // Prepare a set of conversion functions, based on the schema
  def genConversionFunctions(schema: StructType): Array[Any => Any] =
    schema.fields.map { field =>
      field.dataType match {
        case DateType =>
          (v: Any) =>
            v match {
              case null => ""
              case t: Timestamp => Conversions.formatTimestamp(t)
              case d: Date => Conversions.formatDate(d)
            }
        case TimestampType =>
          (v: Any) =>
            {
              if (v == null) ""
              else Conversions.formatTimestamp(v.asInstanceOf[Timestamp])
            }
        case StringType =>
          (v: Any) =>
            {
              if (v == null) ""
              else Conversions.formatString(v.asInstanceOf[String])
            }
        case BinaryType =>
          (v: Any) =>
            v match {
              case null => ""
              case bytes: Array[Byte] => Base64.encodeBase64String(bytes)
            }
        case _ =>
          (v: Any) =>
            Conversions.formatAny(v)
      }
    }
}

object DefaultSnowflakeWriter extends SnowflakeWriter(DefaultJDBCWrapper) 
Example 48
Source File: CustomMatchers.scala    From daml   with Apache License 2.0 5 votes vote down vote up
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.extractor.services

import java.sql.Timestamp

import org.scalatest._
import matchers._

import scala.concurrent.duration._

trait CustomMatchers {
  class SqlTimestampMoreOrLessEquals(expected: Timestamp, tolerance: Duration)
      extends Matcher[Timestamp]
      with Matchers {
    def apply(left: Timestamp) = {
      MatchResult(
        left.getTime === (expected.getTime +- tolerance.toMillis),
        s"""Timestamp $left was not within ${tolerance} to "$expected"""",
        s"""Timestamp $left was within ${tolerance} to "$expected"""",
      )
    }
  }

  def beWithin5Minutes(expected: Timestamp) = new SqlTimestampMoreOrLessEquals(expected, 5.minutes)

  def beWithin(duration: Duration)(expected: Timestamp) =
    new SqlTimestampMoreOrLessEquals(expected, duration)

  def beWithinSeconds(seconds: Long)(expected: Timestamp) =
    new SqlTimestampMoreOrLessEquals(expected, seconds.seconds)

  def beWithinMillis(millis: Long)(expected: Timestamp) =
    new SqlTimestampMoreOrLessEquals(expected, millis.millis)
}

object CustomMatchers extends CustomMatchers 
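A hypothetical usage of these matchers inside a ScalaTest spec (assuming the spec lives where the CustomMatchers trait is visible; the class name and values are illustrative):

import java.sql.Timestamp

import org.scalatest.{Matchers, WordSpec}

class TimestampToleranceSpec extends WordSpec with Matchers with CustomMatchers {

  "a timestamp taken just now" should {
    "be within five minutes of a reference timestamp" in {
      val reference = new Timestamp(System.currentTimeMillis())
      val observed  = new Timestamp(System.currentTimeMillis() + 1000L) // one second later

      observed should beWithin5Minutes(reference)
      observed should beWithinSeconds(30)(reference)
    }
  }
}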
Example 49
Source File: Transaction.scala    From Scala-Programming-Projects   with MIT License 5 votes vote down vote up
package coinyser

import java.sql.{Date, Timestamp}
import java.time.ZoneOffset

case class Transaction(timestamp: Timestamp,
                       date: Date,
                       tid: Int,
                       price: Double,
                       sell: Boolean,
                       amount: Double)


object Transaction {
  def apply(timestamp: Timestamp,
            tid: Int,
            price: Double,
            sell: Boolean,
            amount: Double) =
    new Transaction(
      timestamp = timestamp,
      date = Date.valueOf(
        timestamp.toInstant.atOffset(ZoneOffset.UTC).toLocalDate),
      tid = tid,
      price = price,
      sell = sell,
      amount = amount)
} 
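A small, hypothetical demonstration of the companion `apply` above, which derives the UTC `date` from the epoch-millisecond timestamp (assuming the coinyser.Transaction case class is on the classpath; the values mirror the ones used in the specs that follow):

import java.sql.Timestamp

object TransactionApplyDemo extends App {
  val tx = Transaction(
    timestamp = new Timestamp(1533797395000L), // 2018-08-09T06:49:55Z
    tid = 71826763,
    price = 6339.73,
    sell = false,
    amount = 0.04531827)

  println(tx.date) // 2018-08-09, derived in UTC regardless of the JVM time zone
}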
Example 50
Source File: StreamingProducerSpec.scala    From Scala-Programming-Projects   with MIT License 5 votes vote down vote up
package coinyser

import java.sql.Timestamp

import coinyser.StreamingProducerSpec._
import org.scalactic.TypeCheckedTripleEquals
import org.scalatest.{Matchers, WordSpec}

class StreamingProducerSpec extends WordSpec with Matchers with TypeCheckedTripleEquals {

  "StreamingProducer.deserializeWebsocketTransaction" should {
    "deserialize a valid String to a WebsocketTransaction" in {
      val str =
        """{"amount": 0.045318270000000001, "buy_order_id": 1969499130,
          |"sell_order_id": 1969495276, "amount_str": "0.04531827",
          |"price_str": "6339.73", "timestamp": "1533797395",
          |"price": 6339.7299999999996, "type": 0, "id": 71826763}""".stripMargin
      StreamingProducer.deserializeWebsocketTransaction(str) should
        ===(SampleWebsocketTransaction)
    }
  }

  "StreamingProducer.convertWsTransaction" should {
    "convert a WebSocketTransaction to a Transaction" in {
      StreamingProducer.convertWsTransaction(SampleWebsocketTransaction) should
        ===(SampleTransaction)
    }
  }

  "StreamingProducer.serializeTransaction" should {
    "serialize a Transaction to a String" in {
      StreamingProducer.serializeTransaction(SampleTransaction) should
        ===(SampleJsonTransaction)
    }
  }

  "StreamingProducer.subscribe" should {
    "register a callback that receives live trades" in {
      val pusher = new FakePusher(Vector("a", "b", "c"))
      var receivedTrades = Vector.empty[String]
      val io = StreamingProducer.subscribe(pusher) { trade => receivedTrades = receivedTrades :+ trade }
      io.unsafeRunSync()
      receivedTrades should ===(Vector("a", "b", "c"))
    }
  }
}

object StreamingProducerSpec {
  val SampleWebsocketTransaction = WebsocketTransaction(
    amount = 0.04531827, buy_order_id = 1969499130, sell_order_id = 1969495276,
    amount_str = "0.04531827", price_str = "6339.73", timestamp = "1533797395",
    price = 6339.73, `type` = 0, id = 71826763)

  val SampleTransaction = Transaction(
    timestamp = new Timestamp(1533797395000L), tid = 71826763,
    price = 6339.73, sell = false, amount = 0.04531827)

  val SampleJsonTransaction =
    """{"timestamp":"2018-08-09 06:49:55",
      |"date":"2018-08-09","tid":71826763,"price":6339.73,"sell":false,
      |"amount":0.04531827}""".stripMargin

} 
Example 51
Source File: BatchProducerSpec.scala    From Scala-Programming-Projects   with MIT License 5 votes vote down vote up
package coinyser

import java.io.{BufferedOutputStream, StringReader}
import java.nio.CharBuffer
import java.sql.Timestamp

import cats.effect.IO
import org.apache.spark.sql._
import org.apache.spark.sql.test.SharedSparkSession
import org.scalatest.{Matchers, WordSpec}


class BatchProducerSpec extends WordSpec with Matchers with SharedSparkSession {

  val httpTransaction1 = HttpTransaction("1532365695", "70683282", "7740.00", "0", "0.10041719")
  val httpTransaction2 = HttpTransaction("1532365693", "70683281", "7739.99", "0", "0.00148564")

  "BatchProducer.jsonToHttpTransaction" should {
    "create a Dataset[HttpTransaction] from a Json string" in {
      val json =
        """[{"date": "1532365695", "tid": "70683282", "price": "7740.00", "type": "0", "amount": "0.10041719"},
          |{"date": "1532365693", "tid": "70683281", "price": "7739.99", "type": "0", "amount": "0.00148564"}]""".stripMargin

      val ds: Dataset[HttpTransaction] = BatchProducer.jsonToHttpTransactions(json)
      ds.collect() should contain theSameElementsAs Seq(httpTransaction1, httpTransaction2)
    }
  }

  "BatchProducer.httpToDomainTransactions" should {
    "transform a Dataset[HttpTransaction] into a Dataset[Transaction]" in {
      import testImplicits._
      val source: Dataset[HttpTransaction] = Seq(httpTransaction1, httpTransaction2).toDS()
      val target: Dataset[Transaction] = BatchProducer.httpToDomainTransactions(source)
      val transaction1 = Transaction(timestamp = new Timestamp(1532365695000L), tid = 70683282, price = 7740.00, sell = false, amount = 0.10041719)
      val transaction2 = Transaction(timestamp = new Timestamp(1532365693000L), tid = 70683281, price = 7739.99, sell = false, amount = 0.00148564)

      target.collect() should contain theSameElementsAs Seq(transaction1, transaction2)
    }
  }

} 
Example 52
Source File: BatchProducerIT.scala    From Scala-Programming-Projects   with MIT License 5 votes vote down vote up
package coinyser

import java.sql.Timestamp
import java.time.Instant
import java.util.concurrent.TimeUnit

import cats.effect.{IO, Timer}
import org.apache.spark.sql.test.SharedSparkSession
import org.scalatest.{Matchers, WordSpec}

import scala.concurrent.duration.FiniteDuration


class BatchProducerIT extends WordSpec with Matchers with SharedSparkSession {

  import testImplicits._

  "BatchProducer.save" should {
    "save a Dataset[Transaction] to parquet" in withTempDir { tmpDir =>
      val transaction1 = Transaction(timestamp = new Timestamp(1532365695000L), tid = 70683282, price = 7740.00, sell = false, amount = 0.10041719)
      val transaction2 = Transaction(timestamp = new Timestamp(1532365693000L), tid = 70683281, price = 7739.99, sell = false, amount = 0.00148564)
      val sourceDS = Seq(transaction1, transaction2).toDS()

      val uri = tmpDir.toURI
      BatchProducer.save(sourceDS, uri).unsafeRunSync()
      tmpDir.list() should contain("date=2018-07-23")
      val readDS = spark.read.parquet(uri.toString).as[Transaction]
      spark.read.parquet(uri + "/date=2018-07-23").show()
      sourceDS.collect() should contain theSameElementsAs readDS.collect()
    }
  }

  "BatchProducer.processOneBatch" should {
    "filter and save a batch of transaction, wait 59 mn, fetch the next batch" in withTempDir { tmpDir =>
      implicit object FakeTimer extends Timer[IO] {
        private var clockRealTimeInMillis: Long = Instant.parse("2018-08-02T01:00:00Z").toEpochMilli

        def clockRealTime(unit: TimeUnit): IO[Long] =
          IO(unit.convert(clockRealTimeInMillis, TimeUnit.MILLISECONDS))

        def sleep(duration: FiniteDuration): IO[Unit] = IO {
          clockRealTimeInMillis = clockRealTimeInMillis + duration.toMillis
        }

        def shift: IO[Unit] = ???

        def clockMonotonic(unit: TimeUnit): IO[Long] = ???
      }
      implicit val appContext: AppContext = new AppContext(transactionStorePath = tmpDir.toURI)

      implicit def toTimestamp(str: String): Timestamp = Timestamp.from(Instant.parse(str))
      val tx1 = Transaction("2018-08-01T23:00:00Z", 1, 7657.58, true, 0.021762)
      val tx2 = Transaction("2018-08-02T01:00:00Z", 2, 7663.85, false, 0.01385517)
      val tx3 = Transaction("2018-08-02T01:58:30Z", 3, 7663.85, false, 0.03782426)
      val tx4 = Transaction("2018-08-02T01:58:59Z", 4, 7663.86, false, 0.15750809)
      val tx5 = Transaction("2018-08-02T02:30:00Z", 5, 7661.49, true, 0.1)

     // Start at 01:00, tx 2 ignored (too soon)
      val txs0 = Seq(tx1)
      // Fetch at 01:59, get nb 2 and 3, but will miss nb 4 because of Api lag
      val txs1 = Seq(tx2, tx3)
      // Fetch at 02:58, get nb 3, 4, 5
      val txs2 = Seq(tx3, tx4, tx5)
      // Fetch at 03:57, get nothing
      val txs3 = Seq.empty[Transaction]

      val start0 = Instant.parse("2018-08-02T00:00:00Z")
      val end0 = Instant.parse("2018-08-02T00:59:55Z")
      val threeBatchesIO =
        for {
          tuple1 <- BatchProducer.processOneBatch(IO(txs1.toDS()), txs0.toDS(), start0, end0) // end - Api lag
          (ds1, start1, end1) = tuple1

          tuple2 <- BatchProducer.processOneBatch(IO(txs2.toDS()), ds1, start1, end1)
          (ds2, start2, end2) = tuple2

          _ <- BatchProducer.processOneBatch(IO(txs3.toDS()), ds2, start2, end2)
        } yield (ds1, start1, end1, ds2, start2, end2)

      val (ds1, start1, end1, ds2, start2, end2) = threeBatchesIO.unsafeRunSync()
      ds1.collect() should contain theSameElementsAs txs1
      start1 should ===(end0)
      end1 should ===(Instant.parse("2018-08-02T01:58:55Z")) // initialClock + 1mn - 15s - 5s

      ds2.collect() should contain theSameElementsAs txs2
      start2 should ===(end1)
      end2 should ===(Instant.parse("2018-08-02T02:57:55Z")) // initialClock + 1mn -15s + 1mn -15s -5s = end1 + 45s

      val lastClock = Instant.ofEpochMilli(
        FakeTimer.clockRealTime(TimeUnit.MILLISECONDS).unsafeRunSync())
      lastClock should === (Instant.parse("2018-08-02T03:57:00Z"))

      val savedTransactions = spark.read.parquet(tmpDir.toString).as[Transaction].collect()
      val expectedTxs = Seq(tx2, tx3, tx4, tx5)
      savedTransactions should contain theSameElementsAs expectedTxs
    }
  }


} 
Example 53
Source File: StructuredNetworkWordCountWindowed.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples.sql.streaming

import java.sql.Timestamp

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._


object StructuredNetworkWordCountWindowed {

  def main(args: Array[String]) {
    if (args.length < 3) {
      System.err.println("Usage: StructuredNetworkWordCountWindowed <hostname> <port>" +
        " <window duration in seconds> [<slide duration in seconds>]")
      System.exit(1)
    }

    val host = args(0)
    val port = args(1).toInt
    val windowSize = args(2).toInt
    val slideSize = if (args.length == 3) windowSize else args(3).toInt
    if (slideSize > windowSize) {
      System.err.println("<slide duration> must be less than or equal to <window duration>")
    }
    val windowDuration = s"$windowSize seconds"
    val slideDuration = s"$slideSize seconds"

    val spark = SparkSession
      .builder
      .appName("StructuredNetworkWordCountWindowed")
      .getOrCreate()

    import spark.implicits._

    // Create DataFrame representing the stream of input lines from connection to host:port
    val lines = spark.readStream
      .format("socket")
      .option("host", host)
      .option("port", port)
      .option("includeTimestamp", true)
      .load()

    // Split the lines into words, retaining timestamps
    val words = lines.as[(String, Timestamp)].flatMap(line =>
      line._1.split(" ").map(word => (word, line._2))
    ).toDF("word", "timestamp")

    // Group the data by window and word and compute the count of each group
    val windowedCounts = words.groupBy(
      window($"timestamp", windowDuration, slideDuration), $"word"
    ).count().orderBy("window")

    // Start running the query that prints the windowed word counts to the console
    val query = windowedCounts.writeStream
      .outputMode("complete")
      .format("console")
      .option("truncate", "false")
      .start()

    query.awaitTermination()
  }
}
// scalastyle:on println 
Example 54
Source File: LiteralGenerator.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.expressions

import java.sql.{Date, Timestamp}

import org.scalacheck.{Arbitrary, Gen}

import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval


object LiteralGenerator {

  lazy val byteLiteralGen: Gen[Literal] =
    for { b <- Arbitrary.arbByte.arbitrary } yield Literal.create(b, ByteType)

  lazy val shortLiteralGen: Gen[Literal] =
    for { s <- Arbitrary.arbShort.arbitrary } yield Literal.create(s, ShortType)

  lazy val integerLiteralGen: Gen[Literal] =
    for { i <- Arbitrary.arbInt.arbitrary } yield Literal.create(i, IntegerType)

  lazy val longLiteralGen: Gen[Literal] =
    for { l <- Arbitrary.arbLong.arbitrary } yield Literal.create(l, LongType)

  lazy val floatLiteralGen: Gen[Literal] =
    for {
      f <- Gen.chooseNum(Float.MinValue / 2, Float.MaxValue / 2,
        Float.NaN, Float.PositiveInfinity, Float.NegativeInfinity)
    } yield Literal.create(f, FloatType)

  lazy val doubleLiteralGen: Gen[Literal] =
    for {
      f <- Gen.chooseNum(Double.MinValue / 2, Double.MaxValue / 2,
        Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity)
    } yield Literal.create(f, DoubleType)

  // TODO cache the generated data
  def decimalLiteralGen(precision: Int, scale: Int): Gen[Literal] = {
    assert(scale >= 0)
    assert(precision >= scale)
    Arbitrary.arbBigInt.arbitrary.map { s =>
      val a = (s % BigInt(10).pow(precision - scale)).toString()
      val b = (s % BigInt(10).pow(scale)).abs.toString()
      Literal.create(
        Decimal(BigDecimal(s"$a.$b"), precision, scale),
        DecimalType(precision, scale))
    }
  }

  lazy val stringLiteralGen: Gen[Literal] =
    for { s <- Arbitrary.arbString.arbitrary } yield Literal.create(s, StringType)

  lazy val binaryLiteralGen: Gen[Literal] =
    for { ab <- Gen.listOf[Byte](Arbitrary.arbByte.arbitrary) }
      yield Literal.create(ab.toArray, BinaryType)

  lazy val booleanLiteralGen: Gen[Literal] =
    for { b <- Arbitrary.arbBool.arbitrary } yield Literal.create(b, BooleanType)

  lazy val dateLiteralGen: Gen[Literal] =
    for { d <- Arbitrary.arbInt.arbitrary } yield Literal.create(new Date(d), DateType)

  lazy val timestampLiteralGen: Gen[Literal] =
    for { t <- Arbitrary.arbLong.arbitrary } yield Literal.create(new Timestamp(t), TimestampType)

  lazy val calendarIntervalLiterGen: Gen[Literal] =
    for { m <- Arbitrary.arbInt.arbitrary; s <- Arbitrary.arbLong.arbitrary}
      yield Literal.create(new CalendarInterval(m, s), CalendarIntervalType)


  // Sometimes it would be quite expensive when an unlimited value is used;
  // for example, the `times` argument for StringRepeat would hang the test 'forever'
  // if it were tested against Int.MaxValue by ScalaCheck. Therefore, using values from a
  // limited range is more reasonable.
  lazy val limitedIntegerLiteralGen: Gen[Literal] =
    for { i <- Gen.choose(-100, 100) } yield Literal.create(i, IntegerType)

  def randomGen(dt: DataType): Gen[Literal] = {
    dt match {
      case ByteType => byteLiteralGen
      case ShortType => shortLiteralGen
      case IntegerType => integerLiteralGen
      case LongType => longLiteralGen
      case DoubleType => doubleLiteralGen
      case FloatType => floatLiteralGen
      case DateType => dateLiteralGen
      case TimestampType => timestampLiteralGen
      case BooleanType => booleanLiteralGen
      case StringType => stringLiteralGen
      case BinaryType => binaryLiteralGen
      case CalendarIntervalType => calendarIntervalLiterGen
      case DecimalType.Fixed(precision, scale) => decimalLiteralGen(precision, scale)
      case dt => throw new IllegalArgumentException(s"not supported type $dt")
    }
  }
} 
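Note: a small usage sketch for the generator above (assumes the spark-catalyst test artifact and ScalaCheck on the classpath; the object name is illustrative):

import org.apache.spark.sql.catalyst.expressions.LiteralGenerator
import org.apache.spark.sql.types.TimestampType

object LiteralGeneratorSketch extends App {
  // Gen.sample returns Option[Literal]; None is possible if generation fails
  val literals = (1 to 3).flatMap(_ => LiteralGenerator.randomGen(TimestampType).sample)
  literals.foreach(lit => println(s"${lit.dataType}: ${lit.value}"))
}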
Example 55
Source File: SQLCompatibilityFunctionSuite.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql

import java.math.BigDecimal
import java.sql.Timestamp

import org.apache.spark.sql.test.SharedSQLContext


class SQLCompatibilityFunctionSuite extends QueryTest with SharedSQLContext {

  test("ifnull") {
    checkAnswer(
      sql("SELECT ifnull(null, 'x'), ifnull('y', 'x'), ifnull(null, null)"),
      Row("x", "y", null))

    // Type coercion
    checkAnswer(
      sql("SELECT ifnull(1, 2.1d), ifnull(null, 2.1d)"),
      Row(1.0, 2.1))
  }

  test("nullif") {
    checkAnswer(
      sql("SELECT nullif('x', 'x'), nullif('x', 'y')"),
      Row(null, "x"))

    // Type coercion
    checkAnswer(
      sql("SELECT nullif(1, 2.1d), nullif(1, 1.0d)"),
      Row(1.0, null))
  }

  test("nvl") {
    checkAnswer(
      sql("SELECT nvl(null, 'x'), nvl('y', 'x'), nvl(null, null)"),
      Row("x", "y", null))

    // Type coercion
    checkAnswer(
      sql("SELECT nvl(1, 2.1d), nvl(null, 2.1d)"),
      Row(1.0, 2.1))
  }

  test("nvl2") {
    checkAnswer(
      sql("SELECT nvl2(null, 'x', 'y'), nvl2('n', 'x', 'y'), nvl2(null, null, null)"),
      Row("y", "x", null))

    // Type coercion
    checkAnswer(
      sql("SELECT nvl2(null, 1, 2.1d), nvl2('n', 1, 2.1d)"),
      Row(2.1, 1.0))
  }

  test("SPARK-16730 cast alias functions for Hive compatibility") {
    checkAnswer(
      sql("SELECT boolean(1), tinyint(1), smallint(1), int(1), bigint(1)"),
      Row(true, 1.toByte, 1.toShort, 1, 1L))

    checkAnswer(
      sql("SELECT float(1), double(1), decimal(1)"),
      Row(1.toFloat, 1.0, new BigDecimal(1)))

    checkAnswer(
      sql("SELECT date(\"2014-04-04\"), timestamp(date(\"2014-04-04\"))"),
      Row(new java.util.Date(114, 3, 4), new Timestamp(114, 3, 4, 0, 0, 0, 0)))

    checkAnswer(
      sql("SELECT string(1)"),
      Row("1"))

    // Error handling: only one argument
    val errorMsg = intercept[AnalysisException](sql("SELECT string(1, 2)")).getMessage
    assert(errorMsg.contains("Function string accepts only one argument"))
  }
} 
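Note: `new Timestamp(114, 3, 4, 0, 0, 0, 0)` above uses the deprecated year-1900, zero-based-month constructor. A JDK-only sketch of the same value built with non-deprecated APIs (an illustration, not part of the suite):

import java.sql.Timestamp
import java.time.LocalDateTime

// 114 -> year 2014, month index 3 -> April, day 4
val viaValueOf       = Timestamp.valueOf("2014-04-04 00:00:00")
val viaLocalDateTime = Timestamp.valueOf(LocalDateTime.of(2014, 4, 4, 0, 0, 0))
assert(viaValueOf == viaLocalDateTime)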
Example 56
Source File: DataConverter.scala    From spark-cdm   with MIT License 5 votes vote down vote up
package com.microsoft.cdm.utils

import java.text.SimpleDateFormat
import java.util.{Locale, TimeZone}
import java.sql.Timestamp

import org.apache.commons.lang.time.DateUtils
import org.apache.spark.sql.catalyst.util.TimestampFormatter
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String


class DataConverter() extends Serializable {

  val dateFormatter = new SimpleDateFormat(Constants.SINGLE_DATE_FORMAT)
  val timestampFormatter = TimestampFormatter(Constants.TIMESTAMP_FORMAT, TimeZone.getTimeZone("UTC"))


  val toSparkType: Map[CDMDataType.Value, DataType] = Map(
    CDMDataType.int64 -> LongType,
    CDMDataType.dateTime -> DateType,
    CDMDataType.string -> StringType,
    CDMDataType.double -> DoubleType,
    CDMDataType.decimal -> DecimalType(Constants.DECIMAL_PRECISION,0),
    CDMDataType.boolean -> BooleanType,
    CDMDataType.dateTimeOffset -> TimestampType
  )

  def jsonToData(dt: DataType, value: String): Any = {
    return dt match {
      case LongType => value.toLong
      case DoubleType => value.toDouble
      case DecimalType() => Decimal(value)
      case BooleanType => value.toBoolean
      case DateType => dateFormatter.parse(value)
      case TimestampType => timestampFormatter.parse(value)
      case _ => UTF8String.fromString(value)
    }
  }

  def toCdmType(dt: DataType): CDMDataType.Value = {
    return dt match {
      case IntegerType => CDMDataType.int64
      case LongType => CDMDataType.int64
      case DateType => CDMDataType.dateTime
      case StringType => CDMDataType.string
      case DoubleType => CDMDataType.double
      case DecimalType() => CDMDataType.decimal
      case BooleanType => CDMDataType.boolean
      case TimestampType => CDMDataType.dateTimeOffset
    }
  }  

  def dataToString(data: Any, dataType: DataType): String = {
    (dataType, data) match {
      case (_, null) => null
      case (DateType, _) => dateFormatter.format(data)
      case (TimestampType, v: Number) => timestampFormatter.format(data.asInstanceOf[Long])
      case _ => data.toString
    }
  }

} 
Example 57
Source File: MessageSink.scala    From parquet4s   with MIT License 5 votes vote down vote up
package com.github.mjakubowski84.parquet4s.indefinite

import java.sql.Timestamp
import java.util.UUID

import akka.Done
import akka.kafka.CommitterSettings
import akka.kafka.ConsumerMessage.CommittableOffsetBatch
import akka.kafka.scaladsl.Committer
import akka.stream.scaladsl.{Flow, Keep, Sink}
import com.github.mjakubowski84.parquet4s.{ChunkPathBuilder, ParquetStreams, ParquetWriter}
import com.google.common.io.Files
import org.apache.hadoop.fs.Path
import org.apache.parquet.hadoop.metadata.CompressionCodecName

import scala.concurrent.Future
import scala.concurrent.duration._

object MessageSink {

  case class Data(timestamp: Timestamp, word: String)

  val MaxChunkSize: Int = 128
  val ChunkWriteTimeWindow: FiniteDuration = 10.seconds
  val WriteDirectoryName: String = "messages"

}

trait MessageSink {

  this: Akka =>

  import MessageSink._
  import MessageSource._

  protected val baseWritePath: String = new Path(Files.createTempDir().getAbsolutePath, WriteDirectoryName).toString

  private val writerOptions = ParquetWriter.Options(compressionCodecName = CompressionCodecName.SNAPPY)

  private lazy val committerSink = Flow.apply[Seq[Message]].map { messages =>
    CommittableOffsetBatch(messages.map(_.committableOffset))
  }.toMat(Committer.sink(CommitterSettings(system)))(Keep.right)

  def chunkPath: ChunkPathBuilder[Message] = {
    case (basePath, chunk) =>
      val lastElementDateTime = new Timestamp(chunk.last.record.timestamp()).toLocalDateTime
      val year = lastElementDateTime.getYear
      val month = lastElementDateTime.getMonthValue
      val day = lastElementDateTime.getDayOfMonth
      val uuid = UUID.randomUUID()

      basePath.suffix(s"/$year/$month/$day/part-$uuid.parquet")
  }

  lazy val messageSink: Sink[Message, Future[Done]] = ParquetStreams.toParquetIndefinite(
    path = baseWritePath,
    maxChunkSize = MaxChunkSize,
    chunkWriteTimeWindow = ChunkWriteTimeWindow,
    buildChunkPath = chunkPath,
    preWriteTransformation = { message: Message =>
      Data(
        timestamp = new Timestamp(message.record.timestamp()),
        word = message.record.value()
      )
    },
    postWriteSink = committerSink,
    options = writerOptions
  )

} 
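Note: a JDK-only sketch of the date-based path logic in `chunkPath` above; the record timestamp is plain epoch millis here and the value shown is hypothetical:

import java.sql.Timestamp

val recordTimestampMillis = 1530000000000L   // hypothetical Kafka record timestamp
val dateTime  = new Timestamp(recordTimestampMillis).toLocalDateTime
val partition = s"${dateTime.getYear}/${dateTime.getMonthValue}/${dateTime.getDayOfMonth}"
println(partition)                           // e.g. 2018/6/26 (depends on the local time zone)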
Example 58
Source File: JdbcResultSpec.scala    From daf   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package daf.dataset.query.jdbc

import java.sql.Timestamp
import java.time.{ LocalDateTime, OffsetDateTime }

import org.scalatest.{ MustMatchers, WordSpec }
import play.api.libs.json._

class JdbcResultSpec extends WordSpec with MustMatchers {

  "A JDBC Result container" must {

    "convert to CSV" in {
      JdbcResults.flat.toCsv.toList must be {
        List(
          """"int", "string", "bool", "timestamp"""",
          """1, "str1", true, "2018-06-25T09:00:00Z"""",
          """2, "str2", false, "2018-06-25T09:30:00Z"""",
          """<null>, <null>, false, <null>"""
        )
      }
    }

    "convert to json" in {
      JdbcResults.flat.toJson.toList must be {
        Seq(
          JsObject {
            Seq(
              "int"       -> JsNumber(1),
              "string"    -> JsString("str1"),
              "bool"      -> JsBoolean(true),
              "timestamp" -> JsString("2018-06-25T09:00:00Z")
            )
          },
          JsObject {
            Seq(
              "int"       -> JsNumber(2),
              "string"    -> JsString("str2"),
              "bool"      -> JsBoolean(false),
              "timestamp" -> JsString("2018-06-25T09:30:00Z")
            )
          },
          JsObject {
            Seq(
              "int"       -> JsNull,
              "string"    -> JsNull,
              "bool"      -> JsBoolean(false),
              "timestamp" -> JsNull
            )
          }
        )
      }
    }

  }

}

object JdbcResults {

  private val offset = OffsetDateTime.now().getOffset

  private def timestamp(dateTime: LocalDateTime) = Timestamp.from { dateTime.toInstant(offset) }

  val flat = JdbcResult(
    header = Seq("int", "string", "bool", "timestamp"),
    rows   = Vector(
      List(
        Int.box(1),
        "str1",
        Boolean.box(true),
        timestamp { LocalDateTime.of(2018, 6, 25, 9, 0) }
      ),
      List(
        Int.box(2),
        "str2",
        Boolean.box(false),
        timestamp { LocalDateTime.of(2018, 6, 25, 9, 30) }
      ),
      List(
        null,
        null,
        Boolean.box(false),
        null
      )
    )
  )

} 
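Note: a JDK-only sketch of the `LocalDateTime` to `Timestamp` conversion used by the spec above, plus the reverse trip through `java.time`:

import java.sql.Timestamp
import java.time.{LocalDateTime, OffsetDateTime, ZoneOffset}

val local        = LocalDateTime.of(2018, 6, 25, 9, 0)
val ts           = Timestamp.from(local.toInstant(ZoneOffset.UTC))
val roundTripped = OffsetDateTime.ofInstant(ts.toInstant, ZoneOffset.UTC).toLocalDateTime
assert(roundTripped == local)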
Example 59
Source File: UJESSQLTypeParser.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.ujes.jdbc



import java.sql.{Timestamp, Types}


object UJESSQLTypeParser {
  def parserFromName(typeName: String): Int = {
    typeName match {
      case null => throw new UJESSQLException(UJESSQLErrorCode.METADATA_EMPTY)
      case "string" => Types.CHAR
      case "short" => Types.SMALLINT
      case "int" => Types.INTEGER
      case "long" => Types.BIGINT
      case "float" => Types.FLOAT
      case "double" => Types.DOUBLE
      case "boolean" => Types.BOOLEAN
      case "byte" => Types.TINYINT
      case "char" => Types.CHAR
      case "timestamp" => Types.TIMESTAMP
      case _ => throw new UJESSQLException(UJESSQLErrorCode.PREPARESTATEMENT_TYPEERROR)
    }
  }

  def parserFromVal(obj: Any): Int ={
    obj match {
      case _: String => Types.CHAR
      case _: Short => Types.SMALLINT
      case _: Int => Types.INTEGER
      case _: Long => Types.BIGINT
      case _: Float => Types.FLOAT
      case _: Double => Types.DOUBLE
      case _: Boolean => Types.BOOLEAN
      case _: Byte => Types.TINYINT
      case _: Char => Types.CHAR
      case _: Timestamp => Types.TIMESTAMP
      case _ => throw new UJESSQLException(UJESSQLErrorCode.PREPARESTATEMENT_TYPEERROR)
    }
  }
  def parserFromMetaData(dataType: Int): String = {
    dataType match {
      case Types.CHAR => "string"
      case Types.SMALLINT => "short"
      case Types.INTEGER => "int"
      case Types.BIGINT => "long"
      case Types.FLOAT => "float"
      case Types.DOUBLE => "double"
      case Types.BOOLEAN => "boolean"
      case Types.TINYINT => "byte"
      case Types.CHAR => "char"
      case Types.TIMESTAMP => "timestamp"
      case _ => throw new UJESSQLException(UJESSQLErrorCode.PREPARESTATEMENT_TYPEERROR)
    }
  }
} 
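Note: a usage sketch for the parser above (assumes the Linkis JDBC module on the classpath):

import java.sql.{Timestamp, Types}

import com.webank.wedatasphere.linkis.ujes.jdbc.UJESSQLTypeParser

val now = new Timestamp(System.currentTimeMillis())
assert(UJESSQLTypeParser.parserFromVal(now) == Types.TIMESTAMP)
assert(UJESSQLTypeParser.parserFromName("timestamp") == Types.TIMESTAMP)
assert(UJESSQLTypeParser.parserFromMetaData(Types.TIMESTAMP) == "timestamp")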
Example 60
Source File: FilterPushdown.scala    From spark-select   with Apache License 2.0 5 votes vote down vote up
package io.minio.spark.select

import java.sql.{Date, Timestamp}

import org.apache.spark.sql.sources._
import org.apache.spark.sql.types._


object FilterPushdown {

  private def getTypeForAttribute(schema: StructType, attribute: String): Option[DataType] = {
    if (schema.fieldNames.contains(attribute)) {
      Some(schema(attribute).dataType)
    } else {
      None
    }
  }

  def queryFromSchema(schema: StructType, filters: Array[Filter]): String = {
    var columnList = schema.fields.map(x => s"s."+s""""${x.name}"""").mkString(",")
    if (columnList.length == 0) {
      columnList = "*"
    }
    val whereClause = buildWhereClause(schema, filters)
    if (whereClause.length == 0) {
      s"select $columnList from S3Object s"
    } else {
      s"select $columnList from S3Object s $whereClause"
    }
  }

} 
Example 61
Source File: TypeCast.scala    From spark-select   with Apache License 2.0 5 votes vote down vote up
package io.minio.spark.select.util

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.{SimpleDateFormat, NumberFormat}
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try


object TypeCast {

  @throws[IllegalArgumentException]
  private[select] def toChar(str: String): Char = {
    if (str.charAt(0) == '\\') {
      str.charAt(1)
      match {
        case 't' => '\t'
        case 'r' => '\r'
        case 'b' => '\b'
        case 'f' => '\f'
        case '\"' => '\"' // In case user changes quote char and uses \" as delimiter in options
        case '\'' => '\''
        case 'u' if str == """\u0000""" => '\u0000'
        case _ =>
          throw new IllegalArgumentException(s"Unsupported special character for delimiter: $str")
      }
    } else if (str.length == 1) {
      str.charAt(0)
    } else {
      throw new IllegalArgumentException(s"Delimiter cannot be more than one character: $str")
    }
  }
} 
Example 62
Source File: SlickJdbcScheduledMessagesRepository.scala    From reliable-http-client   with Apache License 2.0 5 votes vote down vote up
package rhttpc.transport.amqpjdbc.slick

import java.sql.Timestamp

import rhttpc.transport.amqpjdbc.{MessageToSchedule, ScheduledMessage, ScheduledMessagesRepository}
import slick.jdbc.{JdbcBackend, JdbcProfile}

import scala.concurrent.{ExecutionContext, Future}

private[amqpjdbc] class SlickJdbcScheduledMessagesRepository(profile: JdbcProfile, db: JdbcBackend.Database)
                                                            (implicit ec: ExecutionContext) extends ScheduledMessagesRepository {

  class V1_001__AddingPropertiesToScheduledMessagesMigration extends AddingPropertiesToScheduledMessagesMigration {
    override protected val profile: JdbcProfile = SlickJdbcScheduledMessagesRepository.this.profile
  }

  val messagesMigration = new V1_001__AddingPropertiesToScheduledMessagesMigration

  import messagesMigration._
  import profile.api._

  override def save(msg: MessageToSchedule): Future[Unit] = {
    import msg._
    val action = for {
      currentTimestamp <- sql"select current_timestamp".as[Timestamp].head
      plannedRun = new Timestamp(currentTimestamp.getTime + msg.delay.toMillis)
      messageToAdd = ScheduledMessage(None, queueName, content, properties, plannedRun)
      insertResult <- scheduledMessages += messageToAdd
    } yield ()
    db.run(action.transactionally)
  }

  override def fetchMessagesShouldByRun(queueName: String, batchSize: Int)
                                       (onMessages: (Seq[ScheduledMessage]) => Future[Any]): Future[Int] = {
    def drain(): Future[Int] = {
      val fetchAction = for {
        currentTimestamp <- sql"select current_timestamp".as[Timestamp].head
        fetched <- scheduledMessages.filter { msg =>
          msg.queueName === queueName &&
            msg.plannedRun <= currentTimestamp
        }.sortBy(_.plannedRun desc).take(batchSize).result
      } yield fetched

      def consumeAction(fetched: Seq[ScheduledMessage]) = {
        val fetchedIds = fetched.flatMap(_.id)
        for {
          deleted <- scheduledMessages.filter(_.id inSet fetchedIds).delete
          _ <- {
            if (deleted != fetched.size) {
              DBIO.failed(ConcurrentFetchException)
            } else {
              DBIO.successful(())
            }
          }
          _ <- DBIO.from(onMessages(fetched))
        } yield fetched.size
      }

      val consumedFuture = for {
        fetched <- db.run(fetchAction.transactionally)
        consumed <- db.run(consumeAction(fetched).transactionally)
      } yield consumed

      val consumedRecovered = consumedFuture.recover {
        case ConcurrentFetchException => 0
      }

      for {
        consumed <- consumedRecovered
        consumedNext <- {
          if (consumed == batchSize)
            drain()
          else
            Future.successful(0)
        }
      } yield consumed + consumedNext
    }
    drain()
  }

  override def queuesStats(names: Set[String]): Future[Map[String, Int]] = {
    val action = scheduledMessages
      .filter(_.queueName inSet names)
      .groupBy(_.queueName).map {
      case (queueName, msgs) =>
        (queueName, msgs.size)
    }.result
    db.run(action).map(_.toMap)
  }
}

case object ConcurrentFetchException extends Exception(s"Concurrent fetch detected") 
Example 63
Source File: ReadParquetEEL.scala    From eel-sdk   with Apache License 2.0 5 votes vote down vote up
package io.eels

import java.sql.Timestamp

import io.eels.component.parquet.{ParquetSink, ParquetSource}
import io.eels.datastream.DataStream
import io.eels.schema.{ArrayType, DecimalType, Field, IntType, Precision, Scale, StringType, StructType, TimestampMillisType}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object ReadParquetEEL extends App {

  def readParquet(path: Path): Unit = {

    implicit val hadoopConfiguration = new Configuration()
    implicit val hadoopFileSystem = FileSystem.get(hadoopConfiguration)
    val rows = ParquetSource(path).toDataStream().collect
    rows.foreach(row => println(row))
  }

  val parquetFilePath = new Path("file:///home/sam/development/person2.parquet")
  implicit val hadoopConfiguration = new Configuration()
  implicit val hadoopFileSystem = FileSystem.get(hadoopConfiguration)

  val friendStruct = Field.createStructField("FRIEND",
    Seq(
      Field("NAME", StringType),
      Field("AGE", IntType.Signed)
    )
  )

  val personDetailsStruct = Field.createStructField("PERSON_DETAILS",
    Seq(
      Field("NAME", StringType),
      Field("AGE", IntType.Signed),
      Field("SALARY", DecimalType(Precision(38), Scale(5))),
      Field("CREATION_TIME", TimestampMillisType)
    )
  )

  val friendType = StructType(friendStruct)
  val schema = StructType(personDetailsStruct, Field("FRIENDS", ArrayType(friendType), nullable = false))

  val friends = Vector(
    Vector(Vector("John", 25)),
    Vector(Vector("Adam", 26)),
    Vector(Vector("Steven", 27))
  )

  val rows = Vector(
    Vector(Vector("Fred", 50, BigDecimal("50000.99000"), new Timestamp(System.currentTimeMillis())), friends)
  )

  try {
    DataStream.fromValues(schema, rows).to(ParquetSink(parquetFilePath).withOverwrite(true))
  } catch {
    case e: Exception => e.printStackTrace()
  }

  try {
    readParquet(parquetFilePath)
  } catch {
    case e: Exception => e.printStackTrace()
  }
} 
Example 64
Source File: SchemaFn.scala    From eel-sdk   with Apache License 2.0 5 votes vote down vote up
package io.eels.schema

import java.sql.Timestamp

object SchemaFn {
  def toDataType(clz: Class[_]): DataType = {
    val intClass = classOf[Int]
    val floatClass = classOf[Float]
    val stringClass = classOf[String]
    val charClass = classOf[Char]
    val bigIntClass = classOf[BigInt]
    val booleanClass = classOf[Boolean]
    val doubleClass = classOf[Double]
    val bigdecimalClass = classOf[BigDecimal]
    val longClass = classOf[Long]
    val byteClass = classOf[Byte]
    val shortClass = classOf[Short]
    val timestampClass = classOf[Timestamp]
    clz match {
      case `bigdecimalClass` => DecimalType(Precision(22), Scale(5))
      case `bigIntClass` => BigIntType
      case `booleanClass` => BooleanType
      case `byteClass` => ByteType.Signed
      case `charClass` => CharType(1)
      case `doubleClass` => DoubleType
      case `intClass` => IntType.Signed
      case `floatClass` => FloatType
      case `longClass` => LongType.Signed
      case `stringClass` => StringType
      case `shortClass` => ShortType.Signed
      case `timestampClass` => TimestampMillisType
      case _ => sys.error(s"Can not map $clz to data type")
    }
  }
} 
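Note: a usage sketch for `SchemaFn` (assumes eel-sdk on the classpath); per the match above, `java.sql.Timestamp` maps to `TimestampMillisType`:

import java.sql.Timestamp

import io.eels.schema.{SchemaFn, TimestampMillisType}

assert(SchemaFn.toDataType(classOf[Timestamp]) == TimestampMillisType)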
Example 65
Source File: TimestampCastSpec.scala    From flint   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql

import java.sql.Timestamp
import java.time.Instant

import scala.reflect.runtime.universe.TypeTag

import com.twosigma.flint.timeseries.TimeSeriesSuite
import org.apache.spark.sql.functions._

class TimestampCastSpec extends TimeSeriesSuite {
  import TimestampCastSpec._
  import testImplicits._

  behavior of "TimestampToNanos"

  testEvalAndCodegen("retain up to microsecond precision", nanosToTimestamp(expectedNanos)){ df =>
    val actual = df.select(TimestampToNanos(col("time")).as("long"))
      .collect()
      .map(_.getAs[Long]("long"))

    assert(actual === expectedNanos)
  }

  behavior of "LongToTimestamp"

  testEvalAndCodegen("retain up to microsecond precision", expectedNanos) { df =>
    val actual = df.select(NanosToTimestamp(col("time")).as("timestamp"))
      .collect()
      .map(_.getAs[Timestamp]("timestamp"))

    val expectedTimestamps = expectedNanos.map { nanos =>
      Timestamp.from(Instant.ofEpochSecond(0, nanos))
    }
    assert(actual === expectedTimestamps)
  }

  
  private def asExternalRDD[T: TypeTag](input: Seq[T]): DataFrame = {
    sc.range(0, input.size.toLong).map { i =>
      Tuple1(input(i.toInt))
    }.toDF("time")
  }

}

object TimestampCastSpec {

  val expectedNanos = Seq[Long](
    0L,
    Long.MaxValue - (Long.MaxValue % 1000), // clip to microsecond precision
    946684800000000000L, // 2000-01-01
    1262304000000000000L, // 2010-01-01
    1893456000000000000L // 2030-01-01
  )

  def nanosToTimestamp(input: Seq[Long]): Seq[Timestamp] = input.map { v =>
    Timestamp.from(Instant.ofEpochSecond(0, v))
  }

} 
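Note: a JDK-only sketch of the nanosecond round trip the spec exercises; `Timestamp` keeps the sub-millisecond digits in `getNanos`, so the conversion is lossless for values that fit:

import java.sql.Timestamp
import java.time.Instant

val nanos = 1262304000000000000L   // 2010-01-01T00:00:00Z
val ts    = Timestamp.from(Instant.ofEpochSecond(0, nanos))
val back  = ts.toInstant.getEpochSecond * 1000000000L + ts.getNanos
assert(back == nanos)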
Example 66
Source File: JsonSupport.scala    From akka-http-slick-sample   with MIT License 5 votes vote down vote up
package net.softler.data.model

import java.sql.Timestamp
import java.time.Instant
import java.util.UUID

import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport
import spray.json.{DefaultJsonProtocol, JsNumber, JsString, JsValue, JsonFormat, RootJsonFormat}

trait BaseJsonProtocol extends DefaultJsonProtocol {
  implicit val timestampFormat: JsonFormat[Timestamp] = new JsonFormat[Timestamp] {
    override def write(obj: Timestamp): JsValue = JsNumber(obj.getTime)

    override def read(json: JsValue): Timestamp = json match {
      case JsNumber(x) => Timestamp.from(Instant.ofEpochMilli(x.toLong))
      case _ =>
        throw new IllegalArgumentException(
          s"Can not parse json value [$json] to a timestamp object")
    }
  }

  implicit val uuidJsonFormat: JsonFormat[UUID] = new JsonFormat[UUID] {
    override def write(x: UUID): JsValue = JsString(x.toString)

    override def read(value: JsValue): UUID = value match {
      case JsString(x) => UUID.fromString(x)
      case x =>
        throw new IllegalArgumentException("Expected UUID as JsString, but got " + x.getClass)
    }
  }
}


trait JsonProtocol extends SprayJsonSupport with BaseJsonProtocol {
  implicit val userFormat: RootJsonFormat[User] = jsonFormat10(User)
} 
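Note: a round-trip sketch for the epoch-millis `Timestamp` format above (assumes spray-json on the classpath and the `BaseJsonProtocol` trait from this example):

import java.sql.Timestamp

import net.softler.data.model.BaseJsonProtocol

object TimestampJsonSketch extends App with BaseJsonProtocol {
  val ts   = new Timestamp(1541700000000L)
  val json = timestampFormat.write(ts)        // JsNumber(1541700000000)
  assert(timestampFormat.read(json) == ts)
}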
Example 67
Source File: entities.scala    From akka-http-slick-sample   with MIT License 5 votes vote down vote up
package net.softler.data.model

import java.sql.Timestamp
import java.util.UUID

trait Entity {
  def id: UUID
  def created: Timestamp
  def updated: Option[Timestamp]
  def deleted: Option[Timestamp]
}

case class User(id: UUID,
                login: String,
                password: String,
                email: String,
                firstName: Option[String],
                lastName: Option[String],
                readOnly: Boolean,
                created: Timestamp,
                updated: Option[Timestamp],
                deleted: Option[Timestamp])
    extends Entity

case class Role(id: UUID,
                name: String,
                readOnly: Boolean,
                created: Timestamp,
                updated: Option[Timestamp],
                deleted: Option[Timestamp])
    extends Entity

case class UserRole(user: User, role: Role) 
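Note: a construction sketch for the `User` entity above; all field values are illustrative:

import java.sql.Timestamp
import java.util.UUID

import net.softler.data.model.User

val now = new Timestamp(System.currentTimeMillis())
val user = User(
  id = UUID.randomUUID(), login = "jdoe", password = "secret", email = "jdoe@example.com",
  firstName = Some("John"), lastName = Some("Doe"), readOnly = false,
  created = now, updated = None, deleted = None)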
Example 68
Source File: StationSuite.scala    From gihyo-spark-book-example   with Apache License 2.0 5 votes vote down vote up
package jp.gihyo.spark.ch05

import java.sql.Timestamp
import java.text.SimpleDateFormat

import org.scalatest.FunSuite

class StationSuite extends FunSuite {

  test("should be parse") {
    val line = "2,San Jose Diridon Caltrain Station,37.329732,-121.901782,27,San Jose,8/6/2013"
    val station = Station.parse(line)

    val dateFormat = new SimpleDateFormat("MM/dd/yyy")
    assert(station.id === 2)
    assert(station.name === "San Jose Diridon Caltrain Station")
    assert(station.lat === 37.329732)
    assert(station.lon === -121.901782)
    assert(station.dockcount === 27)
    assert(station.landmark === "San Jose")
    assert(station.installation === new Timestamp(dateFormat.parse("8/6/2013").getTime))
  }
} 
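Note: a thread-safe alternative sketch to the `SimpleDateFormat` parsing used above, producing an equivalent installation `Timestamp` via `java.time`:

import java.sql.Timestamp
import java.time.LocalDate
import java.time.format.DateTimeFormatter

val fmt       = DateTimeFormatter.ofPattern("M/d/yyyy")
val installed = Timestamp.valueOf(LocalDate.parse("8/6/2013", fmt).atStartOfDay())
println(installed)   // 2013-08-06 00:00:00.0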
Example 69
Source File: QueryPartitionSuite.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive

import java.io.File
import java.sql.Timestamp

import com.google.common.io.Files
import org.apache.hadoop.fs.FileSystem

import org.apache.spark.internal.config._
import org.apache.spark.sql._
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.util.Utils

class QueryPartitionSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
  import spark.implicits._

  private def queryWhenPathNotExist(): Unit = {
    withTempView("testData") {
      withTable("table_with_partition", "createAndInsertTest") {
        withTempDir { tmpDir =>
          val testData = sparkContext.parallelize(
            (1 to 10).map(i => TestData(i, i.toString))).toDF()
          testData.createOrReplaceTempView("testData")

          // create the table for test
          sql(s"CREATE TABLE table_with_partition(key int,value string) " +
              s"PARTITIONED by (ds string) location '${tmpDir.toURI}' ")
          sql("INSERT OVERWRITE TABLE table_with_partition  partition (ds='1') " +
              "SELECT key,value FROM testData")
          sql("INSERT OVERWRITE TABLE table_with_partition  partition (ds='2') " +
              "SELECT key,value FROM testData")
          sql("INSERT OVERWRITE TABLE table_with_partition  partition (ds='3') " +
              "SELECT key,value FROM testData")
          sql("INSERT OVERWRITE TABLE table_with_partition  partition (ds='4') " +
              "SELECT key,value FROM testData")

          // test for the exist path
          checkAnswer(sql("select key,value from table_with_partition"),
            testData.union(testData).union(testData).union(testData))

          // delete the path of one partition
          tmpDir.listFiles
              .find { f => f.isDirectory && f.getName().startsWith("ds=") }
              .foreach { f => Utils.deleteRecursively(f) }

          // test for after delete the path
          checkAnswer(sql("select key,value from table_with_partition"),
            testData.union(testData).union(testData))
        }
      }
    }
  }

  test("SPARK-5068: query data when path doesn't exist") {
    withSQLConf(SQLConf.HIVE_VERIFY_PARTITION_PATH.key -> "true") {
      queryWhenPathNotExist()
    }
  }

  test("Replace spark.sql.hive.verifyPartitionPath by spark.files.ignoreMissingFiles") {
    withSQLConf(SQLConf.HIVE_VERIFY_PARTITION_PATH.key -> "false") {
      sparkContext.conf.set(IGNORE_MISSING_FILES.key, "true")
      queryWhenPathNotExist()
    }
  }

  test("SPARK-21739: Cast expression should initialize timezoneId") {
    withTable("table_with_timestamp_partition") {
      sql("CREATE TABLE table_with_timestamp_partition(value int) PARTITIONED BY (ts TIMESTAMP)")
      sql("INSERT OVERWRITE TABLE table_with_timestamp_partition " +
        "PARTITION (ts = '2010-01-01 00:00:00.000') VALUES (1)")

      // test for Cast expression in TableReader
      checkAnswer(sql("SELECT * FROM table_with_timestamp_partition"),
        Seq(Row(1, Timestamp.valueOf("2010-01-01 00:00:00.000"))))

      // test for Cast expression in HiveTableScanExec
      checkAnswer(sql("SELECT value FROM table_with_timestamp_partition " +
        "WHERE ts = '2010-01-01 00:00:00.000'"), Row(1))
    }
  }
} 
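Note: `Timestamp.valueOf`, used for the partition value above, only accepts the JDBC escape format `yyyy-[m]m-[d]d hh:mm:ss[.f...]`; anything else throws `IllegalArgumentException`. A quick JDK-only sketch:

import java.sql.Timestamp

import scala.util.Try

val ok  = Try(Timestamp.valueOf("2010-01-01 00:00:00.000"))   // Success(...)
val bad = Try(Timestamp.valueOf("2010-01-01T00:00:00"))       // Failure: no space separator
assert(ok.isSuccess && bad.isFailure)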
Example 70
Source File: LiteralGenerator.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.expressions

import java.sql.{Date, Timestamp}

import org.scalacheck.{Arbitrary, Gen}

import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval


object LiteralGenerator {

  lazy val byteLiteralGen: Gen[Literal] =
    for { b <- Arbitrary.arbByte.arbitrary } yield Literal.create(b, ByteType)

  lazy val shortLiteralGen: Gen[Literal] =
    for { s <- Arbitrary.arbShort.arbitrary } yield Literal.create(s, ShortType)

  lazy val integerLiteralGen: Gen[Literal] =
    for { i <- Arbitrary.arbInt.arbitrary } yield Literal.create(i, IntegerType)

  lazy val longLiteralGen: Gen[Literal] =
    for { l <- Arbitrary.arbLong.arbitrary } yield Literal.create(l, LongType)

  lazy val floatLiteralGen: Gen[Literal] =
    for {
      f <- Gen.chooseNum(Float.MinValue / 2, Float.MaxValue / 2,
        Float.NaN, Float.PositiveInfinity, Float.NegativeInfinity)
    } yield Literal.create(f, FloatType)

  lazy val doubleLiteralGen: Gen[Literal] =
    for {
      f <- Gen.chooseNum(Double.MinValue / 2, Double.MaxValue / 2,
        Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity)
    } yield Literal.create(f, DoubleType)

  // TODO cache the generated data
  def decimalLiteralGen(precision: Int, scale: Int): Gen[Literal] = {
    assert(scale >= 0)
    assert(precision >= scale)
    Arbitrary.arbBigInt.arbitrary.map { s =>
      val a = (s % BigInt(10).pow(precision - scale)).toString()
      val b = (s % BigInt(10).pow(scale)).abs.toString()
      Literal.create(
        Decimal(BigDecimal(s"$a.$b"), precision, scale),
        DecimalType(precision, scale))
    }
  }

  lazy val stringLiteralGen: Gen[Literal] =
    for { s <- Arbitrary.arbString.arbitrary } yield Literal.create(s, StringType)

  lazy val binaryLiteralGen: Gen[Literal] =
    for { ab <- Gen.listOf[Byte](Arbitrary.arbByte.arbitrary) }
      yield Literal.create(ab.toArray, BinaryType)

  lazy val booleanLiteralGen: Gen[Literal] =
    for { b <- Arbitrary.arbBool.arbitrary } yield Literal.create(b, BooleanType)

  lazy val dateLiteralGen: Gen[Literal] =
    for { d <- Arbitrary.arbInt.arbitrary } yield Literal.create(new Date(d), DateType)

  lazy val timestampLiteralGen: Gen[Literal] =
    for { t <- Arbitrary.arbLong.arbitrary } yield Literal.create(new Timestamp(t), TimestampType)

  lazy val calendarIntervalLiterGen: Gen[Literal] =
    for { m <- Arbitrary.arbInt.arbitrary; s <- Arbitrary.arbLong.arbitrary}
      yield Literal.create(new CalendarInterval(m, s), CalendarIntervalType)


  // Sometimes it would be quite expensive when an unlimited value is used;
  // for example, the `times` argument for StringRepeat would hang the test 'forever'
  // if it were tested against Int.MaxValue by ScalaCheck. Therefore, using values from a
  // limited range is more reasonable.
  lazy val limitedIntegerLiteralGen: Gen[Literal] =
    for { i <- Gen.choose(-100, 100) } yield Literal.create(i, IntegerType)

  def randomGen(dt: DataType): Gen[Literal] = {
    dt match {
      case ByteType => byteLiteralGen
      case ShortType => shortLiteralGen
      case IntegerType => integerLiteralGen
      case LongType => longLiteralGen
      case DoubleType => doubleLiteralGen
      case FloatType => floatLiteralGen
      case DateType => dateLiteralGen
      case TimestampType => timestampLiteralGen
      case BooleanType => booleanLiteralGen
      case StringType => stringLiteralGen
      case BinaryType => binaryLiteralGen
      case CalendarIntervalType => calendarIntervalLiterGen
      case DecimalType.Fixed(precision, scale) => decimalLiteralGen(precision, scale)
      case dt => throw new IllegalArgumentException(s"not supported type $dt")
    }
  }
} 
Example 71
Source File: CallMethodViaReflectionSuite.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.expressions

import java.sql.Timestamp

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.TypeCheckFailure
import org.apache.spark.sql.types.{IntegerType, StringType}


class CallMethodViaReflectionSuite extends SparkFunSuite with ExpressionEvalHelper {

  import CallMethodViaReflection._

  // Get rid of the $ so we are getting the companion object's name.
  private val staticClassName = ReflectStaticClass.getClass.getName.stripSuffix("$")
  private val dynamicClassName = classOf[ReflectDynamicClass].getName

  test("findMethod via reflection for static methods") {
    assert(findMethod(staticClassName, "method1", Seq.empty).exists(_.getName == "method1"))
    assert(findMethod(staticClassName, "method2", Seq(IntegerType)).isDefined)
    assert(findMethod(staticClassName, "method3", Seq(IntegerType)).isDefined)
    assert(findMethod(staticClassName, "method4", Seq(IntegerType, StringType)).isDefined)
  }

  test("findMethod for a JDK library") {
    assert(findMethod(classOf[java.util.UUID].getName, "randomUUID", Seq.empty).isDefined)
  }

  test("class not found") {
    val ret = createExpr("some-random-class", "method").checkInputDataTypes()
    assert(ret.isFailure)
    val errorMsg = ret.asInstanceOf[TypeCheckFailure].message
    assert(errorMsg.contains("not found") && errorMsg.contains("class"))
  }

  test("method not found because name does not match") {
    val ret = createExpr(staticClassName, "notfoundmethod").checkInputDataTypes()
    assert(ret.isFailure)
    val errorMsg = ret.asInstanceOf[TypeCheckFailure].message
    assert(errorMsg.contains("cannot find a static method"))
  }

  test("method not found because there is no static method") {
    val ret = createExpr(dynamicClassName, "method1").checkInputDataTypes()
    assert(ret.isFailure)
    val errorMsg = ret.asInstanceOf[TypeCheckFailure].message
    assert(errorMsg.contains("cannot find a static method"))
  }

  test("input type checking") {
    assert(CallMethodViaReflection(Seq.empty).checkInputDataTypes().isFailure)
    assert(CallMethodViaReflection(Seq(Literal(staticClassName))).checkInputDataTypes().isFailure)
    assert(CallMethodViaReflection(
      Seq(Literal(staticClassName), Literal(1))).checkInputDataTypes().isFailure)
    assert(createExpr(staticClassName, "method1").checkInputDataTypes().isSuccess)
  }

  test("unsupported type checking") {
    val ret = createExpr(staticClassName, "method1", new Timestamp(1)).checkInputDataTypes()
    assert(ret.isFailure)
    val errorMsg = ret.asInstanceOf[TypeCheckFailure].message
    assert(errorMsg.contains("arguments from the third require boolean, byte, short"))
  }

  test("invoking methods using acceptable types") {
    checkEvaluation(createExpr(staticClassName, "method1"), "m1")
    checkEvaluation(createExpr(staticClassName, "method2", 2), "m2")
    checkEvaluation(createExpr(staticClassName, "method3", 3), "m3")
    checkEvaluation(createExpr(staticClassName, "method4", 4, "four"), "m4four")
  }

  private def createExpr(className: String, methodName: String, args: Any*) = {
    CallMethodViaReflection(
      Literal.create(className, StringType) +:
      Literal.create(methodName, StringType) +:
      args.map(Literal.apply)
    )
  }
} 
Example 72
Source File: SortOrderExpressionsSuite.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.expressions

import java.sql.{Date, Timestamp}
import java.util.TimeZone

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
import org.apache.spark.util.collection.unsafe.sort.PrefixComparators._

class SortOrderExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {

  test("SortPrefix") {
    val b1 = Literal.create(false, BooleanType)
    val b2 = Literal.create(true, BooleanType)
    val i1 = Literal.create(20132983, IntegerType)
    val i2 = Literal.create(-20132983, IntegerType)
    val l1 = Literal.create(20132983, LongType)
    val l2 = Literal.create(-20132983, LongType)
    val millis = 1524954911000L;
    // Explicitly choose a time zone, since Date objects can create different values depending on
    // local time zone of the machine on which the test is running
    val oldDefaultTZ = TimeZone.getDefault
    val d1 = try {
      TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles"))
      Literal.create(new java.sql.Date(millis), DateType)
    } finally {
      TimeZone.setDefault(oldDefaultTZ)
    }
    val t1 = Literal.create(new Timestamp(millis), TimestampType)
    val f1 = Literal.create(0.7788229f, FloatType)
    val f2 = Literal.create(-0.7788229f, FloatType)
    val db1 = Literal.create(0.7788229d, DoubleType)
    val db2 = Literal.create(-0.7788229d, DoubleType)
    val s1 = Literal.create("T", StringType)
    val s2 = Literal.create("This is longer than 8 characters", StringType)
    val bin1 = Literal.create(Array[Byte](12), BinaryType)
    val bin2 = Literal.create(Array[Byte](12, 17, 99, 0, 0, 0, 2, 3, 0xf4.asInstanceOf[Byte]),
      BinaryType)
    val dec1 = Literal(Decimal(20132983L, 10, 2))
    val dec2 = Literal(Decimal(20132983L, 19, 2))
    val dec3 = Literal(Decimal(20132983L, 21, 2))
    val list1 = Literal(List(1, 2), ArrayType(IntegerType))
    val nullVal = Literal.create(null, IntegerType)

    checkEvaluation(SortPrefix(SortOrder(b1, Ascending)), 0L)
    checkEvaluation(SortPrefix(SortOrder(b2, Ascending)), 1L)
    checkEvaluation(SortPrefix(SortOrder(i1, Ascending)), 20132983L)
    checkEvaluation(SortPrefix(SortOrder(i2, Ascending)), -20132983L)
    checkEvaluation(SortPrefix(SortOrder(l1, Ascending)), 20132983L)
    checkEvaluation(SortPrefix(SortOrder(l2, Ascending)), -20132983L)
    // For some reason, the Literal.create code gives us the number of days since the epoch
    checkEvaluation(SortPrefix(SortOrder(d1, Ascending)), 17649L)
    checkEvaluation(SortPrefix(SortOrder(t1, Ascending)), millis * 1000)
    checkEvaluation(SortPrefix(SortOrder(f1, Ascending)),
      DoublePrefixComparator.computePrefix(f1.value.asInstanceOf[Float].toDouble))
    checkEvaluation(SortPrefix(SortOrder(f2, Ascending)),
      DoublePrefixComparator.computePrefix(f2.value.asInstanceOf[Float].toDouble))
    checkEvaluation(SortPrefix(SortOrder(db1, Ascending)),
      DoublePrefixComparator.computePrefix(db1.value.asInstanceOf[Double]))
    checkEvaluation(SortPrefix(SortOrder(db2, Ascending)),
      DoublePrefixComparator.computePrefix(db2.value.asInstanceOf[Double]))
    checkEvaluation(SortPrefix(SortOrder(s1, Ascending)),
      StringPrefixComparator.computePrefix(s1.value.asInstanceOf[UTF8String]))
    checkEvaluation(SortPrefix(SortOrder(s2, Ascending)),
      StringPrefixComparator.computePrefix(s2.value.asInstanceOf[UTF8String]))
    checkEvaluation(SortPrefix(SortOrder(bin1, Ascending)),
      BinaryPrefixComparator.computePrefix(bin1.value.asInstanceOf[Array[Byte]]))
    checkEvaluation(SortPrefix(SortOrder(bin2, Ascending)),
      BinaryPrefixComparator.computePrefix(bin2.value.asInstanceOf[Array[Byte]]))
    checkEvaluation(SortPrefix(SortOrder(dec1, Ascending)), 20132983L)
    checkEvaluation(SortPrefix(SortOrder(dec2, Ascending)), 2013298L)
    checkEvaluation(SortPrefix(SortOrder(dec3, Ascending)),
      DoublePrefixComparator.computePrefix(201329.83d))
    checkEvaluation(SortPrefix(SortOrder(list1, Ascending)), 0L)
    checkEvaluation(SortPrefix(SortOrder(nullVal, Ascending)), null)
  }
} 
Example 73
Source File: ApplicationMonitor.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.monitor.application

import java.sql.{Connection, Timestamp}
import java.text.SimpleDateFormat
import java.util.Date
import java.util.concurrent.TimeUnit

import scala.concurrent.duration.Duration

import org.apache.spark.alarm.AlertMessage
import org.apache.spark.alarm.AlertType._
import org.apache.spark.monitor.Monitor
import org.apache.spark.monitor.MonitorItem.MonitorItem

abstract class ApplicationMonitor extends Monitor {
  override val alertType = Seq(Application)
}

class ApplicationInfo(
    title: MonitorItem,
    appName: String,
    appId: String,
    md5: String,
    startTime: Date,
    duration: Long,
    appUiUrl: String,
    historyUrl: String,
    eventLogDir: String,
    minExecutor: Int,
    maxExecutor: Int,
    executorCore: Int,
    executorMemoryMB: Long,
    executorAccu: Double,
    user: String)
  extends AlertMessage(title) {
  override def toCsv(): String = {
    s"${user},${appId}," +
      s"${new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(startTime)}," +
      s"${Duration(duration, TimeUnit.MILLISECONDS).toSeconds}," +
      s"${executorMemoryMB},${executorCore},${executorAccu.formatted("%.2f")},${appName}"
  }
  // scalastyle:off
  override def toHtml(): String = {
    val html = <h1>Job finished! </h1>
        <h2>Job information </h2>
        <ul>
          <li>Job name: {appName}</li>
          <li>Application ID: {appId}</li>
          <li>Start time: {new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(startTime)}</li>
          <li>Elapsed time: {Duration(duration, TimeUnit.MILLISECONDS).toSeconds} s</li>
        </ul>
        <h2>Resource usage</h2>
        <ul>
          <li>Executor count: {minExecutor}~{maxExecutor}</li>
          <li>Executor memory: {executorMemoryMB} MB</li>
          <li>Executor cores: {executorCore}</li>
          <li>Cumulative executor usage: {executorAccu.formatted("%.2f")} executor*min</li>
        </ul>
        <h2>Debug information</h2>
        <ul>
          <li>History link 1: <a href={appUiUrl.split(",").head}>{appUiUrl.split(",").head}</a></li>
          <li>History link 2: <a href={historyUrl}>{historyUrl}</a></li>
          <li>Event log directory: {eventLogDir}</li>
        </ul>
    html.mkString
  }

  override def toJdbc(conn: Connection, appId: String): Unit = {
    val query = "INSERT INTO `xsql_monitor`.`spark_history`(" +
      "`user`, `md5`, `appId`, `startTime`, `duration`, " +
      "`yarnURL`, `sparkHistoryURL`, `eventLogDir`, `coresPerExecutor`, `memoryPerExecutorMB`," +
      " `executorAcc`, `appName`, `minExecutors`, `maxExecutors`)" +
      " SELECT ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? FROM DUAL" +
      " WHERE NOT EXISTS (SELECT * FROM `xsql_monitor`.`spark_history` WHERE `appId` = ?);"

    val preparedStmt = conn.prepareStatement(query)
    preparedStmt.setString(1, user)
    preparedStmt.setString(2, md5)
    preparedStmt.setString(3, appId)
    preparedStmt.setTimestamp(4, new Timestamp(startTime.getTime))
    preparedStmt.setLong(5, Duration(duration, TimeUnit.MILLISECONDS).toSeconds)
    preparedStmt.setString(6, appUiUrl)
    preparedStmt.setString(7, historyUrl)
    preparedStmt.setString(8, eventLogDir)
    preparedStmt.setInt(9, executorCore)
    preparedStmt.setLong(10, executorMemoryMB)
    preparedStmt.setDouble(11, executorAccu)
    preparedStmt.setString(12, appName)
    preparedStmt.setInt(13, minExecutor)
    preparedStmt.setInt(14, maxExecutor)
    preparedStmt.setString(15, appId)
    preparedStmt.execute
  }
} 
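Note: a generic JDBC sketch of writing and reading a `java.sql.Timestamp`, mirroring the `setTimestamp(4, ...)` call above; the in-memory H2 URL and table are hypothetical (any JDBC driver on the classpath would do):

import java.sql.{DriverManager, Timestamp}

val conn = DriverManager.getConnection("jdbc:h2:mem:demo")   // hypothetical in-memory DB
conn.createStatement().execute("CREATE TABLE runs(start_time TIMESTAMP)")

val insert = conn.prepareStatement("INSERT INTO runs(start_time) VALUES (?)")
insert.setTimestamp(1, new Timestamp(System.currentTimeMillis()))
insert.executeUpdate()

val rs = conn.createStatement().executeQuery("SELECT start_time FROM runs")
while (rs.next()) println(rs.getTimestamp("start_time"))
conn.close()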
Example 74
Source File: TimestampExpressionSuite.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.expressions

import java.sql.Timestamp

import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.types.{DateType, IntegerType}
import org.scalatest.FunSuite

class TimestampExpressionSuite extends FunSuite with ExpressionEvalHelper {

  test("add_seconds") {
    // scalastyle:off magic.number
    checkEvaluation(AddSeconds(Literal(Timestamp.valueOf("2015-01-01 00:11:33")), Literal(28)),
      DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf("2015-01-01 00:12:01")))
    checkEvaluation(AddSeconds(Literal(Timestamp.valueOf("2015-01-02 00:00:00")), Literal(-1)),
      DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf("2015-01-01 23:59:59")))
    checkEvaluation(AddSeconds(Literal(Timestamp.valueOf("2015-01-01 00:00:00")), Literal(-1)),
      DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf("2014-12-31 23:59:59")))
    checkEvaluation(AddSeconds(Literal(Timestamp.valueOf("2015-01-02 00:00:00")),
      Literal.create(null, IntegerType)), null)
    checkEvaluation(AddSeconds(Literal.create(null, DateType), Literal(1)), null)
    checkEvaluation(AddSeconds(Literal.create(null, DateType), Literal.create(null, IntegerType)),
      null)
  }
} 
Example 75
Source File: AddSecondsSuite.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.expressions

import java.sql.Timestamp

import org.apache.spark.sql.{GlobalSapSQLContext, Row}
import org.scalatest.FunSuite

class AddSecondsSuite
  extends FunSuite
  with GlobalSapSQLContext {

  val rowA = TimestampRow("AAA", Timestamp.valueOf("2015-01-01 12:12:04"))
  val rowB = TimestampRow("BBB", Timestamp.valueOf("2015-01-01 00:00:00"))
  val rowC = TimestampRow("CCC", Timestamp.valueOf("2015-12-31 23:59:58"))
  val rowD = TimestampRow("DDD", Timestamp.valueOf("2012-01-01 23:30:45"))

  val dataWithTimestamps = Seq(rowA, rowB, rowC, rowD)

  test("add_seconds") {
    val rdd = sc.parallelize(dataWithTimestamps)
    val dSrc = sqlContext.createDataFrame(rdd).cache()
    dSrc.registerTempTable("src")

    val result1 = sqlContext.sql("SELECT name, ADD_SECONDS(t, 5) FROM src").collect()
    assertResult(
      Row(rowA.name, Timestamp.valueOf("2015-01-01 12:12:09")) ::
      Row(rowB.name, Timestamp.valueOf("2015-01-01 00:00:05")) ::
      Row(rowC.name, Timestamp.valueOf("2016-01-01 00:00:03")) ::
      Row(rowD.name, Timestamp.valueOf("2012-01-01 23:30:50")) :: Nil)(result1)

    val result2 = sqlContext.sql("SELECT name, ADD_SECONDS(t, -5) FROM src").collect()
    assertResult(
      Row(rowA.name, Timestamp.valueOf("2015-01-01 12:11:59")) ::
      Row(rowB.name, Timestamp.valueOf("2014-12-31 23:59:55")) ::
      Row(rowC.name, Timestamp.valueOf("2015-12-31 23:59:53")) ::
      Row(rowD.name, Timestamp.valueOf("2012-01-01 23:30:40")) :: Nil)(result2)

    // example from SAP HANA documentation at
    // http://help.sap.com/hana/SAP_HANA_SQL_and_System_Views_Reference_en.pdf
    val result3 = sqlContext.sql("SELECT name, ADD_SECONDS(t, 60*30) FROM src").collect()
    assertResult(
      Row(rowA.name, Timestamp.valueOf("2015-01-01 12:42:04")) ::
      Row(rowB.name, Timestamp.valueOf("2015-01-01 00:30:00")) ::
      Row(rowC.name, Timestamp.valueOf("2016-01-01 00:29:58")) ::
      Row(rowD.name, Timestamp.valueOf("2012-01-02 00:00:45")) :: Nil)(result3)
  }

} 
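Note: a JDK-only sketch of the ADD_SECONDS semantics tested in the two suites above, adding (possibly negative) seconds to a `Timestamp` through `java.time.Instant`:

import java.sql.Timestamp

def addSeconds(ts: Timestamp, seconds: Long): Timestamp =
  Timestamp.from(ts.toInstant.plusSeconds(seconds))

assert(addSeconds(Timestamp.valueOf("2015-01-01 00:11:33"), 28) ==
  Timestamp.valueOf("2015-01-01 00:12:01"))
assert(addSeconds(Timestamp.valueOf("2015-01-02 00:00:00"), -1) ==
  Timestamp.valueOf("2015-01-01 23:59:59"))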
Example 76
Source File: ExcelOutputWriter.scala    From spark-hadoopoffice-ds   with Apache License 2.0 5 votes vote down vote up
package org.zuinnote.spark.office.excel

import java.math.BigDecimal
import java.sql.Date
import java.sql.Timestamp
import java.text.DateFormat
import java.text.SimpleDateFormat
import java.util.Calendar

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.NullWritable
import org.apache.hadoop.io.ArrayWritable
import org.apache.hadoop.mapreduce.RecordWriter
import org.apache.hadoop.mapreduce.TaskAttemptContext

import org.apache.hadoop.fs.Path

import org.apache.spark.sql.catalyst.{ CatalystTypeConverters, InternalRow }
import org.apache.spark.sql.Row
import org.apache.spark.sql.execution.datasources.OutputWriter
import org.apache.spark.sql.types._

import org.zuinnote.hadoop.office.format.common.dao.SpreadSheetCellDAO
import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration
import org.zuinnote.hadoop.office.format.common.util.msexcel.MSExcelUtil
import org.zuinnote.hadoop.office.format.mapreduce._

import org.apache.commons.logging.LogFactory
import org.apache.commons.logging.Log
import java.util.Locale
import java.text.DecimalFormat
import org.zuinnote.hadoop.office.format.common.converter.ExcelConverterSimpleSpreadSheetCellDAO
import java.text.NumberFormat

// NOTE: This class is instantiated and used on executor side only, no need to be serializable.
private[excel] class ExcelOutputWriter(
  path:       String,
  dataSchema: StructType,
  context:    TaskAttemptContext, options: Map[String, String]) extends OutputWriter {
  
  def write(row: Row): Unit = {
    // check useHeader
    if (useHeader) {
      val headers = row.schema.fieldNames
      var i = 0
      for (x <- headers) {
        val headerColumnSCD = new SpreadSheetCellDAO(x, "", "", MSExcelUtil.getCellAddressA1Format(currentRowNum, i), defaultSheetName)
        recordWriter.write(NullWritable.get(), headerColumnSCD)
        i += 1
      }
      currentRowNum += 1
      useHeader = false
    }
    // for each value in the row
    if (row.size>0) {
      var currentColumnNum = 0;
      val simpleObject = new Array[AnyRef](row.size)
      for (i <- 0 to row.size - 1) { // for each element of the row
        val obj = row.get(i)
        if ((obj.isInstanceOf[Seq[String]]) && (obj.asInstanceOf[Seq[String]].length==5)) {
          val formattedValue = obj.asInstanceOf[Seq[String]](0)
          val comment = obj.asInstanceOf[Seq[String]](1)
          val formula = obj.asInstanceOf[Seq[String]](2)
          val address = obj.asInstanceOf[Seq[String]](3)
          val sheetName = obj.asInstanceOf[Seq[String]](4)
          simpleObject(i) = new SpreadSheetCellDAO(formattedValue,comment,formula,address,sheetName)
        } else {
          simpleObject(i)=obj.asInstanceOf[AnyRef]
        }
      }
      // convert row to spreadsheetcellDAO
      val spreadSheetCellDAORow = simpleConverter.getSpreadSheetCellDAOfromSimpleDataType(simpleObject, defaultSheetName, currentRowNum)
      // write it
      for (x<- spreadSheetCellDAORow) {
        recordWriter.write(NullWritable.get(), x)
      }
    }
    currentRowNum += 1
  }

  override def close(): Unit = {
    recordWriter.close(context)
    currentRowNum = 0;
  }

} 
Example 77
Source File: SpecificPrimitivesSpec.scala    From sbt-avrohugger   with Apache License 2.0 5 votes vote down vote up
import test._
import org.specs2.mutable.Specification
import java.sql.{Date, Timestamp}
import java.util.UUID
class SpecificPrimitivesSpec extends Specification {

  "A case class with an `Int` field" should {
    "deserialize correctly" in {
      val record1 = AvroTypeProviderTest00(1)
      val record2 = AvroTypeProviderTest00(2)
      val records = List(record1, record2)
      SpecificTestUtil.verifyWriteAndRead(records)
    }
  }

  "A case class with an `Float` field" should {
    "deserialize correctly" in {
      val record1 = AvroTypeProviderTest01(1F)
      val record2 = AvroTypeProviderTest01(2F)
      val records = List(record1, record2)
      SpecificTestUtil.verifyWriteAndRead(records)
    }
  }

  "A case class with an `Long` field" should {
    "deserialize correctly" in {
      val record1 = AvroTypeProviderTest02(1L)
      val record2 = AvroTypeProviderTest02(2L)
      val records = List(record1, record2)
      SpecificTestUtil.verifyWriteAndRead(records)
    }
  }

  "A case class with an `Double` field" should {
    "deserialize correctly" in {
      val record1 = AvroTypeProviderTest03(1D)
      val record2 = AvroTypeProviderTest03(2D)
      val records = List(record1, record2)
      SpecificTestUtil.verifyWriteAndRead(records)
    }
  }

  "A case class with an `Boolean` field" should {
    "deserialize correctly" in {
      val record1 = AvroTypeProviderTest04(true)
      val record2 = AvroTypeProviderTest04(false)
      val records = List(record1, record2)
      SpecificTestUtil.verifyWriteAndRead(records)
    }
  }

  "A case class with an `String` field" should {
    "deserialize correctly" in {
      val record1 = AvroTypeProviderTest05("hello world")
      val record2 = AvroTypeProviderTest05("hello galaxy")
      val records = List(record1, record2)
      SpecificTestUtil.verifyWriteAndRead(records)
    }
  }

  "A case class with an `Null` field" should {
    "deserialize correctly" in {
      val record1 = AvroTypeProviderTest06(null)
      val record2 = AvroTypeProviderTest06(null)
      val records = List(record1, record2)
      SpecificTestUtil.verifyWriteAndRead(records)
    }
  }

  "A case class with an `Array[Bytes]` field" should {
    "deserialize correctly" in {
      val record1 = AvroTypeProviderTest69("hello world".getBytes)
      val record2 = AvroTypeProviderTest69("hello galaxy".getBytes)
      val records = List(record1, record2)
      SpecificTestUtil.verifyWriteAndRead(records)
    }
  }

  "A case class with a `logicalType` fields from .avsc" should {
    "deserialize correctly" in {
      val t1 = System.currentTimeMillis()
      val t2 = System.currentTimeMillis()
      val record1 = LogicalSc(BigDecimal(10.0).setScale(8), new Timestamp(Long.MaxValue), new Date(t1), UUID.randomUUID())
      val record2 = LogicalSc(BigDecimal(10.0).setScale(8), new Timestamp(Long.MaxValue), new Date(t2), UUID.randomUUID())
      val records = List(record1, record2)
      SpecificTestUtil.verifyWriteAndRead(records)
    }
  }

} 
Example 78
Source File: ActionsHandler.scala    From spark-http-stream   with BSD 2-Clause "Simplified" License 5 votes vote down vote up
package org.apache.spark.sql.execution.streaming.http

import java.util.Properties
import scala.collection.mutable.ArrayBuffer
import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.spark.internal.Logging
import org.apache.spark.sql.Row
import java.sql.Timestamp
import org.apache.spark.sql.types.StructType
import java.util.concurrent.atomic.AtomicInteger


trait ActionsHandler {
	def listActionHandlerEntries(requestBody: Map[String, Any]): ActionHandlerEntries;
	def destroy();
}

trait ActionsHandlerFactory {
	def createInstance(params: Params): ActionsHandler;
}

abstract class AbstractActionsHandler extends ActionsHandler {
	def getRequiredParam(requestBody: Map[String, Any], key: String): Any = {
		val opt = requestBody.get(key);
		if (opt.isEmpty) {
			throw new MissingRequiredRequestParameterException(key);
		}

		opt.get;
	}

	override def destroy() = {
	}
}

class NullActionsHandler extends AbstractActionsHandler {
	override def listActionHandlerEntries(requestBody: Map[String, Any]): ActionHandlerEntries = new ActionHandlerEntries() {
		def apply(action: String) = Map[String, Any]();
		//yes, do nothing
		def isDefinedAt(action: String) = false;
	};
}

//rich row with extra info: id, time stamp, ...
case class RowEx(originalRow: Row, batchId: Long, offsetInBatch: Long, timestamp: Timestamp) {
	def withTimestamp(): Row = Row.fromSeq(originalRow.toSeq :+ timestamp);
	def withId(): Row = Row.fromSeq(originalRow.toSeq :+ s"$batchId-$offsetInBatch");
	def extra: (Long, Long, Timestamp) = { (batchId, offsetInBatch, timestamp) };
}

trait SendStreamActionSupport {
	def onReceiveStream(topic: String, rows: Array[RowEx]);
	def getRequiredParam(requestBody: Map[String, Any], key: String): Any;

	val listeners = ArrayBuffer[StreamListener]();

	def addListener(listener: StreamListener): this.type = {
		listeners += listener;
		this;
	}

	protected def notifyListeners(topic: String, data: Array[RowEx]) {
		listeners.foreach { _.onArrive(topic, data); }
	}

	def handleSendStream(requestBody: Map[String, Any]): Map[String, Any] = {
		val topic = getRequiredParam(requestBody, "topic").asInstanceOf[String];
		val batchId = getRequiredParam(requestBody, "batchId").asInstanceOf[Long];
		val rows = getRequiredParam(requestBody, "rows").asInstanceOf[Array[Row]];
		val ts = new Timestamp(System.currentTimeMillis());
		var index = -1;
		val rows2 = rows.map { row ⇒
			index += 1;
			RowEx(Row.fromSeq(row.toSeq), batchId, index, ts)
		}

		onReceiveStream(topic, rows2);
		notifyListeners(topic, rows2);
		Map("rowsCount" -> rows.size);
	}
} 
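handleSendStream stamps every row in a request with a single java.sql.Timestamp and a batch-relative offset. A minimal sketch of the same stamping in isolation, assuming the RowEx case class above is in scope; the row contents are illustrative:

import java.sql.Timestamp

import org.apache.spark.sql.Row

// Stamp a small batch the same way handleSendStream does: one shared Timestamp,
// plus a per-row offset within the batch.
val ts = new Timestamp(System.currentTimeMillis())
val rows = Array(Row("a", 1), Row("b", 2))
val stamped = rows.zipWithIndex.map { case (row, i) => RowEx(row, 0L, i.toLong, ts) }

stamped.map(_.withId())        // each Row gains an id column such as "0-0", "0-1"
stamped.map(_.withTimestamp()) // each Row gains the shared Timestamp column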
Example 79
Source File: SSHOrder.scala    From Aton   with GNU General Public License v3.0 5 votes vote down vote up
package model

import java.sql.Timestamp


case class SSHOrder(
                     id: Long,
                     sentDatetime: Timestamp,
                     superUser: Boolean,
                     interrupt: Boolean,
                     command: String,
                     webUser: String) {
  def this(sentDatetime: Timestamp, superUser: Boolean, interrupt: Boolean, command: String, username: String) =
    this(0, sentDatetime, superUser, interrupt, command, username)

  def this(sentDatetime: Timestamp, superUser: Boolean, command: String) = this(0, sentDatetime, superUser, false, command, "")

  def this(sentDatetime: Timestamp, command: String, webUser: String) = this(0, sentDatetime, false, false, command, webUser)
} 
Example 80
Source File: ConnectedUserTable.scala    From Aton   with GNU General Public License v3.0 5 votes vote down vote up
package model.table

import java.sql.Timestamp

import model.{ComputerState, ConnectedUser}
import slick.driver.H2Driver.api._
import slick.lifted.{ForeignKeyQuery, ProvenShape}


class ConnectedUserTable(tag: Tag) extends Table[ConnectedUser](tag, "CONNECTED_USER") {
  // Primary key
  def id: Rep[Int] = column[Int]("ID", O.PrimaryKey, O.AutoInc)

  // Date maps to java.sql.TimeStamp.
  // Ver: http://stackoverflow.com/questions/31351361/storing-date-and-time-into-mysql-using-slick-scala
  def computerStateRegisteredDate: Rep[Timestamp] = column[Timestamp]("COMPUTER_STATE_REGISTERED_DATE")

  // Other columns/attributes
  def computerStateComputerIp: Rep[String] = column[String]("COMPUTER_STATE_COMPUTER_IP")

  // Foreign key to Computer
  def computer: ForeignKeyQuery[ComputerStateTable, ComputerState] =
    foreignKey("CONNECTEC_USER_COMPUTER_STATE", (computerStateComputerIp, computerStateRegisteredDate),
      TableQuery[ComputerStateTable])(x => (x.computerIp, x.registeredDate), onUpdate = ForeignKeyAction.Restrict,
      onDelete = ForeignKeyAction.Cascade)

  // All tables need the * method with the type that it was created the table with.
  override def * : ProvenShape[ConnectedUser] =
    (id, username, computerStateComputerIp, computerStateRegisteredDate) <> (ConnectedUser.tupled, ConnectedUser.unapply)

  def username: Rep[String] = column[String]("USERNAME")
} 
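Because the registration date is stored as a plain java.sql.Timestamp column, it can be filtered with ordinary Slick comparison operators. A minimal sketch of such a query, assuming the table definitions above and a Slick Database instance named db; the query name is illustrative:

import java.sql.Timestamp

import slick.driver.H2Driver.api._

val connectedUsers = TableQuery[ConnectedUserTable]

// Users whose computer state was registered after the given instant.
def usersSince(since: Timestamp) =
  connectedUsers.filter(_.computerStateRegisteredDate >= since).result

// db.run(usersSince(new Timestamp(System.currentTimeMillis() - 3600L * 1000)))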
Example 81
Source File: SuggestionController.scala    From Aton   with GNU General Public License v3.0 5 votes vote down vote up
package controllers

import java.sql.Timestamp
import java.util.Calendar

import com.google.inject.Inject
import model.form.SuggestionForm
import model.{Role, Suggestion}
import play.api.Environment
import play.api.i18n.MessagesApi
import services.{SuggestionService, UserService, state}
import views.html._

import scala.concurrent.{ExecutionContext, Future}


class SuggestionController @Inject()(suggestionService: SuggestionService, val messagesApi: MessagesApi)(implicit userService: UserService, executionContext: ExecutionContext, environment: Environment) extends ControllerWithNoAuthRequired {
  def home = AsyncStack { implicit request =>
    implicit val (username: Option[String], isAdmin: Boolean) = loggedIn match {
      case Some(user) => (Some(user.username), user.role == Role.Administrator)
      case _ => (None, false)
    }
    if (isAdmin) {
      suggestionService.listAll.map { suggestions =>
        Ok//(index(messagesApi("suggestion"), suggestionHome(SuggestionForm.form, suggestions)))
      }
    } else {
      Future.successful(Ok)//(index(messagesApi("suggestion"), suggestionHome(SuggestionForm.form, Seq.empty[Suggestion]))))
    }


  }

  def add = AsyncStack() { implicit request =>
    implicit val (username: Option[String], isAdmin: Boolean) = loggedIn match {
      case Some(user) => (Some(user.username), user.role == Role.Administrator)
      case _ => (None, false)
    }
    SuggestionForm.form.bindFromRequest().fold(
      errorForm => Future.successful(Ok(errorForm.toString)),
      data => {
        val text = data.suggestion
        val suggestion = Suggestion(0, text, now, username)
        suggestionService.add(suggestion).map {
          case state.ActionCompleted => Redirect(routes.SuggestionController.home())
          case _ => BadRequest
        }
      }
    )
  }

  private def now = new Timestamp(Calendar.getInstance().getTime.getTime)
} 
Example 82
Source File: RowReaderTest.scala    From filo   with Apache License 2.0 5 votes vote down vote up
package org.velvia.filo

import org.joda.time.DateTime
import java.sql.Timestamp
import org.scalatest.FunSpec
import org.scalatest.Matchers

class RowReaderTest extends FunSpec with Matchers {
  val schema = Seq(
                 VectorInfo("name", classOf[String]),
                 VectorInfo("age",  classOf[Int]),
                 VectorInfo("timestamp", classOf[Timestamp])
               )

  val rows = Seq(
               (Some("Matthew Perry"),     Some(18), Some(new Timestamp(10000L))),
               (Some("Michelle Pfeiffer"), None,     Some(new Timestamp(10010L))),
               (Some("George C"),          Some(59), None),
               (Some("Rich Sherman"),      Some(26), Some(new Timestamp(10000L)))
             )

  val csvRows = Seq(
    "Matthew Perry,18,1973-01-25T00Z",
    "Michelle Pfeiffer,,1970-07-08T00Z",
    "George C,59,",
    "Rich Sherman,26,1991-10-12T00Z"
  ).map(str => (str.split(',') :+ "").take(3))

  def readValues[T](r: FastFiloRowReader, len: Int)(f: FiloRowReader => T): Seq[T] = {
    (0 until len).map { i =>
      r.rowNo = i
      f(r)
    }
  }

  it("should extract from columns back to rows") {
    val columnData = RowToVectorBuilder.buildFromRows(rows.map(TupleRowReader).toIterator,
                                                      schema,
                                                      BuilderEncoder.SimpleEncoding)
    val chunks = Array(columnData("name"), columnData("age"), columnData("timestamp"))
    val types = schema.map(_.dataType)
    val reader = new FastFiloRowReader(chunks, types.toArray)

    readValues(reader, 4)(_.getString(0)) should equal (
      Seq("Matthew Perry", "Michelle Pfeiffer", "George C", "Rich Sherman"))

    reader.rowNo = 1
    reader.notNull(1) should equal (false)
    reader.as[Timestamp](2) should equal (new Timestamp(10010L))
  }

  it("should write to columns from ArrayStringRowReader and read back properly") {
    val columnData = RowToVectorBuilder.buildFromRows(csvRows.map(ArrayStringRowReader).toIterator,
                                                      schema,
                                                      BuilderEncoder.SimpleEncoding)
    val chunks = Array(columnData("name"), columnData("age"), columnData("timestamp"))
    val types = schema.map(_.dataType)
    val reader = new FastFiloRowReader(chunks, types.toArray)

    readValues(reader, 4)(_.getString(0)) should equal (
      Seq("Matthew Perry", "Michelle Pfeiffer", "George C", "Rich Sherman"))

    reader.rowNo = 1
    reader.notNull(1) should equal (false)
    reader.as[Timestamp](2) should equal (new Timestamp(DateTime.parse("1970-07-08T00Z").getMillis))
  }

  it("should read longs from timestamp strings from ArrayStringRowReader") {
    ArrayStringRowReader(csvRows.head).getLong(2) should equal (96768000000L)
  }

  import org.velvia.filo.{vectors => bv}

  it("should append to BinaryAppendableVector from Readers with RowReaderAppender") {
    val readers = rows.map(TupleRowReader)
    val appenders = Seq(
      new IntReaderAppender(bv.IntBinaryVector.appendingVector(10), 1),
      new LongReaderAppender(bv.LongBinaryVector.appendingVector(10), 2)
    )
    readers.foreach { r => appenders.foreach(_.append(r)) }
    val bufs = appenders.map(_.appender.optimize().toFiloBuffer).toArray
    val reader = new FastFiloRowReader(bufs, Array(classOf[Int], classOf[Long]))

    readValues(reader, 4)(_.getInt(0)) should equal (Seq(18, 0, 59, 26))
    reader.rowNo = 1
    reader.notNull(0) should equal (false)
  }

  import RowReader._
  it("should compare RowReaders using TypedFieldExtractor") {
    val readers = rows.map(TupleRowReader)
    StringFieldExtractor.compare(readers(1), readers(2), 0) should be > (0)
    IntFieldExtractor.compare(readers(0), readers(2), 1) should be < (0)
    TimestampFieldExtractor.compare(readers(0), readers(3), 2) should equal (0)

    // Ok, we should be able to compare the reader with the NA / None too
    IntFieldExtractor.compare(readers(1), readers(2), 1) should be < (0)
  }
} 
Example 83
Source File: FastFiloRowReaderBenchmark.scala    From filo   with Apache License 2.0 5 votes vote down vote up
package org.velvia.filo

import java.sql.Timestamp
import org.openjdk.jmh.annotations.Benchmark
import org.openjdk.jmh.annotations.BenchmarkMode
import org.openjdk.jmh.annotations.{Mode, State, Scope}
import org.openjdk.jmh.annotations.OutputTimeUnit
import scalaxy.loops._
import scala.language.postfixOps

import java.util.concurrent.TimeUnit


@State(Scope.Thread)
class FastFiloRowReaderBenchmark {
  import VectorReader._

  // Ok, create an IntColumn and benchmark it.
  val numValues = 10000

  val randomInts = (0 until numValues).map(i => util.Random.nextInt)
  val randomLongs = randomInts.map(_.toLong)
  val randomTs = randomLongs.map(l => new Timestamp(l))

  val chunks = Array(VectorBuilder(randomInts).toFiloBuffer,
                     VectorBuilder(randomLongs).toFiloBuffer,
                     VectorBuilder(randomTs).toFiloBuffer)
  val clazzes = Array[Class[_]](classOf[Int], classOf[Long], classOf[Timestamp])

  // According to @ktosopl, be sure to return some value if possible so that JVM won't
  // optimize out the method body.  However JMH is apparently very good at avoiding this.
  // fastest loop possible using FiloVectorApply method
  @Benchmark
  @BenchmarkMode(Array(Mode.AverageTime))
  @OutputTimeUnit(TimeUnit.MICROSECONDS)
  def createFastFiloRowReader(): RowReader = {
    new FastFiloRowReader(chunks, clazzes)
  }

  val fastReader = new FastFiloRowReader(chunks, clazzes)

  @Benchmark
  @BenchmarkMode(Array(Mode.Throughput))
  @OutputTimeUnit(TimeUnit.SECONDS)
  def fastFiloRowReaderReadOne(): Int = {
    fastReader.setRowNo(0)
    if (fastReader.notNull(0)) fastReader.getInt(0) + 1 else 0
  }
} 
Example 84
Source File: KustoResponseDeserializer.scala    From azure-kusto-spark   with Apache License 2.0 5 votes vote down vote up
package com.microsoft.kusto.spark.datasource

import java.sql.Timestamp
import java.util

import com.microsoft.azure.kusto.data.{KustoResultColumn, KustoResultSetTable, Results}
import com.microsoft.kusto.spark.utils.DataTypeMapping
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
import org.apache.spark.sql.types.{StructType, _}
import org.joda.time.DateTime

import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

object KustoResponseDeserializer {
  def apply(kustoResult: KustoResultSetTable): KustoResponseDeserializer = new KustoResponseDeserializer(kustoResult)
}

// Timespan columns are cast to strings on the Kusto side. A simple test that translated them into a Duration
// string in the timespan format showed worse performance. One approach used a new expression extending
// UnaryExpression, another used a UDF; both were less performant than keeping plain strings.
case class KustoSchema(sparkSchema: StructType, toStringCastedColumns: Set[String])

class KustoResponseDeserializer(val kustoResult: KustoResultSetTable) {
  val schema: KustoSchema = getSchemaFromKustoResult

  private def getValueTransformer(valueType: String): Any => Any = {

    valueType.toLowerCase() match {
      case "string" => value: Any => value
      case "int64" => value: Any => value
      case "datetime" => value: Any => new Timestamp(new DateTime(value).getMillis)
      case "timespan" => value: Any => value
      case "sbyte" => value: Any => value
      case "long" => value: Any => value match {
        case i: Int => i.toLong
        case _ => value.asInstanceOf[Long]
      }
      case "double" => value: Any => value
      case "decimal" => value: Any => BigDecimal(value.asInstanceOf[String])
      case "int" => value: Any => value
      case "int32" => value: Any => value
      case "bool" => value: Any => value
      case "real" => value: Any => value
      case _ => value: Any => value.toString
    }
  }

  private def getSchemaFromKustoResult: KustoSchema = {
    if (kustoResult.getColumns.isEmpty) {
      KustoSchema(StructType(List()), Set())
    } else {
      val columns = kustoResult.getColumns

      KustoSchema(StructType(columns.map(col => StructField(col.getColumnName,
            DataTypeMapping.kustoTypeToSparkTypeMap.getOrElse(col.getColumnType.toLowerCase, StringType)))),
        columns.filter(c => c.getColumnType.equalsIgnoreCase("TimeSpan")).map(c => c.getColumnName).toSet)
    }
  }

  def getSchema: KustoSchema = { schema }

  def toRows: java.util.List[Row] = {
    val columnInOrder = kustoResult.getColumns
    val value: util.ArrayList[Row] = new util.ArrayList[Row](kustoResult.count())

    // Calculate the transformer function for each column, to be applied later in column order
    val valueTransformers: mutable.Seq[Any => Any] = columnInOrder.map(col => getValueTransformer(col.getColumnType))
    kustoResult.getData.asScala.foreach(row => {
      val genericRow = row.toArray().zipWithIndex.map(
        column => {
          if (column._1 == null) null else valueTransformers(column._2)(column._1)
        })
      value.add(new GenericRowWithSchema(genericRow, schema.sparkSchema))
    })

    value
  }

//  private def getOrderedColumnName = {
//    val columnInOrder = ArrayBuffer.fill(kustoResult.getColumnNameToIndex.size()){ "" }
//    kustoResult.getColumns.foreach((columnIndexPair: KustoResultColumn) => columnInOrder(columnIndexPair.) = columnIndexPair._1)
//    columnInOrder
//  }
} 
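The "datetime" branch of getValueTransformer builds a java.sql.Timestamp by routing the raw value through Joda's DateTime constructor. The same conversion in isolation, with an illustrative ISO-formatted input:

import java.sql.Timestamp

import org.joda.time.DateTime

// Mirrors the "datetime" case above: parse with Joda, then keep the epoch millis.
val raw = "2020-01-15T10:30:00Z"
val ts: Timestamp = new Timestamp(new DateTime(raw).getMillis)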
Example 85
Source File: FileOutputIT.scala    From sparta   with Apache License 2.0 5 votes vote down vote up
package com.stratio.sparta

import java.sql.Timestamp
import java.util.UUID

import com.github.nscala_time.time.Imports._
import com.stratio.sparta.sdk.pipeline.output.{Output, OutputFormatEnum, SaveModeEnum}
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}
import org.scalatest._

import scala.reflect.io.File


class FileOutputIT extends FlatSpec with ShouldMatchers with BeforeAndAfterAll {
  self: FlatSpec =>

  @transient var sc: SparkContext = _

  override def beforeAll {
    Logger.getRootLogger.setLevel(Level.ERROR)
    sc = FileOutputIT.getNewLocalSparkContext(1, "test")
  }

  override def afterAll {
    sc.stop()
    System.clearProperty("spark.driver.port")
  }

  trait CommonValues {

    val sqlContext = SQLContext.getOrCreate(sc)

    import sqlContext.implicits._

    val time = new Timestamp(DateTime.now.getMillis)

    val data =
      sc.parallelize(Seq(Person("Kevin", 18, time), Person("Kira", 21, time), Person("Ariadne", 26, time))).toDF

    val tmpPath: String = s"/tmp/sparta-test/${UUID.randomUUID().toString}"
  }

  trait WithEventData extends CommonValues {
    val properties = Map("path" -> tmpPath, "createDifferentFiles" -> "false")
    val output = new FileOutput("file-test", properties)
  }

  "FileOutputIT" should "save a dataframe" in new WithEventData {
    output.save(data, SaveModeEnum.Append, Map(Output.TimeDimensionKey -> "minute", Output.TableNameKey -> "person"))

    val source = new java.io.File(tmpPath).listFiles()
    val read = sqlContext.read.json(tmpPath).toDF
    read.count shouldBe(3)
    File("/tmp/sparta-test").deleteRecursively
  }
}

object FileOutputIT {

  def getNewLocalSparkContext(numExecutors: Int = 1, title: String): SparkContext = {
    val conf = new SparkConf().setMaster(s"local[$numExecutors]").setAppName(title)
    SparkContext.getOrCreate(conf)
  }
}

case class Person(name: String, age: Int, minute: Timestamp) extends Serializable 
Example 86
Source File: CubeWriterHelper.scala    From sparta   with Apache License 2.0 5 votes vote down vote up
package com.stratio.sparta.driver.writer

import java.sql.{Date, Timestamp}

import akka.event.slf4j.SLF4JLogging
import com.stratio.sparta.driver.factory.SparkContextFactory
import com.stratio.sparta.driver.step.Cube
import com.stratio.sparta.sdk.pipeline.aggregation.cube.{DimensionValue, DimensionValuesTime, MeasuresValues}
import com.stratio.sparta.sdk.pipeline.output.Output
import com.stratio.sparta.sdk.pipeline.schema.TypeOp
import org.apache.spark.sql._
import org.apache.spark.streaming.dstream.DStream

object CubeWriterHelper extends SLF4JLogging {

  def writeCube(cube: Cube, outputs: Seq[Output], stream: DStream[(DimensionValuesTime, MeasuresValues)]): Unit = {
    stream.map { case (dimensionValuesTime, measuresValues) =>
      toRow(cube, dimensionValuesTime, measuresValues)
    }.foreachRDD(rdd => {
      if (!rdd.isEmpty()) {
        val sparkSession = SparkContextFactory.sparkSessionInstance
        val cubeDf = sparkSession.createDataFrame(rdd, cube.schema)
        val extraOptions = Map(Output.TableNameKey -> cube.name)
        val cubeAutoCalculatedFieldsDf = WriterHelper.write(cubeDf, cube.writerOptions, extraOptions, outputs)

        TriggerWriterHelper.writeTriggers(cubeAutoCalculatedFieldsDf, cube.triggers, cube.name, outputs)
      } else log.debug("Empty event received")
    })
  }

  private[driver] def toRow(cube: Cube, dimensionValuesT: DimensionValuesTime, measures: MeasuresValues): Row = {
    val measuresSorted = measuresValuesSorted(measures.values)
    val rowValues = dimensionValuesT.timeConfig match {
      case None =>
        val dimensionValues = dimensionsValuesSorted(dimensionValuesT.dimensionValues)

        dimensionValues ++ measuresSorted
      case Some(timeConfig) =>
        val timeValue = Seq(timeFromDateType(timeConfig.eventTime, cube.dateType))
        val dimFilteredByTime = filterDimensionsByTime(dimensionValuesT.dimensionValues, timeConfig.timeDimension)
        val dimensionValues = dimensionsValuesSorted(dimFilteredByTime) ++ timeValue
        val measuresValuesWithTime = measuresSorted

        dimensionValues ++ measuresValuesWithTime
    }

    Row.fromSeq(rowValues)
  }

  private[driver] def dimensionsValuesSorted(dimensionValues: Seq[DimensionValue]): Seq[Any] =
    dimensionValues.sorted.map(dimVal => dimVal.value)

  private[driver] def measuresValuesSorted(measures: Map[String, Option[Any]]): Seq[Any] =
    measures.toSeq.sortWith(_._1 < _._1).map(measure => measure._2.getOrElse(null))

  private[driver] def filterDimensionsByTime(dimensionValues: Seq[DimensionValue],
                                             timeDimension: String): Seq[DimensionValue] =
    dimensionValues.filter(dimensionValue => dimensionValue.dimension.name != timeDimension)

  private[driver] def timeFromDateType(time: Long, dateType: TypeOp.Value): Any = {
    dateType match {
      case TypeOp.Date | TypeOp.DateTime => new Date(time)
      case TypeOp.Long => time
      case TypeOp.Timestamp => new Timestamp(time)
      case _ => time.toString
    }
  }
} 
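timeFromDateType maps one epoch-millis value to the output type selected by the cube's dateType. A sketch of the same mapping written out directly (it does not call the private[driver] helper; the epoch value is illustrative):

import java.sql.{Date, Timestamp}

val millis = 1541994120000L
val asTimestamp = new Timestamp(millis) // TypeOp.Timestamp
val asDate      = new Date(millis)      // TypeOp.Date | TypeOp.DateTime
val asLong      = millis                // TypeOp.Long
val asString    = millis.toString       // any other TypeOp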
Example 87
Source File: CubeMakerTest.scala    From sparta   with Apache License 2.0 5 votes vote down vote up
package com.stratio.sparta.driver.test.cube

import java.sql.Timestamp

import com.github.nscala_time.time.Imports._
import com.stratio.sparta.driver.step.{Cube, CubeOperations, Trigger}
import com.stratio.sparta.driver.writer.WriterOptions
import com.stratio.sparta.plugin.default.DefaultField
import com.stratio.sparta.plugin.cube.field.datetime.DateTimeField
import com.stratio.sparta.plugin.cube.operator.count.CountOperator
import com.stratio.sparta.sdk.pipeline.aggregation.cube.{Dimension, DimensionValue, DimensionValuesTime, InputFields}
import com.stratio.sparta.sdk.pipeline.schema.TypeOp
import com.stratio.sparta.sdk.utils.AggregationTime
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{LongType, StringType, StructField, StructType, TimestampType}
import org.apache.spark.streaming.TestSuiteBase
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class CubeMakerTest extends TestSuiteBase {

  val PreserverOrder = false

  
  def getEventOutput(timestamp: Timestamp, millis: Long):
  Seq[Seq[(DimensionValuesTime, InputFields)]] = {
    val dimensionString = Dimension("dim1", "eventKey", "identity", new DefaultField)
    val dimensionTime = Dimension("minute", "minute", "minute", new DateTimeField)
    val dimensionValueString1 = DimensionValue(dimensionString, "value1")
    val dimensionValueString2 = dimensionValueString1.copy(value = "value2")
    val dimensionValueString3 = dimensionValueString1.copy(value = "value3")
    val dimensionValueTs = DimensionValue(dimensionTime, timestamp)
    val tsMap = Row(timestamp)
    val valuesMap1 = InputFields(Row("value1", timestamp), 1)
    val valuesMap2 = InputFields(Row("value2", timestamp), 1)
    val valuesMap3 = InputFields(Row("value3", timestamp), 1)

    Seq(Seq(
      (DimensionValuesTime("cubeName", Seq(dimensionValueString1, dimensionValueTs)), valuesMap1),
      (DimensionValuesTime("cubeName", Seq(dimensionValueString2, dimensionValueTs)), valuesMap2),
      (DimensionValuesTime("cubeName", Seq(dimensionValueString3, dimensionValueTs)), valuesMap3)
    ))
  }
} 
Example 88
Source File: AvroOutputIT.scala    From sparta   with Apache License 2.0 5 votes vote down vote up
package com.stratio.sparta.plugin.output.avro

import java.sql.Timestamp
import java.time.Instant

import com.databricks.spark.avro._
import com.stratio.sparta.plugin.TemporalSparkContext
import com.stratio.sparta.sdk.pipeline.output.{Output, SaveModeEnum}
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Row, SparkSession}
import org.junit.runner.RunWith
import org.scalatest._
import org.scalatest.junit.JUnitRunner

import scala.reflect.io.File
import scala.util.Random


@RunWith(classOf[JUnitRunner])
class AvroOutputIT extends TemporalSparkContext with Matchers {

  trait CommonValues {
    val tmpPath: String = File.makeTemp().name
    val sparkSession = SparkSession.builder().config(sc.getConf).getOrCreate()
    val schema = StructType(Seq(
      StructField("name", StringType),
      StructField("age", IntegerType),
      StructField("minute", LongType)
    ))

    val data =
      sparkSession.createDataFrame(sc.parallelize(Seq(
        Row("Kevin", Random.nextInt, Timestamp.from(Instant.now).getTime),
        Row("Kira", Random.nextInt, Timestamp.from(Instant.now).getTime),
        Row("Ariadne", Random.nextInt, Timestamp.from(Instant.now).getTime)
      )), schema)
  }

  trait WithEventData extends CommonValues {
    val properties = Map("path" -> tmpPath)
    val output = new AvroOutput("avro-test", properties)
  }


  "AvroOutput" should "throw an exception when path is not present" in {
    an[Exception] should be thrownBy new AvroOutput("avro-test", Map.empty)
  }

  it should "throw an exception when empty path " in {
    an[Exception] should be thrownBy new AvroOutput("avro-test", Map("path" -> "    "))
  }

  it should "save a dataframe " in new WithEventData {
    output.save(data, SaveModeEnum.Append, Map(Output.TableNameKey -> "person"))
    val read = sparkSession.read.avro(s"$tmpPath/person")
    read.count should be(3)
    read should be eq data
    File(tmpPath).deleteRecursively
    File("spark-warehouse").deleteRecursively
  }

} 
Example 89
Source File: CsvOutputIT.scala    From sparta   with Apache License 2.0 5 votes vote down vote up
package com.stratio.sparta.plugin.output.csv

import java.sql.Timestamp
import java.time.Instant

import com.databricks.spark.avro._
import com.stratio.sparta.plugin.TemporalSparkContext
import com.stratio.sparta.sdk.pipeline.output.{Output, SaveModeEnum}
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Row, SparkSession}
import org.junit.runner.RunWith
import org.scalatest._
import org.scalatest.junit.JUnitRunner

import scala.reflect.io.File
import scala.util.Random


@RunWith(classOf[JUnitRunner])
class CsvOutputIT extends TemporalSparkContext with Matchers {

  trait CommonValues {
    val tmpPath: String = File.makeTemp().name
    val sparkSession = SparkSession.builder().config(sc.getConf).getOrCreate()
    val schema = StructType(Seq(
      StructField("name", StringType),
      StructField("age", IntegerType),
      StructField("minute", LongType)
    ))

    val data =
      sparkSession.createDataFrame(sc.parallelize(Seq(
        Row("Kevin", Random.nextInt, Timestamp.from(Instant.now).getTime),
        Row("Kira", Random.nextInt, Timestamp.from(Instant.now).getTime),
        Row("Ariadne", Random.nextInt, Timestamp.from(Instant.now).getTime)
      )), schema)
  }

  trait WithEventData extends CommonValues {
    val properties = Map("path" -> tmpPath)
    val output = new CsvOutput("csv-test", properties)
  }


  "CsvOutput" should "throw an exception when path is not present" in {
    an[Exception] should be thrownBy new CsvOutput("csv-test", Map.empty)
  }

  it should "throw an exception when empty path " in {
    an[Exception] should be thrownBy new CsvOutput("csv-test", Map("path" -> "    "))
  }

  it should "save a dataframe " in new WithEventData {
    output.save(data, SaveModeEnum.Append, Map(Output.TableNameKey -> "person"))
    val read = sparkSession.read.csv(s"$tmpPath/person.csv")
    read.count should be(3)
    read should be eq data
    File(tmpPath).deleteRecursively
    File("spark-warehouse").deleteRecursively
  }

} 
Example 90
Source File: TestJodaTimeVersionedEntityRepository.scala    From slick-repo   with MIT License 5 votes vote down vote up
package com.byteslounge.slickrepo.repository

import java.sql.Timestamp

import com.byteslounge.slickrepo.meta.{Versioned, VersionedEntity}
import org.joda.time.Instant
import slick.ast.BaseTypedType
import com.byteslounge.slickrepo.scalaversion.JdbcProfile
import com.byteslounge.slickrepo.version.JodaTimeVersionImplicits.instantVersionGenerator

case class TestJodaTimeVersionedEntity(override val id: Option[Int], price: Double, override val version: Option[Instant]) extends VersionedEntity[TestJodaTimeVersionedEntity, Int, Instant] {
  def withId(id: Int): TestJodaTimeVersionedEntity = this.copy(id = Some(id))
  def withVersion(version: Instant): TestJodaTimeVersionedEntity = this.copy(version = Some(version))
}

class TestJodaTimeVersionedEntityRepository(override val driver: JdbcProfile) extends VersionedRepository[TestJodaTimeVersionedEntity, Int, Instant](driver) {

  import driver.api._

  implicit val jodaTimeInstantToSqlTimestampMapper = MappedColumnType.base[Instant, Timestamp](
    { instant => new java.sql.Timestamp(instant.getMillis) },
    { sqlTimestamp => new Instant(sqlTimestamp.getTime) })

  val pkType = implicitly[BaseTypedType[Int]]
  val versionType = implicitly[BaseTypedType[Instant]]
  val tableQuery = TableQuery[TestJodaTimeVersionedEntities]
  type TableType = TestJodaTimeVersionedEntities

  class TestJodaTimeVersionedEntities(tag: slick.lifted.Tag) extends Table[TestJodaTimeVersionedEntity](tag, "TJTV_ENTITY") with Versioned[Int, Instant] {
    def id = column[Int]("ID", O.PrimaryKey)
    def price = column[Double]("PRICE")
    def version = column[Instant]("VERSION")

    def * = (id.?, price, version.?) <> ((TestJodaTimeVersionedEntity.apply _).tupled, TestJodaTimeVersionedEntity.unapply)
  }

} 
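The MappedColumnType above bridges Joda-Time's Instant and java.sql.Timestamp so the version column can be stored natively. The same pattern works for java.time.Instant; a minimal sketch, assuming the same driver.api._ import is in scope:

import java.sql.Timestamp

implicit val javaTimeInstantToSqlTimestampMapper =
  MappedColumnType.base[java.time.Instant, Timestamp](
    instant => Timestamp.from(instant),
    sqlTimestamp => sqlTimestamp.toInstant)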
Example 91
Source File: PredicatePushdownSuite.scala    From spark-exasol-connector   with Apache License 2.0 5 votes vote down vote up
package com.exasol.spark

import java.sql.Timestamp

import org.apache.spark.sql.functions.col

import com.holdenkarau.spark.testing.DataFrameSuiteBase
import org.scalatest.funsuite.AnyFunSuite


class PredicatePushdownSuite extends AnyFunSuite with BaseDockerSuite with DataFrameSuiteBase {

  test("with where clause build from filters: filter") {
    createDummyTable()

    import spark.implicits._

    val df = spark.read
      .format("exasol")
      .option("host", container.host)
      .option("port", s"${container.port}")
      .option("query", s"SELECT * FROM $EXA_SCHEMA.$EXA_TABLE")
      .load()
      .filter($"id" < 3)
      .filter(col("city").like("Ber%"))
      .select("id", "city")

    val result = df.collect().map(x => (x.getLong(0), x.getString(1))).toSet
    assert(result.size === 1)
    assert(result === Set((1, "Berlin")))
  }

  test("with where clause build from filters: createTempView and spark.sql") {
    createDummyTable()

    val df = spark.read
      .format("exasol")
      .option("host", container.host)
      .option("port", s"${container.port}")
      .option("query", s"SELECT * FROM $EXA_SCHEMA.$EXA_TABLE")
      .load()

    df.createOrReplaceTempView("myTable")

    val myDF = spark
      .sql("SELECT id, city FROM myTable WHERE id BETWEEN 1 AND 3 AND name < 'Japan'")

    val result = myDF.collect().map(x => (x.getLong(0), x.getString(1))).toSet
    assert(result.size === 2)
    assert(result === Set((1, "Berlin"), (2, "Paris")))
  }

  test("date and timestamp should be read and filtered correctly") {
    import java.sql.Date

    createDummyTable()
    val df = spark.read
      .format("exasol")
      .option("host", container.host)
      .option("port", s"${container.port}")
      .option("query", s"SELECT date_info, updated_at FROM $EXA_SCHEMA.$EXA_TABLE")
      .load()
    val minTimestamp = Timestamp.valueOf("2017-12-30 00:00:00.0000")
    val testDate = Date.valueOf("2017-12-31")

    val resultDate = df.collect().map(_.getDate(0))
    assert(resultDate.contains(testDate))

    val resultTimestamp = df.collect().map(_.getTimestamp(1)).map(x => x.after(minTimestamp))
    assert(!resultTimestamp.contains(false))

    val filteredByDateDF = df.filter(col("date_info") === testDate)
    assert(filteredByDateDF.count() === 1)

    val filteredByTimestampDF = df.filter(col("updated_at") < minTimestamp)
    assert(filteredByTimestampDF.count() === 0)
  }

  test("count should be performed successfully") {
    createDummyTable()
    val df = spark.read
      .format("exasol")
      .option("host", container.host)
      .option("port", s"${container.port}")
      .option("query", s"SELECT * FROM $EXA_SCHEMA.$EXA_TABLE")
      .load()
    val result = df.count()
    assert(result === 3)
  }
} 
Example 92
Source File: StructuredNetworkWordCountWindowed.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples.sql.streaming

import java.sql.Timestamp

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._


object StructuredNetworkWordCountWindowed {

  def main(args: Array[String]) {
    if (args.length < 3) {
      System.err.println("Usage: StructuredNetworkWordCountWindowed <hostname> <port>" +
        " <window duration in seconds> [<slide duration in seconds>]")
      System.exit(1)
    }

    val host = args(0)
    val port = args(1).toInt
    val windowSize = args(2).toInt
    val slideSize = if (args.length == 3) windowSize else args(3).toInt
    if (slideSize > windowSize) {
      System.err.println("<slide duration> must be less than or equal to <window duration>")
    }
    val windowDuration = s"$windowSize seconds"
    val slideDuration = s"$slideSize seconds"

    val spark = SparkSession
      .builder
      .appName("StructuredNetworkWordCountWindowed")
      .getOrCreate()

    import spark.implicits._

    // Create DataFrame representing the stream of input lines from connection to host:port
    val lines = spark.readStream
      .format("socket")
      .option("host", host)
      .option("port", port)
      .option("includeTimestamp", true)
      .load()

    // Split the lines into words, retaining timestamps
    val words = lines.as[(String, Timestamp)].flatMap(line =>
      line._1.split(" ").map(word => (word, line._2))
    ).toDF("word", "timestamp")

    // Group the data by window and word and compute the count of each group
    val windowedCounts = words.groupBy(
      window($"timestamp", windowDuration, slideDuration), $"word"
    ).count().orderBy("window")

    // Start running the query that prints the windowed word counts to the console
    val query = windowedCounts.writeStream
      .outputMode("complete")
      .format("console")
      .option("truncate", "false")
      .start()

    query.awaitTermination()
  }
}
// scalastyle:on println 
Example 93
Source File: XGBoostBigModelTimeSeries.scala    From uberdata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.ml

import java.sql.Timestamp

import eleflow.uberdata.IUberdataForecastUtil
import eleflow.uberdata.core.data.DataTransformer
import eleflow.uberdata.enums.SupportedAlgorithm
import ml.dmlc.xgboost4j.scala.spark.XGBoostModel
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.ml.linalg.{VectorUDT, Vector => SparkVector}
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.param.shared.HasTimeCol
import org.apache.spark.sql.{DataFrame, Row}
import org.apache.spark.sql.Dataset
import org.apache.spark.sql.types.{StructField, _}


class XGBoostBigModelTimeSeries[I](override val uid: String,
                                   override val models: Seq[(ParamMap, XGBoostModel)])
                                  extends XGBoostBigModel[I](uid, models) with HasTimeCol{

  def setTimecol(time: String): this.type = set(timeCol, Some(time))

  override def transform(dataSet: Dataset[_]): DataFrame = {
    val prediction = predict(dataSet)
    val rows = dataSet.rdd
      .map {
        case (row: Row) =>
          (DataTransformer.toFloat(row.getAs($(idCol))),
            (row.getAs[SparkVector](IUberdataForecastUtil.FEATURES_COL_NAME),
              row.getAs[java.sql.Timestamp]($(timeCol).get)))
      }
      .join(prediction)
      .map {
        case (id, ((features, time), predictValue)) =>
          Row(id, features, time, SupportedAlgorithm.XGBoostAlgorithm.toString, predictValue)
      }
    dataSet.sqlContext.createDataFrame(rows, transformSchema(dataSet.schema))
  }


  @DeveloperApi
  override def transformSchema(schema: StructType): StructType =
    StructType(Array(
      StructField($(idCol), FloatType),
      StructField(IUberdataForecastUtil.FEATURES_COL_NAME, new VectorUDT),
      StructField($(timeCol).get, TimestampType),
      StructField(IUberdataForecastUtil.ALGORITHM, StringType),
      StructField("prediction", FloatType)
    ) )
} 
Example 94
Source File: MergeProjection.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.command.mutation.merge

import java.sql.{Date, Timestamp}

import org.apache.spark.sql.{CarbonDatasourceHadoopRelation, Dataset, Row, SparkSession}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, GenericInternalRow, GenericRowWithSchema, InterpretedMutableProjection, Projection}
import org.apache.spark.sql.catalyst.util.DateTimeUtils


case class MergeProjection(
    @transient tableCols: Seq[String],
    @transient statusCol : String,
    @transient ds: Dataset[Row],
    @transient rltn: CarbonDatasourceHadoopRelation,
    @transient sparkSession: SparkSession,
    @transient mergeAction: MergeAction) {

  private val cutOffDate = Integer.MAX_VALUE >> 1

  val isUpdate = mergeAction.isInstanceOf[UpdateAction]
  val isDelete = mergeAction.isInstanceOf[DeleteAction]

  def apply(row: GenericRowWithSchema): InternalRow = {
    // TODO we can avoid these multiple conversions if this is added as a SparkPlan node.
    val values = row.values.map {
      case s: String => org.apache.spark.unsafe.types.UTF8String.fromString(s)
      case d: java.math.BigDecimal => org.apache.spark.sql.types.Decimal.apply(d)
      case b: Array[Byte] => org.apache.spark.unsafe.types.UTF8String.fromBytes(b)
      case d: Date => DateTimeUtils.fromJavaDate(d)
      case t: Timestamp => DateTimeUtils.fromJavaTimestamp(t)
      case value => value
    }

    projection(new GenericInternalRow(values)).asInstanceOf[GenericInternalRow]
  }

  val (projection, output) = generateProjection

  private def generateProjection: (Projection, Array[Expression]) = {
    val existingDsOutput = rltn.carbonRelation.schema.toAttributes
    val colsMap = mergeAction match {
      case UpdateAction(updateMap) => updateMap
      case InsertAction(insertMap) => insertMap
      case _ => null
    }
    if (colsMap != null) {
      val output = new Array[Expression](tableCols.length)
      val expecOutput = new Array[Expression](tableCols.length)
      colsMap.foreach { case (k, v) =>
        val tableIndex = tableCols.indexOf(k.toString().toLowerCase)
        if (tableIndex < 0) {
          throw new CarbonMergeDataSetException(s"Mapping is wrong $colsMap")
        }
        output(tableIndex) = v.expr.transform {
          case a: Attribute if !a.resolved =>
            ds.queryExecution.analyzed.resolveQuoted(a.name,
              sparkSession.sessionState.analyzer.resolver).get
        }
        expecOutput(tableIndex) =
          existingDsOutput.find(_.name.equalsIgnoreCase(tableCols(tableIndex))).get
      }
      if (output.contains(null)) {
        throw new CarbonMergeDataSetException(s"Not all columns are mapped")
      }
      (new InterpretedMutableProjection(output++Seq(
        ds.queryExecution.analyzed.resolveQuoted(statusCol,
        sparkSession.sessionState.analyzer.resolver).get),
        ds.queryExecution.analyzed.output), expecOutput)
    } else {
      (null, null)
    }
  }
} 
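The value conversion in apply turns external JVM types into Catalyst's internal representation; for java.sql.Timestamp that is microseconds since the epoch via DateTimeUtils. The round trip in isolation, using the same Spark-internal API imported above:

import java.sql.Timestamp

import org.apache.spark.sql.catalyst.util.DateTimeUtils

// java.sql.Timestamp -> microseconds since epoch, and back again.
val micros: Long = DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf("2018-11-12 09:42:00"))
val back: Timestamp = DateTimeUtils.toJavaTimestamp(micros)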
Example 95
Source File: TimestampDataTypeNullDataTest.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.carbondata.spark.testsuite.directdictionary

import java.io.File
import java.sql.Timestamp

import org.apache.spark.sql.Row
import org.apache.spark.sql.hive.HiveContext
import org.scalatest.BeforeAndAfterAll
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.keygenerator.directdictionary.timestamp.TimeStampGranularityConstants
import org.apache.carbondata.core.util.CarbonProperties
import org.apache.spark.sql.test.util.QueryTest


class TimestampDataTypeNullDataTest extends QueryTest with BeforeAndAfterAll {
  var hiveContext: HiveContext = _

  override def beforeAll {
    try {
      CarbonProperties.getInstance()
        .addProperty(TimeStampGranularityConstants.CARBON_CUTOFF_TIMESTAMP, "2000-12-13 02:10.00.0")
      CarbonProperties.getInstance()
        .addProperty(TimeStampGranularityConstants.CARBON_TIME_GRANULARITY,
          TimeStampGranularityConstants.TIME_GRAN_SEC.toString
        )
      sql(
        """CREATE TABLE IF NOT EXISTS timestampTyeNullData
                     (ID Int, dateField Timestamp, country String,
                     name String, phonetype String, serialname String, salary Int)
                    STORED AS carbondata"""
      )

      CarbonProperties.getInstance()
        .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd")
      val csvFilePath = s"$resourcesPath/datasamplenull.csv"
      sql("LOAD DATA LOCAL INPATH '" + csvFilePath + "' INTO TABLE timestampTyeNullData").collect();

    } catch {
      case x: Throwable => CarbonProperties.getInstance()
        .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
          CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
    }
  }

  test("SELECT max(dateField) FROM timestampTyeNullData where dateField is not null") {
    checkAnswer(
      sql("SELECT max(dateField) FROM timestampTyeNullData where dateField is not null"),
      Seq(Row(Timestamp.valueOf("2015-07-23 00:00:00.0"))
      )
    )
  }
  test("SELECT * FROM timestampTyeNullData where dateField is null") {
    checkAnswer(
      sql("SELECT dateField FROM timestampTyeNullData where dateField is null"),
      Seq(Row(null)
      ))
  }

  override def afterAll {
    sql("drop table timestampTyeNullData")
    CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
        CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
    CarbonProperties.getInstance().addProperty("carbon.direct.dictionary", "false")
  }

} 
Example 96
Source File: TimestampDataTypeDirectDictionaryWithNoDictTestCase.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.carbondata.spark.testsuite.directdictionary

import java.sql.Timestamp

import org.apache.spark.sql.Row
import org.apache.spark.sql.hive.HiveContext
import org.scalatest.BeforeAndAfterAll
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.keygenerator.directdictionary.timestamp.TimeStampGranularityConstants
import org.apache.carbondata.core.util.CarbonProperties
import org.apache.spark.sql.test.util.QueryTest


class TimestampDataTypeDirectDictionaryWithNoDictTestCase extends QueryTest with BeforeAndAfterAll {
  var hiveContext: HiveContext = _

  override def beforeAll {
    CarbonProperties.getInstance()
      .addProperty(TimeStampGranularityConstants.CARBON_CUTOFF_TIMESTAMP, "2000-12-13 02:10.00.0")
    CarbonProperties.getInstance()
      .addProperty(TimeStampGranularityConstants.CARBON_TIME_GRANULARITY,
        TimeStampGranularityConstants.TIME_GRAN_SEC.toString
      )
    CarbonProperties.getInstance().addProperty("carbon.direct.dictionary", "true")
    sql(
      """
         CREATE TABLE IF NOT EXISTS directDictionaryTable
        (empno String, doj Timestamp, salary Int)
         STORED AS carbondata"""
    )
    val csvFilePath = s"$resourcesPath/datasample.csv"
    sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE directDictionaryTable OPTIONS"
        + "('DELIMITER'= ',', 'QUOTECHAR'= '\"')")
  }

  test("select doj from directDictionaryTable") {
    checkAnswer(
      sql("select doj from directDictionaryTable"),
      Seq(Row(Timestamp.valueOf("2016-03-14 15:00:09.0")),
        Row(Timestamp.valueOf("2016-04-14 15:00:09.0")),
        Row(null)
      )
    )
  }


  test("select doj from directDictionaryTable with equals filter") {
    checkAnswer(
      sql("select doj from directDictionaryTable where doj='2016-03-14 15:00:09'"),
      Seq(Row(Timestamp.valueOf("2016-03-14 15:00:09")))
    )

  }

  test("select doj from directDictionaryTable with greater than filter") {
    checkAnswer(
      sql("select doj from directDictionaryTable where doj>'2016-03-14 15:00:09'"),
      Seq(Row(Timestamp.valueOf("2016-04-14 15:00:09")))
    )

  }


  override def afterAll {
    sql("drop table directDictionaryTable")
    CarbonProperties.getInstance().addProperty("carbon.direct.dictionary", "false")
  }
} 
Example 97
Source File: TimestampNoDictionaryColumnTestCase.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.carbondata.spark.testsuite.directdictionary

import java.sql.Timestamp

import org.apache.spark.sql.Row
import org.scalatest.BeforeAndAfterAll
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.util.CarbonProperties
import org.apache.spark.sql.test.util.QueryTest


class TimestampNoDictionaryColumnTestCase extends QueryTest with BeforeAndAfterAll {

  override def beforeAll {
    CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy")

    sql("drop table if exists timestamp_nodictionary")
    sql(
      """
         CREATE TABLE IF NOT EXISTS timestamp_nodictionary
        (empno int, empname String, designation String, doj Timestamp, workgroupcategory int,
        workgroupcategoryname String,
         projectcode int, projectjoindate Timestamp, projectenddate Timestamp, attendance int,
         utilization int, salary Int) STORED AS carbondata"""
    )

    val csvFilePath = s"$resourcesPath/data_beyond68yrs.csv"
    sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE timestamp_nodictionary OPTIONS"
        + "('DELIMITER'= ',', 'QUOTECHAR'= '\"')")
  }

  test("select projectjoindate, projectenddate from timestamp_nodictionary") {
    checkAnswer(
      sql("select projectjoindate, projectenddate from timestamp_nodictionary"),
      Seq(Row(Timestamp.valueOf("2000-01-29 00:00:00.0"), Timestamp.valueOf("2016-06-29 00:00:00.0")),
        Row(Timestamp.valueOf("1800-02-17 00:00:00.0"), Timestamp.valueOf("1900-11-29 00:00:00.0")),
        Row(null, Timestamp.valueOf("2016-05-29 00:00:00.0")),
        Row(null, Timestamp.valueOf("2016-11-30 00:00:00.0")),
        Row(Timestamp.valueOf("3000-10-22 00:00:00.0"), Timestamp.valueOf("3002-11-15 00:00:00.0")),
        Row(Timestamp.valueOf("1802-06-29 00:00:00.0"), Timestamp.valueOf("1902-12-30 00:00:00.0")),
        Row(null, Timestamp.valueOf("2016-12-30 00:00:00.0")),
        Row(Timestamp.valueOf("2038-11-14 00:00:00.0"), Timestamp.valueOf("2041-12-29 00:00:00.0")),
        Row(null, null),
        Row(Timestamp.valueOf("2014-09-15 00:00:00.0"), Timestamp.valueOf("2016-05-29 00:00:00.0"))
      )
    )
  }


  test("select projectjoindate, projectenddate from timestamp_nodictionary where in filter") {
    checkAnswer(
      sql("select projectjoindate, projectenddate from timestamp_nodictionary where projectjoindate in" +
          "('1800-02-17 00:00:00','3000-10-22 00:00:00') or projectenddate in ('1900-11-29 00:00:00'," +
          "'3002-11-15 00:00:00','2041-12-29 00:00:00')"),
      Seq(Row(Timestamp.valueOf("1800-02-17 00:00:00.0"), Timestamp.valueOf("1900-11-29 00:00:00.0")),
        Row(Timestamp.valueOf("3000-10-22 00:00:00.0"), Timestamp.valueOf("3002-11-15 00:00:00.0")),
        Row(Timestamp.valueOf("2038-11-14 00:00:00.0"), Timestamp.valueOf("2041-12-29 00:00:00.0")))
    )

  }


  override def afterAll {
    CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
        CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
    sql("drop table timestamp_nodictionary")
  }
} 
Example 98
Source File: Commons.scala    From spark-structured-streaming   with MIT License 5 votes vote down vote up
package com.kafkaToSparkToCass


import java.sql.Timestamp
import java.text.{DateFormat, SimpleDateFormat}

object Commons {

  case class UserEvent(user_id: String, time: Timestamp, event: String)
      extends Serializable

  def getTimeStamp(timeStr: String): Timestamp = {
    val dateFormat1: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    val dateFormat2: DateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss")

    val date: Option[Timestamp] = {
      try {
        Some(new Timestamp(dateFormat1.parse(timeStr).getTime))
      } catch {
        case e: java.text.ParseException =>
          Some(new Timestamp(dateFormat2.parse(timeStr).getTime))
      }
    }
    date.getOrElse(Timestamp.valueOf(timeStr))
  }

} 
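getTimeStamp tries the space-separated format first and falls back to the ISO-style 'T' separator when that fails. A short usage sketch with illustrative inputs:

import java.sql.Timestamp

val a: Timestamp = Commons.getTimeStamp("2018-11-12 09:42:00")  // matches the first format
val b: Timestamp = Commons.getTimeStamp("2018-11-12T09:42:00")  // falls back to the 'T' format
// a == b: both strings describe the same instant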
Example 99
Source File: Statements.scala    From spark-structured-streaming   with MIT License 5 votes vote down vote up
package com.kafkaToSparkToCass

import java.sql.Timestamp

import com.datastax.driver.core.Session

object Statements extends Serializable {

  def cql(id: String, time: Timestamp, ename: String): String = s"""
       insert into my_keyspace.test_table (user_id,time,event)
       values('$id', '$time', '$ename event')"""

  def createKeySpaceAndTable(session: Session, dropTable: Boolean = false) = {
    session.execute(
      """CREATE KEYSPACE  if not exists  my_keyspace WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 };""")
    if (dropTable)
      session.execute("""drop table if exists my_keyspace.test_table""")

    session.execute(
      """create table if not exists my_keyspace.test_table ( user_id  text, time timestamp, event text, primary key((user_id), time) ) WITH CLUSTERING ORDER BY (time DESC)""")
  }
} 
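cql simply interpolates the id, the java.sql.Timestamp and the event name into an INSERT statement. A small sketch tying it to the getTimeStamp helper from the previous example; the Session in the trailing comment is assumed to already be connected:

// Build the CQL INSERT for a parsed user event.
val event = Commons.UserEvent("user-1", Commons.getTimeStamp("2018-11-12 09:42:00"), "login")
val insert: String = Statements.cql(event.user_id, event.time, event.event)
// session.execute(insert)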
Example 100
Source File: database.scala    From franklin   with Apache License 2.0 5 votes vote down vote up
package com.azavea.franklin

import cats.implicits._
import com.azavea.stac4s.TemporalExtent
import doobie.implicits.javasql._
import doobie.util.meta.Meta
import doobie.util.{Read, Write}
import io.circe.{Decoder, Encoder}

import java.sql.Timestamp
import java.time.Instant

package object database extends CirceJsonbMeta with GeotrellisWktMeta with Filterables {

  implicit val instantMeta: Meta[Instant]   = Meta[Timestamp].imap(_.toInstant)(Timestamp.from)
  implicit val instantRead: Read[Instant]   = Read[Timestamp].imap(_.toInstant)(Timestamp.from)
  implicit val instantWrite: Write[Instant] = Write[Timestamp].imap(_.toInstant)(Timestamp.from)

  def stringToInstant: String => Either[Throwable, Instant] =
    (s: String) => Either.catchNonFatal(Instant.parse(s))

  def temporalExtentToString(te: TemporalExtent): String = {
    te.value match {
      case Some(start) :: Some(end) :: _ if start != end => s"${start.toString}/${end.toString}"
      case Some(start) :: Some(end) :: _ if start == end => s"${start.toString}"
      case Some(start) :: None :: _                      => s"${start.toString}/.."
      case None :: Some(end) :: _                        => s"../${end.toString}"
    }
  }

  def temporalExtentFromString(str: String): Either[String, TemporalExtent] = {
    str.split("/").toList match {
      case ".." :: endString :: _ =>
        val parsedEnd: Either[Throwable, Instant] = stringToInstant(endString)
        parsedEnd match {
          case Left(_)             => Left(s"Could not decode instant: $str")
          case Right(end: Instant) => Right(TemporalExtent(None, end))
        }
      case startString :: ".." :: _ =>
        val parsedStart: Either[Throwable, Instant] = stringToInstant(startString)
        parsedStart match {
          case Left(_)               => Left(s"Could not decode instant: $str")
          case Right(start: Instant) => Right(TemporalExtent(start, None))
        }
      case startString :: endString :: _ =>
        val parsedStart: Either[Throwable, Instant] = stringToInstant(startString)
        val parsedEnd: Either[Throwable, Instant]   = stringToInstant(endString)
        (parsedStart, parsedEnd).tupled match {
          case Left(_)                               => Left(s"Could not decode instant: $str")
          case Right((start: Instant, end: Instant)) => Right(TemporalExtent(start, end))
        }
      case _ =>
        Either.catchNonFatal(Instant.parse(str)) match {
          case Left(_)           => Left(s"Could not decode instant: $str")
          case Right(t: Instant) => Right(TemporalExtent(t, t))
        }
    }
  }

  implicit val encoderTemporalExtent: Encoder[TemporalExtent] =
    Encoder.encodeString.contramap[TemporalExtent] { extent => temporalExtentToString(extent) }

  implicit val decoderTemporalExtent: Decoder[TemporalExtent] = Decoder.decodeString.emap { str =>
    temporalExtentFromString(str)
  }
} 
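temporalExtentFromString accepts closed intervals, open-ended intervals written with "..", and single instants. A few illustrative inputs and what they decode to, assuming the package object above is in scope:

temporalExtentFromString("2020-01-01T00:00:00Z/2020-12-31T00:00:00Z") // Right: closed interval
temporalExtentFromString("2020-01-01T00:00:00Z/..")                   // Right: open end
temporalExtentFromString("../2020-12-31T00:00:00Z")                   // Right: open start
temporalExtentFromString("2020-01-01T00:00:00Z")                      // Right: degenerate [t, t] interval
temporalExtentFromString("not-a-date")                                // Left("Could not decode instant: not-a-date")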
Example 101
Source File: MQTTStreamWordCount.scala    From bahir   with Apache License 2.0 5 votes vote down vote up
package org.apache.bahir.examples.sql.streaming.mqtt

import java.sql.Timestamp

import org.apache.spark.sql.SparkSession


object MQTTStreamWordCount  {
  def main(args: Array[String]) {
    if (args.length < 2) {
      System.err.println("Usage: MQTTStreamWordCount <brokerUrl> <topic>") // scalastyle:off println
      System.exit(1)
    }

    val brokerUrl = args(0)
    val topic = args(1)

    val spark = SparkSession
      .builder
      .appName("MQTTStreamWordCount")
      .master("local[4]")
      .getOrCreate()

    import spark.implicits._

    // Create DataFrame representing the stream of input lines from connection to mqtt server
    val lines = spark.readStream
      .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider")
      .option("topic", topic).option("persistence", "memory")
      .load(brokerUrl).selectExpr("CAST(payload AS STRING)").as[String]

    // Split the lines into words
    val words = lines.flatMap(_.split(" "))

    // Generate running word count
    val wordCounts = words.groupBy("value").count()

    // Start running the query that prints the running counts to the console
    val query = wordCounts.writeStream
      .outputMode("complete")
      .format("console")
      .start()

    query.awaitTermination()
  }
} 
Example 102
Source File: AkkaStreamWordCount.scala    From bahir   with Apache License 2.0 5 votes vote down vote up
package org.apache.bahir.examples.sql.streaming.akka

import java.sql.Timestamp

import org.apache.spark.sql.SparkSession


object AkkaStreamWordCount {
  def main(args: Array[String]): Unit = {
    if (args.length < 1) {
      System.err.println("Usage: AkkaStreamWordCount <urlOfPublisher>") // scalastyle:off println
      System.exit(1)
    }

    val urlOfPublisher = args(0)

    val spark = SparkSession
                .builder()
                .appName("AkkaStreamWordCount")
                .master("local[4]")
                .getOrCreate()

    import spark.implicits._

    // Create DataFrame representing the stream of input lines from connection
    // to publisher or feeder actor
    val lines = spark.readStream
                .format("org.apache.bahir.sql.streaming.akka.AkkaStreamSourceProvider")
                .option("urlOfPublisher", urlOfPublisher)
                .load().as[(String, Timestamp)]

    // Split the lines into words
    val words = lines.map(_._1).flatMap(_.split(" "))

    // Generate running word count
    val wordCounts = words.groupBy("value").count()

    // Start running the query that prints the running counts to the console
    val query = wordCounts.writeStream
                .outputMode("complete")
                .format("console")
                .start()

    query.awaitTermination()
  }
} 
Example 103
Source File: NetezzaFilters.scala    From spark-netezza   with Apache License 2.0 5 votes vote down vote up
package com.ibm.spark.netezza

import java.sql.{Date, Timestamp}

import org.apache.commons.lang3.StringUtils
import org.apache.spark.sql.sources._


// Declaration reconstructed for readability: this excerpt omits the enclosing object that the final
// closing brace belongs to, as well as the quoteValue helper it calls (see the sketch after this example).
object NetezzaFilters {

  def generateFilterExpr(f: Filter): Option[String] = {
    Option(f match {
      case EqualTo(attr, value) => s"$attr = ${quoteValue(value)}"
      case EqualNullSafe(attr, value) =>
        s"(NOT ($attr != ${quoteValue(value)} OR $attr IS NULL OR " +
          s"${quoteValue(value)} IS NULL) OR ($attr IS NULL AND ${quoteValue(value)} IS NULL))"
      case LessThan(attr, value) => s"$attr < ${quoteValue(value)}"
      case GreaterThan(attr, value) => s"$attr > ${quoteValue(value)}"
      case LessThanOrEqual(attr, value) => s"$attr <= ${quoteValue(value)}"
      case GreaterThanOrEqual(attr, value) => s"$attr >= ${quoteValue(value)}"
      case IsNull(attr) => s"$attr IS NULL"
      case IsNotNull(attr) => s"$attr IS NOT NULL"
      case StringStartsWith(attr, value) => s"${attr} LIKE '${value}%'"
      case StringEndsWith(attr, value) => s"${attr} LIKE '%${value}'"
      case StringContains(attr, value) => s"${attr} LIKE '%${value}%'"
      case In(attr, value) => s"$attr IN (${quoteValue(value)})"
      case Not(f) => generateFilterExpr(f).map(p => s"(NOT ($p))").getOrElse(null)
      case Or(f1, f2) =>
        val or = Seq(f1, f2).flatMap(generateFilterExpr(_))
        if (or.size == 2) {
          or.map(p => s"($p)").mkString(" OR ")
        } else {
          null
        }
      case And(f1, f2) =>
        val and = Seq(f1, f2).flatMap(generateFilterExpr(_))
        if (and.size == 2) {
          and.map(p => s"($p)").mkString(" AND ")
        } else {
          null
        }
      case _ => null
    })
  }
} 
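A hypothetical usage sketch (not part of the original source) for the pushdown translation above, assuming the full NetezzaFilters object, including its quoteValue helper, is available on the classpath; the strings in the comments depend on how quoteValue renders each value:

package com.ibm.spark.netezza

import org.apache.spark.sql.sources._

object NetezzaFilterDemo extends App {
  // Simple comparison: e.g. Some(price > 10)
  println(NetezzaFilters.generateFilterExpr(GreaterThan("price", 10)))

  // Conjunction of two pushable filters: e.g. Some((name IS NOT NULL) AND (name LIKE 'A%'))
  println(NetezzaFilters.generateFilterExpr(And(IsNotNull("name"), StringStartsWith("name", "A"))))

  // Disjunction with negation: e.g. Some((a = 1) OR ((NOT (b < 5))))
  println(NetezzaFilters.generateFilterExpr(Or(EqualTo("a", 1), Not(LessThan("b", 5)))))
}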
Example 104
Source File: DefaultDatabaseOperationsTest.scala    From Conseil   with Apache License 2.0 5 votes vote down vote up
package tech.cryptonomic.conseil.api.sql

import java.sql.Timestamp
import java.time.LocalDateTime

import org.scalatest.concurrent.ScalaFutures
import org.scalatest.{Matchers, WordSpec}
import slick.jdbc.PostgresProfile.api._
import tech.cryptonomic.conseil.api.TezosInMemoryDatabaseSetup
import tech.cryptonomic.conseil.api.sql.DefaultDatabaseOperations._
import tech.cryptonomic.conseil.common.testkit.InMemoryDatabase
import tech.cryptonomic.conseil.common.tezos.Tables
import tech.cryptonomic.conseil.common.tezos.Tables.FeesRow

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration._
import scala.language.postfixOps

class DefaultDatabaseOperationsTest
    extends WordSpec
    with Matchers
    with InMemoryDatabase
    with TezosInMemoryDatabaseSetup
    with ScalaFutures {

  "The default database operations" should {
      val fees: List[FeesRow] = List.tabulate(5) { i =>
        FeesRow(
          1 + i,
          3 + i,
          5 + i,
          Timestamp.valueOf(LocalDateTime.of(2018, 11, 22, 12, 30)),
          s"$i-example",
          None,
          None
        )
      }

      "count distinct elements in column properly" in {
        dbHandler.run(Tables.Fees ++= fees).isReadyWithin(5 seconds) shouldBe true
        dbHandler.run(countDistinct("tezos", "fees", "timestamp")).futureValue shouldBe 1
        dbHandler.run(countDistinct("tezos", "fees", "low")).futureValue shouldBe 5
      }

      "select distinct elements from column properly" in {
        dbHandler.run(Tables.Fees ++= fees).isReadyWithin(5 seconds) shouldBe true
        dbHandler.run(selectDistinct("tezos", "fees", "timestamp")).futureValue shouldBe List(
          "2018-11-22 12:30:00"
        )
        dbHandler.run(selectDistinct("tezos", "fees", "low")).futureValue should contain theSameElementsAs List(
          "1",
          "2",
          "3",
          "4",
          "5"
        )
      }

      "select distinct elements from column with 'like' properly" in {
        dbHandler.run(Tables.Fees ++= fees).isReadyWithin(5 seconds) shouldBe true
        dbHandler.run(selectDistinctLike("tezos", "fees", "kind", "1-")).futureValue shouldBe List(
          "1-example"
        )
      }
    }
} 
Example 105
Source File: DefaultDatabaseOperationsTest.scala    From Conseil   with Apache License 2.0 5 votes vote down vote up
package tech.cryptonomic.conseil.indexer.sql

import java.sql.Timestamp
import java.time.LocalDateTime

import org.scalatest.concurrent.ScalaFutures
import org.scalatest.{Matchers, WordSpec}
import slick.jdbc.PostgresProfile.api._
import tech.cryptonomic.conseil.common.testkit.InMemoryDatabase
import tech.cryptonomic.conseil.common.tezos.Tables
import tech.cryptonomic.conseil.common.tezos.Tables.{Fees, FeesRow}
import tech.cryptonomic.conseil.indexer.sql.DefaultDatabaseOperations._
import tech.cryptonomic.conseil.indexer.tezos.TezosInMemoryDatabaseSetup

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration._
import scala.language.postfixOps

class DefaultDatabaseOperationsTest
    extends WordSpec
    with Matchers
    with InMemoryDatabase
    with TezosInMemoryDatabaseSetup
    with ScalaFutures {

  "The default database operations" should {
      val fees: List[FeesRow] = List.tabulate(5) { i =>
        FeesRow(
          1 + i,
          3 + i,
          5 + i,
          Timestamp.valueOf(LocalDateTime.of(2018, 11, 22, 12, 30)),
          s"$i-example",
          None,
          None
        )
      }

      "insert data when table is empty" in {
        dbHandler.run(insertWhenEmpty[Fees](Tables.Fees, fees)).futureValue shouldBe Some(5)
      }

      "do not insert data when table is not empty" in {
        dbHandler.run(Tables.Fees ++= fees).isReadyWithin(5 seconds) shouldBe true
        dbHandler.run(insertWhenEmpty[Fees](Tables.Fees, fees)).futureValue.value shouldBe Some(0)
      }
    }
} 
Example 106
Source File: CustomerSerializers.scala    From quiz-management-service   with Apache License 2.0 5 votes vote down vote up
package com.danielasfregola.quiz.management.serializers

import java.sql.Timestamp

import org.json4s.CustomSerializer
import org.json4s.JsonAST.{JInt, JNull}

object CustomSerializers {
  val all = List(CustomTimestampSerializer)
}

case object CustomTimestampSerializer extends CustomSerializer[Timestamp](format =>
  ({
    case JInt(x) => new Timestamp(x.longValue * 1000)
    case JNull => null
  },
    {
      case date: Timestamp => JInt(date.getTime / 1000)
    })) 
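A hedged usage sketch (not part of the original source) for the serializer above, assuming json4s-jackson is on the classpath: timestamps are written as Unix epoch seconds and read back from JSON integers.

import java.sql.Timestamp

import org.json4s.DefaultFormats
import org.json4s.jackson.JsonMethods.parse
import org.json4s.jackson.Serialization

import com.danielasfregola.quiz.management.serializers.CustomSerializers

// Hypothetical payload type used only for this demo
case class Event(name: String, createdAt: Timestamp)

object TimestampJsonDemo extends App {
  implicit val formats = DefaultFormats ++ CustomSerializers.all

  // Serialize: the Timestamp is emitted as epoch seconds
  val json = Serialization.write(Event("signup", new Timestamp(1543000000000L)))
  println(json) // {"name":"signup","createdAt":1543000000}

  // Deserialize: the JSON integer is converted back to a millisecond-based Timestamp
  val parsed = parse("""{"name":"signup","createdAt":1543000000}""").extract[Event]
  println(parsed.createdAt)
}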
Example 109
Source File: DataFrameExtensions.scala    From spark-powerbi-connector   with Apache License 2.0 5 votes vote down vote up
package com.microsoft.azure.powerbi.extensions

import java.sql.Timestamp
import java.util.Date

import scala.collection.mutable.ListBuffer

import com.microsoft.azure.powerbi.authentication.PowerBIAuthentication
import com.microsoft.azure.powerbi.common.PowerBIUtils
import com.microsoft.azure.powerbi.models.{table, PowerBIDatasetDetails}

import org.apache.spark.sql.DataFrame

object DataFrameExtensions {

  implicit def PowerBIDataFrame(dataFrame: DataFrame): PowerBIDataFrame =
    new PowerBIDataFrame(dataFrame: DataFrame)

  class PowerBIDataFrame(dataFrame: DataFrame) extends Serializable{

    def toPowerBI(powerbiDatasetDetails: PowerBIDatasetDetails, powerbiTable: table,
                  powerBIAuthentication: PowerBIAuthentication): Unit = {

      var authenticationToken: String = powerBIAuthentication.getAccessToken

      dataFrame.foreachPartition { partition =>

        // PowerBI row limit in single request is 10,000. We limit it to 1000.

        partition.grouped(1000).foreach {
          group => {
            val powerbiRowListBuffer: ListBuffer[Map[String, Any]] = ListBuffer[Map[String, Any]]()
            group.foreach {
              record => {
                var powerbiRow: Map[String, Any] = Map[String, Any]()

                for (i <- 0 until record.length) {
                  powerbiRow += (powerbiTable.columns(i).name -> record(i))
                }

                powerbiRowListBuffer += powerbiRow
              }

              var attemptCount = 0
              var pushSuccessful = false

              while (!pushSuccessful && attemptCount < this.retryCount) {
                try {

                    PowerBIUtils.addMultipleRows(powerbiDatasetDetails, powerbiTable,
                      powerbiRowListBuffer, authenticationToken)
                    pushSuccessful = true
                }
                catch {
                  case e: Exception =>
                    println(f"Exception inserting multiple rows: ${e.getMessage}")
                    Thread.sleep(secondsBetweenRetry * 1000)
                    attemptCount += 1

                    authenticationToken = powerBIAuthentication.refreshAccessToken
                }
              }
            }
          }
        }
      }
    }

    def countTimelineToPowerBI(powerbiDatasetDetails: PowerBIDatasetDetails, powerbiTable: table,
                               powerBIAuthentication: PowerBIAuthentication): Unit = {

      var authenticationToken: String = powerBIAuthentication.getAccessToken
      val currentTimestamp = new Timestamp(new Date().getTime)

      val powerbiRow = Map(powerbiTable.columns.head.name -> currentTimestamp,
        powerbiTable.columns(1).name -> dataFrame.count())

      var attemptCount = 0
      var pushSuccessful = false

      while (!pushSuccessful && attemptCount < this.retryCount) {
        try {
          PowerBIUtils.addRow(powerbiDatasetDetails, powerbiTable, powerbiRow, authenticationToken)
          pushSuccessful = true
        }
        catch {
          case e: Exception => println("Exception inserting row: " + e.getMessage)
            Thread.sleep(secondsBetweenRetry * 1000)
            attemptCount += 1

            authenticationToken = powerBIAuthentication.refreshAccessToken
        }
      }
    }

    private val retryCount: Int = 3
    private val secondsBetweenRetry: Int = 1
  }
} 
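A heavily hedged sketch (not part of the original source) of how the implicit extension above is typically wired up; the dataset, table and authentication values come from the connector's own APIs and are taken as parameters here rather than constructed:

import com.microsoft.azure.powerbi.authentication.PowerBIAuthentication
import com.microsoft.azure.powerbi.extensions.DataFrameExtensions._
import com.microsoft.azure.powerbi.models.{table, PowerBIDatasetDetails}

import org.apache.spark.sql.DataFrame

object PowerBIPushSketch {
  // Pushes the DataFrame contents and a (timestamp, count) summary row to Power BI.
  // Obtaining datasetDetails, targetTable and auth is tenant specific and not shown
  // in the example above, so they are simply passed in.
  def push(df: DataFrame,
           datasetDetails: PowerBIDatasetDetails,
           targetTable: table,
           auth: PowerBIAuthentication): Unit = {
    df.toPowerBI(datasetDetails, targetTable, auth)
    df.countTimelineToPowerBI(datasetDetails, targetTable, auth)
  }
}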
Example 110
Source File: RepositoryMetadata.scala    From spark-nlp   with Apache License 2.0 5 votes vote down vote up
package com.johnsnowlabs.nlp.pretrained

import java.sql.Timestamp


case class RepositoryMetadata
(
  // Path to repository metadata file
  metadataFile: String,
  // Path to repository folder
  repoFolder: String,
  // Aws file metadata.json version
  version: String,
  // Last time metadata was downloaded
  lastMetadataDownloaded: Timestamp,
  // List of all available resources in repository
  metadata: List[ResourceMetadata]
) 
Example 111
Source File: TrainingHelper.scala    From spark-nlp   with Apache License 2.0 5 votes vote down vote up
package com.johnsnowlabs.util

import java.io.File
import java.nio.file.{Files, Paths, StandardCopyOption}
import java.sql.Timestamp
import java.util.Date

import com.johnsnowlabs.nlp.pretrained.ResourceType.ResourceType
import com.johnsnowlabs.nlp.pretrained.{ResourceMetadata, ResourceType}
import org.apache.commons.io.FileUtils
import org.apache.spark.ml.util.MLWriter


object TrainingHelper {

  def saveModel(name: String,
                language: Option[String],
                libVersion: Option[Version],
                sparkVersion: Option[Version],
                modelWriter: MLWriter,
                folder: String,
                category: Option[ResourceType] = Some(ResourceType.NOT_DEFINED)
               ): Unit = {

    // 1. Get current timestamp
    val timestamp = new Timestamp(new Date().getTime)


    // 2. Save model to file
    val file = Paths.get(folder, timestamp.toString).toString.replaceAllLiterally("\\", "/")
    modelWriter.save(file)

    // 3. Zip file
    val tempzipFile = Paths.get(folder, timestamp + ".zip")
    ZipArchiveUtil.zip(file, tempzipFile.toString)

    // 4. Set checksum
    val checksum = FileHelper.generateChecksum(tempzipFile.toString)

    // 5. Create resource metadata
    val meta = new ResourceMetadata(name, language, libVersion, sparkVersion, true, timestamp, true, category = category, checksum)

    val zipfile = Paths.get(meta.fileName)

    // 6. Move the zip
    Files.move(tempzipFile, zipfile, StandardCopyOption.REPLACE_EXISTING)

    // 7. Remove original file
    try {
      FileUtils.deleteDirectory(new File(file))
    } catch {
      case _: java.io.IOException => //file lock may prevent deletion, ignore and continue
    }

    // 8. Add info about the resource to metadata.json
    val metadataFile = Paths.get(folder, "metadata.json").toString
    ResourceMetadata.addMetadataToFile(metadataFile, meta)
  }
} 
Example 112
Source File: CloudTestResources.scala    From spark-nlp   with Apache License 2.0 5 votes vote down vote up
package com.johnsnowlabs.nlp.pretrained

import java.sql.Timestamp
import com.johnsnowlabs.util.Version


object CloudTestResources {
  val name_en_123_345_new = new ResourceMetadata(
    "name",
    Some("en"),
    Some(Version(1, 2, 3)),
    Some(Version(3, 4, 5)),
    true,
    new Timestamp(50)
  )

  val name_en_12_34_old = new ResourceMetadata(
    "name",
    Some("en"),
    Some(Version(1, 2)),
    Some(Version(3, 4)),
    true,
    new Timestamp(1)
  )

  val name_en_old = new ResourceMetadata(
    "name",
    Some("en"),
    None,
    None,
    true,
    new Timestamp(1)
  )

  val name_en_new_disabled = new ResourceMetadata(
    "name",
    Some("en"),
    None,
    None,
    false,
    new Timestamp(1)
  )

  val name_de = new ResourceMetadata(
    "name",
    Some("de"),
    None,
    None,
    true,
    new Timestamp(1)
  )

  val all = List(name_en_123_345_new, name_en_12_34_old, name_en_old, name_en_new_disabled, name_de)
} 
Example 113
Source File: ResourceDownloaderSpec.scala    From spark-nlp   with Apache License 2.0 5 votes vote down vote up
package com.johnsnowlabs.nlp.pretrained

import java.sql.Timestamp
import com.johnsnowlabs.util.Version
import org.scalatest.FlatSpec


class ResourceDownloaderSpec extends FlatSpec {
  val b = CloudTestResources

  "CloudResourceMetadata" should "serialize and deserialize correctly" in {
    val resource = new ResourceMetadata("name",
      Some("en"),
      Some(Version(1,2,3)),
      Some(Version(5,4,3)),
      true,
      new Timestamp(123213))

    val json = ResourceMetadata.toJson(resource)
    val deserialized = ResourceMetadata.parseJson(json)

    assert(deserialized == resource)
  }

  "CloudResourceDownloader" should "choose the newest versions" in {
    val found = ResourceMetadata.resolveResource(b.all, ResourceRequest("name", Some("en"), "", Version(1, 2, 3), Version(3, 4, 5)))

    assert(found.isDefined)
    assert(found.get == b.name_en_123_345_new)
  }

  "CloudResourceDownloader" should "filter disabled resources" in {
    val found = ResourceMetadata.resolveResource(List(b.name_en_new_disabled), ResourceRequest("name", Some("en"), "", Version(1, 2, 3), Version(3, 4, 5)))

    assert(found.isEmpty)
  }

  "CloudResourceDownloader" should "filter language and allow empty versions" in {
    val found = ResourceMetadata.resolveResource(List(b.name_en_old, b.name_de), ResourceRequest("name", Some("en"), "", Version(1, 2, 3), Version(3, 4, 5)))

    assert(found.isDefined)
    assert(found.get == b.name_en_old)
  }
} 
Example 114
Source File: TimeBasedDataService.scala    From kafka-jdbc-connector   with Apache License 2.0 5 votes vote down vote up
package com.agoda.kafka.connector.jdbc.services

import java.sql.{Connection, PreparedStatement, ResultSet, Timestamp}
import java.util.{Date, GregorianCalendar, TimeZone}

import com.agoda.kafka.connector.jdbc.JdbcSourceConnectorConstants
import com.agoda.kafka.connector.jdbc.models.DatabaseProduct
import com.agoda.kafka.connector.jdbc.models.DatabaseProduct.{MsSQL, MySQL}
import com.agoda.kafka.connector.jdbc.models.Mode.TimestampMode
import com.agoda.kafka.connector.jdbc.utils.DataConverter
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord

import scala.collection.JavaConverters._
import scala.collection.mutable.ListBuffer
import scala.util.Try


case class TimeBasedDataService(databaseProduct: DatabaseProduct,
                                storedProcedureName: String,
                                batchSize: Int,
                                batchSizeVariableName: String,
                                timestampVariableName: String,
                                var timestampOffset: Long,
                                timestampFieldName: String,
                                topic: String,
                                keyFieldOpt: Option[String],
                                dataConverter: DataConverter,
                                calendar: GregorianCalendar = new GregorianCalendar(TimeZone.getTimeZone("UTC"))
                               ) extends DataService {

  override def createPreparedStatement(connection: Connection): Try[PreparedStatement] = Try {
    val preparedStatement = databaseProduct match {
      case MsSQL => connection.prepareStatement(s"EXECUTE $storedProcedureName @$timestampVariableName = ?, @$batchSizeVariableName = ?")
      case MySQL => connection.prepareStatement(s"CALL $storedProcedureName (@$timestampVariableName := ?, @$batchSizeVariableName := ?)")
    }
    preparedStatement.setTimestamp(1, new Timestamp(timestampOffset), calendar)
    preparedStatement.setObject(2, batchSize)
    preparedStatement
  }

  override def extractRecords(resultSet: ResultSet, schema: Schema): Try[Seq[SourceRecord]] = Try {
    val sourceRecords = ListBuffer.empty[SourceRecord]
    var max = timestampOffset
    while (resultSet.next()) {
      dataConverter.convertRecord(schema, resultSet) map { record =>
        val time = record.get(timestampFieldName).asInstanceOf[Date].getTime
        max = if(time > max) {
          keyFieldOpt match {
            case Some(keyField) =>
              sourceRecords += new SourceRecord(
                Map(JdbcSourceConnectorConstants.STORED_PROCEDURE_NAME_KEY -> storedProcedureName).asJava,
                Map(TimestampMode.entryName -> time).asJava, topic, null, schema, record.get(keyField), schema, record
              )
            case None           =>
              sourceRecords += new SourceRecord(
                Map(JdbcSourceConnectorConstants.STORED_PROCEDURE_NAME_KEY -> storedProcedureName).asJava,
                Map(TimestampMode.entryName -> time).asJava, topic, schema, record
              )
          }
          time
        } else max
      }
    }
    timestampOffset = max
    sourceRecords
  }

  override def toString: String = {
    s"""
       |{
       |   "name" : "${this.getClass.getSimpleName}"
       |   "mode" : "${TimestampMode.entryName}"
       |   "stored-procedure.name" : "$storedProcedureName"
       |}
    """.stripMargin
  }
} 
Example 115
Source File: OAuthAuthorizationTokensDal.scala    From slick-akka-http-oauth2   with Apache License 2.0 5 votes vote down vote up
package persistence.dals

import java.security.SecureRandom
import java.sql.Timestamp

import org.joda.time.DateTime
import persistence.entities.SlickTables.OauthAccessTokenTable
import persistence.entities.{Account, OAuthAccessToken, OAuthClient}
import slick.driver.H2Driver.api._
import slick.driver.JdbcProfile
import utils.{Configuration, PersistenceModule}

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future
import scala.util.Random


trait OAuthAccessTokensDal extends BaseDalImpl[OauthAccessTokenTable,OAuthAccessToken]{
  def create(account: Account, client: OAuthClient): Future[OAuthAccessToken]
  def delete(account: Account, client: OAuthClient): Future[Int]
  def refresh(account: Account, client: OAuthClient): Future[OAuthAccessToken]
  def findByAccessToken(accessToken: String): Future[Option[OAuthAccessToken]]
  def findByAuthorized(account: Account, clientId: String): Future[Option[OAuthAccessToken]]
  def findByRefreshToken(refreshToken: String): Future[Option[OAuthAccessToken]]
}

class OAuthAccessTokensDalImpl (modules: Configuration with PersistenceModule)(implicit override val db: JdbcProfile#Backend#Database) extends OAuthAccessTokensDal {
  override def create(account: Account, client: OAuthClient): Future[OAuthAccessToken] = {
    def randomString(length: Int) = new Random(new SecureRandom()).alphanumeric.take(length).mkString
    val accessToken = randomString(40)
    val refreshToken = randomString(40)
    val createdAt = new Timestamp(new DateTime().getMillis)
    val oauthAccessToken = new OAuthAccessToken(
      id = 0,
      accountId = account.id,
      oauthClientId = client.id,
      accessToken = accessToken,
      refreshToken = refreshToken,
      createdAt = createdAt
    )
    insert(oauthAccessToken).map(id => oauthAccessToken.copy(id = id))
  }

  override def delete(account: Account, client: OAuthClient): Future[Int] = {
    deleteByFilter( oauthToken => oauthToken.accountId === account.id && oauthToken.oauthClientId === client.id)
  }

  override def refresh(account: Account, client: OAuthClient): Future[OAuthAccessToken] = {
    delete(account, client)
    create(account, client)
  }

  override def findByAuthorized(account: Account, clientId: String): Future[Option[OAuthAccessToken]] = {
    val query = for {
      oauthClient <- modules.oauthClientsDal.tableQ
      token <- tableQ if oauthClient.id === token.oauthClientId && oauthClient.clientId === clientId && token.accountId === account.id
    } yield token
    db.run(query.result).map(_.headOption)
  }

  override def findByAccessToken(accessToken: String): Future[Option[OAuthAccessToken]] = {
    findByFilter(_.accessToken === accessToken).map(_.headOption)
  }

  override def findByRefreshToken(refreshToken: String): Future[Option[OAuthAccessToken]] = {
    val expireAt = new Timestamp(new DateTime().minusMonths(1).getMillis)
    findByFilter( token => token.refreshToken === refreshToken && token.createdAt > expireAt).map(_.headOption)

  }
} 
Example 116
Source File: OAuthAuthorizationCodesDal.scala    From slick-akka-http-oauth2   with Apache License 2.0 5 votes vote down vote up
package persistence.dals

import java.sql.Timestamp

import org.joda.time.DateTime
import persistence.entities.OAuthAuthorizationCode
import persistence.entities.SlickTables.OauthAuthorizationCodeTable
import slick.driver.H2Driver.api._
import slick.driver.JdbcProfile

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future


trait OAuthAuthorizationCodesDal extends BaseDalImpl[OauthAuthorizationCodeTable,OAuthAuthorizationCode]{
  def findByCode(code: String): Future[Option[OAuthAuthorizationCode]]
  def delete(code: String): Future[Int]
}

class OAuthAuthorizationCodesDalImpl()(implicit override val db: JdbcProfile#Backend#Database) extends OAuthAuthorizationCodesDal {
  override def findByCode(code: String): Future[Option[OAuthAuthorizationCode]] = {
    val expireAt = new Timestamp(new DateTime().minusMinutes(30).getMillis)
    findByFilter(authCode => authCode.code === code && authCode.createdAt > expireAt).map(_.headOption)
  }

  override def delete(code: String): Future[Int] = deleteByFilter(_.code === code)

} 
Example 117
Source File: Boot.scala    From slick-akka-http-oauth2   with Apache License 2.0 5 votes vote down vote up
import java.sql.Timestamp

import akka.http.scaladsl.Http
import akka.http.scaladsl.server.RouteConcatenation
import akka.stream.ActorMaterializer
import org.joda.time.DateTime
import persistence.entities.{Account, OAuthClient}
import rest.OAuthRoutes
import utils._

object Main extends App with RouteConcatenation {
  // configuring modules for application, cake pattern for DI
  val modules = new ConfigurationModuleImpl  with ActorModuleImpl with PersistenceModuleImpl
  implicit val system = modules.system
  implicit val materializer = ActorMaterializer()
  implicit val ec = modules.system.dispatcher

  modules.generateDDL()

  for {
    createAccounts <- modules.accountsDal.insert(Seq(
      Account(0, "[email protected]", "48181acd22b3edaebc8a447868a7df7ce629920a", new Timestamp(new DateTime().getMillis)) // password:bob
    ))
    createOauthClients <- modules.oauthClientsDal.insert(Seq(
      OAuthClient(0, 1, "client_credentials", "bob_client_id", "bob_client_secret", Some("redirectUrl"), new Timestamp(new DateTime().getMillis))))
  } yield {
    println(s"Database initialized with default values for bob and alice")
  }

  val bindingFuture = Http().bindAndHandle(
    new OAuthRoutes(modules).routes, "localhost", 8080)

  println(s"Server online at http://localhost:8080/")

} 
Example 118
Source File: Schema.scala    From osmesa   with Apache License 2.0 5 votes vote down vote up
package osmesa.analytics.updater

import java.sql.Timestamp
import java.time.Instant

import geotrellis.vectortile.Layer
import org.apache.log4j.Logger
import osmesa.analytics.updater.Implicits._

trait Schema {
  val layer: Layer
  val features: Map[String, (Option[AugmentedDiffFeature], AugmentedDiffFeature)]

  val newFeatures: Seq[VTFeature]
  lazy val replacementFeatures: Seq[VTFeature] = Seq.empty[VTFeature]
  lazy val retainedFeatures: Seq[VTFeature] = Seq.empty[VTFeature]

  protected lazy val logger: Logger = Logger.getLogger(getClass)

  protected lazy val touchedFeatures: Map[String, Seq[VTFeature]] =
    Map.empty[String, Seq[VTFeature]]

  protected lazy val versionInfo: Map[String, (Int, Int, Timestamp)] =
    touchedFeatures
      .mapValues(_.last)
      .mapValues(
        f =>
          (
            f.data("__version").toInt,
            f.data("__minorVersion").toInt,
            Timestamp.from(Instant.ofEpochMilli(f.data("__updated")))
        ))

  protected lazy val minorVersions: Map[String, Int] =
    features
      .mapValues {
        case (_, curr) => curr.data
      }
      .map {
        case (id, f) =>
          versionInfo.get(id) match {
            case Some((prevVersion, _, _)) if prevVersion < f.version => (id, 0)
            case Some((prevVersion, prevMinorVersion, _)) if prevVersion == f.version =>
              (id, prevMinorVersion + 1)
            case _ => (id, 0)
          }
      }
}

trait SchemaBuilder {
  val layerName: String

  def apply(layer: Layer,
            features: Map[String, (Option[AugmentedDiffFeature], AugmentedDiffFeature)]): Schema
} 
Example 119
Source File: PostgresBookingViewRepository.scala    From ticket-booking-aecor   with Apache License 2.0 5 votes vote down vote up
package ru.pavkin.booking.booking.view

import java.sql.Timestamp
import java.time.Instant

import cats.Monad
import cats.implicits._
import doobie._
import doobie.implicits._
import doobie.util.transactor.Transactor
import io.circe.{ Decoder, Encoder, Json }
import io.circe.parser._
import org.postgresql.util.PGobject
import ru.pavkin.booking.common.models._

class PostgresBookingViewRepository[F[_]: Monad](transactor: Transactor[F],
                                                 tableName: String = "bookings")
    extends BookingViewRepository[F] {

  implicit val jsonMeta: Meta[Json] =
    Meta.Advanced
      .other[PGobject]("json")
      .timap[Json](a => parse(a.getValue).leftMap[Json](e => throw e).merge)(a => {
        val o = new PGobject
        o.setType("json")
        o.setValue(a.noSpaces)
        o
      })

  implicit val seatsMeta: Meta[List[Seat]] = jsonMeta.timap(
    j => Decoder[List[Seat]].decodeJson(j).right.get
  )(s => Encoder[List[Seat]].apply(s))

  implicit val ticketsMeta: Meta[List[Ticket]] = jsonMeta.timap(
    j => Decoder[List[Ticket]].decodeJson(j).right.get
  )(s => Encoder[List[Ticket]].apply(s))

  implicit val instantMeta: Meta[Instant] =
    Meta[Timestamp].timap(_.toInstant)(Timestamp.from)

  implicit val bookingStatusMeta: Meta[BookingStatus] =
    Meta[String].timap(BookingStatus.withName)(_.entryName)

  def get(bookingId: BookingKey): F[Option[BookingView]] =
    queryView(bookingId).option.transact(transactor)

  def byClient(clientId: ClientId): F[List[BookingView]] =
    queryForClient(clientId).to[List].transact(transactor)

  def set(view: BookingView): F[Unit] =
    Update[BookingView](setViewQuery).run(view).transact(transactor).void

  def expired(now: Instant): fs2.Stream[F, BookingKey] =
    queryExpired(now).stream.transact(transactor)

  def createTable: F[Unit] = createTableQuery.transact(transactor).void

  private val setViewQuery =
    s"""INSERT INTO $tableName
    (booking_id, client_id, concert_id, seats, tickets, status, confirmed_at, expires_at, version)
    VALUES (?,?,?,?,?,?,?,?,?)
    ON CONFLICT (booking_id)
    DO UPDATE SET
     tickets = EXCLUDED.tickets,
     status = EXCLUDED.status,
     confirmed_at = EXCLUDED.confirmed_at,
     expires_at = EXCLUDED.expires_at,
     version = EXCLUDED.version;"""

  private def queryView(bookingId: BookingKey) =
    (fr"SELECT * FROM " ++ Fragment.const(tableName) ++
      fr"WHERE booking_id = $bookingId;")
      .query[BookingView]

  private def queryExpired(now: Instant) =
    (fr"SELECT booking_id FROM " ++ Fragment.const(tableName) ++
      fr"WHERE status = ${BookingStatus.Confirmed: BookingStatus} AND expires_at < $now;")
      .query[BookingKey]

  private def queryForClient(clientId: ClientId) =
    (fr"SELECT * FROM " ++ Fragment.const(tableName) ++
      fr"WHERE client_id = $clientId;")
      .query[BookingView]

  private val createTableQuery = (fr"""
    CREATE TABLE IF NOT EXISTS """ ++ Fragment.const(tableName) ++
    fr""" (
    booking_id    text      NOT NULL PRIMARY KEY,
    client_id     text      NOT NULL,
    concert_id    text      NOT NULL,
    seats         json      NOT NULL,
    tickets       json      NOT NULL,
    status        text      NOT NULL,
    confirmed_at  timestamptz,
    expires_at    timestamptz,
    version       bigint    NOT NULL
    );
  """).update.run

} 
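The Meta[Instant] mapping above is a small reusable trick; here is a minimal sketch (not part of the original source, assuming doobie on the classpath; the table and column names are illustrative):

import java.sql.Timestamp
import java.time.Instant

import doobie._
import doobie.implicits._

object InstantMetaSketch {
  // Map java.time.Instant through java.sql.Timestamp, exactly as the repository above does.
  implicit val instantMeta: Meta[Instant] =
    Meta[Timestamp].timap(_.toInstant)(Timestamp.from)

  // Illustrative query: the Meta instance lets Instant values be interpolated directly.
  def expiredSince(now: Instant): Query0[String] =
    sql"SELECT booking_id FROM bookings WHERE expires_at < $now".query[String]
}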
Example 120
Source File: SetDifferenceAndFilter.scala    From CM-Well   with Apache License 2.0 5 votes vote down vote up
package cmwell.analytics.util

import java.sql.Timestamp

import org.apache.spark.sql.{Dataset, SparkSession}


object SetDifferenceAndFilter {

  def apply(uuids1: Dataset[KeyFields],
            uuids2: Dataset[KeyFields],
            consistencyThreshold: Long,
            filterOutMeta: Boolean = false)
           (implicit spark: SparkSession): Dataset[KeyFields] = {

    import spark.implicits._

    // The original setDifference implementation used the SQL except function, but that ignores any pre-partitioning.
    // The next implementation used a left-anti join, but that created a weird execution plan that caused poor performance.
    // The current implementation uses an outer join, which uses an efficient sort-merge join.

    def setDifference(uuids1: Dataset[KeyFields], uuids2: Dataset[KeyFields]): Dataset[KeyFields] =
      uuids1.join(uuids2, uuids1("uuid") === uuids2("uuid"), "left_outer")
        .filter(uuids2("uuid").isNull)
        .select(uuids1("*"))
        .as[KeyFields]

    // Calculate the set difference between the two sets of uuids.
    // The result keeps only rows from uuids1 whose uuid has no match in uuids2.
    val positives = setDifference(uuids1, uuids2)

    val timeToConsistencyFilter = positives("lastModified") < new Timestamp(consistencyThreshold)
    val overallFilter = if (filterOutMeta)
      timeToConsistencyFilter &&
        (positives("path") =!= "/" && positives("path") =!= "/meta" && !positives("path").startsWith("/meta/"))
    else
      timeToConsistencyFilter

    // Filter out any positives that occurred after the current threshold
    positives.filter(overallFilter)
  }
} 
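The comments above weigh except, left-anti and outer-join formulations of set difference; below is a small generic sketch of the last two (not part of the original source, assuming both frames share an "id" key column):

import org.apache.spark.sql.DataFrame

object SetDifferenceSketch {

  // Left-anti formulation: rows of `left` whose id has no match in `right`.
  def antiJoinDifference(left: DataFrame, right: DataFrame): DataFrame =
    left.join(right, Seq("id"), "left_anti")

  // Outer-join formulation mirroring setDifference above: keep left rows with no match on the right.
  def outerJoinDifference(left: DataFrame, right: DataFrame): DataFrame =
    left.join(right, left("id") === right("id"), "left_outer")
      .filter(right("id").isNull)
      .select(left("*"))
}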
Example 121
Source File: RowComparer.scala    From spark-fast-tests   with MIT License 5 votes vote down vote up
package com.github.mrpowers.spark.fast.tests

import org.apache.spark.sql.Row

import java.sql.Timestamp
import scala.math.abs

object RowComparer {

  
  def areRowsEqual(r1: Row, r2: Row, tol: Double): Boolean = {
    if (r1.length != r2.length) {
      return false
    } else {
      (0 until r1.length).foreach(idx => {
        if (r1.isNullAt(idx) != r2.isNullAt(idx)) {
          return false
        }

        if (!r1.isNullAt(idx)) {
          val o1 = r1.get(idx)
          val o2 = r2.get(idx)
          o1 match {
            case b1: Array[Byte] =>
              if (!java.util.Arrays.equals(
                    b1,
                    o2.asInstanceOf[Array[Byte]]
                  )) {
                return false
              }

            case f1: Float =>
              if (java.lang.Float.isNaN(f1) !=
                    java.lang.Float.isNaN(o2.asInstanceOf[Float])) {
                return false
              }
              if (abs(f1 - o2.asInstanceOf[Float]) > tol) {
                return false
              }

            case d1: Double =>
              if (java.lang.Double.isNaN(d1) !=
                    java.lang.Double.isNaN(o2.asInstanceOf[Double])) {
                return false
              }
              if (abs(d1 - o2.asInstanceOf[Double]) > tol) {
                return false
              }

            case d1: java.math.BigDecimal =>
              if (d1.compareTo(o2.asInstanceOf[java.math.BigDecimal]) != 0) {
                return false
              }

            case t1: Timestamp =>
              if (abs(t1.getTime - o2.asInstanceOf[Timestamp].getTime) > tol) {
                return false
              }

            case _ =>
              if (o1 != o2) return false
          }
        }
      })
    }
    true
  }

} 
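A short usage sketch (not part of the original source) for the comparer above; note that the same absolute tolerance is applied to float and double differences and to timestamp differences in milliseconds:

import java.sql.Timestamp

import com.github.mrpowers.spark.fast.tests.RowComparer
import org.apache.spark.sql.Row

object RowComparerDemo extends App {
  val r1 = Row(1.0, "a", new Timestamp(1000L))
  val r2 = Row(1.001, "a", new Timestamp(1002L))

  // true: the double differs by 0.001 and the timestamp by 2 ms, both within 5.0
  println(RowComparer.areRowsEqual(r1, r2, tol = 5.0))

  // false: both the double and the timestamp differ by more than 0.0001
  println(RowComparer.areRowsEqual(r1, r2, tol = 0.0001))
}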
Example 122
Source File: BigQueryUtilsSpec.scala    From comet-data-pipeline   with Apache License 2.0 5 votes vote down vote up
package com.ebiznext.comet.utils.conversion

import java.sql.{Date, Timestamp}

import com.ebiznext.comet.TestHelper
import com.ebiznext.comet.config.SparkEnv
import com.ebiznext.comet.utils.conversion.BigQueryUtils._
import com.ebiznext.comet.utils.conversion.syntax._
import org.apache.spark.sql.SparkSession
import com.google.cloud.bigquery.{Field, StandardSQLTypeName, Schema => BQSchema}

class BigQueryUtilsSpec extends TestHelper {
  new WithSettings() {
    val sparkEnv: SparkEnv = new SparkEnv("test")
    val session: SparkSession = sparkEnv.session
    import session.implicits._

    "Spark Types" should "be converted to corresponding BQ Types" in {
      val res: BQSchema = List(
        (
          1,
          true,
          2.5,
          "hello",
          'x'.asInstanceOf[Byte],
          new Date(System.currentTimeMillis()),
          new Timestamp(System.currentTimeMillis())
        )
      ).toDF().to[BQSchema]
      //Schema{fields=[Field{name=value, type=INTEGER, mode=NULLABLE, description=, policyTags=null}]}
      val fields =
        List(
          Field
            .newBuilder("_1", StandardSQLTypeName.INT64)
            .setDescription("")
            .setMode(Field.Mode.NULLABLE)
            .build(),
          Field
            .newBuilder("_2", StandardSQLTypeName.BOOL)
            .setDescription("")
            .setMode(Field.Mode.NULLABLE)
            .build(),
          Field
            .newBuilder("_3", StandardSQLTypeName.FLOAT64)
            .setDescription("")
            .setMode(Field.Mode.NULLABLE)
            .build(),
          Field
            .newBuilder("_4", StandardSQLTypeName.STRING)
            .setDescription("")
            .setMode(Field.Mode.NULLABLE)
            .build(),
          Field
            .newBuilder("_5", StandardSQLTypeName.INT64)
            .setDescription("")
            .setMode(Field.Mode.NULLABLE)
            .build(),
          Field
            .newBuilder("_6", StandardSQLTypeName.DATE)
            .setDescription("")
            .setMode(Field.Mode.NULLABLE)
            .build(),
          Field
            .newBuilder("_7", StandardSQLTypeName.TIMESTAMP)
            .setDescription("")
            .setMode(Field.Mode.NULLABLE)
            .build()
        )
      res.getFields should contain theSameElementsInOrderAs fields
    }
  }
} 
Example 123
Source File: StructuredNetworkWordCountWindowed.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples.sql.streaming

import java.sql.Timestamp

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._


object StructuredNetworkWordCountWindowed {

  def main(args: Array[String]) {
    if (args.length < 3) {
      System.err.println("Usage: StructuredNetworkWordCountWindowed <hostname> <port>" +
        " <window duration in seconds> [<slide duration in seconds>]")
      System.exit(1)
    }

    val host = args(0)
    val port = args(1).toInt
    val windowSize = args(2).toInt
    val slideSize = if (args.length == 3) windowSize else args(3).toInt
    if (slideSize > windowSize) {
      System.err.println("<slide duration> must be less than or equal to <window duration>")
    }
    val windowDuration = s"$windowSize seconds"
    val slideDuration = s"$slideSize seconds"

    val spark = SparkSession
      .builder
      .appName("StructuredNetworkWordCountWindowed")
      .getOrCreate()

    import spark.implicits._

    // Create DataFrame representing the stream of input lines from connection to host:port
    val lines = spark.readStream
      .format("socket")
      .option("host", host)
      .option("port", port)
      .option("includeTimestamp", true)
      .load()

    // Split the lines into words, retaining timestamps
    val words = lines.as[(String, Timestamp)].flatMap(line =>
      line._1.split(" ").map(word => (word, line._2))
    ).toDF("word", "timestamp")

    // Group the data by window and word and compute the count of each group
    val windowedCounts = words.groupBy(
      window($"timestamp", windowDuration, slideDuration), $"word"
    ).count().orderBy("window")

    // Start running the query that prints the windowed word counts to the console
    val query = windowedCounts.writeStream
      .outputMode("complete")
      .format("console")
      .option("truncate", "false")
      .start()

    query.awaitTermination()
  }
}
// scalastyle:on println 
Example 124
Source File: SchemaColumnSelection.scala    From data-faker   with MIT License 5 votes vote down vote up
package com.dunnhumby.datafaker.schema.table.columns

import scala.reflect.runtime.universe.TypeTag
import java.sql.{Date, Timestamp}
import com.dunnhumby.datafaker.YamlParser.YamlParserProtocol
import org.apache.spark.sql.Column
import org.apache.spark.sql.functions.{rand, udf}

case class SchemaColumnSelection[T](override val name: String, values: List[T])(implicit tag: TypeTag[T]) extends SchemaColumn {
  override def column(rowID: Option[Column] = None): Column = {
    val intToSelectionUDF = udf((index: Int) => {
      values(index)
    })

    intToSelectionUDF(rand() * values.length % values.length)
  }
}

object SchemaColumnSelectionProtocol extends SchemaColumnSelectionProtocol
trait SchemaColumnSelectionProtocol extends YamlParserProtocol {

  import net.jcazevedo.moultingyaml._

  implicit object SchemaColumnSelectionFormat extends YamlFormat[SchemaColumnSelection[_]] {

    override def read(yaml: YamlValue): SchemaColumnSelection[_] = {
      val fields = yaml.asYamlObject.fields
      val YamlString(dataType) = fields.getOrElse(YamlString("data_type"), deserializationError("data_type not set"))
      val YamlString(name) = fields.getOrElse(YamlString("name"), deserializationError("name not set"))
      val values = fields.getOrElse(YamlString("values"), deserializationError("selection values not set"))

      dataType match {
        case SchemaColumnDataType.Int => SchemaColumnSelection(name, values.convertTo[List[Int]])
        case SchemaColumnDataType.Long => SchemaColumnSelection(name, values.convertTo[List[Long]])
        case SchemaColumnDataType.Float => SchemaColumnSelection(name, values.convertTo[List[Float]])
        case SchemaColumnDataType.Double => SchemaColumnSelection(name, values.convertTo[List[Double]])
        case SchemaColumnDataType.Date => SchemaColumnSelection(name, values.convertTo[List[Date]])
        case SchemaColumnDataType.Timestamp => SchemaColumnSelection(name, values.convertTo[List[Timestamp]])
        case SchemaColumnDataType.String => SchemaColumnSelection(name, values.convertTo[List[String]])
        case _ => deserializationError(s"unsupported data_type: $dataType for ${SchemaColumnType.Selection}")
      }

    }

    override def write(obj: SchemaColumnSelection[_]): YamlValue = ???

  }

} 
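A hedged sketch (not part of the original source) of how a selection column might be attached to a generated DataFrame; the column name and values are illustrative:

import org.apache.spark.sql.SparkSession

import com.dunnhumby.datafaker.schema.table.columns.SchemaColumnSelection

object SelectionColumnDemo extends App {
  val spark = SparkSession.builder()
    .master("local[*]")
    .appName("selection-demo")
    .getOrCreate()

  // Each generated row receives one of the listed values, chosen at random.
  val country = SchemaColumnSelection("country", List("GB", "US", "FR"))

  spark.range(10)
    .withColumn(country.name, country.column())
    .show()

  spark.stop()
}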
Example 125
Source File: SchemaColumnRandom.scala    From data-faker   with MIT License 5 votes vote down vote up
package com.dunnhumby.datafaker.schema.table.columns

import java.sql.{Date, Timestamp}
import com.dunnhumby.datafaker.YamlParser.YamlParserProtocol
import org.apache.spark.sql.Column
import org.apache.spark.sql.functions.{to_utc_timestamp, round, rand, from_unixtime, to_date}
import org.apache.spark.sql.types.{IntegerType, LongType}

trait SchemaColumnRandom[T] extends SchemaColumn

object SchemaColumnRandom {
  val FloatDP = 3
  val DoubleDP = 3

  def apply(name: String, min: Int, max: Int): SchemaColumn = SchemaColumnRandomNumeric(name, min, max)
  def apply(name: String, min: Long, max: Long): SchemaColumn = SchemaColumnRandomNumeric(name, min, max)
  def apply(name: String, min: Float, max: Float): SchemaColumn = SchemaColumnRandomNumeric(name, min, max)
  def apply(name: String, min: Double, max: Double): SchemaColumn = SchemaColumnRandomNumeric(name, min, max)
  def apply(name: String, min: Date, max: Date): SchemaColumn = SchemaColumnRandomDate(name, min, max)
  def apply(name: String, min: Timestamp, max: Timestamp): SchemaColumn = SchemaColumnRandomTimestamp(name, min, max)
  def apply(name: String): SchemaColumn = SchemaColumnRandomBoolean(name)
}

private case class SchemaColumnRandomNumeric[T: Numeric](override val name: String, min: T, max: T) extends SchemaColumnRandom[T] {
  override def column(rowID: Option[Column] = None): Column = {
    import Numeric.Implicits._

    (min, max) match {
      case (_: Int, _: Int) => round(rand() * (max - min) + min, 0).cast(IntegerType)
      case (_: Long, _: Long) => round(rand() * (max - min) + min, 0).cast(LongType)
      case (_: Float, _: Float) => round(rand() * (max - min) + min, SchemaColumnRandom.FloatDP)
      case (_: Double, _: Double) => round(rand() * (max - min) + min, SchemaColumnRandom.DoubleDP)
    }
  }
}

private case class SchemaColumnRandomTimestamp(override val name: String, min: Timestamp, max: Timestamp) extends SchemaColumnRandom[Timestamp] {
  override def column(rowID: Option[Column] = None): Column = {
    val minTime = min.getTime / 1000
    val maxTime = max.getTime / 1000
    to_utc_timestamp(from_unixtime(rand() * (maxTime - minTime) + minTime), "UTC")
  }
}

private case class SchemaColumnRandomDate(override val name: String, min: Date, max: Date) extends SchemaColumnRandom[Date] {
  val timestamp = SchemaColumnRandomTimestamp(name, new Timestamp(min.getTime), new Timestamp(max.getTime + 86400000))

  override def column(rowID: Option[Column] = None): Column = to_date(timestamp.column())
}

private case class SchemaColumnRandomBoolean(override val name: String) extends SchemaColumnRandom[Boolean] {
  override def column(rowID: Option[Column] = None): Column = rand() < 0.5f
}

object SchemaColumnRandomProtocol extends SchemaColumnRandomProtocol
trait SchemaColumnRandomProtocol extends YamlParserProtocol {

  import net.jcazevedo.moultingyaml._

  implicit object SchemaColumnRandomFormat extends YamlFormat[SchemaColumnRandom[_]] {

    override def read(yaml: YamlValue): SchemaColumnRandom[_] = {
      val fields = yaml.asYamlObject.fields
      val YamlString(name) = fields.getOrElse(YamlString("name"), deserializationError("name not set"))
      val YamlString(dataType) = fields.getOrElse(YamlString("data_type"), deserializationError(s"data_type not set for $name"))

      if (dataType == SchemaColumnDataType.Boolean) {
        SchemaColumnRandomBoolean(name)
      }
      else {
        val min = fields.getOrElse(YamlString("min"), deserializationError(s"min not set for $name"))
        val max = fields.getOrElse(YamlString("max"), deserializationError(s"max not set for $name"))

        dataType match {
          case SchemaColumnDataType.Int => SchemaColumnRandomNumeric(name, min.convertTo[Int], max.convertTo[Int])
          case SchemaColumnDataType.Long => SchemaColumnRandomNumeric(name, min.convertTo[Long], max.convertTo[Long])
          case SchemaColumnDataType.Float => SchemaColumnRandomNumeric(name, min.convertTo[Float], max.convertTo[Float])
          case SchemaColumnDataType.Double => SchemaColumnRandomNumeric(name, min.convertTo[Double], max.convertTo[Double])
          case SchemaColumnDataType.Date => SchemaColumnRandomDate(name, min.convertTo[Date], max.convertTo[Date])
          case SchemaColumnDataType.Timestamp => SchemaColumnRandomTimestamp(name, min.convertTo[Timestamp], max.convertTo[Timestamp])
          case _ => deserializationError(s"unsupported data_type: $dataType for ${SchemaColumnType.Random}")
        }
      }

    }

    override def write(obj: SchemaColumnRandom[_]): YamlValue = ???

  }

} 
Example 126
Source File: SchemaColumnSequential.scala    From data-faker   with MIT License 5 votes vote down vote up
package com.dunnhumby.datafaker.schema.table.columns

import java.sql.{Date, Timestamp}
import com.dunnhumby.datafaker.YamlParser.YamlParserProtocol
import org.apache.spark.sql.Column
import org.apache.spark.sql.functions.{to_utc_timestamp, from_unixtime, monotonically_increasing_id, to_date}

trait SchemaColumnSequential[T] extends SchemaColumn

object SchemaColumnSequential {
  def apply(name: String, start: Int, step: Int): SchemaColumn = SchemaColumnSequentialNumeric(name, start, step)
  def apply(name: String, start: Long, step: Long): SchemaColumn = SchemaColumnSequentialNumeric(name, start, step)
  def apply(name: String, start: Float, step: Float): SchemaColumn = SchemaColumnSequentialNumeric(name, start, step)
  def apply(name: String, start: Double, step: Double): SchemaColumn = SchemaColumnSequentialNumeric(name, start, step)
  def apply(name: String, start: Date, step: Int): SchemaColumn = SchemaColumnSequentialDate(name, start, step)
  def apply(name: String, start: Timestamp, step: Int): SchemaColumn = SchemaColumnSequentialTimestamp(name, start, step)
}

private case class SchemaColumnSequentialNumeric[T: Numeric](override val name: String, start: T, step: T) extends SchemaColumnSequential[T] {
  override def column(rowID: Option[Column] = Some(monotonically_increasing_id)): Column = (rowID.get * step) + start
}

private case class SchemaColumnSequentialTimestamp(override val name: String, start: Timestamp, stepSeconds: Int) extends SchemaColumnSequential[Timestamp] {
  override def column(rowID: Option[Column] = Some(monotonically_increasing_id)): Column = {
    val startTime = start.getTime / 1000
    to_utc_timestamp(from_unixtime(rowID.get * stepSeconds + startTime), "UTC")
  }
}

private case class SchemaColumnSequentialDate(override val name: String, start: Date, stepDays: Int) extends SchemaColumnSequential[Date] {
  val timestamp = SchemaColumnSequentialTimestamp(name, new Timestamp(start.getTime), stepDays * 86400)

  override def column(rowID: Option[Column]): Column = to_date(timestamp.column())
}

object SchemaColumnSequentialProtocol extends SchemaColumnSequentialProtocol
trait SchemaColumnSequentialProtocol extends YamlParserProtocol {

  import net.jcazevedo.moultingyaml._

  implicit object SchemaColumnSequentialFormat extends YamlFormat[SchemaColumnSequential[_]] {

    override def read(yaml: YamlValue): SchemaColumnSequential[_] = {
      val fields = yaml.asYamlObject.fields
      val YamlString(dataType) = fields.getOrElse(YamlString("data_type"), deserializationError("data_type not set"))
      val YamlString(name) = fields.getOrElse(YamlString("name"), deserializationError("name not set"))
      val start = fields.getOrElse(YamlString("start"), deserializationError("start not set"))
      val step = fields.getOrElse(YamlString("step"), deserializationError("step not set"))

      dataType match {
        case "Int" => SchemaColumnSequentialNumeric(name, start.convertTo[Int], step.convertTo[Int])
        case "Long" => SchemaColumnSequentialNumeric(name, start.convertTo[Long], step.convertTo[Long])
        case "Float" => SchemaColumnSequentialNumeric(name, start.convertTo[Float], step.convertTo[Float])
        case "Double" => SchemaColumnSequentialNumeric(name, start.convertTo[Double], step.convertTo[Double])
        case "Date" => SchemaColumnSequentialDate(name, start.convertTo[Date], step.convertTo[Int])
        case "Timestamp" => SchemaColumnSequentialTimestamp(name, start.convertTo[Timestamp], step.convertTo[Int])
        case _ => deserializationError(s"unsupported data_type: $dataType for ${SchemaColumnType.Sequential}")
      }

    }

    override def write(obj: SchemaColumnSequential[_]): YamlValue = ???

  }

} 
Example 127
Source File: SchemaColumnFixed.scala    From data-faker   with MIT License 5 votes vote down vote up
package com.dunnhumby.datafaker.schema.table.columns

import java.sql.{Date, Timestamp}
import com.dunnhumby.datafaker.YamlParser.YamlParserProtocol
import org.apache.spark.sql.Column
import org.apache.spark.sql.functions.lit

case class SchemaColumnFixed[T](override val name: String, value: T) extends SchemaColumn {
  override def column(rowID: Option[Column] = None): Column = lit(value)
}

object SchemaColumnFixedProtocol extends SchemaColumnFixedProtocol
trait SchemaColumnFixedProtocol extends YamlParserProtocol {

  import net.jcazevedo.moultingyaml._

  implicit object SchemaColumnFixedFormat extends YamlFormat[SchemaColumnFixed[_]] {

    override def read(yaml: YamlValue): SchemaColumnFixed[_] = {
      val fields = yaml.asYamlObject.fields
      val YamlString(name) = fields.getOrElse(YamlString("name"), deserializationError("name not set"))
      val YamlString(dataType) = fields.getOrElse(YamlString("data_type"), deserializationError(s"data_type not set for $name"))
      val value = fields.getOrElse(YamlString("value"), deserializationError(s"value not set for $name"))

      dataType match {
        case SchemaColumnDataType.Int => SchemaColumnFixed(name, value.convertTo[Int])
        case SchemaColumnDataType.Long => SchemaColumnFixed(name, value.convertTo[Long])
        case SchemaColumnDataType.Float => SchemaColumnFixed(name, value.convertTo[Float])
        case SchemaColumnDataType.Double => SchemaColumnFixed(name, value.convertTo[Double])
        case SchemaColumnDataType.Date => SchemaColumnFixed(name, value.convertTo[Date])
        case SchemaColumnDataType.Timestamp => SchemaColumnFixed(name, value.convertTo[Timestamp])
        case SchemaColumnDataType.String => SchemaColumnFixed(name, value.convertTo[String])
        case SchemaColumnDataType.Boolean => SchemaColumnFixed(name, value.convertTo[Boolean])
        case _ => deserializationError(s"unsupported data_type: $dataType for ${SchemaColumnType.Fixed}")
      }

    }

    override def write(obj: SchemaColumnFixed[_]): YamlValue = ???

  }

} 
Example 128
Source File: YamlParserTest.scala    From data-faker   with MIT License 5 votes vote down vote up
package com.dunnhumby.datafaker

import java.sql.{Date, Timestamp}
import org.scalatest.{MustMatchers, WordSpec}

class YamlParserTest extends WordSpec with MustMatchers {

  import com.dunnhumby.datafaker.YamlParser.YamlParserProtocol._
  import net.jcazevedo.moultingyaml._

  "YamlParser" must {
    "convert a YamlDate to java.sql.Date" in {
      val date = "1998-06-03"
      val string = s"""$date""".stripMargin
      string.parseYaml.convertTo[Date] mustBe Date.valueOf(date)
    }

    "convert a YamlDate to java.sql.Timestamp" in {
      val timestamp = "1998-06-03 01:23:45"
      val string = s"""$timestamp""".stripMargin
      string.parseYaml.convertTo[Timestamp] mustBe Timestamp.valueOf(timestamp)
    }
  }
} 
Example 129
Source File: SchemaColumnFixedTest.scala    From data-faker   with MIT License 5 votes vote down vote up
package com.dunnhumby.datafaker.schema.table.columns

import java.sql.{Date, Timestamp}
import org.scalatest.{MustMatchers, WordSpec}

class SchemaColumnFixedTest extends WordSpec with MustMatchers {

  import com.dunnhumby.datafaker.schema.table.columns.SchemaColumnFixedProtocol._
  import net.jcazevedo.moultingyaml._

  val name = "test"
  val column_type = "Fixed"

  val baseString =
    s"""name: $name
       |column_type: $column_type
    """.stripMargin

  "SchemaColumnFixed" must {
    "read an Int column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Int}
           |value: 1
         """.stripMargin

      string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, 1)
    }

    "read a Long column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Long}
           |value: 1
         """.stripMargin

      string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, 1l)
    }

    "read a Float column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Float}
           |value: 1.0
         """.stripMargin

      string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, 1f)
    }

    "read a Double column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Double}
           |value: 1.0
         """.stripMargin

      string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, 1d)
    }

    "read a Date column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Date}
           |value: 1998-06-03
         """.stripMargin

      string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, Date.valueOf("1998-06-03"))
    }

    "read a Timestamp column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Timestamp}
           |value: 1998-06-03 01:23:45
         """.stripMargin

      string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, Timestamp.valueOf("1998-06-03 01:23:45"))
    }

    "read a String column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.String}
           |value: test
         """.stripMargin

      string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, "test")
    }

    "read a Boolean column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Boolean}
           |value: true
         """.stripMargin

      string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, true)
    }
  }
} 
Example 130
Source File: SchemaColumnSequentialTest.scala    From data-faker   with MIT License 5 votes vote down vote up
package com.dunnhumby.datafaker.schema.table.columns

import java.sql.{Date, Timestamp}
import org.scalatest.{MustMatchers, WordSpec}

class SchemaColumnSequentialTest extends WordSpec with MustMatchers {

  import com.dunnhumby.datafaker.schema.table.columns.SchemaColumnSequentialProtocol._
  import net.jcazevedo.moultingyaml._

  val name = "test"
  val column_type = "Sequential"

  val baseString =
    s"""name: $name
       |column_type: $column_type
    """.stripMargin

  "SchemaColumnSequential" must {
    "read an Int column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Int}
           |start: 1
           |step: 1
         """.stripMargin

      string.parseYaml.convertTo[SchemaColumnSequential[_]] mustBe SchemaColumnSequential(name, 1, 1)
    }

    "read a Long column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Long}
           |start: 1
           |step: 1
         """.stripMargin

      string.parseYaml.convertTo[SchemaColumnSequential[_]] mustBe SchemaColumnSequential(name, 1l, 1l)
    }

    "read a Float column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Float}
           |start: 1.0
           |step: 1.0
         """.stripMargin

      string.parseYaml.convertTo[SchemaColumnSequential[_]] mustBe SchemaColumnSequential(name, 1f, 1f)
    }

    "read a Double column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Double}
           |start: 1.0
           |step: 1.0
         """.stripMargin

      string.parseYaml.convertTo[SchemaColumnSequential[_]] mustBe SchemaColumnSequential(name, 1d, 1d)
    }

    "read a Date column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Date}
           |start: 1998-06-03
           |step: 1
         """.stripMargin

      string.parseYaml.convertTo[SchemaColumnSequential[_]] mustBe SchemaColumnSequential(name, Date.valueOf("1998-06-03"), 1)
    }

    "read a Timestamp column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Timestamp}
           |start: 1998-06-03 01:23:45
           |step: 1
         """.stripMargin

      string.parseYaml.convertTo[SchemaColumnSequential[_]] mustBe SchemaColumnSequential(name, Timestamp.valueOf("1998-06-03 01:23:45"), 1)
    }
  }
} 
Example 131
Source File: ArgsParserTest.scala    From data-faker   with MIT License 5 votes vote down vote up
package com.dunnhumby.datafaker

import java.sql.{Date, Timestamp}
import org.scalatest.{MustMatchers, WordSpec}

class ArgsParserTest extends WordSpec with MustMatchers {

  import com.dunnhumby.datafaker.YamlParser.YamlParserProtocol._
  import net.jcazevedo.moultingyaml._

  "ArgsParser" must {
    "accepts --file arg" in {
      ArgsParser.parseArgs(List("--file", "test")) mustBe Map("file" -> "test")
    }

    "accepts --database arg" in {
      ArgsParser.parseArgs(List("--database", "test")) mustBe Map("database" -> "test")
    }
  }
} 
Example 132
Source File: literals.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.expressions

import java.sql.{Date, Timestamp}

import org.apache.spark.sql.catalyst.CatalystTypeConverters
import org.apache.spark.sql.catalyst.util.DateUtils
import org.apache.spark.sql.types._

object Literal {
  def apply(v: Any): Literal = v match {
    case i: Int => Literal(i, IntegerType)
    case l: Long => Literal(l, LongType)
    case d: Double => Literal(d, DoubleType)
    case f: Float => Literal(f, FloatType)
    case b: Byte => Literal(b, ByteType)
    case s: Short => Literal(s, ShortType)
    case s: String => Literal(UTF8String(s), StringType)
    case b: Boolean => Literal(b, BooleanType)
    case d: BigDecimal => Literal(Decimal(d), DecimalType.Unlimited)
    case d: java.math.BigDecimal => Literal(Decimal(d), DecimalType.Unlimited)
    case d: Decimal => Literal(d, DecimalType.Unlimited)
    case t: Timestamp => Literal(t, TimestampType)
    case d: Date => Literal(DateUtils.fromJavaDate(d), DateType)
    case a: Array[Byte] => Literal(a, BinaryType)
    case null => Literal(null, NullType)
    case _ =>
      throw new RuntimeException("Unsupported literal type " + v.getClass + " " + v)
  }

  def create(v: Any, dataType: DataType): Literal = {
    Literal(CatalystTypeConverters.convertToCatalyst(v), dataType)
  }
}


case class Literal protected (value: Any, dataType: DataType) extends LeafExpression {

  override def foldable: Boolean = true
  override def nullable: Boolean = value == null

  override def toString: String = if (value != null) value.toString else "null"

  type EvaluatedType = Any
  override def eval(input: Row): Any = value
}

// TODO: Specialize
case class MutableLiteral(var value: Any, dataType: DataType, nullable: Boolean = true)
    extends LeafExpression {
  type EvaluatedType = Any

  def update(expression: Expression, input: Row): Unit = {
    value = expression.eval(input)
  }

  override def eval(input: Row): Any = value
} 
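A minimal usage sketch of the Literal factory above, assuming it is called with a java.sql.Timestamp (the value is illustrative): the timestamp hits the `case t: Timestamp` branch and is tagged with TimestampType.

import java.sql.Timestamp
import org.apache.spark.sql.types.TimestampType

val lit = Literal(Timestamp.valueOf("1998-06-03 01:23:45"))
assert(lit.dataType == TimestampType && lit.foldable)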
Example 133
Source File: DataFrameDateSuite.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql

import java.sql.{Date, Timestamp}

class DataFrameDateTimeSuite extends QueryTest {

  private lazy val ctx = org.apache.spark.sql.test.TestSQLContext
  import ctx.implicits._

  test("timestamp comparison with date strings") {
    val df = Seq(
      (1, Timestamp.valueOf("2015-01-01 00:00:00")),
      (2, Timestamp.valueOf("2014-01-01 00:00:00"))).toDF("i", "t")

    checkAnswer(
      df.select("t").filter($"t" <= "2014-06-01"),
      Row(Timestamp.valueOf("2014-01-01 00:00:00")) :: Nil)


    checkAnswer(
      df.select("t").filter($"t" >= "2014-06-01"),
      Row(Timestamp.valueOf("2015-01-01 00:00:00")) :: Nil)
  }

  test("date comparison with date strings") {
    val df = Seq(
      (1, Date.valueOf("2015-01-01")),
      (2, Date.valueOf("2014-01-01"))).toDF("i", "t")

    checkAnswer(
      df.select("t").filter($"t" <= "2014-06-01"),
      Row(Date.valueOf("2014-01-01")) :: Nil)


    checkAnswer(
      df.select("t").filter($"t" >= "2015"),
      Row(Date.valueOf("2015-01-01")) :: Nil)
  }
} 
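The filters above rely on the date strings being cast to the column type before comparison. A small sketch of the equivalent comparison in plain java.sql terms, using the same values as the test:

import java.sql.Timestamp

// "2014-06-01" is interpreted as midnight of that day, so only the 2014 row passes `<=`.
val cutoff  = Timestamp.valueOf("2014-06-01 00:00:00")
val earlier = Timestamp.valueOf("2014-01-01 00:00:00")
val later   = Timestamp.valueOf("2015-01-01 00:00:00")
assert(earlier.before(cutoff) && later.after(cutoff))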
Example 134
Source File: ColumnarTestUtils.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.columnar

import java.sql.Timestamp

import scala.collection.immutable.HashSet
import scala.util.Random

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.expressions.GenericMutableRow
import org.apache.spark.sql.types.{UTF8String, DataType, Decimal, AtomicType}

object ColumnarTestUtils {
  def makeNullRow(length: Int): GenericMutableRow = {
    val row = new GenericMutableRow(length)
    (0 until length).foreach(row.setNullAt)
    row
  }

  def makeRandomValue[T <: DataType, JvmType](columnType: ColumnType[T, JvmType]): JvmType = {
    def randomBytes(length: Int) = {
      val bytes = new Array[Byte](length)
      Random.nextBytes(bytes)
      bytes
    }

    (columnType match {
      case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte
      case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort
      case INT => Random.nextInt()
      case LONG => Random.nextLong()
      case FLOAT => Random.nextFloat()
      case DOUBLE => Random.nextDouble()
      case FIXED_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale)
      case STRING => UTF8String(Random.nextString(Random.nextInt(32)))
      case BOOLEAN => Random.nextBoolean()
      case BINARY => randomBytes(Random.nextInt(32))
      case DATE => Random.nextInt()
      case TIMESTAMP =>
        val timestamp = new Timestamp(Random.nextLong())
        timestamp.setNanos(Random.nextInt(999999999))
        timestamp
      case _ =>
        // Using a random one-element map instead of an arbitrary object
        Map(Random.nextInt() -> Random.nextString(Random.nextInt(32)))
    }).asInstanceOf[JvmType]
  }

  def makeRandomValues(
      head: ColumnType[_ <: DataType, _],
      tail: ColumnType[_ <: DataType, _]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail)

  def makeRandomValues(columnTypes: Seq[ColumnType[_ <: DataType, _]]): Seq[Any] = {
    columnTypes.map(makeRandomValue(_))
  }

  def makeUniqueRandomValues[T <: DataType, JvmType](
      columnType: ColumnType[T, JvmType],
      count: Int): Seq[JvmType] = {

    Iterator.iterate(HashSet.empty[JvmType]) { set =>
      set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next()
    }.drop(count).next().toSeq
  }

  def makeRandomRow(
      head: ColumnType[_ <: DataType, _],
      tail: ColumnType[_ <: DataType, _]*): Row = makeRandomRow(Seq(head) ++ tail)

  def makeRandomRow(columnTypes: Seq[ColumnType[_ <: DataType, _]]): Row = {
    val row = new GenericMutableRow(columnTypes.length)
    makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) =>
      row(index) = value
    }
    row
  }

  def makeUniqueValuesAndSingleValueRows[T <: AtomicType](
      columnType: NativeColumnType[T],
      count: Int): (Seq[T#InternalType], Seq[GenericMutableRow]) = {

    val values = makeUniqueRandomValues(columnType, count)
    val rows = values.map { value =>
      val row = new GenericMutableRow(1)
      row(0) = value
      row
    }

    (values, rows)
  }
} 
Example 135
Source File: ArrayEncoders.scala    From quill   with Apache License 2.0 5 votes vote down vote up
package io.getquill.context.jasync

import java.sql.Timestamp
import java.time.LocalDate
import java.util.Date

import io.getquill.PostgresJAsyncContext
import io.getquill.context.sql.encoding.ArrayEncoding
import org.joda.time.{ DateTime => JodaDateTime, LocalDate => JodaLocalDate, LocalDateTime => JodaLocalDateTime }

trait ArrayEncoders extends ArrayEncoding {
  self: PostgresJAsyncContext[_] =>

  implicit def arrayStringEncoder[Col <: Seq[String]]: Encoder[Col] = arrayRawEncoder[String, Col]
  implicit def arrayBigDecimalEncoder[Col <: Seq[BigDecimal]]: Encoder[Col] = arrayRawEncoder[BigDecimal, Col]
  implicit def arrayBooleanEncoder[Col <: Seq[Boolean]]: Encoder[Col] = arrayRawEncoder[Boolean, Col]
  implicit def arrayByteEncoder[Col <: Seq[Byte]]: Encoder[Col] = arrayRawEncoder[Byte, Col]
  implicit def arrayShortEncoder[Col <: Seq[Short]]: Encoder[Col] = arrayRawEncoder[Short, Col]
  implicit def arrayIntEncoder[Col <: Seq[Index]]: Encoder[Col] = arrayRawEncoder[Index, Col]
  implicit def arrayLongEncoder[Col <: Seq[Long]]: Encoder[Col] = arrayRawEncoder[Long, Col]
  implicit def arrayFloatEncoder[Col <: Seq[Float]]: Encoder[Col] = arrayRawEncoder[Float, Col]
  implicit def arrayDoubleEncoder[Col <: Seq[Double]]: Encoder[Col] = arrayRawEncoder[Double, Col]
  implicit def arrayDateEncoder[Col <: Seq[Date]]: Encoder[Col] = arrayEncoder[Date, Col](d => Timestamp.from(d.toInstant))
  implicit def arrayJodaDateTimeEncoder[Col <: Seq[JodaDateTime]]: Encoder[Col] = arrayEncoder[JodaDateTime, Col](_.toLocalDateTime)
  implicit def arrayJodaLocalDateTimeEncoder[Col <: Seq[JodaLocalDateTime]]: Encoder[Col] = arrayRawEncoder[JodaLocalDateTime, Col]
  implicit def arrayJodaLocalDateEncoder[Col <: Seq[JodaLocalDate]]: Encoder[Col] = arrayRawEncoder[JodaLocalDate, Col]
  implicit def arrayLocalDateEncoder[Col <: Seq[LocalDate]]: Encoder[Col] = arrayEncoder[LocalDate, Col](encodeLocalDate.f)

  def arrayEncoder[T, Col <: Seq[T]](mapper: T => Any): Encoder[Col] =
    encoder[Col]((col: Col) => col.toIndexedSeq.map(mapper).mkString("{", ",", "}"), SqlTypes.ARRAY)

  def arrayRawEncoder[T, Col <: Seq[T]]: Encoder[Col] = arrayEncoder[T, Col](identity)

} 
Example 136
Source File: FinagleMysqlEncoders.scala    From quill   with Apache License 2.0 5 votes vote down vote up
package io.getquill.context.finagle.mysql

import java.sql.Timestamp
import java.time.{ LocalDate, LocalDateTime }
import java.util.{ Date, UUID }

import com.twitter.finagle.mysql.CanBeParameter._
import com.twitter.finagle.mysql.Parameter.wrap
import com.twitter.finagle.mysql._
import io.getquill.FinagleMysqlContext

trait FinagleMysqlEncoders {
  this: FinagleMysqlContext[_] =>

  type Encoder[T] = FinagleMySqlEncoder[T]

  case class FinagleMySqlEncoder[T](encoder: BaseEncoder[T]) extends BaseEncoder[T] {
    override def apply(index: Index, value: T, row: PrepareRow) =
      encoder(index, value, row)
  }

  def encoder[T](f: T => Parameter): Encoder[T] =
    FinagleMySqlEncoder((index, value, row) => row :+ f(value))

  def encoder[T](implicit cbp: CanBeParameter[T]): Encoder[T] =
    encoder[T]((v: T) => v: Parameter)

  private[this] val nullEncoder = encoder((_: Null) => Parameter.NullParameter)

  implicit def optionEncoder[T](implicit e: Encoder[T]): Encoder[Option[T]] =
    FinagleMySqlEncoder { (index, value, row) =>
      value match {
        case None    => nullEncoder.encoder(index, null, row)
        case Some(v) => e.encoder(index, v, row)
      }
    }

  implicit def mappedEncoder[I, O](implicit mapped: MappedEncoding[I, O], e: Encoder[O]): Encoder[I] =
    FinagleMySqlEncoder(mappedBaseEncoder(mapped, e.encoder))

  implicit val stringEncoder: Encoder[String] = encoder[String]
  implicit val bigDecimalEncoder: Encoder[BigDecimal] =
    encoder[BigDecimal] { (value: BigDecimal) =>
      BigDecimalValue(value): Parameter
    }
  implicit val booleanEncoder: Encoder[Boolean] = encoder[Boolean]
  implicit val byteEncoder: Encoder[Byte] = encoder[Byte]
  implicit val shortEncoder: Encoder[Short] = encoder[Short]
  implicit val intEncoder: Encoder[Int] = encoder[Int]
  implicit val longEncoder: Encoder[Long] = encoder[Long]
  implicit val floatEncoder: Encoder[Float] = encoder[Float]
  implicit val doubleEncoder: Encoder[Double] = encoder[Double]
  implicit val byteArrayEncoder: Encoder[Array[Byte]] = encoder[Array[Byte]]
  implicit val dateEncoder: Encoder[Date] = encoder[Date] {
    (value: Date) => timestampValue(new Timestamp(value.getTime)): Parameter
  }
  implicit val localDateEncoder: Encoder[LocalDate] = encoder[LocalDate] {
    (d: LocalDate) => DateValue(java.sql.Date.valueOf(d)): Parameter
  }
  implicit val localDateTimeEncoder: Encoder[LocalDateTime] = encoder[LocalDateTime] {
    (d: LocalDateTime) => timestampValue(new Timestamp(d.atZone(injectionTimeZone.toZoneId).toInstant.toEpochMilli)): Parameter
  }
  implicit val uuidEncoder: Encoder[UUID] = mappedEncoder(MappedEncoding(_.toString), stringEncoder)
} 
Example 137
Source File: ArrayDecoders.scala    From quill   with Apache License 2.0 5 votes vote down vote up
package io.getquill.context.jdbc

import java.sql.Timestamp
import java.time.LocalDate
import java.util.Date
import java.sql.{ Date => SqlDate }
import java.math.{ BigDecimal => JBigDecimal }

import io.getquill.context.sql.encoding.ArrayEncoding
import io.getquill.util.Messages.fail

import scala.collection.compat._
import scala.reflect.ClassTag

trait ArrayDecoders extends ArrayEncoding {
  self: JdbcContextBase[_, _] =>

  implicit def arrayStringDecoder[Col <: Seq[String]](implicit bf: CBF[String, Col]): Decoder[Col] = arrayRawDecoder[String, Col]
  implicit def arrayBigDecimalDecoder[Col <: Seq[BigDecimal]](implicit bf: CBF[BigDecimal, Col]): Decoder[Col] = arrayDecoder[JBigDecimal, BigDecimal, Col](BigDecimal.apply)
  implicit def arrayBooleanDecoder[Col <: Seq[Boolean]](implicit bf: CBF[Boolean, Col]): Decoder[Col] = arrayRawDecoder[Boolean, Col]
  implicit def arrayByteDecoder[Col <: Seq[Byte]](implicit bf: CBF[Byte, Col]): Decoder[Col] = arrayRawDecoder[Byte, Col]
  implicit def arrayShortDecoder[Col <: Seq[Short]](implicit bf: CBF[Short, Col]): Decoder[Col] = arrayRawDecoder[Short, Col]
  implicit def arrayIntDecoder[Col <: Seq[Int]](implicit bf: CBF[Int, Col]): Decoder[Col] = arrayRawDecoder[Int, Col]
  implicit def arrayLongDecoder[Col <: Seq[Long]](implicit bf: CBF[Long, Col]): Decoder[Col] = arrayRawDecoder[Long, Col]
  implicit def arrayFloatDecoder[Col <: Seq[Float]](implicit bf: CBF[Float, Col]): Decoder[Col] = arrayRawDecoder[Float, Col]
  implicit def arrayDoubleDecoder[Col <: Seq[Double]](implicit bf: CBF[Double, Col]): Decoder[Col] = arrayRawDecoder[Double, Col]
  implicit def arrayDateDecoder[Col <: Seq[Date]](implicit bf: CBF[Date, Col]): Decoder[Col] = arrayRawDecoder[Date, Col]
  implicit def arrayTimestampDecoder[Col <: Seq[Timestamp]](implicit bf: CBF[Timestamp, Col]): Decoder[Col] = arrayRawDecoder[Timestamp, Col]
  implicit def arrayLocalDateDecoder[Col <: Seq[LocalDate]](implicit bf: CBF[LocalDate, Col]): Decoder[Col] = arrayDecoder[SqlDate, LocalDate, Col](_.toLocalDate)

  // the generic arrayDecoder helper used by the decoders above is elided in this excerpt

  def arrayRawDecoder[T: ClassTag, Col <: Seq[T]](implicit bf: CBF[T, Col]): Decoder[Col] =
    arrayDecoder[T, T, Col](identity)
} 
Example 138
Source File: Encoders.scala    From quill   with Apache License 2.0 5 votes vote down vote up
package io.getquill.context.jdbc

import java.sql.{ Date, Timestamp, Types }
import java.time.{ LocalDate, LocalDateTime }
import java.util.{ Calendar, TimeZone }
import java.{ sql, util }

trait Encoders {
  this: JdbcContextBase[_, _] =>

  type Encoder[T] = JdbcEncoder[T]

  protected val dateTimeZone = TimeZone.getDefault

  case class JdbcEncoder[T](sqlType: Int, encoder: BaseEncoder[T]) extends BaseEncoder[T] {
    override def apply(index: Index, value: T, row: PrepareRow) =
      encoder(index + 1, value, row)
  }

  def encoder[T](sqlType: Int, f: (Index, T, PrepareRow) => Unit): Encoder[T] =
    JdbcEncoder(sqlType, (index: Index, value: T, row: PrepareRow) => {
      f(index, value, row)
      row
    })

  def encoder[T](sqlType: Int, f: PrepareRow => (Index, T) => Unit): Encoder[T] =
    encoder(sqlType, (index: Index, value: T, row: PrepareRow) => f(row)(index, value))

  implicit def mappedEncoder[I, O](implicit mapped: MappedEncoding[I, O], e: Encoder[O]): Encoder[I] =
    JdbcEncoder(e.sqlType, mappedBaseEncoder(mapped, e.encoder))

  private[this] val nullEncoder: Encoder[Int] = encoder(Types.INTEGER, _.setNull)

  implicit def optionEncoder[T](implicit d: Encoder[T]): Encoder[Option[T]] =
    JdbcEncoder(
      d.sqlType,
      (index, value, row) =>
        value match {
          case Some(v) => d.encoder(index, v, row)
          case None    => nullEncoder.encoder(index, d.sqlType, row)
        }
    )

  implicit val stringEncoder: Encoder[String] = encoder(Types.VARCHAR, _.setString)
  implicit val bigDecimalEncoder: Encoder[BigDecimal] =
    encoder(Types.NUMERIC, (index, value, row) => row.setBigDecimal(index, value.bigDecimal))
  implicit val byteEncoder: Encoder[Byte] = encoder(Types.TINYINT, _.setByte)
  implicit val shortEncoder: Encoder[Short] = encoder(Types.SMALLINT, _.setShort)
  implicit val intEncoder: Encoder[Int] = encoder(Types.INTEGER, _.setInt)
  implicit val longEncoder: Encoder[Long] = encoder(Types.BIGINT, _.setLong)
  implicit val floatEncoder: Encoder[Float] = encoder(Types.FLOAT, _.setFloat)
  implicit val doubleEncoder: Encoder[Double] = encoder(Types.DOUBLE, _.setDouble)
  implicit val byteArrayEncoder: Encoder[Array[Byte]] = encoder(Types.VARBINARY, _.setBytes)
  implicit val dateEncoder: Encoder[util.Date] =
    encoder(Types.TIMESTAMP, (index, value, row) =>
      row.setTimestamp(index, new sql.Timestamp(value.getTime), Calendar.getInstance(dateTimeZone)))
  implicit val localDateEncoder: Encoder[LocalDate] =
    encoder(Types.DATE, (index, value, row) =>
      row.setDate(index, Date.valueOf(value), Calendar.getInstance(dateTimeZone)))
  implicit val localDateTimeEncoder: Encoder[LocalDateTime] =
    encoder(Types.TIMESTAMP, (index, value, row) =>
      row.setTimestamp(index, Timestamp.valueOf(value), Calendar.getInstance(dateTimeZone)))
} 
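A minimal sketch of what dateEncoder and localDateTimeEncoder above boil down to at the plain JDBC level, assuming `conn` is an open java.sql.Connection and the table and column names are placeholders:

import java.sql.{Connection, PreparedStatement, Timestamp}
import java.time.LocalDateTime
import java.util.{Calendar, Date, TimeZone}

def bindTimestamps(conn: Connection, d: Date, ldt: LocalDateTime): PreparedStatement = {
  val ps  = conn.prepareStatement("insert into events (created_at, updated_at) values (?, ?)")
  val cal = Calendar.getInstance(TimeZone.getDefault) // mirrors dateTimeZone above
  ps.setTimestamp(1, new Timestamp(d.getTime), cal)   // java.util.Date -> TIMESTAMP
  ps.setTimestamp(2, Timestamp.valueOf(ldt), cal)     // LocalDateTime  -> TIMESTAMP
  ps
}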
Example 139
Source File: ArrayJdbcEncodingSpec.scala    From quill   with Apache License 2.0 5 votes vote down vote up
package io.getquill.context.jdbc.postgres

import java.sql.Timestamp
import java.time.LocalDate
import java.util.UUID

import io.getquill.context.sql.encoding.ArrayEncodingBaseSpec
import io.getquill.{ Literal, PostgresJdbcContext }

class ArrayJdbcEncodingSpec extends ArrayEncodingBaseSpec {
  val ctx = testContext
  import ctx._

  val q = quote(query[ArraysTestEntity])
  val corrected = e.copy(timestamps = e.timestamps.map(d => new Timestamp(d.getTime)))

  "Support all sql base types and `Seq` implementers" in {
    ctx.run(q.insert(lift(corrected)))
    val actual = ctx.run(q).head
    actual mustEqual corrected
    baseEntityDeepCheck(actual, corrected)
  }

  "Support Seq encoding basing on MappedEncoding" in {
    val wrapQ = quote(querySchema[WrapEntity]("ArraysTestEntity"))
    ctx.run(wrapQ.insert(lift(wrapE)))
    ctx.run(wrapQ).head.texts mustBe wrapE.texts
  }

  "Timestamps" in {
    case class Timestamps(timestamps: List[Timestamp])
    val tE = Timestamps(List(new Timestamp(System.currentTimeMillis())))
    val tQ = quote(querySchema[Timestamps]("ArraysTestEntity"))
    ctx.run(tQ.insert(lift(tE)))
    ctx.run(tQ).head.timestamps mustBe tE.timestamps
  }

  "Catch invalid decoders" in {
    val newCtx = new PostgresJdbcContext(Literal, "testPostgresDB") {
      // avoid transforming from java.sql.Date to java.time.LocalDate
      override implicit def arrayLocalDateDecoder[Col <: Seq[LocalDate]](implicit bf: CBF[LocalDate, Col]): Decoder[Col] =
        arrayDecoder[LocalDate, LocalDate, Col](identity)
    }
    import newCtx._
    newCtx.run(query[ArraysTestEntity].insert(lift(corrected)))
    intercept[IllegalStateException] {
      newCtx.run(query[ArraysTestEntity]).head mustBe corrected
    }
    newCtx.close()
  }

  "Custom decoders/encoders" in {
    case class Entity(uuids: List[UUID])
    val e = Entity(List(UUID.randomUUID(), UUID.randomUUID()))
    val q = quote(querySchema[Entity]("ArraysTestEntity"))

    implicit def arrayUUIDEncoder[Col <: Seq[UUID]]: Encoder[Col] = arrayRawEncoder[UUID, Col]("uuid")
    implicit def arrayUUIDDecoder[Col <: Seq[UUID]](implicit bf: CBF[UUID, Col]): Decoder[Col] = arrayRawDecoder[UUID, Col]

    ctx.run(q.insert(lift(e)))
    ctx.run(q).head.uuids mustBe e.uuids
  }

  "Arrays in where clause" in {
    ctx.run(q.insert(lift(corrected)))
    val actual1 = ctx.run(q.filter(_.texts == lift(List("test"))))
    val actual2 = ctx.run(q.filter(_.texts == lift(List("test2"))))
    actual1 mustEqual List(corrected)
    actual2 mustEqual List()
  }

  "empty array on found null" in {
    case class ArraysTestEntity(texts: Option[List[String]])
    ctx.run(query[ArraysTestEntity].insert(lift(ArraysTestEntity(None))))

    case class E(texts: List[String])
    ctx.run(querySchema[E]("ArraysTestEntity")).headOption.map(_.texts) mustBe Some(Nil)
  }

  override protected def beforeEach(): Unit = {
    ctx.run(q.delete)
    ()
  }
} 
Example 140
Source File: DateTimeConverter.scala    From seahorse   with Apache License 2.0 5 votes vote down vote up
package ai.deepsense.commons.datetime

import java.sql.Timestamp

import org.joda.time.format.{DateTimeFormatter, ISODateTimeFormat}
import org.joda.time.{DateTime, DateTimeZone}

trait DateTimeConverter {
  val zone: DateTimeZone = DateTimeZone.getDefault
  val dateTimeFormatter: DateTimeFormatter = ISODateTimeFormat.dateTime()
  def toString(dateTime: DateTime): String = dateTime.toString(dateTimeFormatter)
  def parseDateTime(s: String): DateTime = dateTimeFormatter.parseDateTime(s).withZone(zone)
  def parseTimestamp(s: String): Timestamp = new Timestamp(parseDateTime(s).getMillis)
  def now: DateTime = new DateTime(zone)
  def fromMillis(millis: Long): DateTime = new DateTime(zone).withMillis(millis)
  def dateTime(
      year: Int,
      monthOfyear: Int,
      dayOfMonth: Int,
      hourOfDay: Int = 0,
      minutesOfHour: Int = 0,
      secondsOfMinute: Int = 0): DateTime =
    new DateTime(year, monthOfyear, dayOfMonth, hourOfDay, minutesOfHour, secondsOfMinute, zone)
  def dateTimeFromUTC(
      year: Int,
      monthOfyear: Int,
      dayOfMonth: Int,
      hourOfDay: Int = 0,
      minutesOfHour: Int = 0,
      secondsOfMinute: Int = 0): DateTime =
    new DateTime(
      year,
      monthOfyear,
      dayOfMonth,
      hourOfDay,
      minutesOfHour,
      secondsOfMinute,
      DateTimeZone.UTC).withZone(DateTimeConverter.zone)
}

object DateTimeConverter extends DateTimeConverter 
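A brief usage sketch of the converter object above; the input string is illustrative and must be in ISO-8601 form, since parseDateTime uses ISODateTimeFormat.dateTime():

import java.sql.Timestamp
import org.joda.time.DateTime

// Parse an ISO-8601 string into a java.sql.Timestamp, then format epoch millis back out.
val ts: Timestamp       = DateTimeConverter.parseTimestamp("2015-01-01T12:00:00.000Z")
val roundTrip: DateTime = DateTimeConverter.fromMillis(ts.getTime)
val formatted: String   = DateTimeConverter.toString(roundTrip)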
Example 141
Source File: CsvSchemaStringifierBeforeCsvWriting.scala    From seahorse   with Apache License 2.0 5 votes vote down vote up
package ai.deepsense.deeplang.doperations.readwritedataframe.filestorage.csv

import java.sql.Timestamp

import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

import ai.deepsense.commons.datetime.DateTimeConverter
import ai.deepsense.deeplang.ExecutionContext
import ai.deepsense.deeplang.doperables.dataframe.DataFrame
import ai.deepsense.deeplang.doperations.exceptions.UnsupportedColumnTypeException


object CsvSchemaStringifierBeforeCsvWriting {

  def preprocess(dataFrame: DataFrame)
                (implicit context: ExecutionContext): DataFrame = {
    requireNoComplexTypes(dataFrame)

    val schema = dataFrame.sparkDataFrame.schema
    def stringifySelectedTypes(schema: StructType): StructType = {
      StructType(
        schema.map {
          case field: StructField => field.copy(dataType = StringType)
        }
      )
    }

    context.dataFrameBuilder.buildDataFrame(
      stringifySelectedTypes(schema),
      dataFrame.sparkDataFrame.rdd.map(stringifySelectedCells(schema)))
  }

  private def requireNoComplexTypes(dataFrame: DataFrame): Unit = {
    dataFrame.sparkDataFrame.schema.fields.map(structField =>
      (structField.dataType, structField.name)
    ).foreach {
      case (dataType, columnName) => dataType match {
        case _: ArrayType | _: MapType | _: StructType =>
          throw UnsupportedColumnTypeException(columnName, dataType)
        case _ => ()
      }
    }

  }

  private def stringifySelectedCells(originalSchema: StructType)(row: Row): Row = {
    Row.fromSeq(
      row.toSeq.zipWithIndex map { case (value, index) =>
        (value, originalSchema(index).dataType) match {
          case (null, _) => ""
          case (_, BooleanType) =>
            if (value.asInstanceOf[Boolean]) "1" else "0"
          case (_, TimestampType) =>
            DateTimeConverter.toString(
              DateTimeConverter.fromMillis(value.asInstanceOf[Timestamp].getTime))
          case _ => value.toString
        }
      })
  }

} 
Example 142
Source File: WriteReadDataFrameWithDriverFilesIntegSpec.scala    From seahorse   with Apache License 2.0 5 votes vote down vote up
package ai.deepsense.deeplang.doperations

import java.sql.Timestamp

import org.apache.spark.sql.Row
import org.apache.spark.sql.types._
import org.scalatest.BeforeAndAfter

import ai.deepsense.deeplang.{TestFiles, DeeplangIntegTestSupport}
import ai.deepsense.deeplang.doperables.dataframe.DataFrame
import ai.deepsense.deeplang.doperations.inout._

class WriteReadDataFrameWithDriverFilesIntegSpec
  extends DeeplangIntegTestSupport
  with BeforeAndAfter with TestFiles {

  import DeeplangIntegTestSupport._

  val schema: StructType =
    StructType(Seq(
      StructField("boolean", BooleanType),
      StructField("double", DoubleType),
      StructField("string", StringType)
    ))

  val rows = {
    val base = Seq(
      Row(true, 0.45, "3.14"),
      Row(false, null, "\"testing...\""),
      Row(false, 3.14159, "Hello, world!"),
      // in case of CSV, an empty string is the same as null - no way around it
      Row(null, null, "")
    )
    val repeatedFewTimes = (1 to 10).flatMap(_ => base)
    repeatedFewTimes
  }

  lazy val dataFrame = createDataFrame(rows, schema)

  "WriteDataFrame and ReadDataFrame" should {
    "write and read CSV file" in {
      val wdf =
        new WriteDataFrame()
          .setStorageType(
            new OutputStorageTypeChoice.File()
              .setOutputFile(absoluteTestsDirPath.fullPath + "/test_files")
              .setFileFormat(
                new OutputFileFormatChoice.Csv()
                  .setCsvColumnSeparator(CsvParameters.ColumnSeparatorChoice.Comma())
                  .setNamesIncluded(true)))
      wdf.executeUntyped(Vector(dataFrame))(executionContext)

      val rdf =
        new ReadDataFrame()
          .setStorageType(
            new InputStorageTypeChoice.File()
              .setSourceFile(absoluteTestsDirPath.fullPath + "/test_files")
              .setFileFormat(new InputFileFormatChoice.Csv()
                .setCsvColumnSeparator(CsvParameters.ColumnSeparatorChoice.Comma())
                .setNamesIncluded(true)
                .setShouldConvertToBoolean(true)))
      val loadedDataFrame = rdf.executeUntyped(Vector())(executionContext).head.asInstanceOf[DataFrame]

      assertDataFramesEqual(loadedDataFrame, dataFrame, checkRowOrder = false)
    }

    "write and read JSON file" in {
      val wdf =
        new WriteDataFrame()
          .setStorageType(new OutputStorageTypeChoice.File()
            .setOutputFile(absoluteTestsDirPath.fullPath + "json")
            .setFileFormat(new OutputFileFormatChoice.Json()))

      wdf.executeUntyped(Vector(dataFrame))(executionContext)

      val rdf =
        new ReadDataFrame()
          .setStorageType(new InputStorageTypeChoice.File()
            .setSourceFile(absoluteTestsDirPath.fullPath + "json")
            .setFileFormat(new InputFileFormatChoice.Json()))
      val loadedDataFrame = rdf.executeUntyped(Vector())(executionContext).head.asInstanceOf[DataFrame]

      assertDataFramesEqual(loadedDataFrame, dataFrame, checkRowOrder = false)
    }
  }
} 
Example 143
Source File: DataFrameReportPerformanceSpec.scala    From seahorse   with Apache License 2.0 5 votes vote down vote up
package ai.deepsense.deeplang.doperables.dataframe

import java.sql.Timestamp
import java.text.{DateFormat, SimpleDateFormat}
import java.util.TimeZone

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{DoubleType, StructField, StructType, TimestampType}
import org.scalatest.{BeforeAndAfter, Ignore}

import ai.deepsense.commons.utils.{DoubleUtils, Logging}
import ai.deepsense.deeplang.{TestFiles, DeeplangIntegTestSupport}

// It's ignored because it has no assertions; it only prints report generation time.
@Ignore
class DataFrameReportPerformanceSpec
    extends DeeplangIntegTestSupport
    with BeforeAndAfter
    with TestFiles
    with Logging {
  val testFile = absoluteTestsDirPath.pathWithoutScheme + "/demand_without_header.csv"

  "DataFrame" should {
    "generate report" when {
      "DataFrame has 17K of rows" in {
        val numberOfTries = 10
        var results: Seq[Double] = Seq()
        for (i <- 1 to numberOfTries) {
          val dataFrame: DataFrame = demandDataFrame()
          val start = System.nanoTime()
          val report = dataFrame.report()
          val end = System.nanoTime()
          val time1: Double = (end - start).toDouble / 1000000000.0
          results = results :+ time1
          logger.debug("Report generation time: {}", DoubleUtils.double2String(time1))
        }
        logger.debug(
          "Mean report generation time: {}",
          DoubleUtils.double2String(results.fold(0D)(_ + _) / numberOfTries.toDouble))
      }
    }
  }

  private def demandDataFrame(): DataFrame = {
    val rddString: RDD[String] = executionContext.sparkContext.textFile(testFile)
    val data: RDD[Row] = rddString.map(DataFrameHelpers.demandString2Row)
    executionContext.dataFrameBuilder.buildDataFrame(demandSchema, data)
  }

  private def demandSchema: StructType = StructType(Seq(
    StructField("datetime", TimestampType),
    StructField("log_count", DoubleType),
    StructField("workingday", DoubleType),
    StructField("holiday", DoubleType),
    StructField("season2", DoubleType),
    StructField("season3", DoubleType),
    StructField("season4", DoubleType)))

  private def timestamp(s: String): Timestamp = {
    val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    format.setTimeZone(TimeZone.getTimeZone("UTC"))
    new Timestamp(format.parse(s).getTime)
  }
}

private object DataFrameHelpers {
  def demandString2Row(s: String): Row = {
    val split = s.split(",")
    Row(
      timestamp(split(0)),
      split(1).toDouble,
      split(2).toDouble,
      split(3).toDouble,
      split(4).toDouble,
      split(5).toDouble,
      split(6).toDouble
    )
  }

  private def timestamp(s: String): Timestamp = {
    val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    format.setTimeZone(TimeZone.getTimeZone("UTC"))
    new Timestamp(format.parse(s).getTime)
  }
} 
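The timestamp helpers above pin the formatter to UTC so the resulting epoch millis do not depend on the JVM's default zone. A small standalone sketch of that idiom (the date string is illustrative):

import java.sql.Timestamp
import java.text.SimpleDateFormat
import java.util.TimeZone

val format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
format.setTimeZone(TimeZone.getTimeZone("UTC"))
val ts = new Timestamp(format.parse("2011-01-01 00:00:00").getTime) // 1293840000000L in UTC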
Example 144
Source File: StatisticsForContinuousIntegSpec.scala    From seahorse   with Apache License 2.0 5 votes vote down vote up
package ai.deepsense.deeplang.doperables.dataframe.report.distribution

import java.sql.Timestamp

import org.apache.spark.rdd.RDD
import org.apache.spark.sql
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

import ai.deepsense.commons.datetime.DateTimeConverter
import ai.deepsense.deeplang.DeeplangIntegTestSupport
import ai.deepsense.deeplang.doperables.dataframe.{DataFrame, DataFrameTestFactory}
import ai.deepsense.reportlib.model._

class StatisticsForContinuousIntegSpec extends DeeplangIntegTestSupport with DataFrameTestFactory {

  "Statistics (Min, max and mean values)" should {
    "be calculated for each continuous column in distribution" when {
      "data is of type int" in {
        val distribution = distributionForInt(1, 2, 3, 4, 5)
        distribution.statistics.min shouldEqual Some("1")
        distribution.statistics.max shouldEqual Some("5")
        distribution.statistics.mean shouldEqual Some("3")
      }
      "data is of type Timestamp" in {
        val distribution =
          distributionForTimestamps(new Timestamp(1000), new Timestamp(2000), new Timestamp(3000))
        distribution.statistics.min shouldEqual Some(formatDate(1000))
        distribution.statistics.max shouldEqual Some(formatDate(3000))
        distribution.statistics.mean shouldEqual Some(formatDate(2000))
      }
    }
  }
  "Null value in data" should {
    val distribution = distributionForDouble(1, 2, 3, 4, Double.NaN, 5)
    "not be skipped in calculating min and max" in {
      distribution.statistics.min shouldEqual Some("1")
      distribution.statistics.max shouldEqual Some("5")
    }
    "result in mean value NaN" in {
      distribution.statistics.mean shouldEqual Some("NaN")
    }
  }

  lazy val columnName = "column_name"

  private def distributionForDouble(data: Double*): ContinuousDistribution = {
    distributionFor(data, DoubleType)
  }

  private def distributionForInt(data: Int*): ContinuousDistribution = {
    distributionFor(data, IntegerType)
  }

  private def distributionForTimestamps(data: Timestamp*): ContinuousDistribution = {
    distributionFor(data, TimestampType)
  }

  private def distributionFor(data: Seq[Any], dataType: DataType): ContinuousDistribution = {
    val schema = StructType(Array(
      StructField(columnName, dataType)
    ))

    val rows = data.map(v => Row(v))
    val dataFrame = createDataFrame(rows, schema)

    val report = dataFrame.report()
    report.content.distributions(columnName).asInstanceOf[ContinuousDistribution]
  }

  def buildDataFrame(schema: StructType, data: RDD[Row]): DataFrame = {
    val dataFrame: sql.DataFrame = sparkSQLSession.createDataFrame(data, schema)
    DataFrame.fromSparkDataFrame(dataFrame)
  }

  def formatDate(millis: Long): String = {
    DateTimeConverter.toString(DateTimeConverter.fromMillis(millis))
  }

} 
Example 145
Source File: DateEncoderTest.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.avro4s.record.encoder

import java.sql.{Date, Timestamp}
import java.time.{Instant, LocalDate, LocalDateTime, LocalTime}

import com.sksamuel.avro4s.{AvroSchema, DefaultFieldMapper, Encoder, ImmutableRecord}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

//noinspection ScalaDeprecation
class DateEncoderTest extends AnyFunSuite with Matchers {

  test("encode LocalTime as TIME-MILLIS") {
    case class Foo(s: LocalTime)
    val schema = AvroSchema[Foo]
    Encoder[Foo].encode(Foo(LocalTime.of(12, 50, 45))) shouldBe ImmutableRecord(schema, Vector(java.lang.Long.valueOf(46245000000L)))
  }

  test("encode LocalDate as DATE") {
    case class Foo(s: LocalDate)
    val schema = AvroSchema[Foo]
    Encoder[Foo].encode(Foo(LocalDate.of(2018, 9, 10))) shouldBe ImmutableRecord(schema, Vector(java.lang.Integer.valueOf(17784)))
  }

  test("encode java.sql.Date as DATE") {
    case class Foo(s: Date)
    val schema = AvroSchema[Foo]
    Encoder[Foo].encode(Foo(Date.valueOf(LocalDate.of(2018, 9, 10)))) shouldBe ImmutableRecord(schema, Vector(java.lang.Integer.valueOf(17784)))
  }

  test("encode LocalDateTime as timestamp-nanos") {
    case class Foo(s: LocalDateTime)
    val schema = AvroSchema[Foo]
    Encoder[Foo].encode(Foo(LocalDateTime.of(2018, 9, 10, 11, 58, 59, 123))) shouldBe ImmutableRecord(schema, Vector(java.lang.Long.valueOf(1536580739000000123L)))
    Encoder[Foo].encode(Foo(LocalDateTime.of(2018, 9, 10, 11, 58, 59, 123009))) shouldBe ImmutableRecord(schema, Vector(java.lang.Long.valueOf(1536580739000123009L)))
    Encoder[Foo].encode(Foo(LocalDateTime.of(2018, 9, 10, 11, 58, 59, 328187943))) shouldBe ImmutableRecord(schema, Vector(java.lang.Long.valueOf(1536580739328187943L)))
  }

  test("encode Timestamp as TIMESTAMP-MILLIS") {
    case class Foo(s: Timestamp)
    val schema = AvroSchema[Foo]
    Encoder[Foo].encode(Foo(Timestamp.from(Instant.ofEpochMilli(1538312231000L)))) shouldBe ImmutableRecord(schema, Vector(java.lang.Long.valueOf(1538312231000L)))
  }

  test("encode Instant as TIMESTAMP-MILLIS") {
    case class Foo(s: Instant)
    val schema = AvroSchema[Foo]
    Encoder[Foo].encode(Foo(Instant.ofEpochMilli(1538312231000L))) shouldBe ImmutableRecord(schema, Vector(java.lang.Long.valueOf(1538312231000L)))
  }
} 
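The Timestamp and Instant cases above both encode to TIMESTAMP-MILLIS, which suggests a lossless round trip at millisecond precision. A minimal sketch of that equivalence in plain Java time terms:

import java.sql.Timestamp
import java.time.Instant

val instant = Instant.ofEpochMilli(1538312231000L)
val ts      = Timestamp.from(instant)
assert(ts.getTime == 1538312231000L && ts.toInstant == instant)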
Example 146
Source File: DateDecoderTest.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.avro4s.record.decoder

import java.sql.{Date, Timestamp}
import java.time.{Instant, LocalDate, LocalDateTime, LocalTime}

import com.sksamuel.avro4s.SchemaFor.TimestampNanosLogicalType
import com.sksamuel.avro4s.{AvroSchema, Decoder, SchemaFor}
import org.apache.avro.generic.GenericData
import org.apache.avro.{LogicalTypes, SchemaBuilder}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

//noinspection ScalaDeprecation
class DateDecoderTest extends AnyFunSuite with Matchers {

  case class WithLocalTime(z: LocalTime)
  case class WithLocalDate(z: LocalDate)
  case class WithDate(z: Date)
  case class WithLocalDateTime(z: LocalDateTime)
  case class WithTimestamp(z: Timestamp)
  case class WithInstant(z: Instant)

  test("decode int to LocalTime") {
    val schema = AvroSchema[WithLocalTime]
    val record = new GenericData.Record(schema)
    record.put("z", 46245000000L)
    Decoder[WithLocalTime].decode(record) shouldBe WithLocalTime(LocalTime.of(12, 50, 45))
  }

  test("decode int to LocalDate") {
    val schema = AvroSchema[WithLocalDate]
    val record = new GenericData.Record(schema)
    record.put("z", 17784)
    Decoder[WithLocalDate].decode(record) shouldBe WithLocalDate(LocalDate.of(2018, 9, 10))
  }

  test("decode int to java.sql.Date") {
    val schema = AvroSchema[WithDate]
    val record = new GenericData.Record(schema)
    record.put("z", 17784)
    Decoder[WithDate].decode(record) shouldBe WithDate(Date.valueOf(LocalDate.of(2018, 9, 10)))
  }

  test("decode timestamp-millis to LocalDateTime") {
    val dateSchema = LogicalTypes.timestampMillis().addToSchema(SchemaBuilder.builder.longType)
    val schema = SchemaBuilder.record("foo").fields().name("z").`type`(dateSchema).noDefault().endRecord()
    val record = new GenericData.Record(schema)
    record.put("z", 1572707106376L)
    Decoder[WithLocalDateTime].withSchema(SchemaFor(schema)).decode(record) shouldBe WithLocalDateTime(
      LocalDateTime.of(2019, 11, 2, 15, 5, 6, 376000000))
  }

  test("decode timestamp-micros to LocalDateTime") {
    val dateSchema = LogicalTypes.timestampMicros().addToSchema(SchemaBuilder.builder.longType)
    val schema = SchemaBuilder.record("foo").fields().name("z").`type`(dateSchema).noDefault().endRecord()
    val record = new GenericData.Record(schema)
    record.put("z", 1572707106376001L)
    Decoder[WithLocalDateTime].withSchema(SchemaFor(schema)).decode(record) shouldBe WithLocalDateTime(
      LocalDateTime.of(2019, 11, 2, 15, 5, 6, 376001000))
  }

  test("decode timestamp-nanos to LocalDateTime") {
    val dateSchema = TimestampNanosLogicalType.addToSchema(SchemaBuilder.builder.longType)
    val schema = SchemaBuilder.record("foo").fields().name("z").`type`(dateSchema).noDefault().endRecord()
    val record = new GenericData.Record(schema)
    record.put("z", 1572707106376000002L)
    Decoder[WithLocalDateTime].decode(record) shouldBe WithLocalDateTime(
      LocalDateTime.of(2019, 11, 2, 15, 5, 6, 376000002))
  }

  test("decode long to Timestamp") {
    val schema = AvroSchema[WithTimestamp]
    val record = new GenericData.Record(schema)
    record.put("z", 1538312231000L)
    Decoder[WithTimestamp].decode(record) shouldBe WithTimestamp(new Timestamp(1538312231000L))
  }

  test("decode long to Instant") {
    val schema = AvroSchema[WithInstant]
    val record = new GenericData.Record(schema)
    record.put("z", 1538312231000L)
    Decoder[WithInstant].decode(record) shouldBe WithInstant(Instant.ofEpochMilli(1538312231000L))
  }
} 
Example 147
Source File: DateSchemaTest.scala    From avro4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.avro4s.schema

import java.sql.{Date, Timestamp}
import java.time.{Instant, LocalDate, LocalDateTime, LocalTime}

import com.sksamuel.avro4s.AvroSchema
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

class DateSchemaTest extends AnyFunSuite with Matchers {

  test("generate date logical type for LocalDate") {
    case class LocalDateTest(date: LocalDate)
    val expected = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream("/localdate.json"))
    val schema = AvroSchema[LocalDateTest]
    schema.toString(true) shouldBe expected.toString(true)
  }

  test("generate date logical type for Date") {
    case class DateTest(date: Date)
    val expected = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream("/date.json"))
    val schema = AvroSchema[DateTest]
    schema.toString(true) shouldBe expected.toString(true)
  }

  test("generate time logical type for LocalTime") {
    case class LocalTimeTest(time: LocalTime)
    val expected = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream("/localtime.json"))
    val schema = AvroSchema[LocalTimeTest]
    schema.toString(true) shouldBe expected.toString(true)
  }

  test("generate timestamp-nanos for LocalDateTime") {
    case class LocalDateTimeTest(time: LocalDateTime)
    val expected = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream("/localdatetime.json"))
    val schema = AvroSchema[LocalDateTimeTest]
    schema.toString(true) shouldBe expected.toString(true)
  }

  test("generate timestamp-millis logical type for Instant") {
    case class InstantTest(instant: Instant)
    val expected = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream("/instant.json"))
    val schema = AvroSchema[InstantTest]
    schema.toString(true) shouldBe expected.toString(true)
  }

  test("generate timestamp-millis logical type for Timestamp") {
    case class TimestampTest(ts: Timestamp)
    val expected = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream("/timestamp.json"))
    val schema = AvroSchema[TimestampTest]
    schema.toString(true) shouldBe expected.toString(true)
  }
} 
Example 148
Source File: SparkUtilities.scala    From spark-practice   with MIT License 5 votes vote down vote up
package utilities

import java.sql.Timestamp

import org.apache.spark.sql.SparkSession
import org.apache.spark.{SparkConf, SparkContext}
import probelms.customerInsights.CIConstants


object SparkUtilities {

  def  getSparkContext(appName:String):SparkContext={
    val conf = new SparkConf().setAppName(appName).setMaster("local")
                             // .set("spark.serializer","spark.kryo.registrator")
    val sc = new SparkContext(conf)
    sc
  }

  def getSparkSession(appName:String):SparkSession={
    val spark = SparkSession.builder()
                            .appName(appName)
                            .master("local")
                          //  .config("spark.serializer","spark.kryo.registrator")
                            .getOrCreate()

    spark
  }

  def convertCurrencyToDouble(currency:String):Double={
    currency.stripPrefix("$").trim.toDouble
  }

  def getDate(date:String):Timestamp={
    new java.sql.Timestamp(CIConstants.formatter.parseDateTime(date).getMillis)
  }

} 
Example 149
Source File: StructuredNetworkWordCountWindowed.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
// scalastyle:off println
package org.apache.spark.examples.sql.streaming

import java.sql.Timestamp

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._


object StructuredNetworkWordCountWindowed {

  def main(args: Array[String]) {
    if (args.length < 3) {
      System.err.println("Usage: StructuredNetworkWordCountWindowed <hostname> <port>" +
        " <window duration in seconds> [<slide duration in seconds>]")
      System.exit(1)
    }

    val host = args(0)
    val port = args(1).toInt
    val windowSize = args(2).toInt
    val slideSize = if (args.length == 3) windowSize else args(3).toInt
    if (slideSize > windowSize) {
      System.err.println("<slide duration> must be less than or equal to <window duration>")
    }
    val windowDuration = s"$windowSize seconds"
    val slideDuration = s"$slideSize seconds"

    val spark = SparkSession
      .builder
      .appName("StructuredNetworkWordCountWindowed")
      .getOrCreate()

    import spark.implicits._

    // Create DataFrame representing the stream of input lines from connection to host:port
    val lines = spark.readStream
      .format("socket")
      .option("host", host)
      .option("port", port)
      .option("includeTimestamp", true)
      .load()

    // Split the lines into words, retaining timestamps
    val words = lines.as[(String, Timestamp)].flatMap(line =>
      line._1.split(" ").map(word => (word, line._2))
    ).toDF("word", "timestamp")

    // Group the data by window and word and compute the count of each group
    val windowedCounts = words.groupBy(
      window($"timestamp", windowDuration, slideDuration), $"word"
    ).count().orderBy("window")

    // Start running the query that prints the windowed word counts to the console
    val query = windowedCounts.writeStream
      .outputMode("complete")
      .format("console")
      .option("truncate", "false")
      .start()

    query.awaitTermination()
  }
}
// scalastyle:on println 
Example 150
Source File: QueryPartitionSuite.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive

import java.io.File
import java.sql.Timestamp

import com.google.common.io.Files
import org.apache.hadoop.fs.FileSystem

import org.apache.spark.sql._
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.util.Utils

class QueryPartitionSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
  import spark.implicits._

  test("SPARK-5068: query data when path doesn't exist") {
    withSQLConf((SQLConf.HIVE_VERIFY_PARTITION_PATH.key, "true")) {
      val testData = sparkContext.parallelize(
        (1 to 10).map(i => TestData(i, i.toString))).toDF()
      testData.createOrReplaceTempView("testData")

      val tmpDir = Files.createTempDir()
      // create the table for test
      sql(s"CREATE TABLE table_with_partition(key int,value string) " +
        s"PARTITIONED by (ds string) location '${tmpDir.toURI}' ")
      sql("INSERT OVERWRITE TABLE table_with_partition  partition (ds='1') " +
        "SELECT key,value FROM testData")
      sql("INSERT OVERWRITE TABLE table_with_partition  partition (ds='2') " +
        "SELECT key,value FROM testData")
      sql("INSERT OVERWRITE TABLE table_with_partition  partition (ds='3') " +
        "SELECT key,value FROM testData")
      sql("INSERT OVERWRITE TABLE table_with_partition  partition (ds='4') " +
        "SELECT key,value FROM testData")

      // test for the exist path
      checkAnswer(sql("select key,value from table_with_partition"),
        testData.toDF.collect ++ testData.toDF.collect
          ++ testData.toDF.collect ++ testData.toDF.collect)

      // delete the path of one partition
      tmpDir.listFiles
        .find { f => f.isDirectory && f.getName().startsWith("ds=") }
        .foreach { f => Utils.deleteRecursively(f) }

      // test for after delete the path
      checkAnswer(sql("select key,value from table_with_partition"),
        testData.toDF.collect ++ testData.toDF.collect ++ testData.toDF.collect)

      sql("DROP TABLE IF EXISTS table_with_partition")
      sql("DROP TABLE IF EXISTS createAndInsertTest")
    }
  }

  test("SPARK-21739: Cast expression should initialize timezoneId") {
    withTable("table_with_timestamp_partition") {
      sql("CREATE TABLE table_with_timestamp_partition(value int) PARTITIONED BY (ts TIMESTAMP)")
      sql("INSERT OVERWRITE TABLE table_with_timestamp_partition " +
        "PARTITION (ts = '2010-01-01 00:00:00.000') VALUES (1)")

      // test for Cast expression in TableReader
      checkAnswer(sql("SELECT * FROM table_with_timestamp_partition"),
        Seq(Row(1, Timestamp.valueOf("2010-01-01 00:00:00.000"))))

      // test for Cast expression in HiveTableScanExec
      checkAnswer(sql("SELECT value FROM table_with_timestamp_partition " +
        "WHERE ts = '2010-01-01 00:00:00.000'"), Row(1))
    }
  }
} 
Example 151
Source File: TypeCast.scala    From mimir   with Apache License 2.0 5 votes vote down vote up
package mimir.exec.spark.datasource.google.spreadsheet

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.NumberFormat
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try

object TypeCast {

  private[spreadsheet] def castTo(
                                   datum: String,
                                   castType: DataType,
                                   nullable: Boolean = true
                                 ): Any = {
    castType match {
      case _: ByteType => datum.toByte
      case _: ShortType => datum.toShort
      case _: IntegerType => datum.toInt
      case _: LongType => datum.toLong
      case _: FloatType => Try(datum.toFloat)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).floatValue())
      case _: DoubleType => Try(datum.toDouble)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).doubleValue())
      case _: BooleanType => datum.toBoolean
      case _: DecimalType => new BigDecimal(datum.replaceAll(",", ""))
      case _: TimestampType => Timestamp.valueOf(datum)
      case _: DateType => Date.valueOf(datum)
      case _: StringType => datum
      case _ => throw new RuntimeException(s"Unsupported type: ${castType.typeName}")

    }
  }
} 
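A brief sketch of the timestamp and date branches above, assuming the caller sits in the same package (castTo is package-private) and passes well-formed cell text:

import java.sql.{Date, Timestamp}
import org.apache.spark.sql.types.{DateType, TimestampType}

// TimestampType cells go through Timestamp.valueOf, DateType cells through Date.valueOf.
val ts   = TypeCast.castTo("1998-06-03 01:23:45", TimestampType)
val date = TypeCast.castTo("1998-06-03", DateType)
assert(ts == Timestamp.valueOf("1998-06-03 01:23:45") && date == Date.valueOf("1998-06-03"))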
Example 152
Source File: TestResultSetDataConverter.scala    From ohara   with Apache License 2.0 4 votes vote down vote up
package oharastream.ohara.connector.jdbc.source

import java.sql.{ResultSet, Time, Timestamp}

import oharastream.ohara.client.configurator.InspectApi.RdbColumn
import oharastream.ohara.common.rule.OharaTest
import oharastream.ohara.connector.jdbc.datatype.{MySQLDataTypeConverter, RDBDataTypeConverter}
import oharastream.ohara.connector.jdbc.util.{ColumnInfo, DateTimeUtils}
import org.junit.Test
import org.mockito.Mockito
import org.mockito.Mockito._
import org.scalatest.matchers.should.Matchers._

class TestResultSetDataConverter extends OharaTest {
  private[this] val VARCHAR: String   = "VARCHAR"
  private[this] val TIMESTAMP: String = "TIMESTAMP"
  private[this] val INT: String       = "INT"
  private[this] val DATE: String      = "DATE"
  private[this] val TIME: String      = "TIME"

  @Test
  def testConverterRecord(): Unit = {
    val resultSet: ResultSet = Mockito.mock(classOf[ResultSet])
    when(resultSet.getTimestamp("column1", DateTimeUtils.CALENDAR)).thenReturn(new Timestamp(0L))
    when(resultSet.getString("column2")).thenReturn("aaa")
    when(resultSet.getInt("column3")).thenReturn(10)

    val columnList = Seq(
      RdbColumn("column1", TIMESTAMP, true),
      RdbColumn("column2", VARCHAR, false),
      RdbColumn("column3", INT, false)
    )
    val dataTypeConverter: RDBDataTypeConverter = new MySQLDataTypeConverter()
    val result: Seq[ColumnInfo[_]]              = ResultSetDataConverter.converterRecord(dataTypeConverter, resultSet, columnList)
    result.head.columnName shouldBe "column1"
    result.head.columnType shouldBe TIMESTAMP
    result.head.value.toString shouldBe "1970-01-01 08:00:00.0"

    result(1).columnName shouldBe "column2"
    result(1).columnType shouldBe VARCHAR
    result(1).value shouldBe "aaa"

    result(2).columnName shouldBe "column3"
    result(2).columnType shouldBe INT
    result(2).value shouldBe 10
  }

  @Test
  def testNullValue(): Unit = {
    val resultSet: ResultSet = Mockito.mock(classOf[ResultSet])
    when(resultSet.getTimestamp("column1", DateTimeUtils.CALENDAR)).thenReturn(new Timestamp(0L))
    when(resultSet.getString("column2")).thenReturn(null)
    when(resultSet.getDate("column3")).thenReturn(null)
    when(resultSet.getTime("column4")).thenReturn(null)

    val columnList = Seq(
      RdbColumn("column1", TIMESTAMP, true),
      RdbColumn("column2", VARCHAR, false),
      RdbColumn("column3", DATE, false),
      RdbColumn("column4", TIME, false)
    )
    val dataTypeConverter: RDBDataTypeConverter = new MySQLDataTypeConverter()
    val result: Seq[ColumnInfo[_]]              = ResultSetDataConverter.converterRecord(dataTypeConverter, resultSet, columnList)
    result(1).columnName shouldBe "column2"
    result(1).columnType shouldBe VARCHAR
    result(1).value shouldBe "null"

    result(2).columnName shouldBe "column3"
    result(2).columnType shouldBe DATE
    result(2).value.toString shouldBe "1970-01-01"

    result(3).columnName shouldBe "column4"
    result(3).columnType shouldBe TIME
    result(3).value.toString shouldBe new Time(0).toString
  }
} 
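A minimal sketch of the JDBC call the converter above depends on, assuming `rs` is a ResultSet positioned on a row; the Calendar argument fixes the time zone used to materialise the Timestamp:

import java.sql.{ResultSet, Timestamp}
import java.util.{Calendar, TimeZone}

def readTimestampUtc(rs: ResultSet, column: String): Timestamp =
  rs.getTimestamp(column, Calendar.getInstance(TimeZone.getTimeZone("UTC")))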
Example 153
Source File: StreamingProducer.scala    From Scala-Programming-Projects   with MIT License 4 votes vote down vote up
package coinyser

import java.sql.Timestamp
import java.text.SimpleDateFormat
import java.util.TimeZone

import cats.effect.IO
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.pusher.client.Client
import com.pusher.client.channel.SubscriptionEventListener
import com.typesafe.scalalogging.StrictLogging

object StreamingProducer extends StrictLogging {

  def subscribe(pusher: Client)(onTradeReceived: String => Unit): IO[Unit] =
    for {
      _ <- IO(pusher.connect())
      channel <- IO(pusher.subscribe("live_trades"))

      _ <- IO(channel.bind("trade", new SubscriptionEventListener() {
        override def onEvent(channel: String, event: String, data: String): Unit = {
          logger.info(s"Received event: $event with data: $data")
          onTradeReceived(data)
        }
      }))
    } yield ()


  val mapper: ObjectMapper = {
    val m = new ObjectMapper()
    m.registerModule(DefaultScalaModule)
    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    // Very important: the storage must be in UTC
    sdf.setTimeZone(TimeZone.getTimeZone("UTC"))
    m.setDateFormat(sdf)
  }

  def deserializeWebsocketTransaction(s: String): WebsocketTransaction =
    mapper.readValue(s, classOf[WebsocketTransaction])

  def convertWsTransaction(wsTx: WebsocketTransaction): Transaction =
    Transaction(
      timestamp = new Timestamp(wsTx.timestamp.toLong * 1000), tid = wsTx.id,
      price = wsTx.price, sell = wsTx.`type` == 1, amount = wsTx.amount)

  def serializeTransaction(tx: Transaction): String =
    mapper.writeValueAsString(tx)

} 
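convertWsTransaction above multiplies the exchange's epoch-seconds field by 1000 before wrapping it in a Timestamp. A small sketch of just that conversion (the sample value is illustrative):

import java.sql.Timestamp

val epochSeconds = "1538312231"                     // as delivered in the websocket payload
val ts = new Timestamp(epochSeconds.toLong * 1000L) // seconds -> milliseconds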
Example 154
Source File: StreamingPredictionsSpec.scala    From odsc-east-realish-predictions   with Apache License 2.0 4 votes vote down vote up
package com.twilio.open.odsc.realish

import java.sql.Timestamp
import java.time.Instant
import java.util.{Random, UUID}

import org.apache.spark.SparkConf
import org.apache.spark.sql.{Encoders, SQLContext, SparkSession}
import org.scalatest.{FunSuite, Matchers}
import org.apache.spark.sql.execution.streaming.MemoryStream
import org.apache.spark.sql.functions._
import org.apache.spark.sql.streaming.{OutputMode, Trigger}

import scala.concurrent.duration._

class StreamingPredictionsSpec extends FunSuite with Matchers with SharedSparkSql {

  override def conf: SparkConf = {
    new SparkConf()
      .setMaster("local[*]")
      .setAppName("odsc-spark-utils")
      .set("spark.ui.enabled", "false")
      .set("spark.app.id", appID)
      .set("spark.driver.host", "localhost")
      .set("spark.sql.session.timeZone", "UTC")
  }

  final val notRandomRandom = {
    val generator = new Random
    generator.setSeed(100L)
    generator
  }

  test("should stream in some mock data for fun") {
    implicit val spark: SparkSession = sparkSql
    import spark.implicits._
    implicit val sqlContext: SQLContext = spark.sqlContext

    implicit val metricEncoder = Encoders.product[Metric]
    val metricData = MemoryStream[Metric]

    val startingInstant = Instant.now()

    val backingData = (1 to 10000).map(offset => {
      val metric = if (offset % 2 == 0) "loss_percentage" else "connect_duration"
      val nextLoss = notRandomRandom.nextDouble() * notRandomRandom.nextInt(100)
      Metric(
        Timestamp.from(startingInstant.minusSeconds(offset)),
        UUID.randomUUID().toString,
        metric,
        value = if (metric == "loss_percentage") nextLoss else notRandomRandom.nextDouble() * notRandomRandom.nextInt(240),
        countryCode = if (offset % 8 == 0) "US" else "BR",
        callDirection = if (metric == "loss_percentage") "inbound" else "outbound"
      )
    })
    val processingTimeTrigger = Trigger.ProcessingTime(2.seconds)


    val streamingQuery = metricData.toDF()
      .withWatermark("timestamp", "2 hours")
      .groupBy(col("metric"), col("countryCode"), window($"timestamp", "5 minutes"))
      .agg(
        min("value") as "min",
        avg("value") as "mean",
        max("value") as "max",
        count("*") as "total"
      )
      .writeStream
      .format("memory")
      .queryName("datastream")
      .outputMode(OutputMode.Append())
      .trigger(processingTimeTrigger)
      .start()

    metricData.addData(backingData)

    streamingQuery.processAllAvailable()

    spark.sql("select * from datastream").show(20, false)

    val checkChange = spark.sql("select * from datastream")
      .groupBy("metric","countryCode")
      .agg(
        sum("total") as "total",
        avg("mean") as "mean"
      )

    checkChange.show(20, false)

    // from here we could add more interesting assertions with minor back-tracking...

    streamingQuery.stop()

  }

}
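The mock events above derive their event time by subtracting an offset in seconds from a fixed Instant. A minimal sketch of that generation step on its own (sizes and offsets are illustrative):

import java.sql.Timestamp
import java.time.Instant

val start = Instant.now()
val eventTimes = (1 to 5).map(offset => Timestamp.from(start.minusSeconds(offset.toLong)))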