java.sql.Timestamp Scala Examples
The following examples show how to use java.sql.Timestamp in Scala. Each example is taken from an open-source project; the source file and license are noted above each listing.
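Before the project examples, here is a minimal standalone sketch of the java.sql.Timestamp operations the listings below lean on most often: constructing a Timestamp from epoch milliseconds or from a JDBC-style string, reading the millisecond value back, and converting to and from java.time.Instant. It is not taken from any of the projects below; the object name and sample values are illustrative.

import java.sql.Timestamp
import java.time.Instant

object TimestampBasics extends App {
  // construct from epoch milliseconds
  val fromMillis: Timestamp = new Timestamp(System.currentTimeMillis())

  // construct from a JDBC-style literal (yyyy-[m]m-[d]d hh:mm:ss[.f...]), as in several tests below
  val fromString: Timestamp = Timestamp.valueOf("2018-11-12 09:42:00")

  // back to epoch milliseconds, the form many examples below pass around via timestamp.getTime
  val millis: Long = fromString.getTime

  // conversions to and from java.time
  val instant: Instant = fromString.toInstant
  val roundTrip: Timestamp = Timestamp.from(instant)

  println(s"$fromString -> $millis ms since the epoch; round-trips via Instant: ${roundTrip == fromString}")
}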
Example 1
Source File: SparkRandomGenDataIngress.scala From pipelines-examples with Apache License 2.0 | 5 votes |
package pipelines.example

import java.sql.Timestamp

import scala.util.Random

import pipelines.streamlets.{ DurationConfigParameter, IntegerConfigParameter, StreamletShape }
import pipelines.streamlets.avro._
import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic }
import org.apache.spark.sql.Dataset
import org.apache.spark.sql.streaming.{ OutputMode, Trigger }
import pipelines.spark.sql.SQLImplicits._

case class Rate(timestamp: Timestamp, value: Long)

class SparkRandomGenDataIngress extends SparkStreamlet {
  val out   = AvroOutlet[Data]("out", d ⇒ d.src)
  val shape = StreamletShape(out)

  val RecordsPerSecond = IntegerConfigParameter(
    "records-per-second",
    "Records per second to produce.",
    Some(50))

  val RampUpTime = DurationConfigParameter(
    "ramp-up-time",
    "Time to reach max records per second.",
    Some("0 seconds"))

  override def configParameters = Vector(RecordsPerSecond, RampUpTime)

  override def createLogic() = new SparkStreamletLogic {
    override def buildStreamingQueries = {
      writeStream(process, out, OutputMode.Append).toQueryExecution
    }

    private def process: Dataset[Data] = {
      val recordsPerSecond = context.streamletConfig.getInt(RecordsPerSecond.key)
      val rampUpTime = context.streamletConfig.getDuration(RampUpTime.key, java.util.concurrent.TimeUnit.SECONDS)
      println(s"Using rampup time of $rampUpTime seconds")

      val gaugeGen: () ⇒ String = () ⇒ if (Random.nextDouble() < 0.5) "oil" else "gas"

      val rateStream = session.readStream
        .format("rate")
        .option("rowsPerSecond", recordsPerSecond)
        .option("rampUpTime", s"${rampUpTime}s")
        .load()
        .as[Rate]

      rateStream.map {
        case Rate(timestamp, value) ⇒ Data(s"src-${value % 1000}", timestamp.getTime, None, None, gaugeGen(), value)
      }
    }
  }
}
Example 2
Source File: TransactionsFlowUnitTest.scala From kafka-examples with Apache License 2.0 | 5 votes |
package com.cloudera.streaming.refapp

import java.sql.Timestamp

import org.scalatest.BeforeAndAfter

import org.apache.spark.sql.execution.streaming.MemoryStream

class TransactionsFlowUnitTest extends UnitTestBase with BeforeAndAfter {
  import testImplicits._

  var transactionsFromStream: MemoryStream[Transaction] = _
  var transactionsFlow: TransactionsFlow = _

  before {
    transactionsFromStream = MemoryStream[Transaction]
    transactionsFlow = new TransactionsFlow(
      spark,
      statesFromCluster,
      customersFromCluster,
      vendorsFromCluster,
      transactionsFromStream = transactionsFromStream
        .toDF.withColumn("timestamp", $"event_timestamp".cast("timestamp")))
  }

  test("Valid records are written to the validTransactions output") {
    val validTransaction = Transaction(
      transaction_id = "1",
      customer_id = Some(1),
      vendor_id = Some(1),
      event_state = Some("CREATED"),
      event_timestamp = Timestamp.valueOf("2018-11-12 09:42:00"),
      price = Some("100"),
      card_type = Some("Credit"))

    testStream(transactionsFlow.validTransactions.select('transaction_id, 'customer_id, 'vendor_id, 'event_state, 'event_timestamp, 'price, 'card_type)) (
      AddData(transactionsFromStream, validTransaction),
      CheckAnswer(validTransaction)
    )
  }

  test("Invalid records are written to the invalidTransactions output") {
    // Note: transactionsFlow.validTransactions and invalidTransactions contain the fields that we used for internal calculations, e.g. for validation.
    // It enables us to check the internal calculations.
    testStream(transactionsFlow.invalidTransactions.select('transaction_id, 'valid_card_type)) (
      AddData(transactionsFromStream, Transaction(
        transaction_id = "2",
        customer_id = Some(1),
        vendor_id = Some(1),
        event_state = Some("CREATED"),
        event_timestamp = Timestamp.valueOf("2018-11-12 09:42:00"),
        price = Some("100"),
        card_type = Some("Invalid"))),
      CheckAnswer(("2", false))
    )
  }
}
Example 3
Source File: LocalIntegrationTest.scala From kafka-examples with Apache License 2.0 | 5 votes |
package com.cloudera.streaming.refapp

import java.sql.Timestamp

import org.scalatest.Matchers._
import org.scalatest.concurrent.Eventually._
import org.scalatest.time.{Seconds, Span}

import org.apache.spark.sql.Encoders

class LocalIntegrationTest extends IntegrationTestBase {

  test("Integration test with one kafka and one spark instance embedded in the same JVM") {
    val inputDir = "src/test/resources/samples"

    val spark = EmbeddedSpark.sparkSession
    val fileSource = new FileSources(spark, inputDir)
    val kafkaConfig = EmbeddedKafkaBroker.defaultKafkaConfig
    val kafkaSource = new KafkaSource(spark, kafkaConfig)

    val application = new Application(
      spark,
      Sources(
        statesFromCluster = fileSource.jsonFile("states"),
        customersFromCluster = fileSource.jsonFile("customers"),
        vendorsFromCluster = fileSource.jsonFile("vendors"),
        customersFromStream = kafkaSource.jsonStreamWithKafkaTimestamp("customer"),
        vendorsFromStream = kafkaSource.jsonStreamWithTimestampFromMessage("vendor", "update_timestamp"),
        transactionsFromStream = kafkaSource.jsonStreamWithTimestampFromMessage("transaction", "event_timestamp")
      ),
      Sinks(
        invalidTransactions = Memory.memorySink("invalidTransactions"),
        validTransactions = Memory.memorySink("validTransactions"),
        customerOrphans = Memory.memorySink("customerOrphans"),
        vendorOrphans = Memory.memorySink("vendorOrphans"),
        customers = Memory.memorySink("customers"),
        vendors = Memory.memorySink("vendors"),
        transactionsOperationalMetadata = Memory.memorySink("transactionsOperationalMetadata")
      ))

    application.start()

    eventually(timeout(Span(20, Seconds)), interval(Span(5, Seconds))) {
      EmbeddedKafkaBroker.publishStringMessageToKafka(
        "transaction",
        """{ "transaction_id": "1", "customer_id": 1, "vendor_id": 1, "event_state": "CREATED", "event_timestamp": "2018-11-12 09:42:00", "price": "100", "card_type": "Credit"}""")
      EmbeddedKafkaBroker.publishStringMessageToKafka(
        "transaction",
        """{ "transaction_id": "21", "customer_id": 100, "vendor_id": 2, "event_state": "SWIPED", "event_timestamp": "2018-11-13 09:45:01", "price": "100", "card_type": "Debit"}""")

      val validTransactionsQuery = application.streamingQueries.validTransactions
      validTransactionsQuery.processAllAvailable()

      val currentContent = spark.table("validTransactions").as[Transaction](Encoders.product).collect()

      currentContent.shouldBe(
        Array(
          Transaction(
            transaction_id = "1",
            customer_id = Some(1),
            vendor_id = Some(1),
            event_state = Some("CREATED"),
            event_timestamp = Timestamp.valueOf("2018-11-12 09:42:00"),
            price = Some("100"),
            card_type = Some("Credit")),
          Transaction(
            transaction_id = "21",
            customer_id = Some(100),
            vendor_id = Some(2),
            event_state = Some("SWIPED"),
            event_timestamp = Timestamp.valueOf("2018-11-13 09:45:01"),
            price = Some("100"),
            card_type = Some("Debit"))
        ))
    }
  }
}
Example 4
Source File: TypeCast.scala From spark-google-spreadsheets with Apache License 2.0 | 5 votes |
package com.github.potix2.spark.google.spreadsheets.util

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.NumberFormat
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try

object TypeCast {
  private[spreadsheets] def castTo(
      datum: String,
      castType: DataType,
      nullable: Boolean = true
  ): Any = {
    castType match {
      case _: ByteType => datum.toByte
      case _: ShortType => datum.toShort
      case _: IntegerType => datum.toInt
      case _: LongType => datum.toLong
      case _: FloatType => Try(datum.toFloat)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).floatValue())
      case _: DoubleType => Try(datum.toDouble) // parse at full double precision
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).doubleValue())
      case _: BooleanType => datum.toBoolean
      case _: DecimalType => new BigDecimal(datum.replaceAll(",", ""))
      case _: TimestampType => Timestamp.valueOf(datum)
      case _: DateType => Date.valueOf(datum)
      case _: StringType => datum
      case _ => throw new RuntimeException(s"Unsupported type: ${castType.typeName}")
    }
  }
}
Example 5
Source File: ProcessMarshaller.scala From sundial with MIT License | 5 votes |
package dao.postgres.marshalling

import java.sql.{Connection, PreparedStatement, ResultSet, Timestamp}
import java.util.UUID
import dao.postgres.common.ProcessTable
import model.{Process, ProcessStatus}
import util.JdbcUtil._

object ProcessMarshaller {

  def unmarshalProcess(rs: ResultSet): Process = {
    import ProcessTable._
    Process(
      id = rs.getObject(COL_ID).asInstanceOf[UUID],
      processDefinitionName = rs.getString(COL_DEF_NAME),
      startedAt = javaDate(rs.getTimestamp(COL_STARTED)),
      status = rs.getString(COL_STATUS) match {
        case STATUS_SUCCEEDED => ProcessStatus.Succeeded(javaDate(rs.getTimestamp(COL_ENDED_AT)))
        case STATUS_FAILED    => ProcessStatus.Failed(javaDate(rs.getTimestamp(COL_ENDED_AT)))
        case STATUS_RUNNING   => ProcessStatus.Running()
      },
      taskFilter = getStringArray(rs, COL_TASK_FILTER)
    )
  }

  def marshalProcess(process: Process,
                     stmt: PreparedStatement,
                     columns: Seq[String],
                     startIndex: Int = 1)(implicit conn: Connection) = {
    import ProcessTable._
    var index = startIndex
    columns.foreach { col =>
      col match {
        case COL_ID       => stmt.setObject(index, process.id)
        case COL_DEF_NAME => stmt.setString(index, process.processDefinitionName)
        case COL_STARTED  => stmt.setTimestamp(index, new Timestamp(process.startedAt.getTime()))
        case COL_ENDED_AT => stmt.setTimestamp(index, process.endedAt.getOrElse(null))
        case COL_STATUS =>
          stmt.setString(
            index,
            process.status match {
              case ProcessStatus.Succeeded(_) => STATUS_SUCCEEDED
              case ProcessStatus.Failed(_)    => STATUS_FAILED
              case ProcessStatus.Running()    => STATUS_RUNNING
            }
          )
        case COL_TASK_FILTER =>
          stmt.setArray(index, process.taskFilter.map(makeStringArray).getOrElse(null))
      }
      index += 1
    }
  }
}
Example 6
Source File: JdbcUtil.scala From sundial with MIT License | 5 votes |
package util

import java.sql.{Connection, Timestamp, ResultSet}
import java.util.Date
import scala.language.implicitConversions

object JdbcUtil {

  implicit def resultSetItr(resultSet: ResultSet): Stream[ResultSet] = {
    new Iterator[ResultSet] {
      def hasNext = resultSet.next()
      def next() = resultSet
    }.toStream
  }

  implicit def javaDate(ts: Timestamp): Date = {
    new Date(ts.getTime())
  }

  implicit def dateToTimestamp(date: Date) = {
    if (date != null)
      new Timestamp(date.getTime())
    else
      null
  }

  private def getNullable[T](rs: ResultSet, f: ResultSet => T): Option[T] = {
    val obj = f(rs)
    if (rs.wasNull()) {
      Option.empty
    } else {
      Some(obj)
    }
  }

  def getIntOption(rs: ResultSet, col: String) =
    getNullable(rs, rs => rs.getInt(col))

  def makeStringArray(seq: Seq[String])(implicit conn: Connection) = {
    conn.createArrayOf("varchar", seq.toArray[AnyRef])
  }

  def getStringArray(rs: ResultSet, col: String) = {
    Option(rs.getArray(col))
      .map(_.getArray().asInstanceOf[Array[String]].toList)
  }
}
Example 7
Source File: SchedulerDataManager.scala From cave with MIT License | 5 votes |
package com.cave.metrics.data.postgresql

import java.sql.Timestamp

import com.cave.metrics.data.AwsConfig
import com.cave.metrics.data.postgresql.Tables._
import org.joda.time.format.DateTimeFormat
import org.joda.time.DateTime

import scala.slick.jdbc.{GetResult, StaticQuery => Q}
import scala.slick.driver.PostgresDriver.simple._

class SchedulerDataManager(awsConfig: AwsConfig) extends DatabaseConnection(awsConfig) {

  def leadershipTermTimeoutSeconds = awsConfig.leadershipTermTimeoutSeconds
  def leadershipTermLengthSeconds = awsConfig.leadershipTermLengthSeconds

  def DBDateTimeFormatter = DateTimeFormat.forPattern("YYYY-MM-dd HH:mm:ss Z")

  implicit val getSchedulersResult = GetResult(r => SchedulersRow(r.<<, r.<<, r.<<))

  def takeLeadership(hostname: String): Boolean = {
    db.withTransaction { implicit session =>
      val termTimeout = new DateTime().minusSeconds(leadershipTermTimeoutSeconds)
      val timeoutSql = DBDateTimeFormatter.print(termTimeout)

      val sql = s"BEGIN; SELECT * FROM schedulers WHERE created_at < '$timeoutSql' FOR UPDATE"
      val query = Q.queryNA[SchedulersRow](sql)

      def updateTimestamp(): Boolean =
        Schedulers.filter(_.createdAt < new Timestamp(termTimeout.getMillis))
          .map(s => (s.name, s.createdAt)).update(hostname, new Timestamp(System.currentTimeMillis())) == 1

      try {
        query.list.length == 1 && (updateTimestamp() || {
          session.rollback()
          false
        })
      } catch {
        case e: Exception =>
          log.error(e)
          session.rollback()
          false
      }
    }
  }
}
Example 8
Source File: SchedulerDataManagerSpec.scala From cave with MIT License | 5 votes |
package com.cave.metrics.data.postgresql

import java.sql.Timestamp

import com.cave.metrics.data.postgresql.Tables._
import org.joda.time.format.DateTimeFormat
import org.scalatest.BeforeAndAfter

import scala.slick.driver.H2Driver.simple._
import scala.slick.jdbc.StaticQuery

class SchedulerDataManagerSpec extends AbstractDataManagerSpec with BeforeAndAfter {

  val hostname_1 = "host1"
  val hostname_2 = "host2"
  val hostname_3 = "host3"

  var dm: SchedulerDataManager = _

  before {
    dm = new SchedulerDataManager(awsConfig) {
      override def DBDateTimeFormatter = DateTimeFormat.forPattern("YYYY-MM-dd HH:mm:ss")
      override def leadershipTermTimeoutSeconds = 30
    }
    Schedulers += SchedulersRow(1, "initialValue", new Timestamp(System.currentTimeMillis() - 1000 * 60))
  }

  "Scheduler Data Manager" should "update Schedulers table" in {
    Schedulers.list.head.name should be("initialValue")
    assert(dm.takeLeadership(hostname_1), "Expected success")
    Schedulers.list.head.name should be(hostname_1)

    assert(!dm.takeLeadership(hostname_3), "Expected failure")
    Schedulers.list.head.name should be(hostname_1)

    assert(!dm.extendLeadership(hostname_2), "Expected failure")
    Schedulers.list.head.name should be(hostname_1)

    Thread.sleep(1500)
    assert(dm.extendLeadership(hostname_1), "A-hostname was not able to extend its leadership")
    Schedulers.list.head.name should be(hostname_1)
  }

  it should "not update the leader if one is active" in {
    StaticQuery.queryNA("truncate table SCHEDULERS").execute
    Schedulers += SchedulersRow(1, hostname_1, new Timestamp(System.currentTimeMillis() - 1000 * 20))
    Schedulers.list.length should be(1)

    assert(!dm.takeLeadership(hostname_2), "Expected failure")
    Schedulers.list.head.name should be(hostname_1)

    Thread.sleep(100)
    assert(dm.extendLeadership(hostname_1), "Expected success")
    Schedulers.list.head.name should be(hostname_1)
  }

  it should "not give leadership to host3 when host2 is the leader" in {
    StaticQuery.queryNA("truncate table SCHEDULERS").execute
    Schedulers += SchedulersRow(1, hostname_1, new Timestamp(System.currentTimeMillis() - 1000 * 31))
    Schedulers.list.length should be(1)

    assert(dm.takeLeadership(hostname_2), "Expected success")
    Schedulers.list.head.name should be(hostname_2)

    assert(!dm.takeLeadership(hostname_3), "Expected failure")
    Schedulers.list.head.name should be(hostname_2)

    assert(!dm.takeLeadership(hostname_1), "Expected failure")
    Schedulers.list.head.name should be(hostname_2)
  }

  it should "be thread safe" in {
    StaticQuery.queryNA("truncate table SCHEDULERS").execute
    Schedulers.list.length should be(0)
    Schedulers += SchedulersRow(1, hostname_1, new Timestamp(System.currentTimeMillis() - 1000 * 360))
    Schedulers.list.length should be(1)
    Schedulers.list.head.name should be(hostname_1)

    import scala.slick.jdbc.{GetResult, StaticQuery => Q}
    val sql = s"BEGIN; select * from SCHEDULERS FOR UPDATE"
    val query = Q.queryNA[SchedulersRow](sql)
    query.list.length should be(1)

    assert(!dm.takeLeadership(hostname_1), "Expected failure")
    assert(!dm.takeLeadership(hostname_2), "Expected failure")
    assert(!dm.takeLeadership(hostname_3), "Expected failure")

    assert(!dm.extendLeadership(hostname_1), "Expected failure")
    assert(!dm.extendLeadership(hostname_2), "Expected failure")
    assert(!dm.extendLeadership(hostname_3), "Expected failure")

    Schedulers.list.head.name should be(hostname_1)
  }
}
Example 9
Source File: TimeColumnBuffer.scala From spark-vector with Apache License 2.0 | 5 votes |
package com.actian.spark_vector.colbuffer.time

import java.nio.ByteBuffer
import java.sql.Timestamp
import java.util.{ Calendar, TimeZone }

import org.apache.spark.sql.catalyst.util.DateTimeUtils

import com.actian.spark_vector.ComposePartial
import com.actian.spark_vector.colbuffer._
import com.actian.spark_vector.colbuffer.util._
import com.actian.spark_vector.vector.VectorDataType

private case class TimeColumnBufferParams(cbParams: ColumnBufferBuildParams,
  converter: TimeConversion.TimeConverter,
  adjustToUTC: Boolean = false)

private[colbuffer] abstract class TimeColumnBuffer(p: TimeColumnBufferParams, valueWidth: Int)
    extends ColumnBuffer[Timestamp, Long](p.cbParams.name, p.cbParams.maxValueCount, valueWidth, valueWidth, p.cbParams.nullable) {
  private val ts = new Timestamp(System.currentTimeMillis())
  private val cal = Calendar.getInstance

  override def put(source: Timestamp, buffer: ByteBuffer): Unit = {
    if (p.adjustToUTC) {
      TimeConversion.convertLocalTimestampToUTC(source, cal)
    }
    val convertedSource = p.converter.convert(TimeConversion.normalizeTime(source), p.cbParams.scale)
    putConverted(convertedSource, buffer)
  }

  protected def putConverted(converted: Long, buffer: ByteBuffer): Unit

  override def get(buffer: ByteBuffer): Long = {
    val deconvertedSource = p.converter.deconvert(getConverted(buffer), p.cbParams.scale)
    ts.setTime(TimeConversion.scaleNanos(deconvertedSource, MillisecondsScale))
    ts.setNanos((deconvertedSource % PowersOfTen(NanosecondsScale)).toInt)
    if (p.adjustToUTC) {
      TimeConversion.convertUTCToLocalTimestamp(ts, cal)
    }
    DateTimeUtils.fromJavaTimestamp(ts)
  }

  protected def getConverted(buffer: ByteBuffer): Long
}

private class TimeIntColumnBuffer(p: TimeColumnBufferParams) extends TimeColumnBuffer(p, IntSize) {
  override protected def putConverted(converted: Long, buffer: ByteBuffer): Unit = buffer.putInt(converted.toInt)

  override protected def getConverted(buffer: ByteBuffer): Long = buffer.getInt()
}

private class TimeLongColumnBuffer(p: TimeColumnBufferParams) extends TimeColumnBuffer(p, LongSize) {
  override protected def putConverted(converted: Long, buffer: ByteBuffer): Unit = buffer.putLong(converted)

  override protected def getConverted(buffer: ByteBuffer): Long = buffer.getLong()
}

private class TimeNZLZConverter extends TimeConversion.TimeConverter {
  override def convert(unscaledNanos: Long, scale: Int): Long = TimeConversion.scaleNanos(unscaledNanos, scale)

  override def deconvert(scaledNanos: Long, scale: Int): Long = TimeConversion.unscaleNanos(scaledNanos, scale)
}

private class TimeTZConverter extends TimeConversion.TimeConverter {
  override def convert(unscaledNanos: Long, scale: Int): Long =
    (TimeConversion.scaleNanos(unscaledNanos, scale) << TimeMaskSize)

  override def deconvert(scaledNanos: Long, scale: Int): Long =
    TimeConversion.unscaleNanos(scaledNanos >> TimeMaskSize, scale)
}

private[colbuffer] object TimeColumnBuffer extends ColumnBufferBuilder {
  private final val (nzlzIntScaleBounds, nzlzLongScaleBounds) = ((0, 4), (5, 9))
  private final val (tzIntScaleBounds, tzLongScaleBounds) = ((0, 1), (2, 9))

  private val calIsNotUTC = Calendar.getInstance.getTimeZone != TimeZone.getTimeZone("UTC")

  private val buildNZPartial: PartialFunction[ColumnBufferBuildParams, TimeColumnBufferParams] =
    ofDataType(VectorDataType.TimeType) andThen { TimeColumnBufferParams(_, new TimeNZLZConverter(), calIsNotUTC) }

  private val buildLZPartial: PartialFunction[ColumnBufferBuildParams, TimeColumnBufferParams] =
    ofDataType(VectorDataType.TimeLTZType) andThen { TimeColumnBufferParams(_, new TimeNZLZConverter()) }

  private val buildNZLZ: PartialFunction[ColumnBufferBuildParams, ColumnBuffer[_, _]] = (buildNZPartial orElse buildLZPartial) andThenPartial {
    case nzlz if isInBounds(nzlz.cbParams.scale, nzlzIntScaleBounds) => new TimeIntColumnBuffer(nzlz)
    case nzlz if isInBounds(nzlz.cbParams.scale, nzlzLongScaleBounds) => new TimeLongColumnBuffer(nzlz)
  }

  private val buildTZPartial: PartialFunction[ColumnBufferBuildParams, TimeColumnBufferParams] =
    ofDataType(VectorDataType.TimeTZType) andThen { TimeColumnBufferParams(_, new TimeTZConverter()) }

  private val buildTZ: PartialFunction[ColumnBufferBuildParams, ColumnBuffer[_, _]] = buildTZPartial andThenPartial {
    case tz if isInBounds(tz.cbParams.scale, tzIntScaleBounds) => new TimeIntColumnBuffer(tz)
    case tz if isInBounds(tz.cbParams.scale, tzLongScaleBounds) => new TimeLongColumnBuffer(tz)
  }

  override private[colbuffer] val build: PartialFunction[ColumnBufferBuildParams, ColumnBuffer[_, _]] = buildNZLZ orElse buildTZ
}
Example 10
Source File: package.scala From spark-vector with Apache License 2.0 | 5 votes |
package com.actian.spark_vector.colbuffer

import java.sql.Timestamp

package object util {
  // scalastyle:off magic.number
  final val PowersOfTen = Seq(1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000)
  final val SecondsBeforeEpoch = 62167219200L
  final val TimeMaskSize = 11
  final val SecondsInMinute = 60
  final val MinutesInHour = 60
  final val HoursInDay = 24
  final val SecondsInDay = SecondsInMinute * MinutesInHour * HoursInDay
  final val MillisecondsScale = 3
  final val MillisecondsInMinute = SecondsInMinute * PowersOfTen(MillisecondsScale)
  final val MillisecondsInHour = MinutesInHour * MillisecondsInMinute
  final val MillisecondsInDay = HoursInDay * MillisecondsInHour
  final val NanosecondsScale = 9
  final val NanosecondsInMinute = (MillisecondsInMinute.toLong * PowersOfTen(NanosecondsScale - MillisecondsScale))
  final val NanosecondsInHour = MinutesInHour * NanosecondsInMinute
  final val NanosecondsInDay = HoursInDay * NanosecondsInHour
  // scalastyle:on magic.number

  def floorDiv(x: Long, y: Long): Long = {
    val ret = x / y
    if (ret >= 0 || ret * y == x) ret else ret - 1
  }
}
Example 11
Source File: PackageSpec.scala From sparkpipe-core with Apache License 2.0 | 5 votes |
package software.uncharted.sparkpipe.ops.core.dataframe.temporal

import org.scalatest._
import software.uncharted.sparkpipe.Spark
import software.uncharted.sparkpipe.ops.core.rdd.toDF
import java.text.SimpleDateFormat
import java.sql.Timestamp

class PackageSpec extends FunSpec {
  describe("ops.core.dataframe.temporal") {
    val rdd = Spark.sc.parallelize(Seq(
      (new Timestamp(new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-18").getTime), "2015-11-18", 1),
      (new Timestamp(new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-19").getTime), "2015-11-19", 2),
      (new Timestamp(new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-20").getTime), "2015-11-20", 3),
      (new Timestamp(new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-21").getTime), "2015-11-21", 4)
    ))
    val df = toDF(Spark.sparkSession)(rdd)

    describe("#dateFilter()") {
      it("should support filtering rows in an input DataFrame with a String timestamp column, based on a date range") {
        val df2 = dateFilter(
          new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-19"),
          new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-21"),
          "yyyy-MM-dd",
          "_2"
        )(df)
        assert(df2.count == 3)
      }

      it("should support filtering rows in an input DataFrame with a String timestamp column, based on a date range, specified using strings") {
        val df2 = dateFilter(
          "2015-11-19",
          "2015-11-20",
          "yyyy-MM-dd",
          "_2"
        )(df)
        assert(df2.count == 2)
      }

      it("should support filtering rows in an input DataFrame with a Timestamp timestamp column, based on a date range") {
        val df2 = dateFilter(
          new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-17"),
          new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-18"),
          "_1"
        )(df)
        assert(df2.count == 1)
      }
    }

    describe("#parseDate()") {
      it("should facilitate converting a string timestamp column into a TimestampType and adding it as a new column") {
        val df2 = parseDate("_2", "new", "yyyy-MM-dd")(df)
        assert(df2.filter("new = _1").count == df.count)
        assert(df2.schema.size == df.schema.size+1)
      }
    }

    describe("#dateField()") {
      it("should facilitate extracting a single field from a Timestamp column, and placing it in a new column") {
        val df2 = dateField("_1", "new", java.util.Calendar.YEAR)(df)
        assert(df2.filter("new = 2015").count == df.count)
        assert(df2.schema.size == df.schema.size+1)
      }
    }
  }
}
Example 12
Source File: KafkaStructuredStreamingDemo.scala From MaxCompute-Spark with Apache License 2.0 | 5 votes |
package com.aliyun.odps.spark.examples.structuredStreaming.kafka

import java.sql.Timestamp

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.window

object KafkaStructuredStreamingDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("KafkaStreamingDemo")
      .getOrCreate()

    import spark.implicits._

    val df = spark
      .readStream
      .format("kafka")
      .option("kafka.bootstrap.servers", "localhost:9092")
      .option("subscribe", "topic")
      .load()

    // Split each Kafka message value into (word, timestamp) pairs.
    // This step is missing from the listing as captured here; it is reconstructed following the
    // standard structured-streaming word-count pattern that the windowed query below expects.
    val wordsWithTimestamp = df
      .selectExpr("CAST(value AS STRING)", "timestamp")
      .as[(String, Timestamp)]
      .flatMap { case (line, ts) => line.split(" ").map(word => (word, ts)) }
      .toDF("word", "timestamp")

    // Use OSS as the checkpoint storage
    val checkpointLocation3 = "oss://bucket/checkpoint3/"

    val windowedCountsWithWatermark = wordsWithTimestamp
      .withWatermark("timestamp", "5 seconds")
      .groupBy(
        window($"timestamp", "6 seconds", "3 seconds"),
        $"word"
      ).count()

    val query3 = windowedCountsWithWatermark.writeStream
      .outputMode("append")
      .format("console")
      .option("checkpointLocation", checkpointLocation3)
      .start()

    query3.awaitTermination()
  }
}
Example 13
Source File: TimestampVectorWriter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.orc.vectors

import java.sql.Timestamp

import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector

object TimestampVectorWriter extends OrcVectorWriter[TimestampColumnVector, Timestamp] {
  override def write(vector: TimestampColumnVector, offset: Int, value: Option[Timestamp]): Unit = {
    value match {
      case Some(ts) => vector.set(offset, ts)
      case _ =>
        vector.setNullValue(offset)
        vector.noNulls = false
        vector.isNull(offset) = true
    }
  }
}
Example 14
Source File: CallRecordGeneratorIngress.scala From pipelines-examples with Apache License 2.0 | 5 votes |
package pipelines.examples.carly.aggregator

import java.sql.Timestamp

import scala.util.Random
import scala.concurrent.duration._

import org.apache.spark.sql.{ Dataset, SparkSession }
import org.apache.spark.sql.streaming.OutputMode
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.LongType

import pipelines.streamlets._
import pipelines.streamlets.avro._
import pipelines.spark.sql.SQLImplicits._
import pipelines.examples.carly.data.CallRecord
import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic }
import org.apache.log4j.{ Level, Logger }

case class Rate(timestamp: Timestamp, value: Long)

class CallRecordGeneratorIngress extends SparkStreamlet {

  val rootLogger = Logger.getRootLogger()
  rootLogger.setLevel(Level.ERROR)

  val RecordsPerSecond = IntegerConfigParameter(
    "records-per-second",
    "Records per second to process.",
    Some(50))

  override def configParameters = Vector(RecordsPerSecond)

  val out = AvroOutlet[CallRecord]("out", _.user)
  val shape = StreamletShape(out)

  override def createLogic() = new SparkStreamletLogic {
    val recordsPerSecond = context.streamletConfig.getInt(RecordsPerSecond.key)
    override def buildStreamingQueries = {
      val outStream = DataGenerator.mkData(super.session, recordsPerSecond)
      writeStream(outStream, out, OutputMode.Append).toQueryExecution
    }
  }
}

object DataGenerator {
  def mkData(session: SparkSession, recordsPerSecond: Int): Dataset[CallRecord] = {
    // do we need to expose this through configuration?
    val MaxTime = 2.hours.toMillis
    val MaxUsers = 100000
    val TS0 = new java.sql.Timestamp(0)
    val ZeroTimestampProb = 0.05 // error rate

    // Random Data Generator
    val usersUdf = udf(() ⇒ "user-" + Random.nextInt(MaxUsers))
    val directionUdf = udf(() ⇒ if (Random.nextDouble() < 0.5) "incoming" else "outgoing")

    // Time-biased randomized filter - 1/2 hour cycles
    val sinTime: Long ⇒ Double = t ⇒ Math.sin((t / 1000 % 1800) * 1.0 / 1800 * Math.PI)
    val timeBoundFilter: Long ⇒ Double ⇒ Boolean = t ⇒ prob ⇒ (sinTime(t) + 0.5) > prob
    val timeFilterUdf = udf((ts: java.sql.Timestamp, rng: Double) ⇒ timeBoundFilter(ts.getTime)(rng))
    val zeroTimestampUdf = udf((ts: java.sql.Timestamp, rng: Double) ⇒ {
      if (rng < ZeroTimestampProb) {
        TS0
      } else {
        ts
      }
    })

    val rateStream = session.readStream
      .format("rate")
      .option("rowsPerSecond", recordsPerSecond)
      .load()
      .as[Rate]

    val randomDataset = rateStream.withColumn("rng", rand()).withColumn("tsRng", rand())
    val sampledData = randomDataset.where(timeFilterUdf($"timestamp", $"rng"))
      .withColumn("user", usersUdf())
      .withColumn("other", usersUdf())
      .withColumn("direction", directionUdf())
      .withColumn("duration", (round(abs(rand()) * MaxTime)).cast(LongType))
      .withColumn("updatedTimestamp", zeroTimestampUdf($"timestamp", $"tsRng"))
      .select($"user", $"other", $"direction", $"duration", $"updatedTimestamp" as "timestamp")
      .as[CallRecord]
    sampledData
  }
}
Example 15
Source File: SparkRandomGenDataIngress.scala From pipelines-examples with Apache License 2.0 | 5 votes |
package pipelines.example

import java.sql.Timestamp

import scala.util.Random

import pipelines.streamlets.{ IntegerConfigParameter, StreamletShape }
import pipelines.streamlets.avro._
import pipelines.spark.{ SparkStreamlet, SparkStreamletLogic }
import org.apache.spark.sql.Dataset
import org.apache.spark.sql.streaming.OutputMode
import pipelines.spark.sql.SQLImplicits._

case class Rate(timestamp: Timestamp, value: Long)

class SparkRandomGenDataIngress extends SparkStreamlet {
  val out   = AvroOutlet[Data]("out", d ⇒ d.src)
  val shape = StreamletShape(out)

  val RecordsPerSecond = IntegerConfigParameter(
    "records-per-second",
    "Records per second to produce.",
    Some(50))

  override def configParameters = Vector(RecordsPerSecond)

  override def createLogic() = new SparkStreamletLogic {
    override def buildStreamingQueries = {
      writeStream(process, out, OutputMode.Append).toQueryExecution
    }

    private def process: Dataset[Data] = {
      val recordsPerSecond = context.streamletConfig.getInt(RecordsPerSecond.key)

      val gaugeGen: () ⇒ String = () ⇒ if (Random.nextDouble() < 0.5) "oil" else "gas"

      val rateStream = session.readStream
        .format("rate")
        .option("rowsPerSecond", recordsPerSecond)
        .load()
        .as[Rate]

      rateStream.map {
        case Rate(timestamp, value) ⇒ Data(s"src-${value % 100}", timestamp.getTime, gaugeGen(), Random.nextDouble() * value)
      }
    }
  }
}
Example 16
Source File: PSetAny.scala From yoda-orm with MIT License | 5 votes |
package in.norbor.yoda.orm

import java.sql.{Blob, Timestamp}

import org.joda.time.DateTime

trait PSetAny {

  def set(p: PStatement, v: Any): PStatement = v match {
    case _: Boolean => p.setBoolean(v.asInstanceOf[Boolean])
    case _: Int => p.setInt(v.asInstanceOf[Int])
    case _: Long => p.setLong(v.asInstanceOf[Long])
    case _: Float => p.setDouble(v.asInstanceOf[Float].toDouble) // widen the Float; casting the boxed Float straight to Double fails at runtime
    case _: Double => p.setDouble(v.asInstanceOf[Double])
    case _: String => p.setString(v.asInstanceOf[String])
    case _: Timestamp => p.setTimestamp(v.asInstanceOf[Timestamp])
    case _: DateTime => p.setDateTime(v.asInstanceOf[DateTime])
    case _: Blob => p.setBlob(v.asInstanceOf[Blob])
    case _: Array[Byte] => p.setBytes(v.asInstanceOf[Array[Byte]])
    case _ => p
  }
}
Example 17
Source File: PStatementTest.scala From yoda-orm with MIT License | 5 votes |
package in.norbor.yoda.orm

import java.sql.{Connection, DriverManager, ResultSet, Timestamp}

import com.typesafe.scalalogging.LazyLogging
import in.norbor.yoda.implicits.JavaSqlImprovement._
import mocks.People
import org.joda.time.DateTime
import org.scalatest.funsuite.AnyFunSuite

class PStatementTest extends AnyFunSuite {

  Class.forName("org.h2.Driver")

  private implicit val conn: Connection = DriverManager.getConnection("jdbc:h2:~/test", "sa", "")

  test("0) apply") {
    val ps = PStatement("SELECT 1")(conn)
    assert(ps !== null)

    ps.equals(null)
    ps.canEqual(null)
    ps.hashCode
    ps.toString
    ps.productPrefix
    ps.productArity
    ps.productElement(0)
    ps.productIterator
    ps.copy()
  }

  test("0) update") {
    PStatement("DROP TABLE IF EXISTS yoda_sql; CREATE TABLE yoda_sql (id INTEGER);")
      .update
  }

  test("0) query") {
    val rs = PStatement("""select 1""")
      .query

    assert(rs !== null)
  }

  test("0) queryOne with non index parameter") {
    val result = PStatement("""select ?, ?, ?, ?, ?, ?, ?, ?""")
      .setBoolean(true)
      .setInt(1)
      .setLong(1L)
      .setDouble(1)
      .setString("YO")
      .setDateTime(DateTime.now)
      .setTimestamp(new Timestamp(System.currentTimeMillis))
      .setTimestamp(null)
      .queryOne(parse)

    assert(result.head._1 === true)
  }

  test("3) queryList with parse method") {
    val peoples = PStatement("""select 1 as id, 'Peerapat' as name, now() as born;""")
      .queryList(parsePeople)

    assert(peoples.head.id === 1)
    assert(peoples.head.name === "Peerapat")
    assert(peoples.head.born.getMillis <= DateTime.now.getMillis)
  }

  test("5) batch") {
    val insert = PStatement("INSERT INTO yoda_sql VALUES(?)")
      .setInt(1)
      .addBatch()
      .setInt(2)
      .addBatch()
      .executeBatch

    assert(insert.length === 2)
  }

  private def parse(rs: ResultSet): (Boolean, Int, Long, Double, String, DateTime, Timestamp) = (rs.getBoolean(1)
    , rs.getInt(2)
    , rs.getLong(3)
    , rs.getDouble(4)
    , rs.getString(5)
    , rs.getDateTime(6)
    , rs.getTimestamp(7)
  )

  private def parsePeople(rs: ResultSet): People = People(id = rs.getLong("id")
    , name = rs.getString("name")
    , born = rs.getDateTime("born")
  )
}
Example 18
Source File: DateTimeConverter.scala From seahorse-workflow-executor with Apache License 2.0 | 5 votes |
package io.deepsense.commons.datetime

import java.sql.Timestamp

import org.joda.time.format.{DateTimeFormatter, ISODateTimeFormat}
import org.joda.time.{DateTime, DateTimeZone}

trait DateTimeConverter {
  val zone: DateTimeZone = DateTimeZone.getDefault
  val dateTimeFormatter: DateTimeFormatter = ISODateTimeFormat.dateTime()

  def toString(dateTime: DateTime): String = dateTime.toString(dateTimeFormatter)

  def parseDateTime(s: String): DateTime = dateTimeFormatter.parseDateTime(s).withZone(zone)

  def parseTimestamp(s: String): Timestamp = new Timestamp(parseDateTime(s).getMillis)

  def now: DateTime = new DateTime(zone)

  def fromMillis(millis: Long): DateTime = new DateTime(zone).withMillis(millis)

  def dateTime(
      year: Int,
      monthOfyear: Int,
      dayOfMonth: Int,
      hourOfDay: Int = 0,
      minutesOfHour: Int = 0,
      secondsOfMinute: Int = 0): DateTime =
    new DateTime(year, monthOfyear, dayOfMonth, hourOfDay, minutesOfHour, secondsOfMinute, zone)

  def dateTimeFromUTC(
      year: Int,
      monthOfyear: Int,
      dayOfMonth: Int,
      hourOfDay: Int = 0,
      minutesOfHour: Int = 0,
      secondsOfMinute: Int = 0): DateTime =
    new DateTime(
      year,
      monthOfyear,
      dayOfMonth,
      hourOfDay,
      minutesOfHour,
      secondsOfMinute,
      DateTimeZone.UTC).withZone(DateTimeConverter.zone)
}

object DateTimeConverter extends DateTimeConverter
Example 19
Source File: CsvSchemaStringifierBeforeCsvWriting.scala From seahorse-workflow-executor with Apache License 2.0 | 5 votes |
package io.deepsense.deeplang.doperations.readwritedataframe.filestorage.csv

import java.sql.Timestamp

import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

import io.deepsense.commons.datetime.DateTimeConverter
import io.deepsense.deeplang.ExecutionContext
import io.deepsense.deeplang.doperables.dataframe.DataFrame
import io.deepsense.deeplang.doperations.exceptions.UnsupportedColumnTypeException

object CsvSchemaStringifierBeforeCsvWriting {

  def preprocess(dataFrame: DataFrame)(implicit context: ExecutionContext): DataFrame = {
    requireNoComplexTypes(dataFrame)

    val schema = dataFrame.sparkDataFrame.schema
    def stringifySelectedTypes(schema: StructType): StructType = {
      StructType(
        schema.map {
          case field: StructField => field.copy(dataType = StringType)
        }
      )
    }

    context.dataFrameBuilder.buildDataFrame(
      stringifySelectedTypes(schema),
      dataFrame.sparkDataFrame.rdd.map(stringifySelectedCells(schema)))
  }

  private def requireNoComplexTypes(dataFrame: DataFrame): Unit = {
    dataFrame.sparkDataFrame.schema.fields.map(structField =>
      (structField.dataType, structField.name)
    ).foreach {
      case (dataType, columnName) =>
        dataType match {
          case _: ArrayType | _: MapType | _: StructType =>
            throw UnsupportedColumnTypeException(columnName, dataType)
          case _ => ()
        }
    }
  }

  private def stringifySelectedCells(originalSchema: StructType)(row: Row): Row = {
    Row.fromSeq(
      row.toSeq.zipWithIndex map { case (value, index) =>
        (value, originalSchema(index).dataType) match {
          case (null, _) => ""
          case (_, BooleanType) =>
            if (value.asInstanceOf[Boolean]) "1" else "0"
          case (_, TimestampType) =>
            DateTimeConverter.toString(
              DateTimeConverter.fromMillis(value.asInstanceOf[Timestamp].getTime))
          case (x, _) => value.toString
        }
      })
  }
}
Example 20
Source File: WriteReadDataFrameWithDriverFilesIntegSpec.scala From seahorse-workflow-executor with Apache License 2.0 | 5 votes |
package io.deepsense.deeplang.doperations

import java.sql.Timestamp

import org.apache.spark.sql.Row
import org.apache.spark.sql.types._
import org.scalatest.BeforeAndAfter

import io.deepsense.deeplang.{TestFiles, DeeplangIntegTestSupport}
import io.deepsense.deeplang.doperables.dataframe.DataFrame
import io.deepsense.deeplang.doperations.inout._

class WriteReadDataFrameWithDriverFilesIntegSpec
  extends DeeplangIntegTestSupport
  with BeforeAndAfter
  with TestFiles {

  import DeeplangIntegTestSupport._

  val schema: StructType = StructType(Seq(
    StructField("boolean", BooleanType),
    StructField("double", DoubleType),
    StructField("string", StringType)
  ))

  val rows = {
    val base = Seq(
      Row(true, 0.45, "3.14"),
      Row(false, null, "\"testing...\""),
      Row(false, 3.14159, "Hello, world!"),
      // in case of CSV, an empty string is the same as null - no way around it
      Row(null, null, "")
    )
    val repeatedFewTimes = (1 to 10).flatMap(_ => base)
    repeatedFewTimes
  }

  lazy val dataFrame = createDataFrame(rows, schema)

  "WriteDataFrame and ReadDataFrame" should {
    "write and read CSV file" in {
      val wdf = new WriteDataFrame()
        .setStorageType(
          new OutputStorageTypeChoice.File()
            .setOutputFile(absoluteTestsDirPath.fullPath + "/test_files")
            .setFileFormat(
              new OutputFileFormatChoice.Csv()
                .setCsvColumnSeparator(CsvParameters.ColumnSeparatorChoice.Tab())
                .setNamesIncluded(true)))
      wdf.executeUntyped(Vector(dataFrame))(executionContext)

      val rdf = new ReadDataFrame()
        .setStorageType(
          new InputStorageTypeChoice.File()
            .setSourceFile(absoluteTestsDirPath.fullPath + "/test_files")
            .setFileFormat(new InputFileFormatChoice.Csv()
              .setCsvColumnSeparator(CsvParameters.ColumnSeparatorChoice.Tab())
              .setNamesIncluded(true)
              .setShouldConvertToBoolean(true)))
      val loadedDataFrame = rdf.executeUntyped(Vector())(executionContext).head.asInstanceOf[DataFrame]

      assertDataFramesEqual(loadedDataFrame, dataFrame, checkRowOrder = false)
    }

    "write and read JSON file" in {
      val wdf = new WriteDataFrame()
        .setStorageType(new OutputStorageTypeChoice.File()
          .setOutputFile(absoluteTestsDirPath.fullPath + "json")
          .setFileFormat(new OutputFileFormatChoice.Json()))

      wdf.executeUntyped(Vector(dataFrame))(executionContext)

      val rdf = new ReadDataFrame()
        .setStorageType(new InputStorageTypeChoice.File()
          .setSourceFile(absoluteTestsDirPath.fullPath + "json")
          .setFileFormat(new InputFileFormatChoice.Json()))
      val loadedDataFrame = rdf.executeUntyped(Vector())(executionContext).head.asInstanceOf[DataFrame]

      assertDataFramesEqual(loadedDataFrame, dataFrame, checkRowOrder = false)
    }
  }
}
Example 21
Source File: DataFrameReportPerformanceSpec.scala From seahorse-workflow-executor with Apache License 2.0 | 5 votes |
package io.deepsense.deeplang.doperables.dataframe

import java.sql.Timestamp
import java.text.{DateFormat, SimpleDateFormat}
import java.util.TimeZone

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{DoubleType, StructField, StructType, TimestampType}
import org.scalatest.{BeforeAndAfter, Ignore}

import io.deepsense.commons.utils.{DoubleUtils, Logging}
import io.deepsense.deeplang.{TestFiles, DeeplangIntegTestSupport}

// It's ignored because it does not have assertions; it only prints report generation time.
@Ignore
class DataFrameReportPerformanceSpec
  extends DeeplangIntegTestSupport
  with BeforeAndAfter
  with TestFiles
  with Logging {

  val testFile = absoluteTestsDirPath.pathWithoutScheme + "/demand_without_header.csv"

  "DataFrame" should {
    "generate report" when {
      "DataFrame has 17K of rows" in {
        val numberOfTries = 10
        var results: Seq[Double] = Seq()
        for (i <- 1 to numberOfTries) {
          val dataFrame: DataFrame = demandDataFrame()
          val start = System.nanoTime()
          val report = dataFrame.report
          val end = System.nanoTime()
          val time1: Double = (end - start).toDouble / 1000000000.0
          results = results :+ time1
          logger.debug("Report generation time: {}", DoubleUtils.double2String(time1))
        }
        logger.debug(
          "Mean report generation time: {}",
          DoubleUtils.double2String(results.fold(0D)(_ + _) / numberOfTries.toDouble))
      }
    }
  }

  private def demandDataFrame(): DataFrame = {
    val rddString: RDD[String] = executionContext.sparkContext.textFile(testFile)
    val data: RDD[Row] = rddString.map(DataFrameHelpers.demandString2Row)
    executionContext.dataFrameBuilder.buildDataFrame(demandSchema, data)
  }

  private def demandSchema: StructType = StructType(Seq(
    StructField("datetime", TimestampType),
    StructField("log_count", DoubleType),
    StructField("workingday", DoubleType),
    StructField("holiday", DoubleType),
    StructField("season2", DoubleType),
    StructField("season3", DoubleType),
    StructField("season4", DoubleType)))

  private def timestamp(s: String): Timestamp = {
    val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    format.setTimeZone(TimeZone.getTimeZone("UTC"))
    new Timestamp(format.parse(s).getTime)
  }
}

private object DataFrameHelpers {
  def demandString2Row(s: String): Row = {
    val split = s.split(",")
    Row(
      timestamp(split(0)),
      split(1).toDouble,
      split(2).toDouble,
      split(3).toDouble,
      split(4).toDouble,
      split(5).toDouble,
      split(6).toDouble
    )
  }

  private def timestamp(s: String): Timestamp = {
    val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    format.setTimeZone(TimeZone.getTimeZone("UTC"))
    new Timestamp(format.parse(s).getTime)
  }
}
Example 22
Source File: StatisticsForContinuousIntegSpec.scala From seahorse-workflow-executor with Apache License 2.0 | 5 votes |
package io.deepsense.deeplang.doperables.dataframe.report.distribution

import java.sql.Timestamp

import org.apache.spark.rdd.RDD
import org.apache.spark.sql
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

import io.deepsense.commons.datetime.DateTimeConverter
import io.deepsense.deeplang.DeeplangIntegTestSupport
import io.deepsense.deeplang.doperables.dataframe.{DataFrame, DataFrameTestFactory}
import io.deepsense.reportlib.model._

class StatisticsForContinuousIntegSpec extends DeeplangIntegTestSupport with DataFrameTestFactory {

  "Statistics (Min, max and mean values)" should {
    "be calculated for each continuous column in distribution" when {
      "data is of type int" in {
        val distribution = distributionForInt(1, 2, 3, 4, 5)
        distribution.statistics.min shouldEqual Some("1")
        distribution.statistics.max shouldEqual Some("5")
        distribution.statistics.mean shouldEqual Some("3")
      }
      "data is of type Timestamp" in {
        val distribution =
          distributionForTimestamps(new Timestamp(1000), new Timestamp(2000), new Timestamp(3000))
        distribution.statistics.min shouldEqual Some(formatDate(1000))
        distribution.statistics.max shouldEqual Some(formatDate(3000))
        distribution.statistics.mean shouldEqual Some(formatDate(2000))
      }
    }
  }

  "Null value in data" should {
    val distribution = distributionForDouble(1, 2, 3, 4, Double.NaN, 5)

    "not be skipped in calculating min and max" in {
      distribution.statistics.min shouldEqual Some("1")
      distribution.statistics.max shouldEqual Some("5")
    }
    "result in mean value NaN" in {
      distribution.statistics.mean shouldEqual Some("NaN")
    }
  }

  lazy val columnName = "column_name"

  private def distributionForDouble(data: Double*): ContinuousDistribution = {
    distributionFor(data, DoubleType)
  }

  private def distributionForInt(data: Int*): ContinuousDistribution = {
    distributionFor(data, IntegerType)
  }

  private def distributionForTimestamps(data: Timestamp*): ContinuousDistribution = {
    distributionFor(data, TimestampType)
  }

  private def distributionFor(data: Seq[Any], dataType: DataType): ContinuousDistribution = {
    val schema = StructType(Array(
      StructField(columnName, dataType)
    ))
    val rows = data.map(v => Row(v))
    val dataFrame = createDataFrame(rows, schema)

    val report = dataFrame.report
    report.content.distributions(columnName).asInstanceOf[ContinuousDistribution]
  }

  def buildDataFrame(schema: StructType, data: RDD[Row]): DataFrame = {
    val dataFrame: sql.DataFrame = sparkSQLSession.createDataFrame(data, schema)
    DataFrame.fromSparkDataFrame(dataFrame)
  }

  def formatDate(millis: Long): String = {
    DateTimeConverter.toString(DateTimeConverter.fromMillis(millis))
  }
}
Example 23
Source File: ParameterConversions.scala From scruid with Apache License 2.0 | 5 votes |
package ing.wbaa.druid.sql

import java.sql.Timestamp
import java.time.{ Instant, LocalDate, LocalDateTime }

import scala.language.implicitConversions

import ing.wbaa.druid.{ DruidConfig, SQLQueryParameter, SQLQueryParameterType }

trait ParameterConversions {
  implicit def char2Param(v: Char): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Char, v.toString)

  implicit def string2Param(v: String): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Varchar, v)

  implicit def byte2Param(v: Byte): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Tinyint, v.toString)

  implicit def short2Param(v: Short): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Smallint, v.toString)

  implicit def int2Param(v: Int): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Integer, v.toString)

  implicit def long2Param(v: Long): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Bigint, v.toString)

  implicit def float2Param(v: Float): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Float, v.toString)

  implicit def double2Param(v: Double): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Double, v.toString)

  implicit def boolean2Param(v: Boolean): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Boolean, v.toString)

  implicit def localDate2Param(v: LocalDate)(implicit config: DruidConfig = DruidConfig.DefaultConfig): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Date, v.format(config.FormatterDate))

  implicit def localDateTime2Param(
      v: LocalDateTime
  )(implicit config: DruidConfig = DruidConfig.DefaultConfig): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Timestamp, v.format(config.FormatterDateTime))

  implicit def timestamp2Param(v: Timestamp)(implicit config: DruidConfig = DruidConfig.DefaultConfig): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Timestamp, config.FormatterDateTime.format(v.toInstant))

  implicit def instant2Param(
      v: Instant
  )(implicit config: DruidConfig = DruidConfig.DefaultConfig): SQLQueryParameter =
    SQLQueryParameter(SQLQueryParameterType.Timestamp, config.FormatterDateTime.format(v))
}
Example 24
Source File: CreateOps.scala From recogito2 with Apache License 2.0 | 5 votes |
package services.folder.create

import java.util.{Date, UUID}
import java.sql.Timestamp
import org.jooq.DSLContext
import scala.concurrent.Future
import services.{PublicAccess, SharingLevel}
import services.folder.FolderService
import services.generated.Tables.{FOLDER, FOLDER_ASSOCIATION, SHARING_POLICY}
import services.generated.tables.records.{FolderRecord, FolderAssociationRecord, SharingPolicyRecord}

trait CreateOps { self: FolderService =>

  def createFolder(owner: String, title: String, parent: Option[UUID]): Future[FolderRecord] =
    db.withTransaction { sql =>
      val folder = new FolderRecord(UUID.randomUUID, owner, title, optUUID(parent), null, PublicAccess.PRIVATE.toString, null)
      sql.insertInto(FOLDER).set(folder).execute()
      folder
    }

  private def insertAssociation(documentId: String, folderId: UUID, sql: DSLContext) = {
    val association = new FolderAssociationRecord(folderId, documentId)
    sql.insertInto(FOLDER_ASSOCIATION).set(association).execute()
    association
  }

  def moveDocumentToFolder(documentId: String, folderId: UUID) =
    db.withTransaction { sql =>
      sql.deleteFrom(FOLDER_ASSOCIATION)
        .where(FOLDER_ASSOCIATION.DOCUMENT_ID.equal(documentId))
        .execute

      insertAssociation(documentId, folderId, sql)
    }

  def moveDocumentToRoot(documentId: String) =
    db.withTransaction { sql =>
      sql.deleteFrom(FOLDER_ASSOCIATION)
        .where(FOLDER_ASSOCIATION.DOCUMENT_ID.equal(documentId))
        .execute == 1
    }

  def addCollaborator(folderId: UUID, sharedBy: String, sharedWith: String, level: SharingLevel) =
    db.query { sql =>
      val existing = sql.selectFrom(SHARING_POLICY)
        .where(SHARING_POLICY.FOLDER_ID.equal(folderId)
          .and(SHARING_POLICY.SHARED_WITH.equal(sharedWith))).fetchOne

      val policy = Option(existing) match {
        case Some(policy) =>
          policy.setSharedBy(sharedBy)
          policy.setSharedAt(new Timestamp(new Date().getTime))
          policy.setAccessLevel(level.toString)
          policy

        case None =>
          val policy = new SharingPolicyRecord(
            null, // auto-inc id
            folderId,
            null, // document_id
            sharedBy,
            sharedWith,
            new Timestamp(new Date().getTime),
            level.toString)

          policy.changed(SHARING_POLICY.ID, false)
          sql.attach(policy)
          policy
      }

      policy.store() == 1
    }
}
Example 25
Source File: A_1_WindowOperation.scala From wow-spark with MIT License | 5 votes |
package com.sev7e0.wow.structured_streaming

import java.sql.Timestamp

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._
import org.apache.spark.sql.streaming.OutputMode

object A_1_WindowOperation {

  def main(args: Array[String]): Unit = {
    if (args.length < 3) {
      println(s" Usage: StructuredNetworkWordCountWindowed <hostname> <port>" +
        " <window duration in seconds> [<slide duration in seconds>]")
      System.exit(1)
    }

    val host = args(0)
    val port = args(1).toInt
    val windowSize = args(2).toInt
    val slideSize = if (args.length == 3) windowSize else args(3).toInt
    if (slideSize > windowSize) {
      System.err.println("<slide duration> must be less than or equal to <window duration>")
    }
    val windowDuration = s"$windowSize seconds"
    val slideDuration = s"$slideSize seconds"

    val spark = SparkSession.builder()
      .master("local")
      .appName(A_1_WindowOperation.getClass.getName)
      .getOrCreate()

    val lines = spark.readStream
      .format("socket")
      .option("host", host)
      .option("port", port)
      .option("includeTimestamp", true) // attach an event timestamp to each line so it can be windowed below
      .load()

    import spark.implicits._
    val words = lines.as[(String, Timestamp)]
      .flatMap(line => line._1.split(" ").map(word => (word, line._2)))
      .toDF("word", "timestamp")

    val windowCount = words.groupBy(
      window($"timestamp", windowDuration, slideDuration), $"word").count().orderBy("window")

    val query = windowCount.writeStream
      .outputMode(OutputMode.Complete())
      .format("console")
      .option("truncate", "false")
      .start()

    query.awaitTermination()
  }
}
Example 26
Source File: A_1_BasicOperation.scala From wow-spark with MIT License | 5 votes |
package com.sev7e0.wow.structured_streaming

import java.sql.Timestamp

import org.apache.spark.sql.types.{BooleanType, StringType, StructType, TimestampType}
import org.apache.spark.sql.{Dataset, SparkSession}

object A_1_BasicOperation {

  // For timestamp fields the case class must use java.sql.Timestamp, which Catalyst maps to TimestampType
  case class DeviceData(device: String, deviceType: String, signal: Double, time: Timestamp)

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName(A_1_BasicOperation.getClass.getName)
      .master("local")
      .getOrCreate()

    val timeStructType = new StructType().add("device", StringType)
      .add("deviceType", StringType)
      .add("signal", BooleanType)
      .add("time", TimestampType)

    val dataFrame = spark.read.json("src/main/resources/sparkresource/device.json")
    import spark.implicits._
    val ds: Dataset[DeviceData] = dataFrame.as[DeviceData]

    // untyped (SQL-like) query
    dataFrame.select("device").where("signal>10").show()
    // typed query
    ds.filter(_.signal > 10).map(_.device).show()

    // untyped groupBy with a count per device type
    dataFrame.groupBy("deviceType").count().show()

    import org.apache.spark.sql.expressions.scalalang.typed
    // typed aggregation: average signal value per device type
    ds.groupByKey(_.deviceType).agg(typed.avg(_.signal)).show()

    // alternatively, register a temporary view and query it with SQL
    dataFrame.createOrReplaceTempView("device")
    spark.sql("select * from device").show()

    // isStreaming tells whether the DataFrame carries streaming data
    println(dataFrame.isStreaming)
  }
}
Example 27
Source File: SparkDataGenerator.scala From cloudflow with Apache License 2.0 | 5 votes |
package swissknife.spark

import java.sql.Timestamp

import cloudflow.streamlets.{ IntegerConfigParameter, StreamletShape }
import cloudflow.streamlets.avro._
import cloudflow.spark.{ SparkStreamlet, SparkStreamletLogic }
import org.apache.spark.sql.Dataset
import org.apache.spark.sql.streaming.OutputMode
import org.apache.spark.sql.functions._
import cloudflow.spark.sql.SQLImplicits._

import swissknife.data.Data

case class Rate(timestamp: Timestamp, value: Long)

class SparkDataGenerator extends SparkStreamlet {

  val out   = AvroOutlet[Data]("out", d ⇒ d.src)
  val shape = StreamletShape(out)

  val RecordsPerSecond = IntegerConfigParameter("records-per-second", "Records per second to produce.", Some(1))

  override def configParameters = Vector(RecordsPerSecond)

  override def createLogic() = new SparkStreamletLogic {
    override def buildStreamingQueries =
      writeStream(process, out, OutputMode.Append).toQueryExecution

    private def process: Dataset[Data] = {
      val recordsPerSecond = RecordsPerSecond.value
      session.readStream
        .format("rate")
        .option("rowsPerSecond", recordsPerSecond)
        .load()
        .select(lit("origin").as("src"), $"timestamp", lit("").as("payload"), $"value".as("count"))
        .as[Data]
    }
  }
}
Example 28
Source File: CallRecordGeneratorIngress.scala From cloudflow with Apache License 2.0 | 5 votes |
package carly.aggregator

import java.sql.Timestamp

import scala.util.Random
import scala.concurrent.duration._

import org.apache.spark.sql.{ Dataset, SparkSession }
import org.apache.spark.sql.streaming.OutputMode
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.LongType

import cloudflow.streamlets._
import cloudflow.streamlets.avro._
import cloudflow.spark.sql.SQLImplicits._
import carly.data.CallRecord
import cloudflow.spark.{ SparkStreamlet, SparkStreamletLogic }
import org.apache.log4j.{ Level, Logger }

case class Rate(timestamp: Timestamp, value: Long)

class CallRecordGeneratorIngress extends SparkStreamlet {

  val rootLogger = Logger.getRootLogger()
  rootLogger.setLevel(Level.ERROR)

  val RecordsPerSecond = IntegerConfigParameter("records-per-second", "Records per second to process.", Some(50))

  override def configParameters = Vector(RecordsPerSecond)

  val out = AvroOutlet[CallRecord]("out", _.user)
  val shape = StreamletShape(out)

  override def createLogic() = new SparkStreamletLogic {
    val recordsPerSecond = RecordsPerSecond.value
    override def buildStreamingQueries = {
      val outStream = DataGenerator.mkData(super.session, recordsPerSecond)
      writeStream(outStream, out, OutputMode.Append).toQueryExecution
    }
  }
}

object DataGenerator {
  def mkData(session: SparkSession, recordsPerSecond: Int): Dataset[CallRecord] = {
    // do we need to expose this through configuration?
    val MaxTime = 2.hours.toMillis
    val MaxUsers = 100000
    val TS0 = new java.sql.Timestamp(0)
    val ZeroTimestampProb = 0.05 // error rate

    // Random Data Generator
    val usersUdf = udf(() ⇒ "user-" + Random.nextInt(MaxUsers))
    val directionUdf = udf(() ⇒ if (Random.nextDouble() < 0.5) "incoming" else "outgoing")

    // Time-biased randomized filter - 1/2 hour cycles
    val sinTime: Long ⇒ Double = t ⇒ Math.sin((t / 1000 % 1800) * 1.0 / 1800 * Math.PI)
    val timeBoundFilter: Long ⇒ Double ⇒ Boolean = t ⇒ prob ⇒ (sinTime(t) + 0.5) > prob
    val timeFilterUdf = udf((ts: java.sql.Timestamp, rng: Double) ⇒ timeBoundFilter(ts.getTime)(rng))
    val zeroTimestampUdf = udf { (ts: java.sql.Timestamp, rng: Double) ⇒
      if (rng < ZeroTimestampProb) {
        TS0
      } else {
        ts
      }
    }

    val rateStream = session.readStream
      .format("rate")
      .option("rowsPerSecond", recordsPerSecond)
      .load()
      .as[Rate]

    val randomDataset = rateStream.withColumn("rng", rand()).withColumn("tsRng", rand())
    val sampledData = randomDataset
      .where(timeFilterUdf($"timestamp", $"rng"))
      .withColumn("user", usersUdf())
      .withColumn("other", usersUdf())
      .withColumn("direction", directionUdf())
      .withColumn("duration", (round(abs(rand()) * MaxTime)).cast(LongType))
      .withColumn("updatedTimestamp", zeroTimestampUdf($"timestamp", $"tsRng"))
      .select($"user", $"other", $"direction", $"duration", $"updatedTimestamp".as("timestamp"))
      .as[CallRecord]
    sampledData
  }
}
Example 29
Source File: SparkRandomGenIngress.scala From cloudflow with Apache License 2.0 | 5 votes |
package cloudflow.sparkdoc

import scala.util.Random

import cloudflow.spark._
import cloudflow.streamlets._
import cloudflow.streamlets.avro._
import cloudflow.spark.sql.SQLImplicits._

import org.apache.spark.sql.Dataset
import org.apache.spark.sql.streaming.OutputMode

import java.sql.Timestamp

class SparkRandomGenDataIngress extends SparkStreamlet {
  val out = AvroOutlet[Data]("out", d ⇒ d.key)
  val shape = StreamletShape(out)

  case class Rate(timestamp: Timestamp, value: Long)

  override def createLogic() = new SparkStreamletLogic {
    override def buildStreamingQueries =
      writeStream(process, out, OutputMode.Append).toQueryExecution

    private def process: Dataset[Data] = {
      val recordsPerSecond = 10

      val keyGen: () ⇒ String = () ⇒ if (Random.nextDouble() < 0.5) "keyOne" else "keyTwo"

      val rateStream = session.readStream
        .format("rate")
        .option("rowsPerSecond", recordsPerSecond)
        .load()
        .as[Rate]

      rateStream.map {
        case Rate(_, value) ⇒ Data(keyGen(), value.toInt)
      }
    }
  }
}
Example 30
Source File: InsertMysqlDemo.scala From spark_mysql with Apache License 2.0 | 5 votes |
import java.sql.{Date, Timestamp} import InsertMysqlDemo.CardMember import org.apache.spark.sql.SQLContext import org.apache.spark.{SparkConf, SparkContext} import utils.MySQLUtils /** * Created with IntelliJ IDEA. * Author: [email protected] * Description: Save DataFrame data into MySQL * Date: Created in 2018-11-17 12:39 */ object InsertMysqlDemo { case class CardMember(m_id: String, card_type: String, expire: Timestamp, duration: Int, is_sale: Boolean, date: Date, user: Long, salary: Float) def main(args: Array[String]): Unit = { val conf = new SparkConf().setMaster("local[*]").setAppName(getClass.getSimpleName).set("spark.testing.memory", "3147480000") val sparkContext = new SparkContext(conf) val hiveContext = new SQLContext(sparkContext) import hiveContext.implicits._ val memberSeq = Seq( CardMember("member_2", "月卡", new Timestamp(System.currentTimeMillis()), 31, false, new Date(System.currentTimeMillis()), 123223, 0.32f), CardMember("member_1", "季卡", new Timestamp(System.currentTimeMillis()), 93, false, new Date(System.currentTimeMillis()), 124224, 0.362f) ) val memberDF = memberSeq.toDF() MySQLUtils.saveDFtoDBCreateTableIfNotExist("member_test", memberDF) MySQLUtils.insertOrUpdateDFtoDBUsePool("member_test", memberDF, Array("user", "salary")) MySQLUtils.getDFFromMysql(hiveContext, "", null) sparkContext.stop() } }
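The demo rows above rely on the two millisecond constructors from java.sql. A short, MySQL-free reminder of what each type carries (the printed values are only indicative):

import java.sql.{Date, Timestamp}

object SqlTypesSketch extends App {
  val now = System.currentTimeMillis()
  val ts  = new Timestamp(now)   // date + time-of-day (+ nanos); maps to DATETIME/TIMESTAMP columns
  val d   = new Date(now)        // calendar date only when rendered; maps to DATE columns

  println(ts)                    // e.g. 2018-11-17 12:39:05.123
  println(d)                     // e.g. 2018-11-17
}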
Example 31
Source File: SchemaData.scala From pulsar-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.pulsar import java.sql.Timestamp import java.util import java.util.Calendar import scala.beans.BeanProperty import scala.collection.JavaConverters._ object SchemaData { val booleanSeq = Seq(true, false, true, true, false) val bytesSeq = 1.to(5).map(_.toString.getBytes) val cal = Calendar.getInstance() cal.clear() val dateSeq = (1 to 5).map { i => cal.set(2019, 0, i) cal.getTime } cal.clear() val timestampSeq = (1 to 5).map { i => cal.set(2019, 0, i, 20, 35, 40) new Timestamp(cal.getTimeInMillis) } val stringSeq = 1.to(5).map(_.toString) val int8Seq = 1.to(5).map(_.toByte) val doubleSeq = 1.to(5).map(_.toDouble) val floatSeq = 1.to(5).map(_.toFloat) val int32Seq = 1.to(5) val int64Seq = 1.to(5).map(_.toLong) val int16Seq = 1.to(5).map(_.toShort) case class Foo(@BeanProperty i: Int, @BeanProperty f: Float, @BeanProperty bar: Bar) case class Bar(@BeanProperty b: Boolean, @BeanProperty s: String) case class F1(@BeanProperty baz: Baz) case class Baz( @BeanProperty f: Float, @BeanProperty d: Double, @BeanProperty mp: util.Map[String, Bar], @BeanProperty arr: Array[Bar]) val fooSeq: Seq[Foo] = Foo(1, 1.0.toFloat, Bar(true, "a")) :: Foo(2, 2.0.toFloat, Bar(false, "b")) :: Foo(3, 0, null) :: Nil val f1Seq: Seq[F1] = F1( Baz( Float.NaN, Double.NaN, Map("1" -> Bar(true, "1"), "2" -> Bar(false, "2")).asJava, Array(Bar(true, "1"), Bar(true, "2")))) :: F1( Baz( Float.NegativeInfinity, Double.NegativeInfinity, Map("" -> Bar(true, "1")).asJava, null)) :: F1(Baz(Float.PositiveInfinity, Double.PositiveInfinity, null, null)) :: F1(Baz(1.0.toFloat, 2.0, null, null)) :: Nil val f1Results = f1Seq.map(f1 => (f1.baz.f, f1.baz.d, if (f1.baz.mp == null) null else f1.baz.mp.asScala, f1.baz.arr)) }
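The fixture above derives its timestamps from a shared Calendar; keep in mind that cal.set(2019, 0, i, 20, 35, 40) takes a zero-based month (0 = January) and that cal.clear() zeroes the fields set does not touch, such as milliseconds. The pattern in isolation:

import java.sql.Timestamp
import java.util.Calendar

object CalendarTimestampSketch extends App {
  val cal = Calendar.getInstance()
  cal.clear()                          // otherwise leftover millis leak into the Timestamp
  cal.set(2019, 0, 3, 20, 35, 40)      // 2019-01-03 20:35:40 in the JVM default zone
  println(new Timestamp(cal.getTimeInMillis))
}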
Example 32
Source File: Executor.scala From neo4j-spark-connector with Apache License 2.0 | 5 votes |
package org.neo4j.spark import java.time.{LocalDate, LocalDateTime, OffsetTime, ZoneOffset, ZonedDateTime} import java.util import java.sql.Timestamp import org.apache.spark.SparkContext import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.types.StructType import org.neo4j.spark.dataframe.CypherTypes import org.neo4j.spark.utils.{Neo4jSessionAwareIterator, Neo4jUtils} import scala.collection.JavaConverters._ object Executor { def convert(value: AnyRef): Any = value match { case it: util.Collection[_] => it.toArray() case m: java.util.Map[_,_] => m.asScala case _ => Neo4jUtils.convert(value) } def toJava(parameters: Map[String, Any]): java.util.Map[String, Object] = { parameters.mapValues(toJava).asJava } private def toJava(x: Any): AnyRef = x match { case y: Seq[_] => y.asJava case _ => x.asInstanceOf[AnyRef] } val EMPTY = Array.empty[Any] val EMPTY_RESULT = new CypherResult(new StructType(), Iterator.empty) class CypherResult(val schema: StructType, val rows: Iterator[Array[Any]]) { def sparkRows: Iterator[Row] = rows.map(row => new GenericRowWithSchema(row, schema)) def fields = schema.fieldNames } def execute(sc: SparkContext, query: String, parameters: Map[String, AnyRef]): CypherResult = { execute(Neo4jConfig(sc.getConf), query, parameters) } private def rows(result: Iterator[_]) = { var i = 0 while (result.hasNext) i = i + 1 i } def execute(config: Neo4jConfig, query: String, parameters: Map[String, Any], write: Boolean = false): CypherResult = { val result = new Neo4jSessionAwareIterator(config, query, toJava(parameters), write) if (!result.hasNext) { return EMPTY_RESULT } val peek = result.peek() val keyCount = peek.size() if (keyCount == 0) { return new CypherResult(new StructType(), Array.fill[Array[Any]](rows(result))(EMPTY).toIterator) } val keys = peek.keys().asScala val fields = keys.map(k => (k, peek.get(k).`type`())).map(keyType => CypherTypes.field(keyType)) val schema = StructType(fields) val it = result.map(record => { val row = new Array[Any](keyCount) var i = 0 while (i < keyCount) { val value = convert(record.get(i).asObject()) row.update(i, value) i = i + 1 } row }) new CypherResult(schema, it) } }
Example 33
Source File: Neo4jUtils.scala From neo4j-spark-connector with Apache License 2.0 | 5 votes |
package org.neo4j.spark.utils import java.sql.Timestamp import java.time._ import java.util.concurrent.Callable import java.util.function import io.github.resilience4j.retry.{Retry, RetryConfig} import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.neo4j.driver.exceptions.{ServiceUnavailableException, SessionExpiredException, TransientException} import org.neo4j.driver.{Driver, Result, Session, Transaction} import org.neo4j.spark.Neo4jConfig import org.slf4j.LoggerFactory class Neo4jUtils object Neo4jUtils { private val logger = LoggerFactory.getLogger(classOf[Neo4jUtils]) def close(driver: Driver, session: Session): Unit = { try { if (session != null && session.isOpen) { closeSafety(session) } } finally { if (driver != null) { closeSafety(driver) } } } private def closeSafety(closable: AutoCloseable): Unit = { try { closable.close() } catch { case e: Throwable => { logger.error("Exception while trying to close an AutoCloseable, because of the following exception", e) } } } private val retryConfig = RetryConfig.custom.retryExceptions( classOf[SessionExpiredException], classOf[ServiceUnavailableException] // retry on the same exceptions the driver does [1] ) .retryOnException(new function.Predicate[Throwable] { override def test(exception: Throwable): Boolean = exception match { case t: TransientException => { val code = t.code() !("Neo.TransientError.Transaction.Terminated" == code) && !("Neo.TransientError.Transaction.LockClientStopped" == code) } case _ => false } }) .maxAttempts(3) .build def executeTxWithRetries[T](neo4jConfig: Neo4jConfig, query: String, params: java.util.Map[String, AnyRef], write: Boolean): (Driver, Session, Transaction, Result) = { val driver: Driver = neo4jConfig.driver() val session: Session = driver.session(neo4jConfig.sessionConfig(write)) Retry.decorateCallable( Retry.of("neo4jTransactionRetryPool", retryConfig), new Callable[(Driver, Session, Transaction, Result)] { override def call(): (Driver, Session, Transaction, Result) = { val transaction = session.beginTransaction() val result = transaction.run(query, params) (driver, session, transaction, result) } } ) .call() } def convert(value: AnyRef): AnyRef = value match { case m: ZonedDateTime => new Timestamp(DateTimeUtils.fromUTCTime(m.toInstant.toEpochMilli, m.getZone.getId)) case m: LocalDateTime => new Timestamp(DateTimeUtils.fromUTCTime(m.toInstant(ZoneOffset.UTC).toEpochMilli,"UTC")) case m: LocalDate => java.sql.Date.valueOf(m) case m: OffsetTime => new Timestamp(m.atDate(LocalDate.ofEpochDay(0)).toInstant.toEpochMilli) case _ => value } }
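The convert method above maps Neo4j temporal values to Timestamp through Spark's internal DateTimeUtils, which applies its own zone shifting. With plain java.time the closest standard conversions look roughly like this; this is a sketch of the general API, not a drop-in replacement for the connector's exact semantics:

import java.sql.Timestamp
import java.time.{LocalDate, LocalDateTime, ZonedDateTime, ZoneOffset}

object JavaTimeToSqlSketch extends App {
  val zdt = ZonedDateTime.parse("2018-11-12T09:42:00+01:00[Europe/Berlin]")
  val ldt = LocalDateTime.of(2018, 11, 12, 9, 42)

  val fromZoned    = Timestamp.from(zdt.toInstant)                   // instant-preserving
  val fromLocal    = Timestamp.valueOf(ldt)                          // read in the JVM default zone
  val fromLocalUtc = Timestamp.from(ldt.toInstant(ZoneOffset.UTC))   // read as UTC instead
  val sqlDate      = java.sql.Date.valueOf(LocalDate.of(2018, 11, 12))

  println(Seq(fromZoned, fromLocal, fromLocalUtc, sqlDate).mkString(" | "))
}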
Example 34
Source File: AnnouncementService.scala From recogito2 with Apache License 2.0 | 5 votes |
package services.announcement import java.sql.Timestamp import java.util.Date import javax.inject.{Inject, Singleton} import scala.concurrent.{ExecutionContext, Future} import services.BaseService import services.generated.Tables.SERVICE_ANNOUNCEMENT import services.generated.tables.records.ServiceAnnouncementRecord import storage.db.DB import services.user.UserService import java.util.UUID @Singleton class AnnouncementService @Inject() (val db: DB, users: UserService, implicit val ctx: ExecutionContext) extends BaseService { def findLatestUnread(username: String): Future[Option[ServiceAnnouncementRecord]] = db.query { sql => Option(sql.selectFrom(SERVICE_ANNOUNCEMENT) .where(SERVICE_ANNOUNCEMENT.FOR_USER.equal(username) .and(SERVICE_ANNOUNCEMENT.RESPONSE.isNull)) .orderBy(SERVICE_ANNOUNCEMENT.CREATED_AT.desc()) .fetchOne()) } def confirm(uuid: UUID, username: String, response: String): Future[Boolean] = db.query { sql => val result = sql.update(SERVICE_ANNOUNCEMENT) .set(SERVICE_ANNOUNCEMENT.VIEWED_AT, new Timestamp(new Date().getTime)) .set(SERVICE_ANNOUNCEMENT.RESPONSE, response) .where(SERVICE_ANNOUNCEMENT.ID.equal(uuid).and(SERVICE_ANNOUNCEMENT.FOR_USER.equal(username))) .execute() result == 1 } def clearAll(): Future[Boolean] = db.query { sql => sql.deleteFrom(SERVICE_ANNOUNCEMENT).execute() true } recover { case t: Throwable => t.printStackTrace() false } def deleteForUser(username: String) = db.query { sql => sql.deleteFrom(SERVICE_ANNOUNCEMENT) .where(SERVICE_ANNOUNCEMENT.FOR_USER.equal(username)).execute() } def insertBroadcastAnnouncement(content: String): Future[Boolean] = { val BATCH_SIZE = 200 def insertOneBatch(users: Seq[String]): Future[_] = db.query { sql => sql.batch(users.map { user => sql.insertInto(SERVICE_ANNOUNCEMENT, SERVICE_ANNOUNCEMENT.ID, SERVICE_ANNOUNCEMENT.FOR_USER, SERVICE_ANNOUNCEMENT.CONTENT, SERVICE_ANNOUNCEMENT.CREATED_AT, SERVICE_ANNOUNCEMENT.VIEWED_AT, SERVICE_ANNOUNCEMENT.RESPONSE ).values( UUID.randomUUID(), user, content, new Timestamp(new Date().getTime), null, null) }:_*).execute() } def insertBatchesRecursive(offset: Int, numUsers: Int): Future[Boolean] = users.listUsers(offset, BATCH_SIZE, None, None).flatMap { users => insertOneBatch(users.items.map(_._1.getUsername)) } flatMap { _ => if (offset + BATCH_SIZE >= numUsers) Future.successful(true) else insertBatchesRecursive(offset + BATCH_SIZE, numUsers) } val f = for { numUsers <- users.countUsers() success <- insertBatchesRecursive(0, numUsers) } yield (success) f.recover { case t: Throwable => play.api.Logger.info(t.getMessage) t.printStackTrace() false } } }
Example 35
Source File: MimirUDF.scala From mimir with Apache License 2.0 | 5 votes |
package mimir.exec.spark.udf import java.sql.{ Timestamp, Date } import org.apache.spark.sql.types.{ DataType, StructType, StructField } import mimir.algebra._ import mimir.exec.spark._ import mimir.util.SparkUtils class MimirUDF { def getPrimitive(t:Type, value:Any) = value match { case null => NullPrimitive() case _ => t match { //case TInt() => IntPrimitive(value.asInstanceOf[Long]) case TInt() => IntPrimitive(value.asInstanceOf[Long]) case TFloat() => FloatPrimitive(value.asInstanceOf[Double]) case TDate() => SparkUtils.convertDate(value.asInstanceOf[Date]) case TTimestamp() => SparkUtils.convertTimestamp(value.asInstanceOf[Timestamp]) case TString() => StringPrimitive(value.asInstanceOf[String]) case TBool() => BoolPrimitive(value.asInstanceOf[Boolean]) case TRowId() => RowIdPrimitive(value.asInstanceOf[String]) case TType() => TypePrimitive(Type.fromString(value.asInstanceOf[String])) //case TAny() => NullPrimitive() //case TUser(name) => name.toLowerCase //case TInterval() => Primitive(value.asInstanceOf[Long]) case _ => StringPrimitive(value.asInstanceOf[String]) } } def getNative(primitive : PrimitiveValue) : AnyRef = primitive match { case NullPrimitive() => null case RowIdPrimitive(s) => s case StringPrimitive(s) => s case IntPrimitive(i) => new java.lang.Long(i) case FloatPrimitive(f) => new java.lang.Double(f) case BoolPrimitive(b) => new java.lang.Boolean(b) case ts@TimestampPrimitive(y,m,d,h,mm,s,ms) => SparkUtils.convertTimestamp(ts) case dt@DatePrimitive(y,m,d) => SparkUtils.convertDate(dt) case x => x.asString } def getStructType(datatypes:Seq[DataType]): StructType = { StructType(datatypes.map(dti => StructField("", RAToSpark.getInternalSparkType(dti), true))) } }
Example 36
Source File: TaskRecordAggregate.scala From recogito2 with Apache License 2.0 | 5 votes |
package services.task import java.sql.Timestamp import java.util.UUID import services.generated.tables.records.TaskRecord import play.api.libs.json._ import play.api.libs.json.Reads._ import play.api.libs.functional.syntax._ case class TaskRecordAggregate(taskRecords: Seq[TaskRecord]) { private def getDistinctField[T](filter: TaskRecord => T, errorMessage: String): T = { val fields = taskRecords.map(filter).distinct if (fields.size != 1) throw new RuntimeException("Invalid task record aggregation: " + errorMessage + " (" + fields.mkString(", ") + ")") fields.head } lazy val taskType = TaskType(getDistinctField[String](_.getTaskType, "different task types")) lazy val className = getDistinctField[String](_.getClassName, "different class names") lazy val documentId = getDistinctField[String](_.getDocumentId, "different document IDs") lazy val spawnedBy = getDistinctField[String](_.getSpawnedBy, "different values for spawned_by") lazy val spawnedAt = taskRecords.sortBy(_.getSpawnedAt.getTime).head.getSpawnedAt lazy val stoppedAt = { val stoppedAtByTask = taskRecords.map(task => Option(task.getStoppedAt)) if (stoppedAtByTask.exists(_.isEmpty)) // At least one sub-task is unfinished - report aggregate task as unfinished None else /// All stopped - use latest stop time Some(stoppedAtByTask.flatten.sortBy(_.getTime).reverse.head) } lazy val stoppedWith = taskRecords.flatMap(task => Option(task.getStoppedWith)) lazy val status = taskRecords.map(task => TaskStatus.withName(task.getStatus)) match { case statusByTask if statusByTask.exists(_ == TaskStatus.FAILED) => // Any task that failed? TaskStatus.FAILED case statusByTask if statusByTask.forall(_ == TaskStatus.COMPLETED) => // All complete? TaskStatus.COMPLETED case statusByTask if statusByTask.forall(_ == TaskStatus.PENDING) => // All pending? TaskStatus.PENDING case _ => TaskStatus.RUNNING } lazy val progress = taskRecords.map(_.getProgress.toInt).sum / taskRecords.size } object TaskRecordAggregate { implicit val taskRecordWrites: Writes[TaskRecord] = ( (JsPath \ "task_type").write[String] and (JsPath \ "filepart_id").write[UUID] and (JsPath \ "status").write[String] and (JsPath \ "progress").write[Int] )(r => ( r.getTaskType, r.getFilepartId, r.getStatus, r.getProgress )) implicit val aggregateTaskRecordWrites: Writes[TaskRecordAggregate] = ( (JsPath \ "document_id").write[String] and (JsPath \ "status").write[String] and (JsPath \ "progress").write[Int] and (JsPath \ "subtasks").write[Seq[TaskRecord]] )(r => ( r.documentId, r.status.toString, r.progress, r.taskRecords )) }
Example 37
Source File: NetworkOps.scala From recogito2 with Apache License 2.0 | 5 votes |
package services.document.network import java.sql.Timestamp import scala.concurrent.{ExecutionContext, Future} import services.document.DocumentService trait NetworkOps { self: DocumentService => def getNetwork(docId: String)(implicit ctx: ExecutionContext): Future[Option[AncestryTree]] = { val f = for { maybeRoot <- getNetworkRoot(docId) descendants <- maybeRoot.map(rootNode => getDescendants(rootNode.id)) .getOrElse(Future.successful(Seq.empty[TreeRecord])) } yield (maybeRoot, descendants) f.map { case (maybeRoot, descendants) => maybeRoot.map(rootNode => AncestryTree(rootNode, descendants)) } } }
Example 38
Source File: AncestryTree.scala From recogito2 with Apache License 2.0 | 5 votes |
package services.document.network import java.sql.Timestamp case class AncestryTree(private val root: TreeRecord, private[network] val descendants: Seq[TreeRecord]) { val rootNode = AncestryTreeNode( root.id, root.owner, root.clonedFrom, root.clonedAt, // Should ALWAYS be None this) } case class AncestryTreeNode( id: String, owner: String, clonedFrom: Option[String], clonedAt: Option[Timestamp], private val tree: AncestryTree ) { lazy val children: Seq[AncestryTreeNode] = tree.descendants .filter(_.clonedFrom == Some(id)) .map(r => AncestryTreeNode(r.id, r.owner, r.clonedFrom, r.clonedAt, tree)) }
Example 39
Source File: PublicAccountInfo.scala From recogito2 with Apache License 2.0 | 5 votes |
package controllers.my.account import java.sql.Timestamp import org.joda.time.DateTime import play.api.libs.json._ import play.api.libs.functional.syntax._ import services.HasDate import services.contribution.stats.ContributorActivity import services.document.read.AccessibleDocumentsCount import services.user.User case class PublicAccountInfo( user: User, accessibleDocuments: AccessibleDocumentsCount, stats: ContributorActivity) object PublicAccountInfo extends HasDate { implicit val accessibleDocumentsWrites: Writes[AccessibleDocumentsCount] = ( (JsPath \ "public").write[Long] and (JsPath \ "shared_with_me").writeNullable[Long] )(d => (d.public, d.shared)) implicit val visitedAccountInfoWrites: Writes[PublicAccountInfo] = ( (JsPath \ "username").write[String] and (JsPath \ "real_name").writeNullable[String] and (JsPath \ "member_since").write[DateTime] and (JsPath \ "bio").writeNullable[String] and (JsPath \ "website").writeNullable[String] and (JsPath \ "documents").write[AccessibleDocumentsCount] and (JsPath \ "stats").write[ContributorActivity] )(v => ( v.user.username, v.user.realName, new DateTime(v.user.memberSince.getTime), v.user.bio, v.user.website, v.accessibleDocuments, v.stats )) }
Example 40
Source File: PrivateAccountInfo.scala From recogito2 with Apache License 2.0 | 5 votes |
package controllers.my.account import java.sql.Timestamp import org.joda.time.DateTime import play.api.libs.json._ import play.api.libs.functional.syntax._ import services.{HasDate, HasNullableSeq} import services.contribution.stats.ContributorActivity import services.user.User case class PrivateAccountInfo( user: User, myDocumentsCount: Long, sharedWithMeCount: Long, stats: ContributorActivity, usedMb: Double) object PrivateAccountInfo extends HasDate with HasNullableSeq { implicit val personalAccountInfoWrites: Writes[PrivateAccountInfo] = ( (JsPath \ "username").write[String] and (JsPath \ "real_name").writeNullable[String] and (JsPath \ "member_since").write[DateTime] and (JsPath \ "bio").writeNullable[String] and (JsPath \ "website").writeNullable[String] and (JsPath \ "feature_toggles").writeNullable[Seq[String]] and (JsPath \ "documents").write[JsObject] and (JsPath \ "storage").write[JsObject] and (JsPath \ "stats").write[ContributorActivity] )(p => ( p.user.username, p.user.realName, new DateTime(p.user.memberSince.getTime), p.user.bio, p.user.website, toOptSeq(p.user.featureToggles), Json.obj( "my_documents" -> p.myDocumentsCount, "shared_with_me" -> p.sharedWithMeCount ), Json.obj( "quota_mb" -> p.user.quotaMb.toInt, "used_mb" -> p.usedMb ), p.stats )) }
Example 41
Source File: TwitterBatchTimely.scala From Mastering-Spark-for-Data-Science with MIT License | 5 votes |
package io.gzet.timeseries import java.sql.Timestamp import com.cloudera.sparkts.{DateTimeIndex, TimeSeriesRDD} import io.gzet.timeseries.timely.MetricImplicits._ import io.gzet.timeseries.timely.TimelyImplicits._ import io.gzet.timeseries.twitter.Twitter._ import io.gzet.utils.spark.accumulo.AccumuloConfig import org.apache.spark.sql.SQLContext import org.apache.spark.{SparkConf, SparkContext} import org.joda.time.{DateTime, Minutes, Period} object TwitterBatchTimely extends SimpleConfig { case class Observation( hashtag: String, time: Timestamp, count: Double ) def main(args: Array[String]) = { val sparkConf = new SparkConf().setAppName("Twitter Extractor") val sc = new SparkContext(sparkConf) val sqlContext = new SQLContext(sc) import sqlContext.implicits._ val twitterJsonRDD = sc.textFile("file:///Users/antoine/CHAPTER/twitter-trump", 500) val tweetRDD = twitterJsonRDD mapPartitions analyzeJson cache() // Publish metrics to Timely tweetRDD.count() tweetRDD.countByState.publish() tweetRDD.sentimentByState.publish() // Read metrics from Timely val conf = AccumuloConfig("GZET", "alice", "alice", "localhost:2181") val metricsRDD = sc.timely(conf, Some("io.gzet.count")) val minDate = metricsRDD.map(_.time).min() val maxDate = metricsRDD.map(_.time).max() class TwitterFrequency(val minutes: Int) extends com.cloudera.sparkts.PeriodFrequency(Period.minutes(minutes)) { def difference(dt1: DateTime, dt2: DateTime): Int = Minutes.minutesBetween(dt1, dt2).getMinutes / minutes override def toString: String = s"minutes $minutes" } val dtIndex = DateTimeIndex.uniform(minDate, maxDate, new TwitterFrequency(1)) val metricsDF = metricsRDD.filter({ metric => metric.tags.keys.toSet.contains("tag") }).flatMap({ metric => metric.tags map { case (k, v) => ((v, roundFloorMinute(metric.time, 1)), metric.value) } }).reduceByKey(_+_).map({ case ((metric, time), sentiment) => Observation(metric, new Timestamp(time), sentiment) }).toDF() val tsRDD = TimeSeriesRDD.timeSeriesRDDFromObservations(dtIndex, metricsDF, "time", "hashtag", "count").filter(_._2.toArray.exists(!_.isNaN)) } def roundFloorMinute(time: Long, windowMinutes: Int) = { val dt = new DateTime(time) dt.withMinuteOfHour((dt.getMinuteOfHour / windowMinutes) * windowMinutes).minuteOfDay().roundFloorCopy().toDate.getTime } }
Example 42
Source File: FieldDateTime.scala From spark-gdb with Apache License 2.0 | 5 votes |
package com.esri.gdb import java.nio.ByteBuffer import java.sql.Timestamp import org.apache.spark.sql.types.{Metadata, TimestampType} class FieldDateTime(name: String, nullValueAllowed: Boolean, metadata:Metadata) extends Field(name, TimestampType, nullValueAllowed, metadata) { override def readValue(byteBuffer: ByteBuffer, oid: Int) = { val numDays = byteBuffer.getDouble // convert days since 12/30/1899 to 1/1/1970 val unixDays = numDays - 25569 val millis = (unixDays * 1000 * 60 * 60 * 24).ceil.toLong new Timestamp(millis) } }
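The constant 25569 above is the number of days between the Esri/Excel epoch (1899-12-30) and the Unix epoch (1970-01-01). A quick check of that offset and of the conversion, assuming the same ceil-to-millis rounding as the reader above:

import java.sql.Timestamp
import java.time.LocalDate
import java.time.temporal.ChronoUnit

object GdbEpochSketch extends App {
  val offset = ChronoUnit.DAYS.between(LocalDate.of(1899, 12, 30), LocalDate.of(1970, 1, 1))
  println(offset)                        // 25569

  def gdbDaysToTimestamp(numDays: Double): Timestamp = {
    val unixDays = numDays - offset
    new Timestamp((unixDays * 24 * 60 * 60 * 1000).ceil.toLong)
  }

  println(gdbDaysToTimestamp(25569.5))   // noon UTC on 1970-01-01, rendered in the local zone
}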
Example 43
Source File: package.scala From modelmatrix with Apache License 2.0 | 5 votes |
package com.collective.modelmatrix import java.sql.Timestamp import java.time.Instant import org.apache.spark.sql.types._ import scodec.bits.ByteVector import slick.driver.PostgresDriver.api._ package object catalog { implicit val instantColumnType = MappedColumnType.base[Instant, java.sql.Timestamp]( instant => Timestamp.from(instant), _.toInstant ) implicit val dataTypeColumnType = MappedColumnType.base[DataType, String]({ case ShortType => "short" case IntegerType => "integer" case LongType => "long" case DoubleType => "double" case StringType => "string" }, { case "short" => ShortType case "integer" => IntegerType case "long" => LongType case "double" => DoubleType case "string" => StringType }) implicit val byteVectorColumnType = MappedColumnType.base[ByteVector, Array[Byte]]( _.toArray, ByteVector.apply ) }
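The Slick column mapping above is just the Timestamp.from / toInstant pair; the round trip is lossless down to nanoseconds and needs no Slick to demonstrate:

import java.sql.Timestamp
import java.time.Instant

object InstantRoundTripSketch extends App {
  val instant = Instant.parse("2018-11-17T12:39:00.123456789Z")
  val ts      = Timestamp.from(instant)   // Instant -> java.sql.Timestamp
  val back    = ts.toInstant              // and back, nanosecond precision preserved

  assert(back == instant)
  println(ts)
}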
Example 44
Source File: Utils.scala From lemon-schedule with GNU General Public License v2.0 | 5 votes |
package com.gabry.job.utils import java.sql.Timestamp import java.text.SimpleDateFormat import java.util.Date import java.util.concurrent.TimeUnit import scala.collection.mutable.ArrayBuffer def getLoadedClass(classLoader: ClassLoader):Array[Class[_]] = { val loadedClass = ArrayBuffer.empty[Class[_]] val loadedClassEnum = classesField.get(classLoader).asInstanceOf[java.util.Vector[Class[_]]].elements() while(loadedClassEnum.hasMoreElements){ val nextElement = loadedClassEnum.nextElement() loadedClass.append(nextElement) } loadedClass.toArray } }
Example 45
Source File: Utils.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.datasources import java.sql.{Date, Timestamp} import org.apache.hadoop.hbase.spark.AvroSerdes import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String import org.apache.yetus.audience.InterfaceAudience; @InterfaceAudience.Private object Utils { def hbaseFieldToScalaType( f: Field, src: Array[Byte], offset: Int, length: Int): Any = { if (f.exeSchema.isDefined) { // If we have avro schema defined, use it to get record, and then convert them to catalyst data type val m = AvroSerdes.deserialize(src, f.exeSchema.get) val n = f.avroToCatalyst.map(_(m)) n.get } else { // Fall back to atomic type f.dt match { case BooleanType => src(offset) != 0 case ByteType => src(offset) case ShortType => Bytes.toShort(src, offset) case IntegerType => Bytes.toInt(src, offset) case LongType => Bytes.toLong(src, offset) case FloatType => Bytes.toFloat(src, offset) case DoubleType => Bytes.toDouble(src, offset) case DateType => new Date(Bytes.toLong(src, offset)) case TimestampType => new Timestamp(Bytes.toLong(src, offset)) case StringType => UTF8String.fromBytes(src, offset, length) case BinaryType => val newArray = new Array[Byte](length) System.arraycopy(src, offset, newArray, 0, length) newArray // TODO: SparkSqlSerializer.deserialize[Any](src) case _ => throw new Exception(s"unsupported data type ${f.dt}") } } } // convert input to data type def toBytes(input: Any, field: Field): Array[Byte] = { if (field.schema.isDefined) { // Here we assume the top level type is structType val record = field.catalystToAvro(input) AvroSerdes.serialize(record, field.schema.get) } else { field.dt match { case BooleanType => Bytes.toBytes(input.asInstanceOf[Boolean]) case ByteType => Array(input.asInstanceOf[Number].byteValue) case ShortType => Bytes.toBytes(input.asInstanceOf[Number].shortValue) case IntegerType => Bytes.toBytes(input.asInstanceOf[Number].intValue) case LongType => Bytes.toBytes(input.asInstanceOf[Number].longValue) case FloatType => Bytes.toBytes(input.asInstanceOf[Number].floatValue) case DoubleType => Bytes.toBytes(input.asInstanceOf[Number].doubleValue) case DateType | TimestampType => Bytes.toBytes(input.asInstanceOf[java.util.Date].getTime) case StringType => Bytes.toBytes(input.toString) case BinaryType => input.asInstanceOf[Array[Byte]] case _ => throw new Exception(s"unsupported data type ${field.dt}") } } } }
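In the HBase mapping above a TimestampType cell is simply the 8-byte epoch-millis long (Bytes.toBytes on the getTime value when writing, new Timestamp(Bytes.toLong(...)) when reading). The same round trip sketched with a plain ByteBuffer standing in for HBase's Bytes helper, which also encodes longs big-endian:

import java.nio.ByteBuffer
import java.sql.Timestamp

object TimestampBytesSketch extends App {
  val ts    = new Timestamp(1542458340000L)   // arbitrary instant, millisecond precision only
  val bytes = ByteBuffer.allocate(8).putLong(ts.getTime).array()
  val back  = new Timestamp(ByteBuffer.wrap(bytes).getLong)

  assert(back == ts)
  println(back)
}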
Example 46
Source File: RDBDataTypeConverter.scala From ohara with Apache License 2.0 | 5 votes |
package oharastream.ohara.connector.jdbc.datatype import java.sql.{Date, ResultSet, Time, Timestamp} import java.util.Optional import oharastream.ohara.client.configurator.InspectApi.RdbColumn import oharastream.ohara.connector.jdbc.util.DateTimeUtils trait RDBDataTypeConverter { def converterValue(resultSet: ResultSet, column: RdbColumn): Any = { val columnName = column.name val typeName = column.dataType.toUpperCase val dataType: DataTypeEnum = converterDataType(column) dataType match { case DataTypeEnum.INTEGER => java.lang.Integer.valueOf(resultSet.getInt(columnName)) case DataTypeEnum.LONG => java.lang.Long.valueOf(resultSet.getLong(columnName)) case DataTypeEnum.BOOLEAN => java.lang.Boolean.valueOf(resultSet.getBoolean(columnName)) case DataTypeEnum.FLOAT => java.lang.Float.valueOf(resultSet.getFloat(columnName)) case DataTypeEnum.DOUBLE => java.lang.Double.valueOf(resultSet.getDouble(columnName)) case DataTypeEnum.BIGDECIMAL => Optional.ofNullable(resultSet.getBigDecimal(columnName)).orElseGet(() => new java.math.BigDecimal(0L)) case DataTypeEnum.STRING => Optional.ofNullable(resultSet.getString(columnName)).orElseGet(() => "null") case DataTypeEnum.DATE => Optional.ofNullable(resultSet.getDate(columnName, DateTimeUtils.CALENDAR)).orElseGet(() => new Date(0)) case DataTypeEnum.TIME => Optional.ofNullable(resultSet.getTime(columnName, DateTimeUtils.CALENDAR)).orElseGet(() => new Time(0)) case DataTypeEnum.TIMESTAMP => Optional .ofNullable(resultSet.getTimestamp(columnName, DateTimeUtils.CALENDAR)) .orElseGet(() => new Timestamp(0)) case DataTypeEnum.BYTES => Optional.ofNullable(resultSet.getBytes(columnName)).orElseGet(() => Array()) case _ => throw new UnsupportedOperationException( s"JDBC Source Connector not support ${typeName} data type in ${columnName} column for ${dataBaseProductName} implement." ) } } protected[datatype] def dataBaseProductName: String protected[datatype] def converterDataType(column: RdbColumn): DataTypeEnum }
Example 47
Source File: SnowflakeWriter.scala From spark-snowflake with Apache License 2.0 | 5 votes |
package net.snowflake.spark.snowflake import java.sql.{Date, Timestamp} import net.snowflake.client.jdbc.internal.apache.commons.codec.binary.Base64 import net.snowflake.spark.snowflake.Parameters.MergedParameters import net.snowflake.spark.snowflake.io.SupportedFormat import net.snowflake.spark.snowflake.io.SupportedFormat.SupportedFormat import org.apache.spark.rdd.RDD import org.apache.spark.sql.types._ import org.apache.spark.sql._ private def removeUselessColumns(dataFrame: DataFrame, params: MergedParameters): DataFrame = params.columnMap match { case Some(map) => // Enclose column name with backtick(`) if dot(.) exists in column name val names = map.keys.toSeq.map(name => if (name.contains(".")) { s"`$name`" } else { name }) try { dataFrame.select(names.head, names.tail: _*) } catch { case e: AnalysisException => throw new IllegalArgumentException( "Incorrect column name when column mapping: " + e.toString ) } case _ => dataFrame } // Prepare a set of conversion functions, based on the schema def genConversionFunctions(schema: StructType): Array[Any => Any] = schema.fields.map { field => field.dataType match { case DateType => (v: Any) => v match { case null => "" case t: Timestamp => Conversions.formatTimestamp(t) case d: Date => Conversions.formatDate(d) } case TimestampType => (v: Any) => { if (v == null) "" else Conversions.formatTimestamp(v.asInstanceOf[Timestamp]) } case StringType => (v: Any) => { if (v == null) "" else Conversions.formatString(v.asInstanceOf[String]) } case BinaryType => (v: Any) => v match { case null => "" case bytes: Array[Byte] => Base64.encodeBase64String(bytes) } case _ => (v: Any) => Conversions.formatAny(v) } } } object DefaultSnowflakeWriter extends SnowflakeWriter(DefaultJDBCWrapper)
Example 48
Source File: CustomMatchers.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.extractor.services import java.sql.Timestamp import org.scalatest._ import matchers._ import scala.concurrent.duration._ trait CustomMatchers { class SqlTimestampMoreOrLessEquals(expected: Timestamp, tolerance: Duration) extends Matcher[Timestamp] with Matchers { def apply(left: Timestamp) = { MatchResult( left.getTime === (expected.getTime +- tolerance.toMillis), s"""Timestamp $left was not within ${tolerance} to "$expected"""", s"""Timestamp $left was within ${tolerance} to "$expected"""", ) } } def beWithin5Minutes(expected: Timestamp) = new SqlTimestampMoreOrLessEquals(expected, 5.minutes) def beWithin(duration: Duration)(expected: Timestamp) = new SqlTimestampMoreOrLessEquals(expected, duration) def beWithinSeconds(seconds: Long)(expected: Timestamp) = new SqlTimestampMoreOrLessEquals(expected, seconds.seconds) def beWithinMillis(millis: Long)(expected: Timestamp) = new SqlTimestampMoreOrLessEquals(expected, millis.millis) } object CustomMatchers extends CustomMatchers
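A hypothetical usage of those matchers in a ScalaTest suite; the spec name and values are invented, and the class is assumed to live where it can see CustomMatchers:

import java.sql.Timestamp
import org.scalatest.{Matchers, WordSpec}

class TimestampToleranceSpec extends WordSpec with Matchers with CustomMatchers {
  "a freshly created timestamp" should {
    "sit within the configured tolerance of a reference value" in {
      val expected = new Timestamp(System.currentTimeMillis())
      val actual   = new Timestamp(System.currentTimeMillis() + 1500L)

      actual should beWithinSeconds(5)(expected)
      actual should beWithin5Minutes(expected)
    }
  }
}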
Example 49
Source File: Transaction.scala From Scala-Programming-Projects with MIT License | 5 votes |
package coinyser import java.sql.{Date, Timestamp} import java.time.ZoneOffset case class Transaction(timestamp: Timestamp, date: Date, tid: Int, price: Double, sell: Boolean, amount: Double) object Transaction { def apply(timestamp: Timestamp, tid: Int, price: Double, sell: Boolean, amount: Double) = new Transaction( timestamp = timestamp, date = Date.valueOf( timestamp.toInstant.atOffset(ZoneOffset.UTC).toLocalDate), tid = tid, price = price, sell = sell, amount = amount) }
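The secondary constructor above derives the date column from the timestamp by going through java.time in UTC, which keeps the downstream partitioning stable across machines. The conversion on its own:

import java.sql.{Date, Timestamp}
import java.time.ZoneOffset

object TimestampToDateSketch extends App {
  val ts   = new Timestamp(1532365695000L)                                  // an instant on 2018-07-23 (UTC)
  val date = Date.valueOf(ts.toInstant.atOffset(ZoneOffset.UTC).toLocalDate)
  println(date)                                                             // 2018-07-23
}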
Example 50
Source File: StreamingProducerSpec.scala From Scala-Programming-Projects with MIT License | 5 votes |
package coinyser import java.sql.Timestamp import coinyser.StreamingProducerSpec._ import org.scalactic.TypeCheckedTripleEquals import org.scalatest.{Matchers, WordSpec} class StreamingProducerSpec extends WordSpec with Matchers with TypeCheckedTripleEquals { "StreamingProducer.deserializeWebsocketTransaction" should { "deserialize a valid String to a WebsocketTransaction" in { val str = """{"amount": 0.045318270000000001, "buy_order_id": 1969499130, |"sell_order_id": 1969495276, "amount_str": "0.04531827", |"price_str": "6339.73", "timestamp": "1533797395", |"price": 6339.7299999999996, "type": 0, "id": 71826763}""".stripMargin StreamingProducer.deserializeWebsocketTransaction(str) should ===(SampleWebsocketTransaction) } } "StreamingProducer.convertWsTransaction" should { "convert a WebSocketTransaction to a Transaction" in { StreamingProducer.convertWsTransaction(SampleWebsocketTransaction) should ===(SampleTransaction) } } "StreamingProducer.serializeTransaction" should { "serialize a Transaction to a String" in { StreamingProducer.serializeTransaction(SampleTransaction) should ===(SampleJsonTransaction) } } "StreamingProducer.subscribe" should { "register a callback that receives live trades" in { val pusher = new FakePusher(Vector("a", "b", "c")) var receivedTrades = Vector.empty[String] val io = StreamingProducer.subscribe(pusher) { trade => receivedTrades = receivedTrades :+ trade } io.unsafeRunSync() receivedTrades should ===(Vector("a", "b", "c")) } } } object StreamingProducerSpec { val SampleWebsocketTransaction = WebsocketTransaction( amount = 0.04531827, buy_order_id = 1969499130, sell_order_id = 1969495276, amount_str = "0.04531827", price_str = "6339.73", timestamp = "1533797395", price = 6339.73, `type` = 0, id = 71826763) val SampleTransaction = Transaction( timestamp = new Timestamp(1533797395000L), tid = 71826763, price = 6339.73, sell = false, amount = 0.04531827) val SampleJsonTransaction = """{"timestamp":"2018-08-09 06:49:55", |"date":"2018-08-09","tid":71826763,"price":6339.73,"sell":false, |"amount":0.04531827}""".stripMargin }
Example 51
Source File: BatchProducerSpec.scala From Scala-Programming-Projects with MIT License | 5 votes |
package coinyser import java.io.{BufferedOutputStream, StringReader} import java.nio.CharBuffer import java.sql.Timestamp import cats.effect.IO import org.apache.spark.sql._ import org.apache.spark.sql.test.SharedSparkSession import org.scalatest.{Matchers, WordSpec} class BatchProducerSpec extends WordSpec with Matchers with SharedSparkSession { val httpTransaction1 = HttpTransaction("1532365695", "70683282", "7740.00", "0", "0.10041719") val httpTransaction2 = HttpTransaction("1532365693", "70683281", "7739.99", "0", "0.00148564") "BatchProducer.jsonToHttpTransaction" should { "create a Dataset[HttpTransaction] from a Json string" in { val json = """[{"date": "1532365695", "tid": "70683282", "price": "7740.00", "type": "0", "amount": "0.10041719"}, |{"date": "1532365693", "tid": "70683281", "price": "7739.99", "type": "0", "amount": "0.00148564"}]""".stripMargin val ds: Dataset[HttpTransaction] = BatchProducer.jsonToHttpTransactions(json) ds.collect() should contain theSameElementsAs Seq(httpTransaction1, httpTransaction2) } } "BatchProducer.httpToDomainTransactions" should { "transform a Dataset[HttpTransaction] into a Dataset[Transaction]" in { import testImplicits._ val source: Dataset[HttpTransaction] = Seq(httpTransaction1, httpTransaction2).toDS() val target: Dataset[Transaction] = BatchProducer.httpToDomainTransactions(source) val transaction1 = Transaction(timestamp = new Timestamp(1532365695000L), tid = 70683282, price = 7740.00, sell = false, amount = 0.10041719) val transaction2 = Transaction(timestamp = new Timestamp(1532365693000L), tid = 70683281, price = 7739.99, sell = false, amount = 0.00148564) target.collect() should contain theSameElementsAs Seq(transaction1, transaction2) } } }
Example 52
Source File: BatchProducerIT.scala From Scala-Programming-Projects with MIT License | 5 votes |
package coinyser import java.sql.Timestamp import java.time.Instant import java.util.concurrent.TimeUnit import cats.effect.{IO, Timer} import org.apache.spark.sql.test.SharedSparkSession import org.scalatest.{Matchers, WordSpec} import scala.concurrent.duration.FiniteDuration class BatchProducerIT extends WordSpec with Matchers with SharedSparkSession { import testImplicits._ "BatchProducer.save" should { "save a Dataset[Transaction] to parquet" in withTempDir { tmpDir => val transaction1 = Transaction(timestamp = new Timestamp(1532365695000L), tid = 70683282, price = 7740.00, sell = false, amount = 0.10041719) val transaction2 = Transaction(timestamp = new Timestamp(1532365693000L), tid = 70683281, price = 7739.99, sell = false, amount = 0.00148564) val sourceDS = Seq(transaction1, transaction2).toDS() val uri = tmpDir.toURI BatchProducer.save(sourceDS, uri).unsafeRunSync() tmpDir.list() should contain("date=2018-07-23") val readDS = spark.read.parquet(uri.toString).as[Transaction] spark.read.parquet(uri + "/date=2018-07-23").show() sourceDS.collect() should contain theSameElementsAs readDS.collect() } } "BatchProducer.processOneBatch" should { "filter and save a batch of transaction, wait 59 mn, fetch the next batch" in withTempDir { tmpDir => implicit object FakeTimer extends Timer[IO] { private var clockRealTimeInMillis: Long = Instant.parse("2018-08-02T01:00:00Z").toEpochMilli def clockRealTime(unit: TimeUnit): IO[Long] = IO(unit.convert(clockRealTimeInMillis, TimeUnit.MILLISECONDS)) def sleep(duration: FiniteDuration): IO[Unit] = IO { clockRealTimeInMillis = clockRealTimeInMillis + duration.toMillis } def shift: IO[Unit] = ??? def clockMonotonic(unit: TimeUnit): IO[Long] = ??? } implicit val appContext: AppContext = new AppContext(transactionStorePath = tmpDir.toURI) implicit def toTimestamp(str: String): Timestamp = Timestamp.from(Instant.parse(str)) val tx1 = Transaction("2018-08-01T23:00:00Z", 1, 7657.58, true, 0.021762) val tx2 = Transaction("2018-08-02T01:00:00Z", 2, 7663.85, false, 0.01385517) val tx3 = Transaction("2018-08-02T01:58:30Z", 3, 7663.85, false, 0.03782426) val tx4 = Transaction("2018-08-02T01:58:59Z", 4, 7663.86, false, 0.15750809) val tx5 = Transaction("2018-08-02T02:30:00Z", 5, 7661.49, true, 0.1) // Start at 01:00, tx 2 ignored (too soon) val txs0 = Seq(tx1) // Fetch at 01:59, get nb 2 and 3, but will miss nb 4 because of Api lag val txs1 = Seq(tx2, tx3) // Fetch at 02:58, get nb 3, 4, 5 val txs2 = Seq(tx3, tx4, tx5) // Fetch at 03:57, get nothing val txs3 = Seq.empty[Transaction] val start0 = Instant.parse("2018-08-02T00:00:00Z") val end0 = Instant.parse("2018-08-02T00:59:55Z") val threeBatchesIO = for { tuple1 <- BatchProducer.processOneBatch(IO(txs1.toDS()), txs0.toDS(), start0, end0) // end - Api lag (ds1, start1, end1) = tuple1 tuple2 <- BatchProducer.processOneBatch(IO(txs2.toDS()), ds1, start1, end1) (ds2, start2, end2) = tuple2 _ <- BatchProducer.processOneBatch(IO(txs3.toDS()), ds2, start2, end2) } yield (ds1, start1, end1, ds2, start2, end2) val (ds1, start1, end1, ds2, start2, end2) = threeBatchesIO.unsafeRunSync() ds1.collect() should contain theSameElementsAs txs1 start1 should ===(end0) end1 should ===(Instant.parse("2018-08-02T01:58:55Z")) // initialClock + 1mn - 15s - 5s ds2.collect() should contain theSameElementsAs txs2 start2 should ===(end1) end2 should ===(Instant.parse("2018-08-02T02:57:55Z")) // initialClock + 1mn -15s + 1mn -15s -5s = end1 + 45s val lastClock = Instant.ofEpochMilli( 
FakeTimer.clockRealTime(TimeUnit.MILLISECONDS).unsafeRunSync()) lastClock should === (Instant.parse("2018-08-02T03:57:00Z")) val savedTransactions = spark.read.parquet(tmpDir.toString).as[Transaction].collect() val expectedTxs = Seq(tx2, tx3, tx4, tx5) savedTransactions should contain theSameElementsAs expectedTxs } } }
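The integration test above hides its string-to-Timestamp step behind a small implicit; spelled out, it is only Timestamp.from(Instant.parse(...)) for ISO-8601 input:

import java.sql.Timestamp
import java.time.Instant

object IsoTimestampSketch extends App {
  def isoToTimestamp(iso: String): Timestamp = Timestamp.from(Instant.parse(iso))

  println(isoToTimestamp("2018-08-02T01:58:30Z"))   // printed in the JVM default zone
}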
Example 53
Source File: StructuredNetworkWordCountWindowed.scala From drizzle-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples.sql.streaming import java.sql.Timestamp import org.apache.spark.sql.SparkSession import org.apache.spark.sql.functions._ object StructuredNetworkWordCountWindowed { def main(args: Array[String]) { if (args.length < 3) { System.err.println("Usage: StructuredNetworkWordCountWindowed <hostname> <port>" + " <window duration in seconds> [<slide duration in seconds>]") System.exit(1) } val host = args(0) val port = args(1).toInt val windowSize = args(2).toInt val slideSize = if (args.length == 3) windowSize else args(3).toInt if (slideSize > windowSize) { System.err.println("<slide duration> must be less than or equal to <window duration>") } val windowDuration = s"$windowSize seconds" val slideDuration = s"$slideSize seconds" val spark = SparkSession .builder .appName("StructuredNetworkWordCountWindowed") .getOrCreate() import spark.implicits._ // Create DataFrame representing the stream of input lines from connection to host:port val lines = spark.readStream .format("socket") .option("host", host) .option("port", port) .option("includeTimestamp", true) .load() // Split the lines into words, retaining timestamps val words = lines.as[(String, Timestamp)].flatMap(line => line._1.split(" ").map(word => (word, line._2)) ).toDF("word", "timestamp") // Group the data by window and word and compute the count of each group val windowedCounts = words.groupBy( window($"timestamp", windowDuration, slideDuration), $"word" ).count().orderBy("window") // Start running the query that prints the windowed word counts to the console val query = windowedCounts.writeStream .outputMode("complete") .format("console") .option("truncate", "false") .start() query.awaitTermination() } } // scalastyle:on println
Example 54
Source File: LiteralGenerator.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} import org.scalacheck.{Arbitrary, Gen} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.CalendarInterval object LiteralGenerator { lazy val byteLiteralGen: Gen[Literal] = for { b <- Arbitrary.arbByte.arbitrary } yield Literal.create(b, ByteType) lazy val shortLiteralGen: Gen[Literal] = for { s <- Arbitrary.arbShort.arbitrary } yield Literal.create(s, ShortType) lazy val integerLiteralGen: Gen[Literal] = for { i <- Arbitrary.arbInt.arbitrary } yield Literal.create(i, IntegerType) lazy val longLiteralGen: Gen[Literal] = for { l <- Arbitrary.arbLong.arbitrary } yield Literal.create(l, LongType) lazy val floatLiteralGen: Gen[Literal] = for { f <- Gen.chooseNum(Float.MinValue / 2, Float.MaxValue / 2, Float.NaN, Float.PositiveInfinity, Float.NegativeInfinity) } yield Literal.create(f, FloatType) lazy val doubleLiteralGen: Gen[Literal] = for { f <- Gen.chooseNum(Double.MinValue / 2, Double.MaxValue / 2, Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity) } yield Literal.create(f, DoubleType) // TODO cache the generated data def decimalLiteralGen(precision: Int, scale: Int): Gen[Literal] = { assert(scale >= 0) assert(precision >= scale) Arbitrary.arbBigInt.arbitrary.map { s => val a = (s % BigInt(10).pow(precision - scale)).toString() val b = (s % BigInt(10).pow(scale)).abs.toString() Literal.create( Decimal(BigDecimal(s"$a.$b"), precision, scale), DecimalType(precision, scale)) } } lazy val stringLiteralGen: Gen[Literal] = for { s <- Arbitrary.arbString.arbitrary } yield Literal.create(s, StringType) lazy val binaryLiteralGen: Gen[Literal] = for { ab <- Gen.listOf[Byte](Arbitrary.arbByte.arbitrary) } yield Literal.create(ab.toArray, BinaryType) lazy val booleanLiteralGen: Gen[Literal] = for { b <- Arbitrary.arbBool.arbitrary } yield Literal.create(b, BooleanType) lazy val dateLiteralGen: Gen[Literal] = for { d <- Arbitrary.arbInt.arbitrary } yield Literal.create(new Date(d), DateType) lazy val timestampLiteralGen: Gen[Literal] = for { t <- Arbitrary.arbLong.arbitrary } yield Literal.create(new Timestamp(t), TimestampType) lazy val calendarIntervalLiterGen: Gen[Literal] = for { m <- Arbitrary.arbInt.arbitrary; s <- Arbitrary.arbLong.arbitrary} yield Literal.create(new CalendarInterval(m, s), CalendarIntervalType) // Sometimes, it would be quite expensive when unlimited value is used, // for example, the `times` arguments for StringRepeat would hang the test 'forever' // if it's tested against Int.MaxValue by ScalaCheck, therefore, use values from a limited // range is more reasonable lazy val limitedIntegerLiteralGen: Gen[Literal] = for { i <- Gen.choose(-100, 100) } yield Literal.create(i, IntegerType) def randomGen(dt: DataType): Gen[Literal] = { dt match { case ByteType => byteLiteralGen case ShortType => shortLiteralGen case IntegerType => integerLiteralGen case LongType => longLiteralGen case DoubleType => doubleLiteralGen case FloatType => floatLiteralGen case DateType => dateLiteralGen case TimestampType => timestampLiteralGen case BooleanType => booleanLiteralGen case StringType => stringLiteralGen case BinaryType => binaryLiteralGen case CalendarIntervalType => calendarIntervalLiterGen case DecimalType.Fixed(precision, scale) => decimalLiteralGen(precision, scale) case dt => throw new IllegalArgumentException(s"not supported type $dt") } } }
Example 55
Source File: SQLCompatibilityFunctionSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql import java.math.BigDecimal import java.sql.Timestamp import org.apache.spark.sql.test.SharedSQLContext class SQLCompatibilityFunctionSuite extends QueryTest with SharedSQLContext { test("ifnull") { checkAnswer( sql("SELECT ifnull(null, 'x'), ifnull('y', 'x'), ifnull(null, null)"), Row("x", "y", null)) // Type coercion checkAnswer( sql("SELECT ifnull(1, 2.1d), ifnull(null, 2.1d)"), Row(1.0, 2.1)) } test("nullif") { checkAnswer( sql("SELECT nullif('x', 'x'), nullif('x', 'y')"), Row(null, "x")) // Type coercion checkAnswer( sql("SELECT nullif(1, 2.1d), nullif(1, 1.0d)"), Row(1.0, null)) } test("nvl") { checkAnswer( sql("SELECT nvl(null, 'x'), nvl('y', 'x'), nvl(null, null)"), Row("x", "y", null)) // Type coercion checkAnswer( sql("SELECT nvl(1, 2.1d), nvl(null, 2.1d)"), Row(1.0, 2.1)) } test("nvl2") { checkAnswer( sql("SELECT nvl2(null, 'x', 'y'), nvl2('n', 'x', 'y'), nvl2(null, null, null)"), Row("y", "x", null)) // Type coercion checkAnswer( sql("SELECT nvl2(null, 1, 2.1d), nvl2('n', 1, 2.1d)"), Row(2.1, 1.0)) } test("SPARK-16730 cast alias functions for Hive compatibility") { checkAnswer( sql("SELECT boolean(1), tinyint(1), smallint(1), int(1), bigint(1)"), Row(true, 1.toByte, 1.toShort, 1, 1L)) checkAnswer( sql("SELECT float(1), double(1), decimal(1)"), Row(1.toFloat, 1.0, new BigDecimal(1))) checkAnswer( sql("SELECT date(\"2014-04-04\"), timestamp(date(\"2014-04-04\"))"), Row(new java.util.Date(114, 3, 4), new Timestamp(114, 3, 4, 0, 0, 0, 0))) checkAnswer( sql("SELECT string(1)"), Row("1")) // Error handling: only one argument val errorMsg = intercept[AnalysisException](sql("SELECT string(1, 2)")).getMessage assert(errorMsg.contains("Function string accepts only one argument")) } }
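The Hive-compatibility test builds its expected value with the deprecated Timestamp(int, int, ...) constructor, where the year is offset from 1900 and the month is zero-based. A quick sketch showing it agrees with the non-deprecated Timestamp.valueOf (both interpret the fields in the JVM default zone):

import java.sql.Timestamp

object LegacyTimestampCtorSketch extends App {
  val legacy = new Timestamp(114, 3, 4, 0, 0, 0, 0)     // 2014 (114 + 1900), April (3 + 1), 4th
  val modern = Timestamp.valueOf("2014-04-04 00:00:00")

  assert(legacy == modern)
  println(modern)
}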
Example 56
Source File: DataConverter.scala From spark-cdm with MIT License | 5 votes |
package com.microsoft.cdm.utils import java.text.SimpleDateFormat import java.util.{Locale, TimeZone} import java.sql.Timestamp import org.apache.commons.lang.time.DateUtils import org.apache.spark.sql.catalyst.util.TimestampFormatter import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String class DataConverter() extends Serializable { val dateFormatter = new SimpleDateFormat(Constants.SINGLE_DATE_FORMAT) val timestampFormatter = TimestampFormatter(Constants.TIMESTAMP_FORMAT, TimeZone.getTimeZone("UTC")) val toSparkType: Map[CDMDataType.Value, DataType] = Map( CDMDataType.int64 -> LongType, CDMDataType.dateTime -> DateType, CDMDataType.string -> StringType, CDMDataType.double -> DoubleType, CDMDataType.decimal -> DecimalType(Constants.DECIMAL_PRECISION,0), CDMDataType.boolean -> BooleanType, CDMDataType.dateTimeOffset -> TimestampType ) def jsonToData(dt: DataType, value: String): Any = { return dt match { case LongType => value.toLong case DoubleType => value.toDouble case DecimalType() => Decimal(value) case BooleanType => value.toBoolean case DateType => dateFormatter.parse(value) case TimestampType => timestampFormatter.parse(value) case _ => UTF8String.fromString(value) } } def toCdmType(dt: DataType): CDMDataType.Value = { return dt match { case IntegerType => CDMDataType.int64 case LongType => CDMDataType.int64 case DateType => CDMDataType.dateTime case StringType => CDMDataType.string case DoubleType => CDMDataType.double case DecimalType() => CDMDataType.decimal case BooleanType => CDMDataType.boolean case TimestampType => CDMDataType.dateTimeOffset } } def dataToString(data: Any, dataType: DataType): String = { (dataType, data) match { case (_, null) => null case (DateType, _) => dateFormatter.format(data) case (TimestampType, v: Number) => timestampFormatter.format(data.asInstanceOf[Long]) case _ => data.toString } } }
Example 57
Source File: MessageSink.scala From parquet4s with MIT License | 5 votes |
package com.github.mjakubowski84.parquet4s.indefinite import java.sql.Timestamp import java.util.UUID import akka.Done import akka.kafka.CommitterSettings import akka.kafka.ConsumerMessage.CommittableOffsetBatch import akka.kafka.scaladsl.Committer import akka.stream.scaladsl.{Flow, Keep, Sink} import com.github.mjakubowski84.parquet4s.{ChunkPathBuilder, ParquetStreams, ParquetWriter} import com.google.common.io.Files import org.apache.hadoop.fs.Path import org.apache.parquet.hadoop.metadata.CompressionCodecName import scala.concurrent.Future import scala.concurrent.duration._ object MessageSink { case class Data(timestamp: Timestamp, word: String) val MaxChunkSize: Int = 128 val ChunkWriteTimeWindow: FiniteDuration = 10.seconds val WriteDirectoryName: String = "messages" } trait MessageSink { this: Akka => import MessageSink._ import MessageSource._ protected val baseWritePath: String = new Path(Files.createTempDir().getAbsolutePath, WriteDirectoryName).toString private val writerOptions = ParquetWriter.Options(compressionCodecName = CompressionCodecName.SNAPPY) private lazy val committerSink = Flow.apply[Seq[Message]].map { messages => CommittableOffsetBatch(messages.map(_.committableOffset)) }.toMat(Committer.sink(CommitterSettings(system)))(Keep.right) def chunkPath: ChunkPathBuilder[Message] = { case (basePath, chunk) => val lastElementDateTime = new Timestamp(chunk.last.record.timestamp()).toLocalDateTime val year = lastElementDateTime.getYear val month = lastElementDateTime.getMonthValue val day = lastElementDateTime.getDayOfMonth val uuid = UUID.randomUUID() basePath.suffix(s"/$year/$month/$day/part-$uuid.parquet") } lazy val messageSink: Sink[Message, Future[Done]] = ParquetStreams.toParquetIndefinite( path = baseWritePath, maxChunkSize = MaxChunkSize, chunkWriteTimeWindow = ChunkWriteTimeWindow, buildChunkPath = chunkPath, preWriteTransformation = { message: Message => Data( timestamp = new Timestamp(message.record.timestamp()), word = message.record.value() ) }, postWriteSink = committerSink, options = writerOptions ) }
Example 58
Source File: JdbcResultSpec.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package daf.dataset.query.jdbc import java.sql.Timestamp import java.time.{ LocalDateTime, OffsetDateTime } import org.scalatest.{ MustMatchers, WordSpec } import play.api.libs.json._ class JdbcResultSpec extends WordSpec with MustMatchers { "A JDBC Result container" must { "convert to CSV" in { JdbcResults.flat.toCsv.toList must be { List( """"int", "string", "bool", "timestamp"""", """1, "str1", true, "2018-06-25T09:00:00Z"""", """2, "str2", false, "2018-06-25T09:30:00Z"""", """<null>, <null>, false, <null>""" ) } } "convert to json" in { JdbcResults.flat.toJson.toList must be { Seq( JsObject { Seq( "int" -> JsNumber(1), "string" -> JsString("str1"), "bool" -> JsBoolean(true), "timestamp" -> JsString("2018-06-25T09:00:00Z") ) }, JsObject { Seq( "int" -> JsNumber(2), "string" -> JsString("str2"), "bool" -> JsBoolean(false), "timestamp" -> JsString("2018-06-25T09:30:00Z") ) }, JsObject { Seq( "int" -> JsNull, "string" -> JsNull, "bool" -> JsBoolean(false), "timestamp" -> JsNull ) } ) } } } } object JdbcResults { private val offset = OffsetDateTime.now().getOffset private def timestamp(dateTime: LocalDateTime) = Timestamp.from { dateTime.toInstant(offset) } val flat = JdbcResult( header = Seq("int", "string", "bool", "timestamp"), rows = Vector( List( Int.box(1), "str1", Boolean.box(true), timestamp { LocalDateTime.of(2018, 6, 25, 9, 0) } ), List( Int.box(2), "str2", Boolean.box(false), timestamp { LocalDateTime.of(2018, 6, 25, 9, 30) } ), List( null, null, Boolean.box(false), null ) ) ) }
Example 59
Source File: UJESSQLTypeParser.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.ujes.jdbc import java.sql.{Timestamp, Types} object UJESSQLTypeParser { def parserFromName(typeName: String): Int = { typeName match { case null => throw new UJESSQLException(UJESSQLErrorCode.METADATA_EMPTY) case "string" => Types.CHAR case "short" => Types.SMALLINT case "int" => Types.INTEGER case "long" => Types.BIGINT case "float" => Types.FLOAT case "double" => Types.DOUBLE case "boolean" => Types.BOOLEAN case "byte" => Types.TINYINT case "char" => Types.CHAR case "timestamp" => Types.TIMESTAMP case _ => throw new UJESSQLException(UJESSQLErrorCode.PREPARESTATEMENT_TYPEERROR) } } def parserFromVal(obj: Any): Int ={ obj match { case _: String => Types.CHAR case _: Short => Types.SMALLINT case _: Int => Types.INTEGER case _: Long => Types.BIGINT case _: Float => Types.FLOAT case _: Double => Types.DOUBLE case _: Boolean => Types.BOOLEAN case _: Byte => Types.TINYINT case _: Char => Types.CHAR case _: Timestamp => Types.TIMESTAMP case _ => throw new UJESSQLException(UJESSQLErrorCode.PREPARESTATEMENT_TYPEERROR) } } def parserFromMetaData(dataType: Int): String = { dataType match { case Types.CHAR => "string" case Types.SMALLINT => "short" case Types.INTEGER => "int" case Types.BIGINT => "long" case Types.FLOAT => "float" case Types.DOUBLE => "double" case Types.BOOLEAN => "boolean" case Types.TINYINT => "byte" case Types.CHAR => "char" case Types.TIMESTAMP => "timestamp" case _ => throw new UJESSQLException(UJESSQLErrorCode.PREPARESTATEMENT_TYPEERROR) } } }
Example 60
Source File: FilterPushdown.scala From spark-select with Apache License 2.0 | 5 votes |
package io.minio.spark.select import java.sql.{Date, Timestamp} import org.apache.spark.sql.sources._ import org.apache.spark.sql.types._ private def getTypeForAttribute(schema: StructType, attribute: String): Option[DataType] = { if (schema.fieldNames.contains(attribute)) { Some(schema(attribute).dataType) } else { None } } def queryFromSchema(schema: StructType, filters: Array[Filter]): String = { var columnList = schema.fields.map(x => s"s."+s""""${x.name}"""").mkString(",") if (columnList.length == 0) { columnList = "*" } val whereClause = buildWhereClause(schema, filters) if (whereClause.length == 0) { s"select $columnList from S3Object s" } else { s"select $columnList from S3Object s $whereClause" } } }
Example 61
Source File: TypeCast.scala From spark-select with Apache License 2.0 | 5 votes |
package io.minio.spark.select.util import java.math.BigDecimal import java.sql.{Date, Timestamp} import java.text.{SimpleDateFormat, NumberFormat} import java.util.Locale import org.apache.spark.sql.types._ import scala.util.Try @throws[IllegalArgumentException] private[select] def toChar(str: String): Char = { if (str.charAt(0) == '\\') { str.charAt(1) match { case 't' => '\t' case 'r' => '\r' case 'b' => '\b' case 'f' => '\f' case '\"' => '\"' // In case user changes quote char and uses \" as delimiter in options case '\'' => '\'' case 'u' if str == """\u0000""" => '\u0000' case _ => throw new IllegalArgumentException(s"Unsupported special character for delimiter: $str") } } else if (str.length == 1) { str.charAt(0) } else { throw new IllegalArgumentException(s"Delimiter cannot be more than one character: $str") } } }
Example 62
Source File: SlickJdbcScheduledMessagesRepository.scala From reliable-http-client with Apache License 2.0 | 5 votes |
package rhttpc.transport.amqpjdbc.slick import java.sql.Timestamp import rhttpc.transport.amqpjdbc.{MessageToSchedule, ScheduledMessage, ScheduledMessagesRepository} import slick.jdbc.{JdbcBackend, JdbcProfile} import scala.concurrent.{ExecutionContext, Future} private[amqpjdbc] class SlickJdbcScheduledMessagesRepository(profile: JdbcProfile, db: JdbcBackend.Database) (implicit ec: ExecutionContext) extends ScheduledMessagesRepository { class V1_001__AddingPropertiesToScheduledMessagesMigration extends AddingPropertiesToScheduledMessagesMigration { override protected val profile: JdbcProfile = SlickJdbcScheduledMessagesRepository.this.profile } val messagesMigration = new V1_001__AddingPropertiesToScheduledMessagesMigration import messagesMigration._ import profile.api._ override def save(msg: MessageToSchedule): Future[Unit] = { import msg._ val action = for { currentTimestamp <- sql"select current_timestamp".as[Timestamp].head plannedRun = new Timestamp(currentTimestamp.getTime + msg.delay.toMillis) messageToAdd = ScheduledMessage(None, queueName, content, properties, plannedRun) insertResult <- scheduledMessages += messageToAdd } yield () db.run(action.transactionally) } override def fetchMessagesShouldByRun(queueName: String, batchSize: Int) (onMessages: (Seq[ScheduledMessage]) => Future[Any]): Future[Int] = { def drain(): Future[Int] = { val fetchAction = for { currentTimestamp <- sql"select current_timestamp".as[Timestamp].head fetched <- scheduledMessages.filter { msg => msg.queueName === queueName && msg.plannedRun <= currentTimestamp }.sortBy(_.plannedRun desc).take(batchSize).result } yield fetched def consumeAction(fetched: Seq[ScheduledMessage]) = { val fetchedIds = fetched.flatMap(_.id) for { deleted <- scheduledMessages.filter(_.id inSet fetchedIds).delete _ <- { if (deleted != fetched.size) { DBIO.failed(ConcurrentFetchException) } else { DBIO.successful(Unit) } } _ <- DBIO.from(onMessages(fetched)) } yield fetched.size } val consumedFuture = for { fetched <- db.run(fetchAction.transactionally) consumed <- db.run(consumeAction(fetched).transactionally) } yield consumed val consumedRecovered = consumedFuture.recover { case ConcurrentFetchException => 0 } for { consumed <- consumedRecovered consumedNext <- { if (consumed == batchSize) drain() else Future.successful(0) } } yield consumed + consumedNext } drain() } override def queuesStats(names: Set[String]): Future[Map[String, Int]] = { val action = scheduledMessages .filter(_.queueName inSet names) .groupBy(_.queueName).map { case (queueName, msgs) => (queueName, msgs.size) }.result db.run(action).map(_.toMap) } } case object ConcurrentFetchException extends Exception(s"Concurrent fetch detected")
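In the save method above, the planned run time is the database's current timestamp shifted by the message delay. The same arithmetic in isolation; the object name is illustrative:

import java.sql.Timestamp
import scala.concurrent.duration._

object PlannedRun {
  // Shift a base timestamp forward by a finite delay.
  def apply(currentTimestamp: Timestamp, delay: FiniteDuration): Timestamp =
    new Timestamp(currentTimestamp.getTime + delay.toMillis)

  def main(args: Array[String]): Unit =
    println(PlannedRun(new Timestamp(System.currentTimeMillis()), 30.seconds))
}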
Example 63
Source File: ReadParquetEEL.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels import java.sql.Timestamp import io.eels.component.parquet.{ParquetSink, ParquetSource} import io.eels.datastream.DataStream import io.eels.schema.{ArrayType, DecimalType, Field, IntType, Precision, Scale, StringType, StructType, TimestampMillisType} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} object ReadParquetEEL extends App { def readParquet(path: Path): Unit = { implicit val hadoopConfiguration = new Configuration() implicit val hadoopFileSystem = FileSystem.get(hadoopConfiguration) val rows = ParquetSource(parquetFilePath).toDataStream().collect rows.foreach(row => println(row)) } val parquetFilePath = new Path("file:///home/sam/development/person2.parquet") implicit val hadoopConfiguration = new Configuration() implicit val hadoopFileSystem = FileSystem.get(hadoopConfiguration) val friendStruct = Field.createStructField("FRIEND", Seq( Field("NAME", StringType), Field("AGE", IntType.Signed) ) ) val personDetailsStruct = Field.createStructField("PERSON_DETAILS", Seq( Field("NAME", StringType), Field("AGE", IntType.Signed), Field("SALARY", DecimalType(Precision(38), Scale(5))), Field("CREATION_TIME", TimestampMillisType) ) ) val friendType = StructType(friendStruct) val schema = StructType(personDetailsStruct, Field("FRIENDS", ArrayType(friendType), nullable = false)) val friends = Vector( Vector(Vector("John", 25)), Vector(Vector("Adam", 26)), Vector(Vector("Steven", 27)) ) val rows = Vector( Vector(Vector("Fred", 50, BigDecimal("50000.99000"), new Timestamp(System.currentTimeMillis())), friends) ) try { DataStream.fromValues(schema, rows).to(ParquetSink(parquetFilePath).withOverwrite(true)) } catch { case e: Exception => e.printStackTrace() } try { readParquet(parquetFilePath) } catch { case e: Exception => e.printStackTrace() } }
Example 64
Source File: SchemaFn.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.schema import java.sql.Timestamp object SchemaFn { def toDataType(clz: Class[_]): DataType = { val intClass = classOf[Int] val floatClass = classOf[Float] val stringClass = classOf[String] val charClass = classOf[Char] val bigIntClass = classOf[BigInt] val booleanClass = classOf[Boolean] val doubleClass = classOf[Double] val bigdecimalClass = classOf[BigDecimal] val longClass = classOf[Long] val byteClass = classOf[Byte] val shortClass = classOf[Short] val timestampClass = classOf[Timestamp] clz match { case `bigdecimalClass` => DecimalType(Precision(22), Scale(5)) case `bigIntClass` => BigIntType case `booleanClass` => BooleanType case `byteClass` => ByteType.Signed case `charClass` => CharType(1) case `doubleClass` => DoubleType case `intClass` => IntType.Signed case `floatClass` => FloatType case `longClass` => LongType.Signed case `stringClass` => StringType case `shortClass` => ShortType.Signed case `timestampClass` => TimestampMillisType case _ => sys.error(s"Can not map $clz to data type") } } }
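A usage sketch, assuming the eel-sdk classes above are on the classpath: classOf[java.sql.Timestamp] maps to TimestampMillisType.

import java.sql.Timestamp
import io.eels.schema.{SchemaFn, TimestampMillisType}

object SchemaFnUsage extends App {
  // java.sql.Timestamp is mapped to eel's millisecond-precision timestamp type.
  assert(SchemaFn.toDataType(classOf[Timestamp]) == TimestampMillisType)
}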
Example 65
Source File: TimestampCastSpec.scala From flint with Apache License 2.0 | 5 votes |
package org.apache.spark.sql import java.sql.Timestamp import java.time.Instant import scala.reflect.runtime.universe.TypeTag import com.twosigma.flint.timeseries.TimeSeriesSuite import org.apache.spark.sql.functions._ class TimestampCastSpec extends TimeSeriesSuite { import TimestampCastSpec._ import testImplicits._ behavior of "TimestampToNanos" testEvalAndCodegen("retain up to microsecond precision", nanosToTimestamp(expectedNanos)){ df => val actual = df.select(TimestampToNanos(col("time")).as("long")) .collect() .map(_.getAs[Long]("long")) assert(actual === expectedNanos) } behavior of "LongToTimestamp" testEvalAndCodegen("retain up to microsecond precision", expectedNanos) { df => val actual = df.select(NanosToTimestamp(col("time")).as("timestamp")) .collect() .map(_.getAs[Timestamp]("timestamp")) val expectedTimestamps = expectedNanos.map { nanos => Timestamp.from(Instant.ofEpochSecond(0, nanos)) } assert(actual === expectedTimestamps) } private def asExternalRDD[T: TypeTag](input: Seq[T]): DataFrame = { sc.range(0, input.size.toLong).map { i => Tuple1(input(i.toInt)) }.toDF("time") } } object TimestampCastSpec { val expectedNanos = Seq[Long]( 0L, Long.MaxValue - (Long.MaxValue % 1000), // clip to microsecond precision 946684800000000000L, // 2001-01-01 1262304000000000000L, // 2010-01-01 1893456000000000000L // 2030-01-01 ) def nanosToTimestamp(input: Seq[Long]): Seq[Timestamp] = input.map { v => Timestamp.from(Instant.ofEpochSecond(0, v)) } }
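The nanosecond/Timestamp conversions used in the spec above, as a standalone sketch (names are illustrative):

import java.sql.Timestamp
import java.time.Instant

object NanosAndTimestamps {
  // Nanoseconds since the epoch -> Timestamp, preserving sub-millisecond precision.
  def nanosToTimestamp(nanos: Long): Timestamp = Timestamp.from(Instant.ofEpochSecond(0, nanos))

  // Timestamp -> nanoseconds since the epoch.
  def timestampToNanos(ts: Timestamp): Long = {
    val i = ts.toInstant
    i.getEpochSecond * 1000000000L + i.getNano
  }

  def main(args: Array[String]): Unit = {
    val ts = nanosToTimestamp(1262304000000000000L) // 2010-01-01T00:00:00Z
    println(ts + " -> " + timestampToNanos(ts))
  }
}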
Example 66
Source File: JsonSupport.scala From akka-http-slick-sample with MIT License | 5 votes |
package net.softler.data.model import java.sql.Timestamp import java.time.Instant import java.util.UUID import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport import spray.json.{DefaultJsonProtocol, JsNumber, JsString, JsValue, JsonFormat, RootJsonFormat} trait BaseJsonProtocol extends DefaultJsonProtocol { implicit val timestampFormat: JsonFormat[Timestamp] = new JsonFormat[Timestamp] { override def write(obj: Timestamp): JsValue = JsNumber(obj.getTime) override def read(json: JsValue): Timestamp = json match { case JsNumber(x) => Timestamp.from(Instant.ofEpochMilli(x.toLong)) case _ => throw new IllegalArgumentException( s"Can not parse json value [$json] to a timestamp object") } } implicit val uuidJsonFormat: JsonFormat[UUID] = new JsonFormat[UUID] { override def write(x: UUID): JsValue = JsString(x.toString) override def read(value: JsValue): UUID = value match { case JsString(x) => UUID.fromString(x) case x => throw new IllegalArgumentException("Expected UUID as JsString, but got " + x.getClass) } } } trait JsonProtocol extends SprayJsonSupport with BaseJsonProtocol { implicit val userFormat: RootJsonFormat[User] = jsonFormat10(User) }
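A round-trip usage sketch for the timestampFormat above, assuming the BaseJsonProtocol trait and spray-json are on the classpath:

import java.sql.Timestamp
import net.softler.data.model.BaseJsonProtocol

object TimestampJsonRoundTrip extends App with BaseJsonProtocol {
  val ts = new Timestamp(1541999720000L)
  val js = timestampFormat.write(ts)   // JsNumber(1541999720000)
  val back = timestampFormat.read(js)  // Timestamp equal to ts
  assert(back == ts)
  println(js)
}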
Example 67
Source File: entities.scala From akka-http-slick-sample with MIT License | 5 votes |
package net.softler.data.model import java.sql.Timestamp import java.util.UUID trait Entity { def id: UUID def created: Timestamp def updated: Option[Timestamp] def deleted: Option[Timestamp] } case class User(id: UUID, login: String, password: String, email: String, firstName: Option[String], lastName: Option[String], readOnly: Boolean, created: Timestamp, updated: Option[Timestamp], deleted: Option[Timestamp]) extends Entity case class Role(id: UUID, name: String, readOnly: Boolean, created: Timestamp, updated: Option[Timestamp], deleted: Option[Timestamp]) extends Entity case class UserRole(user: User, role: Role)
Example 68
Source File: StationSuite.scala From gihyo-spark-book-example with Apache License 2.0 | 5 votes |
package jp.gihyo.spark.ch05 import java.sql.Timestamp import java.text.SimpleDateFormat import org.scalatest.FunSuite class StationSuite extends FunSuite { test("should be parse") { val line = "2,San Jose Diridon Caltrain Station,37.329732,-121.901782,27,San Jose,8/6/2013" val station = Station.parse(line) val dateFormat = new SimpleDateFormat("MM/dd/yyy") assert(station.id === 2) assert(station.name === "San Jose Diridon Caltrain Station") assert(station.lat === 37.329732) assert(station.lon === -121.901782) assert(station.dockcount === 27) assert(station.landmark === "San Jose") assert(station.installation === new Timestamp(dateFormat.parse("8/6/2013").getTime)) } }
Example 69
Source File: QueryPartitionSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive import java.io.File import java.sql.Timestamp import com.google.common.io.Files import org.apache.hadoop.fs.FileSystem import org.apache.spark.internal.config._ import org.apache.spark.sql._ import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.util.Utils class QueryPartitionSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { import spark.implicits._ private def queryWhenPathNotExist(): Unit = { withTempView("testData") { withTable("table_with_partition", "createAndInsertTest") { withTempDir { tmpDir => val testData = sparkContext.parallelize( (1 to 10).map(i => TestData(i, i.toString))).toDF() testData.createOrReplaceTempView("testData") // create the table for test sql(s"CREATE TABLE table_with_partition(key int,value string) " + s"PARTITIONED by (ds string) location '${tmpDir.toURI}' ") sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='1') " + "SELECT key,value FROM testData") sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='2') " + "SELECT key,value FROM testData") sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='3') " + "SELECT key,value FROM testData") sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='4') " + "SELECT key,value FROM testData") // test for the exist path checkAnswer(sql("select key,value from table_with_partition"), testData.union(testData).union(testData).union(testData)) // delete the path of one partition tmpDir.listFiles .find { f => f.isDirectory && f.getName().startsWith("ds=") } .foreach { f => Utils.deleteRecursively(f) } // test for after delete the path checkAnswer(sql("select key,value from table_with_partition"), testData.union(testData).union(testData)) } } } } test("SPARK-5068: query data when path doesn't exist") { withSQLConf(SQLConf.HIVE_VERIFY_PARTITION_PATH.key -> "true") { queryWhenPathNotExist() } } test("Replace spark.sql.hive.verifyPartitionPath by spark.files.ignoreMissingFiles") { withSQLConf(SQLConf.HIVE_VERIFY_PARTITION_PATH.key -> "false") { sparkContext.conf.set(IGNORE_MISSING_FILES.key, "true") queryWhenPathNotExist() } } test("SPARK-21739: Cast expression should initialize timezoneId") { withTable("table_with_timestamp_partition") { sql("CREATE TABLE table_with_timestamp_partition(value int) PARTITIONED BY (ts TIMESTAMP)") sql("INSERT OVERWRITE TABLE table_with_timestamp_partition " + "PARTITION (ts = '2010-01-01 00:00:00.000') VALUES (1)") // test for Cast expression in TableReader checkAnswer(sql("SELECT * FROM table_with_timestamp_partition"), Seq(Row(1, Timestamp.valueOf("2010-01-01 00:00:00.000")))) // test for Cast expression in HiveTableScanExec checkAnswer(sql("SELECT value FROM table_with_timestamp_partition " + "WHERE ts = '2010-01-01 00:00:00.000'"), Row(1)) } } }
Example 70
Source File: LiteralGenerator.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} import org.scalacheck.{Arbitrary, Gen} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.CalendarInterval object LiteralGenerator { lazy val byteLiteralGen: Gen[Literal] = for { b <- Arbitrary.arbByte.arbitrary } yield Literal.create(b, ByteType) lazy val shortLiteralGen: Gen[Literal] = for { s <- Arbitrary.arbShort.arbitrary } yield Literal.create(s, ShortType) lazy val integerLiteralGen: Gen[Literal] = for { i <- Arbitrary.arbInt.arbitrary } yield Literal.create(i, IntegerType) lazy val longLiteralGen: Gen[Literal] = for { l <- Arbitrary.arbLong.arbitrary } yield Literal.create(l, LongType) lazy val floatLiteralGen: Gen[Literal] = for { f <- Gen.chooseNum(Float.MinValue / 2, Float.MaxValue / 2, Float.NaN, Float.PositiveInfinity, Float.NegativeInfinity) } yield Literal.create(f, FloatType) lazy val doubleLiteralGen: Gen[Literal] = for { f <- Gen.chooseNum(Double.MinValue / 2, Double.MaxValue / 2, Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity) } yield Literal.create(f, DoubleType) // TODO cache the generated data def decimalLiteralGen(precision: Int, scale: Int): Gen[Literal] = { assert(scale >= 0) assert(precision >= scale) Arbitrary.arbBigInt.arbitrary.map { s => val a = (s % BigInt(10).pow(precision - scale)).toString() val b = (s % BigInt(10).pow(scale)).abs.toString() Literal.create( Decimal(BigDecimal(s"$a.$b"), precision, scale), DecimalType(precision, scale)) } } lazy val stringLiteralGen: Gen[Literal] = for { s <- Arbitrary.arbString.arbitrary } yield Literal.create(s, StringType) lazy val binaryLiteralGen: Gen[Literal] = for { ab <- Gen.listOf[Byte](Arbitrary.arbByte.arbitrary) } yield Literal.create(ab.toArray, BinaryType) lazy val booleanLiteralGen: Gen[Literal] = for { b <- Arbitrary.arbBool.arbitrary } yield Literal.create(b, BooleanType) lazy val dateLiteralGen: Gen[Literal] = for { d <- Arbitrary.arbInt.arbitrary } yield Literal.create(new Date(d), DateType) lazy val timestampLiteralGen: Gen[Literal] = for { t <- Arbitrary.arbLong.arbitrary } yield Literal.create(new Timestamp(t), TimestampType) lazy val calendarIntervalLiterGen: Gen[Literal] = for { m <- Arbitrary.arbInt.arbitrary; s <- Arbitrary.arbLong.arbitrary} yield Literal.create(new CalendarInterval(m, s), CalendarIntervalType) // Sometimes, it would be quite expensive when unlimited value is used, // for example, the `times` arguments for StringRepeat would hang the test 'forever' // if it's tested against Int.MaxValue by ScalaCheck, therefore, use values from a limited // range is more reasonable lazy val limitedIntegerLiteralGen: Gen[Literal] = for { i <- Gen.choose(-100, 100) } yield Literal.create(i, IntegerType) def randomGen(dt: DataType): Gen[Literal] = { dt match { case ByteType => byteLiteralGen case ShortType => shortLiteralGen case IntegerType => integerLiteralGen case LongType => longLiteralGen case DoubleType => doubleLiteralGen case FloatType => floatLiteralGen case DateType => dateLiteralGen case TimestampType => timestampLiteralGen case BooleanType => booleanLiteralGen case StringType => stringLiteralGen case BinaryType => binaryLiteralGen case CalendarIntervalType => calendarIntervalLiterGen case DecimalType.Fixed(precision, scale) => decimalLiteralGen(precision, scale) case dt => throw new IllegalArgumentException(s"not supported type $dt") } } }
Example 71
Source File: CallMethodViaReflectionSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions import java.sql.Timestamp import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.TypeCheckFailure import org.apache.spark.sql.types.{IntegerType, StringType} class CallMethodViaReflectionSuite extends SparkFunSuite with ExpressionEvalHelper { import CallMethodViaReflection._ // Get rid of the $ so we are getting the companion object's name. private val staticClassName = ReflectStaticClass.getClass.getName.stripSuffix("$") private val dynamicClassName = classOf[ReflectDynamicClass].getName test("findMethod via reflection for static methods") { assert(findMethod(staticClassName, "method1", Seq.empty).exists(_.getName == "method1")) assert(findMethod(staticClassName, "method2", Seq(IntegerType)).isDefined) assert(findMethod(staticClassName, "method3", Seq(IntegerType)).isDefined) assert(findMethod(staticClassName, "method4", Seq(IntegerType, StringType)).isDefined) } test("findMethod for a JDK library") { assert(findMethod(classOf[java.util.UUID].getName, "randomUUID", Seq.empty).isDefined) } test("class not found") { val ret = createExpr("some-random-class", "method").checkInputDataTypes() assert(ret.isFailure) val errorMsg = ret.asInstanceOf[TypeCheckFailure].message assert(errorMsg.contains("not found") && errorMsg.contains("class")) } test("method not found because name does not match") { val ret = createExpr(staticClassName, "notfoundmethod").checkInputDataTypes() assert(ret.isFailure) val errorMsg = ret.asInstanceOf[TypeCheckFailure].message assert(errorMsg.contains("cannot find a static method")) } test("method not found because there is no static method") { val ret = createExpr(dynamicClassName, "method1").checkInputDataTypes() assert(ret.isFailure) val errorMsg = ret.asInstanceOf[TypeCheckFailure].message assert(errorMsg.contains("cannot find a static method")) } test("input type checking") { assert(CallMethodViaReflection(Seq.empty).checkInputDataTypes().isFailure) assert(CallMethodViaReflection(Seq(Literal(staticClassName))).checkInputDataTypes().isFailure) assert(CallMethodViaReflection( Seq(Literal(staticClassName), Literal(1))).checkInputDataTypes().isFailure) assert(createExpr(staticClassName, "method1").checkInputDataTypes().isSuccess) } test("unsupported type checking") { val ret = createExpr(staticClassName, "method1", new Timestamp(1)).checkInputDataTypes() assert(ret.isFailure) val errorMsg = ret.asInstanceOf[TypeCheckFailure].message assert(errorMsg.contains("arguments from the third require boolean, byte, short")) } test("invoking methods using acceptable types") { checkEvaluation(createExpr(staticClassName, "method1"), "m1") checkEvaluation(createExpr(staticClassName, "method2", 2), "m2") checkEvaluation(createExpr(staticClassName, "method3", 3), "m3") checkEvaluation(createExpr(staticClassName, "method4", 4, "four"), "m4four") } private def createExpr(className: String, methodName: String, args: Any*) = { CallMethodViaReflection( Literal.create(className, StringType) +: Literal.create(methodName, StringType) +: args.map(Literal.apply) ) } }
Example 72
Source File: SortOrderExpressionsSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} import java.util.TimeZone import org.apache.spark.SparkFunSuite import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.collection.unsafe.sort.PrefixComparators._ class SortOrderExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("SortPrefix") { val b1 = Literal.create(false, BooleanType) val b2 = Literal.create(true, BooleanType) val i1 = Literal.create(20132983, IntegerType) val i2 = Literal.create(-20132983, IntegerType) val l1 = Literal.create(20132983, LongType) val l2 = Literal.create(-20132983, LongType) val millis = 1524954911000L; // Explicitly choose a time zone, since Date objects can create different values depending on // local time zone of the machine on which the test is running val oldDefaultTZ = TimeZone.getDefault val d1 = try { TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles")) Literal.create(new java.sql.Date(millis), DateType) } finally { TimeZone.setDefault(oldDefaultTZ) } val t1 = Literal.create(new Timestamp(millis), TimestampType) val f1 = Literal.create(0.7788229f, FloatType) val f2 = Literal.create(-0.7788229f, FloatType) val db1 = Literal.create(0.7788229d, DoubleType) val db2 = Literal.create(-0.7788229d, DoubleType) val s1 = Literal.create("T", StringType) val s2 = Literal.create("This is longer than 8 characters", StringType) val bin1 = Literal.create(Array[Byte](12), BinaryType) val bin2 = Literal.create(Array[Byte](12, 17, 99, 0, 0, 0, 2, 3, 0xf4.asInstanceOf[Byte]), BinaryType) val dec1 = Literal(Decimal(20132983L, 10, 2)) val dec2 = Literal(Decimal(20132983L, 19, 2)) val dec3 = Literal(Decimal(20132983L, 21, 2)) val list1 = Literal(List(1, 2), ArrayType(IntegerType)) val nullVal = Literal.create(null, IntegerType) checkEvaluation(SortPrefix(SortOrder(b1, Ascending)), 0L) checkEvaluation(SortPrefix(SortOrder(b2, Ascending)), 1L) checkEvaluation(SortPrefix(SortOrder(i1, Ascending)), 20132983L) checkEvaluation(SortPrefix(SortOrder(i2, Ascending)), -20132983L) checkEvaluation(SortPrefix(SortOrder(l1, Ascending)), 20132983L) checkEvaluation(SortPrefix(SortOrder(l2, Ascending)), -20132983L) // For some reason, the Literal.create code gives us the number of days since the epoch checkEvaluation(SortPrefix(SortOrder(d1, Ascending)), 17649L) checkEvaluation(SortPrefix(SortOrder(t1, Ascending)), millis * 1000) checkEvaluation(SortPrefix(SortOrder(f1, Ascending)), DoublePrefixComparator.computePrefix(f1.value.asInstanceOf[Float].toDouble)) checkEvaluation(SortPrefix(SortOrder(f2, Ascending)), DoublePrefixComparator.computePrefix(f2.value.asInstanceOf[Float].toDouble)) checkEvaluation(SortPrefix(SortOrder(db1, Ascending)), DoublePrefixComparator.computePrefix(db1.value.asInstanceOf[Double])) checkEvaluation(SortPrefix(SortOrder(db2, Ascending)), DoublePrefixComparator.computePrefix(db2.value.asInstanceOf[Double])) checkEvaluation(SortPrefix(SortOrder(s1, Ascending)), StringPrefixComparator.computePrefix(s1.value.asInstanceOf[UTF8String])) checkEvaluation(SortPrefix(SortOrder(s2, Ascending)), StringPrefixComparator.computePrefix(s2.value.asInstanceOf[UTF8String])) checkEvaluation(SortPrefix(SortOrder(bin1, Ascending)), BinaryPrefixComparator.computePrefix(bin1.value.asInstanceOf[Array[Byte]])) checkEvaluation(SortPrefix(SortOrder(bin2, Ascending)), BinaryPrefixComparator.computePrefix(bin2.value.asInstanceOf[Array[Byte]])) checkEvaluation(SortPrefix(SortOrder(dec1, Ascending)), 20132983L) 
checkEvaluation(SortPrefix(SortOrder(dec2, Ascending)), 2013298L) checkEvaluation(SortPrefix(SortOrder(dec3, Ascending)), DoublePrefixComparator.computePrefix(201329.83d)) checkEvaluation(SortPrefix(SortOrder(list1, Ascending)), 0L) checkEvaluation(SortPrefix(SortOrder(nullVal, Ascending)), null) } }
Example 73
Source File: ApplicationMonitor.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.monitor.application

import java.sql.{Connection, Timestamp}
import java.text.SimpleDateFormat
import java.util.Date
import java.util.concurrent.TimeUnit

import scala.concurrent.duration.Duration

import org.apache.spark.alarm.AlertMessage
import org.apache.spark.alarm.AlertType._
import org.apache.spark.monitor.Monitor
import org.apache.spark.monitor.MonitorItem.MonitorItem

abstract class ApplicationMonitor extends Monitor {
  override val alertType = Seq(Application)
}

class ApplicationInfo(
    title: MonitorItem,
    appName: String,
    appId: String,
    md5: String,
    startTime: Date,
    duration: Long,
    appUiUrl: String,
    historyUrl: String,
    eventLogDir: String,
    minExecutor: Int,
    maxExecutor: Int,
    executorCore: Int,
    executorMemoryMB: Long,
    executorAccu: Double,
    user: String)
  extends AlertMessage(title) {

  override def toCsv(): String = {
    s"${user},${appId}," +
      s"${new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(startTime)}," +
      s"${Duration(duration, TimeUnit.MILLISECONDS).toSeconds}," +
      s"${executorMemoryMB},${executorCore},${executorAccu.formatted("%.2f")},${appName}"
  }

  // scalastyle:off
  override def toHtml(): String = {
    val html = <h1>Job finished! </h1>
        <h2>Job information </h2>
        <ul>
          <li>Application name: {appName}</li>
          <li>Application ID: {appId}</li>
          <li>Start time: {new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(startTime)}</li>
          <li>Elapsed time: {Duration(duration, TimeUnit.MILLISECONDS).toSeconds} s</li>
        </ul>
        <h2>Resource usage</h2>
        <ul>
          <li>Executor count: {minExecutor}~{maxExecutor}</li>
          <li>Executor memory: {executorMemoryMB} MB</li>
          <li>Executor cores: {executorCore}</li>
          <li>Cumulative executor usage: {executorAccu.formatted("%.2f")} executor*min</li>
        </ul>
        <h2>Debug information</h2>
        <ul>
          <li>History link 1: <a href={appUiUrl.split(",").head}>{appUiUrl.split(",").head}</a></li>
          <li>History link 2: <a href={historyUrl}>{historyUrl}</a></li>
          <li>Event log directory: {eventLogDir}</li>
        </ul>
    html.mkString
  }

  override def toJdbc(conn: Connection, appId: String): Unit = {
    val query = "INSERT INTO `xsql_monitor`.`spark_history`(" +
      "`user`, `md5`, `appId`, `startTime`, `duration`, " +
      "`yarnURL`, `sparkHistoryURL`, `eventLogDir`, `coresPerExecutor`, `memoryPerExecutorMB`," +
      " `executorAcc`, `appName`, `minExecutors`, `maxExecutors`)" +
      " SELECT ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? FROM DUAL" +
      " WHERE NOT EXISTS (SELECT * FROM `xsql_monitor`.`spark_history` WHERE `appId` = ?);"

    val preparedStmt = conn.prepareStatement(query)
    preparedStmt.setString(1, user)
    preparedStmt.setString(2, md5)
    preparedStmt.setString(3, appId)
    preparedStmt.setTimestamp(4, new Timestamp(startTime.getTime))
    preparedStmt.setLong(5, Duration(duration, TimeUnit.MILLISECONDS).toSeconds)
    preparedStmt.setString(6, appUiUrl)
    preparedStmt.setString(7, historyUrl)
    preparedStmt.setString(8, eventLogDir)
    preparedStmt.setInt(9, executorCore)
    preparedStmt.setLong(10, executorMemoryMB)
    preparedStmt.setDouble(11, executorAccu)
    preparedStmt.setString(12, appName)
    preparedStmt.setInt(13, minExecutor)
    preparedStmt.setInt(14, maxExecutor)
    preparedStmt.setString(15, appId)
    preparedStmt.execute
  }
}
Example 74
Source File: TimestampExpressionSuite.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions import java.sql.Timestamp import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.types.{DateType, IntegerType} import org.scalatest.FunSuite class TimestampExpressionSuite extends FunSuite with ExpressionEvalHelper { test("add_seconds") { // scalastyle:off magic.number checkEvaluation(AddSeconds(Literal(Timestamp.valueOf("2015-01-01 00:11:33")), Literal(28)), DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf("2015-01-01 00:12:01"))) checkEvaluation(AddSeconds(Literal(Timestamp.valueOf("2015-01-02 00:00:00")), Literal(-1)), DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf("2015-01-01 23:59:59"))) checkEvaluation(AddSeconds(Literal(Timestamp.valueOf("2015-01-01 00:00:00")), Literal(-1)), DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf("2014-12-31 23:59:59"))) checkEvaluation(AddSeconds(Literal(Timestamp.valueOf("2015-01-02 00:00:00")), Literal.create(null, IntegerType)), null) checkEvaluation(AddSeconds(Literal.create(null, DateType), Literal(1)), null) checkEvaluation(AddSeconds(Literal.create(null, DateType), Literal.create(null, IntegerType)), null) } }
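The ADD_SECONDS semantics exercised above have a simple plain-JVM equivalent for java.sql.Timestamp values (a sketch, not the Spark expression itself):

import java.sql.Timestamp

object AddSecondsSketch {
  // Shift a Timestamp by a number of seconds (negative values move backwards).
  def addSeconds(ts: Timestamp, seconds: Long): Timestamp =
    Timestamp.from(ts.toInstant.plusSeconds(seconds))

  def main(args: Array[String]): Unit = {
    println(addSeconds(Timestamp.valueOf("2015-01-01 00:11:33"), 28))  // 2015-01-01 00:12:01.0
    println(addSeconds(Timestamp.valueOf("2015-01-02 00:00:00"), -1))  // 2015-01-01 23:59:59.0
  }
}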
Example 75
Source File: AddSecondsSuite.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions import java.sql.Timestamp import org.apache.spark.sql.{GlobalSapSQLContext, Row} import org.scalatest.FunSuite class AddSecondsSuite extends FunSuite with GlobalSapSQLContext { val rowA = TimestampRow("AAA", Timestamp.valueOf("2015-01-01 12:12:04")) val rowB = TimestampRow("BBB", Timestamp.valueOf("2015-01-01 00:00:00")) val rowC = TimestampRow("CCC", Timestamp.valueOf("2015-12-31 23:59:58")) val rowD = TimestampRow("DDD", Timestamp.valueOf("2012-01-01 23:30:45")) val dataWithTimestamps = Seq(rowA, rowB, rowC, rowD) test("add_seconds") { val rdd = sc.parallelize(dataWithTimestamps) val dSrc = sqlContext.createDataFrame(rdd).cache() dSrc.registerTempTable("src") val result1 = sqlContext.sql("SELECT name, ADD_SECONDS(t, 5) FROM src").collect() assertResult( Row(rowA.name, Timestamp.valueOf("2015-01-01 12:12:09")) :: Row(rowB.name, Timestamp.valueOf("2015-01-01 00:00:05")) :: Row(rowC.name, Timestamp.valueOf("2016-01-01 00:00:03")) :: Row(rowD.name, Timestamp.valueOf("2012-01-01 23:30:50")) :: Nil)(result1) val result2 = sqlContext.sql("SELECT name, ADD_SECONDS(t, -5) FROM src").collect() assertResult( Row(rowA.name, Timestamp.valueOf("2015-01-01 12:11:59")) :: Row(rowB.name, Timestamp.valueOf("2014-12-31 23:59:55")) :: Row(rowC.name, Timestamp.valueOf("2015-12-31 23:59:53")) :: Row(rowD.name, Timestamp.valueOf("2012-01-01 23:30:40")) :: Nil)(result2) // example from SAP HANA documentation at // http://help.sap.com/hana/SAP_HANA_SQL_and_System_Views_Reference_en.pdf val result3 = sqlContext.sql("SELECT name, ADD_SECONDS(t, 60*30) FROM src").collect() assertResult( Row(rowA.name, Timestamp.valueOf("2015-01-01 12:42:04")) :: Row(rowB.name, Timestamp.valueOf("2015-01-01 00:30:00")) :: Row(rowC.name, Timestamp.valueOf("2016-01-01 00:29:58")) :: Row(rowD.name, Timestamp.valueOf("2012-01-02 00:00:45")) :: Nil)(result3) } }
Example 76
Source File: ExcelOutputWriter.scala From spark-hadoopoffice-ds with Apache License 2.0 | 5 votes |
package org.zuinnote.spark.office.excel import java.math.BigDecimal import java.sql.Date import java.sql.Timestamp import java.text.DateFormat import java.text.SimpleDateFormat import java.util.Calendar import org.apache.hadoop.conf.Configuration import org.apache.hadoop.io.NullWritable import org.apache.hadoop.io.ArrayWritable import org.apache.hadoop.mapreduce.RecordWriter import org.apache.hadoop.mapreduce.TaskAttemptContext import org.apache.hadoop.fs.Path import org.apache.spark.sql.catalyst.{ CatalystTypeConverters, InternalRow } import org.apache.spark.sql.Row import org.apache.spark.sql.execution.datasources.OutputWriter import org.apache.spark.sql.types._ import org.zuinnote.hadoop.office.format.common.dao.SpreadSheetCellDAO import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration import org.zuinnote.hadoop.office.format.common.util.msexcel.MSExcelUtil import org.zuinnote.hadoop.office.format.mapreduce._ import org.apache.commons.logging.LogFactory import org.apache.commons.logging.Log import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration import java.util.Locale import java.text.DecimalFormat import org.zuinnote.hadoop.office.format.common.converter.ExcelConverterSimpleSpreadSheetCellDAO import java.text.NumberFormat // NOTE: This class is instantiated and used on executor side only, no need to be serializable. private[excel] class ExcelOutputWriter( path: String, dataSchema: StructType, context: TaskAttemptContext, options: Map[String, String]) extends OutputWriter { def write(row: Row): Unit = { // check useHeader if (useHeader) { val headers = row.schema.fieldNames var i = 0 for (x <- headers) { val headerColumnSCD = new SpreadSheetCellDAO(x, "", "", MSExcelUtil.getCellAddressA1Format(currentRowNum, i), defaultSheetName) recordWriter.write(NullWritable.get(), headerColumnSCD) i += 1 } currentRowNum += 1 useHeader = false } // for each value in the row if (row.size>0) { var currentColumnNum = 0; val simpleObject = new Array[AnyRef](row.size) for (i <- 0 to row.size - 1) { // for each element of the row val obj = row.get(i) if ((obj.isInstanceOf[Seq[String]]) && (obj.asInstanceOf[Seq[String]].length==5)) { val formattedValue = obj.asInstanceOf[Seq[String]](0) val comment = obj.asInstanceOf[Seq[String]](1) val formula = obj.asInstanceOf[Seq[String]](2) val address = obj.asInstanceOf[Seq[String]](3) val sheetName = obj.asInstanceOf[Seq[String]](4) simpleObject(i) = new SpreadSheetCellDAO(formattedValue,comment,formula,address,sheetName) } else { simpleObject(i)=obj.asInstanceOf[AnyRef] } } // convert row to spreadsheetcellDAO val spreadSheetCellDAORow = simpleConverter.getSpreadSheetCellDAOfromSimpleDataType(simpleObject, defaultSheetName, currentRowNum) // write it for (x<- spreadSheetCellDAORow) { recordWriter.write(NullWritable.get(), x) } } currentRowNum += 1 } override def close(): Unit = { recordWriter.close(context) currentRowNum = 0; } }
Example 77
Source File: SpecificPrimitivesSpec.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
import test._ import org.specs2.mutable.Specification import java.sql.{Date, Timestamp} import java.util.UUID class SpecificPrimitivesSpec extends Specification { "A case class with an `Int` field" should { "deserialize correctly" in { val record1 = AvroTypeProviderTest00(1) val record2 = AvroTypeProviderTest00(2) val records = List(record1, record2) SpecificTestUtil.verifyWriteAndRead(records) } } "A case class with an `Float` field" should { "deserialize correctly" in { val record1 = AvroTypeProviderTest01(1F) val record2 = AvroTypeProviderTest01(2F) val records = List(record1, record2) SpecificTestUtil.verifyWriteAndRead(records) } } "A case class with an `Long` field" should { "deserialize correctly" in { val record1 = AvroTypeProviderTest02(1L) val record2 = AvroTypeProviderTest02(2L) val records = List(record1, record2) SpecificTestUtil.verifyWriteAndRead(records) } } "A case class with an `Double` field" should { "deserialize correctly" in { val record1 = AvroTypeProviderTest03(1D) val record2 = AvroTypeProviderTest03(2D) val records = List(record1, record2) SpecificTestUtil.verifyWriteAndRead(records) } } "A case class with an `Boolean` field" should { "deserialize correctly" in { val record1 = AvroTypeProviderTest04(true) val record2 = AvroTypeProviderTest04(false) val records = List(record1, record2) SpecificTestUtil.verifyWriteAndRead(records) } } "A case class with an `String` field" should { "deserialize correctly" in { val record1 = AvroTypeProviderTest05("hello world") val record2 = AvroTypeProviderTest05("hello galaxy") val records = List(record1, record2) SpecificTestUtil.verifyWriteAndRead(records) } } "A case class with an `Null` field" should { "deserialize correctly" in { val record1 = AvroTypeProviderTest06(null) val record2 = AvroTypeProviderTest06(null) val records = List(record1, record2) SpecificTestUtil.verifyWriteAndRead(records) } } "A case class with an `Array[Bytes]` field" should { "deserialize correctly" in { val record1 = AvroTypeProviderTest69("hello world".getBytes) val record2 = AvroTypeProviderTest69("hello galaxy".getBytes) val records = List(record1, record2) SpecificTestUtil.verifyWriteAndRead(records) } } "A case class with a `logicalType` fields from .avsc" should { "deserialize correctly" in { val t1 = System.currentTimeMillis() val t2 = System.currentTimeMillis() val record1 = LogicalSc(BigDecimal(10.0).setScale(8), new Timestamp(Long.MaxValue), new Date(t1), UUID.randomUUID()) val record2 = LogicalSc(BigDecimal(10.0).setScale(8), new Timestamp(Long.MaxValue), new Date(t2), UUID.randomUUID()) val records = List(record1, record2) SpecificTestUtil.verifyWriteAndRead(records) } } }
Example 78
Source File: ActionsHandler.scala From spark-http-stream with BSD 2-Clause "Simplified" License | 5 votes |
package org.apache.spark.sql.execution.streaming.http

import java.util.Properties
import scala.collection.mutable.ArrayBuffer
import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.spark.internal.Logging
import org.apache.spark.sql.Row
import java.sql.Timestamp
import org.apache.spark.sql.types.StructType
import java.util.concurrent.atomic.AtomicInteger

trait ActionsHandler {
  // ActionHandlerEntries (defined outside this excerpt) appears to behave like a partial function
  // from action name to response map, as the NullActionsHandler implementation below suggests.
  def listActionHandlerEntries(requestBody: Map[String, Any]): ActionHandlerEntries;
  def destroy();
}

trait ActionsHandlerFactory {
  def createInstance(params: Params): ActionsHandler;
}

abstract class AbstractActionsHandler extends ActionsHandler {
  def getRequiredParam(requestBody: Map[String, Any], key: String): Any = {
    val opt = requestBody.get(key);
    if (opt.isEmpty) {
      throw new MissingRequiredRequestParameterException(key);
    }
    opt.get;
  }

  override def destroy() = {
  }
}

class NullActionsHandler extends AbstractActionsHandler {
  override def listActionHandlerEntries(requestBody: Map[String, Any]): ActionHandlerEntries =
    new ActionHandlerEntries() {
      def apply(action: String) = Map[String, Any](); //yes, do nothing
      def isDefinedAt(action: String) = false;
    };
}

//rich row with extra info: id, time stamp, ...
case class RowEx(originalRow: Row, batchId: Long, offsetInBatch: Long, timestamp: Timestamp) {
  def withTimestamp(): Row = Row.fromSeq(originalRow.toSeq :+ timestamp);
  def withId(): Row = Row.fromSeq(originalRow.toSeq :+ s"$batchId-$offsetInBatch");
  def extra: (Long, Long, Timestamp) = { (batchId, offsetInBatch, timestamp) };
}

trait SendStreamActionSupport {
  def onReceiveStream(topic: String, rows: Array[RowEx]);
  def getRequiredParam(requestBody: Map[String, Any], key: String): Any;

  val listeners = ArrayBuffer[StreamListener]();

  def addListener(listener: StreamListener): this.type = {
    listeners += listener;
    this;
  }

  protected def notifyListeners(topic: String, data: Array[RowEx]) {
    listeners.foreach { _.onArrive(topic, data); }
  }

  def handleSendStream(requestBody: Map[String, Any]): Map[String, Any] = {
    val topic = getRequiredParam(requestBody, "topic").asInstanceOf[String];
    val batchId = getRequiredParam(requestBody, "batchId").asInstanceOf[Long];
    val rows = getRequiredParam(requestBody, "rows").asInstanceOf[Array[Row]];
    val ts = new Timestamp(System.currentTimeMillis());

    var index = -1;
    val rows2 = rows.map { row ⇒
      index += 1;
      RowEx(Row.fromSeq(row.toSeq), batchId, index, ts)
    }

    onReceiveStream(topic, rows2);
    notifyListeners(topic, rows2);
    Map("rowsCount" -> rows.size);
  }
}
Example 79
Source File: SSHOrder.scala From Aton with GNU General Public License v3.0 | 5 votes |
package model import java.sql.Timestamp case class SSHOrder( id: Long, sentDatetime: Timestamp, superUser: Boolean, interrupt: Boolean, command: String, webUser: String) { def this(sentDatetime: Timestamp, superUser: Boolean, interrupt: Boolean, command: String, username: String) = this(0, sentDatetime, superUser, interrupt, command, username) def this(sentDatetime: Timestamp, superUser: Boolean, command: String) = this(0, sentDatetime, superUser, false, command, "") def this(sentDatetime: Timestamp, command: String, webUser: String) = this(0, sentDatetime, false, false, command, webUser) }
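A usage sketch for the auxiliary constructors above, assuming the model package is on the classpath: the id defaults to 0 and the send time is the current timestamp.

import java.sql.Timestamp
import model.SSHOrder

object SSHOrderUsage extends App {
  // Resolves to the (Timestamp, Boolean, String) constructor: interrupt=false, webUser="".
  val order = new SSHOrder(new Timestamp(System.currentTimeMillis()), false, "uptime")
  println(s"${order.command} queued at ${order.sentDatetime} (superUser=${order.superUser})")
}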
Example 80
Source File: ConnectedUserTable.scala From Aton with GNU General Public License v3.0 | 5 votes |
package model.table import java.sql.Timestamp import model.{ComputerState, ConnectedUser} import slick.driver.H2Driver.api._ import slick.lifted.{ForeignKeyQuery, ProvenShape} class ConnectedUserTable(tag: Tag) extends Table[ConnectedUser](tag, "CONNECTED_USER") { // Primary key def id: Rep[Int] = column[Int]("ID", O.PrimaryKey, O.AutoInc) // Date maps to java.sql.TimeStamp. // Ver: http://stackoverflow.com/questions/31351361/storing-date-and-time-into-mysql-using-slick-scala def computerStateRegisteredDate: Rep[Timestamp] = column[Timestamp]("COMPUTER_STATE_REGISTERED_DATE") // Other columns/attributes def computerStateComputerIp: Rep[String] = column[String]("COMPUTER_STATE_COMPUTER_IP") // Foreign key to Computer def computer: ForeignKeyQuery[ComputerStateTable, ComputerState] = foreignKey("CONNECTEC_USER_COMPUTER_STATE", (computerStateComputerIp, computerStateRegisteredDate), TableQuery[ComputerStateTable])(x => (x.computerIp, x.registeredDate), onUpdate = ForeignKeyAction.Restrict, onDelete = ForeignKeyAction.Cascade) // All tables need the * method with the type that it was created the table with. override def * : ProvenShape[ConnectedUser] = (id, username, computerStateComputerIp, computerStateRegisteredDate) <> (ConnectedUser.tupled, ConnectedUser.unapply) def username: Rep[String] = column[String]("USERNAME") }
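Slick maps column[Timestamp] to the database's TIMESTAMP type out of the box. A minimal table sketch using the same pattern as above; the table and column names are illustrative:

import java.sql.Timestamp
import slick.driver.H2Driver.api._

class EventTable(tag: Tag) extends Table[(Int, Timestamp)](tag, "EVENT") {
  def id: Rep[Int] = column[Int]("ID", O.PrimaryKey, O.AutoInc)
  def occurredAt: Rep[Timestamp] = column[Timestamp]("OCCURRED_AT")
  def * = (id, occurredAt)
}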
Example 81
Source File: SuggestionController.scala From Aton with GNU General Public License v3.0 | 5 votes |
package controllers import java.sql.Timestamp import java.util.Calendar import com.google.inject.Inject import model.form.SuggestionForm import model.{Role, Suggestion} import play.api.Environment import play.api.i18n.MessagesApi import services.{SuggestionService, UserService, state} import views.html._ import scala.concurrent.{ExecutionContext, Future} class SuggestionController @Inject()(suggestionService: SuggestionService, val messagesApi: MessagesApi)(implicit userService: UserService, executionContext: ExecutionContext, environment: Environment) extends ControllerWithNoAuthRequired { def home = AsyncStack { implicit request => implicit val (username: Option[String], isAdmin: Boolean) = loggedIn match { case Some(user) => (Some(user.username), user.role == Role.Administrator) case _ => (None, false) } if (isAdmin) { suggestionService.listAll.map { suggestions => Ok//(index(messagesApi("suggestion"), suggestionHome(SuggestionForm.form, suggestions))) } } else { Future.successful(Ok)//(index(messagesApi("suggestion"), suggestionHome(SuggestionForm.form, Seq.empty[Suggestion])))) } } def add = AsyncStack() { implicit request => implicit val (username: Option[String], isAdmin: Boolean) = loggedIn match { case Some(user) => (Some(user.username), user.role == Role.Administrator) case _ => (None, false) } SuggestionForm.form.bindFromRequest().fold( errorForm => Future.successful(Ok(errorForm.toString)), data => { val text = data.suggestion val suggestion = Suggestion(0, text, now, username) suggestionService.add(suggestion).map { case state.ActionCompleted => Redirect(routes.SuggestionController.home()) case _ => BadRequest } } ) } private def now = new Timestamp(Calendar.getInstance().getTime.getTime) }
Example 82
Source File: RowReaderTest.scala From filo with Apache License 2.0 | 5 votes |
package org.velvia.filo import org.joda.time.DateTime import java.sql.Timestamp import org.scalatest.FunSpec import org.scalatest.Matchers class RowReaderTest extends FunSpec with Matchers { val schema = Seq( VectorInfo("name", classOf[String]), VectorInfo("age", classOf[Int]), VectorInfo("timestamp", classOf[Timestamp]) ) val rows = Seq( (Some("Matthew Perry"), Some(18), Some(new Timestamp(10000L))), (Some("Michelle Pfeiffer"), None, Some(new Timestamp(10010L))), (Some("George C"), Some(59), None), (Some("Rich Sherman"), Some(26), Some(new Timestamp(10000L))) ) val csvRows = Seq( "Matthew Perry,18,1973-01-25T00Z", "Michelle Pfeiffer,,1970-07-08T00Z", "George C,59,", "Rich Sherman,26,1991-10-12T00Z" ).map(str => (str.split(',') :+ "").take(3)) def readValues[T](r: FastFiloRowReader, len: Int)(f: FiloRowReader => T): Seq[T] = { (0 until len).map { i => r.rowNo = i f(r) } } it("should extract from columns back to rows") { val columnData = RowToVectorBuilder.buildFromRows(rows.map(TupleRowReader).toIterator, schema, BuilderEncoder.SimpleEncoding) val chunks = Array(columnData("name"), columnData("age"), columnData("timestamp")) val types = schema.map(_.dataType) val reader = new FastFiloRowReader(chunks, types.toArray) readValues(reader, 4)(_.getString(0)) should equal ( Seq("Matthew Perry", "Michelle Pfeiffer", "George C", "Rich Sherman")) reader.rowNo = 1 reader.notNull(1) should equal (false) reader.as[Timestamp](2) should equal (new Timestamp(10010L)) } it("should write to columns from ArrayStringRowReader and read back properly") { val columnData = RowToVectorBuilder.buildFromRows(csvRows.map(ArrayStringRowReader).toIterator, schema, BuilderEncoder.SimpleEncoding) val chunks = Array(columnData("name"), columnData("age"), columnData("timestamp")) val types = schema.map(_.dataType) val reader = new FastFiloRowReader(chunks, types.toArray) readValues(reader, 4)(_.getString(0)) should equal ( Seq("Matthew Perry", "Michelle Pfeiffer", "George C", "Rich Sherman")) reader.rowNo = 1 reader.notNull(1) should equal (false) reader.as[Timestamp](2) should equal (new Timestamp(DateTime.parse("1970-07-08T00Z").getMillis)) } it("should read longs from timestamp strings from ArrayStringRowReader") { ArrayStringRowReader(csvRows.head).getLong(2) should equal (96768000000L) } import org.velvia.filo.{vectors => bv} it("should append to BinaryAppendableVector from Readers with RowReaderAppender") { val readers = rows.map(TupleRowReader) val appenders = Seq( new IntReaderAppender(bv.IntBinaryVector.appendingVector(10), 1), new LongReaderAppender(bv.LongBinaryVector.appendingVector(10), 2) ) readers.foreach { r => appenders.foreach(_.append(r)) } val bufs = appenders.map(_.appender.optimize().toFiloBuffer).toArray val reader = new FastFiloRowReader(bufs, Array(classOf[Int], classOf[Long])) readValues(reader, 4)(_.getInt(0)) should equal (Seq(18, 0, 59, 26)) reader.rowNo = 1 reader.notNull(0) should equal (false) } import RowReader._ it("should compare RowReaders using TypedFieldExtractor") { val readers = rows.map(TupleRowReader) StringFieldExtractor.compare(readers(1), readers(2), 0) should be > (0) IntFieldExtractor.compare(readers(0), readers(2), 1) should be < (0) TimestampFieldExtractor.compare(readers(0), readers(3), 2) should equal (0) // Ok, we should be able to compare the reader with the NA / None too IntFieldExtractor.compare(readers(1), readers(2), 1) should be < (0) } }
Example 83
Source File: FastFiloRowReaderBenchmark.scala From filo with Apache License 2.0 | 5 votes |
package org.velvia.filo import java.sql.Timestamp import org.openjdk.jmh.annotations.Benchmark import org.openjdk.jmh.annotations.BenchmarkMode import org.openjdk.jmh.annotations.{Mode, State, Scope} import org.openjdk.jmh.annotations.OutputTimeUnit import scalaxy.loops._ import scala.language.postfixOps import java.util.concurrent.TimeUnit @State(Scope.Thread) class FastFiloRowReaderBenchmark { import VectorReader._ // Ok, create an IntColumn and benchmark it. val numValues = 10000 val randomInts = (0 until numValues).map(i => util.Random.nextInt) val randomLongs = randomInts.map(_.toLong) val randomTs = randomLongs.map(l => new Timestamp(l)) val chunks = Array(VectorBuilder(randomInts).toFiloBuffer, VectorBuilder(randomLongs).toFiloBuffer, VectorBuilder(randomTs).toFiloBuffer) val clazzes = Array[Class[_]](classOf[Int], classOf[Long], classOf[Timestamp]) // According to @ktosopl, be sure to return some value if possible so that JVM won't // optimize out the method body. However JMH is apparently very good at avoiding this. // fastest loop possible using FiloVectorApply method @Benchmark @BenchmarkMode(Array(Mode.AverageTime)) @OutputTimeUnit(TimeUnit.MICROSECONDS) def createFastFiloRowReader(): RowReader = { new FastFiloRowReader(chunks, clazzes) } val fastReader = new FastFiloRowReader(chunks, clazzes) @Benchmark @BenchmarkMode(Array(Mode.Throughput)) @OutputTimeUnit(TimeUnit.SECONDS) def fastFiloRowReaderReadOne(): Int = { fastReader.setRowNo(0) if (fastReader.notNull(0)) fastReader.getInt(0) + 1 else 0 } }
Example 84
Source File: KustoResponseDeserializer.scala From azure-kusto-spark with Apache License 2.0 | 5 votes |
package com.microsoft.kusto.spark.datasource import java.sql.Timestamp import java.util import com.microsoft.azure.kusto.data.{KustoResultColumn, KustoResultSetTable, Results} import com.microsoft.kusto.spark.utils.DataTypeMapping import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.types.{StructType, _} import org.joda.time.DateTime import scala.collection.JavaConverters._ import scala.collection.mutable import scala.collection.mutable.ArrayBuffer object KustoResponseDeserializer { def apply(kustoResult: KustoResultSetTable): KustoResponseDeserializer = new KustoResponseDeserializer(kustoResult) } // Timespan columns are casted to strings in kusto side. A simple test to compare the translation to a Duration string // in the format of timespan resulted in less performance. One way was using a new expression that extends UnaryExpression, // second was by a udf function, both were less performant. case class KustoSchema(sparkSchema: StructType, toStringCastedColumns: Set[String]) class KustoResponseDeserializer(val kustoResult: KustoResultSetTable) { val schema: KustoSchema = getSchemaFromKustoResult private def getValueTransformer(valueType: String): Any => Any = { valueType.toLowerCase() match { case "string" => value: Any => value case "int64" => value: Any => value case "datetime" => value: Any => new Timestamp(new DateTime(value).getMillis) case "timespan" => value: Any => value case "sbyte" => value: Any => value case "long" => value: Any => value match { case i: Int => i.toLong case _ => value.asInstanceOf[Long] } case "double" => value: Any => value case "decimal" => value: Any => BigDecimal(value.asInstanceOf[String]) case "int" => value: Any => value case "int32" => value: Any => value case "bool" => value: Any => value case "real" => value: Any => value case _ => value: Any => value.toString } } private def getSchemaFromKustoResult: KustoSchema = { if (kustoResult.getColumns.isEmpty) { KustoSchema(StructType(List()), Set()) } else { val columns = kustoResult.getColumns KustoSchema(StructType(columns.map(col => StructField(col.getColumnName, DataTypeMapping.kustoTypeToSparkTypeMap.getOrElse(col.getColumnType.toLowerCase, StringType)))), columns.filter(c => c.getColumnType.equalsIgnoreCase("TimeSpan")).map(c => c.getColumnName).toSet) } } def getSchema: KustoSchema = { schema } def toRows: java.util.List[Row] = { val columnInOrder = kustoResult.getColumns val value: util.ArrayList[Row] = new util.ArrayList[Row](kustoResult.count()) // Calculate the transformer function for each column to use later by order val valueTransformers: mutable.Seq[Any => Any] = columnInOrder.map(col => getValueTransformer(col.getColumnType)) kustoResult.getData.asScala.foreach(row => { val genericRow = row.toArray().zipWithIndex.map( column => { if (column._1 == null) null else valueTransformers(column._2)(column._1) }) value.add(new GenericRowWithSchema(genericRow, schema.sparkSchema)) }) value } // private def getOrderedColumnName = { // val columnInOrder = ArrayBuffer.fill(kustoResult.getColumnNameToIndex.size()){ "" } // kustoResult.getColumns.foreach((columnIndexPair: KustoResultColumn) => columnInOrder(columnIndexPair.) = columnIndexPair._1) // columnInOrder // } }
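The datetime branch of getValueTransformer above, in isolation: any value joda-time can parse becomes a java.sql.Timestamp (assumes joda-time on the classpath; names are illustrative):

import java.sql.Timestamp
import org.joda.time.DateTime

object KustoDatetime {
  // Same conversion as the "datetime" case above.
  def toTimestamp(value: Any): Timestamp = new Timestamp(new DateTime(value).getMillis)

  def main(args: Array[String]): Unit =
    println(toTimestamp("2018-06-25T09:00:00Z"))
}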
Example 85
Source File: FileOutputIT.scala From sparta with Apache License 2.0 | 5 votes |
package com.stratio.sparta import java.sql.Timestamp import java.util.UUID import com.github.nscala_time.time.Imports._ import com.stratio.sparta.sdk.pipeline.output.{Output, OutputFormatEnum, SaveModeEnum} import org.apache.log4j.{Level, Logger} import org.apache.spark.sql.SQLContext import org.apache.spark.{SparkConf, SparkContext} import org.scalatest._ import scala.reflect.io.File class FileOutputIT extends FlatSpec with ShouldMatchers with BeforeAndAfterAll { self: FlatSpec => @transient var sc: SparkContext = _ override def beforeAll { Logger.getRootLogger.setLevel(Level.ERROR) sc = FileOutputIT.getNewLocalSparkContext(1, "test") } override def afterAll { sc.stop() System.clearProperty("spark.driver.port") } trait CommonValues { val sqlContext = SQLContext.getOrCreate(sc) import sqlContext.implicits._ val time = new Timestamp(DateTime.now.getMillis) val data = sc.parallelize(Seq(Person("Kevin", 18, time), Person("Kira", 21, time), Person("Ariadne", 26, time))).toDF val tmpPath: String = s"/tmp/sparta-test/${UUID.randomUUID().toString}" } trait WithEventData extends CommonValues { val properties = Map("path" -> tmpPath, "createDifferentFiles" -> "false") val output = new FileOutput("file-test", properties) } "FileOutputIT" should "save a dataframe" in new WithEventData { output.save(data, SaveModeEnum.Append, Map(Output.TimeDimensionKey -> "minute", Output.TableNameKey -> "person")) val source = new java.io.File(tmpPath).listFiles() val read = sqlContext.read.json(tmpPath).toDF read.count shouldBe(3) File("/tmp/sparta-test").deleteRecursively } } object FileOutputIT { def getNewLocalSparkContext(numExecutors: Int = 1, title: String): SparkContext = { val conf = new SparkConf().setMaster(s"local[$numExecutors]").setAppName(title) SparkContext.getOrCreate(conf) } } case class Person(name: String, age: Int, minute: Timestamp) extends Serializable
Example 86
Source File: CubeWriterHelper.scala From sparta with Apache License 2.0 | 5 votes |
package com.stratio.sparta.driver.writer import java.sql.{Date, Timestamp} import akka.event.slf4j.SLF4JLogging import com.stratio.sparta.driver.factory.SparkContextFactory import com.stratio.sparta.driver.step.Cube import com.stratio.sparta.sdk.pipeline.aggregation.cube.{DimensionValue, DimensionValuesTime, MeasuresValues} import com.stratio.sparta.sdk.pipeline.output.Output import com.stratio.sparta.sdk.pipeline.schema.TypeOp import org.apache.spark.sql._ import org.apache.spark.streaming.dstream.DStream object CubeWriterHelper extends SLF4JLogging { def writeCube(cube: Cube, outputs: Seq[Output], stream: DStream[(DimensionValuesTime, MeasuresValues)]): Unit = { stream.map { case (dimensionValuesTime, measuresValues) => toRow(cube, dimensionValuesTime, measuresValues) }.foreachRDD(rdd => { if (!rdd.isEmpty()) { val sparkSession = SparkContextFactory.sparkSessionInstance val cubeDf = sparkSession.createDataFrame(rdd, cube.schema) val extraOptions = Map(Output.TableNameKey -> cube.name) val cubeAutoCalculatedFieldsDf = WriterHelper.write(cubeDf, cube.writerOptions, extraOptions, outputs) TriggerWriterHelper.writeTriggers(cubeAutoCalculatedFieldsDf, cube.triggers, cube.name, outputs) } else log.debug("Empty event received") }) } private[driver] def toRow(cube: Cube, dimensionValuesT: DimensionValuesTime, measures: MeasuresValues): Row = { val measuresSorted = measuresValuesSorted(measures.values) val rowValues = dimensionValuesT.timeConfig match { case None => val dimensionValues = dimensionsValuesSorted(dimensionValuesT.dimensionValues) dimensionValues ++ measuresSorted case Some(timeConfig) => val timeValue = Seq(timeFromDateType(timeConfig.eventTime, cube.dateType)) val dimFilteredByTime = filterDimensionsByTime(dimensionValuesT.dimensionValues, timeConfig.timeDimension) val dimensionValues = dimensionsValuesSorted(dimFilteredByTime) ++ timeValue val measuresValuesWithTime = measuresSorted dimensionValues ++ measuresValuesWithTime } Row.fromSeq(rowValues) } private[driver] def dimensionsValuesSorted(dimensionValues: Seq[DimensionValue]): Seq[Any] = dimensionValues.sorted.map(dimVal => dimVal.value) private[driver] def measuresValuesSorted(measures: Map[String, Option[Any]]): Seq[Any] = measures.toSeq.sortWith(_._1 < _._1).map(measure => measure._2.getOrElse(null)) private[driver] def filterDimensionsByTime(dimensionValues: Seq[DimensionValue], timeDimension: String): Seq[DimensionValue] = dimensionValues.filter(dimensionValue => dimensionValue.dimension.name != timeDimension) private[driver] def timeFromDateType(time: Long, dateType: TypeOp.Value): Any = { dateType match { case TypeOp.Date | TypeOp.DateTime => new Date(time) case TypeOp.Long => time case TypeOp.Timestamp => new Timestamp(time) case _ => time.toString } } }
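timeFromDateType above turns an epoch-millis Long into the configured output type; for the Timestamp case that is simply new Timestamp(time). A tiny standalone equivalent (names are illustrative):

import java.sql.{Date, Timestamp}

object TimeAs {
  def timestamp(millis: Long): Timestamp = new Timestamp(millis)
  def date(millis: Long): Date = new Date(millis)

  def main(args: Array[String]): Unit =
    println(s"${timestamp(0L)} / ${date(0L)}")  // the epoch rendered as Timestamp and Date
}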
Example 87
Source File: CubeMakerTest.scala From sparta with Apache License 2.0 | 5 votes |
package com.stratio.sparta.driver.test.cube import java.sql.Timestamp import com.github.nscala_time.time.Imports._ import com.stratio.sparta.driver.step.{Cube, CubeOperations, Trigger} import com.stratio.sparta.driver.writer.WriterOptions import com.stratio.sparta.plugin.default.DefaultField import com.stratio.sparta.plugin.cube.field.datetime.DateTimeField import com.stratio.sparta.plugin.cube.operator.count.CountOperator import com.stratio.sparta.sdk.pipeline.aggregation.cube.{Dimension, DimensionValue, DimensionValuesTime, InputFields} import com.stratio.sparta.sdk.pipeline.schema.TypeOp import com.stratio.sparta.sdk.utils.AggregationTime import org.apache.spark.sql.Row import org.apache.spark.sql.types.{LongType, StringType, StructField, StructType, TimestampType} import org.apache.spark.streaming.TestSuiteBase import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class CubeMakerTest extends TestSuiteBase { val PreserverOrder = false def getEventOutput(timestamp: Timestamp, millis: Long): Seq[Seq[(DimensionValuesTime, InputFields)]] = { val dimensionString = Dimension("dim1", "eventKey", "identity", new DefaultField) val dimensionTime = Dimension("minute", "minute", "minute", new DateTimeField) val dimensionValueString1 = DimensionValue(dimensionString, "value1") val dimensionValueString2 = dimensionValueString1.copy(value = "value2") val dimensionValueString3 = dimensionValueString1.copy(value = "value3") val dimensionValueTs = DimensionValue(dimensionTime, timestamp) val tsMap = Row(timestamp) val valuesMap1 = InputFields(Row("value1", timestamp), 1) val valuesMap2 = InputFields(Row("value2", timestamp), 1) val valuesMap3 = InputFields(Row("value3", timestamp), 1) Seq(Seq( (DimensionValuesTime("cubeName", Seq(dimensionValueString1, dimensionValueTs)), valuesMap1), (DimensionValuesTime("cubeName", Seq(dimensionValueString2, dimensionValueTs)), valuesMap2), (DimensionValuesTime("cubeName", Seq(dimensionValueString3, dimensionValueTs)), valuesMap3) )) } }
Example 88
Source File: AvroOutputIT.scala From sparta with Apache License 2.0 | 5 votes |
package com.stratio.sparta.plugin.output.avro import java.sql.Timestamp import java.time.Instant import com.databricks.spark.avro._ import com.stratio.sparta.plugin.TemporalSparkContext import com.stratio.sparta.sdk.pipeline.output.{Output, SaveModeEnum} import org.apache.spark.sql.types._ import org.apache.spark.sql.{Row, SparkSession} import org.junit.runner.RunWith import org.scalatest._ import org.scalatest.junit.JUnitRunner import scala.reflect.io.File import scala.util.Random @RunWith(classOf[JUnitRunner]) class AvroOutputIT extends TemporalSparkContext with Matchers { trait CommonValues { val tmpPath: String = File.makeTemp().name val sparkSession = SparkSession.builder().config(sc.getConf).getOrCreate() val schema = StructType(Seq( StructField("name", StringType), StructField("age", IntegerType), StructField("minute", LongType) )) val data = sparkSession.createDataFrame(sc.parallelize(Seq( Row("Kevin", Random.nextInt, Timestamp.from(Instant.now).getTime), Row("Kira", Random.nextInt, Timestamp.from(Instant.now).getTime), Row("Ariadne", Random.nextInt, Timestamp.from(Instant.now).getTime) )), schema) } trait WithEventData extends CommonValues { val properties = Map("path" -> tmpPath) val output = new AvroOutput("avro-test", properties) } "AvroOutput" should "throw an exception when path is not present" in { an[Exception] should be thrownBy new AvroOutput("avro-test", Map.empty) } it should "throw an exception when empty path " in { an[Exception] should be thrownBy new AvroOutput("avro-test", Map("path" -> " ")) } it should "save a dataframe " in new WithEventData { output.save(data, SaveModeEnum.Append, Map(Output.TableNameKey -> "person")) val read = sparkSession.read.avro(s"$tmpPath/person") read.count should be(3) read should be eq data File(tmpPath).deleteRecursively File("spark-warehouse").deleteRecursively } }
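The rows above carry the current time as epoch milliseconds in a LongType column, produced with Timestamp.from(Instant.now).getTime. A small sketch of the Instant/Timestamp round trip this relies on (plain JVM code, no Spark needed):

import java.sql.Timestamp
import java.time.Instant

object InstantTimestampRoundTrip {
  def main(args: Array[String]): Unit = {
    val instant: Instant = Instant.now()
    val ts: Timestamp    = Timestamp.from(instant) // java.time -> JDBC type, keeps nanoseconds
    val millis: Long     = ts.getTime              // epoch milliseconds, as stored in the test rows
    val back: Instant    = ts.toInstant            // JDBC type -> java.time
    println(s"$instant -> $millis -> $back")
  }
}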
Example 89
Source File: CsvOutputIT.scala From sparta with Apache License 2.0 | 5 votes |
package com.stratio.sparta.plugin.output.csv import java.sql.Timestamp import java.time.Instant import com.databricks.spark.avro._ import com.stratio.sparta.plugin.TemporalSparkContext import com.stratio.sparta.sdk.pipeline.output.{Output, SaveModeEnum} import org.apache.spark.sql.types._ import org.apache.spark.sql.{Row, SparkSession} import org.junit.runner.RunWith import org.scalatest._ import org.scalatest.junit.JUnitRunner import scala.reflect.io.File import scala.util.Random @RunWith(classOf[JUnitRunner]) class CsvOutputIT extends TemporalSparkContext with Matchers { trait CommonValues { val tmpPath: String = File.makeTemp().name val sparkSession = SparkSession.builder().config(sc.getConf).getOrCreate() val schema = StructType(Seq( StructField("name", StringType), StructField("age", IntegerType), StructField("minute", LongType) )) val data = sparkSession.createDataFrame(sc.parallelize(Seq( Row("Kevin", Random.nextInt, Timestamp.from(Instant.now).getTime), Row("Kira", Random.nextInt, Timestamp.from(Instant.now).getTime), Row("Ariadne", Random.nextInt, Timestamp.from(Instant.now).getTime) )), schema) } trait WithEventData extends CommonValues { val properties = Map("path" -> tmpPath) val output = new CsvOutput("csv-test", properties) } "CsvOutput" should "throw an exception when path is not present" in { an[Exception] should be thrownBy new CsvOutput("csv-test", Map.empty) } it should "throw an exception when empty path " in { an[Exception] should be thrownBy new CsvOutput("csv-test", Map("path" -> " ")) } it should "save a dataframe " in new WithEventData { output.save(data, SaveModeEnum.Append, Map(Output.TableNameKey -> "person")) val read = sparkSession.read.csv(s"$tmpPath/person.csv") read.count should be(3) read should be eq data File(tmpPath).deleteRecursively File("spark-warehouse").deleteRecursively } }
Example 90
Source File: TestJodaTimeVersionedEntityRepository.scala From slick-repo with MIT License | 5 votes |
package com.byteslounge.slickrepo.repository import java.sql.Timestamp import com.byteslounge.slickrepo.meta.{Versioned, VersionedEntity} import org.joda.time.Instant import slick.ast.BaseTypedType import com.byteslounge.slickrepo.scalaversion.JdbcProfile import com.byteslounge.slickrepo.version.JodaTimeVersionImplicits.instantVersionGenerator case class TestJodaTimeVersionedEntity(override val id: Option[Int], price: Double, override val version: Option[Instant]) extends VersionedEntity[TestJodaTimeVersionedEntity, Int, Instant] { def withId(id: Int): TestJodaTimeVersionedEntity = this.copy(id = Some(id)) def withVersion(version: Instant): TestJodaTimeVersionedEntity = this.copy(version = Some(version)) } class TestJodaTimeVersionedEntityRepository(override val driver: JdbcProfile) extends VersionedRepository[TestJodaTimeVersionedEntity, Int, Instant](driver) { import driver.api._ implicit val jodaTimeInstantToSqlTimestampMapper = MappedColumnType.base[Instant, Timestamp]( { instant => new java.sql.Timestamp(instant.getMillis) }, { sqlTimestamp => new Instant(sqlTimestamp.getTime) }) val pkType = implicitly[BaseTypedType[Int]] val versionType = implicitly[BaseTypedType[Instant]] val tableQuery = TableQuery[TestJodaTimeVersionedEntities] type TableType = TestJodaTimeVersionedEntities class TestJodaTimeVersionedEntities(tag: slick.lifted.Tag) extends Table[TestJodaTimeVersionedEntity](tag, "TJTV_ENTITY") with Versioned[Int, Instant] { def id = column[Int]("ID", O.PrimaryKey) def price = column[Double]("PRICE") def version = column[Instant]("VERSION") def * = (id.?, price, version.?) <> ((TestJodaTimeVersionedEntity.apply _).tupled, TestJodaTimeVersionedEntity.unapply) } }
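The repository stores its Joda Instant version column as a java.sql.Timestamp through a Slick MappedColumnType. Stripped of Slick, the two conversion functions it needs look like this (joda-time on the classpath is the only assumption):

import java.sql.Timestamp
import org.joda.time.Instant

object JodaTimestampConversions {
  // Joda Instant -> JDBC Timestamp (direction used when writing the VERSION column)
  def toSqlTimestamp(instant: Instant): Timestamp = new Timestamp(instant.getMillis)

  // JDBC Timestamp -> Joda Instant (direction used when reading it back)
  def toJodaInstant(ts: Timestamp): Instant = new Instant(ts.getTime)

  def main(args: Array[String]): Unit = {
    val original = Instant.now()
    val roundTripped = toJodaInstant(toSqlTimestamp(original))
    assert(roundTripped == original) // millisecond precision survives the round trip
    println(roundTripped)
  }
}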
Example 91
Source File: PredicatePushdownSuite.scala From spark-exasol-connector with Apache License 2.0 | 5 votes |
package com.exasol.spark import java.sql.Timestamp import org.apache.spark.sql.functions.col import com.holdenkarau.spark.testing.DataFrameSuiteBase import org.scalatest.funsuite.AnyFunSuite class PredicatePushdownSuite extends AnyFunSuite with BaseDockerSuite with DataFrameSuiteBase { test("with where clause build from filters: filter") { createDummyTable() import spark.implicits._ val df = spark.read .format("exasol") .option("host", container.host) .option("port", s"${container.port}") .option("query", s"SELECT * FROM $EXA_SCHEMA.$EXA_TABLE") .load() .filter($"id" < 3) .filter(col("city").like("Ber%")) .select("id", "city") val result = df.collect().map(x => (x.getLong(0), x.getString(1))).toSet assert(result.size === 1) assert(result === Set((1, "Berlin"))) } test("with where clause build from filters: createTempView and spark.sql") { createDummyTable() val df = spark.read .format("exasol") .option("host", container.host) .option("port", s"${container.port}") .option("query", s"SELECT * FROM $EXA_SCHEMA.$EXA_TABLE") .load() df.createOrReplaceTempView("myTable") val myDF = spark .sql("SELECT id, city FROM myTable WHERE id BETWEEN 1 AND 3 AND name < 'Japan'") val result = myDF.collect().map(x => (x.getLong(0), x.getString(1))).toSet assert(result.size === 2) assert(result === Set((1, "Berlin"), (2, "Paris"))) } test("date and timestamp should be read and filtered correctly") { import java.sql.Date createDummyTable() val df = spark.read .format("exasol") .option("host", container.host) .option("port", s"${container.port}") .option("query", s"SELECT date_info, updated_at FROM $EXA_SCHEMA.$EXA_TABLE") .load() val minTimestamp = Timestamp.valueOf("2017-12-30 00:00:00.0000") val testDate = Date.valueOf("2017-12-31") val resultDate = df.collect().map(_.getDate(0)) assert(resultDate.contains(testDate)) val resultTimestamp = df.collect().map(_.getTimestamp(1)).map(x => x.after(minTimestamp)) assert(!resultTimestamp.contains(false)) val filteredByDateDF = df.filter(col("date_info") === testDate) assert(filteredByDateDF.count() === 1) val filteredByTimestampDF = df.filter(col("updated_at") < minTimestamp) assert(filteredByTimestampDF.count() === 0) } test("count should be performed successfully") { createDummyTable() val df = spark.read .format("exasol") .option("host", container.host) .option("port", s"${container.port}") .option("query", s"SELECT * FROM $EXA_SCHEMA.$EXA_TABLE") .load() val result = df.count() assert(result === 3) } }
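The date and timestamp test builds its boundary values with Timestamp.valueOf and Date.valueOf, which only accept the JDBC escape formats yyyy-[m]m-[d]d hh:mm:ss[.f...] and yyyy-[m]m-[d]d respectively. A quick standalone illustration of those constructors and the after comparison used above:

import java.sql.{Date, Timestamp}

object JdbcLiteralFormats {
  def main(args: Array[String]): Unit = {
    val minTimestamp = Timestamp.valueOf("2017-12-30 00:00:00.0000") // fractional seconds optional
    val testDate     = Date.valueOf("2017-12-31")

    val later = Timestamp.valueOf("2018-01-01 12:00:00")
    println(later.after(minTimestamp)) // true, same check the test applies per row
    println(testDate)                  // 2017-12-31

    // A string in any other format throws IllegalArgumentException instead of returning null:
    // Timestamp.valueOf("30/12/2017")
  }
}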
Example 92
Source File: StructuredNetworkWordCountWindowed.scala From sparkoscope with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples.sql.streaming import java.sql.Timestamp import org.apache.spark.sql.SparkSession import org.apache.spark.sql.functions._ object StructuredNetworkWordCountWindowed { def main(args: Array[String]) { if (args.length < 3) { System.err.println("Usage: StructuredNetworkWordCountWindowed <hostname> <port>" + " <window duration in seconds> [<slide duration in seconds>]") System.exit(1) } val host = args(0) val port = args(1).toInt val windowSize = args(2).toInt val slideSize = if (args.length == 3) windowSize else args(3).toInt if (slideSize > windowSize) { System.err.println("<slide duration> must be less than or equal to <window duration>") } val windowDuration = s"$windowSize seconds" val slideDuration = s"$slideSize seconds" val spark = SparkSession .builder .appName("StructuredNetworkWordCountWindowed") .getOrCreate() import spark.implicits._ // Create DataFrame representing the stream of input lines from connection to host:port val lines = spark.readStream .format("socket") .option("host", host) .option("port", port) .option("includeTimestamp", true) .load() // Split the lines into words, retaining timestamps val words = lines.as[(String, Timestamp)].flatMap(line => line._1.split(" ").map(word => (word, line._2)) ).toDF("word", "timestamp") // Group the data by window and word and compute the count of each group val windowedCounts = words.groupBy( window($"timestamp", windowDuration, slideDuration), $"word" ).count().orderBy("window") // Start running the query that prints the windowed word counts to the console val query = windowedCounts.writeStream .outputMode("complete") .format("console") .option("truncate", "false") .start() query.awaitTermination() } } // scalastyle:on println
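The Timestamp-relevant part of the query above is the groupBy on window($"timestamp", windowDuration, slideDuration), which buckets each row by its event-time column. The same function can be exercised in batch mode on a few hand-made Timestamps, which makes the windowing easier to inspect (a local SparkSession is assumed):

import java.sql.Timestamp

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.window

object WindowOnTimestampSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder
      .appName("WindowOnTimestampSketch")
      .master("local[2]")
      .getOrCreate()
    import spark.implicits._

    val words = Seq(
      ("apache", Timestamp.valueOf("2018-11-12 09:42:05")),
      ("spark",  Timestamp.valueOf("2018-11-12 09:42:25")),
      ("apache", Timestamp.valueOf("2018-11-12 09:42:45"))
    ).toDF("word", "timestamp")

    // 30-second windows sliding every 15 seconds, the same shape as the streaming query
    words.groupBy(window($"timestamp", "30 seconds", "15 seconds"), $"word")
      .count()
      .orderBy("window")
      .show(truncate = false)

    spark.stop()
  }
}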
Example 93
Source File: XGBoostBigModelTimeSeries.scala From uberdata with Apache License 2.0 | 5 votes |
package org.apache.spark.ml import java.sql.Timestamp import eleflow.uberdata.IUberdataForecastUtil import eleflow.uberdata.core.data.DataTransformer import eleflow.uberdata.enums.SupportedAlgorithm import ml.dmlc.xgboost4j.scala.spark.XGBoostModel import org.apache.spark.annotation.DeveloperApi import org.apache.spark.ml.linalg.{VectorUDT, Vector => SparkVector} import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.param.shared.HasTimeCol import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.Dataset import org.apache.spark.sql.types.{StructField, _} class XGBoostBigModelTimeSeries[I](override val uid: String, override val models: Seq[(ParamMap, XGBoostModel)]) extends XGBoostBigModel[I](uid, models) with HasTimeCol{ def setTimecol(time: String): this.type = set(timeCol, Some(time)) override def transform(dataSet: Dataset[_]): DataFrame = { val prediction = predict(dataSet) val rows = dataSet.rdd .map { case (row: Row) => (DataTransformer.toFloat(row.getAs($(idCol))), (row.getAs[SparkVector](IUberdataForecastUtil.FEATURES_COL_NAME), row.getAs[java.sql.Timestamp]($(timeCol).get))) } .join(prediction) .map { case (id, ((features, time), predictValue)) => Row(id, features, time, SupportedAlgorithm.XGBoostAlgorithm.toString, predictValue) } dataSet.sqlContext.createDataFrame(rows, transformSchema(dataSet.schema)) } @DeveloperApi override def transformSchema(schema: StructType): StructType = StructType(Array( StructField($(idCol), FloatType), StructField(IUberdataForecastUtil.FEATURES_COL_NAME, new VectorUDT), StructField($(timeCol).get, TimestampType), StructField(IUberdataForecastUtil.ALGORITHM, StringType), StructField("prediction", FloatType) ) ) }
Example 94
Source File: MergeProjection.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command.mutation.merge import java.sql.{Date, Timestamp} import org.apache.spark.sql.{CarbonDatasourceHadoopRelation, Dataset, Row, SparkSession} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, GenericInternalRow, GenericRowWithSchema, InterpretedMutableProjection, Projection} import org.apache.spark.sql.catalyst.util.DateTimeUtils case class MergeProjection( @transient tableCols: Seq[String], @transient statusCol : String, @transient ds: Dataset[Row], @transient rltn: CarbonDatasourceHadoopRelation, @transient sparkSession: SparkSession, @transient mergeAction: MergeAction) { private val cutOffDate = Integer.MAX_VALUE >> 1 val isUpdate = mergeAction.isInstanceOf[UpdateAction] val isDelete = mergeAction.isInstanceOf[DeleteAction] def apply(row: GenericRowWithSchema): InternalRow = { // TODO we can avoid these multiple conversions if this is added as a SparkPlan node. val values = row.values.map { case s: String => org.apache.spark.unsafe.types.UTF8String.fromString(s) case d: java.math.BigDecimal => org.apache.spark.sql.types.Decimal.apply(d) case b: Array[Byte] => org.apache.spark.unsafe.types.UTF8String.fromBytes(b) case d: Date => DateTimeUtils.fromJavaDate(d) case t: Timestamp => DateTimeUtils.fromJavaTimestamp(t) case value => value } projection(new GenericInternalRow(values)).asInstanceOf[GenericInternalRow] } val (projection, output) = generateProjection private def generateProjection: (Projection, Array[Expression]) = { val existingDsOutput = rltn.carbonRelation.schema.toAttributes val colsMap = mergeAction match { case UpdateAction(updateMap) => updateMap case InsertAction(insertMap) => insertMap case _ => null } if (colsMap != null) { val output = new Array[Expression](tableCols.length) val expecOutput = new Array[Expression](tableCols.length) colsMap.foreach { case (k, v) => val tableIndex = tableCols.indexOf(k.toString().toLowerCase) if (tableIndex < 0) { throw new CarbonMergeDataSetException(s"Mapping is wrong $colsMap") } output(tableIndex) = v.expr.transform { case a: Attribute if !a.resolved => ds.queryExecution.analyzed.resolveQuoted(a.name, sparkSession.sessionState.analyzer.resolver).get } expecOutput(tableIndex) = existingDsOutput.find(_.name.equalsIgnoreCase(tableCols(tableIndex))).get } if (output.contains(null)) { throw new CarbonMergeDataSetException(s"Not all columns are mapped") } (new InterpretedMutableProjection(output++Seq( ds.queryExecution.analyzed.resolveQuoted(statusCol, sparkSession.sessionState.analyzer.resolver).get), ds.queryExecution.analyzed.output), expecOutput) } else { (null, null) } } }
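Before the values reach the InternalRow, the projection rewrites java.sql.Date and java.sql.Timestamp with DateTimeUtils, because Spark's internal row format stores dates as days since the epoch and timestamps as microseconds since the epoch. A minimal illustration of those two conversions (DateTimeUtils is a catalyst-internal utility, so the exact signatures can shift between Spark versions):

import java.sql.{Date, Timestamp}

import org.apache.spark.sql.catalyst.util.DateTimeUtils

object InternalRepresentationSketch {
  def main(args: Array[String]): Unit = {
    val ts = Timestamp.valueOf("2018-11-12 09:42:00")
    val d  = Date.valueOf("2018-11-12")

    val micros: Long = DateTimeUtils.fromJavaTimestamp(ts) // microseconds since the epoch
    val days: Int    = DateTimeUtils.fromJavaDate(d)       // days since the epoch

    println(s"$ts -> $micros microseconds")
    println(s"$d -> $days days")
  }
}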
Example 95
Source File: TimestampDataTypeNullDataTest.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.spark.testsuite.directdictionary import java.io.File import java.sql.Timestamp import org.apache.spark.sql.Row import org.apache.spark.sql.hive.HiveContext import org.scalatest.BeforeAndAfterAll import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.keygenerator.directdictionary.timestamp.TimeStampGranularityConstants import org.apache.carbondata.core.util.CarbonProperties import org.apache.spark.sql.test.util.QueryTest class TimestampDataTypeNullDataTest extends QueryTest with BeforeAndAfterAll { var hiveContext: HiveContext = _ override def beforeAll { try { CarbonProperties.getInstance() .addProperty(TimeStampGranularityConstants.CARBON_CUTOFF_TIMESTAMP, "2000-12-13 02:10.00.0") CarbonProperties.getInstance() .addProperty(TimeStampGranularityConstants.CARBON_TIME_GRANULARITY, TimeStampGranularityConstants.TIME_GRAN_SEC.toString ) sql( """CREATE TABLE IF NOT EXISTS timestampTyeNullData (ID Int, dateField Timestamp, country String, name String, phonetype String, serialname String, salary Int) STORED AS carbondata""" ) CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd") val csvFilePath = s"$resourcesPath/datasamplenull.csv" sql("LOAD DATA LOCAL INPATH '" + csvFilePath + "' INTO TABLE timestampTyeNullData").collect(); } catch { case x: Throwable => CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT) } } test("SELECT max(dateField) FROM timestampTyeNullData where dateField is not null") { checkAnswer( sql("SELECT max(dateField) FROM timestampTyeNullData where dateField is not null"), Seq(Row(Timestamp.valueOf("2015-07-23 00:00:00.0")) ) ) } test("SELECT * FROM timestampTyeNullData where dateField is null") { checkAnswer( sql("SELECT dateField FROM timestampTyeNullData where dateField is null"), Seq(Row(null) )) } override def afterAll { sql("drop table timestampTyeNullData") CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT) CarbonProperties.getInstance().addProperty("carbon.direct.dictionary", "false") } }
Example 96
Source File: TimestampDataTypeDirectDictionaryWithNoDictTestCase.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.spark.testsuite.directdictionary import java.sql.Timestamp import org.apache.spark.sql.Row import org.apache.spark.sql.hive.HiveContext import org.scalatest.BeforeAndAfterAll import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.keygenerator.directdictionary.timestamp.TimeStampGranularityConstants import org.apache.carbondata.core.util.CarbonProperties import org.apache.spark.sql.test.util.QueryTest class TimestampDataTypeDirectDictionaryWithNoDictTestCase extends QueryTest with BeforeAndAfterAll { var hiveContext: HiveContext = _ override def beforeAll { CarbonProperties.getInstance() .addProperty(TimeStampGranularityConstants.CARBON_CUTOFF_TIMESTAMP, "2000-12-13 02:10.00.0") CarbonProperties.getInstance() .addProperty(TimeStampGranularityConstants.CARBON_TIME_GRANULARITY, TimeStampGranularityConstants.TIME_GRAN_SEC.toString ) CarbonProperties.getInstance().addProperty("carbon.direct.dictionary", "true") sql( """ CREATE TABLE IF NOT EXISTS directDictionaryTable (empno String, doj Timestamp, salary Int) STORED AS carbondata""" ) val csvFilePath = s"$resourcesPath/datasample.csv" sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE directDictionaryTable OPTIONS" + "('DELIMITER'= ',', 'QUOTECHAR'= '\"')") } test("select doj from directDictionaryTable") { checkAnswer( sql("select doj from directDictionaryTable"), Seq(Row(Timestamp.valueOf("2016-03-14 15:00:09.0")), Row(Timestamp.valueOf("2016-04-14 15:00:09.0")), Row(null) ) ) } test("select doj from directDictionaryTable with equals filter") { checkAnswer( sql("select doj from directDictionaryTable where doj='2016-03-14 15:00:09'"), Seq(Row(Timestamp.valueOf("2016-03-14 15:00:09"))) ) } test("select doj from directDictionaryTable with greater than filter") { checkAnswer( sql("select doj from directDictionaryTable where doj>'2016-03-14 15:00:09'"), Seq(Row(Timestamp.valueOf("2016-04-14 15:00:09"))) ) } override def afterAll { sql("drop table directDictionaryTable") CarbonProperties.getInstance().addProperty("carbon.direct.dictionary", "false") } }
Example 97
Source File: TimestampNoDictionaryColumnTestCase.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.spark.testsuite.directdictionary import java.sql.Timestamp import org.apache.spark.sql.Row import org.scalatest.BeforeAndAfterAll import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.util.CarbonProperties import org.apache.spark.sql.test.util.QueryTest class TimestampNoDictionaryColumnTestCase extends QueryTest with BeforeAndAfterAll { override def beforeAll { CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy") sql("drop table if exists timestamp_nodictionary") sql( """ CREATE TABLE IF NOT EXISTS timestamp_nodictionary (empno int, empname String, designation String, doj Timestamp, workgroupcategory int, workgroupcategoryname String, projectcode int, projectjoindate Timestamp, projectenddate Timestamp, attendance int, utilization int, salary Int) STORED AS carbondata""" ) val csvFilePath = s"$resourcesPath/data_beyond68yrs.csv" sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE timestamp_nodictionary OPTIONS" + "('DELIMITER'= ',', 'QUOTECHAR'= '\"')") } test("select projectjoindate, projectenddate from timestamp_nodictionary") { checkAnswer( sql("select projectjoindate, projectenddate from timestamp_nodictionary"), Seq(Row(Timestamp.valueOf("2000-01-29 00:00:00.0"), Timestamp.valueOf("2016-06-29 00:00:00.0")), Row(Timestamp.valueOf("1800-02-17 00:00:00.0"), Timestamp.valueOf("1900-11-29 00:00:00.0")), Row(null, Timestamp.valueOf("2016-05-29 00:00:00.0")), Row(null, Timestamp.valueOf("2016-11-30 00:00:00.0")), Row(Timestamp.valueOf("3000-10-22 00:00:00.0"), Timestamp.valueOf("3002-11-15 00:00:00.0")), Row(Timestamp.valueOf("1802-06-29 00:00:00.0"), Timestamp.valueOf("1902-12-30 00:00:00.0")), Row(null, Timestamp.valueOf("2016-12-30 00:00:00.0")), Row(Timestamp.valueOf("2038-11-14 00:00:00.0"), Timestamp.valueOf("2041-12-29 00:00:00.0")), Row(null, null), Row(Timestamp.valueOf("2014-09-15 00:00:00.0"), Timestamp.valueOf("2016-05-29 00:00:00.0")) ) ) } test("select projectjoindate, projectenddate from timestamp_nodictionary where in filter") { checkAnswer( sql("select projectjoindate, projectenddate from timestamp_nodictionary where projectjoindate in" + "('1800-02-17 00:00:00','3000-10-22 00:00:00') or projectenddate in ('1900-11-29 00:00:00'," + "'3002-11-15 00:00:00','2041-12-29 00:00:00')"), Seq(Row(Timestamp.valueOf("1800-02-17 00:00:00.0"), Timestamp.valueOf("1900-11-29 00:00:00.0")), Row(Timestamp.valueOf("3000-10-22 00:00:00.0"), Timestamp.valueOf("3002-11-15 00:00:00.0")), Row(Timestamp.valueOf("2038-11-14 00:00:00.0"), Timestamp.valueOf("2041-12-29 00:00:00.0"))) ) } override def afterAll { CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT) sql("drop table timestamp_nodictionary") } }
Example 98
Source File: Commons.scala From spark-structured-streaming with MIT License | 5 votes |
package com.kafkaToSparkToCass import java.sql.Timestamp import java.text.{DateFormat, SimpleDateFormat} object Commons { case class UserEvent(user_id: String, time: Timestamp, event: String) extends Serializable def getTimeStamp(timeStr: String): Timestamp = { val dateFormat1: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") val dateFormat2: DateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss") val date: Option[Timestamp] = { try { Some(new Timestamp(dateFormat1.parse(timeStr).getTime)) } catch { case e: java.text.ParseException => Some(new Timestamp(dateFormat2.parse(timeStr).getTime)) } } date.getOrElse(Timestamp.valueOf(timeStr)) } }
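getTimeStamp above tries two SimpleDateFormat patterns and only then falls back to Timestamp.valueOf, using the catch block of the first parse to drive the second. An equivalent chain can be expressed with scala.util.Try so that each pattern is attempted in turn and the Timestamp.valueOf fallback is genuinely the last resort (an alternative sketch, not the project's code):

import java.sql.Timestamp
import java.text.SimpleDateFormat

import scala.util.{Success, Try}

object TimestampParsingSketch {
  private val patterns = Seq("yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd'T'HH:mm:ss")

  def parse(timeStr: String): Timestamp =
    patterns.view
      .map(p => Try(new Timestamp(new SimpleDateFormat(p).parse(timeStr).getTime)))
      .collectFirst { case Success(ts) => ts }
      .getOrElse(Timestamp.valueOf(timeStr)) // last resort: JDBC escape format

  def main(args: Array[String]): Unit = {
    println(parse("2018-11-12 09:42:00"))
    println(parse("2018-11-12T09:42:00"))
  }
}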
Example 99
Source File: Statements.scala From spark-structured-streaming with MIT License | 5 votes |
package com.kafkaToSparkToCass import java.sql.Timestamp import com.datastax.driver.core.Session object Statements extends Serializable { def cql(id: String, time: Timestamp, ename: String): String = s""" insert into my_keyspace.test_table (user_id,time,event) values('$id', '$time', '$ename event')""" def createKeySpaceAndTable(session: Session, dropTable: Boolean = false) = { session.execute( """CREATE KEYSPACE if not exists my_keyspace WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 };""") if (dropTable) session.execute("""drop table if exists my_keyspace.test_table""") session.execute( """create table if not exists my_keyspace.test_table ( user_id text, time timestamp, event text, primary key((user_id), time) ) WITH CLUSTERING ORDER BY (time DESC)""") } }
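The statement above interpolates the Timestamp directly into the CQL string, so it depends on Timestamp.toString producing a literal Cassandra will accept. With the same DataStax 3.x Session the value can instead be bound as a typed parameter through a prepared statement; java.sql.Timestamp extends java.util.Date, which is how the 3.x driver represents CQL timestamps, so it can be passed to bind as-is. A sketch under those assumptions, reusing the keyspace and table from the example:

import java.sql.Timestamp

import com.datastax.driver.core.{PreparedStatement, Session}

object PreparedInsertSketch {
  // In real code the statement would be prepared once and the BoundStatement reused.
  def insertEvent(session: Session, id: String, time: Timestamp, ename: String): Unit = {
    val prepared: PreparedStatement = session.prepare(
      "insert into my_keyspace.test_table (user_id, time, event) values (?, ?, ?)")
    session.execute(prepared.bind(id, time, s"$ename event"))
  }
}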
Example 100
Source File: database.scala From franklin with Apache License 2.0 | 5 votes |
package com.azavea.franklin import cats.implicits._ import com.azavea.stac4s.TemporalExtent import doobie.implicits.javasql._ import doobie.util.meta.Meta import doobie.util.{Read, Write} import io.circe.{Decoder, Encoder} import java.sql.Timestamp import java.time.Instant package object database extends CirceJsonbMeta with GeotrellisWktMeta with Filterables { implicit val instantMeta: Meta[Instant] = Meta[Timestamp].imap(_.toInstant)(Timestamp.from) implicit val instantRead: Read[Instant] = Read[Timestamp].imap(_.toInstant)(Timestamp.from) implicit val instantWrite: Write[Instant] = Write[Timestamp].imap(_.toInstant)(Timestamp.from) def stringToInstant: String => Either[Throwable, Instant] = (s: String) => Either.catchNonFatal(Instant.parse(s)) def temporalExtentToString(te: TemporalExtent): String = { te.value match { case Some(start) :: Some(end) :: _ if start != end => s"${start.toString}/${end.toString}" case Some(start) :: Some(end) :: _ if start == end => s"${start.toString}" case Some(start) :: None :: _ => s"${start.toString}/.." case None :: Some(end) :: _ => s"../${end.toString}" } } def temporalExtentFromString(str: String): Either[String, TemporalExtent] = { str.split("/").toList match { case ".." :: endString :: _ => val parsedEnd: Either[Throwable, Instant] = stringToInstant(endString) parsedEnd match { case Left(_) => Left(s"Could not decode instant: $str") case Right(end: Instant) => Right(TemporalExtent(None, end)) } case startString :: ".." :: _ => val parsedStart: Either[Throwable, Instant] = stringToInstant(startString) parsedStart match { case Left(_) => Left(s"Could not decode instant: $str") case Right(start: Instant) => Right(TemporalExtent(start, None)) } case startString :: endString :: _ => val parsedStart: Either[Throwable, Instant] = stringToInstant(startString) val parsedEnd: Either[Throwable, Instant] = stringToInstant(endString) (parsedStart, parsedEnd).tupled match { case Left(_) => Left(s"Could not decode instant: $str") case Right((start: Instant, end: Instant)) => Right(TemporalExtent(start, end)) } case _ => Either.catchNonFatal(Instant.parse(str)) match { case Left(_) => Left(s"Could not decode instant: $str") case Right(t: Instant) => Right(TemporalExtent(t, t)) } } } implicit val encoderTemporalExtent: Encoder[TemporalExtent] = Encoder.encodeString.contramap[TemporalExtent] { extent => temporalExtentToString(extent) } implicit val decoderTemporalExtent: Decoder[TemporalExtent] = Decoder.decodeString.emap { str => temporalExtentFromString(str) } }
Example 101
Source File: MQTTStreamWordCount.scala From bahir with Apache License 2.0 | 5 votes |
package org.apache.bahir.examples.sql.streaming.mqtt import java.sql.Timestamp import org.apache.spark.sql.SparkSession object MQTTStreamWordCount { def main(args: Array[String]) { if (args.length < 2) { System.err.println("Usage: MQTTStreamWordCount <brokerUrl> <topic>") // scalastyle:off println System.exit(1) } val brokerUrl = args(0) val topic = args(1) val spark = SparkSession .builder .appName("MQTTStreamWordCount") .master("local[4]") .getOrCreate() import spark.implicits._ // Create DataFrame representing the stream of input lines from connection to mqtt server val lines = spark.readStream .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSourceProvider") .option("topic", topic).option("persistence", "memory") .load(brokerUrl).selectExpr("CAST(payload AS STRING)").as[String] // Split the lines into words val words = lines.flatMap(_.split(" ")) // Generate running word count val wordCounts = words.groupBy("value").count() // Start running the query that prints the running counts to the console val query = wordCounts.writeStream .outputMode("complete") .format("console") .start() query.awaitTermination() } }
Example 102
Source File: AkkaStreamWordCount.scala From bahir with Apache License 2.0 | 5 votes |
package org.apache.bahir.examples.sql.streaming.akka import java.sql.Timestamp import org.apache.spark.sql.SparkSession object AkkaStreamWordCount { def main(args: Array[String]): Unit = { if (args.length < 1) { System.err.println("Usage: AkkaStreamWordCount <urlOfPublisher>") // scalastyle:off println System.exit(1) } val urlOfPublisher = args(0) val spark = SparkSession .builder() .appName("AkkaStreamWordCount") .master("local[4]") .getOrCreate() import spark.implicits._ // Create DataFrame representing the stream of input lines from connection // to publisher or feeder actor val lines = spark.readStream .format("org.apache.bahir.sql.streaming.akka.AkkaStreamSourceProvider") .option("urlOfPublisher", urlOfPublisher) .load().as[(String, Timestamp)] // Split the lines into words val words = lines.map(_._1).flatMap(_.split(" ")) // Generate running word count val wordCounts = words.groupBy("value").count() // Start running the query that prints the running counts to the console val query = wordCounts.writeStream .outputMode("complete") .format("console") .start() query.awaitTermination() } }
Example 103
Source File: NetezzaFilters.scala From spark-netezza with Apache License 2.0 | 5 votes |
package com.ibm.spark.netezza import java.sql.{Date, Timestamp} import org.apache.commons.lang3.StringUtils import org.apache.spark.sql.sources._ def generateFilterExpr(f: Filter): Option[String] = { Option(f match { case EqualTo(attr, value) => s"$attr = ${quoteValue(value)}" case EqualNullSafe(attr, value) => s"(NOT ($attr != ${quoteValue(value)} OR $attr IS NULL OR " + s"${quoteValue(value)} IS NULL) OR ($attr IS NULL AND ${quoteValue(value)} IS NULL))" case LessThan(attr, value) => s"$attr < ${quoteValue(value)}" case GreaterThan(attr, value) => s"$attr > ${quoteValue(value)}" case LessThanOrEqual(attr, value) => s"$attr <= ${quoteValue(value)}" case GreaterThanOrEqual(attr, value) => s"$attr >= ${quoteValue(value)}" case IsNull(attr) => s"$attr IS NULL" case IsNotNull(attr) => s"$attr IS NOT NULL" case StringStartsWith(attr, value) => s"${attr} LIKE '${value}%'" case StringEndsWith(attr, value) => s"${attr} LIKE '%${value}'" case StringContains(attr, value) => s"${attr} LIKE '%${value}%'" case In(attr, value) => s"$attr IN (${quoteValue(value)})" case Not(f) => generateFilterExpr(f).map(p => s"(NOT ($p))").getOrElse(null) case Or(f1, f2) => val or = Seq(f1, f2).flatMap(generateFilterExpr(_)) if (or.size == 2) { or.map(p => s"($p)").mkString(" OR ") } else { null } case And(f1, f2) => val and = Seq(f1, f2).flatMap(generateFilterExpr(_)) if (and.size == 2) { and.map(p => s"($p)").mkString(" AND ") } else { null } case _ => null }) } }
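generateFilterExpr leans on a quoteValue helper that is not part of the snippet shown; its job is to render each pushed-down filter value as a SQL literal, quoting strings, dates and timestamps while leaving numbers bare. A plausible sketch of such a helper, written as an assumption for illustration rather than the connector's actual implementation:

import java.sql.{Date, Timestamp}

object QuoteValueSketch {
  // Render a pushed-down filter value as a SQL literal.
  def quoteValue(value: Any): String = value match {
    case null         => "NULL"
    case s: String    => s"'${s.replace("'", "''")}'"       // escape embedded quotes
    case t: Timestamp => s"'$t'"                            // yyyy-mm-dd hh:mm:ss.fffffffff
    case d: Date      => s"'$d'"                            // yyyy-mm-dd
    case xs: Array[_] => xs.map(quoteValue).mkString(", ")  // for IN (...)
    case other        => other.toString                     // numbers, booleans, ...
  }

  def main(args: Array[String]): Unit = {
    println(quoteValue("Berlin"))                                 // 'Berlin'
    println(quoteValue(Timestamp.valueOf("2017-12-30 00:00:00"))) // '2017-12-30 00:00:00.0'
    println(quoteValue(42))                                       // 42
  }
}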
Example 104
Source File: DefaultDatabaseOperationsTest.scala From Conseil with Apache License 2.0 | 5 votes |
package tech.cryptonomic.conseil.api.sql import java.sql.Timestamp import java.time.LocalDateTime import org.scalatest.concurrent.ScalaFutures import org.scalatest.{Matchers, WordSpec} import slick.jdbc.PostgresProfile.api._ import tech.cryptonomic.conseil.api.TezosInMemoryDatabaseSetup import tech.cryptonomic.conseil.api.sql.DefaultDatabaseOperations._ import tech.cryptonomic.conseil.common.testkit.InMemoryDatabase import tech.cryptonomic.conseil.common.tezos.Tables import tech.cryptonomic.conseil.common.tezos.Tables.FeesRow import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration._ import scala.language.postfixOps class DefaultDatabaseOperationsTest extends WordSpec with Matchers with InMemoryDatabase with TezosInMemoryDatabaseSetup with ScalaFutures { "The default database operations" should { val fees: List[FeesRow] = List.tabulate(5) { i => FeesRow( 1 + i, 3 + i, 5 + i, Timestamp.valueOf(LocalDateTime.of(2018, 11, 22, 12, 30)), s"$i-example", None, None ) } "count distinct elements in column properly" in { dbHandler.run(Tables.Fees ++= fees).isReadyWithin(5 seconds) shouldBe true dbHandler.run(countDistinct("tezos", "fees", "timestamp")).futureValue shouldBe 1 dbHandler.run(countDistinct("tezos", "fees", "low")).futureValue shouldBe 5 } "select distinct elements from column properly" in { dbHandler.run(Tables.Fees ++= fees).isReadyWithin(5 seconds) shouldBe true dbHandler.run(selectDistinct("tezos", "fees", "timestamp")).futureValue shouldBe List( "2018-11-22 12:30:00" ) dbHandler.run(selectDistinct("tezos", "fees", "low")).futureValue should contain theSameElementsAs List( "1", "2", "3", "4", "5" ) } "select distinct elements from column with 'like' properly" in { dbHandler.run(Tables.Fees ++= fees).isReadyWithin(5 seconds) shouldBe true dbHandler.run(selectDistinctLike("tezos", "fees", "kind", "1-")).futureValue shouldBe List( "1-example" ) } } }
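The fee rows fix their timestamp with Timestamp.valueOf(LocalDateTime.of(...)), a conversion that interprets the LocalDateTime in the JVM's default time zone. A tiny standalone illustration of that constructor and its inverse:

import java.sql.Timestamp
import java.time.LocalDateTime

object LocalDateTimeTimestampSketch {
  def main(args: Array[String]): Unit = {
    val ldt = LocalDateTime.of(2018, 11, 22, 12, 30)
    val ts: Timestamp       = Timestamp.valueOf(ldt) // wall-clock value, default JVM zone
    val back: LocalDateTime = ts.toLocalDateTime     // inverse conversion
    println(ts)   // 2018-11-22 12:30:00.0
    println(back) // 2018-11-22T12:30
  }
}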
Example 105
Source File: DefaultDatabaseOperationsTest.scala From Conseil with Apache License 2.0 | 5 votes |
package tech.cryptonomic.conseil.indexer.sql import java.sql.Timestamp import java.time.LocalDateTime import org.scalatest.concurrent.ScalaFutures import org.scalatest.{Matchers, WordSpec} import slick.jdbc.PostgresProfile.api._ import tech.cryptonomic.conseil.common.testkit.InMemoryDatabase import tech.cryptonomic.conseil.common.tezos.Tables import tech.cryptonomic.conseil.common.tezos.Tables.{Fees, FeesRow} import tech.cryptonomic.conseil.indexer.sql.DefaultDatabaseOperations._ import tech.cryptonomic.conseil.indexer.tezos.TezosInMemoryDatabaseSetup import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration._ import scala.language.postfixOps class DefaultDatabaseOperationsTest extends WordSpec with Matchers with InMemoryDatabase with TezosInMemoryDatabaseSetup with ScalaFutures { "The default database operations" should { val fees: List[FeesRow] = List.tabulate(5) { i => FeesRow( 1 + i, 3 + i, 5 + i, Timestamp.valueOf(LocalDateTime.of(2018, 11, 22, 12, 30)), s"$i-example", None, None ) } "insert data when table is empty" in { dbHandler.run(insertWhenEmpty[Fees](Tables.Fees, fees)).futureValue shouldBe Some(5) } "do not insert data when table is not empty" in { dbHandler.run(Tables.Fees ++= fees).isReadyWithin(5 seconds) shouldBe true dbHandler.run(insertWhenEmpty[Fees](Tables.Fees, fees)).futureValue.value shouldBe Some(0) } } }
Example 106
Source File: CustomerSerializers.scala From quiz-management-service with Apache License 2.0 | 5 votes |
package com.danielasfregola.quiz.management.serializers import java.sql.Timestamp import org.json4s.CustomSerializer import org.json4s.JsonAST.{JInt, JNull} object CustomSerializers { val all = List(CustomTimestampSerializer) } case object CustomTimestampSerializer extends CustomSerializer[Timestamp](format => ({ case JInt(x) => new Timestamp(x.longValue * 1000) case JNull => null }, { case date: Timestamp => JInt(date.getTime / 1000) }))
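The serializer above maps JSON integers holding epoch seconds to java.sql.Timestamp and back. A small usage sketch with json4s native serialization, assuming json4s-native is on the classpath and reusing the CustomSerializers object from the example; the Customer case class is hypothetical, added only to exercise the serializer:

import java.sql.Timestamp

import com.danielasfregola.quiz.management.serializers.CustomSerializers
import org.json4s.{DefaultFormats, Formats}
import org.json4s.native.Serialization

object TimestampSerializerUsage {
  case class Customer(name: String, createdAt: Timestamp) // hypothetical payload type

  def main(args: Array[String]): Unit = {
    implicit val formats: Formats = DefaultFormats ++ CustomSerializers.all

    val customer = Customer("Daniela", new Timestamp(1542015720L * 1000))
    val json = Serialization.write(customer)
    println(json) // e.g. {"name":"Daniela","createdAt":1542015720}

    val back = Serialization.read[Customer](json)
    println(back.createdAt) // the epoch second restored as a Timestamp
  }
}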
Example 109
Source File: DataFrameExtensions.scala From spark-powerbi-connector with Apache License 2.0 | 5 votes |
package com.microsoft.azure.powerbi.extensions import java.sql.Timestamp import java.util.Date import scala.collection.mutable.ListBuffer import com.microsoft.azure.powerbi.authentication.PowerBIAuthentication import com.microsoft.azure.powerbi.common.PowerBIUtils import com.microsoft.azure.powerbi.models.{table, PowerBIDatasetDetails} import org.apache.spark.sql.DataFrame object DataFrameExtensions { implicit def PowerBIDataFrame(dataFrame: DataFrame): PowerBIDataFrame = new PowerBIDataFrame(dataFrame: DataFrame) class PowerBIDataFrame(dataFrame: DataFrame) extends Serializable{ def toPowerBI(powerbiDatasetDetails: PowerBIDatasetDetails, powerbiTable: table, powerBIAuthentication: PowerBIAuthentication): Unit = { var authenticationToken: String = powerBIAuthentication.getAccessToken dataFrame.foreachPartition { partition => // PowerBI row limit in single request is 10,000. We limit it to 1000. partition.grouped(1000).foreach { group => { val powerbiRowListBuffer: ListBuffer[Map[String, Any]] = ListBuffer[Map[String, Any]]() group.foreach { record => { var powerbiRow: Map[String, Any] = Map[String, Any]() for (i <- 0 until record.length) { powerbiRow += (powerbiTable.columns(i).name -> record(i)) } powerbiRowListBuffer += powerbiRow } var attemptCount = 0 var pushSuccessful = false while (!pushSuccessful && attemptCount < this.retryCount) { try { PowerBIUtils.addMultipleRows(powerbiDatasetDetails, powerbiTable, powerbiRowListBuffer, authenticationToken) pushSuccessful = true } catch { case e: Exception => println(f"Exception inserting multiple rows: ${e.getMessage}") Thread.sleep(secondsBetweenRetry * 1000) attemptCount += 1 authenticationToken = powerBIAuthentication.refreshAccessToken } } } } } } } def countTimelineToPowerBI(powerbiDatasetDetails: PowerBIDatasetDetails, powerbiTable: table, powerBIAuthentication: PowerBIAuthentication): Unit = { var authenticationToken: String = powerBIAuthentication.getAccessToken val currentTimestamp = new Timestamp(new Date().getTime) val powerbiRow = Map(powerbiTable.columns.head.name -> currentTimestamp, powerbiTable.columns(1).name -> dataFrame.count()) var attemptCount = 0 var pushSuccessful = false while (!pushSuccessful && attemptCount < this.retryCount) { try { PowerBIUtils.addRow(powerbiDatasetDetails, powerbiTable, powerbiRow, authenticationToken) pushSuccessful = true } catch { case e: Exception => println("Exception inserting row: " + e.getMessage) Thread.sleep(secondsBetweenRetry * 1000) attemptCount += 1 authenticationToken = powerBIAuthentication.refreshAccessToken } } } private val retryCount: Int = 3 private val secondsBetweenRetry: Int = 1 } }
Example 110
Source File: RepositoryMetadata.scala From spark-nlp with Apache License 2.0 | 5 votes |
package com.johnsnowlabs.nlp.pretrained import java.sql.Timestamp case class RepositoryMetadata ( // Path to repository metadata file metadataFile: String, // Path to repository folder repoFolder: String, // Aws file metadata.json version version: String, // Last time metadata was downloaded lastMetadataDownloaded: Timestamp, // List of all available resources in repository metadata: List[ResourceMetadata] )
Example 111
Source File: TrainingHelper.scala From spark-nlp with Apache License 2.0 | 5 votes |
package com.johnsnowlabs.util import java.io.File import java.nio.file.{Files, Paths, StandardCopyOption} import java.sql.Timestamp import java.util.Date import com.johnsnowlabs.nlp.pretrained.ResourceType.ResourceType import com.johnsnowlabs.nlp.pretrained.{ResourceMetadata, ResourceType} import org.apache.commons.io.FileUtils import org.apache.spark.ml.util.MLWriter object TrainingHelper { def saveModel(name: String, language: Option[String], libVersion: Option[Version], sparkVersion: Option[Version], modelWriter: MLWriter, folder: String, category: Option[ResourceType] = Some(ResourceType.NOT_DEFINED) ): Unit = { // 1. Get current timestamp val timestamp = new Timestamp(new Date().getTime) // 2. Save model to file val file = Paths.get(folder, timestamp.toString).toString.replaceAllLiterally("\\", "/") modelWriter.save(file) // 3. Zip file val tempzipFile = Paths.get(folder, timestamp + ".zip") ZipArchiveUtil.zip(file, tempzipFile.toString) // 4. Set checksum val checksum = FileHelper.generateChecksum(tempzipFile.toString) // 5. Create resource metadata val meta = new ResourceMetadata(name, language, libVersion, sparkVersion, true, timestamp, true, category = category, checksum) val zipfile = Paths.get(meta.fileName) // 6. Move the zip Files.move(tempzipFile, zipfile, StandardCopyOption.REPLACE_EXISTING) // 7. Remove original file try { FileUtils.deleteDirectory(new File(file)) } catch { case _: java.io.IOException => //file lock may prevent deletion, ignore and continue } // 6. Add to metadata.json info about resource val metadataFile = Paths.get(folder, "metadata.json").toString ResourceMetadata.addMetadataToFile(metadataFile, meta) } }
Example 112
Source File: CloudTestResources.scala From spark-nlp with Apache License 2.0 | 5 votes |
package com.johnsnowlabs.nlp.pretrained import java.sql.Timestamp import com.johnsnowlabs.util.Version object CloudTestResources { val name_en_123_345_new = new ResourceMetadata( "name", Some("en"), Some(Version(1, 2, 3)), Some(Version(3, 4, 5)), true, new Timestamp(50) ) val name_en_12_34_old = new ResourceMetadata( "name", Some("en"), Some(Version(1, 2)), Some(Version(3, 4)), true, new Timestamp(1) ) val name_en_old = new ResourceMetadata( "name", Some("en"), None, None, true, new Timestamp(1) ) val name_en_new_disabled = new ResourceMetadata( "name", Some("en"), None, None, false, new Timestamp(1) ) val name_de = new ResourceMetadata( "name", Some("de"), None, None, true, new Timestamp(1) ) val all = List(name_en_123_345_new, name_en_12_34_old, name_en_old, name_en_new_disabled, name_de) }
Example 113
Source File: ResourceDownloaderSpec.scala From spark-nlp with Apache License 2.0 | 5 votes |
package com.johnsnowlabs.nlp.pretrained import java.sql.Timestamp import com.johnsnowlabs.util.Version import org.scalatest.FlatSpec class ResourceDownloaderSpec extends FlatSpec { val b = CloudTestResources "CloudResourceMetadata" should "serialize and deserialize correctly" in { val resource = new ResourceMetadata("name", Some("en"), Some(Version(1,2,3)), Some(Version(5,4,3)), true, new Timestamp(123213)) val json = ResourceMetadata.toJson(resource) val deserialized = ResourceMetadata.parseJson(json) assert(deserialized == resource) } "CloudResourceDownloader" should "choose the newest versions" in { val found = ResourceMetadata.resolveResource(b.all, ResourceRequest("name", Some("en"), "", Version(1, 2, 3), Version(3, 4, 5))) assert(found.isDefined) assert(found.get == b.name_en_123_345_new) } "CloudResourceDownloader" should "filter disabled resources" in { val found = ResourceMetadata.resolveResource(List(b.name_en_new_disabled), ResourceRequest("name", Some("en"), "", Version(1, 2, 3), Version(3, 4, 5))) assert(found.isEmpty) } "CloudResourceDownloader" should "filter language and allow empty versions" in { val found = ResourceMetadata.resolveResource(List(b.name_en_old, b.name_de), ResourceRequest("name", Some("en"), "", Version(1, 2, 3), Version(3, 4, 5))) assert(found.isDefined) assert(found.get == b.name_en_old) } }
Example 114
Source File: TimeBasedDataService.scala From kafka-jdbc-connector with Apache License 2.0 | 5 votes |
package com.agoda.kafka.connector.jdbc.services import java.sql.{Connection, PreparedStatement, ResultSet, Timestamp} import java.util.{Date, GregorianCalendar, TimeZone} import com.agoda.kafka.connector.jdbc.JdbcSourceConnectorConstants import com.agoda.kafka.connector.jdbc.models.DatabaseProduct import com.agoda.kafka.connector.jdbc.models.DatabaseProduct.{MsSQL, MySQL} import com.agoda.kafka.connector.jdbc.models.Mode.TimestampMode import com.agoda.kafka.connector.jdbc.utils.DataConverter import org.apache.kafka.connect.data.Schema import org.apache.kafka.connect.source.SourceRecord import scala.collection.JavaConverters._ import scala.collection.mutable.ListBuffer import scala.util.Try case class TimeBasedDataService(databaseProduct: DatabaseProduct, storedProcedureName: String, batchSize: Int, batchSizeVariableName: String, timestampVariableName: String, var timestampOffset: Long, timestampFieldName: String, topic: String, keyFieldOpt: Option[String], dataConverter: DataConverter, calendar: GregorianCalendar = new GregorianCalendar(TimeZone.getTimeZone("UTC")) ) extends DataService { override def createPreparedStatement(connection: Connection): Try[PreparedStatement] = Try { val preparedStatement = databaseProduct match { case MsSQL => connection.prepareStatement(s"EXECUTE $storedProcedureName @$timestampVariableName = ?, @$batchSizeVariableName = ?") case MySQL => connection.prepareStatement(s"CALL $storedProcedureName (@$timestampVariableName := ?, @$batchSizeVariableName := ?)") } preparedStatement.setTimestamp(1, new Timestamp(timestampOffset), calendar) preparedStatement.setObject(2, batchSize) preparedStatement } override def extractRecords(resultSet: ResultSet, schema: Schema): Try[Seq[SourceRecord]] = Try { val sourceRecords = ListBuffer.empty[SourceRecord] var max = timestampOffset while (resultSet.next()) { dataConverter.convertRecord(schema, resultSet) map { record => val time = record.get(timestampFieldName).asInstanceOf[Date].getTime max = if(time > max) { keyFieldOpt match { case Some(keyField) => sourceRecords += new SourceRecord( Map(JdbcSourceConnectorConstants.STORED_PROCEDURE_NAME_KEY -> storedProcedureName).asJava, Map(TimestampMode.entryName -> time).asJava, topic, null, schema, record.get(keyField), schema, record ) case None => sourceRecords += new SourceRecord( Map(JdbcSourceConnectorConstants.STORED_PROCEDURE_NAME_KEY -> storedProcedureName).asJava, Map(TimestampMode.entryName -> time).asJava, topic, schema, record ) } time } else max } } timestampOffset = max sourceRecords } override def toString: String = { s""" |{ | "name" : "${this.getClass.getSimpleName}" | "mode" : "${TimestampMode.entryName}" | "stored-procedure.name" : "$storedProcedureName" |} """.stripMargin } }
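The service above binds its offset with the three-argument setTimestamp(index, timestamp, calendar), so the driver interprets the value against the supplied UTC calendar rather than the JVM's default time zone. A minimal JDBC sketch of that call, assuming only that the H2 driver is on the classpath for the in-memory database:

import java.sql.{DriverManager, Timestamp}
import java.util.{GregorianCalendar, TimeZone}

object SetTimestampWithCalendarSketch {
  def main(args: Array[String]): Unit = {
    val connection = DriverManager.getConnection("jdbc:h2:mem:demo")
    try {
      connection.createStatement().execute(
        "CREATE TABLE events(id INT, updated_at TIMESTAMP)")

      val stmt = connection.prepareStatement("SELECT id FROM events WHERE updated_at > ?")
      val utc  = new GregorianCalendar(TimeZone.getTimeZone("UTC"))
      // The Calendar tells the driver which zone the Timestamp should be interpreted in;
      // the two-argument setTimestamp(index, value) would use the JVM default zone instead.
      stmt.setTimestamp(1, new Timestamp(0L), utc)

      val rs = stmt.executeQuery()
      println(rs.next()) // false: the table is empty, the binding itself is the point
    } finally {
      connection.close()
    }
  }
}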
Example 115
Source File: OAuthAuthorizationTokensDal.scala From slick-akka-http-oauth2 with Apache License 2.0 | 5 votes |
package persistence.dals import java.security.SecureRandom import java.sql.Timestamp import org.joda.time.DateTime import persistence.entities.SlickTables.OauthAccessTokenTable import persistence.entities.{Account, OAuthAccessToken, OAuthClient} import slick.driver.H2Driver.api._ import slick.driver.JdbcProfile import utils.{Configuration, PersistenceModule} import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.Future import scala.util.Random trait OAuthAccessTokensDal extends BaseDalImpl[OauthAccessTokenTable,OAuthAccessToken]{ def create(account: Account, client: OAuthClient): Future[OAuthAccessToken] def delete(account: Account, client: OAuthClient): Future[Int] def refresh(account: Account, client: OAuthClient): Future[OAuthAccessToken] def findByAccessToken(accessToken: String): Future[Option[OAuthAccessToken]] def findByAuthorized(account: Account, clientId: String): Future[Option[OAuthAccessToken]] def findByRefreshToken(refreshToken: String): Future[Option[OAuthAccessToken]] } class OAuthAccessTokensDalImpl (modules: Configuration with PersistenceModule)(implicit override val db: JdbcProfile#Backend#Database) extends OAuthAccessTokensDal { override def create(account: Account, client: OAuthClient): Future[OAuthAccessToken] = { def randomString(length: Int) = new Random(new SecureRandom()).alphanumeric.take(length).mkString val accessToken = randomString(40) val refreshToken = randomString(40) val createdAt = new Timestamp(new DateTime().getMillis) val oauthAccessToken = new OAuthAccessToken( id = 0, accountId = account.id, oauthClientId = client.id, accessToken = accessToken, refreshToken = refreshToken, createdAt = createdAt ) insert(oauthAccessToken).map(id => oauthAccessToken.copy(id = id)) } override def delete(account: Account, client: OAuthClient): Future[Int] = { deleteByFilter( oauthToken => oauthToken.accountId === account.id && oauthToken.oauthClientId === client.id) } override def refresh(account: Account, client: OAuthClient): Future[OAuthAccessToken] = { delete(account, client) create(account, client) } override def findByAuthorized(account: Account, clientId: String): Future[Option[OAuthAccessToken]] = { val query = for { oauthClient <- modules.oauthClientsDal.tableQ token <- tableQ if oauthClient.id === token.oauthClientId && oauthClient.clientId === clientId && token.accountId === account.id } yield token db.run(query.result).map(_.headOption) } override def findByAccessToken(accessToken: String): Future[Option[OAuthAccessToken]] = { findByFilter(_.accessToken === accessToken).map(_.headOption) } override def findByRefreshToken(refreshToken: String): Future[Option[OAuthAccessToken]] = { val expireAt = new Timestamp(new DateTime().minusMonths(1).getMillis) findByFilter( token => token.refreshToken === refreshToken && token.createdAt > expireAt).map(_.headOption) } }
Example 116
Source File: OAuthAuthorizationCodesDal.scala From slick-akka-http-oauth2 with Apache License 2.0 | 5 votes |
package persistence.dals import java.sql.Timestamp import org.joda.time.DateTime import persistence.entities.OAuthAuthorizationCode import persistence.entities.SlickTables.OauthAuthorizationCodeTable import slick.driver.H2Driver.api._ import slick.driver.JdbcProfile import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.Future trait OAuthAuthorizationCodesDal extends BaseDalImpl[OauthAuthorizationCodeTable,OAuthAuthorizationCode]{ def findByCode(code: String): Future[Option[OAuthAuthorizationCode]] def delete(code: String): Future[Int] } class OAuthAuthorizationCodesDalImpl()(implicit override val db: JdbcProfile#Backend#Database) extends OAuthAuthorizationCodesDal { override def findByCode(code: String): Future[Option[OAuthAuthorizationCode]] = { val expireAt = new Timestamp(new DateTime().minusMinutes(30).getMillis) findByFilter(authCode => authCode.code === code && authCode.createdAt > expireAt).map(_.headOption) } override def delete(code: String): Future[Int] = deleteByFilter(_.code === code) }
Example 117
Source File: Boot.scala From slick-akka-http-oauth2 with Apache License 2.0 | 5 votes |
import java.sql.Timestamp import akka.http.scaladsl.Http import akka.http.scaladsl.server.RouteConcatenation import akka.stream.ActorMaterializer import org.joda.time.DateTime import persistence.entities.{Account, OAuthClient} import rest.OAuthRoutes import utils._ object Main extends App with RouteConcatenation { // configuring modules for application, cake pattern for DI val modules = new ConfigurationModuleImpl with ActorModuleImpl with PersistenceModuleImpl implicit val system = modules.system implicit val materializer = ActorMaterializer() implicit val ec = modules.system.dispatcher modules.generateDDL() for { createAccounts <- modules.accountsDal.insert(Seq( Account(0, "[email protected]", "48181acd22b3edaebc8a447868a7df7ce629920a", new Timestamp(new DateTime().getMillis)) // password:bob )) createOauthClients <- modules.oauthClientsDal.insert(Seq( OAuthClient(0, 1, "client_credentials", "bob_client_id", "bob_client_secret", Some("redirectUrl"), new Timestamp(new DateTime().getMillis)))) } yield { println(s"Database initialized with default values for bob and alice") } val bindingFuture = Http().bindAndHandle( new OAuthRoutes(modules).routes, "localhost", 8080) println(s"Server online at http://localhost:8080/") }
Example 118
Source File: Schema.scala From osmesa with Apache License 2.0 | 5 votes |
package osmesa.analytics.updater import java.sql.Timestamp import java.time.Instant import geotrellis.vectortile.Layer import org.apache.log4j.Logger import osmesa.analytics.updater.Implicits._ trait Schema { val layer: Layer val features: Map[String, (Option[AugmentedDiffFeature], AugmentedDiffFeature)] val newFeatures: Seq[VTFeature] lazy val replacementFeatures: Seq[VTFeature] = Seq.empty[VTFeature] lazy val retainedFeatures: Seq[VTFeature] = Seq.empty[VTFeature] protected lazy val logger: Logger = Logger.getLogger(getClass) protected lazy val touchedFeatures: Map[String, Seq[VTFeature]] = Map.empty[String, Seq[VTFeature]] protected lazy val versionInfo: Map[String, (Int, Int, Timestamp)] = touchedFeatures .mapValues(_.last) .mapValues( f => ( f.data("__version").toInt, f.data("__minorVersion").toInt, Timestamp.from(Instant.ofEpochMilli(f.data("__updated"))) )) protected lazy val minorVersions: Map[String, Int] = features .mapValues { case (_, curr) => curr.data } .map { case (id, f) => versionInfo.get(id) match { case Some((prevVersion, _, _)) if prevVersion < f.version => (id, 0) case Some((prevVersion, prevMinorVersion, _)) if prevVersion == f.version => (id, prevMinorVersion + 1) case _ => (id, 0) } } } trait SchemaBuilder { val layerName: String def apply(layer: Layer, features: Map[String, (Option[AugmentedDiffFeature], AugmentedDiffFeature)]): Schema }
Example 119
Source File: PostgresBookingViewRepository.scala From ticket-booking-aecor with Apache License 2.0 | 5 votes |
package ru.pavkin.booking.booking.view import java.sql.Timestamp import java.time.Instant import cats.Monad import cats.implicits._ import doobie._ import doobie.implicits._ import doobie.util.transactor.Transactor import io.circe.{ Decoder, Encoder, Json } import io.circe.parser._ import org.postgresql.util.PGobject import ru.pavkin.booking.common.models._ class PostgresBookingViewRepository[F[_]: Monad](transactor: Transactor[F], tableName: String = "bookings") extends BookingViewRepository[F] { implicit val jsonMeta: Meta[Json] = Meta.Advanced .other[PGobject]("json") .timap[Json](a => parse(a.getValue).leftMap[Json](e => throw e).merge)(a => { val o = new PGobject o.setType("json") o.setValue(a.noSpaces) o }) implicit val seatsMeta: Meta[List[Seat]] = jsonMeta.timap( j => Decoder[List[Seat]].decodeJson(j).right.get )(s => Encoder[List[Seat]].apply(s)) implicit val ticketsMeta: Meta[List[Ticket]] = jsonMeta.timap( j => Decoder[List[Ticket]].decodeJson(j).right.get )(s => Encoder[List[Ticket]].apply(s)) implicit val instantMeta: Meta[Instant] = Meta[Timestamp].timap(_.toInstant)(Timestamp.from) implicit val bookingStatusMeta: Meta[BookingStatus] = Meta[String].timap(BookingStatus.withName)(_.entryName) def get(bookingId: BookingKey): F[Option[BookingView]] = queryView(bookingId).option.transact(transactor) def byClient(clientId: ClientId): F[List[BookingView]] = queryForClient(clientId).to[List].transact(transactor) def set(view: BookingView): F[Unit] = Update[BookingView](setViewQuery).run(view).transact(transactor).void def expired(now: Instant): fs2.Stream[F, BookingKey] = queryExpired(now).stream.transact(transactor) def createTable: F[Unit] = createTableQuery.transact(transactor).void private val setViewQuery = s"""INSERT INTO $tableName (booking_id, client_id, concert_id, seats, tickets, status, confirmed_at, expires_at, version) VALUES (?,?,?,?,?,?,?,?,?) ON CONFLICT (booking_id) DO UPDATE SET tickets = EXCLUDED.tickets, status = EXCLUDED.status, confirmed_at = EXCLUDED.confirmed_at, expires_at = EXCLUDED.expires_at, version = EXCLUDED.version;""" private def queryView(bookingId: BookingKey) = (fr"SELECT * FROM " ++ Fragment.const(tableName) ++ fr"WHERE booking_id = $bookingId;") .query[BookingView] private def queryExpired(now: Instant) = (fr"SELECT booking_id FROM " ++ Fragment.const(tableName) ++ fr"WHERE status = ${BookingStatus.Confirmed: BookingStatus} AND expires_at < $now;") .query[BookingKey] private def queryForClient(clientId: ClientId) = (fr"SELECT * FROM " ++ Fragment.const(tableName) ++ fr"WHERE client_id = $clientId;") .query[BookingView] private val createTableQuery = (fr""" CREATE TABLE IF NOT EXISTS """ ++ Fragment.const(tableName) ++ fr""" ( booking_id text NOT NULL PRIMARY KEY, client_id text NOT NULL, concert_id text NOT NULL, seats json NOT NULL, tickets json NOT NULL, status text NOT NULL, confirmed_at timestamptz, expires_at timestamptz, version bigint NOT NULL ); """).update.run }
Example 120
Source File: SetDifferenceAndFilter.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.analytics.util import java.sql.Timestamp import org.apache.spark.sql.{Dataset, SparkSession} object SetDifferenceAndFilter { def apply(uuids1: Dataset[KeyFields], uuids2: Dataset[KeyFields], consistencyThreshold: Long, filterOutMeta: Boolean = false) (implicit spark: SparkSession): Dataset[KeyFields] = { import spark.implicits._ // The original setDifference implementation used the SQL except function, but that ignores any pre-partitioning. // The next implementation used a left-anti join, but that created a weird execution plan that caused poor performance. // The current implementation uses a outer join - which uses an efficient sort-merge join. def setDifference(uuids1: Dataset[KeyFields], uuids2: Dataset[KeyFields]): Dataset[KeyFields] = uuids1.join(uuids2, uuids1("uuid") === uuids2("uuid"), "left_outer") .filter(uuids2("uuid").isNull) .select(uuids1("*")) .as[KeyFields] // Calculate the set difference between the two sets of uuids. // The anti-join produces just the left side, and only the ones that are not in the right side. val positives = setDifference(uuids1, uuids2) val timeToConsistencyFilter = positives("lastModified") < new Timestamp(consistencyThreshold) val overallFilter = if (filterOutMeta) timeToConsistencyFilter && (positives("path") =!= "/" && positives("path") =!= "/meta" && !positives("path").startsWith("/meta/")) else timeToConsistencyFilter // Filter out any positives that occurred after the current threshold positives.filter(overallFilter) } }
Example 121
Source File: RowComparer.scala From spark-fast-tests with MIT License | 5 votes |
package com.github.mrpowers.spark.fast.tests import org.apache.spark.sql.Row import java.sql.Timestamp import scala.math.abs object RowComparer { def areRowsEqual(r1: Row, r2: Row, tol: Double): Boolean = { if (r1.length != r2.length) { return false } else { (0 until r1.length).foreach(idx => { if (r1.isNullAt(idx) != r2.isNullAt(idx)) { return false } if (!r1.isNullAt(idx)) { val o1 = r1.get(idx) val o2 = r2.get(idx) o1 match { case b1: Array[Byte] => if (!java.util.Arrays.equals( b1, o2.asInstanceOf[Array[Byte]] )) { return false } case f1: Float => if (java.lang.Float.isNaN(f1) != java.lang.Float.isNaN(o2.asInstanceOf[Float])) { return false } if (abs(f1 - o2.asInstanceOf[Float]) > tol) { return false } case d1: Double => if (java.lang.Double.isNaN(d1) != java.lang.Double.isNaN(o2.asInstanceOf[Double])) { return false } if (abs(d1 - o2.asInstanceOf[Double]) > tol) { return false } case d1: java.math.BigDecimal => if (d1.compareTo(o2.asInstanceOf[java.math.BigDecimal]) != 0) { return false } case t1: Timestamp => if (abs(t1.getTime - o2.asInstanceOf[Timestamp].getTime) > tol) { return false } case _ => if (o1 != o2) return false } } }) } true } }
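For Timestamp cells, areRowsEqual treats two values as equal when their millisecond epoch values differ by at most tol. The rule in isolation, as a small sketch:

import java.sql.Timestamp
import scala.math.abs

object TimestampToleranceCompare extends App {
  def equalWithin(t1: Timestamp, t2: Timestamp, tolMillis: Double): Boolean =
    abs(t1.getTime - t2.getTime) <= tolMillis

  val a = Timestamp.valueOf("2019-05-01 12:00:00.000")
  val b = Timestamp.valueOf("2019-05-01 12:00:00.004")

  println(equalWithin(a, b, tolMillis = 5)) // true, the values are 4 ms apart
  println(equalWithin(a, b, tolMillis = 2)) // false
}

Since getTime truncates the nanosecond field to milliseconds, sub-millisecond differences are invisible to this comparison.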
Example 122
Source File: BigQueryUtilsSpec.scala From comet-data-pipeline with Apache License 2.0 | 5 votes |
package com.ebiznext.comet.utils.conversion import java.sql.{Date, Timestamp} import com.ebiznext.comet.TestHelper import com.ebiznext.comet.config.SparkEnv import com.ebiznext.comet.utils.conversion.BigQueryUtils._ import com.ebiznext.comet.utils.conversion.syntax._ import org.apache.spark.sql.SparkSession import com.google.cloud.bigquery.{Field, StandardSQLTypeName, Schema => BQSchema} class BigQueryUtilsSpec extends TestHelper { new WithSettings() { val sparkEnv: SparkEnv = new SparkEnv("test") val session: SparkSession = sparkEnv.session import session.implicits._ "Spark Types" should "be converted to corresponding BQ Types" in { val res: BQSchema = List( ( 1, true, 2.5, "hello", 'x'.asInstanceOf[Byte], new Date(System.currentTimeMillis()), new Timestamp(System.currentTimeMillis()) ) ).toDF().to[BQSchema] //Schema{fields=[Field{name=value, type=INTEGER, mode=NULLABLE, description=, policyTags=null}]} val fields = List( Field .newBuilder("_1", StandardSQLTypeName.INT64) .setDescription("") .setMode(Field.Mode.NULLABLE) .build(), Field .newBuilder("_2", StandardSQLTypeName.BOOL) .setDescription("") .setMode(Field.Mode.NULLABLE) .build(), Field .newBuilder("_3", StandardSQLTypeName.FLOAT64) .setDescription("") .setMode(Field.Mode.NULLABLE) .build(), Field .newBuilder("_4", StandardSQLTypeName.STRING) .setDescription("") .setMode(Field.Mode.NULLABLE) .build(), Field .newBuilder("_5", StandardSQLTypeName.INT64) .setDescription("") .setMode(Field.Mode.NULLABLE) .build(), Field .newBuilder("_6", StandardSQLTypeName.DATE) .setDescription("") .setMode(Field.Mode.NULLABLE) .build(), Field .newBuilder("_7", StandardSQLTypeName.TIMESTAMP) .setDescription("") .setMode(Field.Mode.NULLABLE) .build() ) res.getFields should contain theSameElementsInOrderAs fields } } }
Example 123
Source File: StructuredNetworkWordCountWindowed.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples.sql.streaming import java.sql.Timestamp import org.apache.spark.sql.SparkSession import org.apache.spark.sql.functions._ object StructuredNetworkWordCountWindowed { def main(args: Array[String]) { if (args.length < 3) { System.err.println("Usage: StructuredNetworkWordCountWindowed <hostname> <port>" + " <window duration in seconds> [<slide duration in seconds>]") System.exit(1) } val host = args(0) val port = args(1).toInt val windowSize = args(2).toInt val slideSize = if (args.length == 3) windowSize else args(3).toInt if (slideSize > windowSize) { System.err.println("<slide duration> must be less than or equal to <window duration>") } val windowDuration = s"$windowSize seconds" val slideDuration = s"$slideSize seconds" val spark = SparkSession .builder .appName("StructuredNetworkWordCountWindowed") .getOrCreate() import spark.implicits._ // Create DataFrame representing the stream of input lines from connection to host:port val lines = spark.readStream .format("socket") .option("host", host) .option("port", port) .option("includeTimestamp", true) .load() // Split the lines into words, retaining timestamps val words = lines.as[(String, Timestamp)].flatMap(line => line._1.split(" ").map(word => (word, line._2)) ).toDF("word", "timestamp") // Group the data by window and word and compute the count of each group val windowedCounts = words.groupBy( window($"timestamp", windowDuration, slideDuration), $"word" ).count().orderBy("window") // Start running the query that prints the windowed word counts to the console val query = windowedCounts.writeStream .outputMode("complete") .format("console") .option("truncate", "false") .start() query.awaitTermination() } } // scalastyle:on println
Example 124
Source File: SchemaColumnSelection.scala From data-faker with MIT License | 5 votes |
package com.dunnhumby.datafaker.schema.table.columns import scala.reflect.runtime.universe.TypeTag import java.sql.{Date, Timestamp} import com.dunnhumby.datafaker.YamlParser.YamlParserProtocol import org.apache.spark.sql.Column import org.apache.spark.sql.functions.{rand, udf} case class SchemaColumnSelection[T](override val name: String, values: List[T])(implicit tag: TypeTag[T]) extends SchemaColumn { override def column(rowID: Option[Column] = None): Column = { val intToSelectionUDF = udf((index: Int) => { values(index) }) intToSelectionUDF(rand() * values.length % values.length) } } object SchemaColumnSelectionProtocol extends SchemaColumnSelectionProtocol trait SchemaColumnSelectionProtocol extends YamlParserProtocol { import net.jcazevedo.moultingyaml._ implicit object SchemaColumnSelectionFormat extends YamlFormat[SchemaColumnSelection[_]] { override def read(yaml: YamlValue): SchemaColumnSelection[_] = { val fields = yaml.asYamlObject.fields val YamlString(dataType) = fields.getOrElse(YamlString("data_type"), deserializationError("data_type not set")) val YamlString(name) = fields.getOrElse(YamlString("name"), deserializationError("name not set")) val values = fields.getOrElse(YamlString("values"), deserializationError("selection values not set")) dataType match { case SchemaColumnDataType.Int => SchemaColumnSelection(name, values.convertTo[List[Int]]) case SchemaColumnDataType.Long => SchemaColumnSelection(name, values.convertTo[List[Long]]) case SchemaColumnDataType.Float => SchemaColumnSelection(name, values.convertTo[List[Float]]) case SchemaColumnDataType.Double => SchemaColumnSelection(name, values.convertTo[List[Double]]) case SchemaColumnDataType.Date => SchemaColumnSelection(name, values.convertTo[List[Date]]) case SchemaColumnDataType.Timestamp => SchemaColumnSelection(name, values.convertTo[List[Timestamp]]) case SchemaColumnDataType.String => SchemaColumnSelection(name, values.convertTo[List[String]]) case _ => deserializationError(s"unsupported data_type: $dataType for ${SchemaColumnType.Selection}") } } override def write(obj: SchemaColumnSelection[_]): YamlValue = ??? } }
Example 125
Source File: SchemaColumnRandom.scala From data-faker with MIT License | 5 votes |
package com.dunnhumby.datafaker.schema.table.columns import java.sql.{Date, Timestamp} import com.dunnhumby.datafaker.YamlParser.YamlParserProtocol import org.apache.spark.sql.Column import org.apache.spark.sql.functions.{to_utc_timestamp, round, rand, from_unixtime, to_date} import org.apache.spark.sql.types.{IntegerType, LongType} trait SchemaColumnRandom[T] extends SchemaColumn object SchemaColumnRandom { val FloatDP = 3 val DoubleDP = 3 def apply(name: String, min: Int, max: Int): SchemaColumn = SchemaColumnRandomNumeric(name, min, max) def apply(name: String, min: Long, max: Long): SchemaColumn = SchemaColumnRandomNumeric(name, min, max) def apply(name: String, min: Float, max: Float): SchemaColumn = SchemaColumnRandomNumeric(name, min, max) def apply(name: String, min: Double, max: Double): SchemaColumn = SchemaColumnRandomNumeric(name, min, max) def apply(name: String, min: Date, max: Date): SchemaColumn = SchemaColumnRandomDate(name, min, max) def apply(name: String, min: Timestamp, max: Timestamp): SchemaColumn = SchemaColumnRandomTimestamp(name, min, max) def apply(name: String): SchemaColumn = SchemaColumnRandomBoolean(name) } private case class SchemaColumnRandomNumeric[T: Numeric](override val name: String, min: T, max: T) extends SchemaColumnRandom[T] { override def column(rowID: Option[Column] = None): Column = { import Numeric.Implicits._ (min, max) match { case (_: Int, _: Int) => round(rand() * (max - min) + min, 0).cast(IntegerType) case (_: Long, _: Long) => round(rand() * (max - min) + min, 0).cast(LongType) case (_: Float, _: Float) => round(rand() * (max - min) + min, SchemaColumnRandom.FloatDP) case (_: Double, _: Double) => round(rand() * (max - min) + min, SchemaColumnRandom.DoubleDP) } } } private case class SchemaColumnRandomTimestamp(override val name: String, min: Timestamp, max: Timestamp) extends SchemaColumnRandom[Timestamp] { override def column(rowID: Option[Column] = None): Column = { val minTime = min.getTime / 1000 val maxTime = max.getTime / 1000 to_utc_timestamp(from_unixtime(rand() * (maxTime - minTime) + minTime), "UTC") } } private case class SchemaColumnRandomDate(override val name: String, min: Date, max: Date) extends SchemaColumnRandom[Date] { val timestamp = SchemaColumnRandomTimestamp(name, new Timestamp(min.getTime), new Timestamp(max.getTime + 86400000)) override def column(rowID: Option[Column] = None): Column = to_date(timestamp.column()) } private case class SchemaColumnRandomBoolean(override val name: String) extends SchemaColumnRandom[Boolean] { override def column(rowID: Option[Column] = None): Column = rand() < 0.5f } object SchemaColumnRandomProtocol extends SchemaColumnRandomProtocol trait SchemaColumnRandomProtocol extends YamlParserProtocol { import net.jcazevedo.moultingyaml._ implicit object SchemaColumnRandomFormat extends YamlFormat[SchemaColumnRandom[_]] { override def read(yaml: YamlValue): SchemaColumnRandom[_] = { val fields = yaml.asYamlObject.fields val YamlString(name) = fields.getOrElse(YamlString("name"), deserializationError("name not set")) val YamlString(dataType) = fields.getOrElse(YamlString("data_type"), deserializationError(s"data_type not set for $name")) if (dataType == SchemaColumnDataType.Boolean) { SchemaColumnRandomBoolean(name) } else { val min = fields.getOrElse(YamlString("min"), deserializationError(s"min not set for $name")) val max = fields.getOrElse(YamlString("max"), deserializationError(s"max not set for $name")) dataType match { case SchemaColumnDataType.Int => SchemaColumnRandomNumeric(name, 
min.convertTo[Int], max.convertTo[Int]) case SchemaColumnDataType.Long => SchemaColumnRandomNumeric(name, min.convertTo[Long], max.convertTo[Long]) case SchemaColumnDataType.Float => SchemaColumnRandomNumeric(name, min.convertTo[Float], max.convertTo[Float]) case SchemaColumnDataType.Double => SchemaColumnRandomNumeric(name, min.convertTo[Double], max.convertTo[Double]) case SchemaColumnDataType.Date => SchemaColumnRandomDate(name, min.convertTo[Date], max.convertTo[Date]) case SchemaColumnDataType.Timestamp => SchemaColumnRandomTimestamp(name, min.convertTo[Timestamp], max.convertTo[Timestamp]) case _ => deserializationError(s"unsupported data_type: $dataType for ${SchemaColumnType.Random}") } } } override def write(obj: SchemaColumnRandom[_]): YamlValue = ??? } }
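SchemaColumnRandomTimestamp draws a uniform number of seconds between min.getTime / 1000 and max.getTime / 1000 inside a Spark expression. The same idea on the driver, with plain JDK types and second precision, might look like this rough sketch (the bounds are illustrative):

import java.sql.Timestamp
import scala.util.Random

object RandomTimestampSketch extends App {
  // Illustrative bounds; the real column reads min and max from YAML
  val min = Timestamp.valueOf("2018-01-01 00:00:00")
  val max = Timestamp.valueOf("2018-12-31 23:59:59")

  def randomTimestampBetween(min: Timestamp, max: Timestamp): Timestamp = {
    val minSecs = min.getTime / 1000
    val maxSecs = max.getTime / 1000
    val secs = minSecs + (Random.nextDouble() * (maxSecs - minSecs)).toLong
    new Timestamp(secs * 1000)
  }

  (1 to 3).foreach(_ => println(randomTimestampBetween(min, max)))
}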
Example 126
Source File: SchemaColumnSequential.scala From data-faker with MIT License | 5 votes |
package com.dunnhumby.datafaker.schema.table.columns import java.sql.{Date, Timestamp} import com.dunnhumby.datafaker.YamlParser.YamlParserProtocol import org.apache.spark.sql.Column import org.apache.spark.sql.functions.{to_utc_timestamp, from_unixtime, monotonically_increasing_id, to_date} trait SchemaColumnSequential[T] extends SchemaColumn object SchemaColumnSequential { def apply(name: String, start: Int, step: Int): SchemaColumn = SchemaColumnSequentialNumeric(name, start, step) def apply(name: String, start: Long, step: Long): SchemaColumn = SchemaColumnSequentialNumeric(name, start, step) def apply(name: String, start: Float, step: Float): SchemaColumn = SchemaColumnSequentialNumeric(name, start, step) def apply(name: String, start: Double, step: Double): SchemaColumn = SchemaColumnSequentialNumeric(name, start, step) def apply(name: String, start: Date, step: Int): SchemaColumn = SchemaColumnSequentialDate(name, start, step) def apply(name: String, start: Timestamp, step: Int): SchemaColumn = SchemaColumnSequentialTimestamp(name, start, step) } private case class SchemaColumnSequentialNumeric[T: Numeric](override val name: String, start: T, step: T) extends SchemaColumnSequential[T] { override def column(rowID: Option[Column] = Some(monotonically_increasing_id)): Column = (rowID.get * step) + start } private case class SchemaColumnSequentialTimestamp(override val name: String, start: Timestamp, stepSeconds: Int) extends SchemaColumnSequential[Timestamp] { override def column(rowID: Option[Column] = Some(monotonically_increasing_id)): Column = { val startTime = start.getTime / 1000 to_utc_timestamp(from_unixtime(rowID.get * stepSeconds + startTime), "UTC") } } private case class SchemaColumnSequentialDate(override val name: String, start: Date, stepDays: Int) extends SchemaColumnSequential[Date] { val timestamp = SchemaColumnSequentialTimestamp(name, new Timestamp(start.getTime), stepDays * 86400) override def column(rowID: Option[Column]): Column = to_date(timestamp.column()) } object SchemaColumnSequentialProtocol extends SchemaColumnSequentialProtocol trait SchemaColumnSequentialProtocol extends YamlParserProtocol { import net.jcazevedo.moultingyaml._ implicit object SchemaColumnSequentialFormat extends YamlFormat[SchemaColumnSequential[_]] { override def read(yaml: YamlValue): SchemaColumnSequential[_] = { val fields = yaml.asYamlObject.fields val YamlString(dataType) = fields.getOrElse(YamlString("data_type"), deserializationError("data_type not set")) val YamlString(name) = fields.getOrElse(YamlString("name"), deserializationError("name not set")) val start = fields.getOrElse(YamlString("start"), deserializationError("start not set")) val step = fields.getOrElse(YamlString("step"), deserializationError("step not set")) dataType match { case "Int" => SchemaColumnSequentialNumeric(name, start.convertTo[Int], step.convertTo[Int]) case "Long" => SchemaColumnSequentialNumeric(name, start.convertTo[Long], step.convertTo[Long]) case "Float" => SchemaColumnSequentialNumeric(name, start.convertTo[Float], step.convertTo[Float]) case "Double" => SchemaColumnSequentialNumeric(name, start.convertTo[Double], step.convertTo[Double]) case "Date" => SchemaColumnSequentialDate(name, start.convertTo[Date], step.convertTo[Int]) case "Timestamp" => SchemaColumnSequentialTimestamp(name, start.convertTo[Timestamp], step.convertTo[Int]) case _ => deserializationError(s"unsupported data_type: $dataType for ${SchemaColumnType.Sequential}") } } override def write(obj: SchemaColumnSequential[_]): 
YamlValue = ??? } }
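SchemaColumnSequentialTimestamp adds rowID * stepSeconds to the start time, working in whole seconds. Stripped of the Spark column expressions, the generated sequence is equivalent to this sketch (start and step are illustrative):

import java.sql.Timestamp

object SequentialTimestampSketch extends App {
  // Illustrative start and step; the real column reads them from YAML
  val start = Timestamp.valueOf("1998-06-03 01:23:45")
  val stepSeconds = 60L

  def sequentialTimestamp(rowId: Long): Timestamp =
    new Timestamp((start.getTime / 1000 + rowId * stepSeconds) * 1000)

  (0L to 3L).foreach(id => println(s"row $id -> ${sequentialTimestamp(id)}"))
}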
Example 127
Source File: SchemaColumnFixed.scala From data-faker with MIT License | 5 votes |
package com.dunnhumby.datafaker.schema.table.columns import java.sql.{Date, Timestamp} import com.dunnhumby.datafaker.YamlParser.YamlParserProtocol import org.apache.spark.sql.Column import org.apache.spark.sql.functions.lit case class SchemaColumnFixed[T](override val name: String, value: T) extends SchemaColumn { override def column(rowID: Option[Column] = None): Column = lit(value) } object SchemaColumnFixedProtocol extends SchemaColumnFixedProtocol trait SchemaColumnFixedProtocol extends YamlParserProtocol { import net.jcazevedo.moultingyaml._ implicit object SchemaColumnFixedFormat extends YamlFormat[SchemaColumnFixed[_]] { override def read(yaml: YamlValue): SchemaColumnFixed[_] = { val fields = yaml.asYamlObject.fields val YamlString(name) = fields.getOrElse(YamlString("name"), deserializationError("name not set")) val YamlString(dataType) = fields.getOrElse(YamlString("data_type"), deserializationError(s"data_type not set for $name")) val value = fields.getOrElse(YamlString("value"), deserializationError(s"value not set for $name")) dataType match { case SchemaColumnDataType.Int => SchemaColumnFixed(name, value.convertTo[Int]) case SchemaColumnDataType.Long => SchemaColumnFixed(name, value.convertTo[Long]) case SchemaColumnDataType.Float => SchemaColumnFixed(name, value.convertTo[Float]) case SchemaColumnDataType.Double => SchemaColumnFixed(name, value.convertTo[Double]) case SchemaColumnDataType.Date => SchemaColumnFixed(name, value.convertTo[Date]) case SchemaColumnDataType.Timestamp => SchemaColumnFixed(name, value.convertTo[Timestamp]) case SchemaColumnDataType.String => SchemaColumnFixed(name, value.convertTo[String]) case SchemaColumnDataType.Boolean => SchemaColumnFixed(name, value.convertTo[Boolean]) case _ => deserializationError(s"unsupported data_type: $dataType for ${SchemaColumnType.Fixed}") } } override def write(obj: SchemaColumnFixed[_]): YamlValue = ??? } }
Example 128
Source File: YamlParserTest.scala From data-faker with MIT License | 5 votes |
package com.dunnhumby.datafaker import java.sql.{Date, Timestamp} import org.scalatest.{MustMatchers, WordSpec} class YamlParserTest extends WordSpec with MustMatchers { import com.dunnhumby.datafaker.YamlParser.YamlParserProtocol._ import net.jcazevedo.moultingyaml._ "YamlParser" must { "convert a YamlDate to java.sql.Date" in { val date = "1998-06-03" val string = s"""$date""".stripMargin string.parseYaml.convertTo[Date] mustBe Date.valueOf(date) } "convert a YamlDate to java.sql.Timestamp" in { val timestamp = "1998-06-03 01:23:45" val string = s"""$timestamp""".stripMargin string.parseYaml.convertTo[Timestamp] mustBe Timestamp.valueOf(timestamp) } } }
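Both assertions rely on the literal formats that the java.sql parsers accept: Date.valueOf expects yyyy-MM-dd and Timestamp.valueOf expects yyyy-MM-dd HH:mm:ss with an optional fractional part. In isolation:

import java.sql.{Date, Timestamp}

object SqlLiteralParsing extends App {
  val date: Date = Date.valueOf("1998-06-03")
  val timestamp: Timestamp = Timestamp.valueOf("1998-06-03 01:23:45")

  println(date)      // 1998-06-03
  println(timestamp) // 1998-06-03 01:23:45.0
}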
Example 129
Source File: SchemaColumnFixedTest.scala From data-faker with MIT License | 5 votes |
package com.dunnhumby.datafaker.schema.table.columns import java.sql.{Date, Timestamp} import org.scalatest.{MustMatchers, WordSpec} class SchemaColumnFixedTest extends WordSpec with MustMatchers { import com.dunnhumby.datafaker.schema.table.columns.SchemaColumnFixedProtocol._ import net.jcazevedo.moultingyaml._ val name = "test" val column_type = "Fixed" val baseString = s"""name: $name |column_type: $column_type """.stripMargin "SchemaColumnFixed" must { "read an Int column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Int} |value: 1 """.stripMargin string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, 1) } "read a Long column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Long} |value: 1 """.stripMargin string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, 1l) } "read a Float column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Float} |value: 1.0 """.stripMargin string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, 1f) } "read a Double column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Double} |value: 1.0 """.stripMargin string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, 1d) } "read a Date column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Date} |value: 1998-06-03 """.stripMargin string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, Date.valueOf("1998-06-03")) } "read a Timestamp column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Timestamp} |value: 1998-06-03 01:23:45 """.stripMargin string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, Timestamp.valueOf("1998-06-03 01:23:45")) } "read a String column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.String} |value: test """.stripMargin string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, "test") } "read a Boolean column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Boolean} |value: true """.stripMargin string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, true) } } }
Example 130
Source File: SchemaColumnSequentialTest.scala From data-faker with MIT License | 5 votes |
package com.dunnhumby.datafaker.schema.table.columns import java.sql.{Date, Timestamp} import org.scalatest.{MustMatchers, WordSpec} class SchemaColumnSequentialTest extends WordSpec with MustMatchers { import com.dunnhumby.datafaker.schema.table.columns.SchemaColumnSequentialProtocol._ import net.jcazevedo.moultingyaml._ val name = "test" val column_type = "Sequential" val baseString = s"""name: $name |column_type: $column_type """.stripMargin "SchemaColumnSequential" must { "read an Int column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Int} |start: 1 |step: 1 """.stripMargin string.parseYaml.convertTo[SchemaColumnSequential[_]] mustBe SchemaColumnSequential(name, 1, 1) } "read a Long column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Long} |start: 1 |step: 1 """.stripMargin string.parseYaml.convertTo[SchemaColumnSequential[_]] mustBe SchemaColumnSequential(name, 1l, 1l) } "read a Float column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Float} |start: 1.0 |step: 1.0 """.stripMargin string.parseYaml.convertTo[SchemaColumnSequential[_]] mustBe SchemaColumnSequential(name, 1f, 1f) } "read a Double column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Double} |start: 1.0 |step: 1.0 """.stripMargin string.parseYaml.convertTo[SchemaColumnSequential[_]] mustBe SchemaColumnSequential(name, 1d, 1d) } "read a Date column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Date} |start: 1998-06-03 |step: 1 """.stripMargin string.parseYaml.convertTo[SchemaColumnSequential[_]] mustBe SchemaColumnSequential(name, Date.valueOf("1998-06-03"), 1) } "read a Timestamp column" in { val string = s"""$baseString |data_type: ${SchemaColumnDataType.Timestamp} |start: 1998-06-03 01:23:45 |step: 1 """.stripMargin string.parseYaml.convertTo[SchemaColumnSequential[_]] mustBe SchemaColumnSequential(name, Timestamp.valueOf("1998-06-03 01:23:45"), 1) } } }
Example 131
Source File: ArgsParserTest.scala From data-faker with MIT License | 5 votes |
package com.dunnhumby.datafaker import java.sql.{Date, Timestamp} import org.scalatest.{MustMatchers, WordSpec} class ArgsParserTest extends WordSpec with MustMatchers { import com.dunnhumby.datafaker.YamlParser.YamlParserProtocol._ import net.jcazevedo.moultingyaml._ "ArgsParser" must { "accepts --file arg" in { ArgsParser.parseArgs(List("--file", "test")) mustBe Map("file" -> "test") } "accepts --database arg" in { ArgsParser.parseArgs(List("--database", "test")) mustBe Map("database" -> "test") } } }
Example 132
Source File: literals.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} import org.apache.spark.sql.catalyst.CatalystTypeConverters import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.types._ object Literal { def apply(v: Any): Literal = v match { case i: Int => Literal(i, IntegerType) case l: Long => Literal(l, LongType) case d: Double => Literal(d, DoubleType) case f: Float => Literal(f, FloatType) case b: Byte => Literal(b, ByteType) case s: Short => Literal(s, ShortType) case s: String => Literal(UTF8String(s), StringType) case b: Boolean => Literal(b, BooleanType) case d: BigDecimal => Literal(Decimal(d), DecimalType.Unlimited) case d: java.math.BigDecimal => Literal(Decimal(d), DecimalType.Unlimited) case d: Decimal => Literal(d, DecimalType.Unlimited) case t: Timestamp => Literal(t, TimestampType) case d: Date => Literal(DateUtils.fromJavaDate(d), DateType) case a: Array[Byte] => Literal(a, BinaryType) case null => Literal(null, NullType) case _ => throw new RuntimeException("Unsupported literal type " + v.getClass + " " + v) } def create(v: Any, dataType: DataType): Literal = { Literal(CatalystTypeConverters.convertToCatalyst(v), dataType) } } case class Literal protected (value: Any, dataType: DataType) extends LeafExpression { override def foldable: Boolean = true override def nullable: Boolean = value == null override def toString: String = if (value != null) value.toString else "null" type EvaluatedType = Any override def eval(input: Row): Any = value } // TODO: Specialize case class MutableLiteral(var value: Any, dataType: DataType, nullable: Boolean = true) extends LeafExpression { type EvaluatedType = Any def update(expression: Expression, input: Row): Unit = { value = expression.eval(input) } override def eval(input: Row): Any = value }
Example 133
Source File: DataFrameDateSuite.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql import java.sql.{Date, Timestamp} class DataFrameDateTimeSuite extends QueryTest { private lazy val ctx = org.apache.spark.sql.test.TestSQLContext import ctx.implicits._ test("timestamp comparison with date strings") { val df = Seq( (1, Timestamp.valueOf("2015-01-01 00:00:00")), (2, Timestamp.valueOf("2014-01-01 00:00:00"))).toDF("i", "t") checkAnswer( df.select("t").filter($"t" <= "2014-06-01"), Row(Timestamp.valueOf("2014-01-01 00:00:00")) :: Nil) checkAnswer( df.select("t").filter($"t" >= "2014-06-01"), Row(Timestamp.valueOf("2015-01-01 00:00:00")) :: Nil) } test("date comparison with date strings") { val df = Seq( (1, Date.valueOf("2015-01-01")), (2, Date.valueOf("2014-01-01"))).toDF("i", "t") checkAnswer( df.select("t").filter($"t" <= "2014-06-01"), Row(Date.valueOf("2014-01-01")) :: Nil) checkAnswer( df.select("t").filter($"t" >= "2015"), Row(Date.valueOf("2015-01-01")) :: Nil) } }
Example 134
Source File: ColumnarTestUtils.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.columnar import java.sql.Timestamp import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.GenericMutableRow import org.apache.spark.sql.types.{UTF8String, DataType, Decimal, AtomicType} object ColumnarTestUtils { def makeNullRow(length: Int): GenericMutableRow = { val row = new GenericMutableRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[T <: DataType, JvmType](columnType: ColumnType[T, JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case FIXED_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case STRING => UTF8String(Random.nextString(Random.nextInt(32))) case BOOLEAN => Random.nextBoolean() case BINARY => randomBytes(Random.nextInt(32)) case DATE => Random.nextInt() case TIMESTAMP => val timestamp = new Timestamp(Random.nextLong()) timestamp.setNanos(Random.nextInt(999999999)) timestamp case _ => // Using a random one-element map instead of an arbitrary object Map(Random.nextInt() -> Random.nextString(Random.nextInt(32))) }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_ <: DataType, _], tail: ColumnType[_ <: DataType, _]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_ <: DataType, _]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[T <: DataType, JvmType]( columnType: ColumnType[T, JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_ <: DataType, _], tail: ColumnType[_ <: DataType, _]*): Row = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_ <: DataType, _]]): Row = { val row = new GenericMutableRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericMutableRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericMutableRow(1) row(0) = value row } (values, rows) } }
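The TIMESTAMP branch above is the one place that exercises the nanosecond field: it builds a Timestamp from a random long and then overwrites the fractional seconds with setNanos. A small sketch of how that field behaves:

import java.sql.Timestamp

object TimestampNanosSketch extends App {
  val ts = Timestamp.valueOf("2018-09-10 11:58:59")
  ts.setNanos(123456789) // fractional seconds, 0 to 999999999, stored separately from the millis

  println(ts)          // 2018-09-10 11:58:59.123456789
  println(ts.getNanos) // 123456789
  // getTime still reports only millisecond precision: the nanos contribute 123 ms here
  println(ts.getTime % 1000) // 123
}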
Example 135
Source File: ArrayEncoders.scala From quill with Apache License 2.0 | 5 votes |
package io.getquill.context.jasync import java.sql.Timestamp import java.time.LocalDate import java.util.Date import io.getquill.PostgresJAsyncContext import io.getquill.context.sql.encoding.ArrayEncoding import org.joda.time.{ DateTime => JodaDateTime, LocalDate => JodaLocalDate, LocalDateTime => JodaLocalDateTime } trait ArrayEncoders extends ArrayEncoding { self: PostgresJAsyncContext[_] => implicit def arrayStringEncoder[Col <: Seq[String]]: Encoder[Col] = arrayRawEncoder[String, Col] implicit def arrayBigDecimalEncoder[Col <: Seq[BigDecimal]]: Encoder[Col] = arrayRawEncoder[BigDecimal, Col] implicit def arrayBooleanEncoder[Col <: Seq[Boolean]]: Encoder[Col] = arrayRawEncoder[Boolean, Col] implicit def arrayByteEncoder[Col <: Seq[Byte]]: Encoder[Col] = arrayRawEncoder[Byte, Col] implicit def arrayShortEncoder[Col <: Seq[Short]]: Encoder[Col] = arrayRawEncoder[Short, Col] implicit def arrayIntEncoder[Col <: Seq[Index]]: Encoder[Col] = arrayRawEncoder[Index, Col] implicit def arrayLongEncoder[Col <: Seq[Long]]: Encoder[Col] = arrayRawEncoder[Long, Col] implicit def arrayFloatEncoder[Col <: Seq[Float]]: Encoder[Col] = arrayRawEncoder[Float, Col] implicit def arrayDoubleEncoder[Col <: Seq[Double]]: Encoder[Col] = arrayRawEncoder[Double, Col] implicit def arrayDateEncoder[Col <: Seq[Date]]: Encoder[Col] = arrayEncoder[Date, Col](d => Timestamp.from(d.toInstant)) implicit def arrayJodaDateTimeEncoder[Col <: Seq[JodaDateTime]]: Encoder[Col] = arrayEncoder[JodaDateTime, Col](_.toLocalDateTime) implicit def arrayJodaLocalDateTimeEncoder[Col <: Seq[JodaLocalDateTime]]: Encoder[Col] = arrayRawEncoder[JodaLocalDateTime, Col] implicit def arrayJodaLocalDateEncoder[Col <: Seq[JodaLocalDate]]: Encoder[Col] = arrayRawEncoder[JodaLocalDate, Col] implicit def arrayLocalDateEncoder[Col <: Seq[LocalDate]]: Encoder[Col] = arrayEncoder[LocalDate, Col](encodeLocalDate.f) def arrayEncoder[T, Col <: Seq[T]](mapper: T => Any): Encoder[Col] = encoder[Col]((col: Col) => col.toIndexedSeq.map(mapper).mkString("{", ",", "}"), SqlTypes.ARRAY) def arrayRawEncoder[T, Col <: Seq[T]]: Encoder[Col] = arrayEncoder[T, Col](identity) }
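arrayDateEncoder above converts each java.util.Date element with Timestamp.from(d.toInstant) before rendering the array literal. That conversion, and its inverse, in isolation:

import java.sql.Timestamp
import java.util.Date

object DateTimestampConversion extends App {
  val date = new Date()

  // java.util.Date -> java.sql.Timestamp, as the encoder does
  val ts: Timestamp = Timestamp.from(date.toInstant)

  // and back again; Timestamp extends Date, but converting explicitly
  // avoids leaking the nanosecond field where only a Date is expected
  val back: Date = Date.from(ts.toInstant)

  println(s"date=$date ts=$ts back=$back")
}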
Example 136
Source File: FinagleMysqlEncoders.scala From quill with Apache License 2.0 | 5 votes |
package io.getquill.context.finagle.mysql import java.sql.Timestamp import java.time.{ LocalDate, LocalDateTime } import java.util.{ Date, UUID } import com.twitter.finagle.mysql.CanBeParameter._ import com.twitter.finagle.mysql.Parameter.wrap import com.twitter.finagle.mysql._ import io.getquill.FinagleMysqlContext trait FinagleMysqlEncoders { this: FinagleMysqlContext[_] => type Encoder[T] = FinagleMySqlEncoder[T] case class FinagleMySqlEncoder[T](encoder: BaseEncoder[T]) extends BaseEncoder[T] { override def apply(index: Index, value: T, row: PrepareRow) = encoder(index, value, row) } def encoder[T](f: T => Parameter): Encoder[T] = FinagleMySqlEncoder((index, value, row) => row :+ f(value)) def encoder[T](implicit cbp: CanBeParameter[T]): Encoder[T] = encoder[T]((v: T) => v: Parameter) private[this] val nullEncoder = encoder((_: Null) => Parameter.NullParameter) implicit def optionEncoder[T](implicit e: Encoder[T]): Encoder[Option[T]] = FinagleMySqlEncoder { (index, value, row) => value match { case None => nullEncoder.encoder(index, null, row) case Some(v) => e.encoder(index, v, row) } } implicit def mappedEncoder[I, O](implicit mapped: MappedEncoding[I, O], e: Encoder[O]): Encoder[I] = FinagleMySqlEncoder(mappedBaseEncoder(mapped, e.encoder)) implicit val stringEncoder: Encoder[String] = encoder[String] implicit val bigDecimalEncoder: Encoder[BigDecimal] = encoder[BigDecimal] { (value: BigDecimal) => BigDecimalValue(value): Parameter } implicit val booleanEncoder: Encoder[Boolean] = encoder[Boolean] implicit val byteEncoder: Encoder[Byte] = encoder[Byte] implicit val shortEncoder: Encoder[Short] = encoder[Short] implicit val intEncoder: Encoder[Int] = encoder[Int] implicit val longEncoder: Encoder[Long] = encoder[Long] implicit val floatEncoder: Encoder[Float] = encoder[Float] implicit val doubleEncoder: Encoder[Double] = encoder[Double] implicit val byteArrayEncoder: Encoder[Array[Byte]] = encoder[Array[Byte]] implicit val dateEncoder: Encoder[Date] = encoder[Date] { (value: Date) => timestampValue(new Timestamp(value.getTime)): Parameter } implicit val localDateEncoder: Encoder[LocalDate] = encoder[LocalDate] { (d: LocalDate) => DateValue(java.sql.Date.valueOf(d)): Parameter } implicit val localDateTimeEncoder: Encoder[LocalDateTime] = encoder[LocalDateTime] { (d: LocalDateTime) => timestampValue(new Timestamp(d.atZone(injectionTimeZone.toZoneId).toInstant.toEpochMilli)): Parameter } implicit val uuidEncoder: Encoder[UUID] = mappedEncoder(MappedEncoding(_.toString), stringEncoder) }
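localDateTimeEncoder above turns a LocalDateTime into a Timestamp by first resolving it in the context's injectionTimeZone. The zone matters because a LocalDateTime carries no offset of its own; a sketch of the same conversion with an explicit zone (UTC is used here only as an example):

import java.sql.Timestamp
import java.time.{LocalDateTime, ZoneId}

object LocalDateTimeToTimestamp extends App {
  val ldt = LocalDateTime.of(2018, 11, 12, 9, 42, 0)
  val zone = ZoneId.of("UTC")

  val ts = new Timestamp(ldt.atZone(zone).toInstant.toEpochMilli)

  println(ts.getTime) // 1542015720000, the epoch millis of 2018-11-12T09:42:00Z
}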
Example 137
Source File: ArrayDecoders.scala From quill with Apache License 2.0 | 5 votes |
package io.getquill.context.jdbc import java.sql.Timestamp import java.time.LocalDate import java.util.Date import java.sql.{ Date => SqlDate } import java.math.{ BigDecimal => JBigDecimal } import io.getquill.context.sql.encoding.ArrayEncoding import io.getquill.util.Messages.fail import scala.collection.compat._ import scala.reflect.ClassTag trait ArrayDecoders extends ArrayEncoding { self: JdbcContextBase[_, _] => implicit def arrayStringDecoder[Col <: Seq[String]](implicit bf: CBF[String, Col]): Decoder[Col] = arrayRawDecoder[String, Col] implicit def arrayBigDecimalDecoder[Col <: Seq[BigDecimal]](implicit bf: CBF[BigDecimal, Col]): Decoder[Col] = arrayDecoder[JBigDecimal, BigDecimal, Col](BigDecimal.apply) implicit def arrayBooleanDecoder[Col <: Seq[Boolean]](implicit bf: CBF[Boolean, Col]): Decoder[Col] = arrayRawDecoder[Boolean, Col] implicit def arrayByteDecoder[Col <: Seq[Byte]](implicit bf: CBF[Byte, Col]): Decoder[Col] = arrayRawDecoder[Byte, Col] implicit def arrayShortDecoder[Col <: Seq[Short]](implicit bf: CBF[Short, Col]): Decoder[Col] = arrayRawDecoder[Short, Col] implicit def arrayIntDecoder[Col <: Seq[Int]](implicit bf: CBF[Int, Col]): Decoder[Col] = arrayRawDecoder[Int, Col] implicit def arrayLongDecoder[Col <: Seq[Long]](implicit bf: CBF[Long, Col]): Decoder[Col] = arrayRawDecoder[Long, Col] implicit def arrayFloatDecoder[Col <: Seq[Float]](implicit bf: CBF[Float, Col]): Decoder[Col] = arrayRawDecoder[Float, Col] implicit def arrayDoubleDecoder[Col <: Seq[Double]](implicit bf: CBF[Double, Col]): Decoder[Col] = arrayRawDecoder[Double, Col] implicit def arrayDateDecoder[Col <: Seq[Date]](implicit bf: CBF[Date, Col]): Decoder[Col] = arrayRawDecoder[Date, Col] implicit def arrayTimestampDecoder[Col <: Seq[Timestamp]](implicit bf: CBF[Timestamp, Col]): Decoder[Col] = arrayRawDecoder[Timestamp, Col] implicit def arrayLocalDateDecoder[Col <: Seq[LocalDate]](implicit bf: CBF[LocalDate, Col]): Decoder[Col] = arrayDecoder[SqlDate, LocalDate, Col](_.toLocalDate) def arrayRawDecoder[T: ClassTag, Col <: Seq[T]](implicit bf: CBF[T, Col]): Decoder[Col] = arrayDecoder[T, T, Col](identity) }
Example 138
Source File: Encoders.scala From quill with Apache License 2.0 | 5 votes |
package io.getquill.context.jdbc import java.sql.{ Date, Timestamp, Types } import java.time.{ LocalDate, LocalDateTime } import java.util.{ Calendar, TimeZone } import java.{ sql, util } trait Encoders { this: JdbcContextBase[_, _] => type Encoder[T] = JdbcEncoder[T] protected val dateTimeZone = TimeZone.getDefault case class JdbcEncoder[T](sqlType: Int, encoder: BaseEncoder[T]) extends BaseEncoder[T] { override def apply(index: Index, value: T, row: PrepareRow) = encoder(index + 1, value, row) } def encoder[T](sqlType: Int, f: (Index, T, PrepareRow) => Unit): Encoder[T] = JdbcEncoder(sqlType, (index: Index, value: T, row: PrepareRow) => { f(index, value, row) row }) def encoder[T](sqlType: Int, f: PrepareRow => (Index, T) => Unit): Encoder[T] = encoder(sqlType, (index: Index, value: T, row: PrepareRow) => f(row)(index, value)) implicit def mappedEncoder[I, O](implicit mapped: MappedEncoding[I, O], e: Encoder[O]): Encoder[I] = JdbcEncoder(e.sqlType, mappedBaseEncoder(mapped, e.encoder)) private[this] val nullEncoder: Encoder[Int] = encoder(Types.INTEGER, _.setNull) implicit def optionEncoder[T](implicit d: Encoder[T]): Encoder[Option[T]] = JdbcEncoder( d.sqlType, (index, value, row) => value match { case Some(v) => d.encoder(index, v, row) case None => nullEncoder.encoder(index, d.sqlType, row) } ) implicit val stringEncoder: Encoder[String] = encoder(Types.VARCHAR, _.setString) implicit val bigDecimalEncoder: Encoder[BigDecimal] = encoder(Types.NUMERIC, (index, value, row) => row.setBigDecimal(index, value.bigDecimal)) implicit val byteEncoder: Encoder[Byte] = encoder(Types.TINYINT, _.setByte) implicit val shortEncoder: Encoder[Short] = encoder(Types.SMALLINT, _.setShort) implicit val intEncoder: Encoder[Int] = encoder(Types.INTEGER, _.setInt) implicit val longEncoder: Encoder[Long] = encoder(Types.BIGINT, _.setLong) implicit val floatEncoder: Encoder[Float] = encoder(Types.FLOAT, _.setFloat) implicit val doubleEncoder: Encoder[Double] = encoder(Types.DOUBLE, _.setDouble) implicit val byteArrayEncoder: Encoder[Array[Byte]] = encoder(Types.VARBINARY, _.setBytes) implicit val dateEncoder: Encoder[util.Date] = encoder(Types.TIMESTAMP, (index, value, row) => row.setTimestamp(index, new sql.Timestamp(value.getTime), Calendar.getInstance(dateTimeZone))) implicit val localDateEncoder: Encoder[LocalDate] = encoder(Types.DATE, (index, value, row) => row.setDate(index, Date.valueOf(value), Calendar.getInstance(dateTimeZone))) implicit val localDateTimeEncoder: Encoder[LocalDateTime] = encoder(Types.TIMESTAMP, (index, value, row) => row.setTimestamp(index, Timestamp.valueOf(value), Calendar.getInstance(dateTimeZone))) }
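localDateTimeEncoder here relies on Timestamp.valueOf(LocalDateTime), which copies the wall-clock fields of the LocalDateTime one-to-one. In isolation that pair of conversions is a lossless round trip:

import java.sql.Timestamp
import java.time.LocalDateTime

object TimestampLocalDateTimeRoundTrip extends App {
  val ldt = LocalDateTime.of(2019, 11, 2, 15, 5, 6, 376000000)

  val ts: Timestamp = Timestamp.valueOf(ldt) // wall-clock fields copied, nanos included
  val back: LocalDateTime = ts.toLocalDateTime

  assert(back == ldt)
  println(ts) // 2019-11-02 15:05:06.376
}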
Example 139
Source File: ArrayJdbcEncodingSpec.scala From quill with Apache License 2.0 | 5 votes |
package io.getquill.context.jdbc.postgres import java.sql.Timestamp import java.time.LocalDate import java.util.UUID import io.getquill.context.sql.encoding.ArrayEncodingBaseSpec import io.getquill.{ Literal, PostgresJdbcContext } class ArrayJdbcEncodingSpec extends ArrayEncodingBaseSpec { val ctx = testContext import ctx._ val q = quote(query[ArraysTestEntity]) val corrected = e.copy(timestamps = e.timestamps.map(d => new Timestamp(d.getTime))) "Support all sql base types and `Seq` implementers" in { ctx.run(q.insert(lift(corrected))) val actual = ctx.run(q).head actual mustEqual corrected baseEntityDeepCheck(actual, corrected) } "Support Seq encoding basing on MappedEncoding" in { val wrapQ = quote(querySchema[WrapEntity]("ArraysTestEntity")) ctx.run(wrapQ.insert(lift(wrapE))) ctx.run(wrapQ).head.texts mustBe wrapE.texts } "Timestamps" in { case class Timestamps(timestamps: List[Timestamp]) val tE = Timestamps(List(new Timestamp(System.currentTimeMillis()))) val tQ = quote(querySchema[Timestamps]("ArraysTestEntity")) ctx.run(tQ.insert(lift(tE))) ctx.run(tQ).head.timestamps mustBe tE.timestamps } "Catch invalid decoders" in { val newCtx = new PostgresJdbcContext(Literal, "testPostgresDB") { // avoid transforming from java.sql.Date to java.time.LocalDate override implicit def arrayLocalDateDecoder[Col <: Seq[LocalDate]](implicit bf: CBF[LocalDate, Col]): Decoder[Col] = arrayDecoder[LocalDate, LocalDate, Col](identity) } import newCtx._ newCtx.run(query[ArraysTestEntity].insert(lift(corrected))) intercept[IllegalStateException] { newCtx.run(query[ArraysTestEntity]).head mustBe corrected } newCtx.close() } "Custom decoders/encoders" in { case class Entity(uuids: List[UUID]) val e = Entity(List(UUID.randomUUID(), UUID.randomUUID())) val q = quote(querySchema[Entity]("ArraysTestEntity")) implicit def arrayUUIDEncoder[Col <: Seq[UUID]]: Encoder[Col] = arrayRawEncoder[UUID, Col]("uuid") implicit def arrayUUIDDecoder[Col <: Seq[UUID]](implicit bf: CBF[UUID, Col]): Decoder[Col] = arrayRawDecoder[UUID, Col] ctx.run(q.insert(lift(e))) ctx.run(q).head.uuids mustBe e.uuids } "Arrays in where clause" in { ctx.run(q.insert(lift(corrected))) val actual1 = ctx.run(q.filter(_.texts == lift(List("test")))) val actual2 = ctx.run(q.filter(_.texts == lift(List("test2")))) actual1 mustEqual List(corrected) actual2 mustEqual List() } "empty array on found null" in { case class ArraysTestEntity(texts: Option[List[String]]) ctx.run(query[ArraysTestEntity].insert(lift(ArraysTestEntity(None)))) case class E(texts: List[String]) ctx.run(querySchema[E]("ArraysTestEntity")).headOption.map(_.texts) mustBe Some(Nil) } override protected def beforeEach(): Unit = { ctx.run(q.delete) () } }
Example 140
Source File: DateTimeConverter.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.commons.datetime import java.sql.Timestamp import org.joda.time.format.{DateTimeFormatter, ISODateTimeFormat} import org.joda.time.{DateTime, DateTimeZone} trait DateTimeConverter { val zone: DateTimeZone = DateTimeZone.getDefault val dateTimeFormatter: DateTimeFormatter = ISODateTimeFormat.dateTime() def toString(dateTime: DateTime): String = dateTime.toString(dateTimeFormatter) def parseDateTime(s: String): DateTime = dateTimeFormatter.parseDateTime(s).withZone(zone) def parseTimestamp(s: String): Timestamp = new Timestamp(parseDateTime(s).getMillis) def now: DateTime = new DateTime(zone) def fromMillis(millis: Long): DateTime = new DateTime(zone).withMillis(millis) def dateTime( year: Int, monthOfyear: Int, dayOfMonth: Int, hourOfDay: Int = 0, minutesOfHour: Int = 0, secondsOfMinute: Int = 0): DateTime = new DateTime(year, monthOfyear, dayOfMonth, hourOfDay, minutesOfHour, secondsOfMinute, zone) def dateTimeFromUTC( year: Int, monthOfyear: Int, dayOfMonth: Int, hourOfDay: Int = 0, minutesOfHour: Int = 0, secondsOfMinute: Int = 0): DateTime = new DateTime( year, monthOfyear, dayOfMonth, hourOfDay, minutesOfHour, secondsOfMinute, DateTimeZone.UTC).withZone(DateTimeConverter.zone) } object DateTimeConverter extends DateTimeConverter
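parseTimestamp above goes through Joda-Time: the ISO string is parsed to a DateTime and its millis are wrapped in a Timestamp. As a point of comparison only (this is not the project's code), the same conversion with java.time would be roughly:

import java.sql.Timestamp
import java.time.OffsetDateTime

object ParseIsoTimestamp extends App {
  val s = "2018-11-12T09:42:00.000+01:00"

  // Parse the ISO-8601 string and keep the instant it denotes
  val ts = Timestamp.from(OffsetDateTime.parse(s).toInstant)

  println(ts.getTime) // epoch millis of the parsed instant
}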
Example 141
Source File: CsvSchemaStringifierBeforeCsvWriting.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.deeplang.doperations.readwritedataframe.filestorage.csv import java.sql.Timestamp import org.apache.spark.sql.Row import org.apache.spark.sql.types._ import ai.deepsense.commons.datetime.DateTimeConverter import ai.deepsense.deeplang.ExecutionContext import ai.deepsense.deeplang.doperables.dataframe.DataFrame import ai.deepsense.deeplang.doperations.exceptions.UnsupportedColumnTypeException object CsvSchemaStringifierBeforeCsvWriting { def preprocess(dataFrame: DataFrame) (implicit context: ExecutionContext): DataFrame = { requireNoComplexTypes(dataFrame) val schema = dataFrame.sparkDataFrame.schema def stringifySelectedTypes(schema: StructType): StructType = { StructType( schema.map { case field: StructField => field.copy(dataType = StringType) } ) } context.dataFrameBuilder.buildDataFrame( stringifySelectedTypes(schema), dataFrame.sparkDataFrame.rdd.map(stringifySelectedCells(schema))) } private def requireNoComplexTypes(dataFrame: DataFrame): Unit = { dataFrame.sparkDataFrame.schema.fields.map(structField => (structField.dataType, structField.name) ).foreach { case (dataType, columnName) => dataType match { case _: ArrayType | _: MapType | _: StructType => throw UnsupportedColumnTypeException(columnName, dataType) case _ => () } } } private def stringifySelectedCells(originalSchema: StructType)(row: Row): Row = { Row.fromSeq( row.toSeq.zipWithIndex map { case (value, index) => (value, originalSchema(index).dataType) match { case (null, _) => "" case (_, BooleanType) => if (value.asInstanceOf[Boolean]) "1" else "0" case (_, TimestampType) => DateTimeConverter.toString( DateTimeConverter.fromMillis(value.asInstanceOf[Timestamp].getTime)) case (x, _) => value.toString } }) } }
Example 142
Source File: WriteReadDataFrameWithDriverFilesIntegSpec.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.deeplang.doperations import java.sql.Timestamp import org.apache.spark.sql.Row import org.apache.spark.sql.types._ import org.scalatest.BeforeAndAfter import ai.deepsense.deeplang.{TestFiles, DeeplangIntegTestSupport} import ai.deepsense.deeplang.doperables.dataframe.DataFrame import ai.deepsense.deeplang.doperations.inout._ class WriteReadDataFrameWithDriverFilesIntegSpec extends DeeplangIntegTestSupport with BeforeAndAfter with TestFiles { import DeeplangIntegTestSupport._ val schema: StructType = StructType(Seq( StructField("boolean", BooleanType), StructField("double", DoubleType), StructField("string", StringType) )) val rows = { val base = Seq( Row(true, 0.45, "3.14"), Row(false, null, "\"testing...\""), Row(false, 3.14159, "Hello, world!"), // in case of CSV, an empty string is the same as null - no way around it Row(null, null, "") ) val repeatedFewTimes = (1 to 10).flatMap(_ => base) repeatedFewTimes } lazy val dataFrame = createDataFrame(rows, schema) "WriteDataFrame and ReadDataFrame" should { "write and read CSV file" in { val wdf = new WriteDataFrame() .setStorageType( new OutputStorageTypeChoice.File() .setOutputFile(absoluteTestsDirPath.fullPath + "/test_files") .setFileFormat( new OutputFileFormatChoice.Csv() .setCsvColumnSeparator(CsvParameters.ColumnSeparatorChoice.Comma()) .setNamesIncluded(true))) wdf.executeUntyped(Vector(dataFrame))(executionContext) val rdf = new ReadDataFrame() .setStorageType( new InputStorageTypeChoice.File() .setSourceFile(absoluteTestsDirPath.fullPath + "/test_files") .setFileFormat(new InputFileFormatChoice.Csv() .setCsvColumnSeparator(CsvParameters.ColumnSeparatorChoice.Comma()) .setNamesIncluded(true) .setShouldConvertToBoolean(true))) val loadedDataFrame = rdf.executeUntyped(Vector())(executionContext).head.asInstanceOf[DataFrame] assertDataFramesEqual(loadedDataFrame, dataFrame, checkRowOrder = false) } "write and read JSON file" in { val wdf = new WriteDataFrame() .setStorageType(new OutputStorageTypeChoice.File() .setOutputFile(absoluteTestsDirPath.fullPath + "json") .setFileFormat(new OutputFileFormatChoice.Json())) wdf.executeUntyped(Vector(dataFrame))(executionContext) val rdf = new ReadDataFrame() .setStorageType(new InputStorageTypeChoice.File() .setSourceFile(absoluteTestsDirPath.fullPath + "json") .setFileFormat(new InputFileFormatChoice.Json())) val loadedDataFrame = rdf.executeUntyped(Vector())(executionContext).head.asInstanceOf[DataFrame] assertDataFramesEqual(loadedDataFrame, dataFrame, checkRowOrder = false) } } }
Example 143
Source File: DataFrameReportPerformanceSpec.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.deeplang.doperables.dataframe import java.sql.Timestamp import java.text.{DateFormat, SimpleDateFormat} import java.util.TimeZone import org.apache.spark.rdd.RDD import org.apache.spark.sql.Row import org.apache.spark.sql.types.{DoubleType, StructField, StructType, TimestampType} import org.scalatest.{BeforeAndAfter, Ignore} import ai.deepsense.commons.utils.{DoubleUtils, Logging} import ai.deepsense.deeplang.{TestFiles, DeeplangIntegTestSupport} // It's ignored because it does not have assertions; it only prints report generation time. @Ignore class DataFrameReportPerformanceSpec extends DeeplangIntegTestSupport with BeforeAndAfter with TestFiles with Logging { val testFile = absoluteTestsDirPath.pathWithoutScheme + "/demand_without_header.csv" "DataFrame" should { "generate report" when { "DataFrame has 17K of rows" in { val numberOfTries = 10 var results: Seq[Double] = Seq() for (i <- 1 to numberOfTries) { val dataFrame: DataFrame = demandDataFrame() val start = System.nanoTime() val report = dataFrame.report() val end = System.nanoTime() val time1: Double = (end - start).toDouble / 1000000000.0 results = results :+ time1 logger.debug("Report generation time: {}", DoubleUtils.double2String(time1)) } logger.debug( "Mean report generation time: {}", DoubleUtils.double2String(results.fold(0D)(_ + _) / numberOfTries.toDouble)) } } } private def demandDataFrame(): DataFrame = { val rddString: RDD[String] = executionContext.sparkContext.textFile(testFile) val data: RDD[Row] = rddString.map(DataFrameHelpers.demandString2Row) executionContext.dataFrameBuilder.buildDataFrame(demandSchema, data) } private def demandSchema: StructType = StructType(Seq( StructField("datetime", TimestampType), StructField("log_count", DoubleType), StructField("workingday", DoubleType), StructField("holiday", DoubleType), StructField("season2", DoubleType), StructField("season3", DoubleType), StructField("season4", DoubleType))) private def timestamp(s: String): Timestamp = { val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") format.setTimeZone(TimeZone.getTimeZone("UTC")) new Timestamp(format.parse(s).getTime) } } private object DataFrameHelpers { def demandString2Row(s: String): Row = { val split = s.split(",") Row( timestamp(split(0)), split(1).toDouble, split(2).toDouble, split(3).toDouble, split(4).toDouble, split(5).toDouble, split(6).toDouble ) } private def timestamp(s: String): Timestamp = { val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") format.setTimeZone(TimeZone.getTimeZone("UTC")) new Timestamp(format.parse(s).getTime) } }
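The timestamp helpers above parse values from the demand CSV with a SimpleDateFormat pinned to UTC, so the resulting Timestamp does not depend on the JVM's default zone. The same helper on its own:

import java.sql.Timestamp
import java.text.SimpleDateFormat
import java.util.TimeZone

object ParseUtcTimestamp extends App {
  val format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
  format.setTimeZone(TimeZone.getTimeZone("UTC"))

  val ts = new Timestamp(format.parse("2011-01-20 00:00:00").getTime)
  println(ts.getTime) // 1295481600000, i.e. 2011-01-20T00:00:00Z
}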
Example 144
Source File: StatisticsForContinuousIntegSpec.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.deeplang.doperables.dataframe.report.distribution import java.sql.Timestamp import org.apache.spark.rdd.RDD import org.apache.spark.sql import org.apache.spark.sql.Row import org.apache.spark.sql.types._ import ai.deepsense.commons.datetime.DateTimeConverter import ai.deepsense.deeplang.DeeplangIntegTestSupport import ai.deepsense.deeplang.doperables.dataframe.{DataFrame, DataFrameTestFactory} import ai.deepsense.reportlib.model._ class StatisticsForContinuousIntegSpec extends DeeplangIntegTestSupport with DataFrameTestFactory { "Statistics (Min, max and mean values)" should { "be calculated for each continuous column in distribution" when { "data is of type int" in { val distribution = distributionForInt(1, 2, 3, 4, 5) distribution.statistics.min shouldEqual Some("1") distribution.statistics.max shouldEqual Some("5") distribution.statistics.mean shouldEqual Some("3") } "data is of type Timestamp" in { val distribution = distributionForTimestamps(new Timestamp(1000), new Timestamp(2000), new Timestamp(3000)) distribution.statistics.min shouldEqual Some(formatDate(1000)) distribution.statistics.max shouldEqual Some(formatDate(3000)) distribution.statistics.mean shouldEqual Some(formatDate(2000)) } } } "Null value in data" should { val distribution = distributionForDouble(1, 2, 3, 4, Double.NaN, 5) "not be skipped in calculating min and max" in { distribution.statistics.min shouldEqual Some("1") distribution.statistics.max shouldEqual Some("5") } "result in mean value NaN" in { distribution.statistics.mean shouldEqual Some("NaN") } } lazy val columnName = "column_name" private def distributionForDouble(data: Double*): ContinuousDistribution = { distributionFor(data, DoubleType) } private def distributionForInt(data: Int*): ContinuousDistribution = { distributionFor(data, IntegerType) } private def distributionForTimestamps(data: Timestamp*): ContinuousDistribution = { distributionFor(data, TimestampType) } private def distributionFor(data: Seq[Any], dataType: DataType): ContinuousDistribution = { val schema = StructType(Array( StructField(columnName, dataType) )) val rows = data.map(v => Row(v)) val dataFrame = createDataFrame(rows, schema) val report = dataFrame.report() report.content.distributions(columnName).asInstanceOf[ContinuousDistribution] } def buildDataFrame(schema: StructType, data: RDD[Row]): DataFrame = { val dataFrame: sql.DataFrame = sparkSQLSession.createDataFrame(data, schema) DataFrame.fromSparkDataFrame(dataFrame) } def formatDate(millis: Long): String = { DateTimeConverter.toString(DateTimeConverter.fromMillis(millis)) } }
Example 145
Source File: DateEncoderTest.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.record.encoder import java.sql.{Date, Timestamp} import java.time.{Instant, LocalDate, LocalDateTime, LocalTime} import com.sksamuel.avro4s.{AvroSchema, DefaultFieldMapper, Encoder, ImmutableRecord} import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.should.Matchers //noinspection ScalaDeprecation class DateEncoderTest extends AnyFunSuite with Matchers { test("encode LocalTime as TIME-MILLIS") { case class Foo(s: LocalTime) val schema = AvroSchema[Foo] Encoder[Foo].encode(Foo(LocalTime.of(12, 50, 45))) shouldBe ImmutableRecord(schema, Vector(java.lang.Long.valueOf(46245000000L))) } test("encode LocalDate as DATE") { case class Foo(s: LocalDate) val schema = AvroSchema[Foo] Encoder[Foo].encode(Foo(LocalDate.of(2018, 9, 10))) shouldBe ImmutableRecord(schema, Vector(java.lang.Integer.valueOf(17784))) } test("encode java.sql.Date as DATE") { case class Foo(s: Date) val schema = AvroSchema[Foo] Encoder[Foo].encode(Foo(Date.valueOf(LocalDate.of(2018, 9, 10)))) shouldBe ImmutableRecord(schema, Vector(java.lang.Integer.valueOf(17784))) } test("encode LocalDateTime as timestamp-nanos") { case class Foo(s: LocalDateTime) val schema = AvroSchema[Foo] Encoder[Foo].encode(Foo(LocalDateTime.of(2018, 9, 10, 11, 58, 59, 123))) shouldBe ImmutableRecord(schema, Vector(java.lang.Long.valueOf(1536580739000000123L))) Encoder[Foo].encode(Foo(LocalDateTime.of(2018, 9, 10, 11, 58, 59, 123009))) shouldBe ImmutableRecord(schema, Vector(java.lang.Long.valueOf(1536580739000123009L))) Encoder[Foo].encode(Foo(LocalDateTime.of(2018, 9, 10, 11, 58, 59, 328187943))) shouldBe ImmutableRecord(schema, Vector(java.lang.Long.valueOf(1536580739328187943L))) } test("encode Timestamp as TIMESTAMP-MILLIS") { case class Foo(s: Timestamp) val schema = AvroSchema[Foo] Encoder[Foo].encode(Foo(Timestamp.from(Instant.ofEpochMilli(1538312231000L)))) shouldBe ImmutableRecord(schema, Vector(java.lang.Long.valueOf(1538312231000L))) } test("encode Instant as TIMESTAMP-MILLIS") { case class Foo(s: Instant) val schema = AvroSchema[Foo] Encoder[Foo].encode(Foo(Instant.ofEpochMilli(1538312231000L))) shouldBe ImmutableRecord(schema, Vector(java.lang.Long.valueOf(1538312231000L))) } }
Example 146
Source File: DateDecoderTest.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.record.decoder import java.sql.{Date, Timestamp} import java.time.{Instant, LocalDate, LocalDateTime, LocalTime} import com.sksamuel.avro4s.SchemaFor.TimestampNanosLogicalType import com.sksamuel.avro4s.{AvroSchema, Decoder, SchemaFor} import org.apache.avro.generic.GenericData import org.apache.avro.{LogicalTypes, SchemaBuilder} import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.should.Matchers //noinspection ScalaDeprecation class DateDecoderTest extends AnyFunSuite with Matchers { case class WithLocalTime(z: LocalTime) case class WithLocalDate(z: LocalDate) case class WithDate(z: Date) case class WithLocalDateTime(z: LocalDateTime) case class WithTimestamp(z: Timestamp) case class WithInstant(z: Instant) test("decode int to LocalTime") { val schema = AvroSchema[WithLocalTime] val record = new GenericData.Record(schema) record.put("z", 46245000000L) Decoder[WithLocalTime].decode(record) shouldBe WithLocalTime(LocalTime.of(12, 50, 45)) } test("decode int to LocalDate") { val schema = AvroSchema[WithLocalDate] val record = new GenericData.Record(schema) record.put("z", 17784) Decoder[WithLocalDate].decode(record) shouldBe WithLocalDate(LocalDate.of(2018, 9, 10)) } test("decode int to java.sql.Date") { val schema = AvroSchema[WithDate] val record = new GenericData.Record(schema) record.put("z", 17784) Decoder[WithDate].decode(record) shouldBe WithDate(Date.valueOf(LocalDate.of(2018, 9, 10))) } test("decode timestamp-millis to LocalDateTime") { val dateSchema = LogicalTypes.timestampMillis().addToSchema(SchemaBuilder.builder.longType) val schema = SchemaBuilder.record("foo").fields().name("z").`type`(dateSchema).noDefault().endRecord() val record = new GenericData.Record(schema) record.put("z", 1572707106376L) Decoder[WithLocalDateTime].withSchema(SchemaFor(schema)).decode(record) shouldBe WithLocalDateTime( LocalDateTime.of(2019, 11, 2, 15, 5, 6, 376000000)) } test("decode timestamp-micros to LocalDateTime") { val dateSchema = LogicalTypes.timestampMicros().addToSchema(SchemaBuilder.builder.longType) val schema = SchemaBuilder.record("foo").fields().name("z").`type`(dateSchema).noDefault().endRecord() val record = new GenericData.Record(schema) record.put("z", 1572707106376001L) Decoder[WithLocalDateTime].withSchema(SchemaFor(schema)).decode(record) shouldBe WithLocalDateTime( LocalDateTime.of(2019, 11, 2, 15, 5, 6, 376001000)) } test("decode timestamp-nanos to LocalDateTime") { val dateSchema = TimestampNanosLogicalType.addToSchema(SchemaBuilder.builder.longType) val schema = SchemaBuilder.record("foo").fields().name("z").`type`(dateSchema).noDefault().endRecord() val record = new GenericData.Record(schema) record.put("z", 1572707106376000002L) Decoder[WithLocalDateTime].decode(record) shouldBe WithLocalDateTime( LocalDateTime.of(2019, 11, 2, 15, 5, 6, 376000002)) } test("decode long to Timestamp") { val schema = AvroSchema[WithTimestamp] val record = new GenericData.Record(schema) record.put("z", 1538312231000L) Decoder[WithTimestamp].decode(record) shouldBe WithTimestamp(new Timestamp(1538312231000L)) } test("decode long to Instant") { val schema = AvroSchema[WithInstant] val record = new GenericData.Record(schema) record.put("z", 1538312231000L) Decoder[WithInstant].decode(record) shouldBe WithInstant(Instant.ofEpochMilli(1538312231000L)) } }
Example 147
Source File: DateSchemaTest.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.schema

import java.sql.{Date, Timestamp}
import java.time.{Instant, LocalDate, LocalDateTime, LocalTime}

import com.sksamuel.avro4s.AvroSchema
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

class DateSchemaTest extends AnyFunSuite with Matchers {

  test("generate date logical type for LocalDate") {
    case class LocalDateTest(date: LocalDate)
    val expected = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream("/localdate.json"))
    val schema = AvroSchema[LocalDateTest]
    schema.toString(true) shouldBe expected.toString(true)
  }

  test("generate date logical type for Date") {
    case class DateTest(date: Date)
    val expected = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream("/date.json"))
    val schema = AvroSchema[DateTest]
    schema.toString(true) shouldBe expected.toString(true)
  }

  test("generate time logical type for LocalTime") {
    case class LocalTimeTest(time: LocalTime)
    val expected = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream("/localtime.json"))
    val schema = AvroSchema[LocalTimeTest]
    schema.toString(true) shouldBe expected.toString(true)
  }

  test("generate timestamp-nanos for LocalDateTime") {
    case class LocalDateTimeTest(time: LocalDateTime)
    val expected = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream("/localdatetime.json"))
    val schema = AvroSchema[LocalDateTimeTest]
    schema.toString(true) shouldBe expected.toString(true)
  }

  test("generate timestamp-millis logical type for Instant") {
    case class InstantTest(instant: Instant)
    val expected = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream("/instant.json"))
    val schema = AvroSchema[InstantTest]
    schema.toString(true) shouldBe expected.toString(true)
  }

  test("generate timestamp-millis logical type for Timestamp") {
    case class TimestampTest(ts: Timestamp)
    val expected = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream("/timestamp.json"))
    val schema = AvroSchema[TimestampTest]
    schema.toString(true) shouldBe expected.toString(true)
  }
}
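A minimal sketch of generating and inspecting one of these schemas outside a test (REPL-friendly; the case class name here is illustrative, not from the project):

import java.sql.Timestamp
import com.sksamuel.avro4s.AvroSchema

case class TimestampExample(ts: Timestamp)

// Prints an Avro record whose "ts" field is a long carrying the
// timestamp-millis logical type.
println(AvroSchema[TimestampExample].toString(true))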
Example 148
Source File: SparkUtilities.scala From spark-practice with MIT License | 5 votes |
package utilities

import java.sql.Timestamp

import org.apache.spark.sql.SparkSession
import org.apache.spark.{SparkConf, SparkContext}
import probelms.customerInsights.CIConstants

object SparkUtilities {

  def getSparkContext(appName: String): SparkContext = {
    val conf = new SparkConf().setAppName(appName).setMaster("local")
    // .set("spark.serializer","spark.kryo.registrator")
    val sc = new SparkContext(conf)
    sc
  }

  def getSparkSession(appName: String): SparkSession = {
    val spark = SparkSession.builder()
      .appName(appName)
      .master("local")
      // .config("spark.serializer","spark.kryo.registrator")
      .getOrCreate()
    spark
  }

  def convertCurrencyToDouble(currency: String): Double = {
    currency.stripPrefix("$").trim.toDouble
  }

  def getDate(date: String): Timestamp = {
    new java.sql.Timestamp(CIConstants.formatter.parseDateTime(date).getMillis)
  }
}
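getDate above relies on a Joda-Time formatter defined elsewhere (CIConstants.formatter). A hedged standalone sketch of the same Joda-millis-to-Timestamp conversion; the pattern below is purely illustrative, not the project's actual format:

import java.sql.Timestamp
import org.joda.time.format.DateTimeFormat
import utilities.SparkUtilities

object DateConversionDemo {
  def main(args: Array[String]): Unit = {
    // Illustrative pattern; the original project keeps its formatter in CIConstants.
    val formatter = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss")
    val ts = new Timestamp(formatter.parseDateTime("2018-11-12 09:42:00").getMillis)
    println(ts)

    println(SparkUtilities.convertCurrencyToDouble("$12.99")) // 12.99
  }
}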
Example 149
Source File: StructuredNetworkWordCountWindowed.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.sql.streaming

import java.sql.Timestamp

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._

object StructuredNetworkWordCountWindowed {

  def main(args: Array[String]) {
    if (args.length < 3) {
      System.err.println("Usage: StructuredNetworkWordCountWindowed <hostname> <port>" +
        " <window duration in seconds> [<slide duration in seconds>]")
      System.exit(1)
    }

    val host = args(0)
    val port = args(1).toInt
    val windowSize = args(2).toInt
    val slideSize = if (args.length == 3) windowSize else args(3).toInt
    if (slideSize > windowSize) {
      System.err.println("<slide duration> must be less than or equal to <window duration>")
    }
    val windowDuration = s"$windowSize seconds"
    val slideDuration = s"$slideSize seconds"

    val spark = SparkSession
      .builder
      .appName("StructuredNetworkWordCountWindowed")
      .getOrCreate()

    import spark.implicits._

    // Create DataFrame representing the stream of input lines from connection to host:port
    val lines = spark.readStream
      .format("socket")
      .option("host", host)
      .option("port", port)
      .option("includeTimestamp", true)
      .load()

    // Split the lines into words, retaining timestamps
    val words = lines.as[(String, Timestamp)].flatMap(line =>
      line._1.split(" ").map(word => (word, line._2))
    ).toDF("word", "timestamp")

    // Group the data by window and word and compute the count of each group
    val windowedCounts = words.groupBy(
      window($"timestamp", windowDuration, slideDuration), $"word"
    ).count().orderBy("window")

    // Start running the query that prints the windowed word counts to the console
    val query = windowedCounts.writeStream
      .outputMode("complete")
      .format("console")
      .option("truncate", "false")
      .start()

    query.awaitTermination()
  }
}
// scalastyle:on println
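To exercise the same windowed aggregation without a network socket, a hedged sketch using MemoryStream, mirroring the testing pattern in the other examples; the object name and sample data are illustrative:

import java.sql.Timestamp

import org.apache.spark.sql.{SQLContext, SparkSession}
import org.apache.spark.sql.execution.streaming.MemoryStream
import org.apache.spark.sql.functions._

object WindowedCountLocalDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder.master("local[*]").appName("windowed-count-demo").getOrCreate()
    import spark.implicits._
    implicit val sqlContext: SQLContext = spark.sqlContext

    // Feed (word, timestamp) pairs from memory instead of reading from a socket.
    val input = MemoryStream[(String, Timestamp)]
    input.addData(("spark", new Timestamp(0L)), ("scala", new Timestamp(30000L)))

    val counts = input.toDS().toDF("word", "timestamp")
      .groupBy(window($"timestamp", "60 seconds", "30 seconds"), $"word")
      .count()

    val query = counts.writeStream
      .outputMode("complete")
      .format("console")
      .option("truncate", "false")
      .start()

    input.addData(("spark", new Timestamp(60000L)))
    query.processAllAvailable()
    query.stop()
    spark.stop()
  }
}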
Example 150
Source File: QueryPartitionSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive

import java.io.File
import java.sql.Timestamp

import com.google.common.io.Files
import org.apache.hadoop.fs.FileSystem

import org.apache.spark.sql._
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.util.Utils

class QueryPartitionSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
  import spark.implicits._

  test("SPARK-5068: query data when path doesn't exist") {
    withSQLConf((SQLConf.HIVE_VERIFY_PARTITION_PATH.key, "true")) {
      val testData = sparkContext.parallelize(
        (1 to 10).map(i => TestData(i, i.toString))).toDF()
      testData.createOrReplaceTempView("testData")

      val tmpDir = Files.createTempDir()
      // create the table for test
      sql(s"CREATE TABLE table_with_partition(key int,value string) " +
        s"PARTITIONED by (ds string) location '${tmpDir.toURI}' ")
      sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='1') " +
        "SELECT key,value FROM testData")
      sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='2') " +
        "SELECT key,value FROM testData")
      sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='3') " +
        "SELECT key,value FROM testData")
      sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='4') " +
        "SELECT key,value FROM testData")

      // test for the exist path
      checkAnswer(sql("select key,value from table_with_partition"),
        testData.toDF.collect ++ testData.toDF.collect ++
          testData.toDF.collect ++ testData.toDF.collect)

      // delete the path of one partition
      tmpDir.listFiles
        .find { f => f.isDirectory && f.getName().startsWith("ds=") }
        .foreach { f => Utils.deleteRecursively(f) }

      // test for after delete the path
      checkAnswer(sql("select key,value from table_with_partition"),
        testData.toDF.collect ++ testData.toDF.collect ++ testData.toDF.collect)

      sql("DROP TABLE IF EXISTS table_with_partition")
      sql("DROP TABLE IF EXISTS createAndInsertTest")
    }
  }

  test("SPARK-21739: Cast expression should initialize timezoneId") {
    withTable("table_with_timestamp_partition") {
      sql("CREATE TABLE table_with_timestamp_partition(value int) PARTITIONED BY (ts TIMESTAMP)")
      sql("INSERT OVERWRITE TABLE table_with_timestamp_partition " +
        "PARTITION (ts = '2010-01-01 00:00:00.000') VALUES (1)")

      // test for Cast expression in TableReader
      checkAnswer(sql("SELECT * FROM table_with_timestamp_partition"),
        Seq(Row(1, Timestamp.valueOf("2010-01-01 00:00:00.000"))))

      // test for Cast expression in HiveTableScanExec
      checkAnswer(sql("SELECT value FROM table_with_timestamp_partition " +
        "WHERE ts = '2010-01-01 00:00:00.000'"), Row(1))
    }
  }
}
Example 151
Source File: TypeCast.scala From mimir with Apache License 2.0 | 5 votes |
package mimir.exec.spark.datasource.google.spreadsheet

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.NumberFormat
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try

object TypeCast {

  private[spreadsheet] def castTo(
      datum: String,
      castType: DataType,
      nullable: Boolean = true
  ): Any = {
    castType match {
      case _: ByteType => datum.toByte
      case _: ShortType => datum.toShort
      case _: IntegerType => datum.toInt
      case _: LongType => datum.toLong
      case _: FloatType => Try(datum.toFloat)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).floatValue())
      // Fixed: the original fast path used datum.toFloat here, which silently lost double precision.
      case _: DoubleType => Try(datum.toDouble)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).doubleValue())
      case _: BooleanType => datum.toBoolean
      case _: DecimalType => new BigDecimal(datum.replaceAll(",", ""))
      case _: TimestampType => Timestamp.valueOf(datum)
      case _: DateType => Date.valueOf(datum)
      case _: StringType => datum
      case _ => throw new RuntimeException(s"Unsupported type: ${castType.typeName}")
    }
  }
}
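A hedged sketch of how castTo might be exercised; since the method is private[spreadsheet], this assumes a caller inside the same package, and the demo object name is hypothetical:

package mimir.exec.spark.datasource.google.spreadsheet

import org.apache.spark.sql.types.{DoubleType, TimestampType}

object TypeCastDemo {
  def main(args: Array[String]): Unit = {
    // TimestampType goes through java.sql.Timestamp.valueOf, so the string
    // must use the JDBC escape format yyyy-[m]m-[d]d hh:mm:ss[.f...].
    val ts = TypeCast.castTo("2018-11-12 09:42:00", TimestampType)
    val price = TypeCast.castTo("1234.56", DoubleType)
    println(s"$ts / $price")
  }
}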
Example 152
Source File: TestResultSetDataConverter.scala From ohara with Apache License 2.0 | 4 votes |
package oharastream.ohara.connector.jdbc.source

import java.sql.{ResultSet, Time, Timestamp}

import oharastream.ohara.client.configurator.InspectApi.RdbColumn
import oharastream.ohara.common.rule.OharaTest
import oharastream.ohara.connector.jdbc.datatype.{MySQLDataTypeConverter, RDBDataTypeConverter}
import oharastream.ohara.connector.jdbc.util.{ColumnInfo, DateTimeUtils}
import org.junit.Test
import org.mockito.Mockito
import org.mockito.Mockito._
import org.scalatest.matchers.should.Matchers._

class TestResultSetDataConverter extends OharaTest {
  private[this] val VARCHAR: String = "VARCHAR"
  private[this] val TIMESTAMP: String = "TIMESTAMP"
  private[this] val INT: String = "INT"
  private[this] val DATE: String = "DATE"
  private[this] val TIME: String = "TIME"

  @Test
  def testConverterRecord(): Unit = {
    val resultSet: ResultSet = Mockito.mock(classOf[ResultSet])
    when(resultSet.getTimestamp("column1", DateTimeUtils.CALENDAR)).thenReturn(new Timestamp(0L))
    when(resultSet.getString("column2")).thenReturn("aaa")
    when(resultSet.getInt("column3")).thenReturn(10)

    val columnList = Seq(
      RdbColumn("column1", TIMESTAMP, true),
      RdbColumn("column2", VARCHAR, false),
      RdbColumn("column3", INT, false)
    )

    val dataTypeConverter: RDBDataTypeConverter = new MySQLDataTypeConverter()
    val result: Seq[ColumnInfo[_]] = ResultSetDataConverter.converterRecord(dataTypeConverter, resultSet, columnList)
    result.head.columnName shouldBe "column1"
    result.head.columnType shouldBe TIMESTAMP
    result.head.value.toString shouldBe "1970-01-01 08:00:00.0"

    result(1).columnName shouldBe "column2"
    result(1).columnType shouldBe VARCHAR
    result(1).value shouldBe "aaa"

    result(2).columnName shouldBe "column3"
    result(2).columnType shouldBe INT
    result(2).value shouldBe 10
  }

  @Test
  def testNullValue(): Unit = {
    val resultSet: ResultSet = Mockito.mock(classOf[ResultSet])
    when(resultSet.getTimestamp("column1", DateTimeUtils.CALENDAR)).thenReturn(new Timestamp(0L))
    when(resultSet.getString("column2")).thenReturn(null)
    when(resultSet.getDate("column3")).thenReturn(null)
    when(resultSet.getTime("column4")).thenReturn(null)

    val columnList = Seq(
      RdbColumn("column1", TIMESTAMP, true),
      RdbColumn("column2", VARCHAR, false),
      RdbColumn("column3", DATE, false),
      RdbColumn("column4", TIME, false)
    )

    val dataTypeConverter: RDBDataTypeConverter = new MySQLDataTypeConverter()
    val result: Seq[ColumnInfo[_]] = ResultSetDataConverter.converterRecord(dataTypeConverter, resultSet, columnList)
    result(1).columnName shouldBe "column2"
    result(1).columnType shouldBe VARCHAR
    result(1).value shouldBe "null"

    result(2).columnName shouldBe "column3"
    result(2).columnType shouldBe DATE
    result(2).value.toString shouldBe "1970-01-01"

    result(3).columnName shouldBe "column4"
    result(3).columnType shouldBe TIME
    result(3).value.toString shouldBe new Time(0).toString
  }
}
Example 153
Source File: StreamingProducer.scala From Scala-Programming-Projects with MIT License | 4 votes |
package coinyser

import java.sql.Timestamp
import java.text.SimpleDateFormat
import java.util.TimeZone

import cats.effect.IO
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.pusher.client.Client
import com.pusher.client.channel.SubscriptionEventListener
import com.typesafe.scalalogging.StrictLogging

object StreamingProducer extends StrictLogging {

  def subscribe(pusher: Client)(onTradeReceived: String => Unit): IO[Unit] =
    for {
      _ <- IO(pusher.connect())
      channel <- IO(pusher.subscribe("live_trades"))
      _ <- IO(channel.bind("trade", new SubscriptionEventListener() {
        override def onEvent(channel: String, event: String, data: String): Unit = {
          logger.info(s"Received event: $event with data: $data")
          onTradeReceived(data)
        }
      }))
    } yield ()

  val mapper: ObjectMapper = {
    val m = new ObjectMapper()
    m.registerModule(DefaultScalaModule)
    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    // Very important: the storage must be in UTC
    sdf.setTimeZone(TimeZone.getTimeZone("UTC"))
    m.setDateFormat(sdf)
  }

  def deserializeWebsocketTransaction(s: String): WebsocketTransaction =
    mapper.readValue(s, classOf[WebsocketTransaction])

  def convertWsTransaction(wsTx: WebsocketTransaction): Transaction =
    Transaction(
      timestamp = new Timestamp(wsTx.timestamp.toLong * 1000),
      tid = wsTx.id,
      price = wsTx.price,
      sell = wsTx.`type` == 1,
      amount = wsTx.amount)

  def serializeTransaction(tx: Transaction): String =
    mapper.writeValueAsString(tx)
}
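The java.sql.Timestamp detail in convertWsTransaction is that the websocket payload carries epoch seconds as a string, hence the multiplication by 1000. A minimal standalone illustration; the case class below is an assumed stand-in for the project's WebsocketTransaction, reduced to the field used in the conversion:

import java.sql.Timestamp

// Assumed stand-in: only the field used for the time conversion is shown.
case class WsTx(timestamp: String)

// Epoch seconds (as sent on the wire) -> milliseconds -> java.sql.Timestamp.
def toSqlTimestamp(wsTx: WsTx): Timestamp =
  new Timestamp(wsTx.timestamp.toLong * 1000)

println(toSqlTimestamp(WsTx("1538312231"))) // prints the instant in the local time zone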
Example 154
Source File: StreamingPredictionsSpec.scala From odsc-east-realish-predictions with Apache License 2.0 | 4 votes |
package com.twilio.open.odsc.realish

import java.sql.Timestamp
import java.time.Instant
import java.util.{Random, UUID}

import org.apache.spark.SparkConf
import org.apache.spark.sql.{Encoders, SQLContext, SparkSession}
import org.scalatest.{FunSuite, Matchers}
import org.apache.spark.sql.execution.streaming.MemoryStream
import org.apache.spark.sql.functions._
import org.apache.spark.sql.streaming.{OutputMode, Trigger}

import scala.concurrent.duration._

class StreamingPredictionsSpec extends FunSuite with Matchers with SharedSparkSql {

  override def conf: SparkConf = {
    new SparkConf()
      .setMaster("local[*]")
      .setAppName("odsc-spark-utils")
      .set("spark.ui.enabled", "false")
      .set("spark.app.id", appID)
      .set("spark.driver.host", "localhost")
      .set("spark.sql.session.timeZone", "UTC")
  }

  final val notRandomRandom = {
    val generator = new Random
    generator.setSeed(100L)
    generator
  }

  test("should stream in some mock data for fun") {
    implicit val spark: SparkSession = sparkSql
    import spark.implicits._
    implicit val sqlContext: SQLContext = spark.sqlContext

    implicit val metricEncoder = Encoders.product[Metric]
    val metricData = MemoryStream[Metric]

    val startingInstant = Instant.now()

    val backingData = (1 to 10000).map(offset => {
      val metric = if (offset % 2 == 0) "loss_percentage" else "connect_duration"
      val nextLoss = notRandomRandom.nextDouble() * notRandomRandom.nextInt(100)
      Metric(
        Timestamp.from(startingInstant.minusSeconds(offset)),
        UUID.randomUUID().toString,
        metric,
        value = if (metric == "loss_percentage") nextLoss else notRandomRandom.nextDouble() * notRandomRandom.nextInt(240),
        countryCode = if (offset % 8 == 0) "US" else "BR",
        callDirection = if (metric == "loss_percentage") "inbound" else "outbound"
      )
    })
    val processingTimeTrigger = Trigger.ProcessingTime(2.seconds)

    val streamingQuery = metricData.toDF()
      .withWatermark("timestamp", "2 hours")
      .groupBy(col("metric"), col("countryCode"), window($"timestamp", "5 minutes"))
      .agg(
        min("value") as "min",
        avg("value") as "mean",
        max("value") as "max",
        count("*") as "total"
      )
      .writeStream
      .format("memory")
      .queryName("datastream")
      .outputMode(OutputMode.Append())
      .trigger(processingTimeTrigger)
      .start()

    metricData.addData(backingData)
    streamingQuery.processAllAvailable()

    spark.sql("select * from datastream").show(20, false)

    val checkChange = spark.sql("select * from datastream")
      .groupBy("metric", "countryCode")
      .agg(
        sum("total") as "total",
        avg("mean") as "mean"
      )

    checkChange.show(20, false)

    // now can do interesting things with minor back tracking...

    streamingQuery.stop()
  }
}