org.apache.spark.unsafe.types.CalendarInterval Scala Examples
The following examples show how to use org.apache.spark.unsafe.types.CalendarInterval.
Each example notes its source file, the project it comes from, and the license, so you can trace it back to the original code.
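Before the project examples, here is a minimal sketch of constructing a CalendarInterval by hand and reading its fields. The constructor signature is version-dependent (Spark 2.x takes months and microseconds; Spark 3.x adds a separate days argument), so treat the snippet as illustrative rather than canonical.

import org.apache.spark.unsafe.types.CalendarInterval

// Spark 2.x constructor: (months, microseconds); Spark 3.x uses (months, days, microseconds).
val tenMinutes = new CalendarInterval(0, 10L * 60 * 1000 * 1000)

// A CalendarInterval keeps a calendar component (months) separate from a
// fixed-length component (microseconds).
println(tenMinutes.months)        // 0
println(tenMinutes.microseconds)  // 600000000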
Example 1
Source File: EventTimeWatermarkExec.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.execution.streaming

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{Attribute, UnsafeProjection}
import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.types.MetadataBuilder
import org.apache.spark.unsafe.types.CalendarInterval
import org.apache.spark.util.AccumulatorV2

case class EventTimeWatermarkExec(
    eventTime: Attribute,
    delay: CalendarInterval,
    child: SparkPlan) extends SparkPlan {

  val eventTimeStats = new EventTimeStatsAccum()
  sparkContext.register(eventTimeStats)

  override protected def doExecute(): RDD[InternalRow] = {
    child.execute().mapPartitions { iter =>
      val getEventTime = UnsafeProjection.create(eventTime :: Nil, child.output)
      iter.map { row =>
        eventTimeStats.add(getEventTime(row).getLong(0) / 1000)
        row
      }
    }
  }

  // Update the metadata on the eventTime column to include the desired delay.
  override val output: Seq[Attribute] = child.output.map { a =>
    if (a semanticEquals eventTime) {
      val updatedMetadata = new MetadataBuilder()
        .withMetadata(a.metadata)
        .putLong(EventTimeWatermark.delayKey, delay.milliseconds)
        .build()
      a.withMetadata(updatedMetadata)
    } else {
      a
    }
  }

  override def children: Seq[SparkPlan] = child :: Nil
}
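EventTimeWatermarkExec is the physical counterpart of the watermark a user attaches with Dataset.withWatermark; the delay string is parsed into the CalendarInterval seen above and recorded in the eventTime column's metadata. A minimal sketch of the user-facing call, using a rate source purely for illustration:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.window

val spark = SparkSession.builder().master("local[*]").appName("watermark-sketch").getOrCreate()

// A rate source provides a streaming DataFrame with a "timestamp" column.
val events = spark.readStream.format("rate").option("rowsPerSecond", "1").load()

// "10 minutes" is parsed into a CalendarInterval; the resulting delay ends up in the
// column metadata under EventTimeWatermark.delayKey, as the plan above shows.
val windowedCounts = events
  .withWatermark("timestamp", "10 minutes")
  .groupBy(window(events("timestamp"), "5 minutes"))
  .count()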
Example 2
Source File: EventTimeWatermarkExec.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.execution.streaming

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{Attribute, UnsafeProjection}
import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark
import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode}
import org.apache.spark.sql.types.MetadataBuilder
import org.apache.spark.unsafe.types.CalendarInterval
import org.apache.spark.util.AccumulatorV2

case class EventTimeWatermarkExec(
    eventTime: Attribute,
    delay: CalendarInterval,
    child: SparkPlan) extends UnaryExecNode {

  val eventTimeStats = new EventTimeStatsAccum()
  val delayMs = EventTimeWatermark.getDelayMs(delay)

  sparkContext.register(eventTimeStats)

  override protected def doExecute(): RDD[InternalRow] = {
    child.execute().mapPartitions { iter =>
      val getEventTime = UnsafeProjection.create(eventTime :: Nil, child.output)
      iter.map { row =>
        eventTimeStats.add(getEventTime(row).getLong(0) / 1000)
        row
      }
    }
  }

  // Update the metadata on the eventTime column to include the desired delay.
  override val output: Seq[Attribute] = child.output.map { a =>
    if (a semanticEquals eventTime) {
      val updatedMetadata = new MetadataBuilder()
        .withMetadata(a.metadata)
        .putLong(EventTimeWatermark.delayKey, delayMs)
        .build()
      a.withMetadata(updatedMetadata)
    } else if (a.metadata.contains(EventTimeWatermark.delayKey)) {
      // Remove existing watermark
      val updatedMetadata = new MetadataBuilder()
        .withMetadata(a.metadata)
        .remove(EventTimeWatermark.delayKey)
        .build()
      a.withMetadata(updatedMetadata)
    } else {
      a
    }
  }
}
Example 3
Source File: ContinuousTrigger.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.execution.streaming.continuous

import java.util.concurrent.TimeUnit

import scala.concurrent.duration.Duration

import org.apache.commons.lang3.StringUtils

import org.apache.spark.annotation.{Experimental, InterfaceStability}
import org.apache.spark.sql.streaming.{ProcessingTime, Trigger}
import org.apache.spark.unsafe.types.CalendarInterval

@InterfaceStability.Evolving
case class ContinuousTrigger(intervalMs: Long) extends Trigger {
  require(intervalMs >= 0, "the interval of trigger should not be negative")
}

private[sql] object ContinuousTrigger {
  def apply(interval: String): ContinuousTrigger = {
    if (StringUtils.isBlank(interval)) {
      throw new IllegalArgumentException(
        "interval cannot be null or blank.")
    }
    val cal = if (interval.startsWith("interval")) {
      CalendarInterval.fromString(interval)
    } else {
      CalendarInterval.fromString("interval " + interval)
    }
    if (cal == null) {
      throw new IllegalArgumentException(s"Invalid interval: $interval")
    }
    if (cal.months > 0) {
      throw new IllegalArgumentException(s"Doesn't support month or year interval: $interval")
    }
    new ContinuousTrigger(cal.microseconds / 1000)
  }

  def apply(interval: Duration): ContinuousTrigger = {
    ContinuousTrigger(interval.toMillis)
  }

  def create(interval: String): ContinuousTrigger = {
    apply(interval)
  }

  def create(interval: Long, unit: TimeUnit): ContinuousTrigger = {
    ContinuousTrigger(unit.toMillis(interval))
  }
}
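ContinuousTrigger is normally created through the public Trigger.Continuous API rather than instantiated directly; the string form goes through the CalendarInterval parsing shown above. A hedged sketch of starting a continuous-mode query (sink and options are illustrative):

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.Trigger

val spark = SparkSession.builder().master("local[*]").appName("continuous-sketch").getOrCreate()

val rate = spark.readStream.format("rate").load()

// Trigger.Continuous("1 second") is parsed with the same interval rules as ContinuousTrigger.apply.
val query = rate.writeStream
  .format("console")
  .trigger(Trigger.Continuous("1 second"))
  .start()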
Example 4
Source File: GenerateUnsafeProjectionSuite.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions.codegen

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.BoundReference
import org.apache.spark.sql.catalyst.util.{ArrayData, MapData}
import org.apache.spark.sql.types.{DataType, Decimal, StringType, StructType}
import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}

class GenerateUnsafeProjectionSuite extends SparkFunSuite {
  test("Test unsafe projection string access pattern") {
    val dataType = (new StructType).add("a", StringType)
    val exprs = BoundReference(0, dataType, nullable = true) :: Nil
    val projection = GenerateUnsafeProjection.generate(exprs)
    val result = projection.apply(InternalRow(AlwaysNull))
    assert(!result.isNullAt(0))
    assert(result.getStruct(0, 1).isNullAt(0))
  }
}

object AlwaysNull extends InternalRow {
  override def numFields: Int = 1
  override def setNullAt(i: Int): Unit = {}
  override def copy(): InternalRow = this
  override def anyNull: Boolean = true
  override def isNullAt(ordinal: Int): Boolean = true
  override def update(i: Int, value: Any): Unit = notSupported
  override def getBoolean(ordinal: Int): Boolean = notSupported
  override def getByte(ordinal: Int): Byte = notSupported
  override def getShort(ordinal: Int): Short = notSupported
  override def getInt(ordinal: Int): Int = notSupported
  override def getLong(ordinal: Int): Long = notSupported
  override def getFloat(ordinal: Int): Float = notSupported
  override def getDouble(ordinal: Int): Double = notSupported
  override def getDecimal(ordinal: Int, precision: Int, scale: Int): Decimal = notSupported
  override def getUTF8String(ordinal: Int): UTF8String = notSupported
  override def getBinary(ordinal: Int): Array[Byte] = notSupported
  override def getInterval(ordinal: Int): CalendarInterval = notSupported
  override def getStruct(ordinal: Int, numFields: Int): InternalRow = notSupported
  override def getArray(ordinal: Int): ArrayData = notSupported
  override def getMap(ordinal: Int): MapData = notSupported
  override def get(ordinal: Int, dataType: DataType): AnyRef = notSupported
  private def notSupported: Nothing = throw new UnsupportedOperationException
}
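Along the same lines as the suite above, the sketch below (not part of the original test) round-trips a CalendarInterval through an UnsafeProjection and reads it back with getInterval. It relies on Catalyst's internal API, so exact signatures may differ between Spark versions.

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.UnsafeProjection
import org.apache.spark.sql.types.{CalendarIntervalType, DataType}
import org.apache.spark.unsafe.types.CalendarInterval

// Build a projection over a single interval column, write one row, and read the interval back.
val projection = UnsafeProjection.create(Array[DataType](CalendarIntervalType))
val interval = new CalendarInterval(1, 1000L) // Spark 2.x two-argument constructor
val unsafeRow = projection(InternalRow(interval))
assert(unsafeRow.getInterval(0) == interval)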
Example 5
Source File: EventTimeWatermark.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.catalyst.plans.logical

import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.types.MetadataBuilder
import org.apache.spark.unsafe.types.CalendarInterval

object EventTimeWatermark {
  // Members such as delayKey and getDelayMs (referenced below) are omitted from this excerpt.
}

case class EventTimeWatermark(
    eventTime: Attribute,
    delay: CalendarInterval,
    child: LogicalPlan) extends UnaryNode {

  // Update the metadata on the eventTime column to include the desired delay.
  override val output: Seq[Attribute] = child.output.map { a =>
    if (a semanticEquals eventTime) {
      val delayMs = EventTimeWatermark.getDelayMs(delay)
      val updatedMetadata = new MetadataBuilder()
        .withMetadata(a.metadata)
        .putLong(EventTimeWatermark.delayKey, delayMs)
        .build()
      a.withMetadata(updatedMetadata)
    } else if (a.metadata.contains(EventTimeWatermark.delayKey)) {
      // Remove existing watermark
      val updatedMetadata = new MetadataBuilder()
        .withMetadata(a.metadata)
        .remove(EventTimeWatermark.delayKey)
        .build()
      a.withMetadata(updatedMetadata)
    } else {
      a
    }
  }
}
Example 6
Source File: EventTimeWatermarkExec.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.execution.streaming

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{Attribute, UnsafeProjection}
import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.types.MetadataBuilder
import org.apache.spark.unsafe.types.CalendarInterval
import org.apache.spark.util.AccumulatorV2

case class EventTimeWatermarkExec(
    eventTime: Attribute,
    delay: CalendarInterval,
    child: SparkPlan) extends SparkPlan {

  override def user: String = child.user

  val eventTimeStats = new EventTimeStatsAccum()
  sparkContext.register(eventTimeStats)

  override protected def doExecute(): RDD[InternalRow] = {
    child.execute().mapPartitions { iter =>
      val getEventTime = UnsafeProjection.create(eventTime :: Nil, child.output)
      iter.map { row =>
        eventTimeStats.add(getEventTime(row).getLong(0) / 1000)
        row
      }
    }
  }

  // Update the metadata on the eventTime column to include the desired delay.
  override val output: Seq[Attribute] = child.output.map { a =>
    if (a semanticEquals eventTime) {
      val updatedMetadata = new MetadataBuilder()
        .withMetadata(a.metadata)
        .putLong(EventTimeWatermark.delayKey, delay.milliseconds)
        .build()
      a.withMetadata(updatedMetadata)
    } else {
      a
    }
  }

  override def children: Seq[SparkPlan] = child :: Nil
}
Example 7
Source File: EventTimeWatermark.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.catalyst.plans.logical

import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
import org.apache.spark.sql.types.MetadataBuilder
import org.apache.spark.unsafe.types.CalendarInterval

object EventTimeWatermark {
  // Members such as delayKey (referenced below) are omitted from this excerpt.
}

case class EventTimeWatermark(
    eventTime: Attribute,
    delay: CalendarInterval,
    child: LogicalPlan) extends LogicalPlan {

  // Update the metadata on the eventTime column to include the desired delay.
  override val output: Seq[Attribute] = child.output.map { a =>
    if (a semanticEquals eventTime) {
      val updatedMetadata = new MetadataBuilder()
        .withMetadata(a.metadata)
        .putLong(EventTimeWatermark.delayKey, delay.milliseconds)
        .build()
      a.withMetadata(updatedMetadata)
    } else {
      a
    }
  }

  override val children: Seq[LogicalPlan] = child :: Nil
}
Example 8
Source File: TemporalUdafs.scala From morpheus with Apache License 2.0
package org.opencypher.morpheus.impl.temporal

import org.apache.logging.log4j.scala.Logging
import org.apache.spark.sql.Row
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
import org.apache.spark.sql.types.{CalendarIntervalType, DataType, LongType, StructField, StructType}
import org.apache.spark.unsafe.types.CalendarInterval
import org.opencypher.okapi.impl.temporal.TemporalConstants
import org.opencypher.morpheus.impl.temporal.TemporalConversions._

object TemporalUdafs extends Logging {

  abstract class SimpleDurationAggregation(aggrName: String) extends UserDefinedAggregateFunction {
    override def inputSchema: StructType = StructType(Array(StructField("duration", CalendarIntervalType)))
    override def bufferSchema: StructType = StructType(Array(StructField(aggrName, CalendarIntervalType)))
    override def dataType: DataType = CalendarIntervalType
    override def deterministic: Boolean = true
    override def initialize(buffer: MutableAggregationBuffer): Unit = {
      buffer(0) = new CalendarInterval(0, 0L)
    }
    override def evaluate(buffer: Row): Any = buffer.getAs[CalendarInterval](0)
  }

  class DurationSum extends SimpleDurationAggregation("sum") {
    override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
      buffer(0) = buffer.getAs[CalendarInterval](0).add(input.getAs[CalendarInterval](0))
    }
    override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
      buffer1(0) = buffer2.getAs[CalendarInterval](0).add(buffer1.getAs[CalendarInterval](0))
    }
  }

  class DurationMax extends SimpleDurationAggregation("max") {
    override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
      val currMaxInterval = buffer.getAs[CalendarInterval](0)
      val inputInterval = input.getAs[CalendarInterval](0)
      buffer(0) = if (currMaxInterval.toDuration.compare(inputInterval.toDuration) >= 0) currMaxInterval else inputInterval
    }
    override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
      val interval1 = buffer1.getAs[CalendarInterval](0)
      val interval2 = buffer2.getAs[CalendarInterval](0)
      buffer1(0) = if (interval1.toDuration.compare(interval2.toDuration) >= 0) interval1 else interval2
    }
  }

  class DurationMin extends SimpleDurationAggregation("min") {
    override def initialize(buffer: MutableAggregationBuffer): Unit = {
      buffer(0) = new CalendarInterval(Integer.MAX_VALUE, Long.MaxValue)
    }
    override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
      val currMinInterval = buffer.getAs[CalendarInterval](0)
      val inputInterval = input.getAs[CalendarInterval](0)
      buffer(0) = if (inputInterval.toDuration.compare(currMinInterval.toDuration) >= 0) currMinInterval else inputInterval
    }
    override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
      val interval1 = buffer1.getAs[CalendarInterval](0)
      val interval2 = buffer2.getAs[CalendarInterval](0)
      buffer1(0) = if (interval2.toDuration.compare(interval1.toDuration) >= 0) interval1 else interval2
    }
  }

  class DurationAvg extends UserDefinedAggregateFunction {
    override def inputSchema: StructType = StructType(Array(StructField("duration", CalendarIntervalType)))
    override def bufferSchema: StructType = StructType(Array(StructField("sum", CalendarIntervalType), StructField("cnt", LongType)))
    override def dataType: DataType = CalendarIntervalType
    override def deterministic: Boolean = true
    override def initialize(buffer: MutableAggregationBuffer): Unit = {
      buffer(0) = new CalendarInterval(0, 0L)
      buffer(1) = 0L
    }
    override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
      buffer(0) = buffer.getAs[CalendarInterval](0).add(input.getAs[CalendarInterval](0))
      buffer(1) = buffer.getLong(1) + 1
    }
    override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
      buffer1(0) = buffer2.getAs[CalendarInterval](0).add(buffer1.getAs[CalendarInterval](0))
      buffer1(1) = buffer1.getLong(1) + buffer2.getLong(1)
    }
    override def evaluate(buffer: Row): Any = {
      val sumInterval = buffer.getAs[CalendarInterval](0)
      val cnt = buffer.getLong(1)
      new CalendarInterval((sumInterval.months / cnt).toInt, sumInterval.microseconds / cnt)
    }
  }

  val durationSum = new DurationSum()
  val durationAvg = new DurationAvg()
  val durationMin = new DurationMin()
  val durationMax = new DurationMax()
}
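These aggregators are plain UserDefinedAggregateFunction instances, so they can be applied to a CalendarIntervalType column like any other UDAF. A brief usage sketch; the DataFrame and its "duration" column are made up for illustration:

import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.col

// Sums a CalendarIntervalType column using the DurationSum UDAF defined above.
def sumDurations(df: DataFrame): DataFrame = {
  val durationSum = new TemporalUdafs.DurationSum()
  df.agg(durationSum(col("duration")).as("totalDuration"))
}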
Example 9
Source File: DeltaConfigSuite.scala From delta with Apache License 2.0
package org.apache.spark.sql.delta

import java.util.concurrent.TimeUnit

import org.apache.spark.sql.delta.DeltaConfigs.{isValidIntervalConfigValue, parseCalendarInterval}

import org.apache.spark.SparkFunSuite
import org.apache.spark.unsafe.types.CalendarInterval

class DeltaConfigSuite extends SparkFunSuite {

  test("parseCalendarInterval") {
    for (input <- Seq("5 MINUTES", "5 minutes", "5 Minutes", "inTERval 5 minutes")) {
      assert(parseCalendarInterval(input) ===
        new CalendarInterval(0, 0, TimeUnit.MINUTES.toMicros(5)))
    }
    for (input <- Seq(null, "", " ")) {
      val e = intercept[IllegalArgumentException] {
        parseCalendarInterval(input)
      }
      assert(e.getMessage.contains("cannot be null or blank"))
    }
    for (input <- Seq("interval", "interval1 day", "foo", "foo 1 day")) {
      val e = intercept[IllegalArgumentException] {
        parseCalendarInterval(input)
      }
      assert(e.getMessage.contains("Invalid interval"))
    }
  }

  test("isValidIntervalConfigValue") {
    for (input <- Seq(
        // Allow 0 microsecond because we always convert microseconds to milliseconds so 0
        // microsecond is the same as 100 microseconds.
        "0 microsecond",
        "1 microsecond",
        "1 millisecond",
        "1 day",
        "-1 day 86400001 milliseconds", // This is 1 millisecond
        "1 day -1 microseconds")) {
      assert(isValidIntervalConfigValue(parseCalendarInterval(input)))
    }
    for (input <- Seq(
        "-1 microseconds",
        "-1 millisecond",
        "-1 day",
        "1 day -86400001 milliseconds", // This is -1 millisecond
        "1 month",
        "1 year")) {
      assert(!isValidIntervalConfigValue(parseCalendarInterval(input)), s"$input")
    }
  }
}
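For context, parseCalendarInterval backs Delta's interval-valued table properties, which users supply as strings. The table name and property value below are illustrative (delta.logRetentionDuration is one such interval-valued property), not something exercised by this suite.

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().getOrCreate()

// The string on the right-hand side is parsed with the same interval rules tested above.
spark.sql(
  "ALTER TABLE events SET TBLPROPERTIES ('delta.logRetentionDuration' = 'interval 30 days')")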
Example 10
Source File: TimeWindow.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions

import org.apache.commons.lang3.StringUtils

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.TypeCheckFailure
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval

case class TimeWindow(
    timeColumn: Expression,
    windowDuration: Long,
    slideDuration: Long,
    startTime: Long) extends UnaryExpression
  with ImplicitCastInputTypes
  with Unevaluable
  with NonSQLExpression {

  //////////////////////////
  // SQL Constructors
  //////////////////////////
  def this(
      timeColumn: Expression,
      windowDuration: Expression,
      slideDuration: Expression,
      startTime: Expression) = {
    this(timeColumn, TimeWindow.parseExpression(windowDuration),
      TimeWindow.parseExpression(slideDuration), TimeWindow.parseExpression(startTime))
  }

  def this(timeColumn: Expression, windowDuration: Expression, slideDuration: Expression) = {
    this(timeColumn, TimeWindow.parseExpression(windowDuration),
      TimeWindow.parseExpression(slideDuration), 0)
  }

  def this(timeColumn: Expression, windowDuration: Expression) = {
    this(timeColumn, windowDuration, windowDuration)
  }

  override def child: Expression = timeColumn
  override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType)
  override def dataType: DataType = new StructType()
    .add(StructField("start", TimestampType))
    .add(StructField("end", TimestampType))

  // This expression is replaced in the analyzer.
  override lazy val resolved = false
}

// TimeWindow.parseExpression is defined in the companion object, which is omitted from this excerpt.

case class PreciseTimestamp(child: Expression) extends UnaryExpression with ExpectsInputTypes {
  override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType)
  override def dataType: DataType = LongType
  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val eval = child.genCode(ctx)
    ev.copy(code = eval.code +
      s"""boolean ${ev.isNull} = ${eval.isNull};
         |${ctx.javaType(dataType)} ${ev.value} = ${eval.value};
       """.stripMargin)
  }
}
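TimeWindow is what the analyzer substitutes for the public window function, whose duration strings go through the interval parsing referenced above. A short sketch of the user-facing call; the DataFrame and column name are illustrative:

import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.{col, window}

// Sliding 10-minute windows that advance every 5 minutes; both strings become window durations.
def windowedCounts(eventsDf: DataFrame): DataFrame =
  eventsDf.groupBy(window(col("eventTime"), "10 minutes", "5 minutes")).count()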
Example 11
Source File: EventTimeWatermark.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.catalyst.plans.logical

import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
import org.apache.spark.sql.types.MetadataBuilder
import org.apache.spark.unsafe.types.CalendarInterval

object EventTimeWatermark {
  // Members such as delayKey (referenced below) are omitted from this excerpt.
}

case class EventTimeWatermark(
    eventTime: Attribute,
    delay: CalendarInterval,
    child: LogicalPlan) extends LogicalPlan {

  // Update the metadata on the eventTime column to include the desired delay.
  override val output: Seq[Attribute] = child.output.map { a =>
    if (a semanticEquals eventTime) {
      val updatedMetadata = new MetadataBuilder()
        .withMetadata(a.metadata)
        .putLong(EventTimeWatermark.delayKey, delay.milliseconds)
        .build()
      a.withMetadata(updatedMetadata)
    } else {
      a
    }
  }

  override val children: Seq[LogicalPlan] = child :: Nil
}
Example 12
Source File: EventTimeWatermarkExec.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.execution.streaming

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{Attribute, UnsafeProjection}
import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark
import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode}
import org.apache.spark.sql.types.MetadataBuilder
import org.apache.spark.unsafe.types.CalendarInterval
import org.apache.spark.util.AccumulatorV2

case class EventTimeWatermarkExec(
    eventTime: Attribute,
    delay: CalendarInterval,
    child: SparkPlan) extends UnaryExecNode {

  val eventTimeStats = new EventTimeStatsAccum()
  val delayMs = EventTimeWatermark.getDelayMs(delay)

  sparkContext.register(eventTimeStats)

  override protected def doExecute(): RDD[InternalRow] = {
    child.execute().mapPartitions { iter =>
      val getEventTime = UnsafeProjection.create(eventTime :: Nil, child.output)
      iter.map { row =>
        eventTimeStats.add(getEventTime(row).getLong(0) / 1000)
        row
      }
    }
  }

  // Update the metadata on the eventTime column to include the desired delay.
  override val output: Seq[Attribute] = child.output.map { a =>
    if (a semanticEquals eventTime) {
      val updatedMetadata = new MetadataBuilder()
        .withMetadata(a.metadata)
        .putLong(EventTimeWatermark.delayKey, delayMs)
        .build()
      a.withMetadata(updatedMetadata)
    } else if (a.metadata.contains(EventTimeWatermark.delayKey)) {
      // Remove existing watermark
      val updatedMetadata = new MetadataBuilder()
        .withMetadata(a.metadata)
        .remove(EventTimeWatermark.delayKey)
        .build()
      a.withMetadata(updatedMetadata)
    } else {
      a
    }
  }
}
Example 13
Source File: ContinuousTrigger.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.execution.streaming.continuous

import java.util.concurrent.TimeUnit

import scala.concurrent.duration.Duration

import org.apache.commons.lang3.StringUtils

import org.apache.spark.annotation.{Experimental, InterfaceStability}
import org.apache.spark.sql.streaming.{ProcessingTime, Trigger}
import org.apache.spark.unsafe.types.CalendarInterval

@InterfaceStability.Evolving
case class ContinuousTrigger(intervalMs: Long) extends Trigger {
  require(intervalMs >= 0, "the interval of trigger should not be negative")
}

private[sql] object ContinuousTrigger {
  def apply(interval: String): ContinuousTrigger = {
    if (StringUtils.isBlank(interval)) {
      throw new IllegalArgumentException(
        "interval cannot be null or blank.")
    }
    val cal = if (interval.startsWith("interval")) {
      CalendarInterval.fromString(interval)
    } else {
      CalendarInterval.fromString("interval " + interval)
    }
    if (cal == null) {
      throw new IllegalArgumentException(s"Invalid interval: $interval")
    }
    if (cal.months > 0) {
      throw new IllegalArgumentException(s"Doesn't support month or year interval: $interval")
    }
    new ContinuousTrigger(cal.microseconds / 1000)
  }

  def apply(interval: Duration): ContinuousTrigger = {
    ContinuousTrigger(interval.toMillis)
  }

  def create(interval: String): ContinuousTrigger = {
    apply(interval)
  }

  def create(interval: Long, unit: TimeUnit): ContinuousTrigger = {
    ContinuousTrigger(unit.toMillis(interval))
  }
}
Example 14
Source File: LiteralGenerator.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions

import java.sql.{Date, Timestamp}

import org.scalacheck.{Arbitrary, Gen}

import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval

object LiteralGenerator {

  lazy val byteLiteralGen: Gen[Literal] =
    for { b <- Arbitrary.arbByte.arbitrary } yield Literal.create(b, ByteType)

  lazy val shortLiteralGen: Gen[Literal] =
    for { s <- Arbitrary.arbShort.arbitrary } yield Literal.create(s, ShortType)

  lazy val integerLiteralGen: Gen[Literal] =
    for { i <- Arbitrary.arbInt.arbitrary } yield Literal.create(i, IntegerType)

  lazy val longLiteralGen: Gen[Literal] =
    for { l <- Arbitrary.arbLong.arbitrary } yield Literal.create(l, LongType)

  lazy val floatLiteralGen: Gen[Literal] =
    for {
      f <- Gen.chooseNum(Float.MinValue / 2, Float.MaxValue / 2,
        Float.NaN, Float.PositiveInfinity, Float.NegativeInfinity)
    } yield Literal.create(f, FloatType)

  lazy val doubleLiteralGen: Gen[Literal] =
    for {
      f <- Gen.chooseNum(Double.MinValue / 2, Double.MaxValue / 2,
        Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity)
    } yield Literal.create(f, DoubleType)

  // TODO cache the generated data
  def decimalLiteralGen(precision: Int, scale: Int): Gen[Literal] = {
    assert(scale >= 0)
    assert(precision >= scale)
    Arbitrary.arbBigInt.arbitrary.map { s =>
      val a = (s % BigInt(10).pow(precision - scale)).toString()
      val b = (s % BigInt(10).pow(scale)).abs.toString()
      Literal.create(
        Decimal(BigDecimal(s"$a.$b"), precision, scale),
        DecimalType(precision, scale))
    }
  }

  lazy val stringLiteralGen: Gen[Literal] =
    for { s <- Arbitrary.arbString.arbitrary } yield Literal.create(s, StringType)

  lazy val binaryLiteralGen: Gen[Literal] =
    for { ab <- Gen.listOf[Byte](Arbitrary.arbByte.arbitrary) }
      yield Literal.create(ab.toArray, BinaryType)

  lazy val booleanLiteralGen: Gen[Literal] =
    for { b <- Arbitrary.arbBool.arbitrary } yield Literal.create(b, BooleanType)

  lazy val dateLiteralGen: Gen[Literal] =
    for { d <- Arbitrary.arbInt.arbitrary } yield Literal.create(new Date(d), DateType)

  lazy val timestampLiteralGen: Gen[Literal] =
    for { t <- Arbitrary.arbLong.arbitrary } yield Literal.create(new Timestamp(t), TimestampType)

  lazy val calendarIntervalLiterGen: Gen[Literal] =
    for { m <- Arbitrary.arbInt.arbitrary; s <- Arbitrary.arbLong.arbitrary }
      yield Literal.create(new CalendarInterval(m, s), CalendarIntervalType)

  // Sometimes, it would be quite expensive when unlimited value is used,
  // for example, the `times` arguments for StringRepeat would hang the test 'forever'
  // if it's tested against Int.MaxValue by ScalaCheck, therefore, use values from a limited
  // range is more reasonable
  lazy val limitedIntegerLiteralGen: Gen[Literal] =
    for { i <- Gen.choose(-100, 100) } yield Literal.create(i, IntegerType)

  def randomGen(dt: DataType): Gen[Literal] = {
    dt match {
      case ByteType => byteLiteralGen
      case ShortType => shortLiteralGen
      case IntegerType => integerLiteralGen
      case LongType => longLiteralGen
      case DoubleType => doubleLiteralGen
      case FloatType => floatLiteralGen
      case DateType => dateLiteralGen
      case TimestampType => timestampLiteralGen
      case BooleanType => booleanLiteralGen
      case StringType => stringLiteralGen
      case BinaryType => binaryLiteralGen
      case CalendarIntervalType => calendarIntervalLiterGen
      case DecimalType.Fixed(precision, scale) => decimalLiteralGen(precision, scale)
      case dt => throw new IllegalArgumentException(s"not supported type $dt")
    }
  }
}
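The generators are meant to be plugged into ScalaCheck's forAll for property-based expression tests; a minimal sketch, with an illustrative property:

import org.scalacheck.Prop.forAll
import org.apache.spark.sql.types.CalendarIntervalType

// Every literal from calendarIntervalLiterGen should carry CalendarIntervalType.
val intervalLiteralsAreTyped = forAll(LiteralGenerator.calendarIntervalLiterGen) { lit =>
  lit.dataType == CalendarIntervalType
}
// intervalLiteralsAreTyped.check()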
Example 15
Source File: TimeWindow.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions

import org.apache.commons.lang3.StringUtils

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.TypeCheckFailure
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode}
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval

case class TimeWindow(
    timeColumn: Expression,
    windowDuration: Long,
    slideDuration: Long,
    startTime: Long) extends UnaryExpression
  with ImplicitCastInputTypes
  with Unevaluable
  with NonSQLExpression {

  //////////////////////////
  // SQL Constructors
  //////////////////////////
  def this(
      timeColumn: Expression,
      windowDuration: Expression,
      slideDuration: Expression,
      startTime: Expression) = {
    this(timeColumn, TimeWindow.parseExpression(windowDuration),
      TimeWindow.parseExpression(slideDuration), TimeWindow.parseExpression(startTime))
  }

  def this(timeColumn: Expression, windowDuration: Expression, slideDuration: Expression) = {
    this(timeColumn, TimeWindow.parseExpression(windowDuration),
      TimeWindow.parseExpression(slideDuration), 0)
  }

  def this(timeColumn: Expression, windowDuration: Expression) = {
    this(timeColumn, windowDuration, windowDuration)
  }

  override def child: Expression = timeColumn
  override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType)
  override def dataType: DataType = new StructType()
    .add(StructField("start", TimestampType))
    .add(StructField("end", TimestampType))

  // This expression is replaced in the analyzer.
  override lazy val resolved = false
}

// TimeWindow.parseExpression is defined in the companion object, which is omitted from this excerpt.

case class PreciseTimestampConversion(
    child: Expression,
    fromType: DataType,
    toType: DataType) extends UnaryExpression with ExpectsInputTypes {
  override def inputTypes: Seq[AbstractDataType] = Seq(fromType)
  override def dataType: DataType = toType
  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val eval = child.genCode(ctx)
    ev.copy(code = eval.code +
      code"""boolean ${ev.isNull} = ${eval.isNull};
            |${CodeGenerator.javaType(dataType)} ${ev.value} = ${eval.value};
          """.stripMargin)
  }
  override def nullSafeEval(input: Any): Any = input
}
Example 16
Source File: EventTimeWatermark.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.catalyst.plans.logical

import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.types.MetadataBuilder
import org.apache.spark.unsafe.types.CalendarInterval

object EventTimeWatermark {
  // Members such as delayKey and getDelayMs (referenced below) are omitted from this excerpt.
}

case class EventTimeWatermark(
    eventTime: Attribute,
    delay: CalendarInterval,
    child: LogicalPlan) extends UnaryNode {

  // Update the metadata on the eventTime column to include the desired delay.
  override val output: Seq[Attribute] = child.output.map { a =>
    if (a semanticEquals eventTime) {
      val delayMs = EventTimeWatermark.getDelayMs(delay)
      val updatedMetadata = new MetadataBuilder()
        .withMetadata(a.metadata)
        .putLong(EventTimeWatermark.delayKey, delayMs)
        .build()
      a.withMetadata(updatedMetadata)
    } else if (a.metadata.contains(EventTimeWatermark.delayKey)) {
      // Remove existing watermark
      val updatedMetadata = new MetadataBuilder()
        .withMetadata(a.metadata)
        .remove(EventTimeWatermark.delayKey)
        .build()
      a.withMetadata(updatedMetadata)
    } else {
      a
    }
  }
}
Example 17
Source File: LiteralGenerator.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions

import java.sql.{Date, Timestamp}

import org.scalacheck.{Arbitrary, Gen}

import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval

object LiteralGenerator {

  lazy val byteLiteralGen: Gen[Literal] =
    for { b <- Arbitrary.arbByte.arbitrary } yield Literal.create(b, ByteType)

  lazy val shortLiteralGen: Gen[Literal] =
    for { s <- Arbitrary.arbShort.arbitrary } yield Literal.create(s, ShortType)

  lazy val integerLiteralGen: Gen[Literal] =
    for { i <- Arbitrary.arbInt.arbitrary } yield Literal.create(i, IntegerType)

  lazy val longLiteralGen: Gen[Literal] =
    for { l <- Arbitrary.arbLong.arbitrary } yield Literal.create(l, LongType)

  lazy val floatLiteralGen: Gen[Literal] =
    for {
      f <- Gen.chooseNum(Float.MinValue / 2, Float.MaxValue / 2,
        Float.NaN, Float.PositiveInfinity, Float.NegativeInfinity)
    } yield Literal.create(f, FloatType)

  lazy val doubleLiteralGen: Gen[Literal] =
    for {
      f <- Gen.chooseNum(Double.MinValue / 2, Double.MaxValue / 2,
        Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity)
    } yield Literal.create(f, DoubleType)

  // TODO cache the generated data
  def decimalLiteralGen(precision: Int, scale: Int): Gen[Literal] = {
    assert(scale >= 0)
    assert(precision >= scale)
    Arbitrary.arbBigInt.arbitrary.map { s =>
      val a = (s % BigInt(10).pow(precision - scale)).toString()
      val b = (s % BigInt(10).pow(scale)).abs.toString()
      Literal.create(
        Decimal(BigDecimal(s"$a.$b"), precision, scale),
        DecimalType(precision, scale))
    }
  }

  lazy val stringLiteralGen: Gen[Literal] =
    for { s <- Arbitrary.arbString.arbitrary } yield Literal.create(s, StringType)

  lazy val binaryLiteralGen: Gen[Literal] =
    for { ab <- Gen.listOf[Byte](Arbitrary.arbByte.arbitrary) }
      yield Literal.create(ab.toArray, BinaryType)

  lazy val booleanLiteralGen: Gen[Literal] =
    for { b <- Arbitrary.arbBool.arbitrary } yield Literal.create(b, BooleanType)

  lazy val dateLiteralGen: Gen[Literal] =
    for { d <- Arbitrary.arbInt.arbitrary } yield Literal.create(new Date(d), DateType)

  lazy val timestampLiteralGen: Gen[Literal] =
    for { t <- Arbitrary.arbLong.arbitrary } yield Literal.create(new Timestamp(t), TimestampType)

  lazy val calendarIntervalLiterGen: Gen[Literal] =
    for { m <- Arbitrary.arbInt.arbitrary; s <- Arbitrary.arbLong.arbitrary }
      yield Literal.create(new CalendarInterval(m, s), CalendarIntervalType)

  // Sometimes, it would be quite expensive when unlimited value is used,
  // for example, the `times` arguments for StringRepeat would hang the test 'forever'
  // if it's tested against Int.MaxValue by ScalaCheck, therefore, use values from a limited
  // range is more reasonable
  lazy val limitedIntegerLiteralGen: Gen[Literal] =
    for { i <- Gen.choose(-100, 100) } yield Literal.create(i, IntegerType)

  def randomGen(dt: DataType): Gen[Literal] = {
    dt match {
      case ByteType => byteLiteralGen
      case ShortType => shortLiteralGen
      case IntegerType => integerLiteralGen
      case LongType => longLiteralGen
      case DoubleType => doubleLiteralGen
      case FloatType => floatLiteralGen
      case DateType => dateLiteralGen
      case TimestampType => timestampLiteralGen
      case BooleanType => booleanLiteralGen
      case StringType => stringLiteralGen
      case BinaryType => binaryLiteralGen
      case CalendarIntervalType => calendarIntervalLiterGen
      case DecimalType.Fixed(precision, scale) => decimalLiteralGen(precision, scale)
      case dt => throw new IllegalArgumentException(s"not supported type $dt")
    }
  }
}