java.text.SimpleDateFormat Scala Examples
The following examples show how to use java.text.SimpleDateFormat.
Each example is taken from an open-source project; the source file, project, and license are noted above each listing.
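Before the project examples, here is a minimal stand-alone sketch (not taken from any of the projects below) of the two operations the examples rely on: formatting a java.util.Date into a String and parsing it back. The object name, pattern string, and time zone are illustrative choices only.

import java.text.SimpleDateFormat
import java.util.{Date, TimeZone}

object SimpleDateFormatDemo {
  def main(args: Array[String]): Unit = {
    // Pattern letters: yyyy = year, MM = month, dd = day, HH = 24-hour clock, mm = minutes, ss = seconds
    val fmt = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss")
    fmt.setTimeZone(TimeZone.getTimeZone("UTC"))

    val formatted: String = fmt.format(new Date()) // Date -> String
    val parsed: Date = fmt.parse(formatted)        // String -> Date (throws ParseException on bad input)

    println(s"formatted: $formatted, parsed epoch millis: ${parsed.getTime}")
    // Note: SimpleDateFormat is not thread-safe; several of the examples below
    // create a fresh instance per call for this reason.
  }
}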
Example 1
Source File: JacksonMessageWriter.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.status.api.v1

import java.io.OutputStream
import java.lang.annotation.Annotation
import java.lang.reflect.Type
import java.nio.charset.StandardCharsets
import java.text.SimpleDateFormat
import java.util.{Calendar, SimpleTimeZone}
import javax.ws.rs.Produces
import javax.ws.rs.core.{MediaType, MultivaluedMap}
import javax.ws.rs.ext.{MessageBodyWriter, Provider}

import com.fasterxml.jackson.annotation.JsonInclude
import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}

@Provider
@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{

  val mapper = new ObjectMapper() {
    override def writeValueAsString(t: Any): String = {
      super.writeValueAsString(t)
    }
  }
  mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule)
  mapper.enable(SerializationFeature.INDENT_OUTPUT)
  mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL)
  mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat)

  override def isWriteable(
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType): Boolean = {
    true
  }

  override def writeTo(
      t: Object,
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType,
      multivaluedMap: MultivaluedMap[String, AnyRef],
      outputStream: OutputStream): Unit = {
    t match {
      case ErrorWrapper(err) => outputStream.write(err.getBytes(StandardCharsets.UTF_8))
      case _ => mapper.writeValue(outputStream, t)
    }
  }

  override def getSize(
      t: Object,
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType): Long = {
    -1L
  }
}

private[spark] object JacksonMessageWriter {
  def makeISODateFormat: SimpleDateFormat = {
    val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'")
    val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT"))
    iso8601.setCalendar(cal)
    iso8601
  }
}
Example 2
Source File: PMMLModelExport.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.mllib.pmml.export

import java.text.SimpleDateFormat
import java.util.Date

import scala.beans.BeanProperty

import org.dmg.pmml.{Application, Header, PMML, Timestamp}

private[mllib] trait PMMLModelExport {

  @BeanProperty
  val pmml: PMML = {
    val version = getClass.getPackage.getImplementationVersion
    val app = new Application("Apache Spark MLlib").setVersion(version)
    val timestamp = new Timestamp()
      .addContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(new Date()))
    val header = new Header()
      .setApplication(app)
      .setTimestamp(timestamp)
    new PMML("4.2", header, null)
  }
}
Example 3
Source File: SimpleDateParam.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.status.api.v1

import java.text.{ParseException, SimpleDateFormat}
import java.util.TimeZone
import javax.ws.rs.WebApplicationException
import javax.ws.rs.core.Response
import javax.ws.rs.core.Response.Status

private[v1] class SimpleDateParam(val originalValue: String) {

  val timestamp: Long = {
    val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz")
    try {
      format.parse(originalValue).getTime()
    } catch {
      case _: ParseException =>
        val gmtDay = new SimpleDateFormat("yyyy-MM-dd")
        gmtDay.setTimeZone(TimeZone.getTimeZone("GMT"))
        try {
          gmtDay.parse(originalValue).getTime()
        } catch {
          case _: ParseException =>
            throw new WebApplicationException(
              Response
                .status(Status.BAD_REQUEST)
                .entity("Couldn't parse date: " + originalValue)
                .build()
            )
        }
    }
  }
}
Example 4
Source File: Train.scala From BigDL with Apache License 2.0
package com.intel.analytics.bigdl.models.vgg import java.text.SimpleDateFormat import java.util.Date import com.intel.analytics.bigdl._ import com.intel.analytics.bigdl.dataset.DataSet import com.intel.analytics.bigdl.dataset.image._ import com.intel.analytics.bigdl.nn.{ClassNLLCriterion, Module} import com.intel.analytics.bigdl.optim._ import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric._ import com.intel.analytics.bigdl.utils.{Engine, LoggerFilter, OptimizerV1, OptimizerV2, T, Table} import com.intel.analytics.bigdl.visualization.{TrainSummary, ValidationSummary} import org.apache.log4j.{Level, Logger} import org.apache.spark.SparkContext object Train { LoggerFilter.redirectSparkInfoLogs() import Utils._ def main(args: Array[String]): Unit = { trainParser.parse(args, new TrainParams()).map(param => { val conf = Engine.createSparkConf().setAppName("Train Vgg on Cifar10") // Will throw exception without this config when has only one executor .set("spark.rpc.message.maxSize", "200") val sc = new SparkContext(conf) Engine.init val trainDataSet = DataSet.array(Utils.loadTrain(param.folder), sc) -> BytesToBGRImg() -> BGRImgNormalizer(trainMean, trainStd) -> BGRImgToBatch(param.batchSize) val model = if (param.modelSnapshot.isDefined) { Module.load[Float](param.modelSnapshot.get) } else { if (param.graphModel) VggForCifar10.graph(classNum = 10) else VggForCifar10(classNum = 10) } if (param.optimizerVersion.isDefined) { param.optimizerVersion.get.toLowerCase match { case "optimizerv1" => Engine.setOptimizerVersion(OptimizerV1) case "optimizerv2" => Engine.setOptimizerVersion(OptimizerV2) } } val optimMethod = if (param.stateSnapshot.isDefined) { OptimMethod.load[Float](param.stateSnapshot.get) } else { new SGD[Float](learningRate = param.learningRate, learningRateDecay = 0.0, weightDecay = param.weightDecay, momentum = 0.9, dampening = 0.0, nesterov = false, learningRateSchedule = SGD.EpochStep(25, 0.5)) } val optimizer = Optimizer( model = model, dataset = trainDataSet, criterion = new ClassNLLCriterion[Float]() ) val validateSet = DataSet.array(Utils.loadTest(param.folder), sc) -> BytesToBGRImg() -> BGRImgNormalizer(testMean, testStd) -> BGRImgToBatch(param.batchSize) if (param.checkpoint.isDefined) { optimizer.setCheckpoint(param.checkpoint.get, Trigger.everyEpoch) } if (param.overWriteCheckpoint) { optimizer.overWriteCheckpoint() } if (param.summaryPath.isDefined) { val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") val timeStamp = sdf.format(new Date()) val trainSummry = new TrainSummary(param.summaryPath.get, s"vgg-on-cifar10-train-$timeStamp") optimizer.setTrainSummary(trainSummry) val validationSummary = new ValidationSummary(param.summaryPath.get, s"vgg-on-cifar10-val-$timeStamp") optimizer.setValidationSummary(validationSummary) } optimizer .setValidation(Trigger.everyEpoch, validateSet, Array(new Top1Accuracy[Float])) .setOptimMethod(optimMethod) .setEndWhen(Trigger.maxEpoch(param.maxEpoch)) .optimize() sc.stop() }) } }
Example 5
Source File: WikiETL.scala From CarbonDataLearning with GNU General Public License v3.0
package org.github.xubo245.carbonDataLearning.etl import java.io.{File, PrintWriter} import java.text.SimpleDateFormat import java.util.Date import scala.io.Source import scala.util.Random object WikiETL { def main(args: Array[String]): Unit = { val directory = "/root/xubo/data" val files = new File(directory) val out = new PrintWriter("/root/xubo/data/pageviews-20150505time") var flag:Int = 10000000; var typeMap= Map (("b","wikibooks") ,("d","wiktionary") ,("m","wikimedia") ,("mw","wikipedia mobile") ,("n","wikinews") ,("q","wikiquote") ,("s","wikisource") ,("v","wikiversity") ,("w","mediawiki")) for (file <- files.listFiles().sorted.filter(_.getCanonicalFile.getName.contains("pageviews-20150505-"))) { val filePath = file.getCanonicalPath println(filePath) // val out = new PrintWriter(filePath + "WithTime") val reader = Source.fromFile(filePath) val fileName = file.getCanonicalFile.getName val delimiter = "\t" for (line <- reader.getLines()) { val stringBuffer = new StringBuffer() val random = new Random() val id = flag+random.nextInt(1000000) stringBuffer .append(id).append(delimiter) .append(fileName.substring(10, 14)).append(delimiter) .append(fileName.substring(14, 16)).append(delimiter) .append(fileName.substring(16, 18)).append(delimiter) .append(fileName.substring(19, 21)).append(delimiter) val array=line.mkString.split("\\s+") if (array.length == 4 && array(2).matches("[0-9]*") && !array(1).contains("\"")) { val domain = array(0).split('.') stringBuffer.append(domain(0)).append(delimiter) if (domain.length > 1) { var value: String = typeMap.getOrElse(domain(1), "wiki") stringBuffer.append(value).append(delimiter) } else { stringBuffer.append("wiki").append(delimiter) } val time = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date()); val tid= id*10+random.nextInt(5) stringBuffer.append(array(1).replace('_',' ')).append(delimiter) .append(tid).append(delimiter) .append(array(2)).append(delimiter) .append(random.nextInt(100000)).append(delimiter) .append(time) // for (i <- 0 until array.length-1){ // stringBuffer.append(array(i)).append(delimiter) // } // stringBuffer.append(array(array.length-1)) // if (array.length == 4 && array(2).matches("[0-9]*")) { // id = id + 1 out.println(stringBuffer.toString) } } } out.close() } }
Example 6
Source File: DataConverter.scala From spark-cdm with MIT License
package com.microsoft.cdm.utils import java.text.SimpleDateFormat import java.util.{Locale, TimeZone} import java.sql.Timestamp import org.apache.commons.lang.time.DateUtils import org.apache.spark.sql.catalyst.util.TimestampFormatter import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String class DataConverter() extends Serializable { val dateFormatter = new SimpleDateFormat(Constants.SINGLE_DATE_FORMAT) val timestampFormatter = TimestampFormatter(Constants.TIMESTAMP_FORMAT, TimeZone.getTimeZone("UTC")) val toSparkType: Map[CDMDataType.Value, DataType] = Map( CDMDataType.int64 -> LongType, CDMDataType.dateTime -> DateType, CDMDataType.string -> StringType, CDMDataType.double -> DoubleType, CDMDataType.decimal -> DecimalType(Constants.DECIMAL_PRECISION,0), CDMDataType.boolean -> BooleanType, CDMDataType.dateTimeOffset -> TimestampType ) def jsonToData(dt: DataType, value: String): Any = { return dt match { case LongType => value.toLong case DoubleType => value.toDouble case DecimalType() => Decimal(value) case BooleanType => value.toBoolean case DateType => dateFormatter.parse(value) case TimestampType => timestampFormatter.parse(value) case _ => UTF8String.fromString(value) } } def toCdmType(dt: DataType): CDMDataType.Value = { return dt match { case IntegerType => CDMDataType.int64 case LongType => CDMDataType.int64 case DateType => CDMDataType.dateTime case StringType => CDMDataType.string case DoubleType => CDMDataType.double case DecimalType() => CDMDataType.decimal case BooleanType => CDMDataType.boolean case TimestampType => CDMDataType.dateTimeOffset } } def dataToString(data: Any, dataType: DataType): String = { (dataType, data) match { case (_, null) => null case (DateType, _) => dateFormatter.format(data) case (TimestampType, v: Number) => timestampFormatter.format(data.asInstanceOf[Long]) case _ => data.toString } } }
Example 7
Source File: CORSFilter.scala From daf-semantics with Apache License 2.0
package it.almawave.kb.http.providers

import javax.ws.rs.container.ContainerRequestContext
import javax.ws.rs.container.ContainerResponseContext
import javax.ws.rs.container.ContainerResponseFilter
import javax.ws.rs.ext.Provider
import java.text.SimpleDateFormat
import java.util.Date
import java.net.URI

@Provider
class CORSFilter extends ContainerResponseFilter {

  override def filter(request: ContainerRequestContext, response: ContainerResponseContext) {

    val headers = response.getHeaders()
    headers.add("Access-Control-Allow-Origin", "*")
    headers.add("Access-Control-Allow-Headers", "origin, content-type, accept, authorization")
    headers.add("Access-Control-Allow-Credentials", "true")
    headers.add("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS, HEAD")

    // custom headers
    // headers.add("Server", "Simple Jersey/Jetty HTTP server for RDF")
    // headers.add("Pragma", "Pragma: no-cache")
    // headers.add("Link", new URI("http://almawave.it"))
  }
}
Example 8
Source File: EachRunNewFileAppender.scala From sddf with GNU General Public License v3.0
package de.unihamburg.vsis.sddf.logging

import java.io.File
import java.text.SimpleDateFormat
import java.util.Date

import org.apache.log4j.FileAppender

import de.unihamburg.vsis.sddf.config.Config

class EachRunNewFileAppender extends FileAppender {
  override def setFile(fileName: String, append: Boolean, bufferedIO: Boolean, bufferSize: Int) = {
    val oldFile = new File(fileName)
    val dir = if (oldFile.isDirectory()) oldFile else oldFile.getParentFile
    val fileSuffix = if (oldFile.isDirectory()) ".log" else oldFile.getName
    val newFileName = EachRunNewFileAppender.runUuid + fileSuffix
    val newLogFile = new File(dir, newFileName)
    super.setFile(newLogFile.getPath, append, bufferedIO, bufferSize)
  }
}

object EachRunNewFileAppender {
  val dateFormat = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss");
  val runUuid = System.currentTimeMillis().toString() + "-" + dateFormat.format(new Date())
}
Example 9
Source File: DataWorkCloudEngineApplication.scala From Linkis with Apache License 2.0
package com.webank.wedatasphere.linkis.engine import java.text.SimpleDateFormat import java.util.Date import com.webank.wedatasphere.linkis.DataWorkCloudApplication import com.webank.wedatasphere.linkis.common.conf.DWCArgumentsParser import com.webank.wedatasphere.linkis.common.utils.Utils import com.webank.wedatasphere.linkis.engine.conf.EngineConfiguration import com.webank.wedatasphere.linkis.server.conf.ServerConfiguration import org.apache.commons.lang.StringUtils import org.slf4j.LoggerFactory object DataWorkCloudEngineApplication { val userName:String = System.getProperty("user.name") val hostName:String = Utils.getComputerName val appName:String = EngineConfiguration.ENGINE_SPRING_APPLICATION_NAME.getValue val prefixName:String = EngineConfiguration.ENGINE_LOG_PREFIX.getValue val timeStamp:Long = System.currentTimeMillis() private val timeFormat = new SimpleDateFormat("yyyy-MM-dd_HH:mm:ss") private val dateFormat = new SimpleDateFormat("yyyy-MM-dd") val time:String = timeFormat.format(new Date(timeStamp)) val date:String = dateFormat.format(new Date(timeStamp)) val isTimeStampSuffix:Boolean = "true".equalsIgnoreCase(EngineConfiguration.ENGINE_LOG_TIME_STAMP_SUFFIX.getValue) val shortLogFile:String = if (isTimeStampSuffix) appName + "_" + hostName + "_" + userName + "_" + time + ".log" else appName + "_" + hostName + "_" + userName + ".log" val logName:String = if(isTimeStampSuffix) prefixName + "/" + userName + "/" + shortLogFile else prefixName + "/" + shortLogFile System.setProperty("engineLogFile", logName) System.setProperty("shortEngineLogFile", shortLogFile) // System.setProperty("engineLogFile", logName) // val context:LoggerContext = LogManager.getContext(false).asInstanceOf[LoggerContext] // val path:String = getClass.getResource("/").getPath // val log4j2XMLFile:File = new File(path + "/log4j2-engine.xml") // val configUri:URI = log4j2XMLFile.toURI // context.setConfigLocation(configUri) private val logger = LoggerFactory.getLogger(getClass) logger.info(s"Now log4j2 Rolling File is set to be $logName") logger.info(s"Now shortLogFile is set to be $shortLogFile") def main(args: Array[String]): Unit = { val parser = DWCArgumentsParser.parse(args) DWCArgumentsParser.setDWCOptionMap(parser.getDWCConfMap) val existsExcludePackages = ServerConfiguration.BDP_SERVER_EXCLUDE_PACKAGES.getValue if(StringUtils.isEmpty(existsExcludePackages)) DataWorkCloudApplication.setProperty(ServerConfiguration.BDP_SERVER_EXCLUDE_PACKAGES.key, "com.webank.wedatasphere.linkis.enginemanager") else DataWorkCloudApplication.setProperty(ServerConfiguration.BDP_SERVER_EXCLUDE_PACKAGES.key, existsExcludePackages + ",com.webank.wedatasphere.linkis.enginemanager") DataWorkCloudApplication.main(DWCArgumentsParser.formatSpringOptions(parser.getSpringConfMap)) } }
Example 10
Source File: ApplicationUtil.scala From Linkis with Apache License 2.0
package com.webank.wedatasphere.linkis.application.util import java.text.SimpleDateFormat import java.util.Date import com.webank.wedatasphere.linkis.application.conf.{ApplicationConfiguration, ApplicationScalaConfiguration} object ApplicationUtil { def getFlowsJson(user:String,date:Date):String ={ val initExamplePath = ApplicationScalaConfiguration.INIT_EXAMPLE_PATH.getValue.toString + user + "/application/dataStudio/" val sqlName = ApplicationScalaConfiguration.INIT_EXAMPLE_SQL_NAME.getValue.toString val scalaName = ApplicationScalaConfiguration.INIT_EXAMPLE_SCALA_NAME.getValue.toString val spyName = ApplicationScalaConfiguration.INIT_EXAMPLE_SPY_NAME.getValue.toString val hqlName = ApplicationScalaConfiguration.INIT_EXAMPLE_HQL_NAME.getValue.toString val pythonName = ApplicationScalaConfiguration.INIT_EXAMPLE_PYTHON_NAME.getValue.toString val formateDate = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(date) s"""[{"id":1,"name":"Default business process(默认业务流程)","createTime":"$formateDate","lastUpdateTime":"","description":"Default business process(默认业务流程)","version":"1.0.0","owner":"$user","canPublished":false,"params":{},"relations":[],"projectChildren":[],"flowChildren":[],"nodeChildren":{"dataExchange":[],"dataStudio":[{"id":1,"name":"$sqlName","type":"${sqlName.substring(sqlName.lastIndexOf(".")+1)}","createTime":"$formateDate","lastUpdateTime":"","description":"","version":"1.0.0","owner":"$user","content":{"scriptPath":"${initExamplePath + sqlName}"}},{"id":2,"name":"$scalaName","type":"${scalaName.substring(scalaName.lastIndexOf(".")+1)}","createTime":"$formateDate","lastUpdateTime":"","description":"","version":"1.0.0","owner":"$user","content":{"scriptPath":"${initExamplePath + scalaName}"}},{"id":3,"name":"$spyName","type":"${spyName.substring(spyName.lastIndexOf(".")+1)}","createTime":"$formateDate","lastUpdateTime":"","description":"","version":"1.0.0","owner":"$user","content":{"scriptPath":"${initExamplePath + spyName}"}},{"id":4,"name":"$hqlName","type":"${hqlName.substring(hqlName.lastIndexOf(".")+1)}","createTime":"$formateDate","lastUpdateTime":"","description":"","version":"1.0.0","owner":"$user","content":{"scriptPath":"${initExamplePath + hqlName}"}},{"id":5,"name":"$pythonName","type":"${pythonName.substring(pythonName.lastIndexOf(".")+1)}","createTime":"$formateDate","lastUpdateTime":"","description":"","version":"1.0.0","owner":"$user","content":{"scriptPath":"${initExamplePath + pythonName}"}}],"dataBI":[],"resources":[]}}]""" } }
Example 11
Source File: DWSHttpClient.scala From Linkis with Apache License 2.0
class DWSHttpClient(clientConfig: DWSClientConfig, clientName: String) extends AbstractHttpClient(clientConfig, clientName) { override protected def createDiscovery(): Discovery = new DWSGatewayDiscovery override protected def prepareAction(requestAction: HttpAction): HttpAction = { requestAction match { case dwsAction: DWSHttpAction => dwsAction.setDWSVersion(clientConfig.getDWSVersion) case _ => } requestAction } override protected def httpResponseToResult(response: HttpResponse, requestAction: HttpAction, responseBody: String): Option[Result] = { var entity = response.getEntity val statusCode: Int = response.getStatusLine.getStatusCode val url: String = requestAction.getURL val contentType: String = entity.getContentType.getValue DWSHttpMessageFactory.getDWSHttpMessageResult(url).map { case DWSHttpMessageResultInfo(_, clazz) => clazz match { case c if ClassUtils.isAssignable(c, classOf[DWSResult]) => val dwsResult = clazz.getConstructor().newInstance().asInstanceOf[DWSResult] dwsResult.set(responseBody, statusCode, url, contentType) BeanUtils.populate(dwsResult, dwsResult.getData) return Some(dwsResult) case _ => } def transfer(value: Result, map: Map[String, Object]): Unit = { value match { case httpResult: HttpResult => httpResult.set(responseBody, statusCode, url, contentType) case _ => } val javaMap = mapAsJavaMap(map) BeanUtils.populate(value, javaMap) fillResultFields(javaMap, value) } deserializeResponseBody(response) match { case map: Map[String, Object] => val value = clazz.getConstructor().newInstance().asInstanceOf[Result] transfer(value, map) value case list: List[Map[String, Object]] => val results = list.map { map => val value = clazz.getConstructor().newInstance().asInstanceOf[Result] transfer(value, map) value }.toArray new ListResult(responseBody, results) } }.orElse(nonDWSResponseToResult(response, requestAction)) } protected def nonDWSResponseToResult(response: HttpResponse, requestAction: HttpAction): Option[Result] = None protected def fillResultFields(responseMap: util.Map[String, Object], value: Result): Unit = {} //TODO Consistent with workspace, plus expiration time(与workspace保持一致,加上过期时间) override protected def getFsByUser(user: String, path: FsPath): Fs = FSFactory.getFsByProxyUser(path, user) } object DWSHttpClient { val jacksonJson = new ObjectMapper().setDateFormat(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ")) }
Example 12
Source File: TypeCast.scala From spark-select with Apache License 2.0
package io.minio.spark.select.util

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.{SimpleDateFormat, NumberFormat}
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try

// The enclosing object declaration was truncated in the original listing; it is restored here
// (named after the source file) so the snippet compiles.
object TypeCast {

  @throws[IllegalArgumentException]
  private[select] def toChar(str: String): Char = {
    if (str.charAt(0) == '\\') {
      str.charAt(1) match {
        case 't' => '\t'
        case 'r' => '\r'
        case 'b' => '\b'
        case 'f' => '\f'
        case '\"' => '\"' // In case user changes quote char and uses \" as delimiter in options
        case '\'' => '\''
        case 'u' if str == """\u0000""" => '\u0000'
        case _ =>
          throw new IllegalArgumentException(s"Unsupported special character for delimiter: $str")
      }
    } else if (str.length == 1) {
      str.charAt(0)
    } else {
      throw new IllegalArgumentException(s"Delimiter cannot be more than one character: $str")
    }
  }
}
Example 13
Source File: Authorize.scala From keycloak-benchmark with Apache License 2.0
package io.gatling.keycloak import java.text.SimpleDateFormat import java.util.{Date, Collections} import akka.actor.ActorDSL.actor import akka.actor.ActorRef import io.gatling.core.action.Interruptable import io.gatling.core.action.builder.ActionBuilder import io.gatling.core.config.Protocols import io.gatling.core.result.writer.DataWriterClient import io.gatling.core.session._ import io.gatling.core.validation._ import org.jboss.logging.Logger import org.keycloak.adapters.spi.AuthOutcome import org.keycloak.adapters.KeycloakDeploymentBuilder import org.keycloak.adapters.spi.HttpFacade.Cookie import org.keycloak.common.enums.SslRequired import org.keycloak.representations.adapters.config.AdapterConfig import scala.collection.JavaConverters._ case class AuthorizeAttributes( requestName: Expression[String], uri: Expression[String], cookies: Expression[List[Cookie]], sslRequired: SslRequired = SslRequired.EXTERNAL, resource: String = null, password: String = null, realm: String = null, realmKey: String = null, authServerUrl: Expression[String] = _ => Failure("no server url") ) { def toAdapterConfig(session: Session) = { val adapterConfig = new AdapterConfig adapterConfig.setSslRequired(sslRequired.toString) adapterConfig.setResource(resource) adapterConfig.setCredentials(Collections.singletonMap("secret", password)) adapterConfig.setRealm(realm) adapterConfig.setRealmKey(realmKey) adapterConfig.setAuthServerUrl(authServerUrl(session).get) adapterConfig } } class AuthorizeActionBuilder(attributes: AuthorizeAttributes) extends ActionBuilder { def newInstance(attributes: AuthorizeAttributes) = new AuthorizeActionBuilder(attributes) def sslRequired(sslRequired: SslRequired) = newInstance(attributes.copy(sslRequired = sslRequired)) def resource(resource: String) = newInstance(attributes.copy(resource = resource)) def clientCredentials(password: String) = newInstance(attributes.copy(password = password)) def realm(realm: String) = newInstance(attributes.copy(realm = realm)) def realmKey(realmKey: String) = newInstance(attributes.copy(realmKey = realmKey)) def authServerUrl(authServerUrl: Expression[String]) = newInstance(attributes.copy(authServerUrl = authServerUrl)) override def build(next: ActorRef, protocols: Protocols): ActorRef = { actor(actorName("authorize"))(new AuthorizeAction(attributes, next)) } } object AuthorizeAction { val logger = Logger.getLogger(classOf[AuthorizeAction]) } class AuthorizeAction( attributes: AuthorizeAttributes, val next: ActorRef ) extends Interruptable with ExitOnFailure with DataWriterClient { override def executeOrFail(session: Session): Validation[_] = { val facade = new MockHttpFacade() val deployment = KeycloakDeploymentBuilder.build(attributes.toAdapterConfig(session)); facade.request.setURI(attributes.uri(session).get); facade.request.setCookies(attributes.cookies(session).get.map(c => (c.getName, c)).toMap.asJava) var nextSession = session val requestAuth: MockRequestAuthenticator = session(MockRequestAuthenticator.KEY).asOption[MockRequestAuthenticator] match { case Some(ra) => ra case None => val tmp = new MockRequestAuthenticator(facade, deployment, new MockTokenStore, -1, session.userId) nextSession = session.set(MockRequestAuthenticator.KEY, tmp) tmp } Blocking(() => { AuthorizeAction.logger.debugf("%s: Authenticating %s%n", new SimpleDateFormat("HH:mm:ss,SSS").format(new Date()).asInstanceOf[Any], session("username").as[Any], Unit) Stopwatch(() => requestAuth.authenticate()) .check(result => result == AuthOutcome.AUTHENTICATED, result => { 
AuthorizeAction.logger.warnf("%s: Failed auth %s%n", new SimpleDateFormat("HH:mm:ss,SSS").format(new Date()).asInstanceOf[Any], session("username").as[Any], Unit) result.toString }) .recordAndContinue(AuthorizeAction.this, nextSession, attributes.requestName(session).get) }) } }
Example 14
Source File: SessionServlet.scala From jboss-wildfly-test with Apache License 2.0
package servlet import java.text.SimpleDateFormat import java.util.Date import javax.servlet.annotation._ import javax.servlet.http._ @WebServlet(value = Array("/SessionServlet")) class SessionServlet extends HttpServlet { def formatTime(timestamp: Long): String = { val sdf = new SimpleDateFormat("yyyy-MM-dd'T'hh:mm:ss.SSS") sdf.format(new Date(timestamp)) } override def doGet(request: HttpServletRequest, response: HttpServletResponse) { response.setContentType("text/html") response.setCharacterEncoding("UTF-8") val out = response.getWriter out.println("<h3>Session Test Example</h3>") val session = request.getSession(true) out.println( s""" |Session Id: ${session.getId} <br/> |Created: ${formatTime(session.getCreationTime)} <br/> |Last Accessed: ${formatTime(session.getLastAccessedTime)} <br/> """.stripMargin) Option(request.getParameter("dataname")).foreach { dataName ⇒ Option(request.getParameter("datavalue")).foreach { dataValue ⇒ session.setAttribute(dataName, dataValue); } } import scala.collection.JavaConversions._ val xs = session.getAttributeNames val sessionDataString = xs.map(name ⇒ s"$name = ${session.getAttribute(name)}").mkString("<br/>") out.println( s""" |<p> |The following data is in your session: <br/><br/> |$sessionDataString |</p> | |<p> |POST based form <br/> |<form action='${response.encodeURL("SessionServlet")}' method='post'> | Name of session attribute: <input type='text' size='20' name='dataname'/><br/> | Value of session attribute: <input type='text' size='20' name='datavalue'/><br/> | <input type='submit'/> |</form> |</p> | |<p> |GET based form <br/> |<form action='${response.encodeURL("SessionServlet")}' method='get'> | Name of session attribute: <input type='text' size='20' name='dataname'/><br/> | Value of session attribute: <input type='text' size='20' name='datavalue'/><br/> | <input type='submit'/> |</form> |</p> | |<p><a href='${response.encodeURL("SessionServlet?dataname=foo&datavalue=bar")}'>URL encoded</a> """.stripMargin) out.close() } override def doPost(req: HttpServletRequest, resp: HttpServletResponse): Unit = doGet(req, resp) }
Example 15
Source File: Total.scala From akka_streams_tutorial with MIT License
package sample.stream_actor

import java.text.SimpleDateFormat
import java.util.{Date, TimeZone}

import akka.Done
import akka.actor.Actor
import sample.stream_actor.Total.Increment

object Total {
  case class Increment(value: Long, avg: Double, id: String)
}

class Total extends Actor {
  var total: Long = 0

  override def receive: Receive = {
    case Increment(value, avg, id) =>
      println(s"Received $value new measurements from turbine with id: $id - Avg wind speed is: $avg")
      total = total + value

      val date = new Date()
      val df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
      df.setTimeZone(TimeZone.getTimeZone("Europe/Zurich"))

      println(s"${df.format(date)} - Current total of all measurements: $total")
      sender ! Done
  }
}
Example 16
Source File: Warn.scala From spatial with MIT License
package emul

import java.text.SimpleDateFormat
import java.util.Calendar
import java.io.PrintStream

object Warn {
  val now = Calendar.getInstance().getTime
  val fmt = new SimpleDateFormat("dd_MM_yyyy_hh_mm_aa")
  val timestamp = fmt.format(now)
  var warns: Int = 0

  lazy val log = new PrintStream(timestamp + ".log")

  def apply(x: => String): Unit = {
    log.println(x)
    warns += 1
  }

  def close(): Unit = {
    if (warns > 0) {
      println(Warn.warns + " warnings occurred during program execution. See " + Warn.timestamp + ".log for details")
      log.close()
    }
  }
}
Example 17
Source File: Worker.scala From EncryCore with GNU General Public License v3.0
package encry.local.miner import java.util.Date import akka.actor.{Actor, ActorRef} import encry.EncryApp._ import scala.concurrent.duration._ import encry.consensus.{CandidateBlock, ConsensusSchemeReaders} import encry.local.miner.Miner.MinedBlock import encry.local.miner.Worker.{MineBlock, NextChallenge} import java.text.SimpleDateFormat import com.typesafe.scalalogging.StrictLogging import org.encryfoundation.common.utils.constants.TestNetConstants class Worker(myIdx: Int, numberOfWorkers: Int, miner: ActorRef) extends Actor with StrictLogging { val sdf: SimpleDateFormat = new SimpleDateFormat("HH:mm:ss") var challengeStartTime: Date = new Date(System.currentTimeMillis()) val initialNonce: Long = Long.MaxValue / numberOfWorkers * myIdx override def preRestart(reason: Throwable, message: Option[Any]): Unit = logger.warn(s"Worker $myIdx is restarting because of: $reason") override def receive: Receive = { case MineBlock(candidate: CandidateBlock, nonce: Long) => logger.info(s"Trying nonce: $nonce. Start nonce is: $initialNonce. " + s"Iter qty: ${nonce - initialNonce + 1} on worker: $myIdx with diff: ${candidate.difficulty}") ConsensusSchemeReaders .consensusScheme.verifyCandidate(candidate, nonce) .fold( e => { self ! MineBlock(candidate, nonce + 1) logger.info(s"Mining failed cause: $e") }, block => { logger.info(s"New block is found: (${block.header.height}, ${block.header.encodedId}, ${block.payload.txs.size} " + s"on worker $self at ${sdf.format(new Date(System.currentTimeMillis()))}. Iter qty: ${nonce - initialNonce + 1}") miner ! MinedBlock(block, myIdx) }) case NextChallenge(candidate: CandidateBlock) => challengeStartTime = new Date(System.currentTimeMillis()) logger.info(s"Start next challenge on worker: $myIdx at height " + s"${candidate.parentOpt.map(_.height + 1).getOrElse(TestNetConstants.PreGenesisHeight.toString)} at ${sdf.format(challengeStartTime)}") self ! MineBlock(candidate, Long.MaxValue / numberOfWorkers * myIdx) } } object Worker { case class NextChallenge(candidateBlock: CandidateBlock) case class MineBlock(candidateBlock: CandidateBlock, nonce: Long) }
Example 18
Source File: ISODateConverter.scala From hydra with Apache License 2.0
package hydra.avro.convert

import java.text.SimpleDateFormat
import java.time._

import hydra.common.logging.LoggingAdapter
import org.apache.avro.{Conversion, LogicalType, Schema}

import scala.util.Try

class ISODateConverter extends Conversion[ZonedDateTime] with LoggingAdapter {

  private val utc = ZoneOffset.UTC

  override def getLogicalTypeName: String = IsoDate.IsoDateLogicalTypeName

  override def getConvertedType: Class[ZonedDateTime] = classOf[ZonedDateTime]

  private val simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssX")

  override def fromCharSequence(
      value: CharSequence,
      schema: Schema,
      `type`: LogicalType
  ): ZonedDateTime = {
    Try(OffsetDateTime.parse(value).toInstant)
      .orElse {
        Try(LocalDateTime.parse(value).toInstant(ZoneOffset.UTC))
      }
      .orElse {
        Try(simpleDateFormat.parse(value.toString).toInstant)
      }
      .recover {
        case e: Throwable =>
          log.error(e.getMessage, e)
          Instant.EPOCH
      }
      .map(_.atZone(utc))
      .get
  }
}

object IsoDate extends LogicalType("iso-datetime") {
  val IsoDateLogicalTypeName = "iso-datetime"

  override def validate(schema: Schema): Unit = {
    if (schema.getType() != Schema.Type.STRING) {
      throw new IllegalArgumentException(
        "Iso-datetime can only be used with an underlying string type"
      )
    }
  }
}
Example 19
Source File: Trip.scala From gihyo-spark-book-example with Apache License 2.0
package jp.gihyo.spark.ch05

import java.text.SimpleDateFormat

case class Trip(
    id: Int,
    duration: Int,
    startDate: java.sql.Timestamp,
    startStation: String,
    startTerminal: Int,
    endDate: java.sql.Timestamp,
    endStation: String,
    endTerminal: Int,
    bikeNum: Int,
    subscriberType: String,
    zipcode: String)

object Trip {

  def parse(line: String): Trip = {
    val dateFormat = new SimpleDateFormat("MM/dd/yyy HH:mm")

    val elms = line.split(",")
    val id = elms(0).toInt
    val duration = elms(1).toInt
    val startDate = new java.sql.Timestamp(dateFormat.parse(elms(2)).getTime)
    val startStation = elms(3)
    val startTerminal = elms(4).toInt
    val endDate = new java.sql.Timestamp(dateFormat.parse(elms(5)).getTime)
    val endStation = elms(6)
    val endTerminal = elms(7).toInt
    val bikeNum = elms(8).toInt
    val subscriberType = elms(9)
    val zipcode = elms(10)

    Trip(id, duration, startDate, startStation, startTerminal,
      endDate, endStation, endTerminal, bikeNum, subscriberType, zipcode)
  }
}
Example 20
Source File: Station.scala From gihyo-spark-book-example with Apache License 2.0
package jp.gihyo.spark.ch05

import java.text.SimpleDateFormat

case class Station(
    id: Int,
    name: String,
    lat: Double,
    lon: Double,
    dockcount: Int,
    landmark: String,
    installation: java.sql.Date)

object Station {

  def parse(line: String): Station = {
    val dateFormat = new SimpleDateFormat("MM/dd/yyy")

    val elms = line.split(",")
    val id = elms(0).toInt
    val name = elms(1)
    val lat = elms(2).toDouble
    val lon = elms(3).toDouble
    val dockcount = elms(4).toInt
    val landmark = elms(5)
    val parsedInstallation = dateFormat.parse(elms(6))
    val installation = new java.sql.Date(parsedInstallation.getTime)

    Station(id, name, lat, lon, dockcount, landmark, installation)
  }
}
Example 21
Source File: StationSuite.scala From gihyo-spark-book-example with Apache License 2.0
package jp.gihyo.spark.ch05

import java.sql.Timestamp
import java.text.SimpleDateFormat

import org.scalatest.FunSuite

class StationSuite extends FunSuite {

  test("should be parse") {
    val line = "2,San Jose Diridon Caltrain Station,37.329732,-121.901782,27,San Jose,8/6/2013"
    val station = Station.parse(line)
    val dateFormat = new SimpleDateFormat("MM/dd/yyy")

    assert(station.id === 2)
    assert(station.name === "San Jose Diridon Caltrain Station")
    assert(station.lat === 37.329732)
    assert(station.lon === -121.901782)
    assert(station.dockcount === 27)
    assert(station.landmark === "San Jose")
    assert(station.installation === new Timestamp(dateFormat.parse("8/6/2013").getTime))
  }
}
Example 22
Source File: IOUtils.scala From watr-works with Apache License 2.0
package edu.umass.cs.iesl.watr package utils object PathUtils { import ammonite.{ops => fs} import java.nio.{file => nio} def appendTimestamp(path: String): String = { import java.text.SimpleDateFormat import java.util.Date val dateStamp = new SimpleDateFormat("yyyyMMddhhmmss").format(new Date()) s"$path-$dateStamp" } def nioToAmm(nioPath: nio.Path): fs.Path = { fs.FilePath(nioPath) match { case p: fs.Path => p case p: fs.RelPath => fs.pwd / p case _ => ??? } } def strToAmmPath(str: String): fs.Path = { fs.FilePath(str) match { case p: fs.Path => p case p: fs.RelPath => fs.pwd / p case _ => ??? } } implicit class RicherPathUtils_String(val self: String) extends AnyVal { def toPath(): fs.Path = { strToAmmPath(self) } } implicit class RicherPathUtils_NioPath(val self: nio.Path) extends AnyVal { def toFsPath(): fs.Path = { nioToAmm(self) } } }
Example 23
Source File: GenericMainClass.scala From darwin with Apache License 2.0
package it.agilelab.darwin.app.spark import java.text.SimpleDateFormat import java.util.Date import com.typesafe.config.{Config, ConfigFactory} import org.apache.hadoop.fs.FileSystem import org.apache.spark.sql.SparkSession import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ trait GenericMainClass { self: SparkManager => val genericMainClassLogger: Logger = LoggerFactory.getLogger("SparkManager") private def makeFileSystem(session: SparkSession): FileSystem = { if (session.sparkContext.isLocal) { FileSystem.getLocal(session.sparkContext.hadoopConfiguration) } else { FileSystem.get(session.sparkContext.hadoopConfiguration) } } // scalastyle:off private def getGlobalConfig: Config = { genericMainClassLogger.debug("system environment vars") for ((k, v) <- System.getenv().asScala.toSeq.sortBy(_._1)) genericMainClassLogger.debug(s"$k -> $v") genericMainClassLogger.debug("system properties") for ((k, v) <- System.getProperties.asScala.toSeq.sortBy(_._1)) genericMainClassLogger.debug(s"$k -> $v") ConfigFactory.load() } // scalastyle:on }
Example 24
Source File: MetricsReporter.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.execution.streaming import java.text.SimpleDateFormat import com.codahale.metrics.{Gauge, MetricRegistry} import org.apache.spark.internal.Logging import org.apache.spark.metrics.source.{Source => CodahaleSource} import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.streaming.StreamingQueryProgress class MetricsReporter( stream: StreamExecution, override val sourceName: String) extends CodahaleSource with Logging { override val metricRegistry: MetricRegistry = new MetricRegistry // Metric names should not have . in them, so that all the metrics of a query are identified // together in Ganglia as a single metric group registerGauge("inputRate-total", _.inputRowsPerSecond, 0.0) registerGauge("processingRate-total", _.processedRowsPerSecond, 0.0) registerGauge("latency", _.durationMs.get("triggerExecution").longValue(), 0L) private val timestampFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'") // ISO8601 timestampFormat.setTimeZone(DateTimeUtils.getTimeZone("UTC")) registerGauge("eventTime-watermark", progress => convertStringDateToMillis(progress.eventTime.get("watermark")), 0L) registerGauge("states-rowsTotal", _.stateOperators.map(_.numRowsTotal).sum, 0L) registerGauge("states-usedBytes", _.stateOperators.map(_.memoryUsedBytes).sum, 0L) private def convertStringDateToMillis(isoUtcDateStr: String) = { if (isoUtcDateStr != null) { timestampFormat.parse(isoUtcDateStr).getTime } else { 0L } } private def registerGauge[T]( name: String, f: StreamingQueryProgress => T, default: T): Unit = { synchronized { metricRegistry.register(name, new Gauge[T] { override def getValue: T = Option(stream.lastProgress).map(f).getOrElse(default) }) } } }
Example 25
Source File: ExecutorNumListener.scala From XSQL with Apache License 2.0
package org.apache.spark.monitor import java.text.SimpleDateFormat import java.util import java.util.Date import java.util.concurrent.atomic.AtomicBoolean import com.fasterxml.jackson.annotation.JsonIgnore import org.apache.spark.SparkContext import org.apache.spark.internal.Logging import org.apache.spark.scheduler.{ SparkListener, SparkListenerExecutorAdded, SparkListenerExecutorRemoved } import org.apache.spark.util.kvstore.KVIndex class ExecutorNumListener extends SparkListener with Logging { lazy val kvstore = SparkContext.getActive.get.statusStore.store var initialized: AtomicBoolean = new AtomicBoolean(false) var lastPointTime: Long = new Date().getTime var recentEventTime: Long = new Date().getTime private val liveExecutors = new util.HashSet[String]() def initialize(): Unit = { SparkContext.getActive.map(_.ui).flatten.foreach { case ui => ui.attachTab(new ExecutorNumTab(ui)) ui.addStaticHandler("static", "/static/special") } } def maybeAddPoint(time: Long): Unit = { if (!initialized.get) { initialize() initialized.compareAndSet(false, true) } if (time - lastPointTime > 20 * 1000) { addPoint(recentEventTime) addPoint(time) lastPointTime = time } recentEventTime = time } def addPoint(time: Long): Unit = { val executorNum = liveExecutors.size kvstore.write(new ExecutorNumWrapper(new ExecutorNum( s"own ${executorNum} executors at ${new SimpleDateFormat("HH:mm:ss").format(new Date(time))}", IndexedSeq(time, executorNum)))) } override def onExecutorAdded(event: SparkListenerExecutorAdded): Unit = { liveExecutors.add(event.executorId) maybeAddPoint(event.time) } override def onExecutorRemoved(event: SparkListenerExecutorRemoved): Unit = { liveExecutors.remove(event.executorId) maybeAddPoint(event.time) } } private[spark] class ExecutorNumWrapper(val point: ExecutorNum) { @JsonIgnore @KVIndex def id: Long = point.value(0) } private[spark] class ExecutorNum(val name: String, val value: IndexedSeq[Long])
Example 26
Source File: ApplicationMonitor.scala From XSQL with Apache License 2.0
package org.apache.spark.monitor.application import java.sql.{Connection, Timestamp} import java.text.SimpleDateFormat import java.util.Date import java.util.concurrent.TimeUnit import scala.concurrent.duration.Duration import org.apache.spark.alarm.AlertMessage import org.apache.spark.alarm.AlertType._ import org.apache.spark.monitor.Monitor import org.apache.spark.monitor.MonitorItem.MonitorItem abstract class ApplicationMonitor extends Monitor { override val alertType = Seq(Application) } class ApplicationInfo( title: MonitorItem, appName: String, appId: String, md5: String, startTime: Date, duration: Long, appUiUrl: String, historyUrl: String, eventLogDir: String, minExecutor: Int, maxExecutor: Int, executorCore: Int, executorMemoryMB: Long, executorAccu: Double, user: String) extends AlertMessage(title) { override def toCsv(): String = { s"${user},${appId}," + s"${new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(startTime)}," + s"${Duration(duration, TimeUnit.MILLISECONDS).toSeconds}," + s"${executorMemoryMB},${executorCore},${executorAccu.formatted("%.2f")},${appName}" } // scalastyle:off override def toHtml(): String = { val html = <h1>任务完成! </h1> <h2>任务信息 </h2> <ul> <li>作业名:{appName}</li> <li>作业ID:{appId}</li> <li>开始时间:{new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(startTime)}</li> <li>任务用时:{Duration(duration, TimeUnit.MILLISECONDS).toSeconds} s</li> </ul> <h2>资源用量</h2> <ul> <li>Executor个数:{minExecutor}~{maxExecutor}</li> <li>Executor内存:{executorMemoryMB} MB</li> <li>Executor核数:{executorCore}</li> <li>Executor累积用量:{executorAccu.formatted("%.2f")} executor*min</li> </ul> <h2>调试信息</h2> <ul> <li>回看链接1:<a href={appUiUrl.split(",").head}>{appUiUrl.split(",").head}</a></li> <li>回看链接2:<a href={historyUrl}>{historyUrl}</a></li> <li>日志文件所在目录:{eventLogDir}</li> </ul> html.mkString } override def toJdbc(conn: Connection, appId: String): Unit = { val query = "INSERT INTO `xsql_monitor`.`spark_history`(" + "`user`, `md5`, `appId`, `startTime`, `duration`, " + "`yarnURL`, `sparkHistoryURL`, `eventLogDir`, `coresPerExecutor`, `memoryPerExecutorMB`," + " `executorAcc`, `appName`, `minExecutors`, `maxExecutors`)" + " SELECT ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? FROM DUAL" + " WHERE NOT EXISTS (SELECT * FROM `xsql_monitor`.`spark_history` WHERE `appId` = ?);" val preparedStmt = conn.prepareStatement(query) preparedStmt.setString(1, user) preparedStmt.setString(2, md5) preparedStmt.setString(3, appId) preparedStmt.setTimestamp(4, new Timestamp(startTime.getTime)) preparedStmt.setLong(5, Duration(duration, TimeUnit.MILLISECONDS).toSeconds) preparedStmt.setString(6, appUiUrl) preparedStmt.setString(7, historyUrl) preparedStmt.setString(8, eventLogDir) preparedStmt.setInt(9, executorCore) preparedStmt.setLong(10, executorMemoryMB) preparedStmt.setDouble(11, executorAccu) preparedStmt.setString(12, appName) preparedStmt.setInt(13, minExecutor) preparedStmt.setInt(14, maxExecutor) preparedStmt.setString(15, appId) preparedStmt.execute } }
Example 27
Source File: SQLMonitor.scala From XSQL with Apache License 2.0
package org.apache.spark.monitor.sql

import java.text.SimpleDateFormat
import java.util.Date
import java.util.concurrent.TimeUnit

import scala.concurrent.duration.Duration

import org.apache.spark.alarm.AlertMessage
import org.apache.spark.alarm.AlertType._
import org.apache.spark.monitor.Monitor
import org.apache.spark.monitor.MonitorItem.MonitorItem

abstract class SQLMonitor extends Monitor {
  override val alertType = Seq(SQL)
}

class SQLInfo(
    title: MonitorItem,
    sqlId: String,
    aeFlag: Boolean,
    appId: String,
    executionId: Long,
    submissionTime: Date,
    duration: Long)
  extends AlertMessage(title) {

  override def toCsv(): String = {
    s"${sqlId},${aeFlag},${appId},${executionId}," +
      s"${new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(submissionTime)}," +
      s"${Duration(duration, TimeUnit.MILLISECONDS).toSeconds}"
  }
}
Example 28
Source File: ExcelOutputWriter.scala From spark-hadoopoffice-ds with Apache License 2.0
package org.zuinnote.spark.office.excel import java.math.BigDecimal import java.sql.Date import java.sql.Timestamp import java.text.DateFormat import java.text.SimpleDateFormat import java.util.Calendar import org.apache.hadoop.conf.Configuration import org.apache.hadoop.io.NullWritable import org.apache.hadoop.io.ArrayWritable import org.apache.hadoop.mapreduce.RecordWriter import org.apache.hadoop.mapreduce.TaskAttemptContext import org.apache.hadoop.fs.Path import org.apache.spark.sql.catalyst.{ CatalystTypeConverters, InternalRow } import org.apache.spark.sql.Row import org.apache.spark.sql.execution.datasources.OutputWriter import org.apache.spark.sql.types._ import org.zuinnote.hadoop.office.format.common.dao.SpreadSheetCellDAO import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration import org.zuinnote.hadoop.office.format.common.util.msexcel.MSExcelUtil import org.zuinnote.hadoop.office.format.mapreduce._ import org.apache.commons.logging.LogFactory import org.apache.commons.logging.Log import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration import java.util.Locale import java.text.DecimalFormat import org.zuinnote.hadoop.office.format.common.converter.ExcelConverterSimpleSpreadSheetCellDAO import java.text.NumberFormat // NOTE: This class is instantiated and used on executor side only, no need to be serializable. private[excel] class ExcelOutputWriter( path: String, dataSchema: StructType, context: TaskAttemptContext, options: Map[String, String]) extends OutputWriter { def write(row: Row): Unit = { // check useHeader if (useHeader) { val headers = row.schema.fieldNames var i = 0 for (x <- headers) { val headerColumnSCD = new SpreadSheetCellDAO(x, "", "", MSExcelUtil.getCellAddressA1Format(currentRowNum, i), defaultSheetName) recordWriter.write(NullWritable.get(), headerColumnSCD) i += 1 } currentRowNum += 1 useHeader = false } // for each value in the row if (row.size>0) { var currentColumnNum = 0; val simpleObject = new Array[AnyRef](row.size) for (i <- 0 to row.size - 1) { // for each element of the row val obj = row.get(i) if ((obj.isInstanceOf[Seq[String]]) && (obj.asInstanceOf[Seq[String]].length==5)) { val formattedValue = obj.asInstanceOf[Seq[String]](0) val comment = obj.asInstanceOf[Seq[String]](1) val formula = obj.asInstanceOf[Seq[String]](2) val address = obj.asInstanceOf[Seq[String]](3) val sheetName = obj.asInstanceOf[Seq[String]](4) simpleObject(i) = new SpreadSheetCellDAO(formattedValue,comment,formula,address,sheetName) } else { simpleObject(i)=obj.asInstanceOf[AnyRef] } } // convert row to spreadsheetcellDAO val spreadSheetCellDAORow = simpleConverter.getSpreadSheetCellDAOfromSimpleDataType(simpleObject, defaultSheetName, currentRowNum) // write it for (x<- spreadSheetCellDAORow) { recordWriter.write(NullWritable.get(), x) } } currentRowNum += 1 } override def close(): Unit = { recordWriter.close(context) currentRowNum = 0; } }
Example 29
Source File: BenchmarkUtil.scala From CodeAnalyzerTutorial with Apache License 2.0
package tutor.utils

import java.text.SimpleDateFormat
import java.util.Date

import com.typesafe.scalalogging.StrictLogging

object BenchmarkUtil extends StrictLogging {

  def record[T](actionDesc: String)(action: => T): T = {
    val beginTime = new Date
    logger.info(s"begin $actionDesc")
    val rs = action
    logger.info(s"end $actionDesc")
    val endTime = new Date
    val elapsed = new Date(endTime.getTime - beginTime.getTime)
    val sdf = new SimpleDateFormat("mm:ss.SSS")
    logger.info(s"$actionDesc total elapsed ${sdf.format(elapsed)}")
    rs
  }

  def recordStart(actionDesc: String): Date = {
    logger.info(s"$actionDesc begin")
    new Date
  }

  def recordElapse(actionDesc: String, beginFrom: Date): Unit = {
    logger.info(s"$actionDesc ended")
    val endTime = new Date
    val elapsed = new Date(endTime.getTime - beginFrom.getTime)
    val sdf = new SimpleDateFormat("mm:ss.SSS")
    logger.info(s"$actionDesc total elapsed ${sdf.format(elapsed)}")
  }
}
Example 30
Source File: L3-DStreamMapping.scala From prosparkstreaming with Apache License 2.0
package org.apress.prospark import org.apache.spark.SparkContext import org.apache.spark.SparkConf import org.apache.spark.streaming.{ Milliseconds, Seconds, StreamingContext } import org.apache.hadoop.io.{ Text, LongWritable, IntWritable } import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.lib.input.TextInputFormat import org.apache.spark.streaming.dstream.DStream import org.apache.hadoop.mapred.TextOutputFormat import org.apache.hadoop.mapreduce.lib.output.{ TextOutputFormat => NewTextOutputFormat } import org.apache.spark.streaming.dstream.PairDStreamFunctions import org.apache.log4j.LogManager import org.json4s._ import org.json4s.native.JsonMethods._ import java.text.SimpleDateFormat import java.util.Date object RedditMappingApp { def main(args: Array[String]) { if (args.length != 2) { System.err.println( "Usage: RedditMappingApp <appname> <input_path>") System.exit(1) } val Seq(appName, inputPath) = args.toSeq val LOG = LogManager.getLogger(this.getClass) val conf = new SparkConf() .setAppName(appName) .setJars(SparkContext.jarOfClass(this.getClass).toSeq) val ssc = new StreamingContext(conf, Seconds(1)) LOG.info("Started at %d".format(ssc.sparkContext.startTime)) val comments = ssc.fileStream[LongWritable, Text, TextInputFormat](inputPath, (f: Path) => true, newFilesOnly = false).map(pair => pair._2.toString) val sdf = new SimpleDateFormat("yyyy-MM-dd") val tsKey = "created_utc" val secs = 1000L val keyedByDay = comments.map(rec => { val ts = (parse(rec) \ tsKey).values (sdf.format(new Date(ts.toString.toLong * secs)), rec) }) val keyedByDayPart = comments.mapPartitions(iter => { var ret = List[(String, String)]() while (iter.hasNext) { val rec = iter.next val ts = (parse(rec) \ tsKey).values ret.::=(sdf.format(new Date(ts.toString.toLong * secs)), rec) } ret.iterator }) val wordTokens = comments.map(rec => { ((parse(rec) \ "body")).values.toString.split(" ") }) val wordTokensFlat = comments.flatMap(rec => { ((parse(rec) \ "body")).values.toString.split(" ") }) val filterSubreddit = comments.filter(rec => (parse(rec) \ "subreddit").values.toString.equals("AskReddit")) val sortedByAuthor = comments.transform(rdd => (rdd.sortBy(rec => (parse(rec) \ "author").values.toString))) ssc.start() ssc.awaitTermination() } }
Example 31
Source File: L3-DStreamKeyValue.scala From prosparkstreaming with Apache License 2.0
package org.apress.prospark import org.apache.spark.SparkContext import org.apache.spark.SparkConf import org.apache.spark.streaming.{ Milliseconds, Seconds, StreamingContext } import org.apache.hadoop.io.{ Text, LongWritable, IntWritable } import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.lib.input.TextInputFormat import org.apache.spark.streaming.dstream.DStream import org.apache.hadoop.mapred.TextOutputFormat import org.apache.hadoop.mapreduce.lib.output.{ TextOutputFormat => NewTextOutputFormat } import org.apache.spark.streaming.dstream.PairDStreamFunctions import org.apache.log4j.LogManager import org.json4s._ import org.json4s.native.JsonMethods._ import java.text.SimpleDateFormat import java.util.Date import org.apache.spark.HashPartitioner object RedditKeyValueApp { def main(args: Array[String]) { if (args.length != 3) { System.err.println( "Usage: RedditKeyValueApp <appname> <input_path> <input_path_popular>") System.exit(1) } val Seq(appName, inputPath, inputPathPopular) = args.toSeq val LOG = LogManager.getLogger(this.getClass) val conf = new SparkConf() .setAppName(appName) .setJars(SparkContext.jarOfClass(this.getClass).toSeq) val ssc = new StreamingContext(conf, Seconds(1)) LOG.info("Started at %d".format(ssc.sparkContext.startTime)) val comments = ssc.fileStream[LongWritable, Text, TextInputFormat](inputPath, (f: Path) => true, newFilesOnly = false).map(pair => pair._2.toString) val popular = ssc.fileStream[LongWritable, Text, TextInputFormat](inputPathPopular, (f: Path) => true, newFilesOnly = false).map(pair => pair._2.toString) val topAuthors = comments.map(rec => ((parse(rec) \ "author").values.toString, 1)) .groupByKey() .map(r => (r._2.sum, r._1)) .transform(rdd => rdd.sortByKey(ascending = false)) val topAuthors2 = comments.map(rec => ((parse(rec) \ "author").values.toString, 1)) .reduceByKey(_ + _) .map(r => (r._2, r._1)) .transform(rdd => rdd.sortByKey(ascending = false)) val topAuthorsByAvgContent = comments.map(rec => ((parse(rec) \ "author").values.toString, (parse(rec) \ "body").values.toString.split(" ").length)) .combineByKey( (v) => (v, 1), (accValue: (Int, Int), v) => (accValue._1 + v, accValue._2 + 1), (accCombine1: (Int, Int), accCombine2: (Int, Int)) => (accCombine1._1 + accCombine2._1, accCombine1._2 + accCombine2._2), new HashPartitioner(ssc.sparkContext.defaultParallelism)) .map({ case (k, v) => (k, v._1 / v._2.toFloat) }) .map(r => (r._2, r._1)) .transform(rdd => rdd.sortByKey(ascending = false)) val keyedBySubreddit = comments.map(rec => (((parse(rec)) \ "subreddit").values.toString, rec)) val keyedBySubreddit2 = popular.map(rec => ({ val t = rec.split(",") (t(1).split("/")(4), t(0)) })) val commentsWithIndustry = keyedBySubreddit.join(keyedBySubreddit2) val keyedBySubredditCo = comments.map(rec => (((parse(rec)) \ "subreddit").values.toString, rec)) val keyedBySubredditCo2 = popular.map(rec => ({ val t = rec.split(",") (t(1).split("/")(4), t(0)) })) val commentsWithIndustryCo = keyedBySubreddit.cogroup(keyedBySubreddit2) val checkpointPath = "/tmp" ssc.checkpoint(checkpointPath) val updateFunc = (values: Seq[Int], state: Option[Int]) => { val currentCount = values.sum val previousCount = state.getOrElse(0) Some(currentCount + previousCount) } val keyedBySubredditState = comments.map(rec => (((parse(rec)) \ "subreddit").values.toString, 1)) val globalCount = keyedBySubredditState.updateStateByKey(updateFunc) .map(r => (r._2, r._1)) .transform(rdd => rdd.sortByKey(ascending = false)) ssc.start() ssc.awaitTermination() } }
Example 32
Source File: L3-DStreamVariation.scala From prosparkstreaming with Apache License 2.0
package org.apress.prospark import org.apache.spark.SparkContext import org.apache.spark.SparkConf import org.apache.spark.streaming.{ Milliseconds, Seconds, StreamingContext } import org.apache.hadoop.io.{ Text, LongWritable, IntWritable } import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.lib.input.TextInputFormat import org.apache.spark.streaming.dstream.DStream import org.apache.hadoop.mapred.TextOutputFormat import org.apache.hadoop.mapreduce.lib.output.{ TextOutputFormat => NewTextOutputFormat } import org.apache.spark.streaming.dstream.PairDStreamFunctions import org.apache.log4j.LogManager import org.json4s._ import org.json4s.native.JsonMethods._ import java.text.SimpleDateFormat import java.util.Date object RedditVariationApp { def main(args: Array[String]) { if (args.length != 2) { System.err.println( "Usage: RedditVariationApp <appname> <input_path>") System.exit(1) } val Seq(appName, inputPath) = args.toSeq val LOG = LogManager.getLogger(this.getClass) val conf = new SparkConf() .setAppName(appName) .setJars(SparkContext.jarOfClass(this.getClass).toSeq) val ssc = new StreamingContext(conf, Seconds(1)) LOG.info("Started at %d".format(ssc.sparkContext.startTime)) val comments = ssc.fileStream[LongWritable, Text, TextInputFormat](inputPath, (f: Path) => true, newFilesOnly = false).map(pair => pair._2.toString) val merged = comments.union(comments) val repartitionedComments = comments.repartition(4) val rddMin = comments.glom().map(arr => arr.minBy(rec => ((parse(rec) \ "created_utc").values.toString.toInt))) ssc.start() ssc.awaitTermination() } }
Example 33
Source File: L3-DStreamWindowAndAction.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark import org.apache.spark.SparkContext import org.apache.spark.SparkConf import org.apache.spark.streaming.{ Milliseconds, Seconds, StreamingContext } import org.apache.hadoop.io.{ Text, LongWritable, IntWritable } import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.lib.input.TextInputFormat import org.apache.spark.streaming.dstream.DStream import org.apache.hadoop.mapred.TextOutputFormat import org.apache.hadoop.mapreduce.lib.output.{ TextOutputFormat => NewTextOutputFormat } import org.apache.spark.streaming.dstream.PairDStreamFunctions import org.apache.log4j.LogManager import org.json4s._ import org.json4s.native.JsonMethods._ import java.text.SimpleDateFormat import java.util.Date import org.apache.spark.HashPartitioner object RedditWindowAndActionApp { def main(args: Array[String]) { if (args.length != 2) { System.err.println( "Usage: RedditWindowAndActionApp <appname> <input_path>") System.exit(1) } val Seq(appName, inputPath) = args.toSeq val LOG = LogManager.getLogger(this.getClass) val conf = new SparkConf() .setAppName(appName) .setJars(SparkContext.jarOfClass(this.getClass).toSeq) val ssc = new StreamingContext(conf, Seconds(1)) LOG.info("Started at %d".format(ssc.sparkContext.startTime)) val comments = ssc.fileStream[LongWritable, Text, TextInputFormat](inputPath, (f: Path) => true, newFilesOnly = false).map(pair => pair._2.toString) val checkpointPath = "/tmp" ssc.checkpoint(checkpointPath) val updateFunc = (values: Seq[Int], state: Option[Int]) => { val currentCount = values.sum val previousCount = state.getOrElse(0) Some(currentCount + previousCount) } val keyedBySubredditState = comments.map(rec => (((parse(rec)) \ "subreddit").values.toString, 1)) val globalCount = keyedBySubredditState.updateStateByKey(updateFunc) .map(r => (r._2, r._1)) .transform(rdd => rdd.sortByKey(ascending = false)) val distinctSubreddits = comments.map(rec => ((parse(rec)) \ "subreddit").values.toString) val windowedRecs = distinctSubreddits.window(Seconds(5), Seconds(5)) val windowedCounts = windowedRecs.countByValue() windowedCounts.print(10) windowedCounts.saveAsObjectFiles("subreddit", "obj") windowedCounts.saveAsTextFiles("subreddit", "txt") globalCount.saveAsHadoopFiles("subreddit", "hadoop", classOf[IntWritable], classOf[Text], classOf[TextOutputFormat[IntWritable, Text]]) globalCount.saveAsNewAPIHadoopFiles("subreddit", "newhadoop", classOf[IntWritable], classOf[Text], classOf[NewTextOutputFormat[IntWritable, Text]]) comments.foreachRDD(rdd => { LOG.info("RDD: %s, Count: %d".format(rdd.id, rdd.count())) }) ssc.start() ssc.awaitTermination() } }
Example 34
Source File: L3-DStreamAggregation.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark import org.apache.spark.SparkContext import org.apache.spark.SparkConf import org.apache.spark.streaming.{ Milliseconds, Seconds, StreamingContext } import org.apache.hadoop.io.{ Text, LongWritable, IntWritable } import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.lib.input.TextInputFormat import org.apache.spark.streaming.dstream.DStream import org.apache.hadoop.mapred.TextOutputFormat import org.apache.hadoop.mapreduce.lib.output.{ TextOutputFormat => NewTextOutputFormat } import org.apache.spark.streaming.dstream.PairDStreamFunctions import org.apache.log4j.LogManager import org.json4s._ import org.json4s.native.JsonMethods._ import java.text.SimpleDateFormat import java.util.Date object RedditAggregationApp { def main(args: Array[String]) { if (args.length != 2) { System.err.println( "Usage: RedditAggregationApp <appname> <input_path>") System.exit(1) } val Seq(appName, inputPath) = args.toSeq val LOG = LogManager.getLogger(this.getClass) val conf = new SparkConf() .setAppName(appName) .setJars(SparkContext.jarOfClass(this.getClass).toSeq) val ssc = new StreamingContext(conf, Seconds(1)) LOG.info("Started at %d".format(ssc.sparkContext.startTime)) val comments = ssc.fileStream[LongWritable, Text, TextInputFormat](inputPath, (f: Path) => true, newFilesOnly = false).map(pair => pair._2.toString) val recCount = comments.count() val recCountValue = comments.countByValue() val totalWords = comments.map(rec => ((parse(rec) \ "body").values.toString)) .flatMap(body => body.split(" ")) .map(word => 1) .reduce(_ + _) ssc.start() ssc.awaitTermination() } }
Example 35
Source File: package.scala From sbt-flaky with Apache License 2.0 | 5 votes |
package flaky import java.io.File import java.text.SimpleDateFormat import java.util.Date import scalatags.Text import scalatags.Text.all.{a, hr, href, p, _} package object web { def footer(): Text.TypedTag[String] = { p( hr(), p( ReportCss.footer, "Created with ", a(href := "https://github.com/otrebski/sbt-flaky", "sbt-flaky plugin"), br, s"Report generated at ${new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date())}", s"Fugue icons are on Creative Common license" ) ) } def indexHtml(reportFile: File, historyFile: Option[File]): String = { val history = historyFile match { case Some(fileName) => a(href := fileName.getName, "History trends") case None => p( "History trends report is not created. To enable history check documentation at ", a(href := "https://github.com/otrebski/sbt-flaky", "https://github.com/otrebski/sbt-flaky") ) } html( head(link(rel := "stylesheet", href := "report.css")), body( h1(ReportCss.title, "Flaky test report"), h4(ReportCss.subtitle, a(href := reportFile.getName, "Report for last build")), h4(ReportCss.subtitle, history), footer() ) ).render } def anchorTest(test: Test): String = s"${test.clazz}_${test.test}" def anchorClass(test: Test): String = test.clazz def anchorTestRun(testCase: TestCase): String = testCase.runName def singleTestDir(test: Test): String = test.clazz def singleTestFileName(test: Test): String = s"${test.test.replaceAll("/", "_")}.html" def linkToSingleTest(test: Test): String = singleTestDir(test) + "/" + singleTestFileName(test) def linkToSingleTestClass(clazz: String): String = s"flaky-report.html#$clazz" def linkToRunNameInSingleTest(test: Test,runName:String) = s"${linkToSingleTest(test)}#$runName" }
Example 36
Source File: History.scala From sbt-flaky with Apache License 2.0 | 5 votes |
package flaky.history import java.io.{File, FileFilter, InputStream} import java.text.SimpleDateFormat import java.util.Date import flaky.{Flaky, FlakyTestReport, Io} import org.apache.commons.vfs2.VFS import scala.xml.XML class History(project: String, historyDir: File, flakyReportDir: File, projectDir: File) { private val zipFileFilter = new FileFilter { override def accept(pathname: File): Boolean = pathname.getName.endsWith(".zip") } private def runFiles(historyDir: File): List[File] = historyDir.listFiles(zipFileFilter).toList def addCurrentToHistory(): Unit = { val timestamp = System.currentTimeMillis() val date = new SimpleDateFormat(History.dateFormat).format(new Date(timestamp)) val gitCommit = Git(projectDir).currentId().toOption val historyReportDescription = HistoryReportDescription(timestamp, gitCommit) HistoryReportDescription.save(historyReportDescription, new File(flakyReportDir, History.descriptorFile)) Zip.compressFolder(new File(historyDir, s"$date.zip"), flakyReportDir) } def removeToOldFromHistory(maxToKeep: Int): Unit = { runFiles(historyDir) .take(Math.max(runFiles(historyDir).size - maxToKeep, 0)) .foreach(_.delete()) } def createHistoryReport(): HistoryReport = { val historicalRuns: List[HistoricalRun] = runFiles(historyDir) .map(History.loadHistory) val date = new SimpleDateFormat("HH:mm dd-MM-YYYY").format(new Date()) HistoryReport(project, date, historicalRuns) } def processHistory(): HistoryReport = { historyDir.mkdirs() addCurrentToHistory() removeToOldFromHistory(20) createHistoryReport() } } case class HistoryReportDescription(timestamp: Long, gitCommitHash: Option[String]) object HistoryReportDescription { def load(in: InputStream): HistoryReportDescription = { val descriptorXml = XML.load(in) val timestamp = (descriptorXml \ "timestamp").text.trim.toLong val gitHash = (descriptorXml \ "gitCommitHash").text.trim HistoryReportDescription(timestamp, Some(gitHash)) } def save(historyReportDescription: HistoryReportDescription, file: File): Unit = { val xml = <HistoryReportDescription> <timestamp> {historyReportDescription.timestamp} </timestamp> <gitCommitHash> {historyReportDescription.gitCommitHash.getOrElse("")} </gitCommitHash> </HistoryReportDescription> val prettyXml = new scala.xml.PrettyPrinter(80, 2).format(xml) Io.writeToFile(file, prettyXml) } } object History { val descriptorFile = "descriptor.xml" val dateFormat = "yyyyMMdd-HHmmss" def loadHistory: (File) => HistoricalRun = { file => { val manager = VFS.getManager val uri = file.toURI.toString.replace("file:/", "zip:/") val fo = manager.resolveFile(uri) val report: FlakyTestReport = Flaky.createReportFromHistory(fo) val descriptorFile = Option(fo.getChild(History.descriptorFile)) val dateFromFileName = file.getName.replace(".zip","") val hrd = descriptorFile .filter(_.exists()) .map(f => HistoryReportDescription.load(f.getContent.getInputStream)) .getOrElse(HistoryReportDescription(new SimpleDateFormat(dateFormat).parse(dateFromFileName).getTime, None)) HistoricalRun(hrd, report) } } }
Example 37
Source File: HistorySpec.scala From sbt-flaky with Apache License 2.0 | 5 votes |
package flaky.history

import java.io.File
import java.text.SimpleDateFormat

import org.scalatest.{Matchers, WordSpec}

class HistorySpec extends WordSpec with Matchers {

  val fileWithDescriptor = "20170516-072750.zip"
  val fileWithoutDescriptor = "20170516-072825.zip"
  val dirWithReports = new File("./src/test/resources/history")

  "HistoryTest" should {
    "loadHistory with descriptor" in {
      val historicalRun: HistoricalRun = History.loadHistory.apply(new File(dirWithReports, fileWithDescriptor))
      historicalRun.historyReportDescription shouldBe HistoryReportDescription(123456L, Some("abcdefg"))
    }

    "loadHistory without descriptor" in {
      // Timestamp can't be hardcoded, because loadHistory tries to parse date from file name
      // with local time zone
      val timestamp = new SimpleDateFormat("yyyyMMdd-HHmmss").parse("20170516-072825").getTime
      val historicalRun: HistoricalRun = History.loadHistory.apply(new File(dirWithReports, fileWithoutDescriptor))
      historicalRun.historyReportDescription shouldBe HistoryReportDescription(timestamp, None)
    }
  }
}
Example 38
Source File: FieldSequentialValue.scala From schedoscope with Apache License 2.0 | 5 votes |
package org.schedoscope.test

import java.text.SimpleDateFormat
import java.util.Date

import org.schedoscope.dsl.{FieldLike, Structure}

object FieldSequentialValue {

  def get(f: FieldLike[_], i: Int, p: String): Any = {
    if (f.t == manifest[Int])
      i
    else if (f.t == manifest[Long])
      i.toLong
    else if (f.t == manifest[Byte])
      i.toByte
    else if (f.t == manifest[Boolean])
      i % 2 == 0
    else if (f.t == manifest[Double])
      i.toDouble
    else if (f.t == manifest[Float])
      i.toFloat
    else if (f.t == manifest[Date])
      new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX").format(new Date(i * 1000L))
    else if (f.t == manifest[String])
      f.n + "-" + p.format(i)
    else if (classOf[Structure].isAssignableFrom(f.t.runtimeClass)) {
      f.t.runtimeClass.newInstance().asInstanceOf[Structure].fields.map(sf => (sf.n, get(sf, i, p))).toMap
    } else if (f.t.runtimeClass == classOf[List[_]]) {
      List()
    } else if (f.t.runtimeClass == classOf[Map[_, _]])
      Map()
    else
      throw new RuntimeException("Cannot generate random values for: " + f.n + ", type is: " + f.t)
  }
}
Example 39
Source File: Globals.scala From schedoscope with Apache License 2.0 | 5 votes |
package schedoscope.example.osm

import java.text.SimpleDateFormat
import java.util.Date

import org.schedoscope.Settings
import org.schedoscope.dsl.View
import org.schedoscope.dsl.views.MonthlyParameterization

object Globals {

  def defaultHiveQlParameters(v: View) = {
    val baseParameters = Map(
      "env" -> v.env,
      "workflow_time" -> new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX").format(new Date),
      "workflow_name" -> v.getClass().getName())

    if (v.isInstanceOf[MonthlyParameterization])
      baseParameters ++ Map(
        "year" -> v.asInstanceOf[MonthlyParameterization].year.v.get,
        "month" -> v.asInstanceOf[MonthlyParameterization].month.v.get)
    else
      baseParameters
  }

  def defaultPigProperties(v: View) = Map(
    "exec.type" -> "MAPREDUCE",
    "mapred.job.tracker" -> Settings().jobTrackerOrResourceManager,
    "fs.default.name" -> Settings().nameNode,
    "workflow_time" -> new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX").format(new Date),
    "workflow_name" -> v.getClass().getName())
}
Example 40
Source File: CustomTelemetryService.scala From finagle-prometheus with MIT License | 5 votes |
package com.samstarling.prometheusfinagle.examples

import java.text.SimpleDateFormat
import java.util.Calendar

import com.samstarling.prometheusfinagle.metrics.Telemetry
import com.twitter.finagle.Service
import com.twitter.finagle.http.{Request, Response, Status}
import com.twitter.util.Future

class CustomTelemetryService(telemetry: Telemetry) extends Service[Request, Response] {

  private val dayOfWeekFormat = new SimpleDateFormat("E")

  private val counter = telemetry.counter("requests_by_day_of_week",
    "Help text",
    Seq("day_of_week"))

  override def apply(request: Request): Future[Response] = {
    dayOfWeek
    counter.labels(dayOfWeek).inc()
    val rep = Response(request.version, Status.Ok)
    rep.setContentString("Your request was logged!")
    Future(rep)
  }

  private def dayOfWeek: String = {
    dayOfWeekFormat.format(Calendar.getInstance.getTime)
  }
}
Example 41
Source File: DirectDataInjector.scala From SparkOnKudu with Apache License 2.0 | 5 votes |
package org.kududb.spark.demo.gamer.cdc import java.text.SimpleDateFormat import java.util.Random import org.kududb.client.{PartialRow, Operation, KuduClient} import org.kududb.spark.demo.gamer.aggregates.GamerDataGenerator class DirectDataInjector { val simpleDateFormat = new SimpleDateFormat("MM,dd,yyyy") val random = new Random def main(args:Array[String]): Unit = { if (args.length == 0) { println("<kuduMaster> <tableName> <numberOfRecords>") return } val kuduMaster = args(0) val tableName = args(1) val numberOfRecords = args(2).toInt val kuduClient = new KuduClient.KuduClientBuilder(kuduMaster).build() val table = kuduClient.openTable(tableName) val session = kuduClient.newSession() for (i <- 0 to numberOfRecords) { val record = GamerDataGenerator.makeNewGamerRecord(100000) val pr = new PartialRow(table.getSchema) pr.addString(0, "record.gamerId") pr.addString(1, "") val scannerRows = kuduClient.newScannerBuilder(table).lowerBound(null).limit(1).build().nextRows() val op:Operation = if (scannerRows.hasNext) { val oldRow = scannerRows.next() val oldRecordUpdateOp = table.newInsert() val row = oldRecordUpdateOp.getRow row.addString("gamer_id", oldRow.getString("gamer_id")) row.addString("eff_to", simpleDateFormat.format(System.currentTimeMillis())) row.addString("eff_from", oldRow.getString("eff_from")) row.addLong("last_time_played", oldRow.getLong("last_time_played")) row.addInt("games_played", oldRow.getInt("games_played")) row.addInt("games_won", oldRow.getInt("games_won")) row.addInt("oks", oldRow.getInt("oks")) row.addInt("deaths", oldRow.getInt("deaths")) row.addInt("damage_given", oldRow.getInt("damage_given")) row.addInt("damage_taken", oldRow.getInt("damage_taken")) row.addInt("max_oks_in_one_game", oldRow.getInt("max_oks_in_one_game")) row.addInt("max_deaths_in_one_game", oldRow.getInt("max_deaths_in_one_game")) session.apply(oldRecordUpdateOp) table.newUpdate() } else { table.newInsert() } val row = op.getRow row.addString("gamer_id", record.gamerId) row.addString("eff_to", "") row.addString("eff_from", simpleDateFormat.format(System.currentTimeMillis())) row.addLong("last_time_played", record.lastTimePlayed) row.addInt("games_played", record.gamesPlayed) row.addInt("games_won", record.gamesWon) row.addInt("oks", record.oks) row.addInt("deaths", record.deaths) row.addInt("damage_given", record.damageGiven) row.addInt("damage_taken", record.damageTaken) row.addInt("max_oks_in_one_game", record.maxOksInOneGame) row.addInt("max_deaths_in_one_game", record.maxDeathsInOneGame) session.apply(op) } session.flush() kuduClient.close() } }
Example 42
Source File: DirectDataMultiThreadedInjector.scala From SparkOnKudu with Apache License 2.0 | 5 votes |
package org.kududb.spark.demo.gamer.cdc

import java.text.SimpleDateFormat
import java.util.Random
import java.util.concurrent.atomic.AtomicInteger
import java.util.concurrent.{TimeUnit, Executors}

import org.kududb.client.{Operation, PartialRow, KuduClient}
import org.kududb.spark.demo.gamer.aggregates.GamerDataGenerator

object DirectDataMultiThreadedInjector {
  val simpleDateFormat = new SimpleDateFormat("MM,dd,yyyy")
  val random = new Random

  def main(args: Array[String]): Unit = {
    if (args.length == 0) {
      println("<kuduMaster> <tableName> <numberOfRecords> <numberOfThreads> <numberOfGamers> <sleepTime>")
      return
    }

    val kuduMaster = args(0)
    val tableName = args(1)
    val numberOfRecords = args(2).toInt
    val executor = Executors.newFixedThreadPool(args(3).toInt)
    val numberOfGamers = args(4).toInt
    val sleepTime = args(5).toInt

    val kuduClient = new KuduClient.KuduClientBuilder(kuduMaster).build()
    val leftToRun = new AtomicInteger()

    for (i <- 0 to numberOfRecords) {
      leftToRun.incrementAndGet()
      executor.execute(new ApplyNewRecordRunnable(GamerDataGenerator.makeNewGamerRecord(numberOfGamers),
        kuduClient, tableName, leftToRun))
      println("Submitted:" + i)
      Thread.sleep(sleepTime)
    }

    val startTime = System.currentTimeMillis()
    while (!executor.awaitTermination(10000, TimeUnit.SECONDS)) {
      val newTime = System.currentTimeMillis()
      println("> Still Waiting: {Time:" + (newTime - startTime) + ", LeftToRun:" + leftToRun + "}")
    }

    kuduClient.close()
  }
}
Example 43
Source File: ApplyNewRecordRunnable.scala From SparkOnKudu with Apache License 2.0 | 5 votes |
package org.kududb.spark.demo.gamer.cdc import java.text.SimpleDateFormat import java.util.concurrent.atomic.AtomicInteger import org.kududb.client.{Operation, PartialRow, KuduClient} import org.kududb.spark.demo.gamer.GamerEvent class ApplyNewRecordRunnable(val gameEvent: GamerEvent, val kuduClient: KuduClient, val tableName: String, val leftToRun:AtomicInteger) extends Runnable{ override def run(): Unit = { val table = kuduClient.openTable(tableName) val session = kuduClient.newSession() val simpleDateFormat = new SimpleDateFormat("MM,dd,yyyy") val record = gameEvent val pr = new PartialRow(table.getSchema) pr.addString(0, record.gamerId) pr.addString(1, "") val scannerRows = kuduClient.newScannerBuilder(table).lowerBound(pr).limit(1).build().nextRows() val op:Operation = if (scannerRows.hasNext) { println(" >> had next") val oldRow = scannerRows.next() val oldRecordUpdateOp = table.newInsert() val row = oldRecordUpdateOp.getRow row.addString("gamer_id", oldRow.getString("gamer_id")) row.addString("eff_to", simpleDateFormat.format(record.lastTimePlayed)) row.addString("eff_from", oldRow.getString("eff_from")) row.addLong("last_time_played", oldRow.getLong("last_time_played")) row.addInt("games_played", oldRow.getInt("games_played")) row.addInt("games_won", oldRow.getInt("games_won")) row.addInt("oks", oldRow.getInt("oks")) row.addInt("deaths", oldRow.getInt("deaths")) row.addInt("damage_given", oldRow.getInt("damage_given")) row.addInt("damage_taken", oldRow.getInt("damage_taken")) row.addInt("max_oks_in_one_game", oldRow.getInt("max_oks_in_one_game")) row.addInt("max_deaths_in_one_game", oldRow.getInt("max_deaths_in_one_game")) session.apply(oldRecordUpdateOp) table.newUpdate() } else { table.newInsert() } val row = op.getRow row.addString("gamer_id", record.gamerId) row.addString("eff_to", "") row.addString("eff_from", simpleDateFormat.format(record.lastTimePlayed)) row.addLong("last_time_played", record.lastTimePlayed) row.addInt("games_played", record.gamesPlayed) row.addInt("games_won", record.gamesWon) row.addInt("oks", record.oks) row.addInt("deaths", record.deaths) row.addInt("damage_given", record.damageGiven) row.addInt("damage_taken", record.damageTaken) row.addInt("max_oks_in_one_game", record.maxOksInOneGame) row.addInt("max_deaths_in_one_game", record.maxDeathsInOneGame) session.apply(op) session.flush() leftToRun.decrementAndGet() println(" >> finished Submit") } }
Example 44
Source File: SequenceFileSink.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.hadoop import java.text.SimpleDateFormat import org.apache.hadoop.fs.Path import org.apache.hadoop.hdfs.HdfsConfiguration import org.apache.hadoop.io.SequenceFile import org.apache.gearpump.Message import org.apache.gearpump.cluster.UserConfig import org.apache.gearpump.streaming.hadoop.lib.HadoopUtil import org.apache.gearpump.streaming.hadoop.lib.format.{DefaultSequenceFormatter, OutputFormatter} import org.apache.gearpump.streaming.hadoop.lib.rotation.{FileSizeRotation, Rotation} import org.apache.gearpump.streaming.sink.DataSink import org.apache.gearpump.streaming.task.{TaskContext, TaskId} class SequenceFileSink( userConfig: UserConfig, basePath: String, rotation: Rotation = new FileSizeRotation(128 * Math.pow(2, 20).toLong), sequenceFormat: OutputFormatter = new DefaultSequenceFormatter) extends DataSink{ @transient private lazy val configuration = new HdfsConfiguration() private val dateFormat = new SimpleDateFormat("yyyy_MM_dd-HH-mm-ss") private var writer: SequenceFile.Writer = null private var taskId: TaskId = null private var appName: String = null override def close(): Unit = { closeWriter() } private def closeWriter(): Unit = { Option(writer).foreach { w => w.hflush() w.close() } } private def getNextWriter: SequenceFile.Writer = { SequenceFile.createWriter( configuration, SequenceFile.Writer.file(getNextFilePath), SequenceFile.Writer.keyClass(sequenceFormat.getKeyClass), SequenceFile.Writer.valueClass(sequenceFormat.getValueClass) ) } private def getNextFilePath: Path = { val base = new Path(basePath, s"$appName-task${taskId.processorId}_${taskId.index}") new Path(base, dateFormat.format(new java.util.Date)) } }
Example 45
Source File: ImageProcessing.scala From 006877 with MIT License | 5 votes |
package aia.routing

import java.text.SimpleDateFormat
import java.util.Date

case class Photo(license: String, speed: Int)

object ImageProcessing {
  val dateFormat = new SimpleDateFormat("ddMMyyyy HH:mm:ss.SSS")

  def getSpeed(image: String): Option[Int] = {
    val attributes = image.split('|')
    if (attributes.size == 3)
      Some(attributes(1).toInt)
    else
      None
  }

  def getTime(image: String): Option[Date] = {
    val attributes = image.split('|')
    if (attributes.size == 3)
      Some(dateFormat.parse(attributes(0)))
    else
      None
  }

  def getLicense(image: String): Option[String] = {
    val attributes = image.split('|')
    if (attributes.size == 3)
      Some(attributes(2))
    else
      None
  }

  def createPhotoString(date: Date, speed: Int): String = {
    createPhotoString(date, speed, " ")
  }

  def createPhotoString(date: Date, speed: Int, license: String): String = {
    "%s|%s|%s".format(dateFormat.format(date), speed, license)
  }
}
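Because createPhotoString and the extractors above share the same pipe-delimited layout and SimpleDateFormat pattern, an entry round-trips cleanly. A minimal sketch, not part of the original listing (the demo object and values are made up):

import java.util.Date
import aia.routing.ImageProcessing

object ImageProcessingDemo extends App {
  // Builds an entry such as "01062021 12:00:00.000|60|XY-123" and reads it back.
  val entry = ImageProcessing.createPhotoString(new Date(), 60, "XY-123")
  println(ImageProcessing.getSpeed(entry))   // Some(60)
  println(ImageProcessing.getLicense(entry)) // Some(XY-123)
  println(ImageProcessing.getTime(entry))    // Some(<capture time, millisecond precision>)
}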
Example 46
Source File: ThriftJsonServlet.scala From incubator-livy with Apache License 2.0 | 5 votes |
package org.apache.livy.thriftserver.ui

import java.text.SimpleDateFormat

import org.apache.livy.server.JsonServlet
import org.apache.livy.thriftserver.LivyThriftServer

class ThriftJsonServlet(val basePath: String) extends JsonServlet {

  private val df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss z")

  case class SessionInfo(
      sessionId: String,
      livySessionId: String,
      owner: String,
      createdAt: String)

  get("/sessions") {
    val thriftSessions = LivyThriftServer.getInstance.map { server =>
      val sessionManager = server.getSessionManager
      sessionManager.getSessions.map { sessionHandle =>
        val info = sessionManager.getSessionInfo(sessionHandle)
        SessionInfo(sessionHandle.getSessionId.toString,
          sessionManager.livySessionId(sessionHandle).map(_.toString).getOrElse(""),
          info.username,
          df.format(info.creationTime))
      }.toSeq
    }.getOrElse(Seq.empty)
    val from = params.get("from").map(_.toInt).getOrElse(0)
    val size = params.get("size").map(_.toInt).getOrElse(100)

    Map(
      "from" -> from,
      "total" -> thriftSessions.length,
      "sessions" -> thriftSessions.view(from, from + size))
  }
}
Example 47
Source File: GMMClusteringPersist.scala From Machine-Learning-with-Spark-Second-Edition with MIT License | 5 votes |
package org.sparksamples.gmm import java.text.SimpleDateFormat import org.apache.spark.SparkConf import org.apache.spark.ml.clustering.{GaussianMixture} import org.apache.spark.sql.SparkSession object GMMClusteringPersist { val PATH = "/home/ubuntu/work/spark-2.0.0-bin-hadoop2.7/" val BASE = "./data/movie_lens_libsvm_2f" val time = System.currentTimeMillis() val formatter = new SimpleDateFormat("dd_MM_yyyy_hh_mm_ss") import java.util.Calendar val calendar = Calendar.getInstance() calendar.setTimeInMillis(time) val date_time = formatter.format(calendar.getTime()) def main(args: Array[String]): Unit = { val spConfig = (new SparkConf).setMaster("local[1]").setAppName("SparkApp"). set("spark.driver.allowMultipleContexts", "true") val spark = SparkSession .builder() .appName("Spark SQL Example") .config(spConfig) .getOrCreate() val datasetUsers = spark.read.format("libsvm").load( BASE + "/movie_lens_2f_users_libsvm/part-00000") datasetUsers.show(3) val gmmUsers = new GaussianMixture().setK(5).setSeed(1L) gmmUsers.setMaxIter(20) val modelUsers = gmmUsers.fit(datasetUsers) val predictedDataSetUsers = modelUsers.transform(datasetUsers) val predictionsUsers = predictedDataSetUsers.select("prediction").rdd.map(x=> x(0)) predictionsUsers.saveAsTextFile(BASE + "/prediction/" + date_time + "/gmm_2f_users") val dataSetItems = spark.read.format("libsvm").load(BASE + "/movie_lens_2f_items_libsvm/part-00000") val gmmItems = new GaussianMixture().setK(5).setSeed(1L) val modelItems = gmmItems.fit(dataSetItems) val predictedDataSetItems = modelItems.transform(dataSetItems) val predictionsItems = predictedDataSetItems.select("prediction").rdd.map(x=> x(0)) predictionsItems.saveAsTextFile(BASE + "/prediction/" + date_time + "/gmm_2f_items") spark.stop() } }
Example 48
Source File: BisectingKMeansPersist.scala From Machine-Learning-with-Spark-Second-Edition with MIT License | 5 votes |
package org.sparksamples.kmeans import java.text.SimpleDateFormat import org.apache.spark.SparkConf import org.apache.spark.ml.clustering.BisectingKMeans import org.apache.spark.sql.SparkSession object BisectingKMeansPersist { val PATH = "/home/ubuntu/work/spark-2.0.0-bin-hadoop2.7/" val BASE = "./data/movie_lens_libsvm_2f" val time = System.currentTimeMillis() val formatter = new SimpleDateFormat("dd_MM_yyyy_hh_mm_ss") import java.util.Calendar val calendar = Calendar.getInstance() calendar.setTimeInMillis(time) val date_time = formatter.format(calendar.getTime()) def main(args: Array[String]): Unit = { val spConfig = (new SparkConf).setMaster("local[1]").setAppName("SparkApp"). set("spark.driver.allowMultipleContexts", "true") val spark = SparkSession .builder() .appName("Spark SQL Example") .config(spConfig) .getOrCreate() val datasetUsers = spark.read.format("libsvm").load( BASE + "/movie_lens_2f_users_xy/part-00000") datasetUsers.show(3) val bKMeansUsers = new BisectingKMeans() bKMeansUsers.setMaxIter(10) bKMeansUsers.setMinDivisibleClusterSize(5) val modelUsers = bKMeansUsers.fit(datasetUsers) val predictedUserClusters = modelUsers.transform(datasetUsers) modelUsers.clusterCenters.foreach(println) val predictedDataSetUsers = modelUsers.transform(datasetUsers) val predictionsUsers = predictedDataSetUsers.select("prediction").rdd.map(x=> x(0)) predictionsUsers.saveAsTextFile(BASE + "/prediction/" + date_time + "/bkmeans_2f_users") val datasetItems = spark.read.format("libsvm").load(BASE + "/movie_lens_2f_items_xy/part-00000") datasetItems.show(3) val kmeansItems = new BisectingKMeans().setK(5).setSeed(1L) val modelItems = kmeansItems.fit(datasetItems) val predictedDataSetItems = modelItems.transform(datasetItems) val predictionsItems = predictedDataSetItems.select("prediction").rdd.map(x=> x(0)) predictionsItems.saveAsTextFile(BASE + "/prediction/" + date_time + "/bkmeans_2f_items") spark.stop() } }
Example 49
Source File: ScalaApp.scala From Machine-Learning-with-Spark-Second-Edition with MIT License | 5 votes |
import java.text.SimpleDateFormat import java.util.Calendar import org.apache.spark.SparkContext import org.apache.spark.mllib.recommendation.{ALS, Rating} //import org.apache.spark. val predictedRating = model.predict(789, 123) println(predictedRating) val userId = 789 val K = 10 val topKRecs = model.recommendProducts(userId, K) println(topKRecs.mkString("\n")) val movies = sc.textFile(PATH + "/ml-100k/u.item") val titles = movies.map(line => line.split("\\|").take(2)).map(array => (array(0).toInt, array(1))).collectAsMap() titles(123) // res68: String = Frighteners, The (1996) val moviesForUser = ratings.keyBy(_.user).lookup(789) // moviesForUser: Seq[org.apache.spark.mllib.recommendation.Rating] = WrappedArray(Rating(789,1012,4.0), Rating(789,127,5.0), Rating(789,475,5.0), Rating(789,93,4.0), ... // ... println(moviesForUser.size) moviesForUser.sortBy(-_.rating).take(10).map(rating => (titles(rating.product), rating.rating)).foreach(println) topKRecs.map(rating => (titles(rating.product), rating.rating)).foreach(println) sc.stop() //bw.close() } class Util { def getDate(): String = { val today = Calendar.getInstance().getTime() // (2) create a date "formatter" (the date format we want) val formatter = new SimpleDateFormat("yyyy-MM-dd-hh.mm.ss") // (3) create a new String using the date format we want val folderName = formatter.format(today) return folderName } } }
Example 50
Source File: Util.scala From Machine-Learning-with-Spark-Second-Edition with MIT License | 5 votes |
package com.sparksample

import java.text.SimpleDateFormat
import java.util.Calendar

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD
// DoubleMatrix is the dense vector/matrix type from the jblas linear algebra library
import org.jblas.DoubleMatrix

object Util {
  val PATH = "../.."
  val spConfig = (new SparkConf).setMaster("local").setAppName("SparkApp")
  var sc = new SparkContext(spConfig)

  def getMovieData() : RDD[String] = {
    val movie_data = sc.textFile(PATH + "/data/ml-100k/u.item")
    return movie_data
  }

  def getUserData() : RDD[String] = {
    val user_data = sc.textFile(PATH + "/data/ml-100k/u.data")
    return user_data
  }

  def getDate(): String = {
    val today = Calendar.getInstance().getTime()
    // (2) create a date "formatter" (the date format we want)
    val formatter = new SimpleDateFormat("yyyy-MM-dd-hh.mm.ss")
    // (3) create a new String using the date format we want
    val folderName = formatter.format(today)
    return folderName
  }

  def cosineSimilarity(vec1: DoubleMatrix, vec2: DoubleMatrix): Double = {
    vec1.dot(vec2) / (vec1.norm2() * vec2.norm2())
  }

  def avgPrecisionK(actual: Seq[Int], predicted: Seq[Int], k: Int): Double = {
    val predK = predicted.take(k)
    var score = 0.0
    var numHits = 0.0
    for ((p, i) <- predK.zipWithIndex) {
      if (actual.contains(p)) {
        numHits += 1.0
        score += numHits / (i.toDouble + 1.0)
      }
    }
    if (actual.isEmpty) {
      1.0
    } else {
      score / scala.math.min(actual.size, k).toDouble
    }
  }
}
Example 51
Source File: PMMLModelExport.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export

import java.text.SimpleDateFormat
import java.util.{Date, Locale}

import scala.beans.BeanProperty

import org.dmg.pmml.{Application, Header, PMML, Timestamp}

private[mllib] trait PMMLModelExport {

  @BeanProperty
  val pmml: PMML = {
    val version = getClass.getPackage.getImplementationVersion
    val app = new Application("Apache Spark MLlib").setVersion(version)
    val timestamp = new Timestamp()
      .addContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US).format(new Date()))
    val header = new Header()
      .setApplication(app)
      .setTimestamp(timestamp)
    new PMML("4.2", header, null)
  }
}
Example 52
Source File: JacksonMessageWriter.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.status.api.v1 import java.io.OutputStream import java.lang.annotation.Annotation import java.lang.reflect.Type import java.nio.charset.StandardCharsets import java.text.SimpleDateFormat import java.util.{Calendar, Locale, SimpleTimeZone} import javax.ws.rs.Produces import javax.ws.rs.core.{MediaType, MultivaluedMap} import javax.ws.rs.ext.{MessageBodyWriter, Provider} import com.fasterxml.jackson.annotation.JsonInclude import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} @Provider @Produces(Array(MediaType.APPLICATION_JSON)) private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{ val mapper = new ObjectMapper() { override def writeValueAsString(t: Any): String = { super.writeValueAsString(t) } } mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule) mapper.enable(SerializationFeature.INDENT_OUTPUT) mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL) mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat) override def isWriteable( aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType): Boolean = { true } override def writeTo( t: Object, aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType, multivaluedMap: MultivaluedMap[String, AnyRef], outputStream: OutputStream): Unit = { t match { case ErrorWrapper(err) => outputStream.write(err.getBytes(StandardCharsets.UTF_8)) case _ => mapper.writeValue(outputStream, t) } } override def getSize( t: Object, aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType): Long = { -1L } } private[spark] object JacksonMessageWriter { def makeISODateFormat: SimpleDateFormat = { val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'", Locale.US) val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT")) iso8601.setCalendar(cal) iso8601 } }
Example 53
Source File: SimpleDateParam.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.status.api.v1

import java.text.{ParseException, SimpleDateFormat}
import java.util.{Locale, TimeZone}
import javax.ws.rs.WebApplicationException
import javax.ws.rs.core.Response
import javax.ws.rs.core.Response.Status

private[v1] class SimpleDateParam(val originalValue: String) {

  val timestamp: Long = {
    val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz", Locale.US)
    try {
      format.parse(originalValue).getTime()
    } catch {
      case _: ParseException =>
        val gmtDay = new SimpleDateFormat("yyyy-MM-dd", Locale.US)
        gmtDay.setTimeZone(TimeZone.getTimeZone("GMT"))
        try {
          gmtDay.parse(originalValue).getTime()
        } catch {
          case _: ParseException =>
            throw new WebApplicationException(
              Response
                .status(Status.BAD_REQUEST)
                .entity("Couldn't parse date: " + originalValue)
                .build()
            )
        }
    }
  }
}
Example 54
Source File: PlainText.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.eidos.utils.meta import java.text.SimpleDateFormat import java.util.Calendar import java.util.TimeZone import org.clulab.wm.eidos.utils.EidosException import org.clulab.timenorm.scate.SimpleInterval import org.clulab.wm.eidos.context.DCT import org.clulab.wm.eidos.document.Metadata class PlainText(text: String, titleOpt: Option[String] = None, idOpt: Option[String] = None, dateOpt: Option[String] = None, locationOpt: Option[String] = None ) extends EidosText { protected val metadata = { val dctOpt: Option[DCT] = { dateOpt.map { date => val calendar = try { val parsed = PlainText.dateFormat.parse(date) val calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC")) calendar.setTime(parsed) calendar } catch { case throwable: Throwable => throw new EidosException(s"""Could not decipher "${date}" as a date""", throwable) } val simpleInterval = SimpleInterval.of(calendar.get(Calendar.YEAR), calendar.get(Calendar.MONTH) + 1, calendar.get(Calendar.DAY_OF_MONTH)) DCT(simpleInterval, date) } } new Metadata(dctOpt, idOpt, titleOpt, locationOpt) } def getText: String = text def getMetadata: Metadata = metadata } object PlainText { protected val dateFormat: SimpleDateFormat = { val dateFormat = new SimpleDateFormat("yyyy-MM-dd") val timeZone = TimeZone.getTimeZone("UTC") dateFormat.setTimeZone(timeZone) dateFormat } }
Example 55
Source File: CustomScalarSpec.scala From sangria with Apache License 2.0 | 5 votes |
package sangria.schema import java.text.SimpleDateFormat import java.util.Date import sangria.ast import sangria.util.Pos import sangria.util.SimpleGraphQlSupport._ import sangria.validation.ValueCoercionViolation import scala.util.{Failure, Success, Try} import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class CustomScalarSpec extends AnyWordSpec with Matchers { "Schema" should { "allow to define custom scalar types" in { val dateFormat = new SimpleDateFormat("yyyy-MM-dd") case object DateCoercionViolation extends ValueCoercionViolation("Date value expected") def parseDate(s: String) = Try(dateFormat.parse(s)) match { case Success(d) => Right(d) case Failure(error) => Left(DateCoercionViolation) } val DateType = ScalarType[Date]("Date", description = Some("An example of date scalar type"), coerceOutput = (d, _) => dateFormat.format(d), coerceUserInput = { case s: String => parseDate(s) case _ => Left(DateCoercionViolation) }, coerceInput = { case ast.StringValue(s, _, _, _, _) => parseDate(s) case _ => Left(DateCoercionViolation) }) val DateArg = Argument("dateInput", DateType) val QueryType = ObjectType("Query", fields[Unit, Unit]( Field("foo", DateType, arguments = DateArg :: Nil, resolve = ctx => { val date: Date = ctx.arg(DateArg) new Date(date.getTime + 1000 * 60 * 60 * 24 * 5) }) )) val schema = Schema(QueryType) check(schema, (), """ { foo(dateInput: "2015-05-11") } """, Map("data" -> Map("foo" -> "2015-05-16")) ) checkContainsErrors(schema, (), """ { foo(dateInput: "2015-05-test") } """, null, List("""Expected type 'Date!', found '"2015-05-test"'. Date value expected""" -> List(Pos(3, 28))) ) } } }
Example 56
Source File: CarbonLoadParams.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command.management import java.text.SimpleDateFormat import java.util import scala.collection.mutable import org.apache.hadoop.conf.Configuration import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.command.UpdateTableModel import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.carbondata.core.indexstore.PartitionSpec import org.apache.carbondata.core.statusmanager.SegmentStatus import org.apache.carbondata.core.util.CarbonProperties import org.apache.carbondata.events.OperationContext import org.apache.carbondata.processing.loading.model.CarbonLoadModel case class CarbonLoadParams( sparkSession: SparkSession, tableName: String, sizeInBytes: Long, isOverwriteTable: Boolean, carbonLoadModel: CarbonLoadModel, hadoopConf: Configuration, logicalPartitionRelation: LogicalRelation, dateFormat : SimpleDateFormat, timeStampFormat : SimpleDateFormat, optionsOriginal: Map[String, String], finalPartition : Map[String, Option[String]], currPartitions: util.List[PartitionSpec], partitionStatus : SegmentStatus, var dataFrame: Option[DataFrame], scanResultRDD : Option[RDD[InternalRow]], updateModel: Option[UpdateTableModel], operationContext: OperationContext) { }
Example 57
Source File: TestUpdateAndDeleteWithLargeData.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.spark.testsuite.iud import java.text.SimpleDateFormat import org.apache.spark.sql.test.util.QueryTest import org.apache.spark.sql.{DataFrame, Row, SaveMode} import org.scalatest.BeforeAndAfterAll import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.util.CarbonProperties class TestUpdateAndDeleteWithLargeData extends QueryTest with BeforeAndAfterAll { var df: DataFrame = _ override def beforeAll { dropTable() buildTestData() } private def buildTestData(): Unit = { CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "yyyy-MM-dd") // Simulate data and write to table orders import sqlContext.implicits._ val sdf = new SimpleDateFormat("yyyy-MM-dd") df = sqlContext.sparkSession.sparkContext.parallelize(1 to 1500000) .map(value => (value, new java.sql.Date(sdf.parse("2015-07-" + (value % 10 + 10)).getTime), "china", "aaa" + value, "phone" + 555 * value, "ASD" + (60000 + value), 14999 + value, "ordersTable" + value)) .toDF("o_id", "o_date", "o_country", "o_name", "o_phonetype", "o_serialname", "o_salary", "o_comment") createTable() } private def createTable(): Unit = { df.write .format("carbondata") .option("tableName", "orders") .option("tempCSV", "true") .option("compress", "true") .mode(SaveMode.Overwrite) .save() } private def dropTable() = { sql("DROP TABLE IF EXISTS orders") } test("test the update and delete delete functionality for large data") { sql( """ update ORDERS set (o_comment) = ('yyy')""").show() checkAnswer(sql( """select o_comment from orders limit 2 """), Seq(Row("yyy"), Row("yyy"))) sql("delete from orders where exists (select 1 from orders)") checkAnswer(sql( """ SELECT count(*) FROM orders """), Row(0)) } }
Example 58
Source File: RowStreamParserImp.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.streaming.parser import java.text.SimpleDateFormat import java.util import org.apache.hadoop.conf.Configuration import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.Row import org.apache.spark.sql.types.StructType import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.processing.loading.ComplexDelimitersEnum import org.apache.carbondata.processing.loading.constants.DataLoadProcessorConstants class RowStreamParserImp extends CarbonStreamParser { var configuration: Configuration = null var isVarcharTypeMapping: Array[Boolean] = null var structType: StructType = null var encoder: ExpressionEncoder[Row] = null var timeStampFormat: SimpleDateFormat = null var dateFormat: SimpleDateFormat = null var complexDelimiters: util.ArrayList[String] = new util.ArrayList[String]() var serializationNullFormat: String = null override def initialize(configuration: Configuration, structType: StructType, isVarcharTypeMapping: Array[Boolean]): Unit = { this.configuration = configuration this.structType = structType this.encoder = RowEncoder.apply(this.structType).resolveAndBind() this.isVarcharTypeMapping = isVarcharTypeMapping this.timeStampFormat = new SimpleDateFormat( this.configuration.get(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT)) this.dateFormat = new SimpleDateFormat( this.configuration.get(CarbonCommonConstants.CARBON_DATE_FORMAT)) this.complexDelimiters.add(this.configuration.get("carbon_complex_delimiter_level_1")) this.complexDelimiters.add(this.configuration.get("carbon_complex_delimiter_level_2")) this.complexDelimiters.add(this.configuration.get("carbon_complex_delimiter_level_3")) this.complexDelimiters.add(ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_4.value()) this.serializationNullFormat = this.configuration.get(DataLoadProcessorConstants.SERIALIZATION_NULL_FORMAT) } override def parserRow(value: InternalRow): Array[Object] = { this.encoder.fromRow(value).toSeq.zipWithIndex.map { case (x, i) => FieldConverter.objectToString( x, serializationNullFormat, complexDelimiters, timeStampFormat, dateFormat, isVarcharType = i < this.isVarcharTypeMapping.length && this.isVarcharTypeMapping(i), binaryCodec = null) } }.toArray override def close(): Unit = { } }
Example 59
Source File: Commons.scala From spark-structured-streaming with MIT License | 5 votes |
package com.kafkaToSparkToCass

import java.sql.Timestamp
import java.text.{DateFormat, SimpleDateFormat}

object Commons {

  case class UserEvent(user_id: String, time: Timestamp, event: String) extends Serializable

  def getTimeStamp(timeStr: String): Timestamp = {
    val dateFormat1: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    val dateFormat2: DateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss")

    val date: Option[Timestamp] = {
      try {
        Some(new Timestamp(dateFormat1.parse(timeStr).getTime))
      } catch {
        case e: java.text.ParseException =>
          Some(new Timestamp(dateFormat2.parse(timeStr).getTime))
      }
    }
    date.getOrElse(Timestamp.valueOf(timeStr))
  }
}
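A quick illustration of the fallback above, not taken from the project: the first pattern covers space-separated timestamps, and the ParseException branch covers ISO-style 'T'-separated ones. The demo object and values are invented:

import com.kafkaToSparkToCass.Commons

object CommonsDemo extends App {
  // Matches dateFormat1 ("yyyy-MM-dd HH:mm:ss") directly
  println(Commons.getTimeStamp("2021-06-01 12:30:00"))
  // dateFormat1 fails on the 'T', so dateFormat2 ("yyyy-MM-dd'T'HH:mm:ss") handles it
  println(Commons.getTimeStamp("2021-06-01T12:30:00"))
}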
Example 60
Source File: DateUtils.scala From common4s with Apache License 2.0 | 5 votes |
package commons.mapper.utils

import java.text.{ ParseException, ParsePosition, SimpleDateFormat }
import java.util.Date

object DateUtils {

  def parseDateWithLeniency(str : String, parsePatterns : Array[String], lenient : Boolean) : Date = {
    if (str == null || parsePatterns == null) {
      throw new IllegalArgumentException("Date and Patterns must not be null");
    }

    val parser = new SimpleDateFormat();
    parser.setLenient(lenient);
    val pos = new ParsePosition(0);

    for (parsePattern <- parsePatterns) {
      var pattern = parsePattern;

      // LANG-530 - need to make sure 'ZZ' output doesn't get passed to SimpleDateFormat
      if (parsePattern.endsWith("ZZ")) {
        pattern = pattern.substring(0, pattern.length() - 1);
      }

      parser.applyPattern(pattern);
      pos.setIndex(0);

      var str2 = str;
      // LANG-530 - need to make sure 'ZZ' output doesn't hit SimpleDateFormat as it will ParseException
      if (parsePattern.endsWith("ZZ")) {
        str2 = str.replaceAll("([-+][0-9][0-9]):([0-9][0-9])$", "$1$2");
      }

      val date = parser.parse(str2, pos);
      if (date != null && pos.getIndex() == str2.length()) {
        return date;
      }
    }

    throw new ParseException("Unable to parse the date: " + str, -1);
  }
}
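A hypothetical call against the method above (assuming the enclosing DateUtils object as shown; the demo object and patterns are made up): the loop returns on the first pattern that consumes the entire input.

import commons.mapper.utils.DateUtils

object DateUtilsDemo extends App {
  val patterns = Array("yyyy-MM-dd'T'HH:mm:ss", "yyyy-MM-dd")
  // The first pattern fails on a date-only string; the second parses it completely,
  // so parseDateWithLeniency returns on the second iteration.
  println(DateUtils.parseDateWithLeniency("2022-03-15", patterns, lenient = false))
}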
Example 61
Source File: JsonUtil.scala From ionroller with MIT License | 5 votes |
package ionroller

import java.text.SimpleDateFormat

import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper
import play.api.libs.json.Json

object JsonUtil {

  object Implicits {

    implicit class Unmarshallable(unMarshallMe: String) {
      def toMap: Map[String, Any] = JsonUtil.toMap(unMarshallMe)
      def toMapOf[V]()(implicit m: Manifest[V]): Map[String, V] = JsonUtil.toMap[V](unMarshallMe)
      def fromJson[T]()(implicit m: Manifest[T]): T = JsonUtil.fromJson[T](unMarshallMe)
    }

    implicit class Marshallable[T](marshallMe: T) {
      def toJson: String = JsonUtil.toJson(marshallMe)
      def toJsonValue = Json.parse(JsonUtil.toJson(marshallMe))
    }
  }

  val mapper = new ObjectMapper() with ScalaObjectMapper
  mapper.registerModule(DefaultScalaModule)
  mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
  mapper.setDateFormat(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS"))

  def toJson(value: Map[Symbol, Any]): String = {
    toJson(value map { case (k, v) => k.name -> v })
  }

  def toJson(value: Any): String = {
    mapper.writeValueAsString(value)
  }

  def toMap[V](json: String)(implicit m: Manifest[V]) = fromJson[Map[String, V]](json)

  def fromJson[T](json: String)(implicit m: Manifest[T]): T = {
    mapper.readValue[T](json)
  }
}
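A small round-trip sketch using the helpers above; the Deployment case class and demo object are made up for illustration, and jackson-module-scala is assumed to be on the classpath:

import ionroller.JsonUtil

case class Deployment(service: String, replicas: Int)

object JsonUtilDemo extends App {
  val json = JsonUtil.toJson(Deployment("api", 3))   // e.g. {"service":"api","replicas":3}
  println(JsonUtil.fromJson[Deployment](json))       // Deployment(api,3)
}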
Example 62
Source File: DateUtil.scala From real-time-stream-processing-engine with Apache License 2.0 | 5 votes |
package com.knoldus.streaming.util import java.text.SimpleDateFormat import java.util.Date object DateUtil { private val dateFormats = List( "yyyyMMdd'T'HHmmss.SSSZ", "EEE, dd MMM yyyy HH:mm:ss Z", "yyyy-MM-dd HH:mm:ss", "EEE MMM dd HH:mm:ss Z yyyy", "MMM dd, yyyy, HH:mm a", "MMM dd, yyyy HH:mm a", "yyyy-MM-dd'T'HH:mm:ss", "dd MMM yyyy HH:mm:ss:S Z", "E MMM dd HH:mm:ss z yyyy", "dd MMM yyyy HH:mm:ss:SSS", "dd MMM yyyy H:mm:ss:SSS", "MM-dd-yyyy HH:mm:ss:SSS", "MM/dd/yyyy HH:mm:ss:SSS", "dd/MM/yyyy HH:mm:ss:SSS", "dd-MM-yyyy HH:mm:ss:SSS", "MMM/dd/yyyy HH:mm:ss:SSS", "MMM-dd-yyyy HH:mm:ss:SSS", "dd-MMM-yyyy HH:mm:ss:SSS", "MM-dd-yyyy H:mm:ss:SSS", "MM/dd/yyyy H:mm:ss:SSS", "dd/MM/yyyy H:mm:ss:SSS", "dd-MM-yyyy H:mm:ss:SSS", "MMM/dd/yyyy H:mm:ss:SSS", "MMM-dd-yyyy H:mm:ss:SSS", "dd-MMM-yyyy H:mm:ss:SSS", "MM-dd-yyyy HH:mm:ss", "MM/dd/yyyy HH:mm:ss", "dd/MM/yyyy HH:mm:ss", "dd-MM-yyyy HH:mm:ss", "MMM/dd/yyyy HH:mm:ss", "MMM-dd-yyyy HH:mm:ss", "dd-MMM-yyyy HH:mm:ss", "MM-dd-yyyy H:mm:ss", "MM/dd/yyyy H:mm:ss", "dd/MM/yyyy H:mm:ss", "dd-MM-yyyy H:mm:ss", "MMM/dd/yyyy H:mm:ss", "MMM-dd-yyyy H:mm:ss", "dd-MMM-yyyy H:mm:ss", "yyyy-MM-dd", "MM-dd-yyyy", "MM/dd/yyyy", "dd/MM/yyyy", "dd-MM-yyyy", "MMM/dd/yyyy", "MMM-dd-yyyy", "dd-MMM-yyyy") private val esDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ") def getESDateFormat(dateString: String): String = { def getDate(dateFormats: Seq[String], dateString: String): String = try { val dateFormat = new SimpleDateFormat(dateFormats.head) val date = dateFormat.parse(dateString) esDateFormat.format(date) } catch { case _ if (dateFormats.size > 1) => getDate(dateFormats.tail, dateString) case _: Exception => esDateFormat.format(new Date()) } getDate(dateFormats, dateString) } }
Example 63
Source File: httpserverplugin_staticfile.scala From scalabpe with Apache License 2.0 | 5 votes |
package scalabpe.plugin.http import java.io.File import java.net.URLEncoder import java.text.SimpleDateFormat import java.util.Calendar import java.util.GregorianCalendar import java.util.Locale import java.util.TimeZone import scala.collection.mutable.HashMap import org.jboss.netty.handler.codec.http.HttpHeaders import scalabpe.core.HashMapStringAny class StaticFilePlugin extends HttpServerPlugin with HttpServerStaticFilePlugin { val ETAG_TAG = "etag" val EXPIRE_TAG = "expire" val ATTACHMENT = "attachment" val FILENAME = "filename" val HTTP_DATE_FORMAT = "EEE, dd MMM yyyy HH:mm:ss zzz"; val HTTP_DATE_GMT_TIMEZONE = "GMT"; val df_tl = new ThreadLocal[SimpleDateFormat]() { override def initialValue(): SimpleDateFormat = { val df = new SimpleDateFormat(HTTP_DATE_FORMAT, Locale.US) df.setTimeZone(TimeZone.getTimeZone(HTTP_DATE_GMT_TIMEZONE)); df } } def generateStaticFile(serviceId: Int, msgId: Int, errorCode: Int, errorMessage: String, body: HashMapStringAny, pluginParam: String, headers: HashMap[String, String]): String = { if (body.ns(FILENAME) == "") { return null } val filename = body.ns(FILENAME) if (!new File(filename).exists()) { return null } if (body.ns(ETAG_TAG) != "") { headers.put("ETag", body.ns(ETAG_TAG)) } if (body.ns(EXPIRE_TAG) != "") { body.i(EXPIRE_TAG) match { case 0 | -1 => headers.put(HttpHeaders.Names.CACHE_CONTROL, "no-cache") case n => // seconds val time = new GregorianCalendar(); time.add(Calendar.SECOND, n); headers.put(HttpHeaders.Names.EXPIRES, df_tl.get.format(time.getTime())); headers.put(HttpHeaders.Names.CACHE_CONTROL, "max-age=" + n); } } val ext = parseExt(filename) if (ext != "") body.put("__file_ext__", ext) if (body.ns(ATTACHMENT, "1") == "1") { val filename = body.ns(FILENAME) val v = "attachment; filename=\"%s\"".format(URLEncoder.encode(parseFilename(filename), "UTF-8")) headers.put("Content-Disposition", v) } filename } def parseFilename(name: String): String = { val p = name.lastIndexOf("/") if (p < 0) return name name.substring(p + 1) } def parseExt(name: String): String = { val p = name.lastIndexOf(".") if (p < 0) return "" name.substring(p + 1).toLowerCase() } }
Example 64
Source File: IlluminaBasecallsToSam.scala From dagr with MIT License | 5 votes |
package dagr.tasks.picard import java.text.SimpleDateFormat import dagr.core.execsystem._ import dagr.core.tasksystem.{JvmRanOutOfMemory, VariableResources} import dagr.tasks.DagrDef.{DirPath, FilePath} import htsjdk.samtools.util.Iso8601Date import picard.util.IlluminaUtil.IlluminaAdapterPair import scala.collection.mutable.ListBuffer class IlluminaBasecallsToSam(basecallsDir: DirPath, lane: Int, runBarcode: String, readStructure: String, libraryParamsFile: FilePath, runDate: Option[Iso8601Date] = None, sequencingCenter: Option[String] = None, includeNonPfReads: Boolean = false, ignoreUnexpectedBarcodes: Boolean = false, minThreads: Int = 4, maxThreads: Int = 16, adapterPairs: Seq[IlluminaAdapterPair] = Seq( IlluminaAdapterPair.INDEXED, IlluminaAdapterPair.DUAL_INDEXED, IlluminaAdapterPair.NEXTERA_V2, IlluminaAdapterPair.FLUIDIGM ), barcodesDir: Option[DirPath] = None, maxReadsInRamPerTile: Option[Int] = Some(500000), firstTile: Option[Int] = None, tileLimit: Option[Int] = None, tmpDir: Option[DirPath] = None ) extends PicardTask with VariableResources with JvmRanOutOfMemory { protected val byMemoryPerThread: Memory = Memory("1GB") protected var memoryPerThread: Memory = Memory("2GB") override def pickResources(resources: ResourceSet): Option[ResourceSet] = { Range.inclusive(start=maxThreads, end=minThreads, step= -1) .flatMap { cores => resources.subset(Cores(cores), Memory(cores * memoryPerThread.value)) }.headOption } override protected def addPicardArgs(buffer: ListBuffer[Any]): Unit = { buffer += "BASECALLS_DIR=" + basecallsDir buffer += "LANE=" + lane buffer += "RUN_BARCODE=" + runBarcode barcodesDir.foreach(dir => buffer += "BARCODES_DIR=" + dir) runDate.foreach(date => buffer += "RUN_START_DATE=" + new SimpleDateFormat("yyyy/MM/dd").format(date)) buffer += "SEQUENCING_CENTER=" + sequencingCenter.getOrElse("null") buffer += "NUM_PROCESSORS=" + resources.cores.toInt buffer += "READ_STRUCTURE=" + readStructure.toString buffer += "LIBRARY_PARAMS=" + libraryParamsFile buffer += "INCLUDE_NON_PF_READS=" + includeNonPfReads if (ignoreUnexpectedBarcodes) buffer += "IGNORE_UNEXPECTED_BARCODES=true" if (adapterPairs.isEmpty) buffer += "ADAPTERS_TO_CHECK=null" else adapterPairs.foreach(buffer += "ADAPTERS_TO_CHECK=" + _) maxReadsInRamPerTile.foreach(n => buffer += "MAX_READS_IN_RAM_PER_TILE=" + n) firstTile.foreach(buffer += "FIRST_TILE=" + _) // If set, this is the first tile to be processed (used for debugging). tileLimit.foreach(buffer += "TILE_LIMIT=" + _) // If set, process no more than this many tiles (used for debugging). tmpDir.foreach(tmp => buffer += "TMP_DIR=" + tmp) } }
Example 65
Source File: StringToTimestampParser.scala From bandar-log with Apache License 2.0 | 5 votes |
package com.aol.one.dwh.infra.parser

import java.text.{DateFormat, SimpleDateFormat}
import java.util.TimeZone

import com.aol.one.dwh.infra.util.{ExceptionPrinter, LogTrait}

import scala.util.control.NonFatal
import scala.util.{Failure, Try}

object StringToTimestampParser extends LogTrait with ExceptionPrinter {

  def parse(value: String, format: String): Option[Long] = {
    Try {
      val dateFormat: DateFormat = new SimpleDateFormat(format)
      dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"))
      dateFormat.parse(value).getTime
    }.recoverWith {
      case NonFatal(e) =>
        logger.error(s"Could not parse value:[$value] using format:[$format]. Catching exception {}", e.getStringStackTrace)
        Failure(e)
    }.toOption
  }
}
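An illustrative use of the parser above (the values are made up): a match yields the epoch milliseconds interpreted in UTC, and a mismatch is logged and returned as None.

// 2021-01-01 00:00:00 UTC => Some(1609459200000)
StringToTimestampParser.parse("2021-01-01 00:00:00", "yyyy-MM-dd HH:mm:ss")

// Pattern mismatch: the ParseException is logged and converted to None
StringToTimestampParser.parse("not-a-date", "yyyy-MM-dd HH:mm:ss")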
Example 66
Source File: Bench.scala From akka-nbench with Apache License 2.0 | 5 votes |
package bench import akka.actor._ import akka.pattern.ask import akka.util.Timeout import scala.concurrent.duration._ import scala.reflect.runtime.universe._ import scala.concurrent.Await import com.typesafe.config._ import net.ceedubs.ficus.Ficus._ import java.util.Properties import java.nio.file._ import java.util.Date import java.text.SimpleDateFormat import java.util.Date import Tapper._ object Bench extends App { def prepareOutputDirs(): String = { val csvDateTimeDir = FileSystems.getDefault().getPath( "tests/" + new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date())) Files.createDirectories(csvDateTimeDir) val csvSymlink = FileSystems.getDefault().getPath("tests/current") if(Files.isSymbolicLink(csvSymlink)){ Files.delete(csvSymlink) } else if (Files.exists(csvSymlink)) { throw new NotASymbolicLinkException(s"test/current is not a symbolic link. Path: $csvSymlink") } Files.createSymbolicLink(csvSymlink, csvDateTimeDir.toAbsolutePath) csvDateTimeDir.toAbsolutePath.toString } def parseOptions(): String = { val usage = """ Usage: activator -mem 4096 "run-main bench.Bench scenario_name" """ if (args.length != 1) println(usage) return args(0) } val scenario = parseOptions val config = ConfigFactory.load().getConfig(scenario) val duration = config.getInt("duration") val concurrent = config.getInt("concurrent") val csvDateTimeDir = prepareOutputDirs val system = ActorSystem("bench") val actorProps = Props(classOf[StatsCollector], csvDateTimeDir, config) val statsCollector = system.actorOf(actorProps, name = "statscollector") val operationsWithRatio: Map[String, Int] = config.as[Map[String, Int]]("operations") val numer = operationsWithRatio.values.sum if (concurrent < numer){ val msg = s"concurrent($concurrent) must greater than sum of operations ratio($numer)" System.err.println(msg) throw new ApplicationConfigException(msg) } val operations = for((key, value) <- operationsWithRatio) yield { List.range(0, concurrent * operationsWithRatio(key) / numer).map(_ => key) } implicit val timeout = Timeout(duration * 2, SECONDS) var driverClz = Class.forName(config.getString("driver")) val drivers = operations.flatten.zipWithIndex.map{ case (operation, i) => system.actorOf(Props(driverClz, operation, statsCollector, config).withDispatcher("my-dispatcher"), name = s"driver_$i") } drivers.par.map(actor => actor ? Ready()).foreach{ f => Await.result(f, timeout.duration).asInstanceOf[OK] } val startAt = new Date() val doUntil = new Date(startAt.getTime + duration * 1000) drivers.par.map(actor => actor ? Go(doUntil)).foreach { f => Await.result(f, timeout.duration).asInstanceOf[OK] } (statsCollector ? TearDown()).tap { f => Await.result(f, timeout.duration).asInstanceOf[OK] } drivers.par.map(actor => actor ? TearDown()).foreach { f => Await.result(f, timeout.duration).asInstanceOf[OK] } (drivers.head ? TearDown()).tap { f => Await.result(f, timeout.duration).asInstanceOf[OK] } system.awaitTermination() }
Example 67
Source File: Utils.scala From graphcool-framework with Apache License 2.0 | 5 votes |
package cool.graph.rabbit import java.text.SimpleDateFormat import java.util.{Date, UUID} import java.util.concurrent.ThreadFactory import java.util.concurrent.atomic.AtomicLong object Utils { def timestamp: String = { val formatter = new SimpleDateFormat("HH:mm:ss.SSS-dd.MM.yyyy") val now = new Date() formatter.format(now) } def timestampWithRandom: String = timestamp + "-" + UUID.randomUUID() def newNamedThreadFactory(name: String): ThreadFactory = new ThreadFactory { val count = new AtomicLong(0) override def newThread(runnable: Runnable): Thread = { val thread = new Thread(runnable) thread.setName(s"$name-" + count.getAndIncrement) thread.setDaemon(true) thread } } }
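A short usage sketch for the helpers above, assuming the Utils object shown is on the classpath; the pool size and thread-pool name are made up for illustration:

import java.util.concurrent.Executors

Utils.timestamp            // e.g. "14:10:00.123-30.12.2005" (pattern "HH:mm:ss.SSS-dd.MM.yyyy")
Utils.timestampWithRandom  // the same timestamp with a random UUID appended
val pool = Executors.newFixedThreadPool(4, Utils.newNamedThreadFactory("rabbit-consumer"))
// threads created by this pool are daemons named "rabbit-consumer-0", "rabbit-consumer-1", ...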
Example 68
Source File: LDBCRouter.scala From Raphtory with Apache License 2.0 | 5 votes |
package com.raphtory.examples.ldbc.routers import java.text.SimpleDateFormat import java.util.Date import com.raphtory.core.components.Router.RouterWorker import com.raphtory.core.model.communication.EdgeAdd import com.raphtory.core.model.communication.EdgeDelete import com.raphtory.core.model.communication.Type import com.raphtory.core.model.communication.VertexAdd import com.raphtory.core.model.communication.VertexDelete import com.raphtory.examples.random.actors.RandomSpout class LDBCRouter(override val routerId: Int,override val workerID:Int, override val initialManagerCount: Int) extends RouterWorker { override protected def parseTuple(value: Any): Unit = { val fileLine = value.asInstanceOf[String].split("\\|") val date = fileLine(1).substring(0, 10) + fileLine(1).substring(11, 23); //extract the day of the event val date2 = fileLine(2).substring(0, 10) + fileLine(1).substring(11, 23); //extract the day of the event val creationDate: Long = new SimpleDateFormat("yyyy-MM-ddHH:mm:ss.SSS").parse(date).getTime() val deletionDate: Long = new SimpleDateFormat("yyyy-MM-ddHH:mm:ss.SSS").parse(date2).getTime() val vertexDeletion = sys.env.getOrElse("LDBC_VERTEX_DELETION", "false").trim.toBoolean val edgeDeletion = sys.env.getOrElse("LDBC_EDGE_DELETION", "false").trim.toBoolean fileLine(0) match { case "person" => sendGraphUpdate(VertexAdd(creationDate, assignID("person" + fileLine(3)), Type("person"))) //sendGraphUpdate(VertexAdd(creationDate, fileLine(3).toLong,Type("person"))) if(vertexDeletion) sendGraphUpdate(VertexDelete(deletionDate, assignID("person" + fileLine(3)))) case "person_knows_person" => //sendGraphUpdate(EdgeAdd(creationDate, fileLine(3).toLong,fileLine(4).toLong,Type("person_knows_person"))) sendGraphUpdate( EdgeAdd( creationDate, assignID("person" + fileLine(3)), assignID("person" + fileLine(4)), Type("person_knows_person") ) ) if(edgeDeletion) sendGraphUpdate(EdgeDelete(deletionDate, assignID("person"+fileLine(3)),assignID("person"+fileLine(4)))) } } } //2012-11-01T09:28:01.185+00:00|2019-07-22T11:24:24.362+00:00|35184372093644|Jose|Garcia|female|1988-05-20|111.68.47.44|Firefox
Example 69
Source File: LDBCOldRouter.scala From Raphtory with Apache License 2.0 | 5 votes |
package com.raphtory.examples.ldbc.routers import java.text.SimpleDateFormat import java.util.Date import com.raphtory.core.components.Router.RouterWorker import com.raphtory.core.model.communication.EdgeAdd import com.raphtory.core.model.communication.EdgeDelete import com.raphtory.core.model.communication.Type import com.raphtory.core.model.communication.VertexAdd import com.raphtory.core.model.communication.VertexDelete class LDBCOldRouter(override val routerId: Int,override val workerID:Int, override val initialManagerCount: Int) extends RouterWorker { override protected def parseTuple(value: Any): Unit = { val fileLine = value.asInstanceOf[String].split("\\|") //val deletionDate:Long = new SimpleDateFormat("yyyy-MM-ddHH:mm:ss.SSS").parse(date2).getTime() fileLine(0) match { case "person" => val date = fileLine(6).substring(0, 10) + fileLine(5).substring(11, 23); //extract the day of the event //val date2 = fileLine(2).substring(0, 10) + fileLine(1).substring(11, 23); //extract the day of the event val creationDate: Long = new SimpleDateFormat("yyyy-MM-ddHH:mm:ss.SSS").parse(date).getTime() sendGraphUpdate(VertexAdd(creationDate, assignID("person" + fileLine(1)), Type("person"))) //sendGraphUpdate(VertexAdd(creationDate, fileLine(3).toLong,Type("person"))) // sendGraphUpdate(VertexDelete(deletionDate, assignID("person"+fileLine(3)))) case "person_knows_person" => val date = fileLine(3).substring(0, 10) + fileLine(3).substring(11, 23); //extract the day of the event //val date2 = fileLine(2).substring(0, 10) + fileLine(1).substring(11, 23); //extract the day of the event val creationDate: Long = new SimpleDateFormat("yyyy-MM-ddHH:mm:ss.SSS").parse(date).getTime() //sendGraphUpdate(EdgeAdd(creationDate, fileLine(3).toLong,fileLine(4).toLong,Type("person_knows_person"))) sendGraphUpdate( EdgeAdd( creationDate, assignID("person" + fileLine(1)), assignID("person" + fileLine(2)), Type("person_knows_person") ) ) //sendGraphUpdate(EdgeDelete(deletionDate, assignID("person"+fileLine(3)),assignID("person"+fileLine(4)))) } } } //2012-11-01T09:28:01.185+00:00|2019-07-22T11:24:24.362+00:00|35184372093644|Jose|Garcia|female|1988-05-20|111.68.47.44|Firefox
Example 70
Source File: rumourInteractRouter.scala From Raphtory with Apache License 2.0 | 5 votes |
package com.raphtory.examples.twitterRumour import java.text.SimpleDateFormat import com.raphtory.core.components.Router.RouterWorker import com.raphtory.core.model.communication._ import spray.json._ import scala.io.Source class rumourInteractRouter(override val routerId: Int,override val workerID:Int, val initialManagerCount: Int) extends RouterWorker { override protected def parseTuple(cmd: Any): Unit = { //println("im at router top...") val List(r_status, tweet) = cmd.asInstanceOf[String].split("__").toList val json = Source.fromFile(tweet) for (line <- json.getLines) { // println("reading json"+cmd) var user = line.parseJson.asJsObject.fields("user").asJsObject val post = line.parseJson.asJsObject val replyTime = post.fields("created_at").toString.toString.split("\"")(1) val source = user.fields("id").toString // if (source.toLong <0){println("this is converting worng.."+source) // sys.exit()} val dist = post.fields("in_reply_to_user_id").toString if (dist != "null") sendGraphUpdate( EdgeAddWithProperties( getTwitterDate(replyTime), source.toLong, dist.toLong, properties = Properties(ImmutableProperty("rumourStatus", r_status)) ) ) else sendGraphUpdate( VertexAddWithProperties( getTwitterDate(replyTime), source.toLong, properties = Properties(ImmutableProperty("rumourStatus", r_status)) ) ) } } def getTwitterDate(date: String): Long = { // println(">>> converting time...") val twitter = "EEE MMM dd HH:mm:ss ZZZZZ yyyy" val sf = new SimpleDateFormat(twitter) //println(date) try //println("converted time ///"+t) return sf.parse(date).getTime() catch { case e: Throwable => println("-----time not properly converting" + date) sys.exit() return 0 } } }
Example 71
Source File: ChainalysisABRouter.scala From Raphtory with Apache License 2.0 | 5 votes |
package com.raphtory.examples.blockchain.routers import java.text.SimpleDateFormat import com.raphtory.core.components.Router.RouterWorker import com.raphtory.core.model.communication.Type import com.raphtory.core.model.communication._ class ChainalysisABRouter(override val routerId: Int,override val workerID:Int, override val initialManagerCount: Int) extends RouterWorker { def parseTuple(record: Any): Unit = { val dp = formatLine(record.asInstanceOf[String].split(",").map(_.trim)) val transactionTime = dp.time val srcClusterId = assignID(dp.srcCluster.toString) val dstClusterId = assignID(dp.dstCluster.toString) val transactionId = assignID(dp.txid.toString) val btcAmount = dp.amount val usdAmount = dp.usd sendGraphUpdate(VertexAdd(transactionTime, srcClusterId, Type("Cluster"))) sendGraphUpdate(VertexAdd(transactionTime, dstClusterId, Type("Cluster"))) sendGraphUpdate(VertexAdd(transactionTime, transactionId, Type("Transaction"))) sendGraphUpdate( EdgeAddWithProperties(transactionTime, srcClusterId, transactionId, Properties(DoubleProperty("BitCoin", btcAmount), DoubleProperty("USD",usdAmount)), Type("Incoming Payment") ) ) sendGraphUpdate( EdgeAddWithProperties(transactionTime, transactionId, dstClusterId, Properties(DoubleProperty("BitCoin", btcAmount), DoubleProperty("USD",usdAmount)), Type("Outgoing Payment") ) ) } //converts the line into a case class which has all of the data via the correct name and type def formatLine(line: Array[String]): Datapoint = Datapoint( line(1).toDouble / 100000000, //Amount of transaction in BTC line(2).toLong, //ID of destination cluster line(3).toLong, //ID of source cluster line(4).toLong * 1000, //Time of transaction in seconds (milli in Raph) line(5).toLong, //ID of transaction, can be similar for many records line(6).toDouble / 100000 //Amount of transaction in USD ) def longCheck(data: String): Option[Long] = if (data equals "") None else Some(data.toLong) case class Datapoint( amount: Double, //Amount of transaction in Satoshi dstCluster: Long, //ID of destination cluster srcCluster: Long, //ID of source cluster time: Long, //Time of transaction in seconds txid: Long, //ID of transaction, can be similar for many records usd: Double //Amount of transaction in USD ) }
Example 72
Source File: OutDegree.scala From Raphtory with Apache License 2.0 | 5 votes |
package com.raphtory.examples.random.depricated import java.text.SimpleDateFormat import java.util.Date import com.raphtory.core.analysis.API.Analyser import com.raphtory.core.utils.Utils import scala.collection.mutable.ArrayBuffer class OutDegree(args:Array[String]) extends Analyser(args){ override def analyse(): Unit = { var results = ArrayBuffer[Int]() proxy.getVerticesSet().foreach { v => val vertex = proxy.getVertex(v._2) val totalEdges = vertex.getOutgoingNeighbors.size // println("Total edges for V "+v+" "+vertex.getOutgoingNeighbors + " "+vertex.getIngoingNeighbors ) results += totalEdges } // println("THIS IS HOW RESULTS LOOK: "+ results.groupBy(identity).mapValues(_.size)) results.groupBy(identity).mapValues(_.size).toList } override def setup(): Unit = {} override def defineMaxSteps(): Int = 1 override def processResults(results: ArrayBuffer[Any], timeStamp: Long, viewCompleteTime: Long): Unit = {} override def processViewResults(results: ArrayBuffer[Any], timestamp: Long, viewCompleteTime: Long): Unit = { val output_file = System.getenv().getOrDefault("GAB_PROJECT_OUTPUT", "/app/defout.csv").trim val inputFormat = new SimpleDateFormat("E MMM dd HH:mm:ss z yyyy") val outputFormat = new SimpleDateFormat("dd/MM/yyyy HH:mm:ss") var finalResults = ArrayBuffer[(Int, Int)]() for (kv <- results) // println("KV RESULTS: " + kv) for (pair <- kv.asInstanceOf[List[(Int, Int)]]) finalResults += pair val currentDate = new Date(timestamp) val formattedDate = outputFormat.format(inputFormat.parse(currentDate.toString)) var degrees = finalResults.groupBy(_._1).mapValues(seq => seq.map(_._2).reduce(_ + _)).toList.sortBy(_._1) //.foreach(println) for ((degree, total) <- degrees) { var text = formattedDate + "," + degree + "," + total Utils.writeLines(output_file, text, "Date,OutDegree,Total") } } override def processWindowResults( results: ArrayBuffer[Any], timestamp: Long, windowSize: Long, viewCompleteTime: Long ): Unit = ??? override def returnResults(): Any = ??? }
Example 73
Source File: CitationRouter.scala From Raphtory with Apache License 2.0 | 5 votes |
package com.raphtory.examples.citationNetwork import java.text.SimpleDateFormat import com.raphtory.core.components.Router.RouterWorker import com.raphtory.core.model.communication._ class CitationRouter(override val routerId: Int,override val workerID:Int, override val initialManagerCount: Int) extends RouterWorker { def parseTuple(record: Any): Unit = { val fileLine = record.asInstanceOf[String].split(",").map(_.trim) //extract the values from the data source in the form of: // 0-sourceNode,1-targetNode,2-sourceCitedTargetOn,3-targetCreationDate,4-targetLastCitedOn val sourceNode = fileLine(0).toInt val targetNode = fileLine(1).toInt val sourceCitedTargetOn = dateToUnixTime(timestamp = fileLine(2)) val targetCreationDate = dateToUnixTime(timestamp = fileLine(3)) val targetLastCitedOn = dateToUnixTime(timestamp = fileLine(4)) //create sourceNode sendGraphUpdate(VertexAdd(sourceCitedTargetOn, sourceNode)) //create destinationNode sendGraphUpdate(VertexAdd(targetCreationDate, targetNode)) //create edge sendGraphUpdate(EdgeAdd(sourceCitedTargetOn, sourceNode, targetNode)) if (sourceCitedTargetOn == targetLastCitedOn) sendGraphUpdate(EdgeDelete(targetLastCitedOn, sourceNode, targetNode)) } def dateToUnixTime(timestamp: => String): Long = { //if(timestamp == null) return null; println(timestamp) val sdf = new SimpleDateFormat("dd/MM/yyyy") println(sdf) val dt = sdf.parse(timestamp) println(dt) val epoch = dt.getTime() println(epoch) epoch / 1000 } }
Example 74
Source File: S3MigrationHandlerBase.scala From flyway-awslambda with MIT License | 5 votes |
package crossroad0201.aws.flywaylambda import java.text.SimpleDateFormat import java.util.Date import com.amazonaws.services.lambda.runtime.Context import com.amazonaws.services.s3.AmazonS3 import crossroad0201.aws.flywaylambda.deploy.{FlywayDeployment, S3SourceFlywayDeployer} import crossroad0201.aws.flywaylambda.migration.{FlywayMigrator, MigrationInfo, MigrationResult} import spray.json.DefaultJsonProtocol import scala.util.Try object MigrationResultProtocol extends DefaultJsonProtocol { import spray.json._ implicit val DateFormat = new RootJsonFormat[Date] { override def write(value: Date): JsValue = if (value == null) JsNull else JsString(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'").format(value)) override def read(json: JsValue): Date = ??? } implicit val migrationInfoFormat = jsonFormat6(MigrationInfo.apply) implicit val migrationResultFormat = jsonFormat5(MigrationResult.apply) } trait S3MigrationHandlerBase extends FlywayMigrator { type ResultJson = String type ResultStoredPath = String protected def migrate(bucketName: String, prefix: String, flywayConfFileName: String = "flyway.conf")(implicit context: Context, s3Client: AmazonS3): Try[ResultJson] = { val logger = context.getLogger def resultJson(result: MigrationResult): ResultJson = { import MigrationResultProtocol._ import spray.json._ result.toJson.prettyPrint } def storeResult(deployment: FlywayDeployment, result: MigrationResult): ResultStoredPath = { val jsonPath = s"${deployment.sourcePrefix}/migration-result.json" s3Client.putObject(deployment.sourceBucket, jsonPath, resultJson(result)) jsonPath } for { // Deploy Flyway resources. d <- new S3SourceFlywayDeployer(s3Client, bucketName, prefix, flywayConfFileName).deploy _ = { logger.log( s"""--- Flyway configuration ------------------------------------ |flyway.url = ${d.url} |flyway.user = **** |flyway.password = **** | |SQL locations = ${d.location} |SQL files = ${d.sqlFiles.mkString(", ")} |------------------------------------------------------------- """.stripMargin) } // Migrate DB. r = migrate(d) _ = { logger.log(s"${r.message}!. ${r.appliedCount} applied.") r.infos.foreach { i => logger.log(s"Version=${i.version}, Type=${i.`type`}, State=${i.state} InstalledAt=${i.installedAt} ExecutionTime=${i.execTime} Description=${i.description}") } } // Store migration result. storedPath = storeResult(d, r) _ = logger.log(s"Migration result stored to $bucketName/$storedPath.") } yield resultJson(r) } }
Example 75
Source File: CliLogger.scala From Argus-SAF with Apache License 2.0 | 5 votes |
package org.argus.jnsaf.native_statistics import java.io.{File, FileWriter, PrintWriter} import java.text.SimpleDateFormat import java.util.Date object CliLogger { def timeStamp = new SimpleDateFormat("yyyyMMdd-HHmmss").format(new Date) def outPrint(s : String) { scala.Console.out.print(s) scala.Console.out.flush() } def outPrintln(s : String) { scala.Console.out.println(s) scala.Console.out.flush() } def outPrintln() { scala.Console.out.println() scala.Console.out.flush() } def errPrintln(s : String) { scala.Console.err.println(s) scala.Console.err.flush() } def errPrintln() { scala.Console.err.println() scala.Console.err.flush() } def logError(dir: File, text: String, e: Throwable) { outPrintln() errPrintln(text + e.getMessage) val f = new File(dir, ".errorlog") f.getParentFile.mkdirs val fw = new FileWriter(f) try { val pw = new PrintWriter(fw) pw.println("An error occurred on " + timeStamp) e.printStackTrace(pw) fw.close() outPrintln("Written: " + f.getAbsolutePath) } catch { case e : Throwable => errPrintln("Error: " + e.getMessage) } } }
Example 76
Source File: CliLogger.scala From Argus-SAF with Apache License 2.0 | 5 votes |
package org.argus.saf.cli.util import java.io.{File, FileWriter, PrintWriter} import java.text.SimpleDateFormat import java.util.Date object CliLogger { def timeStamp: String = new SimpleDateFormat("yyyyMMdd-HHmmss").format(new Date) def outPrint(s : String) { scala.Console.out.print(s) scala.Console.out.flush() } def outPrintln(s : String) { scala.Console.out.println(s) scala.Console.out.flush() } def outPrintln() { scala.Console.out.println() scala.Console.out.flush() } def errPrintln(s : String) { scala.Console.err.println(s) scala.Console.err.flush() } def errPrintln() { scala.Console.err.println() scala.Console.err.flush() } def logError(dir: File, text: String, e: Throwable) { outPrintln() errPrintln(text + e.getMessage) val f = new File(dir, ".errorlog") f.getParentFile.mkdirs val fw = new FileWriter(f) try { val pw = new PrintWriter(fw) pw.println("An error occurred on " + timeStamp) e.printStackTrace(pw) fw.close() outPrintln("Written: " + f.getAbsolutePath) } catch { case e : Throwable => errPrintln("Error: " + e.getMessage) } } }
Example 77
Source File: StatsSender.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.stats import java.net.{DatagramPacket, DatagramSocket, InetAddress} import java.util.Calendar import java.text.SimpleDateFormat import akka.actor.{Actor, ActorSystem, Props} import akka.actor.Actor.Receive case class Message(msg: String, host: String, port: Int) class SenderActor extends Actor { private val dsocket = new DatagramSocket() sys.addShutdownHook { dsocket.close() } override def receive: Receive = { case Message(msg, host, port) => val address = InetAddress.getByName(host) val packet = new DatagramPacket(msg.getBytes(), msg.length, address, port) dsocket.send(packet) } } class StatsSender(path: String, host: String = "localhost", port: Int = 8125) { object Sender { val system = ActorSystem("mySystem") val actor = system.actorOf(Props[SenderActor], "SenderActor") def send(message: String) { actor ! Message(message, host, port) } } private def getCurrentTimeStr: String = { val now = Calendar.getInstance().getTime() val dateFormat = new SimpleDateFormat("ddMMyyyy_hhmm") dateFormat.format(now) } private def getMachineName: String = { java.net.InetAddress.getLocalHost().getHostName().split('.')(0) } private def getName(p: String, action: String): String = { p.replace("{MachineName}", getMachineName).replace("{DateTime}", getCurrentTimeStr) + "." + action .replace(".", "-") .replace(" ", "_") } def sendCounts(action: String, num: Int) { val message = getName(path, action) + ":" + num + "|c" Sender.send(message) } def sendTimings(action: String, num: Int) { val message = getName(path, action) + ":" + num + "|ms" Sender.send(message) } def sendGauges(action: String, num: Int) { val message = getName(path, action) + ":" + num + "|g" Sender.send(message) } def sendSets(action: String) { val message = getName(path, action) + "|s" Sender.send(message) } }
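A hedged usage sketch for the sender above; the metric path and action names are invented, and the calls assume a statsd-style UDP listener on the default localhost:8125:

val stats = new StatsSender("cmwell.{MachineName}.{DateTime}.ingest")
stats.sendCounts("documents indexed", 42)   // sends "...documents_indexed:42|c"
stats.sendTimings("index latency", 180)     // sends "...index_latency:180|ms"
stats.sendGauges("queue size", 7)           // sends "...queue_size:7|g"
stats.sendSets("unique users")              // sends "...unique_users|s"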
Example 78
Source File: IotMessageConverterTest.scala From toketi-kafka-connect-iothub with MIT License | 5 votes |
// Copyright (c) Microsoft. All rights reserved. package com.microsoft.azure.iot.kafka.connect.source import java.text.SimpleDateFormat import java.time.Instant import com.microsoft.azure.eventhubs.impl.AmqpConstants import com.microsoft.azure.iot.kafka.connect.source.testhelpers.DeviceTemperature import org.apache.kafka.connect.data.Struct import org.json4s.jackson.Serialization._ import org.scalatest.{FlatSpec, GivenWhenThen} import scala.collection.mutable import scala.util.Random class IotMessageConverterTest extends FlatSpec with GivenWhenThen with JsonSerialization { private val random: Random = new Random "IotMessage Converter" should "populate right values for kafka message struct fields" in { Given("IotMessage object") val deviceTemp = DeviceTemperature(100.01, "F") val deviceTempStr = write(deviceTemp) val sequenceNumber = random.nextLong() val correlationId = random.nextString(10) val offset = random.nextString(10) val enqueuedDate = new SimpleDateFormat("MM/dd/yyyy").parse("12/01/2016") val systemProperties = mutable.Map[String, Object]( "iothub-connection-device-id" → "device10", AmqpConstants.SEQUENCE_NUMBER_ANNOTATION_NAME → sequenceNumber.asInstanceOf[Object], AmqpConstants.AMQP_PROPERTY_CORRELATION_ID → correlationId, AmqpConstants.OFFSET_ANNOTATION_NAME → offset, AmqpConstants.ENQUEUED_TIME_UTC_ANNOTATION_NAME → enqueuedDate) val timestamp = Instant.now().toString val messageProperties = mutable.Map[String, Object]( "timestamp" → timestamp, "contentType" → "temperature" ) val iotMessage = IotMessage(deviceTempStr, systemProperties, messageProperties) When("getIotMessageStruct is called with IotMessage object") val kafkaMessageStruct: Struct = IotMessageConverter.getIotMessageStruct(iotMessage) Then("The struct has all the expected properties") assert(kafkaMessageStruct.getString("deviceId") == "device10") assert(kafkaMessageStruct.getString("offset") == offset) assert(kafkaMessageStruct.getString("contentType") == "temperature") assert(kafkaMessageStruct.getString("enqueuedTime") == enqueuedDate.toInstant.toString) assert(kafkaMessageStruct.getInt64("sequenceNumber") == sequenceNumber) assert(kafkaMessageStruct.getString("content") == deviceTempStr) val structSystemProperties = kafkaMessageStruct.getMap[String, String]("systemProperties") assert(structSystemProperties != null) assert(structSystemProperties.size == 1) assert(structSystemProperties.get(AmqpConstants.AMQP_PROPERTY_CORRELATION_ID) == correlationId) val structProperties = kafkaMessageStruct.getMap[String, String]("properties") assert(structProperties != null) assert(structProperties.size == 1) assert(structProperties.get("timestamp") == timestamp) } it should "use default values for missing properties" in { val deviceTemp = DeviceTemperature(100.01, "F") val deviceTempStr = write(deviceTemp) val systemProperties = mutable.Map.empty[String, Object] val messageProperties = mutable.Map.empty[String, Object] val iotMessage = IotMessage(deviceTempStr, systemProperties, messageProperties) When("getIotMessageStruct is called with IotMessage object") val kafkaMessageStruct: Struct = IotMessageConverter.getIotMessageStruct(iotMessage) Then("The struct has all the expected properties") assert(kafkaMessageStruct.getString("deviceId") == "") assert(kafkaMessageStruct.getString("offset") == "") assert(kafkaMessageStruct.getString("contentType") == "") assert(kafkaMessageStruct.getString("enqueuedTime") == "") assert(kafkaMessageStruct.getInt64("sequenceNumber") == 0) assert(kafkaMessageStruct.getString("content") == 
deviceTempStr) val structSystemProperties = kafkaMessageStruct.getMap[String, String]("systemProperties") assert(structSystemProperties != null) assert(structSystemProperties.size == 0) val structProperties = kafkaMessageStruct.getMap[String, String]("properties") assert(structProperties != null) assert(structProperties.size == 0) } }
Example 79
Source File: MockDataReceiver.scala From toketi-kafka-connect-iothub with MIT License | 5 votes |
// Copyright (c) Microsoft. All rights reserved. package com.microsoft.azure.iot.kafka.connect.source.testhelpers import java.text.SimpleDateFormat import java.time.{Duration, Instant} import com.microsoft.azure.eventhubs.impl.AmqpConstants import com.microsoft.azure.iot.kafka.connect.source.{DataReceiver, IotMessage, JsonSerialization} import org.json4s.jackson.Serialization.write import scala.collection.mutable import scala.util.Random class MockDataReceiver(val connectionString: String, val receiverConsumerGroup: String, val partition: String, var offset: Option[String], val startTime: Option[Instant], val receiveTimeout: Duration ) extends DataReceiver with JsonSerialization { private val random: Random = new Random override def receiveData(batchSize: Int): Iterable[IotMessage] = { val list = scala.collection.mutable.ListBuffer.empty[IotMessage] for (i <- 0 until batchSize) { list += generateIotMessage(i) } list } def generateIotMessage(index: Int): IotMessage = { val temp = 70 + random.nextInt(10) + random.nextDouble() val deviceTemp = DeviceTemperature(temp, "F") val deviceTempStr = write(deviceTemp) val systemProperties = mutable.Map[String, Object]( "iothub-connection-device-id" → s"device$index", AmqpConstants.SEQUENCE_NUMBER_ANNOTATION_NAME → index.toLong.asInstanceOf[Object], AmqpConstants.AMQP_PROPERTY_CORRELATION_ID → random.nextString(10), AmqpConstants.OFFSET_ANNOTATION_NAME → random.nextString(10), AmqpConstants.ENQUEUED_TIME_UTC_ANNOTATION_NAME → new SimpleDateFormat("MM/dd/yyyy").parse("12/01/2016")) val messageProperties = mutable.Map[String, Object]( "timestamp" → Instant.now().toString, "contentType" → "temperature" ) val iotMessage = IotMessage(deviceTempStr, systemProperties, messageProperties) iotMessage } override def close(): Unit = {} }
Example 80
Source File: PMMLModelExport.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export import java.text.SimpleDateFormat import java.util.{Date, Locale} import scala.beans.BeanProperty import org.dmg.pmml.{Application, Header, PMML, Timestamp} private[mllib] trait PMMLModelExport { @BeanProperty val pmml: PMML = { val version = getClass.getPackage.getImplementationVersion val app = new Application("Apache Spark MLlib").setVersion(version) val timestamp = new Timestamp() .addContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US).format(new Date())) val header = new Header() .setApplication(app) .setTimestamp(timestamp) new PMML("4.2", header, null) } }
Example 81
Source File: JacksonMessageWriter.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.status.api.v1 import java.io.OutputStream import java.lang.annotation.Annotation import java.lang.reflect.Type import java.nio.charset.StandardCharsets import java.text.SimpleDateFormat import java.util.{Calendar, Locale, SimpleTimeZone} import javax.ws.rs.Produces import javax.ws.rs.core.{MediaType, MultivaluedMap} import javax.ws.rs.ext.{MessageBodyWriter, Provider} import com.fasterxml.jackson.annotation.JsonInclude import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} @Provider @Produces(Array(MediaType.APPLICATION_JSON)) private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{ val mapper = new ObjectMapper() { override def writeValueAsString(t: Any): String = { super.writeValueAsString(t) } } mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule) mapper.enable(SerializationFeature.INDENT_OUTPUT) mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL) mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat) override def isWriteable( aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType): Boolean = { true } override def writeTo( t: Object, aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType, multivaluedMap: MultivaluedMap[String, AnyRef], outputStream: OutputStream): Unit = { t match { case ErrorWrapper(err) => outputStream.write(err.getBytes(StandardCharsets.UTF_8)) case _ => mapper.writeValue(outputStream, t) } } override def getSize( t: Object, aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType): Long = { -1L } } private[spark] object JacksonMessageWriter { def makeISODateFormat: SimpleDateFormat = { val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'", Locale.US) val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT")) iso8601.setCalendar(cal) iso8601 } }
Example 82
Source File: SimpleDateParam.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.status.api.v1 import java.text.{ParseException, SimpleDateFormat} import java.util.{Locale, TimeZone} import javax.ws.rs.WebApplicationException import javax.ws.rs.core.Response import javax.ws.rs.core.Response.Status private[v1] class SimpleDateParam(val originalValue: String) { val timestamp: Long = { val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz", Locale.US) try { format.parse(originalValue).getTime() } catch { case _: ParseException => val gmtDay = new SimpleDateFormat("yyyy-MM-dd", Locale.US) gmtDay.setTimeZone(TimeZone.getTimeZone("GMT")) try { gmtDay.parse(originalValue).getTime() } catch { case _: ParseException => throw new WebApplicationException( Response .status(Status.BAD_REQUEST) .entity("Couldn't parse date: " + originalValue) .build() ) } } } }
Example 83
Source File: AppAnalyzer.scala From sparklens with Apache License 2.0 | 5 votes |
package com.qubole.sparklens.analyzer import java.util.Date import java.util.concurrent.TimeUnit import com.qubole.sparklens.common.AppContext import scala.collection.mutable.ListBuffer def pd(millis: Long) : String = { "%02dm %02ds".format( TimeUnit.MILLISECONDS.toMinutes(millis), TimeUnit.MILLISECONDS.toSeconds(millis) - TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(millis)) ) } def pcm(millis: Long) : String = { val millisForMinutes = millis % (60*60*1000) "%02dh %02dm".format( TimeUnit.MILLISECONDS.toHours(millis), TimeUnit.MILLISECONDS.toMinutes(millisForMinutes)) } implicit class PrintlnStringBuilder(sb: StringBuilder) { def println(x: Any): StringBuilder = { sb.append(x).append("\n") } def print(x: Any): StringBuilder = { sb.append(x) } } } object AppAnalyzer { def startAnalyzers(appContext: AppContext): Unit = { val list = new ListBuffer[AppAnalyzer] list += new SimpleAppAnalyzer list += new HostTimelineAnalyzer list += new ExecutorTimelineAnalyzer list += new AppTimelineAnalyzer list += new JobOverlapAnalyzer list += new EfficiencyStatisticsAnalyzer list += new ExecutorWallclockAnalyzer list += new StageSkewAnalyzer list.foreach( x => { try { val output = x.analyze(appContext) println(output) } catch { case e:Throwable => { println(s"Failed in Analyzer ${x.getClass.getSimpleName}") e.printStackTrace() } } }) } }
Example 84
Source File: PMMLModelExport.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export import java.text.SimpleDateFormat import java.util.Date import scala.beans.BeanProperty import org.dmg.pmml.{Application, Header, PMML, Timestamp} private[mllib] trait PMMLModelExport { @BeanProperty val pmml: PMML = new PMML setHeader(pmml) private def setHeader(pmml: PMML): Unit = { val version = getClass.getPackage.getImplementationVersion val app = new Application().withName("Apache Spark MLlib").withVersion(version) val timestamp = new Timestamp() .withContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(new Date())) val header = new Header() .withApplication(app) .withTimestamp(timestamp) pmml.setHeader(header) } }
Example 85
Source File: DateUtils.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.util import java.sql.Date import java.text.SimpleDateFormat import java.util.{Calendar, TimeZone} import org.apache.spark.sql.catalyst.expressions.Cast object DateUtils { private val MILLIS_PER_DAY = 86400000 // Java TimeZone has no mention of thread safety. Use thread local instance to be safe. private val LOCAL_TIMEZONE = new ThreadLocal[TimeZone] { override protected def initialValue: TimeZone = { Calendar.getInstance.getTimeZone } } private def javaDateToDays(d: Date): Int = { millisToDays(d.getTime) } // we should use the exact day as Int, for example, (year, month, day) -> day def millisToDays(millisLocal: Long): Int = { ((millisLocal + LOCAL_TIMEZONE.get().getOffset(millisLocal)) / MILLIS_PER_DAY).toInt } private def toMillisSinceEpoch(days: Int): Long = { val millisUtc = days.toLong * MILLIS_PER_DAY millisUtc - LOCAL_TIMEZONE.get().getOffset(millisUtc) } def fromJavaDate(date: java.sql.Date): Int = { javaDateToDays(date) } def toJavaDate(daysSinceEpoch: Int): java.sql.Date = { new java.sql.Date(toMillisSinceEpoch(daysSinceEpoch)) } def toString(days: Int): String = Cast.threadLocalDateFormat.get.format(toJavaDate(days)) def stringToTime(s: String): java.util.Date = { if (!s.contains('T')) { // JDBC escape string if (s.contains(' ')) { java.sql.Timestamp.valueOf(s) } else { java.sql.Date.valueOf(s) } } else if (s.endsWith("Z")) { // this is zero timezone of ISO8601 stringToTime(s.substring(0, s.length - 1) + "GMT-00:00") } else if (s.indexOf("GMT") == -1) { // timezone with ISO8601 val inset = "+00.00".length val s0 = s.substring(0, s.length - inset) val s1 = s.substring(s.length - inset, s.length) if (s0.substring(s0.lastIndexOf(':')).contains('.')) { stringToTime(s0 + "GMT" + s1) } else { stringToTime(s0 + ".0GMT" + s1) } } else { // ISO8601 with GMT insert val ISO8601GMT: SimpleDateFormat = new SimpleDateFormat( "yyyy-MM-dd'T'HH:mm:ss.SSSz" ) ISO8601GMT.parse(s) } } }
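A few illustrative calls against the helpers above (the values are made up; millisToDays adjusts by the local time zone offset, so the exact day number depends on where it runs):

DateUtils.stringToTime("2015-06-01")                 // no 'T': treated as a JDBC date escape -> java.sql.Date
DateUtils.stringToTime("2015-06-01 12:30:00")        // no 'T' but a space: JDBC timestamp escape -> java.sql.Timestamp
DateUtils.stringToTime("2015-06-01T12:30:00.000Z")   // trailing 'Z' is rewritten to an explicit GMT offset, then parsed as ISO 8601

val days = DateUtils.millisToDays(System.currentTimeMillis())  // days since epoch for the local day
DateUtils.toJavaDate(days)                                     // and back to a java.sql.Date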
Example 86
Source File: JacksonMessageWriter.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.status.api.v1 import java.io.OutputStream import java.lang.annotation.Annotation import java.lang.reflect.Type import java.text.SimpleDateFormat import java.util.{Calendar, SimpleTimeZone} import javax.ws.rs.Produces import javax.ws.rs.core.{MediaType, MultivaluedMap} import javax.ws.rs.ext.{MessageBodyWriter, Provider} import com.fasterxml.jackson.annotation.JsonInclude import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} @Provider @Produces(Array(MediaType.APPLICATION_JSON)) private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{ val mapper = new ObjectMapper() { override def writeValueAsString(t: Any): String = { super.writeValueAsString(t) } } mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule) mapper.enable(SerializationFeature.INDENT_OUTPUT) mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL) mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat) override def isWriteable( aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType): Boolean = { true } override def writeTo( t: Object, aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType, multivaluedMap: MultivaluedMap[String, AnyRef], outputStream: OutputStream): Unit = { t match { case ErrorWrapper(err) => outputStream.write(err.getBytes("utf-8")) case _ => mapper.writeValue(outputStream, t) } } override def getSize( t: Object, aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType): Long = { -1L } } private[spark] object JacksonMessageWriter { def makeISODateFormat: SimpleDateFormat = { val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'") val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT")) iso8601.setCalendar(cal) iso8601 } }
Example 87
Source File: SimpleDateParam.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.status.api.v1 import java.text.SimpleDateFormat import java.util.TimeZone import javax.ws.rs.WebApplicationException import javax.ws.rs.core.Response import javax.ws.rs.core.Response.Status import scala.util.Try private[v1] class SimpleDateParam(val originalValue: String) { val timestamp: Long = { SimpleDateParam.formats.collectFirst { case fmt if Try(fmt.parse(originalValue)).isSuccess => fmt.parse(originalValue).getTime() }.getOrElse( throw new WebApplicationException( Response .status(Status.BAD_REQUEST) .entity("Couldn't parse date: " + originalValue) .build() ) ) } } private[v1] object SimpleDateParam { val formats: Seq[SimpleDateFormat] = { val gmtDay = new SimpleDateFormat("yyyy-MM-dd") gmtDay.setTimeZone(TimeZone.getTimeZone("GMT")) Seq( new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz"), gmtDay ) } }
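An illustrative use of the parameter class above; both inputs are made-up values that happen to match one of the two formats it tries:

new SimpleDateParam("2015-02-10T00:00:00.000GMT").timestamp  // matches the full timestamp format
new SimpleDateParam("2015-02-10").timestamp                  // falls back to the date-only format, read as GMT midnight
// any other shape raises a WebApplicationException carrying a 400 Bad Request response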
Example 88
Source File: DataFrameReportPerformanceSpec.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.deeplang.doperables.dataframe import java.sql.Timestamp import java.text.{DateFormat, SimpleDateFormat} import java.util.TimeZone import org.apache.spark.rdd.RDD import org.apache.spark.sql.Row import org.apache.spark.sql.types.{DoubleType, StructField, StructType, TimestampType} import org.scalatest.{BeforeAndAfter, Ignore} import ai.deepsense.commons.utils.{DoubleUtils, Logging} import ai.deepsense.deeplang.{TestFiles, DeeplangIntegTestSupport} // It's ignored because it does not have got assertions, it only prints report generation time. @Ignore class DataFrameReportPerformanceSpec extends DeeplangIntegTestSupport with BeforeAndAfter with TestFiles with Logging { val testFile = absoluteTestsDirPath.pathWithoutScheme + "/demand_without_header.csv" "DataFrame" should { "generate report" when { "DataFrame has 17K of rows" in { val numberOfTries = 10 var results: Seq[Double] = Seq() for (i <- 1 to numberOfTries) { val dataFrame: DataFrame = demandDataFrame() val start = System.nanoTime() val report = dataFrame.report() val end = System.nanoTime() val time1: Double = (end - start).toDouble / 1000000000.0 results = results :+ time1 logger.debug("Report generation time: {}", DoubleUtils.double2String(time1)) } logger.debug( "Mean report generation time: {}", DoubleUtils.double2String(results.fold(0D)(_ + _) / numberOfTries.toDouble)) } } } private def demandDataFrame(): DataFrame = { val rddString: RDD[String] = executionContext.sparkContext.textFile(testFile) val data: RDD[Row] = rddString.map(DataFrameHelpers.demandString2Row) executionContext.dataFrameBuilder.buildDataFrame(demandSchema, data) } private def demandSchema: StructType = StructType(Seq( StructField("datetime", TimestampType), StructField("log_count", DoubleType), StructField("workingday", DoubleType), StructField("holiday", DoubleType), StructField("season2", DoubleType), StructField("season3", DoubleType), StructField("season4", DoubleType))) private def timestamp(s: String): Timestamp = { val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") format.setTimeZone(TimeZone.getTimeZone("UTC")) new Timestamp(format.parse(s).getTime) } } private object DataFrameHelpers { def demandString2Row(s: String): Row = { val split = s.split(",") Row( timestamp(split(0)), split(1).toDouble, split(2).toDouble, split(3).toDouble, split(4).toDouble, split(5).toDouble, split(6).toDouble ) } private def timestamp(s: String): Timestamp = { val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") format.setTimeZone(TimeZone.getTimeZone("UTC")) new Timestamp(format.parse(s).getTime) } }
Example 89
Source File: OutputInterceptorFactory.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.outputintercepting import java.io.File import java.text.SimpleDateFormat import java.util.logging._ import java.util.{Calendar, UUID} import com.google.inject.Inject import com.google.inject.name.Named import org.apache.spark.launcher.SparkLauncher import ai.deepsense.commons.models.ClusterDetails case class OutputInterceptorHandle private [outputintercepting] ( private val logger: Logger, private val childProcLoggerName: String, private val loggerFileHandler: FileHandler ) { def attachTo(sparkLauncher: SparkLauncher): Unit = { sparkLauncher.setConf( "spark.launcher.childProcLoggerName", childProcLoggerName ) } def writeOutput(text: String): Unit = { logger.info(text) } def close(): Unit = { loggerFileHandler.close() } } class OutputInterceptorFactory @Inject()( @Named("session-executor.spark-applications-logs-dir") val executorsLogDirectory: String ) { def prepareInterceptorWritingToFiles(clusterDetails: ClusterDetails): OutputInterceptorHandle = { new File(executorsLogDirectory).mkdirs() val childProcLoggerName = s"WE-app-${UUID.randomUUID()}" val logger = Logger.getLogger(childProcLoggerName) val fileName = { val time = Calendar.getInstance().getTime() // Colons are not allowed in Windows filenames val format = new SimpleDateFormat("yyyy-MM-dd_HH-mm-ss") val formattedTime = format.format(time) val illegalFileNameCharactersRegExp = "[^a-zA-Z0-9.-]" s"$formattedTime-${clusterDetails.name.replaceAll(illegalFileNameCharactersRegExp, "_")}.log" } val fileHandler = new FileHandler(s"$executorsLogDirectory/$fileName") fileHandler.setFormatter(new SimpleFormaterWithoutOutputRedirectorNoise) logger.addHandler(fileHandler) sys.addShutdownHook { fileHandler.close() } OutputInterceptorHandle(logger, childProcLoggerName, fileHandler) } class SimpleFormaterWithoutOutputRedirectorNoise extends Formatter { val simpleFormatter = new SimpleFormatter override def format(logRecord: LogRecord): String = { val formatted = simpleFormatter.format(logRecord) val redirectorNoise = "org.apache.spark.launcher.OutputRedirector redirect\nINFO: " val beginningOfRedirectorNoise = formatted.indexOf(redirectorNoise) val endOfRedirectorNoise = if (beginningOfRedirectorNoise > 0) { beginningOfRedirectorNoise + redirectorNoise.length } else { 0 } formatted.substring(endOfRedirectorNoise) } } }
Example 90
Source File: GeneratorTest.scala From kafka-connect-kcql-smt with Apache License 2.0 | 5 votes |
package com.landoop.connect.sql import java.text.SimpleDateFormat import java.util.Date import com.landoop.json.sql.JacksonJson import com.sksamuel.avro4s.SchemaFor import org.scalatest.{Matchers, WordSpec} import scala.util.Random class GeneratorTest extends WordSpec with Matchers { "Generator" should { "generate schema" in { val sql = Sql.parse("SELECT * FROM `order-topic`") val schema = SchemaFor[Product]() val str = schema.toString println(str) } "generate data" in { val rnd = new Random(System.currentTimeMillis()) val f = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss.FFF") val products = (1 to 4).map { i => Product(i, f.format(new Date()), s"product_$i", Payment(rnd.nextDouble(), i * rnd.nextInt(3), "GBP")) }.map(JacksonJson.toJson).mkString(s"${System.lineSeparator()}") println(products) } } } case class Product(id: Int, created: String, name: String, payment: Payment) case class Payment(price: Double, quantity: Int, currency: String)
Example 91
Source File: RandomListTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.testkit import java.text.SimpleDateFormat import com.salesforce.op.features.types._ import com.salesforce.op.test.TestCommon import com.salesforce.op.testkit.RandomList.{NormalGeolocation, UniformGeolocation} import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import org.scalatest.{Assertions, FlatSpec} import scala.language.postfixOps @RunWith(classOf[JUnitRunner]) class RandomListTest extends FlatSpec with TestCommon with Assertions { private val numTries = 10000 private val rngSeed = 314159214142136L private def check[D, T <: OPList[D]]( g: RandomList[D, T], minLen: Int, maxLen: Int, predicate: (D => Boolean) = (_: D) => true ) = { g reset rngSeed def segment = g limit numTries segment count (_.value.length < minLen) shouldBe 0 segment count (_.value.length > maxLen) shouldBe 0 segment foreach (list => list.value foreach { x => predicate(x) shouldBe true }) } private val df = new SimpleDateFormat("dd/MM/yy") Spec[Text, RandomList[String, TextList]] should "generate lists of strings" in { val sut = RandomList.ofTexts(RandomText.countries, 0, 4) check[String, TextList](sut, 0, 4, _.length > 0) (sut limit 7 map (_.value.toList)) shouldBe List( List("Madagascar", "Gondal", "Zephyria"), List("Holy Alliance"), List("North American Union"), List("Guatemala", "Estonia", "Kolechia"), List(), List("Myanmar", "Bhutan"), List("Equatorial Guinea") ) } Spec[Date, RandomList[Long, DateList]] should "generate lists of dates" in { val dates = RandomIntegral.dates(df.parse("01/01/2017"), 1000, 1000000) val sut = RandomList.ofDates(dates, 11, 22) var d0 = 0L check[Long, DateList](sut, 11, 22, d => { val d1 = d0 d0 = d d > d1 }) } Spec[DateTimeList, RandomList[Long, DateTimeList]] should "generate lists of datetimes" in { val datetimes = RandomIntegral.datetimes(df.parse("01/01/2017"), 1000, 1000000) val sut = RandomList.ofDateTimes(datetimes, 11, 22) var d0 = 0L check[Long, DateTimeList](sut, 11, 22, d => { val d1 = d0 d0 = d d > d1 }) } Spec[UniformGeolocation] should "generate uniformly distributed geolocations" in { val sut = RandomList.ofGeolocations val segment = sut limit numTries segment foreach (_.value.length shouldBe 3) } Spec[NormalGeolocation] should "generate geolocations around given point" in { for {accuracy <- GeolocationAccuracy.values} { val geolocation = RandomList.ofGeolocationsNear(37.444136, 122.163160, accuracy) val segment = geolocation limit numTries segment foreach (_.value.length shouldBe 3) } } }
Example 92
Source File: RandomIntegralTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.testkit import java.text.SimpleDateFormat import com.salesforce.op.features.types._ import com.salesforce.op.test.TestCommon import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import org.scalatest.{Assertions, FlatSpec} import scala.language.postfixOps @RunWith(classOf[JUnitRunner]) class RandomIntegralTest extends FlatSpec with TestCommon with Assertions { private val numTries = 10000 private val rngSeed = 314159214142135L private def check[T <: Integral]( g: RandomIntegral[T], predicate: Long => Boolean = _ => true ) = { g reset rngSeed def segment = g limit numTries val numberOfEmpties = segment count (_.isEmpty) val expectedNumberOfEmpties = g.probabilityOfEmpty * numTries withClue(s"numEmpties = $numberOfEmpties, expected $expectedNumberOfEmpties") { math.abs(numberOfEmpties - expectedNumberOfEmpties) < 2 * math.sqrt(numTries) shouldBe true } val maybeValues = segment filterNot (_.isEmpty) map (_.value) val values = maybeValues collect { case Some(s) => s } values foreach (x => predicate(x) shouldBe true) withClue(s"number of distinct values = ${values.size}, expected:") { math.abs(maybeValues.size - values.toSet.size) < maybeValues.size / 20 } } private val df = new SimpleDateFormat("dd/MM/yy") Spec[RandomIntegral[Integral]] should "generate empties and distinct numbers" in { val sut0 = RandomIntegral.integrals val sut = sut0.withProbabilityOfEmpty(0.3) check(sut) sut.probabilityOfEmpty shouldBe 0.3 } Spec[RandomIntegral[Integral]] should "generate empties and distinct numbers in some range" in { val sut0 = RandomIntegral.integrals(100, 200) val sut = sut0.withProbabilityOfEmpty(0.3) check(sut, i => i >= 100 && i < 200) sut.probabilityOfEmpty shouldBe 0.3 } Spec[RandomIntegral[Date]] should "generate dates" in { val sut = RandomIntegral.dates(df.parse("01/01/2017"), 1000, 1000000) var d0 = 0L check(sut withProbabilityOfEmpty 0.01, d => { val d1 = d0 d0 = d d0 > d1 }) } Spec[RandomIntegral[DateTime]] should "generate dates with times" in { val sut = RandomIntegral.datetimes(df.parse("08/24/2017"), 1000, 1000000) var d0 = 0L check(sut withProbabilityOfEmpty 0.001, d => { val d1 = d0 d0 = d d0 > d1 }) } }
Example 93
Source File: IncomingTransactionDialogFragment.scala From OUTDATED_ledger-wallet-android with MIT License | 5 votes |
package co.ledger.wallet.app.ui.m2fa import java.text.SimpleDateFormat import java.util.Locale import android.content.DialogInterface import android.os.Bundle import android.view.{View, ViewGroup, LayoutInflater} import co.ledger.wallet.R import co.ledger.wallet.app.base.BaseDialogFragment import co.ledger.wallet.core.bitcoin.AmountFormatter import co.ledger.wallet.app.api.m2fa.IncomingTransactionAPI import co.ledger.wallet.core.utils.TR import co.ledger.wallet.core.view.DialogActionBarController import co.ledger.wallet.core.widget.TextView class IncomingTransactionDialogFragment extends BaseDialogFragment { lazy val actions = DialogActionBarController(R.id.dialog_action_bar).noNeutralButton lazy val amount = TR(R.id.amount).as[TextView] lazy val address = TR(R.id.address).as[TextView] lazy val date = TR(R.id.date).as[TextView] lazy val name = TR(R.id.dongle_name).as[TextView] private[this] var _transaction: Option[IncomingTransactionAPI#IncomingTransaction] = None def this(tx: IncomingTransactionAPI#IncomingTransaction) { this() _transaction = Option(tx) setCancelable(false) } override def onCreateView(inflater: LayoutInflater, container: ViewGroup, savedInstanceState: Bundle): View = { inflater.inflate(R.layout.incoming_transaction_dialog_fragment, container, false) } override def onResume(): Unit = { super.onResume() if (_transaction.isEmpty || _transaction.get.isDone) dismiss() _transaction.foreach(_.onCancelled(dismiss)) } override def onPause(): Unit = { super.onPause() _transaction.foreach(_.onCancelled(null)) dismissAllowingStateLoss() } override def onViewCreated(view: View, savedInstanceState: Bundle): Unit = { super.onViewCreated(view, savedInstanceState) actions onPositiveClick { _transaction.foreach(_.accept()) _transaction = None dismiss() } actions onNegativeClick { _transaction.foreach(_.reject()) _transaction = None dismiss() } _transaction match { case Some(transaction) => amount.setText(AmountFormatter.Bitcoin.format(transaction.amount)) address.setText(transaction.address) name.setText(transaction.dongle.name.get) val df = android.text.format.DateFormat.getDateFormat(getActivity) val hf = android.text.format.DateFormat.getTimeFormat(getActivity) date.setText(TR(R.string.incoming_tx_date).as[String].format(df.format(transaction.date), hf.format(transaction.date))) case _ => } } override def onDismiss(dialog: DialogInterface): Unit = { super.onDismiss(dialog) _transaction.foreach(_.cancel()) } } object IncomingTransactionDialogFragment { val DefaultTag = "IncomingTransactionDialogFragment" }
Example 94
Source File: TimerSchedule.scala From spark1.52 with Apache License 2.0 | 5 votes |
package scalaDemo import java.text.SimpleDateFormat import java.util.{Timer, TimerTask} object TimerSchedule { val fTime = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss") val d1 = fTime.parse("2005/12/30 14:10:00"); val timer: Timer = new Timer(); timer.scheduleAtFixedRate(new TimerTask() { override def run(): Unit = { System.out.println("this is task you do6"); } }, d1, 3 * 60 * 1000); }
Example 95
Source File: PMMLModelExport.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export import java.text.SimpleDateFormat import java.util.Date import scala.beans.BeanProperty import org.dmg.pmml.{Application, Header, PMML, Timestamp} private[mllib] trait PMMLModelExport { @BeanProperty val pmml: PMML = new PMML setHeader(pmml) private def setHeader(pmml: PMML): Unit = { val version = getClass.getPackage.getImplementationVersion val app = new Application().withName("Apache Spark MLlib").withVersion(version) val timestamp = new Timestamp() .withContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(new Date())) val header = new Header() .withApplication(app) .withTimestamp(timestamp) pmml.setHeader(header) } }
Example 96
Source File: JacksonMessageWriter.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.status.api.v1 import java.io.OutputStream import java.lang.annotation.Annotation import java.lang.reflect.Type import java.text.SimpleDateFormat import java.util.{Calendar, SimpleTimeZone} import javax.ws.rs.Produces import javax.ws.rs.core.{MediaType, MultivaluedMap} import javax.ws.rs.ext.{MessageBodyWriter, Provider} import com.fasterxml.jackson.annotation.JsonInclude import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} @Provider @Produces(Array(MediaType.APPLICATION_JSON)) private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{ val mapper = new ObjectMapper() { override def writeValueAsString(t: Any): String = { super.writeValueAsString(t) } } mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule) mapper.enable(SerializationFeature.INDENT_OUTPUT) mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL) mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat) override def isWriteable( aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType): Boolean = { true } override def writeTo( t: Object, aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType, multivaluedMap: MultivaluedMap[String, AnyRef], outputStream: OutputStream): Unit = { t match { case ErrorWrapper(err) => outputStream.write(err.getBytes("utf-8")) case _ => mapper.writeValue(outputStream, t) } } override def getSize( t: Object, aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType): Long = { -1L } } private[spark] object JacksonMessageWriter { def makeISODateFormat: SimpleDateFormat = { val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'") val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT")) iso8601.setCalendar(cal) iso8601 } }
Example 97
Source File: SimpleDateParam.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.status.api.v1 import java.text.{ParseException, SimpleDateFormat} import java.util.TimeZone import javax.ws.rs.WebApplicationException import javax.ws.rs.core.Response import javax.ws.rs.core.Response.Status private[v1] class SimpleDateParam(val originalValue: String) { val timestamp: Long = { val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz") try { format.parse(originalValue).getTime() } catch { case _: ParseException => val gmtDay = new SimpleDateFormat("yyyy-MM-dd") gmtDay.setTimeZone(TimeZone.getTimeZone("GMT")) try { gmtDay.parse(originalValue).getTime() } catch { case _: ParseException => throw new WebApplicationException( Response .status(Status.BAD_REQUEST) .entity("Couldn't parse date: " + originalValue) .build() ) } } } }
Example 98
Source File: BusinessLogger.scala From languagedetector with MIT License | 5 votes |
package biz.meetmatch.logging import java.text.SimpleDateFormat import java.util.{Calendar, Date} import org.slf4j.LoggerFactory object BusinessLogger { def getDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") } class BusinessLogger(module: String) { private val logger = LoggerFactory.getLogger("businessLogger") def calcStarted(options: String, sparkAppId: String): Unit = { log(s"CALC\tSTART\t$options\t$sparkAppId") } def calcStopped(result: String): Unit = { log(s"CALC\tSTOP\t$result") } def jobStarted(jobId: Int, jobDescription: String, stageCount: Int, executionId: Option[String]): Unit = { log(s"JOB\t$jobId\tSTART\t${jobDescription.replace("\n", " ").replace("\t", " ")}\t$stageCount\t${executionId.getOrElse("")}") } def jobStopped(jobId: Int, result: String): Unit = { log(s"JOB\t$jobId\tSTOP\t$result") } def transactionStarted(category: String, id: String, stageId: Int = -1, partitionId: Int = -1, taskId: Long = -1, message: String = ""): Unit = { log(s"TRANSACTION\t$category\t$id\tSTART\t$stageId\t$partitionId\t$taskId\t${message.replace("\n", " ").replace("\t", " ")}") } def transactionStopped(category: String, id: String): Unit = { log(s"TRANSACTION\t$category\t$id\tSTOP") } def dataParquetRead(tableName: String, count: Long = -1): Unit = { log(s"DATA\tPARQUET\tREAD\t${tableName.replace("\n", " ").replace("\t", " ")}\t$count") } def dataParquetWritten(tableName: String, countBefore: Long, countAfter: Long): Unit = { log(s"DATA\tPARQUET\tWRITE\t${tableName.replace("\n", " ").replace("\t", " ")}\t$countBefore\t$countAfter") } def dataJdbcRead(tableName: String, count: Long = -1): Unit = { log(s"DATA\tJDBC\tREAD\t${tableName.replace("\n", " ").replace("\t", " ")}\t$count") } def dataJdbcWritten(tableName: String, countBefore: Long = -1, countAfter: Long = -1, countUpdated: Long = -1): Unit = { log(s"DATA\tJDBC\tWRITE\t${tableName.replace("\n", " ").replace("\t", " ")}\t$countBefore\t$countAfter\t$countUpdated") } def info(subject: String, message: String): Unit = { log(s"MESSAGE\tINFO\t${subject.replace("\n", " ").replace("\t", " ")}\t${message.replace("\n", " ").replace("\t", " ")}") } def warn(subject: String, message: String): Unit = { log(s"MESSAGE\tWARN\t${subject.replace("\n", " ").replace("\t", " ")}\t${message.replace("\n", " ").replace("\t", " ")}") } def error(subject: String, message: String): Unit = { log(s"MESSAGE\tERROR\t${subject.replace("\n", " ").replace("\t", " ")}\t${message.replace("\n", " ").replace("\t", " ")}") } private def log(line: String) = { logger.info(s"${BusinessLogger.getDateFormat.format(Calendar.getInstance.getTime)}\t$module\t$line") } } case class LogLineWorkflow(message: String, startDate: String, stopDate: Date, duration: String, state: String, options: Array[Array[String]], sparkAppId: String, calcs: Array[LogLineCalc], warnings: Int, errors: Int) case class LogLineCalc(module: String, startDate: String, stopDate: Date, duration: String, state: String, options: Array[Array[String]], sparkAppId: String, jobs: Array[LogLineJob], transactionCategories: Array[LogLineTransactionCategory], dataReads: Array[LogLineDataRead], dataWrites: Array[LogLineDataWrite], messages: Array[LogLineMessage]) case class LogLineJob(id: Int, startDate: String, duration: String, state: String, description: String, stageCount: Int, executionId: Int = -1) case class LogLineTransactionCategory(category: String, transactions: Array[LogLineTransaction], numberOfTransactions: Int, averageFinishedTransactionDuration: String) case class LogLineTransaction(category: String, 
id: String, stageId: Int, partitionId: Int, taskId: Long, message: String, startDate: String, duration: String, state: String) case class LogLineDataRead(storage: String, tableName: String, count: Int, date: String) case class LogLineDataWrite(storage: String, tableName: String, countBefore: Int, countAfter: Int, countUpdated: Int, date: String) case class LogLineMessage(category: String, subject: String, message: String, date: String)
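A note on the formatter above: BusinessLogger.getDateFormat builds a fresh SimpleDateFormat on every call, which sidesteps the class's lack of thread safety at the cost of a small allocation per log line. A minimal usage sketch, assuming an SLF4J binding with a logger named businessLogger is configured; the module name, options and ids below are made up for illustration:

import biz.meetmatch.logging.BusinessLogger

object BusinessLoggerUsageSketch {
  def main(args: Array[String]): Unit = {
    val log = new BusinessLogger("demo-module")          // hypothetical module name
    log.calcStarted("--date 2020-01-01", "app-00000001") // hypothetical options and Spark app id
    log.jobStarted(1, "load and\tparse input", 3, Some("exec-1"))
    log.dataParquetRead("input_table", 42L)
    log.jobStopped(1, "SUCCESS")
    log.calcStopped("SUCCESS")
  }
}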
Example 99
Source File: PMMLModelExport.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export import java.text.SimpleDateFormat import java.util.{Date, Locale} import scala.beans.BeanProperty import org.dmg.pmml.{Application, Header, PMML, Timestamp} private[mllib] trait PMMLModelExport { @BeanProperty val pmml: PMML = { val version = getClass.getPackage.getImplementationVersion val app = new Application("Apache Spark MLlib").setVersion(version) val timestamp = new Timestamp() .addContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US).format(new Date())) val header = new Header() .setApplication(app) .setTimestamp(timestamp) new PMML("4.2", header, null) } }
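The only difference from the earlier Spark variant is the explicit Locale.US. Even for an all-numeric pattern this matters, because a few default locales use a non-Gregorian calendar (the Thai Buddhist-calendar locale, for example, shifts the year digits). A small standalone sketch of the idea:

import java.text.SimpleDateFormat
import java.util.{Date, Locale}

object LocalePinningSketch {
  def main(args: Array[String]): Unit = {
    val now = new Date()
    // Pinned to Locale.US: same output on every JVM, whatever the default locale is.
    val pinned = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US)
    // Default-locale formatter: usually identical, but locale-dependent calendars
    // (e.g. the Thai Buddhist calendar) can change even the year field.
    val defaultLocale = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss")
    println(pinned.format(now))
    println(defaultLocale.format(now))
  }
}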
Example 100
Source File: JacksonMessageWriter.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.status.api.v1 import java.io.OutputStream import java.lang.annotation.Annotation import java.lang.reflect.Type import java.nio.charset.StandardCharsets import java.text.SimpleDateFormat import java.util.{Calendar, Locale, SimpleTimeZone} import javax.ws.rs.Produces import javax.ws.rs.core.{MediaType, MultivaluedMap} import javax.ws.rs.ext.{MessageBodyWriter, Provider} import com.fasterxml.jackson.annotation.JsonInclude import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} @Provider @Produces(Array(MediaType.APPLICATION_JSON)) private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{ val mapper = new ObjectMapper() { override def writeValueAsString(t: Any): String = { super.writeValueAsString(t) } } mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule) mapper.enable(SerializationFeature.INDENT_OUTPUT) mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL) mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat) override def isWriteable( aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType): Boolean = { true } override def writeTo( t: Object, aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType, multivaluedMap: MultivaluedMap[String, AnyRef], outputStream: OutputStream): Unit = { t match { case ErrorWrapper(err) => outputStream.write(err.getBytes(StandardCharsets.UTF_8)) case _ => mapper.writeValue(outputStream, t) } } override def getSize( t: Object, aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType): Long = { -1L } } private[spark] object JacksonMessageWriter { def makeISODateFormat: SimpleDateFormat = { val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'", Locale.US) val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT")) iso8601.setCalendar(cal) iso8601 } }
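In makeISODateFormat the trailing 'GMT' is quoted, so it is emitted as a literal suffix rather than interpreted as a time-zone field; the SimpleTimeZone(0, "GMT") calendar is what actually pins the rendered fields to UTC. A standalone sketch of what the mapper's date format produces:

import java.text.SimpleDateFormat
import java.util.{Calendar, Date, Locale, SimpleTimeZone}

object IsoGmtFormatSketch {
  def main(args: Array[String]): Unit = {
    // 'GMT' is quoted, so it is printed literally; the GMT calendar below is what
    // shifts the wall-clock fields to UTC.
    val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'", Locale.US)
    iso8601.setCalendar(Calendar.getInstance(new SimpleTimeZone(0, "GMT")))
    println(iso8601.format(new Date(0L))) // 1970-01-01T00:00:00.000GMT
  }
}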
Example 101
Source File: SimpleDateParam.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.status.api.v1 import java.text.{ParseException, SimpleDateFormat} import java.util.{Locale, TimeZone} import javax.ws.rs.WebApplicationException import javax.ws.rs.core.Response import javax.ws.rs.core.Response.Status private[v1] class SimpleDateParam(val originalValue: String) { val timestamp: Long = { val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz", Locale.US) try { format.parse(originalValue).getTime() } catch { case _: ParseException => val gmtDay = new SimpleDateFormat("yyyy-MM-dd", Locale.US) gmtDay.setTimeZone(TimeZone.getTimeZone("GMT")) try { gmtDay.parse(originalValue).getTime() } catch { case _: ParseException => throw new WebApplicationException( Response .status(Status.BAD_REQUEST) .entity("Couldn't parse date: " + originalValue) .build() ) } } } }
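Because SimpleDateParam is private[v1] it cannot be exercised from outside Spark, but the two-stage parse it implements (full timestamp first, then a GMT day-only fallback) is easy to reproduce. A minimal sketch of the same logic, with the hypothetical helper name toMillis:

import java.text.{ParseException, SimpleDateFormat}
import java.util.{Locale, TimeZone}

object TwoStageParseSketch {
  // Mirrors SimpleDateParam: try the full ISO-like pattern first, then fall back
  // to a date-only pattern interpreted in GMT.
  def toMillis(s: String): Long = {
    val full = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz", Locale.US)
    try full.parse(s).getTime
    catch {
      case _: ParseException =>
        val dayOnly = new SimpleDateFormat("yyyy-MM-dd", Locale.US)
        dayOnly.setTimeZone(TimeZone.getTimeZone("GMT"))
        dayOnly.parse(s).getTime // still throws ParseException for unparseable input
    }
  }

  def main(args: Array[String]): Unit = {
    println(toMillis("2015-02-10T00:00:00.000GMT")) // full timestamp branch
    println(toMillis("2015-02-10"))                 // midnight GMT fallback branch
  }
}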
Example 102
Source File: StreamingDemo.scala From flink-demos with Apache License 2.0 | 5 votes |
package com.dataartisans.flink.example.eventpattern import java.text.SimpleDateFormat import java.util import java.util.{Calendar, Properties, UUID} import com.dataartisans.flink.example.eventpattern.kafka.EventDeSerializer import org.apache.flink.api.common.functions.{RuntimeContext, RichFlatMapFunction} import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor} import org.apache.flink.configuration.Configuration import org.apache.flink.streaming.api.scala._ import org.apache.flink.streaming.connectors.elasticsearch.{IndexRequestBuilder, ElasticsearchSink} import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer08 import org.apache.flink.util.Collector import org.elasticsearch.action.index.IndexRequest import org.elasticsearch.client.Requests class StateMachineMapper extends RichFlatMapFunction[Event, Alert] { private[this] var currentState: ValueState[State] = _ override def open(config: Configuration): Unit = { currentState = getRuntimeContext.getState( new ValueStateDescriptor("state", classOf[State], InitialState)) } override def flatMap(t: Event, out: Collector[Alert]): Unit = { val state = currentState.value() val nextState = state.transition(t.event) nextState match { case InvalidTransition => out.collect(Alert(t.sourceAddress, state, t.event)) case x if x.terminal => currentState.clear() case x => currentState.update(nextState) } } }
Example 103
Source File: FECData.scala From s4ds with Apache License 2.0 | 5 votes |
import java.io.File import java.sql.Date import java.text.SimpleDateFormat import com.github.tototoshi.csv._ object FECData { val DataDirectory = "./data/" private val dateParser = new SimpleDateFormat("DD-MMM-YY") private def load(fileName:String):FECData = { val reader = CSVReader.open(new File(DataDirectory + fileName)) val transactions = for { row <- reader.iteratorWithHeaders id = None candidate = row("candidate") contributor = row("contributor_name") state = row("contributor_state") occupation = row("contributor_occupation") match { case "" => None case v => Some(v) } amount = (row("amount").toDouble*100).toInt date = new Date(dateParser.parse(row("date")).getTime) } yield Transaction(id, candidate, contributor, state, occupation, amount, date) new FECData(transactions) } def loadAll:FECData = load("us.csv") def loadOhio:FECData = load("ohio.csv") } class FECData(val transactions:Iterator[Transaction])
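One caveat on the pattern above: in SimpleDateFormat, uppercase D is day-of-year and uppercase Y is week-based year, so "DD-MMM-YY" can resolve values such as 10-Feb-15 to an unexpected date. If the input really is day-month-year, the conventional pattern is probably the one sketched below; Locale.US keeps the English month abbreviation parseable under any default locale:

import java.text.SimpleDateFormat
import java.util.Locale

object FecDatePatternSketch {
  def main(args: Array[String]): Unit = {
    // dd = day of month, yy = calendar year.
    val parser = new SimpleDateFormat("dd-MMM-yy", Locale.US)
    println(parser.parse("10-Feb-15")) // Tue Feb 10 00:00:00 ... 2015
  }
}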
Example 104
Source File: PMMLModelExport.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export import java.text.SimpleDateFormat import java.util.Date import scala.beans.BeanProperty import org.dmg.pmml.{Application, Header, PMML, Timestamp} private[mllib] trait PMMLModelExport { @BeanProperty val pmml: PMML = new PMML pmml.setVersion("4.2") setHeader(pmml) private def setHeader(pmml: PMML): Unit = { val version = getClass.getPackage.getImplementationVersion val app = new Application().withName("Apache Spark MLlib").withVersion(version) val timestamp = new Timestamp() .withContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(new Date())) val header = new Header() .withApplication(app) .withTimestamp(timestamp) pmml.setHeader(header) } }
Example 105
Source File: JacksonMessageWriter.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.status.api.v1 import java.io.OutputStream import java.lang.annotation.Annotation import java.lang.reflect.Type import java.text.SimpleDateFormat import java.util.{Calendar, SimpleTimeZone} import javax.ws.rs.Produces import javax.ws.rs.core.{MediaType, MultivaluedMap} import javax.ws.rs.ext.{MessageBodyWriter, Provider} import com.fasterxml.jackson.annotation.JsonInclude import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} @Provider @Produces(Array(MediaType.APPLICATION_JSON)) private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{ val mapper = new ObjectMapper() { override def writeValueAsString(t: Any): String = { super.writeValueAsString(t) } } mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule) mapper.enable(SerializationFeature.INDENT_OUTPUT) mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL) mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat) override def isWriteable( aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType): Boolean = { true } override def writeTo( t: Object, aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType, multivaluedMap: MultivaluedMap[String, AnyRef], outputStream: OutputStream): Unit = { t match { case ErrorWrapper(err) => outputStream.write(err.getBytes("utf-8")) case _ => mapper.writeValue(outputStream, t) } } override def getSize( t: Object, aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType): Long = { -1L } } private[spark] object JacksonMessageWriter { def makeISODateFormat: SimpleDateFormat = { val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'") val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT")) iso8601.setCalendar(cal) iso8601 } }
Example 106
Source File: SimpleDateParam.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.status.api.v1 import java.text.{ParseException, SimpleDateFormat} import java.util.TimeZone import javax.ws.rs.WebApplicationException import javax.ws.rs.core.Response import javax.ws.rs.core.Response.Status private[v1] class SimpleDateParam(val originalValue: String) { val timestamp: Long = { val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz") try { format.parse(originalValue).getTime() } catch { case _: ParseException => val gmtDay = new SimpleDateFormat("yyyy-MM-dd") gmtDay.setTimeZone(TimeZone.getTimeZone("GMT")) try { gmtDay.parse(originalValue).getTime() } catch { case _: ParseException => throw new WebApplicationException( Response .status(Status.BAD_REQUEST) .entity("Couldn't parse date: " + originalValue) .build() ) } } } }
Example 107
Source File: JavaJsonUtils.scala From asura with MIT License | 5 votes |
package asura.common.util import java.text.SimpleDateFormat import com.fasterxml.jackson.annotation.JsonInclude import com.fasterxml.jackson.core.JsonParser import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} object JavaJsonUtils extends JsonUtils { val mapper: ObjectMapper = new ObjectMapper() mapper.setDateFormat(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")) mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL) mapper.configure(DeserializationFeature.ACCEPT_EMPTY_ARRAY_AS_NULL_OBJECT, true) mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) mapper.configure(DeserializationFeature.FAIL_ON_IGNORED_PROPERTIES, false) mapper.configure(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY, true) mapper.configure(JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES, true) mapper.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, true) }
Example 108
Source File: JsonUtils.scala From asura with MIT License | 5 votes |
package asura.common.util import java.io.InputStream import java.text.SimpleDateFormat import com.fasterxml.jackson.annotation.JsonInclude import com.fasterxml.jackson.core.JsonParser import com.fasterxml.jackson.core.`type`.TypeReference import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} import com.fasterxml.jackson.module.scala.DefaultScalaModule import com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper object JsonUtils extends JsonUtils { val mapper: ObjectMapper with ScalaObjectMapper = new ObjectMapper() with ScalaObjectMapper mapper.registerModule(DefaultScalaModule) mapper.setDateFormat(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")) mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL) mapper.configure(DeserializationFeature.ACCEPT_EMPTY_ARRAY_AS_NULL_OBJECT, true) mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) mapper.configure(DeserializationFeature.FAIL_ON_IGNORED_PROPERTIES, false) mapper.configure(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY, true) mapper.configure(JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES, true) mapper.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, true) } trait JsonUtils { val mapper: ObjectMapper def stringify(obj: AnyRef): String = { mapper.writeValueAsString(obj) } def parse[T <: AnyRef](content: String, c: Class[T]): T = { mapper.readValue(content, c) } def parse[T <: AnyRef](input: InputStream, c: Class[T]): T = { mapper.readValue(input, c) } def parse[T <: AnyRef](content: String, typeReference: TypeReference[T]): T = { mapper.readValue(content, typeReference) } }
Example 109
Source File: MongodbSchemaIT.scala From Spark-MongoDB with Apache License 2.0 | 5 votes |
package com.stratio.datasource.mongodb.schema import java.text.SimpleDateFormat import java.util.Locale import com.stratio.datasource.MongodbTestConstants import com.stratio.datasource.mongodb.config.{MongodbConfig, MongodbConfigBuilder} import com.stratio.datasource.mongodb.partitioner.MongodbPartitioner import com.stratio.datasource.mongodb.rdd.MongodbRDD import com.stratio.datasource.mongodb._ import org.apache.spark.sql.mongodb.{TemporaryTestSQLContext, TestSQLContext} import org.apache.spark.sql.types.{ArrayType, StringType, StructField, TimestampType} import org.junit.runner.RunWith import org.scalatest._ import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class MongodbSchemaIT extends FlatSpec with Matchers with MongoEmbedDatabase with TestBsonData with MongodbTestConstants { private val host: String = "localhost" private val collection: String = "testCol" private val readPreference = "secondaryPreferred" val testConfig = MongodbConfigBuilder() .set(MongodbConfig.Host,List(host + ":" + mongoPort)) .set(MongodbConfig.Database,db) .set(MongodbConfig.Collection,collection) .set(MongodbConfig.SamplingRatio,1.0) .set(MongodbConfig.ReadPreference, readPreference) .build() val mongodbPartitioner = new MongodbPartitioner(testConfig) val mongodbRDD = new MongodbRDD(TemporaryTestSQLContext, testConfig, mongodbPartitioner) behavior of "A schema" it should "be inferred from rdd with primitives" + scalaBinaryVersion in { withEmbedMongoFixture(primitiveFieldAndType) { mongodProc => val schema = MongodbSchema(mongodbRDD, 1.0).schema() schema.fields should have size 7 schema.fieldNames should contain allOf("string", "integer", "long", "double", "boolean", "null") schema.printTreeString() } } it should "be inferred from rdd with complex fields" + scalaBinaryVersion in { withEmbedMongoFixture(complexFieldAndType1) { mongodProc => val schema = MongodbSchema(mongodbRDD, 1.0).schema() schema.fields should have size 13 schema.fields filter { case StructField(name, ArrayType(StringType, _), _, _) => Set("arrayOfNull", "arrayEmpty") contains name case _ => false } should have size 2 schema.printTreeString() } } it should "resolve type conflicts between fields" + scalaBinaryVersion in { withEmbedMongoFixture(primitiveFieldValueTypeConflict) { mongodProc => val schema = MongodbSchema(mongodbRDD, 1.0).schema() schema.fields should have size 7 schema.printTreeString() } } it should "be inferred from rdd with more complex fields" + scalaBinaryVersion in { withEmbedMongoFixture(complexFieldAndType2) { mongodProc => val schema = MongodbSchema(mongodbRDD, 1.0).schema() schema.fields should have size 5 schema.printTreeString() } } it should "read java.util.Date fields as timestamptype" + scalaBinaryVersion in { val dfunc = (s: String) => new SimpleDateFormat("EEE MMM dd HH:mm:ss Z yyyy", Locale.ENGLISH).parse(s) import com.mongodb.casbah.Imports.DBObject val stringAndDate = List(DBObject("string" -> "this is a simple string.", "date" -> dfunc("Mon Aug 10 07:52:49 EDT 2015"))) withEmbedMongoFixture(stringAndDate) { mongodProc => val schema = MongodbSchema(mongodbRDD, 1.0).schema() schema.fields should have size 3 schema.fields.filter(_.name == "date").head.dataType should equal(TimestampType) schema.printTreeString() } } }
Example 110
Source File: TSQR.scala From SparkAndMPIFactorizations with MIT License | 5 votes |
package edu.berkeley.cs.amplab.mlmatrix import java.util.concurrent.ThreadLocalRandom import scala.collection.mutable.ArrayBuffer import breeze.linalg._ import edu.berkeley.cs.amplab.mlmatrix.util.QRUtils import edu.berkeley.cs.amplab.mlmatrix.util.Utils import org.apache.spark.rdd.RDD import org.apache.spark.SparkConf import org.apache.spark.SparkContext import org.apache.spark.Accumulator import org.apache.spark.SparkContext._ import java.util.Calendar import java.text.SimpleDateFormat class modifiedTSQR extends Serializable { def report(message: String, verbose: Boolean = true) = { val now = Calendar.getInstance().getTime() val formatter = new SimpleDateFormat("H:m:s") if (verbose) { println("STATUS REPORT (" + formatter.format(now) + "): " + message) } } private def reduceQR( acc: Accumulator[Double], a: Tuple2[DenseVector[Double], DenseMatrix[Double]], b: Tuple2[DenseVector[Double], DenseMatrix[Double]]): Tuple2[DenseVector[Double], DenseMatrix[Double]] = { val begin = System.nanoTime val outmat = QRUtils.qrR(DenseMatrix.vertcat(a._2, b._2), false) val outcolnorms = a._1 + b._1 acc += ((System.nanoTime - begin) / 1e6) (outcolnorms, outmat) } }
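The report helper uses the pattern "H:m:s", whose single-letter fields are not zero-padded, so 09:05:03 prints as 9:5:3; that is harmless for a status line, but "HH:mm:ss" gives fixed-width output. A quick comparison:

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}

object PaddingSketch {
  def main(args: Array[String]): Unit = {
    val cal = Calendar.getInstance()
    cal.set(2020, Calendar.JANUARY, 1, 9, 5, 3) // 09:05:03
    val t: Date = cal.getTime
    println(new SimpleDateFormat("H:m:s").format(t))    // 9:5:3
    println(new SimpleDateFormat("HH:mm:ss").format(t)) // 09:05:03
  }
}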
Example 111
Source File: HtmlFetcher.scala From Mastering-Spark-for-Data-Science with MIT License | 5 votes |
package io.gzet import java.text.SimpleDateFormat import com.gravity.goose.{Configuration, Goose} import io.gzet.HtmlFetcher.Content import org.apache.commons.lang.StringUtils import org.apache.spark.rdd.RDD class HtmlFetcher( connectionTimeout: Int = 10000, socketTimeout: Int = 10000 ) extends Serializable { final val USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36" final val ISO_SDF = "yyyy-MM-dd'T'HH:mm:ssZ" def fetchWithContext(urlRdd: RDD[(Long, String)]): RDD[(Long, Content)] = { urlRdd mapPartitions { urls => val sdf = new SimpleDateFormat(ISO_SDF) val goose = getGooseScraper urls map { case (id, url) => (id, fetchUrl(goose, url, sdf)) } } } def fetch(urlRdd: RDD[String]): RDD[Content] = { urlRdd mapPartitions { urls => val sdf = new SimpleDateFormat(ISO_SDF) val goose = getGooseScraper urls map(url => fetchUrl(goose, url, sdf)) } } private def getGooseScraper: Goose = { val conf: Configuration = new Configuration conf.setEnableImageFetching(false) conf.setBrowserUserAgent(USER_AGENT) conf.setConnectionTimeout(connectionTimeout) conf.setSocketTimeout(socketTimeout) new Goose(conf) } private def fetchUrl(goose: Goose, url: String, sdf: SimpleDateFormat) : Content = { try { val article = goose.extractContent(url) var body = None: Option[String] var title = None: Option[String] var description = None: Option[String] var publishDate = None: Option[String] if (StringUtils.isNotEmpty(article.cleanedArticleText)) body = Some(article.cleanedArticleText) if (StringUtils.isNotEmpty(article.title)) title = Some(article.title) if (StringUtils.isNotEmpty(article.metaDescription)) description = Some(article.metaDescription) if (article.publishDate != null) publishDate = Some(sdf.format(article.publishDate)) Content(url, title, description, body, publishDate) } catch { case e: Throwable => Content(url, None, None, None, None) } } } object HtmlFetcher { case class Content( url: String, title: Option[String], description: Option[String], body: Option[String], publishedDate: Option[String] ) }
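Note how both fetch methods build the SimpleDateFormat (and the Goose scraper) inside mapPartitions, so each task gets its own instance: SimpleDateFormat is mutable and not thread-safe, and per-partition construction also keeps the formatter out of the closure shipped from the driver. The same pattern in isolation, as a generic sketch not tied to Goose:

import java.text.SimpleDateFormat
import java.util.{Date, Locale}
import org.apache.spark.rdd.RDD

object PerPartitionFormatterSketch {
  // One formatter per partition: never shared across tasks, never serialized from the driver.
  def formatTimestamps(input: RDD[Long]): RDD[String] =
    input.mapPartitions { millis =>
      val sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US)
      millis.map(m => sdf.format(new Date(m)))
    }
}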
Example 112
Source File: GdeltTagger.scala From Mastering-Spark-for-Data-Science with MIT License | 5 votes |
package io.gzet.tagging.gdelt import java.text.SimpleDateFormat import java.util.Date import com.typesafe.config.ConfigFactory import io.gzet.tagging.classifier.Classifier import io.gzet.tagging.html.HtmlHandler import io.gzet.tagging.html.HtmlHandler.Content import org.apache.spark.Accumulator import org.apache.spark.streaming.dstream.DStream import org.apache.spark.util.LongAccumulator import org.elasticsearch.spark._ class GdeltTagger() extends Serializable { val config = ConfigFactory.load().getConfig("io.gzet.kappa") val isoSdf = "yyyy-MM-dd HH:mm:ss" val esIndex = config.getString("gdeltIndex") val vectorSize = config.getInt("vectorSize") val minProba = config.getDouble("minProba") def predict(gdeltStream: DStream[String], batchId: LongAccumulator) = { // Extract HTML content val gdeltContent = fetchHtmlContent(gdeltStream) // Predict each RDD gdeltContent foreachRDD { batch => batch.cache() val count = batch.count() if (count > 0) { if (Classifier.model.isDefined) { val labels = Classifier.model.get.labels // Predict HashTags using latest Twitter model val textRdd = batch.map(_.body.get) val predictions = Classifier.predictProbabilities(textRdd) val taggedGdelt = batch.zip(predictions) map { case (content, probabilities) => val validLabels = probabilities filter { case (label, probability) => probability > minProba } val labels = validLabels.toSeq .sortBy(_._2) .reverse .map(_._1) (content, labels) } // Saving articles to Elasticsearch taggedGdelt map { case (content, hashTags) => gdeltToJson(content, hashTags.toArray) } saveToEs esIndex } else { // Saving articles to Elasticsearch batch map { content => gdeltToJson(content, Array()) } saveToEs esIndex } } batch.unpersist(blocking = false) } } private def gdeltToJson(content: Content, hashTags: Array[String]) = { val sdf = new SimpleDateFormat(isoSdf) Map( "time" -> sdf.format(new Date()), "body" -> content.body.get, "url" -> content.url, "tags" -> hashTags, "title" -> content.title ) } private def fetchHtmlContent(urlStream: DStream[String]) = { urlStream.map(_ -> 1).groupByKey().map(_._1) mapPartitions { urls => val sdf = new SimpleDateFormat(isoSdf) val htmlHandler = new HtmlHandler() val goose = htmlHandler.getGooseScraper urls map { url => htmlHandler.fetchUrl(goose, url, sdf) } } filter { content => content.isDefined && content.get.body.isDefined && content.get.body.get.length > 255 } map { content => content.get } } }
Example 113
Source File: OilPriceFunc.scala From Mastering-Spark-for-Data-Science with MIT License | 5 votes |
package io.gzet.geomesa import java.text.SimpleDateFormat import java.util.Calendar import org.apache.spark.sql.SparkSession import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions.{udf, window, last, col, lag} object OilPriceFunc { // use this if the window function misbehaves due to timezone e.g. BST // ./spark-shell --driver-java-options "-Duser.timezone=UTC" // ./spark-submit --conf 'spark.driver.extraJavaOptions=-Duser.timezone=UTC' // define a function to reformat the date field def convert(date:String) : String = { val df1 = new SimpleDateFormat("dd/MM/yyyy") val dt = df1.parse(date) val df2 = new SimpleDateFormat("yyyy-MM-dd") df2.format(dt) } // create and save oil price changes def createOilPriceDF(inputfile: String, outputfile: String, spark: SparkSession) = { val oilPriceDF = spark. read. option("header", "true"). option("inferSchema", "true"). csv(inputfile) val convertDateUDF = udf { (Date: String) => convert(Date) } val oilPriceDatedDF = oilPriceDF.withColumn("DATE", convertDateUDF(oilPriceDF("DATE"))) // offset to start at beginning of week val windowDF = oilPriceDatedDF.groupBy(window(oilPriceDatedDF.col("DATE"), "7 days", "7 days", "4 days")) val windowLastDF = windowDF.agg(last("PRICE") as "last(PRICE)").sort("window") // windowLastDF.show(20, false) val sortedWindow = Window.orderBy("window.start") val lagLastCol = lag(col("last(PRICE)"), 1).over(sortedWindow) val lagLastColDF = windowLastDF.withColumn("lastPrev(PRICE)", lagLastCol) // lagLastColDF.show(20, false) val simplePriceChangeFunc = udf { (last: Double, prevLast: Double) => var change = ((last - prevLast) compare 0).signum if (change == -1) change = 0 change.toDouble } val findDateTwoDaysAgoUDF = udf { (date: String) => val dateFormat = new SimpleDateFormat("yyyy-MM-dd") val cal = Calendar.getInstance cal.setTime(dateFormat.parse(date)) cal.add(Calendar.DATE, -3) dateFormat.format(cal.getTime) } val oilPriceChangeDF = lagLastColDF.withColumn("label", simplePriceChangeFunc( lagLastColDF("last(PRICE)"), lagLastColDF("lastPrev(PRICE)") )).withColumn("commonFriday", findDateTwoDaysAgoUDF(lagLastColDF("window.end"))) // oilPriceChangeDF.show(20, false) oilPriceChangeDF.select("label", "commonFriday"). write. format("com.databricks.spark.csv"). option("header", "true"). //.option("codec", "org.apache.hadoop.io.compress.GzipCodec") save(outputfile) } }
Example 114
Source File: PoFile.scala From scalingua with Apache License 2.0 | 5 votes |
package ru.makkarpov.scalingua.pofile import java.io._ import java.nio.charset.StandardCharsets import java.text.SimpleDateFormat import java.util.Date import ru.makkarpov.scalingua.StringUtils import ru.makkarpov.scalingua.pofile.parse.{ErrorReportingParser, PoLexer} object PoFile { val encoding = StandardCharsets.UTF_8 val GeneratedPrefix = "!Generated:" private def headerComment(s: String) = s"# $GeneratedPrefix $s" def apply(f: File): Seq[Message] = apply(new FileInputStream(f), f.getName) def apply(is: InputStream, filename: String = "<unknown>"): Seq[Message] = { val parser = new ErrorReportingParser(new PoLexer(new InputStreamReader(is, StandardCharsets.UTF_8), filename)) parser.parse().value.asInstanceOf[Seq[Message]] } def update(f: File, messages: Seq[Message], escapeUnicode: Boolean = true): Unit = { val output = new NewLinePrintWriter(new OutputStreamWriter(new FileOutputStream(f), encoding), false) try { output.println(headerComment(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date()))) output.println() def printEntry(s: String, m: MultipartString): Unit = { output.print(s + " ") if (m.parts.isEmpty) output.println("\"\"") else for (p <- m.parts) output.println("\"" + StringUtils.escape(p, escapeUnicode) + "\"") } for (m <- messages) { for (s <- m.header.comments) output.println(s"# $s") for (s <- m.header.extractedComments) output.println(s"#. $s") for (s <- m.header.locations.sorted) if (s.line < 0) output.println(s"#: ${s.fileString}") else output.println(s"#: ${s.fileString}:${s.line}") if (m.header.flags.nonEmpty) output.println(s"#, " + m.header.flags.map(_.toString).mkString(", ")) for (t <- m.header.tag) output.println(s"#~ $t") for (c <- m.context) printEntry("msgctxt", c) printEntry("msgid", m.message) m match { case Message.Singular(_, _, _, tr) => printEntry("msgstr", tr) case Message.Plural(_, _, _, id, trs) => printEntry("msgid_plural", id) for ((m, i) <- trs.zipWithIndex) printEntry(s"msgstr[$i]", m) } output.println() } } finally output.close() } }
Example 115
Source File: Utils.scala From lemon-schedule with GNU General Public License v2.0 | 5 votes |
package com.gabry.job.utils import java.sql.Timestamp import java.text.SimpleDateFormat import java.util.Date import java.util.concurrent.TimeUnit import scala.collection.mutable.ArrayBuffer def getLoadedClass(classLoader: ClassLoader):Array[Class[_]] = { val loadedClass = ArrayBuffer.empty[Class[_]] val loadedClassEnum = classesField.get(classLoader).asInstanceOf[java.util.Vector[Class[_]]].elements() while(loadedClassEnum.hasMoreElements){ val nextElement = loadedClassEnum.nextElement() loadedClass.append(nextElement) } loadedClass.toArray } }
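The excerpt above is truncated: it references a classesField that is never defined, and the enclosing object declaration is missing. Presumably classesField is a reflective handle on the private classes vector that java.lang.ClassLoader keeps for every class it defines. A self-contained sketch under that assumption (this reflection works on JDK 8; newer JDKs restrict it and need --add-opens java.base/java.lang=ALL-UNNAMED):

import scala.collection.mutable.ArrayBuffer

object LoadedClassesSketch {
  // Assumption: this is the elided classesField from the original project.
  private val classesField = classOf[ClassLoader].getDeclaredField("classes")
  classesField.setAccessible(true)

  def getLoadedClass(classLoader: ClassLoader): Array[Class[_]] = {
    val loadedClass = ArrayBuffer.empty[Class[_]]
    val loadedClassEnum =
      classesField.get(classLoader).asInstanceOf[java.util.Vector[Class[_]]].elements()
    while (loadedClassEnum.hasMoreElements) {
      loadedClass.append(loadedClassEnum.nextElement())
    }
    loadedClass.toArray
  }
}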
Example 116
Source File: Output.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.spark.clustering.mtm import java.io._ import org.apache.spark.rdd.RDD import org.apache.spark.mllib.linalg.DenseVector import scala.sys.process._ import java.util.Calendar import java.text.SimpleDateFormat import java.io.File import java.io.FileWriter object Output extends Serializable { def saveStr(savingPath: String, value: String, fileName: String = "") = { s"mkdir -p ${savingPath}".! val finalPath = savingPath + fileName val fw = new FileWriter(finalPath, true) fw.write(value + "\n") fw.close() } def write(outputDir: String, datas: RDD[Array[Double]], model: AbstractModel, nbRowSOM:Int, nbColSOM: Int): String = { val now = Calendar.getInstance().getTime() val format = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss") val time = format.format(now) val dim = datas.first.size val datasWithIndex = datas.zipWithIndex.map(_.swap) val path: String = outputDir + "/EXP-" + time + "/" s"mkdir -p ${path}".! val mapMin = Array.fill[Byte](dim)(0).mkString(",") var header = "# mapDim=2 mapSize={"+ nbRowSOM +"," + nbColSOM + "}" header += " pointDim=" + dim + " pointRealDim=" + dim + " mapMin={" + mapMin + "}" val prototypes = model.prototypes.map( d => (d.id, d.point)).sortBy(_._1).map(_._2) println("Write Prototypes...") val protosString = prototypes.map( d => d.toArray.mkString(",")).mkString("\n") // Utiliser fileWriter saveStr(path, header + "\n" + protosString, "maps") val sumAffectedDatas = datas.map( d => (model.findClosestPrototype(d).id, 1)).reduceByKey{ case (sum1, sum2) => sum1 + sum2 }.collectAsMap // fill in all the prototypes that have 0 observations val card = (0 until prototypes.length).map( d => if (sumAffectedDatas.contains(d)) sumAffectedDatas(d) + "" else "0" ) println("Write Cardinalities...") var cardHeader = "# mapDim=2 mapSize={"+ nbRowSOM +"," + nbColSOM + "}" cardHeader += "pointDim=1 pointRealDim=0 mapMin={0} mapMax={0}" val cardStr = card.mkString("\n") saveStr(path, cardHeader + "\n" + cardStr, "cards") val affHeader = "# mapDim=1 mapSize={" + datas.count() + "} pointDim=1 pointRealDim=0 mapMin={0} mapMax={0}" val aff = datasWithIndex.map(d => (d._1, model.findClosestPrototype(d._2).id + "")).sortByKey().values.collect.mkString("\n") println("Write Affiliate...") saveStr(path, affHeader + "\n" + aff, "affs") println("Write Maps...") val maps = prototypes.zip(card).map(d => d._1.toArray.mkString(",") + "," + d._2).mkString("\n") saveStr(path, maps, "mapscard") println("Write successfully...") time } }
Example 117
Source File: KeysExtractor.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.azure.documentdb.sink import java.text.SimpleDateFormat import java.util.TimeZone import org.apache.kafka.common.config.ConfigException import org.apache.kafka.connect.data._ import org.json4s.JsonAST._ object KeysExtractor { private val ISO_DATE_FORMAT: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'") private val TIME_FORMAT: SimpleDateFormat = new SimpleDateFormat("HH:mm:ss.SSSZ") ISO_DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC")) def fromStruct(struct: Struct, keys: Set[String]): Set[(String, Any)] = { keys.map { key => val schema = struct.schema().field(key).schema() val value = struct.get(key) val v = schema.`type`() match { case Schema.Type.INT32 => if (schema != null && Date.LOGICAL_NAME == schema.name) ISO_DATE_FORMAT.format(Date.toLogical(schema, value.asInstanceOf[Int])) else if (schema != null && Time.LOGICAL_NAME == schema.name) TIME_FORMAT.format(Time.toLogical(schema, value.asInstanceOf[Int])) else value case Schema.Type.INT64 => if (Timestamp.LOGICAL_NAME == schema.name) Timestamp.fromLogical(schema, value.asInstanceOf[(java.util.Date)]) else value case Schema.Type.STRING => value.asInstanceOf[CharSequence].toString case Schema.Type.BYTES => if (Decimal.LOGICAL_NAME == schema.name) value.asInstanceOf[BigDecimal].toDouble else throw new ConfigException(s"Schema.Type.BYTES is not supported for $key.") case Schema.Type.ARRAY => throw new ConfigException(s"Schema.Type.ARRAY is not supported for $key.") case Schema.Type.MAP => throw new ConfigException(s"Schema.Type.MAP is not supported for $key.") case Schema.Type.STRUCT => throw new ConfigException(s"Schema.Type.STRUCT is not supported for $key.") case other => throw new ConfigException(s"$other is not supported for $key.") } key -> v } } def fromMap(map: java.util.Map[String, Any], keys: Set[String]): Set[(String, Any)] = { keys.map { key => if (!map.containsKey(key)) throw new ConfigException(s"The key $key can't be found") val value = map.get(key) match { case t: String => t case t: Boolean => t case t: Int => t case t: Long => t case t: Double => t case t: BigInt => t.toLong case t: BigDecimal => t.toDouble case other => throw new ConfigException(s"The key $key is not supported for type ${Option(other).map(_.getClass.getName).getOrElse("NULL")}") } key -> value } } def fromJson(jvalue: JValue, keys: Set[String]): List[(String, Any)] = { jvalue match { case JObject(children) => children.collect { case JField(name, value) if keys.contains(name) => val v = value match { case JBool(b) => b case JDecimal(d) => d.toDouble case JDouble(d) => d case JInt(i) => i.toLong case JLong(l) => l case JString(s) => s case other => throw new ConfigException(s"Field $name is not handled as a key (${other.getClass}). it needs to be a int, long, string, double or decimal") } name -> v } case other => throw new ConfigException(s"${other.getClass} is not supported") } } }
Example 118
Source File: StructFieldsExtractorBytes.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hbase import java.text.SimpleDateFormat import java.util.TimeZone import com.datamountaineer.streamreactor.connect.hbase.BytesHelper._ import com.typesafe.scalalogging.StrictLogging import org.apache.kafka.connect.data._ import scala.collection.JavaConverters._ trait FieldsValuesExtractor { def get(struct: Struct): Seq[(String, Array[Byte])] } case class StructFieldsExtractorBytes(includeAllFields: Boolean, fieldsAliasMap: Map[String, String]) extends FieldsValuesExtractor with StrictLogging { def get(struct: Struct): Seq[(String, Array[Byte])] = { val schema = struct.schema() val fields: Seq[Field] = if (includeAllFields) { schema.fields().asScala } else { val selectedFields = schema.fields().asScala.filter(f => fieldsAliasMap.contains(f.name())) val diffSet = fieldsAliasMap.keySet.diff(selectedFields.map(_.name()).toSet) if (diffSet.nonEmpty) { val errMsg = s"Following columns ${diffSet.mkString(",")} have not been found. Available columns:${fieldsAliasMap.keys.mkString(",")}" logger.error(errMsg) sys.error(errMsg) } selectedFields } val fieldsAndValues = fields.flatMap(field => getFieldBytes(field, struct).map(bytes => fieldsAliasMap.getOrElse(field.name(), field.name()) -> bytes)) fieldsAndValues } private def getFieldBytes(field: Field, struct: Struct): Option[Array[Byte]] = { Option(struct.get(field)) .map { value => Option(field.schema().name()).collect { case Decimal.LOGICAL_NAME => value.asInstanceOf[Any] match { case _:java.math.BigDecimal => value.fromBigDecimal() case arr: Array[Byte] => Decimal.toLogical(field.schema, arr).asInstanceOf[Any].fromBigDecimal() case _ => throw new IllegalArgumentException(s"${field.name()} is not handled for value:$value") } case Time.LOGICAL_NAME => value.asInstanceOf[Any] match { case i: Int => StructFieldsExtractorBytes.TimeFormat.format(Time.toLogical(field.schema, i)).asInstanceOf[Any].fromString() case d:java.util.Date => StructFieldsExtractorBytes.TimeFormat.format(d).asInstanceOf[Any].fromString() case _ => throw new IllegalArgumentException(s"${field.name()} is not handled for value:$value") } case Timestamp.LOGICAL_NAME => value.asInstanceOf[Any] match { case d:java.util.Date => StructFieldsExtractorBytes.DateFormat.format(d).asInstanceOf[Any].fromString() case l: Long => StructFieldsExtractorBytes.DateFormat.format(Timestamp.toLogical(field.schema, l)).asInstanceOf[Any].fromString() case _ => throw new IllegalArgumentException(s"${field.name()} is not handled for value:$value") } }.getOrElse { field.schema().`type`() match { case Schema.Type.BOOLEAN => value.fromBoolean() case Schema.Type.BYTES => value.fromBytes() case Schema.Type.FLOAT32 => value.fromFloat() case Schema.Type.FLOAT64 => value.fromDouble() case Schema.Type.INT8 => value.fromByte() case Schema.Type.INT16 => value.fromShort() case Schema.Type.INT32 => value.fromInt() case Schema.Type.INT64 => value.fromLong() case Schema.Type.STRING => value.fromString() case other => sys.error(s"$other is not a recognized schema!") } } } } } object StructFieldsExtractorBytes { val DateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'") val TimeFormat = new SimpleDateFormat("HH:mm:ss.SSSZ") DateFormat.setTimeZone(TimeZone.getTimeZone("UTC")) }
Example 119
Source File: StructFieldsExtractor.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.voltdb import java.text.SimpleDateFormat import java.util.TimeZone import com.typesafe.scalalogging.StrictLogging import org.apache.kafka.connect.data.{Field, Struct, _} import scala.collection.JavaConverters._ trait FieldsValuesExtractor { def get(struct: Struct): Map[String, Any] } case class StructFieldsExtractor(targetTable: String, includeAllFields: Boolean, fieldsAliasMap: Map[String, String], isUpsert: Boolean = false) extends FieldsValuesExtractor with StrictLogging { require(targetTable != null && targetTable.trim.length > 0) def get(struct: Struct): Map[String, Any] = { val schema = struct.schema() val fields: Seq[Field] = { if (includeAllFields) { schema.fields().asScala } else { val selectedFields = schema.fields().asScala.filter(f => fieldsAliasMap.contains(f.name())) val diffSet = fieldsAliasMap.keySet.diff(selectedFields.map(_.name()).toSet) if (diffSet.nonEmpty) { val errMsg = s"Following columns ${diffSet.mkString(",")} have not been found. Available columns:${fieldsAliasMap.keys.mkString(",")}" logger.error(errMsg) sys.error(errMsg) } selectedFields } } //need to select all fields including null. the stored proc needs a fixed set of params fields.map { field => val schema = field.schema() val value = Option(struct.get(field)) .map { value => //handle specific schema schema.name() match { case Decimal.LOGICAL_NAME => value.asInstanceOf[Any] match { case _:java.math.BigDecimal => value case arr: Array[Byte] => Decimal.toLogical(schema, arr) case _ => throw new IllegalArgumentException(s"${field.name()} is not handled for value:$value") } case Time.LOGICAL_NAME => value.asInstanceOf[Any] match { case i: Int => StructFieldsExtractor.TimeFormat.format(Time.toLogical(schema, i)) case d:java.util.Date => StructFieldsExtractor.TimeFormat.format(d) case _ => throw new IllegalArgumentException(s"${field.name()} is not handled for value:$value") } case Timestamp.LOGICAL_NAME => value.asInstanceOf[Any] match { case d:java.util.Date => StructFieldsExtractor.DateFormat.format(d) case l: Long => StructFieldsExtractor.DateFormat.format(Timestamp.toLogical(schema, l)) case _ => throw new IllegalArgumentException(s"${field.name()} is not handled for value:$value") } case _ => value } }.orNull fieldsAliasMap.getOrElse(field.name(), field.name()) -> value }.toMap } } object StructFieldsExtractor { val DateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'") val TimeFormat: SimpleDateFormat = new SimpleDateFormat("HH:mm:ss.SSSZ") DateFormat.setTimeZone(TimeZone.getTimeZone("UTC")) }
Example 120
Source File: DateFormatConstraint.scala From drunken-data-quality with Apache License 2.0 | 5 votes |
package de.frosner.ddq.constraints import java.text.SimpleDateFormat import org.apache.spark.sql.functions._ import org.apache.spark.sql.{Column, DataFrame} import scala.util.Try case class DateFormatConstraint(columnName: String, formatString: String) extends Constraint { val fun = (df: DataFrame) => { val cannotBeDate = udf((column: String) => column != null && Try { val format = new SimpleDateFormat(formatString) format.setLenient(false) format.parse(column) }.isFailure) val maybeCannotBeDateCount = Try(df.filter(cannotBeDate(new Column(columnName))).count) DateFormatConstraintResult( this, data = maybeCannotBeDateCount.toOption.map(DateFormatConstraintResultData), status = ConstraintUtil.tryToStatus[Long](maybeCannotBeDateCount, _ == 0) ) } } case class DateFormatConstraintResult(constraint: DateFormatConstraint, data: Option[DateFormatConstraintResultData], status: ConstraintStatus) extends ConstraintResult[DateFormatConstraint] { val message: String = { val format = constraint.formatString val columnName = constraint.columnName val maybeFailedRows = data.map(_.failedRows) val maybePluralS = maybeFailedRows.map(failedRows => if (failedRows == 1) "" else "s") val maybeVerb = maybeFailedRows.map(failedRows => if (failedRows == 1) "is" else "are") (status, maybeFailedRows, maybePluralS, maybeVerb) match { case (ConstraintSuccess, Some(0), _, _) => s"Column $columnName is formatted by $format." case (ConstraintFailure, Some(failedRows), Some(pluralS), Some(verb)) => s"Column $columnName contains $failedRows row$pluralS that $verb not formatted by $format." case (ConstraintError(throwable), None, None, None) => s"Checking whether column $columnName is formatted by $format failed: $throwable" case default => throw IllegalConstraintResultException(this) } } } case class DateFormatConstraintResultData(failedRows: Long)
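setLenient(false) is the important detail here: with the default lenient mode, SimpleDateFormat silently normalises impossible dates instead of rejecting them, so the constraint would miss malformed values. A standalone illustration:

import java.text.SimpleDateFormat
import scala.util.Try

object LenientParsingSketch {
  def main(args: Array[String]): Unit = {
    val strict = new SimpleDateFormat("yyyy-MM-dd")
    strict.setLenient(false)
    val lenient = new SimpleDateFormat("yyyy-MM-dd")

    // Lenient (default) parsing rolls the invalid 2015-02-30 forward to March 2nd;
    // the strict parser throws a ParseException instead.
    println(Try(lenient.parse("2015-02-30"))) // Success(Mon Mar 02 ...)
    println(Try(strict.parse("2015-02-30")))  // Failure(ParseException)
  }
}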
Example 121
Source File: TimeHelper.scala From ez-framework with Apache License 2.0 | 5 votes |
package com.ecfront.ez.framework.core.helper import java.text.SimpleDateFormat import java.time.ZonedDateTime import java.util.{Calendar, Date, TimeZone} object TimeHelper { val msf = new SimpleDateFormat("yyyyMMddHHmmssSSS") val sf = new SimpleDateFormat("yyyyMMddHHmmss") val mf = new SimpleDateFormat("yyyyMMddHHmm") val hf = new SimpleDateFormat("yyyyMMddHH") val df = new SimpleDateFormat("yyyyMMdd") val Mf = new SimpleDateFormat("yyyyMM") val yf = new SimpleDateFormat("yyyy") val yyyy_MM_dd_HH_mm_ss_SSS = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS") val yyyy_MM_dd_HH_mm_ss = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") val yyyy_MM_dd = new SimpleDateFormat("yyyy-MM-dd") def dateOffset(offsetValue: Int, offsetUnit: Int, currentTime: Long): Long = { val format = currentTime.toString.length match { case 8 => df case 10 => hf case 12 => mf case 14 => sf case 17 => msf } val calendar = Calendar.getInstance() calendar.setTime(format.parse(currentTime + "")) calendar.add(offsetUnit, offsetValue) format.format(calendar.getTime).toLong } def dateOffset(offsetValue: Int, offsetUnit: Int, currentDate: Date): Date = { val calendar = Calendar.getInstance() calendar.setTime(currentDate) calendar.add(offsetUnit, offsetValue) calendar.getTime } def utc2Local(utcTime: String, localTimePatten: String = "yyyy-MM-dd'T'HH:mm:ss"): String = { val utcDate = Date.from(ZonedDateTime.parse(utcTime).toInstant) val localF = new SimpleDateFormat(localTimePatten) localF.setTimeZone(TimeZone.getDefault) localF.format(utcDate.getTime) } }
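One caution about this helper: the formatter vals (msf, sf, yyyy_MM_dd, ...) are shared singletons, and SimpleDateFormat keeps mutable parsing state, so concurrent use from multiple threads can corrupt results. A hedged alternative sketch that keeps the same call style but gives each thread its own instance:

import java.text.SimpleDateFormat
import java.util.Date

object ThreadSafeTimeFormats {
  // One SimpleDateFormat per thread: same pattern, no shared mutable state.
  private val sf = new ThreadLocal[SimpleDateFormat] {
    override def initialValue(): SimpleDateFormat = new SimpleDateFormat("yyyyMMddHHmmss")
  }

  def format(d: Date): String = sf.get().format(d)
  def parse(s: String): Date = sf.get().parse(s)
}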
Example 122
Source File: TaskMonitor.scala From ez-framework with Apache License 2.0 | 5 votes |
package com.ecfront.ez.framework.core.monitor import java.text.SimpleDateFormat import java.util.Date import java.util.concurrent.ConcurrentHashMap import com.ecfront.ez.framework.core.EZ import com.ecfront.ez.framework.core.helper.{TimeHelper, TimerHelper} import com.ecfront.ez.framework.core.logger.Logging import scala.collection.JavaConversions._ object TaskMonitor extends Logging { private val tasks = new ConcurrentHashMap[String, (String, Date)]() def add(taskName: String): String = { val taskId = EZ.createUUID tasks += taskId -> (taskName, new Date()) taskId } def get(taskId:String):(String,Date)={ tasks.get(taskId) } def poll(taskId:String):(String,Date)={ val d=tasks.get(taskId) tasks -= taskId d } def remove(taskId: String): Unit = { tasks -= taskId } def hasTask(): Boolean = { tasks.nonEmpty } def waitFinish(timeout: Long = Long.MaxValue): Unit = { logger.info("[Monitor]waiting task finish...") val waitStart = new Date().getTime while (tasks.nonEmpty && waitStart + timeout < new Date().getTime) { Thread.sleep(500) if (new Date().getTime - waitStart > 60 * 1000) { var warn = "[Monitor]has some unfinished tasks:\r\n" warn += tasks.map(task => s" > id:${task._1} name:${task._2._1} start time:${TimeHelper.yyyy_MM_dd_HH_mm_ss_SSS.format(task._2._2)}").mkString("\r\n") logger.warn(warn) } } if (tasks.nonEmpty) { var error = "[Monitor]has some unfinished tasks,but time is out:\r\n" error += tasks.map(task => s" > id:${task._1} name:${task._2._1} start time:${TimeHelper.yyyy_MM_dd_HH_mm_ss_SSS.format(task._2._2)}").mkString("\r\n") logger.error(error) } // 再等1秒 Thread.sleep(1000) } private val yyyy_MM_dd_HH_mm_ss_SSS = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS") TimerHelper.periodic(60L, { if (tasks.nonEmpty) { val info = new StringBuffer(s"\r\n--------------Current Execute Tasks : (${tasks.size()}) --------------\r\n") tasks.foreach { i => info.append(s"------ ${yyyy_MM_dd_HH_mm_ss_SSS.format(i._2._2)} : [${i._1}]${i._2._1}\r\n") } logger.trace(info.toString) } }) }
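The loop guard in waitFinish reads `waitStart + timeout < new Date().getTime`, which only stays true for the default Long.MaxValue timeout because the addition overflows; for any finite timeout the loop exits immediately. A sketch of the guard the surrounding log messages suggest was intended:

object WaitGuardSketch {
  // Presumably intended condition: keep polling while tasks remain and the elapsed
  // time is still below the timeout (no Long overflow involved).
  def stillWaiting(tasksNonEmpty: Boolean, waitStart: Long, timeout: Long): Boolean =
    tasksNonEmpty && (System.currentTimeMillis() - waitStart) < timeout
}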
Example 123
Source File: LateDataMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.sideoutput.lateDataProcess import java.text.SimpleDateFormat import java.util.{Calendar, Date} import com.venn.common.Common import com.venn.util.MathUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject def getCreateTime(): String = { // minute = minute + 1 calendar.add(Calendar.SECOND, 10) sdf.format(calendar.getTime) } val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") def main(args: Array[String]): Unit = { val producer = new KafkaProducer[String, String](Common.getProp) calendar.setTime(new Date()) println(sdf.format(calendar.getTime)) var i =74540; while (true) { // val map = Map("id"-> i, "createTime"-> sdf.format(System.currentTimeMillis())) val map = Map("id"-> i, "createTime"-> getCreateTime(), "amt"-> (MathUtil.random.nextInt(10) +"." + MathUtil.random.nextInt(10))) val jsonObject: JSONObject = new JSONObject(map) println(jsonObject.toString()) // topic current_day val msg = new ProducerRecord[String, String]("late_data", jsonObject.toString()) producer.send(msg) producer.flush() Thread.sleep(200) i = i + 1 // System.exit(-1) } } }
Example 124
Source File: ErrorHandler.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.errors import java.text.SimpleDateFormat import java.util.Date import com.typesafe.scalalogging.StrictLogging import scala.util.{Failure, Success, Try} trait ErrorHandler extends StrictLogging { var errorTracker: Option[ErrorTracker] = None private val dateFormatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS'Z'") def initialize(maxRetries: Int, errorPolicy: ErrorPolicy): Unit = { errorTracker = Some(ErrorTracker(maxRetries, maxRetries, "", new Date(), errorPolicy)) } def getErrorTrackerRetries() : Int = { errorTracker.get.retries } def errored() : Boolean = { errorTracker.get.retries != errorTracker.get.maxRetries } def handleTry[A](t : Try[A]) : Option[A] = { require(errorTracker.isDefined, "ErrorTracker is not set call. Initialize.") t match { case Success(s) => { //success, check if we had previous errors. if (errorTracker.get.retries != errorTracker.get.maxRetries) { logger.info(s"Recovered from error ${errorTracker.get.lastErrorMessage} at " + s"${dateFormatter.format(errorTracker.get.lastErrorTimestamp)}") } //cleared error resetErrorTracker() Some(s) } case Failure(f) => //decrement the retry count logger.error(s"Encountered error ${f.getMessage}", f) this.errorTracker = Some(decrementErrorTracker(errorTracker.get, f.getMessage)) handleError(f, errorTracker.get.retries, errorTracker.get.policy) None } } def resetErrorTracker() = { errorTracker = Some(ErrorTracker(errorTracker.get.maxRetries, errorTracker.get.maxRetries, "", new Date(), errorTracker.get.policy)) } private def decrementErrorTracker(errorTracker: ErrorTracker, msg: String): ErrorTracker = { if (errorTracker.maxRetries == -1) { ErrorTracker(errorTracker.retries, errorTracker.maxRetries, msg, new Date(), errorTracker.policy) } else { ErrorTracker(errorTracker.retries - 1, errorTracker.maxRetries, msg, new Date(), errorTracker.policy) } } private def handleError(f: Throwable, retries: Int, policy: ErrorPolicy): Unit = { policy.handle(f, true, retries) } }
Example 125
Source File: DataFrameReportPerformanceSpec.scala From seahorse-workflow-executor with Apache License 2.0 | 5 votes |
package io.deepsense.deeplang.doperables.dataframe import java.sql.Timestamp import java.text.{DateFormat, SimpleDateFormat} import java.util.TimeZone import org.apache.spark.rdd.RDD import org.apache.spark.sql.Row import org.apache.spark.sql.types.{DoubleType, StructField, StructType, TimestampType} import org.scalatest.{BeforeAndAfter, Ignore} import io.deepsense.commons.utils.{DoubleUtils, Logging} import io.deepsense.deeplang.{TestFiles, DeeplangIntegTestSupport} // It's ignored because it does not have got assertions, it only prints report generation time. @Ignore class DataFrameReportPerformanceSpec extends DeeplangIntegTestSupport with BeforeAndAfter with TestFiles with Logging { val testFile = absoluteTestsDirPath.pathWithoutScheme + "/demand_without_header.csv" "DataFrame" should { "generate report" when { "DataFrame has 17K of rows" in { val numberOfTries = 10 var results: Seq[Double] = Seq() for (i <- 1 to numberOfTries) { val dataFrame: DataFrame = demandDataFrame() val start = System.nanoTime() val report = dataFrame.report val end = System.nanoTime() val time1: Double = (end - start).toDouble / 1000000000.0 results = results :+ time1 logger.debug("Report generation time: {}", DoubleUtils.double2String(time1)) } logger.debug( "Mean report generation time: {}", DoubleUtils.double2String(results.fold(0D)(_ + _) / numberOfTries.toDouble)) } } } private def demandDataFrame(): DataFrame = { val rddString: RDD[String] = executionContext.sparkContext.textFile(testFile) val data: RDD[Row] = rddString.map(DataFrameHelpers.demandString2Row) executionContext.dataFrameBuilder.buildDataFrame(demandSchema, data) } private def demandSchema: StructType = StructType(Seq( StructField("datetime", TimestampType), StructField("log_count", DoubleType), StructField("workingday", DoubleType), StructField("holiday", DoubleType), StructField("season2", DoubleType), StructField("season3", DoubleType), StructField("season4", DoubleType))) private def timestamp(s: String): Timestamp = { val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") format.setTimeZone(TimeZone.getTimeZone("UTC")) new Timestamp(format.parse(s).getTime) } } private object DataFrameHelpers { def demandString2Row(s: String): Row = { val split = s.split(",") Row( timestamp(split(0)), split(1).toDouble, split(2).toDouble, split(3).toDouble, split(4).toDouble, split(5).toDouble, split(6).toDouble ) } private def timestamp(s: String): Timestamp = { val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") format.setTimeZone(TimeZone.getTimeZone("UTC")) new Timestamp(format.parse(s).getTime) } }
Example 126
Source File: ReceiptRenderer.scala From apple-of-my-iap with MIT License | 5 votes |
package com.meetup.iap.receipt import java.text.SimpleDateFormat import com.meetup.iap.AppleApi import AppleApi.{ReceiptResponse, ReceiptInfo} import java.util.{Date, TimeZone} import org.json4s.JsonDSL._ import org.json4s.native.JsonMethods._ import org.json4s.JsonAST.JValue import org.slf4j.LoggerFactory object ReceiptRenderer { val log = LoggerFactory.getLogger(ReceiptRenderer.getClass) private def appleDateFormat(date: Date): String = { val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss 'Etc/GMT'") sdf.setTimeZone(TimeZone.getTimeZone("UTC")) sdf.format(date) } def apply(response: ReceiptResponse): String = { pretty(render( ("status" -> response.statusCode) ~ ("latest_receipt_info" -> response.latestReceiptInfo.reverse.map(renderReceipt)) ~ ("latest_receipt" -> response.latestReceipt))) } private def renderReceipt(receiptInfo: ReceiptInfo): JValue = { val origPurchaseDate = receiptInfo.originalPurchaseDate val origPurchaseDateStr = appleDateFormat(origPurchaseDate) val origPurchaseDateMs = origPurchaseDate.getTime val purchaseDate = receiptInfo.purchaseDate val purchaseDateStr = appleDateFormat(purchaseDate) val purchaseDateMs = purchaseDate.getTime val expiresDate = receiptInfo.expiresDate val expiresDateStr = appleDateFormat(expiresDate) val expiresDateMs = expiresDate.getTime val cancellationDate = receiptInfo.cancellationDate.map { date => appleDateFormat(date) } ("quantity" -> "1") ~ ("product_id" -> receiptInfo.productId) ~ ("transaction_id" -> receiptInfo.transactionId) ~ ("original_transaction_id" -> receiptInfo.originalTransactionId) ~ ("purchase_date" -> purchaseDateStr) ~ ("purchase_date_ms" -> purchaseDateMs.toString) ~ ("original_purchase_date" -> origPurchaseDateStr) ~ ("original_purchase_date_ms" -> origPurchaseDateMs.toString) ~ ("expires_date" -> expiresDateStr) ~ ("expires_date_ms" -> expiresDateMs.toString) ~ ("is_trial_period" -> receiptInfo.isTrialPeriod.toString) ~ //We mimic Apple's weird json here by converting the boolean type to a string ("is_in_intro_offer_period" -> receiptInfo.isInIntroOfferPeriod.map(_.toString)) ~ ("cancellation_date" -> cancellationDate) } }
Example 127
Source File: KafkaOffsetRevertTest.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.kafka import java.text.SimpleDateFormat import java.util.{Calendar, Date} import com.venn.common.Common import com.venn.util.MathUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") def main(args: Array[String]): Unit = { val producer = new KafkaProducer[String, String](Common.getProp(true)) var i = 0; while (true) { // val map = Map("id"-> i, "createTime"-> sdf.format(System.currentTimeMillis())) val map = Map("id" -> i, "createTime" -> sdf.format(System.currentTimeMillis()), "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10))) val jsonObject: JSONObject = new JSONObject(map) println(jsonObject.toString()) // topic current_day val msg = new ProducerRecord[String, String]("kafka_offset", jsonObject.toString()) producer.send(msg) producer.flush() Thread.sleep(1000) i = i + 1 // System.exit(-1) } } }
Example 128
Source File: SlotPartitionMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.demo import java.text.SimpleDateFormat import java.util.{Calendar, Date} import com.venn.common.Common import com.venn.util.MathUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject def getCreateTime(): String = { // minute = minute + 1 calendar.add(Calendar.MILLISECOND, 10) sdf.format(calendar.getTime) } val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") def main(args: Array[String]): Unit = { val prop = Common.getProp prop.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") prop.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](Common.getProp) calendar.setTime(new Date()) println(sdf.format(calendar.getTime)) var i = 0; while (true) { val map = Map("id" -> i, "createTime" -> getCreateTime(), "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10))) val jsonObject: JSONObject = new JSONObject(map) println(jsonObject.toString()) // topic current_day val msg = new ProducerRecord[String, String]("slot_partition", jsonObject.toString()) producer.send(msg) producer.flush() if (MathUtil.random.nextBoolean()) { Thread.sleep(1500) } else { Thread.sleep(500) } i = i + 1 // System.exit(-1) } } }
Example 129
Source File: IntervalJoinKafkaKeyMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.intervalJoin import java.text.SimpleDateFormat import com.venn.common.Common import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject object IntervalJoinKafkaKeyMaker { val topic = "async" def main(args: Array[String]): Unit = { while (true) { left("topic_left") right("topic_right") Thread.sleep(500) } } val sdf = new SimpleDateFormat("yyyyMMddHHmmss") var idLeft = 0 def left(topic: String) = { val producer = new KafkaProducer[String, String](Common.getProp) idLeft = idLeft + 1 val map = Map("id" -> idLeft, "name" -> ("venn" + System.currentTimeMillis()), "date" -> sdf.format(System.currentTimeMillis())) val jsonObject: JSONObject = new JSONObject(map) println("left : " + jsonObject.toString()) val msg = new ProducerRecord[String, String](topic, jsonObject.toString()) producer.send(msg) producer.flush() } var idRight = 0 def right(topic: String) = { val producer = new KafkaProducer[String, String](Common.getProp) idRight = idRight + 1 val map = Map("id" -> idRight, "phone" -> ("17713333333" + idRight), "date" -> sdf.format(System.currentTimeMillis())) val jsonObject: JSONObject = new JSONObject(map) println("right : \t\t\t\t\t\t\t\t" + jsonObject.toString()) val msg = new ProducerRecord[String, String](topic, jsonObject.toString()) producer.send(msg) producer.flush() } }
Example 130
Source File: CacheFile.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.tableJoin

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.api.scala._
import org.apache.flink.api.common.functions.RichMapFunction
import org.apache.flink.configuration.Configuration
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer

import scala.io.Source

object CacheFile {

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    if ("/".equals(File.separator)) {
      // on Linux: enable checkpointing and register the cache file from the server path
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
      env.registerCachedFile("/opt/flink1.7/data/tablejoin.txt", "tablejoin.txt")
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
      // file and register name
      env.registerCachedFile("C:\\Users\\venn\\git\\venn\\flinkDemo\\src\\main\\resources\\data\\tablejoin.txt", "tablejoin.txt")
    }
    // cache table
    val sdf = new SimpleDateFormat("yyyyMMddHHmmss")
    val source = new FlinkKafkaConsumer[ObjectNode]("table_join", new JsonNodeDeserializationSchema, Common.getProp)

    env.addSource(source)
      .map(json => {
        val id = json.get("id").asText()
        val phone = json.get("phone").asText()
        Tuple2(id, phone)
      })
      .map(new RichMapFunction[(String, String), String] {

        var cache = Map("" -> "")

        override def open(parameters: Configuration): Unit = {
          // read cache file
          val file = getRuntimeContext.getDistributedCache.getFile("tablejoin.txt")
          if (file.canRead) {
            val context = Source.fromFile(file, "utf-8").getLines().toArray
            context.foreach(line => {
              val tmp = line.split(",")
              cache += (tmp(0) -> tmp(1))
            })
          }
        }

        override def map(value: (String, String)): String = {
          // look the key up in the cached table; empty string when it is missing
          val name = cache.getOrElse(value._1, "")
          value._1 + "," + value._2 + "," + name
        }
      })
      .print()

    env.execute("cacheFile")
  }
}
Example 131
Source File: StreamingFileSinkDemo.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.filesink

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.api.common.serialization.{BulkWriter, SimpleStringEncoder}
import org.apache.flink.api.scala._
import org.apache.flink.core.fs.Path
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer

object StreamingFileSinkDemo {

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
    }
    val sdf = new SimpleDateFormat("yyyyMMddHHmmss")
    val source = new FlinkKafkaConsumer[ObjectNode]("roll_file_sink", new JsonNodeDeserializationSchema, Common.getProp)

    // row format
    val sinkRow = StreamingFileSink
      .forRowFormat(new Path("D:\\idea_out\\rollfilesink"), new SimpleStringEncoder[ObjectNode]("UTF-8"))
      .withBucketAssigner(new DayBucketAssigner)
      .withBucketCheckInterval(60 * 60 * 1000l) // 1 hour
      .build()

    // use the user-defined BulkWriterFactory and DayBucketAssigner
    val sinkBuck = StreamingFileSink
      .forBulkFormat(new Path("D:\\idea_out\\rollfilesink"), new DayBulkWriterFactory)
      .withBucketAssigner(new DayBucketAssigner())
      .withBucketCheckInterval(60 * 60 * 1000l) // 1 hour
      .build()

    env.addSource(source)
      .assignAscendingTimestamps(json => {
        sdf.parse(json.get("date").asText()).getTime
      })
      .map(json => {
        // json.get("date") + "-" + json.toString
        json
      })
      .addSink(sinkBuck)

    env.execute("StreamingFileSink")
  }
}
Example 132
Source File: FileSinkMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.filesink

import java.text.SimpleDateFormat
import java.util.Calendar

import com.venn.common.Common
import com.venn.stream.api.dayWindow.CurrentDayMaker.{calendar, getCreateTime, sdf}
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.parsing.json.JSONObject

object FileSinkMaker {

  val topic = "async"

  def main(args: Array[String]): Unit = {
    while (true) {
      left("roll_file_sink")
      Thread.sleep(100)
    }
  }

  val sdf = new SimpleDateFormat("yyyyMMddHHmmss")

  var idLeft = 0

  def left(topic: String) = {
    val producer = new KafkaProducer[String, String](Common.getProp)
    idLeft = idLeft + 1
    val map = Map(
      "id" -> idLeft,
      "name" -> ("venn" + System.currentTimeMillis()),
      "date" -> getCreateTime)
    val jsonObject: JSONObject = new JSONObject(map)
    println("left : " + jsonObject.toString())
    val msg = new ProducerRecord[String, String](topic, jsonObject.toString())
    // producer.send(msg)
    // producer.flush()
  }

  var minute: Int = 1
  val calendar: Calendar = Calendar.getInstance()

  def getCreateTime(): String = {
    // minute = minute + 1
    calendar.add(Calendar.MINUTE, 10)
    sdf.format(calendar.getTime)
  }
}
Example 133
Source File: RollingFileSinkDemo.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.filesink

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.fs.StringWriter
import org.apache.flink.streaming.connectors.fs.bucketing.BucketingSink
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.api.scala._

object RollingFileSinkDemo {

  def main(args: Array[String]): Unit = {
    // The original listing starts at the sink definition below; the environment, date format
    // and Kafka source set-up here are reconstructed following the sibling StreamingFileSinkDemo
    // and may differ in detail from the original source.
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
    }
    val sdf = new SimpleDateFormat("yyyyMMddHHmmss")
    val source = new FlinkKafkaConsumer[ObjectNode]("roll_file_sink", new JsonNodeDeserializationSchema, Common.getProp)

    val sink = new BucketingSink[String]("D:\\idea_out\\rollfilesink")
    sink.setBucketer(new DayBasePathBucketer)
    sink.setWriter(new StringWriter[String])
    sink.setBatchSize(1024 * 1024 * 400) // this is 400 MB
    // sink.setBatchRolloverInterval(24 * 60 * 60 * 1000) // this is 24 hours
    // sink.setInProgressPrefix("inProcessPre")
    // sink.setPendingPrefix("pendingpre")
    // sink.setPartPrefix("partPre")

    env.addSource(source)
      .assignAscendingTimestamps(json => {
        sdf.parse(json.get("date").asText()).getTime
      })
      .map(json => {
        json.get("date") + "-" + json.toString
      })
      .addSink(sink)

    env.execute("rollingFileSink")
  }
}
Example 134
Source File: ProcessWindowForTrigger.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.trigger

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.contrib.streaming.state.RocksDBStateBackend
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.scala.function.ProcessAllWindowFunction
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector
import org.slf4j.LoggerFactory

object ProcessWindowDemoForTrigger {

  val logger = LoggerFactory.getLogger(this.getClass)

  def main(args: Array[String]): Unit = {
    // environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    if ("\\".equals(File.pathSeparator)) {
      val rock = new RocksDBStateBackend(Common.CHECK_POINT_DATA_DIR)
      env.setStateBackend(rock)
      // checkpoint interval
      env.enableCheckpointing(10000)
    }

    val topic = "current_day"
    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

    val kafkaSource = new FlinkKafkaConsumer[String](topic, new SimpleStringSchema(), Common.getProp)
    val stream = env.addSource(kafkaSource)
      .map(s => {
        s
      })
      .windowAll(TumblingProcessingTimeWindows.of(Time.seconds(60)))
      .trigger(CountAndTimeTrigger.of(10, Time.seconds(10)))
      .process(new ProcessAllWindowFunction[String, String, TimeWindow] {
        override def process(context: Context, elements: Iterable[String], out: Collector[String]): Unit = {
          var count = 0
          elements.iterator.foreach(s => {
            count += 1
          })
          logger.info("this trigger have : {} item", count)
        }
      })

    // execute job
    env.execute(this.getClass.getName)
  }
}
Example 135
Source File: WindowDemoMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.trigger

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}

import com.venn.common.Common
import com.venn.util.MathUtil
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.parsing.json.JSONObject

object WindowDemoMaker {

  // The calendar declaration was stripped together with the object header; reconstructed here.
  val calendar: Calendar = Calendar.getInstance()

  def getCreateTime(): String = {
    // minute = minute + 1
    calendar.add(Calendar.MILLISECOND, 10)
    sdf.format(calendar.getTime)
  }

  val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

  def main(args: Array[String]): Unit = {
    val producer = new KafkaProducer[String, String](Common.getProp)
    calendar.setTime(new Date())
    println(sdf.format(calendar.getTime))
    var i = 0
    while (true) {
      val map = Map(
        "id" -> i,
        "createTime" -> getCreateTime(),
        "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10)))
      val jsonObject: JSONObject = new JSONObject(map)
      println(jsonObject.toString())
      // topic current_day
      val msg = new ProducerRecord[String, String]("current_day", jsonObject.toString())
      producer.send(msg)
      producer.flush()
      if (MathUtil.random.nextBoolean()) {
        Thread.sleep(1500)
      } else {
        Thread.sleep(500)
      }
      i = i + 1
      // System.exit(-1)
    }
  }
}
Example 136
Source File: TimeSpec.scala From ez-framework with Apache License 2.0 | 5 votes |
package com.ecfront.ez.framework.core.misc

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}

import com.ecfront.ez.framework.core.BasicSpec

class TimeSpec extends BasicSpec {

  test("ZeroTimeOffset Test") {
    val dfd = new SimpleDateFormat("yyyyMMdd")

    def getZeroTimeOffset = {
      val currentTime = new Date()
      val currentDay = dfd.parse(dfd.format(currentTime))
      val calendar = Calendar.getInstance()
      calendar.setTime(currentDay)
      calendar.add(Calendar.DATE, 1)
      calendar.getTime.getTime - currentTime.getTime
    }

    println(getZeroTimeOffset)
    Thread.sleep(10000)
    println(getZeroTimeOffset)

    val calendar = Calendar.getInstance()
    calendar.setTimeInMillis(new Date().getTime + getZeroTimeOffset)
    println(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS").format(calendar.getTime))
  }
}
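For comparison, the same "milliseconds until the next midnight" offset can be computed without SimpleDateFormat by using java.time. This is a minimal sketch, not part of the ez-framework test, and it assumes the default system time zone is the zone the offset should be relative to.

import java.time.{Duration, LocalDate, LocalDateTime, ZoneId}

object ZeroTimeOffsetSketch {

  // Milliseconds from now until the next local midnight, mirroring getZeroTimeOffset above.
  def untilNextMidnightMillis(zone: ZoneId = ZoneId.systemDefault()): Long = {
    val now = LocalDateTime.now(zone)
    val nextMidnight = LocalDate.now(zone).plusDays(1).atStartOfDay()
    Duration.between(now, nextMidnight).toMillis
  }

  def main(args: Array[String]): Unit = {
    println(untilNextMidnightMillis())
  }
}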
Example 137
Source File: CurrentDayPvCountWaterMark.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.dayWindow

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import com.venn.source.TumblingEventTimeWindows
import org.apache.flink.api.common.functions.ReduceFunction
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.contrib.streaming.state.RocksDBStateBackend
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.triggers.{ContinuousEventTimeTrigger, ContinuousProcessingTimeTrigger, CountTrigger}
import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer, FlinkKafkaProducer}

// Note: this listing is a fragment. The object header plus the declarations of `env`, `sdf`,
// the Kafka source and the `Eventx` stream are not included here.

      .assignAscendingTimestamps(event => sdf.parse(event.createTime).getTime)
      // one-day event-time window, shifted by -8 hours so it rolls over at UTC+8 midnight
      .windowAll(TumblingEventTimeWindows.of(Time.days(1), Time.hours(-8)))
      .reduce(new ReduceFunction[Eventx] {
        override def reduce(event1: Eventx, event2: Eventx): Eventx = {
          // println("reduce event : " + event2.toString)
          // val minId: String = if (event1.id.compareTo(event2.id) >= 0) event2.id else event1.id
          // val maxId = if (event1.id.compareTo(event2.id) < 0) event1.id else event2.id
          // val minCreateTime = if (event1.createTime.compareTo(event2.createTime) >= 0) event2.createTime else event1.createTime
          // val maxCreateTime = if (event1.createTime.compareTo(event2.createTime) < 0) event1.createTime else event2.createTime
          // val count = event1.count + event2.count
          // new EventResult(minId, maxId, minCreateTime, maxCreateTime, count)
          new Eventx(event1.id, event2.id, event1.amt + event2.amt)
        }
      })
      // format output event, connect min/max id, add current timestamp
      // .map(event => Event(event.id + "-" + event.createTime, sdf.format(System.currentTimeMillis()), event.count))

    stream.print("result : ")

    // execute job
    env.execute("CurrentDayCount")
  }
}
Example 138
Source File: CurrentDayMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.dayWindow

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}

import com.venn.common.Common
import com.venn.util.MathUtil
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.parsing.json.JSONObject

object CurrentDayMaker {

  // The calendar declaration was stripped together with the object header; reconstructed here
  // (FileSinkMaker above imports CurrentDayMaker.calendar, so the member does exist).
  val calendar: Calendar = Calendar.getInstance()

  def getCreateTime(): String = {
    // minute = minute + 1
    calendar.add(Calendar.MINUTE, 10)
    sdf.format(calendar.getTime)
  }

  val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

  def main(args: Array[String]): Unit = {
    val producer = new KafkaProducer[String, String](Common.getProp)
    calendar.setTime(new Date())
    println(sdf.format(calendar.getTime))
    var i = 0
    while (true) {
      // val map = Map("id" -> i, "createTime" -> sdf.format(System.currentTimeMillis()))
      val map = Map(
        "id" -> i,
        "createTime" -> getCreateTime(),
        "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10)))
      val jsonObject: JSONObject = new JSONObject(map)
      println(jsonObject.toString())
      // topic current_day
      val msg = new ProducerRecord[String, String]("current_day", jsonObject.toString())
      producer.send(msg)
      producer.flush()
      Thread.sleep(1000)
      i = i + 1
      // System.exit(-1)
    }
  }
}
Example 139
Source File: PackageSpec.scala From sparkpipe-core with Apache License 2.0 | 5 votes |
package software.uncharted.sparkpipe.ops.core.dataframe.temporal

import org.scalatest._
import software.uncharted.sparkpipe.Spark
import software.uncharted.sparkpipe.ops.core.rdd.toDF
import java.text.SimpleDateFormat
import java.sql.Timestamp

class PackageSpec extends FunSpec {
  describe("ops.core.dataframe.temporal") {

    val rdd = Spark.sc.parallelize(Seq(
      (new Timestamp(new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-18").getTime), "2015-11-18", 1),
      (new Timestamp(new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-19").getTime), "2015-11-19", 2),
      (new Timestamp(new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-20").getTime), "2015-11-20", 3),
      (new Timestamp(new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-21").getTime), "2015-11-21", 4)
    ))
    val df = toDF(Spark.sparkSession)(rdd)

    describe("#dateFilter()") {
      it("should support filtering rows in an input DataFrame with a String timestamp column, based on a date range") {
        val df2 = dateFilter(
          new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-19"),
          new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-21"),
          "yyyy-MM-dd",
          "_2"
        )(df)
        assert(df2.count == 3)
      }

      it("should support filtering rows in an input DataFrame with a String timestamp column, based on a date range, specified using strings") {
        val df2 = dateFilter(
          "2015-11-19",
          "2015-11-20",
          "yyyy-MM-dd",
          "_2"
        )(df)
        assert(df2.count == 2)
      }

      it("should support filtering rows in an input DataFrame with a Timestamp timestamp column, based on a date range") {
        val df2 = dateFilter(
          new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-17"),
          new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-18"),
          "_1"
        )(df)
        assert(df2.count == 1)
      }
    }

    describe("#parseDate()") {
      it("should facilitate converting a string timestamp column into a TimestampType and adding it as a new column") {
        val df2 = parseDate("_2", "new", "yyyy-MM-dd")(df)
        assert(df2.filter("new = _1").count == df.count)
        assert(df2.schema.size == df.schema.size + 1)
      }
    }

    describe("#dateField()") {
      it("should facilitate extracting a single field from a Timestamp column, and placing it in a new column") {
        val df2 = dateField("_1", "new", java.util.Calendar.YEAR)(df)
        assert(df2.filter("new = 2015").count == df.count)
        assert(df2.schema.size == df.schema.size + 1)
      }
    }
  }
}
Example 140
Source File: DateUtilsTest.scala From bigdata-examples with Apache License 2.0 | 5 votes |
package com.timeyang.common.util

import java.text.SimpleDateFormat
import java.util.Calendar

import org.junit.Test

@Test
class DateUtilsTest {

  @Test
  def test(): Unit = {
    // scalastyle:off
    println(DateUtils.current())

    val formatter = new SimpleDateFormat("yyyyMMddHH")
    println(formatter.format(System.currentTimeMillis()))
    // scalastyle:on
  }

  @Test
  def testTime(): Unit = {
    val calendar = Calendar.getInstance()
    val hour = calendar.get(Calendar.HOUR_OF_DAY)
    // scalastyle:off println
    println(hour)
    // scalastyle:on println
    println

    calendar.add(Calendar.MILLISECOND, 60 * 60 * 1000)
    val hourOfNext = calendar.get(Calendar.HOUR_OF_DAY)
    print(hourOfNext)
  }
}
Example 141
Source File: RequestDSL.scala From twitter4s with Apache License 2.0 | 5 votes |
package com.danielasfregola.twitter4s.helpers

import java.text.SimpleDateFormat
import java.util.Locale

import akka.http.scaladsl.model._
import akka.http.scaladsl.model.headers.RawHeader
import akka.testkit.TestProbe
import com.danielasfregola.twitter4s.entities.RateLimit
import org.specs2.specification.AfterEach

import scala.concurrent.duration._
import scala.concurrent.{Await, Future}

abstract class RequestDSL extends TestActorSystem with FixturesSupport with AfterEach {

  def after = system.terminate

  private val timeout = 10 seconds

  val headers = List(RawHeader("x-rate-limit-limit", "15"),
                     RawHeader("x-rate-limit-remaining", "14"),
                     RawHeader("x-rate-limit-reset", "1445181993"))

  val rateLimit = {
    val dateFormatter = new SimpleDateFormat("EEE MMM dd HH:mm:ss ZZZZ yyyy", Locale.ENGLISH)
    val resetDate = dateFormatter.parse("Sun Oct 18 15:26:33 +0000 2015").toInstant
    new RateLimit(limit = 15, remaining = 14, reset = resetDate)
  }

  protected val transport = TestProbe()

  def when[T](future: Future[T]): RequestMatcher[T] = new RequestMatcher(future)

  class RequestMatcher[T](future: Future[T]) {
    protected def responder = new Responder(future)

    def expectRequest(req: HttpRequest): Responder[T] = {
      transport.expectMsg(timeout, req)
      responder
    }

    def expectRequest(fn: HttpRequest => Unit) = {
      transport.expectMsgPF(timeout) {
        case req: HttpRequest => fn(req)
      }
      responder
    }
  }

  class Responder[T](future: Future[T]) {
    def respondWith(response: HttpResponse): Await[T] = {
      transport.reply(response)
      new Await(future)
    }

    def respondWith(resourcePath: String): Await[T] =
      respondWith(HttpResponse(StatusCodes.OK, entity = HttpEntity(MediaTypes.`application/json`, load(resourcePath))))

    def respondWithRated(resourcePath: String): Await[T] =
      respondWith(
        HttpResponse(StatusCodes.OK,
                     headers = headers,
                     entity = HttpEntity(MediaTypes.`application/json`, load(resourcePath))))

    def respondWithOk: Await[Unit] = {
      val response =
        HttpResponse(StatusCodes.OK, entity = HttpEntity(MediaTypes.`application/json`, """{"code": "OK"}"""))
      transport.reply(response)
      new Await(Future.successful((): Unit))
    }
  }

  class Await[T](future: Future[T]) {
    private[helpers] val underlyingFuture = future
    def await(implicit duration: FiniteDuration = 20 seconds) = Await.result(future, duration)
  }

  implicit def awaitToReqMatcher[T](await: Await[T]) = new RequestMatcher(await.underlyingFuture)
}
Example 142
Source File: NetflixPrizeUtils.scala From zen with Apache License 2.0 | 5 votes |
package com.github.cloudml.zen.examples.ml

import java.text.SimpleDateFormat
import java.util.{Locale, TimeZone}

import breeze.linalg.{SparseVector => BSV}
import org.apache.spark.SparkContext
import org.apache.spark.mllib.linalg.{SparseVector => SSV}
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel

import scala.collection.mutable.ArrayBuffer

object NetflixPrizeUtils {

  def genSamplesWithTime(
      sc: SparkContext,
      input: String,
      numPartitions: Int = -1,
      newLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK): (RDD[(Long, LabeledPoint)], RDD[(Long, LabeledPoint)], Array[Long]) = {
    val probeFile = s"$input/probe.txt"
    val dataSetFile = s"$input/training_set
    // ... the remainder of this method (loading and splitting the Netflix data set) is truncated in this listing ...
    val views = Array(maxUserId, maxMovieId + maxUserId, numFeatures).map(_.toLong)

    (trainSet, testSet, views)
  }
}
Example 143
Source File: SparkStreamingAkkaTest.scala From apache-spark-test with Apache License 2.0 | 5 votes |
package com.github.dnvriend.spark.streaming.akka

import java.text.SimpleDateFormat

import akka.actor.Props
import com.github.dnvriend.TestSpec
import org.apache.spark.streaming.akka.{ ActorReceiver, AkkaUtils }

import scala.concurrent.duration._

class CustomActor extends ActorReceiver {
  import context.dispatcher
  def ping() = context.system.scheduler.scheduleOnce(200.millis, self, "foo")
  def today: String = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS").format(new java.util.Date)

  def receive = counter(0)

  def counter(x: Long): Receive = {
    case _ =>
      store(s"counter: $x, msg: Today is $today, have a nice day!")
      context.become(counter(x + 1))
      ping()
  }

  ping()
}

class SparkStreamingAkkaTest extends TestSpec {
  it should "stream from an actor" in withStreamingContext() { spark => ssc =>
    import spark.implicits._
    val lines = AkkaUtils.createStream[String](ssc, Props[CustomActor](), "CustomReceiver")
    lines.foreachRDD { rdd =>
      rdd.toDF.show(truncate = false)
    }

    ssc.start()

    advanceClockOneBatch(ssc)
    sleep()
    advanceClockOneBatch(ssc)
    sleep()
    advanceClockOneBatch(ssc)
    sleep()
    advanceClockOneBatch(ssc)
    sleep()
    advanceClockOneBatch(ssc)
    sleep()
  }
}
Example 144
Source File: CreatePosts.scala From apache-spark-test with Apache License 2.0 | 5 votes |
package com.github.dnvriend

import java.nio.file.Paths
import java.nio.file.StandardOpenOption._
import java.text.SimpleDateFormat
import java.util.Date

import akka.actor.{ ActorSystem, Terminated }
import akka.stream.scaladsl.{ FileIO, Source }
import akka.stream.{ ActorMaterializer, Materializer }
import akka.util.ByteString
import play.api.libs.json.Json

import scala.concurrent.{ ExecutionContext, Future }
import scala.util.Random

object CreatePosts extends App {
  implicit val system: ActorSystem = ActorSystem()
  implicit val mat: Materializer = ActorMaterializer()
  implicit val ec: ExecutionContext = system.dispatcher

  def terminate: Future[Terminated] = system.terminate()

  sys.addShutdownHook {
    terminate
  }

  object Post {
    implicit val format = Json.format[Post]
  }

  final case class Post(
    commentCount: Int,
    lastActivityDate: String,
    ownerUserId: Long,
    body: String,
    score: Int,
    creationDate: String,
    viewCount: Int,
    title: String,
    tags: String,
    answerCount: Int,
    acceptedAnswerId: Long,
    postTypeId: Long,
    id: Long
  )

  def rng = Random.nextInt(20000)

  def now: String = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX").format(new Date())

  val lorem = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nam fringilla magna et pharetra vestibulum."
  val title = " Ut id placerat sapien. Aliquam vel metus orci."

  Source.fromIterator(() => Iterator from 0).map { id =>
    Post(rng, now, rng, List.fill(Random.nextInt(5))(lorem).mkString("\n"), rng, now, rng, s"$rng - $title", title, rng, rng, rng, id)
  }.map(Json.toJson(_).toString)
    .map(json => ByteString(json + "\n"))
    .take(1000000)
    .via(LogProgress.flow())
    .runWith(FileIO.toPath(Paths.get("/tmp/posts.json"), Set(WRITE, TRUNCATE_EXISTING, CREATE)))
    .flatMap { done =>
      println(done)
      terminate
    }.recoverWith {
      case cause: Throwable =>
        cause.printStackTrace()
        terminate
    }
}
Example 145
Source File: Implicits.scala From activemq-cli with Apache License 2.0 | 5 votes |
package activemq.cli.util

import com.typesafe.config.Config
import java.util.Date
import java.util.Locale
import java.text.SimpleDateFormat
import javax.jms.Message
import javax.jms.TextMessage
import scala.collection.JavaConversions._

object Implicits {

  implicit class RichConfig(val underlying: Config) extends AnyVal {
    def getOptionalString(path: String): Option[String] = if (underlying.hasPath(path)) {
      Some(underlying.getString(path))
    } else {
      None
    }
  }

  implicit def optionStringToBoolean(o: Option[String]): Boolean = {
    !o.getOrElse("").isEmpty
  }

  implicit class MessageImprovements(val message: Message) {

    val prettyPrinter = new scala.xml.PrettyPrinter(100000, 2) //scalastyle:ignore

    def toXML(timestampFormat: Option[String] = None): String = {

      val addOptional = (condition: Boolean, xml: scala.xml.Elem) ⇒ if (condition) xml else scala.xml.NodeSeq.Empty

      prettyPrinter.format(<jms-message>
        <header>
          <message-id>{ message.getJMSMessageID }</message-id>
          { addOptional(Option(message.getJMSCorrelationID).isDefined, <correlation-id>{ message.getJMSCorrelationID }</correlation-id>) }
          <delivery-mode>{ message.getJMSDeliveryMode }</delivery-mode>
          <destination>{ message.getJMSDestination }</destination>
          <expiration>{ message.getJMSExpiration }</expiration>
          <priority>{ message.getJMSPriority }</priority>
          <redelivered>{ message.getJMSRedelivered }</redelivered>
          { addOptional(Option(message.getJMSReplyTo).isDefined, <reply-to>{ message.getJMSReplyTo }</reply-to>) }
          <timestamp>{
            timestampFormat match {
              case Some(matched) ⇒ new SimpleDateFormat(matched).format(new Date(message.getJMSTimestamp))
              case _ ⇒ message.getJMSTimestamp
            }
          }</timestamp>
          { addOptional(Option(message.getJMSType).isDefined, <type>{ message.getJMSType }</type>) }
        </header>
        {
          addOptional(message.getPropertyNames.hasMoreElements, <properties>
            { message.getPropertyNames.map(name ⇒ <property><name>{ name }</name><value>{ message.getStringProperty(name.toString) }</value></property>) }
          </properties>)
        }
        {
          message match {
            case textMessage: TextMessage if Option(textMessage.getText).isDefined ⇒ addOptional(
              textMessage.getText,
              <body>{ scala.xml.PCData(textMessage.getText.replaceAll("]]>", "]]]]><![CDATA[>")) }</body>
            )
            case _ ⇒ scala.xml.NodeSeq.Empty
          }
        }
      </jms-message>)
    }

    def textMatches(regex: String): Boolean = {
      if (regex) {
        message match {
          case textMessage: TextMessage ⇒ (regex.r findFirstIn textMessage.getText)
          case _ ⇒ false
        }
      } else {
        true
      }
    }
  }
}
Example 146
Source File: DateUtils.scala From sundial with MIT License | 5 votes |
package util

import java.text.SimpleDateFormat
import java.util.Date
import java.util.concurrent.TimeUnit

object DateUtils {

  val basicDateTimeFormat = new SimpleDateFormat("MMM d, H:mm z")

  def prettyRelativeTime(when: Date, now: Date): String = {
    s"${prettyDuration(when, now)} ago"
  }

  def prettyDuration(start: Date, end: Date): String = {
    val diff = end.getTime - start.getTime
    prettyDuration(diff, TimeUnit.MILLISECONDS)
  }

  def prettyDuration(amount: Long, unit: TimeUnit): String = {
    if (unit.toSeconds(amount) < 120) {
      s"${unit.toSeconds(amount)} seconds"
    } else if (unit.toMinutes(amount) < 180) {
      s"${unit.toMinutes(amount)} minutes"
    } else if (unit.toHours(amount) < 72) {
      s"${unit.toHours(amount)} hours"
    } else {
      s"${unit.toDays(amount)} days"
    }
  }
}
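A rough usage sketch for the helpers above; the DateUtilsUsage object is hypothetical (not part of sundial) and only illustrates the unit thresholds in prettyDuration.

package util

import java.util.Date
import java.util.concurrent.TimeUnit

// Hypothetical driver, placed in the same `util` package for brevity.
object DateUtilsUsage {
  def main(args: Array[String]): Unit = {
    println(DateUtils.prettyDuration(90, TimeUnit.SECONDS))  // "90 seconds"  (below the 120-second cutoff)
    println(DateUtils.prettyDuration(150, TimeUnit.MINUTES)) // "150 minutes" (below the 180-minute cutoff)

    val now = new Date()
    val anHourAgo = new Date(now.getTime - TimeUnit.HOURS.toMillis(1))
    println(DateUtils.prettyRelativeTime(anHourAgo, now))    // "60 minutes ago"
  }
}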
Example 147
Source File: Util.scala From aardpfark with Apache License 2.0 | 5 votes |
package com.ibm.aardpfark.pfa.utils

import java.text.SimpleDateFormat
import java.util.{Date, TimeZone}

object Utils {

  def getCurrentDate = {
    val fmt = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'")
    fmt.setTimeZone(TimeZone.getTimeZone("UTC"))
    fmt.format(new Date())
  }

  def getCurrentTs = {
    new Date().getTime
  }
}
Example 148
Source File: StreamingProducer.scala From Scala-Programming-Projects with MIT License | 4 votes |
package coinyser

import java.sql.Timestamp
import java.text.SimpleDateFormat
import java.util.TimeZone

import cats.effect.IO
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.pusher.client.Client
import com.pusher.client.channel.SubscriptionEventListener
import com.typesafe.scalalogging.StrictLogging

object StreamingProducer extends StrictLogging {

  def subscribe(pusher: Client)(onTradeReceived: String => Unit): IO[Unit] =
    for {
      _ <- IO(pusher.connect())
      channel <- IO(pusher.subscribe("live_trades"))
      _ <- IO(channel.bind("trade", new SubscriptionEventListener() {
        override def onEvent(channel: String, event: String, data: String): Unit = {
          logger.info(s"Received event: $event with data: $data")
          onTradeReceived(data)
        }
      }))
    } yield ()

  val mapper: ObjectMapper = {
    val m = new ObjectMapper()
    m.registerModule(DefaultScalaModule)
    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    // Very important: the storage must be in UTC
    sdf.setTimeZone(TimeZone.getTimeZone("UTC"))
    m.setDateFormat(sdf)
  }

  def deserializeWebsocketTransaction(s: String): WebsocketTransaction =
    mapper.readValue(s, classOf[WebsocketTransaction])

  def convertWsTransaction(wsTx: WebsocketTransaction): Transaction =
    Transaction(
      timestamp = new Timestamp(wsTx.timestamp.toLong * 1000),
      tid = wsTx.id,
      price = wsTx.price,
      sell = wsTx.`type` == 1,
      amount = wsTx.amount)

  def serializeTransaction(tx: Transaction): String = mapper.writeValueAsString(tx)
}
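A minimal round-trip sketch for the mappers above. The WebsocketTransaction and Transaction case classes are not included in this listing, so the JSON field names below are inferred from convertWsTransaction and should be treated as assumptions rather than the real Bitstamp payload.

package coinyser

// Hypothetical driver, not part of the original project.
object StreamingProducerRoundTrip {
  def main(args: Array[String]): Unit = {
    // Field names inferred from convertWsTransaction; the real payload may carry more fields.
    val sample = """{"timestamp":"1530796800","id":42,"price":6820.5,"type":1,"amount":0.25}"""
    val wsTx = StreamingProducer.deserializeWebsocketTransaction(sample)
    val tx = StreamingProducer.convertWsTransaction(wsTx)
    // The mapper serializes the Timestamp as "yyyy-MM-dd HH:mm:ss" in UTC, as configured above.
    println(StreamingProducer.serializeTransaction(tx))
  }
}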