java.text.SimpleDateFormat Scala Examples

The following examples show how to use java.text.SimpleDateFormat in Scala. Each example is taken from an open-source project; the source file, project, and license are noted in the header above each example.
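Before diving into the project examples, here is a minimal, self-contained sketch of the basic format/parse round trip (the object name, pattern string, and time zone below are arbitrary choices for illustration, not taken from any of the projects). Note that SimpleDateFormat is not thread-safe, which is why most of the examples below construct an instance right where it is needed rather than sharing one across threads.

import java.text.SimpleDateFormat
import java.util.{Date, TimeZone}

object SimpleDateFormatDemo {
  def main(args: Array[String]): Unit = {
    // Pattern letters: yyyy = year, MM = month, dd = day of month,
    // HH = hour of day (0-23), mm = minute, ss = second
    val fmt = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss")
    fmt.setTimeZone(TimeZone.getTimeZone("UTC"))

    val formatted: String = fmt.format(new Date())        // Date -> String
    val parsed: Date = fmt.parse("2015-05-05T12:30:00")   // String -> Date; throws ParseException on bad input

    println(s"formatted=$formatted, parsed=$parsed")
  }
}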
Example 1
Source File: JacksonMessageWriter.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.status.api.v1

import java.io.OutputStream
import java.lang.annotation.Annotation
import java.lang.reflect.Type
import java.nio.charset.StandardCharsets
import java.text.SimpleDateFormat
import java.util.{Calendar, SimpleTimeZone}
import javax.ws.rs.Produces
import javax.ws.rs.core.{MediaType, MultivaluedMap}
import javax.ws.rs.ext.{MessageBodyWriter, Provider}

import com.fasterxml.jackson.annotation.JsonInclude
import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}


@Provider
@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{

  val mapper = new ObjectMapper() {
    override def writeValueAsString(t: Any): String = {
      super.writeValueAsString(t)
    }
  }
  mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule)
  mapper.enable(SerializationFeature.INDENT_OUTPUT)
  mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL)
  mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat)

  override def isWriteable(
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType): Boolean = {
      true
  }

  override def writeTo(
      t: Object,
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType,
      multivaluedMap: MultivaluedMap[String, AnyRef],
      outputStream: OutputStream): Unit = {
    t match {
      case ErrorWrapper(err) => outputStream.write(err.getBytes(StandardCharsets.UTF_8))
      case _ => mapper.writeValue(outputStream, t)
    }
  }

  override def getSize(
      t: Object,
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType): Long = {
    -1L
  }
}

private[spark] object JacksonMessageWriter {
  def makeISODateFormat: SimpleDateFormat = {
    val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'")
    val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT"))
    iso8601.setCalendar(cal)
    iso8601
  }
} 
Example 2
Source File: PMMLModelExport.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.mllib.pmml.export

import java.text.SimpleDateFormat
import java.util.Date

import scala.beans.BeanProperty

import org.dmg.pmml.{Application, Header, PMML, Timestamp}

private[mllib] trait PMMLModelExport {

  @BeanProperty
  val pmml: PMML = {
    val version = getClass.getPackage.getImplementationVersion
    val app = new Application("Apache Spark MLlib").setVersion(version)
    val timestamp = new Timestamp()
      .addContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(new Date()))
    val header = new Header()
      .setApplication(app)
      .setTimestamp(timestamp)
    new PMML("4.2", header, null)
  }
} 
Example 3
Source File: SimpleDateParam.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.status.api.v1

import java.text.{ParseException, SimpleDateFormat}
import java.util.TimeZone
import javax.ws.rs.WebApplicationException
import javax.ws.rs.core.Response
import javax.ws.rs.core.Response.Status

private[v1] class SimpleDateParam(val originalValue: String) {

  val timestamp: Long = {
    val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz")
    try {
      format.parse(originalValue).getTime()
    } catch {
      case _: ParseException =>
        val gmtDay = new SimpleDateFormat("yyyy-MM-dd")
        gmtDay.setTimeZone(TimeZone.getTimeZone("GMT"))
        try {
          gmtDay.parse(originalValue).getTime()
        } catch {
          case _: ParseException =>
            throw new WebApplicationException(
              Response
                .status(Status.BAD_REQUEST)
                .entity("Couldn't parse date: " + originalValue)
                .build()
            )
        }
    }
  }
} 
Example 4
Source File: Train.scala    From BigDL   with Apache License 2.0
package com.intel.analytics.bigdl.models.vgg

import java.text.SimpleDateFormat
import java.util.Date

import com.intel.analytics.bigdl._
import com.intel.analytics.bigdl.dataset.DataSet
import com.intel.analytics.bigdl.dataset.image._
import com.intel.analytics.bigdl.nn.{ClassNLLCriterion, Module}
import com.intel.analytics.bigdl.optim._
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric._
import com.intel.analytics.bigdl.utils.{Engine, LoggerFilter, OptimizerV1, OptimizerV2, T, Table}
import com.intel.analytics.bigdl.visualization.{TrainSummary, ValidationSummary}
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkContext

object Train {
  LoggerFilter.redirectSparkInfoLogs()


  import Utils._

  def main(args: Array[String]): Unit = {
    trainParser.parse(args, new TrainParams()).map(param => {
      val conf = Engine.createSparkConf().setAppName("Train Vgg on Cifar10")
        // Will throw exception without this config when has only one executor
          .set("spark.rpc.message.maxSize", "200")
      val sc = new SparkContext(conf)
      Engine.init

      val trainDataSet = DataSet.array(Utils.loadTrain(param.folder), sc) ->
        BytesToBGRImg() -> BGRImgNormalizer(trainMean, trainStd) ->
        BGRImgToBatch(param.batchSize)

      val model = if (param.modelSnapshot.isDefined) {
        Module.load[Float](param.modelSnapshot.get)
      } else {
        if (param.graphModel) VggForCifar10.graph(classNum = 10) else VggForCifar10(classNum = 10)
      }

      if (param.optimizerVersion.isDefined) {
        param.optimizerVersion.get.toLowerCase match {
          case "optimizerv1" => Engine.setOptimizerVersion(OptimizerV1)
          case "optimizerv2" => Engine.setOptimizerVersion(OptimizerV2)
        }
      }

      val optimMethod = if (param.stateSnapshot.isDefined) {
        OptimMethod.load[Float](param.stateSnapshot.get)
      } else {
        new SGD[Float](learningRate = param.learningRate, learningRateDecay = 0.0,
          weightDecay = param.weightDecay, momentum = 0.9, dampening = 0.0, nesterov = false,
          learningRateSchedule = SGD.EpochStep(25, 0.5))
      }

      val optimizer = Optimizer(
        model = model,
        dataset = trainDataSet,
        criterion = new ClassNLLCriterion[Float]()
      )

      val validateSet = DataSet.array(Utils.loadTest(param.folder), sc) ->
        BytesToBGRImg() -> BGRImgNormalizer(testMean, testStd) ->
        BGRImgToBatch(param.batchSize)

      if (param.checkpoint.isDefined) {
        optimizer.setCheckpoint(param.checkpoint.get, Trigger.everyEpoch)
      }

      if (param.overWriteCheckpoint) {
        optimizer.overWriteCheckpoint()
      }

      if (param.summaryPath.isDefined) {
        val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
        val timeStamp = sdf.format(new Date())
        val trainSummry = new TrainSummary(param.summaryPath.get,
          s"vgg-on-cifar10-train-$timeStamp")
        optimizer.setTrainSummary(trainSummry)
        val validationSummary = new ValidationSummary(param.summaryPath.get,
          s"vgg-on-cifar10-val-$timeStamp")
        optimizer.setValidationSummary(validationSummary)
      }

      optimizer
        .setValidation(Trigger.everyEpoch, validateSet, Array(new Top1Accuracy[Float]))
        .setOptimMethod(optimMethod)
        .setEndWhen(Trigger.maxEpoch(param.maxEpoch))
        .optimize()

      sc.stop()
    })
  }
} 
Example 5
Source File: WikiETL.scala    From CarbonDataLearning   with GNU General Public License v3.0
package org.github.xubo245.carbonDataLearning.etl

import java.io.{File, PrintWriter}
import java.text.SimpleDateFormat
import java.util.Date

import scala.io.Source
import scala.util.Random

object WikiETL {
  def main(args: Array[String]): Unit = {
    val directory = "/root/xubo/data"
    val files = new File(directory)
    val out = new PrintWriter("/root/xubo/data/pageviews-20150505time")
    var flag:Int = 10000000;
    var typeMap= Map (("b","wikibooks")
      ,("d","wiktionary")
      ,("m","wikimedia")
      ,("mw","wikipedia mobile")
      ,("n","wikinews")
      ,("q","wikiquote")
      ,("s","wikisource")
      ,("v","wikiversity")
      ,("w","mediawiki"))

    for (file <- files.listFiles().sorted.filter(_.getCanonicalFile.getName.contains("pageviews-20150505-"))) {
      val filePath = file.getCanonicalPath
      println(filePath)
      //            val out = new PrintWriter(filePath + "WithTime")
      val reader = Source.fromFile(filePath)
      val fileName = file.getCanonicalFile.getName
      val delimiter = "\t"
      for (line <- reader.getLines()) {
        val stringBuffer = new StringBuffer()
        val random = new Random()
        val id = flag+random.nextInt(1000000)
        stringBuffer
          .append(id).append(delimiter)
          .append(fileName.substring(10, 14)).append(delimiter)
          .append(fileName.substring(14, 16)).append(delimiter)
          .append(fileName.substring(16, 18)).append(delimiter)
          .append(fileName.substring(19, 21)).append(delimiter)
        val array=line.mkString.split("\\s+")

        if (array.length == 4 && array(2).matches("[0-9]*") && !array(1).contains("\"")) {
          val domain = array(0).split('.')
          stringBuffer.append(domain(0)).append(delimiter)
          if (domain.length > 1) {
            var value: String = typeMap.getOrElse(domain(1), "wiki")
            stringBuffer.append(value).append(delimiter)
          } else {
            stringBuffer.append("wiki").append(delimiter)
          }
          val time = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date());
          val tid= id*10+random.nextInt(5)
          stringBuffer.append(array(1).replace('_',' ')).append(delimiter)
            .append(tid).append(delimiter)
            .append(array(2)).append(delimiter)
            .append(random.nextInt(100000)).append(delimiter)
            .append(time)

          //          for (i <- 0 until array.length-1){
          //            stringBuffer.append(array(i)).append(delimiter)
          //          }
          //          stringBuffer.append(array(array.length-1))

          //        if (array.length == 4 && array(2).matches("[0-9]*")) {
          //          id = id + 1
          out.println(stringBuffer.toString)
        }
      }
    }
    out.close()
  }
} 
Example 6
Source File: DataConverter.scala    From spark-cdm   with MIT License
package com.microsoft.cdm.utils

import java.text.SimpleDateFormat
import java.util.{Locale, TimeZone}
import java.sql.Timestamp

import org.apache.commons.lang.time.DateUtils
import org.apache.spark.sql.catalyst.util.TimestampFormatter
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String


class DataConverter() extends Serializable {

  val dateFormatter = new SimpleDateFormat(Constants.SINGLE_DATE_FORMAT)
  val timestampFormatter = TimestampFormatter(Constants.TIMESTAMP_FORMAT, TimeZone.getTimeZone("UTC"))


  val toSparkType: Map[CDMDataType.Value, DataType] = Map(
    CDMDataType.int64 -> LongType,
    CDMDataType.dateTime -> DateType,
    CDMDataType.string -> StringType,
    CDMDataType.double -> DoubleType,
    CDMDataType.decimal -> DecimalType(Constants.DECIMAL_PRECISION,0),
    CDMDataType.boolean -> BooleanType,
    CDMDataType.dateTimeOffset -> TimestampType
  )

  def jsonToData(dt: DataType, value: String): Any = {
    return dt match {
      case LongType => value.toLong
      case DoubleType => value.toDouble
      case DecimalType() => Decimal(value)
      case BooleanType => value.toBoolean
      case DateType => dateFormatter.parse(value)
      case TimestampType => timestampFormatter.parse(value)
      case _ => UTF8String.fromString(value)
    }
  }

  def toCdmType(dt: DataType): CDMDataType.Value = {
    return dt match {
      case IntegerType => CDMDataType.int64
      case LongType => CDMDataType.int64
      case DateType => CDMDataType.dateTime
      case StringType => CDMDataType.string
      case DoubleType => CDMDataType.double
      case DecimalType() => CDMDataType.decimal
      case BooleanType => CDMDataType.boolean
      case TimestampType => CDMDataType.dateTimeOffset
    }
  }  

  def dataToString(data: Any, dataType: DataType): String = {
    (dataType, data) match {
      case (_, null) => null
      case (DateType, _) => dateFormatter.format(data)
      case (TimestampType, v: Number) => timestampFormatter.format(data.asInstanceOf[Long])
      case _ => data.toString
    }
  }

} 
Example 7
Source File: CORSFilter.scala    From daf-semantics   with Apache License 2.0
package it.almawave.kb.http.providers

import javax.ws.rs.container.ContainerRequestContext
import javax.ws.rs.container.ContainerResponseContext
import javax.ws.rs.container.ContainerResponseFilter
import javax.ws.rs.ext.Provider
import java.text.SimpleDateFormat
import java.util.Date
import java.net.URI

@Provider
class CORSFilter extends ContainerResponseFilter {

  override def filter(request: ContainerRequestContext, response: ContainerResponseContext) {

    val headers = response.getHeaders()

    headers.add("Access-Control-Allow-Origin", "*")
    headers.add("Access-Control-Allow-Headers", "origin, content-type, accept, authorization")
    headers.add("Access-Control-Allow-Credentials", "true")
    headers.add("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS, HEAD")

    // custom headers
    //    headers.add("Server", "Simple Jersey/Jetty HTTP server for RDF")
    //    headers.add("Pragma", "Pragma: no-cache")
    //    headers.add("Link", new URI("http://almawave.it"))

  }

} 
Example 8
Source File: EachRunNewFileAppender.scala    From sddf   with GNU General Public License v3.0
package de.unihamburg.vsis.sddf.logging

import java.io.File
import java.text.SimpleDateFormat
import java.util.Date

import org.apache.log4j.FileAppender

import de.unihamburg.vsis.sddf.config.Config

class EachRunNewFileAppender extends FileAppender {
  override def setFile(fileName: String, append: Boolean, bufferedIO: Boolean, bufferSize: Int) = {
    val oldFile = new File(fileName)
    val dir = if (oldFile.isDirectory()) oldFile else oldFile.getParentFile
    val fileSuffix = if (oldFile.isDirectory()) ".log" else oldFile.getName
    val newFileName = EachRunNewFileAppender.runUuid + fileSuffix
    val newLogFile = new File(dir, newFileName)
    super.setFile(newLogFile.getPath, append, bufferedIO, bufferSize)
  }
}

object EachRunNewFileAppender {
  val dateFormat = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss");
  val runUuid = System.currentTimeMillis().toString() + "-" + dateFormat.format(new Date())
} 
Example 9
Source File: DataWorkCloudEngineApplication.scala    From Linkis   with Apache License 2.0
package com.webank.wedatasphere.linkis.engine

import java.text.SimpleDateFormat
import java.util.Date

import com.webank.wedatasphere.linkis.DataWorkCloudApplication
import com.webank.wedatasphere.linkis.common.conf.DWCArgumentsParser
import com.webank.wedatasphere.linkis.common.utils.Utils
import com.webank.wedatasphere.linkis.engine.conf.EngineConfiguration
import com.webank.wedatasphere.linkis.server.conf.ServerConfiguration
import org.apache.commons.lang.StringUtils
import org.slf4j.LoggerFactory


object DataWorkCloudEngineApplication {

  val userName:String = System.getProperty("user.name")
  val hostName:String = Utils.getComputerName
  val appName:String = EngineConfiguration.ENGINE_SPRING_APPLICATION_NAME.getValue
  val prefixName:String = EngineConfiguration.ENGINE_LOG_PREFIX.getValue
  val timeStamp:Long = System.currentTimeMillis()
  private val timeFormat = new SimpleDateFormat("yyyy-MM-dd_HH:mm:ss")
  private val dateFormat = new SimpleDateFormat("yyyy-MM-dd")
  val time:String = timeFormat.format(new Date(timeStamp))
  val date:String = dateFormat.format(new Date(timeStamp))

  val isTimeStampSuffix:Boolean = "true".equalsIgnoreCase(EngineConfiguration.ENGINE_LOG_TIME_STAMP_SUFFIX.getValue)
  val shortLogFile:String =
    if (isTimeStampSuffix) appName + "_" + hostName + "_" + userName + "_"  + time + ".log"
    else appName + "_" + hostName + "_" + userName + ".log"
  val logName:String =
    if(isTimeStampSuffix) prefixName + "/" + userName + "/" + shortLogFile
    else prefixName + "/" + shortLogFile
  System.setProperty("engineLogFile", logName)
  System.setProperty("shortEngineLogFile", shortLogFile)
//  System.setProperty("engineLogFile", logName)
//  val context:LoggerContext = LogManager.getContext(false).asInstanceOf[LoggerContext]
//  val path:String = getClass.getResource("/").getPath
//  val log4j2XMLFile:File = new File(path + "/log4j2-engine.xml")
//  val configUri:URI = log4j2XMLFile.toURI
//  context.setConfigLocation(configUri)
  private val logger = LoggerFactory.getLogger(getClass)
  logger.info(s"Now log4j2 Rolling File is set to be $logName")
  logger.info(s"Now shortLogFile is set to be $shortLogFile")
  def main(args: Array[String]): Unit = {
    val parser = DWCArgumentsParser.parse(args)
    DWCArgumentsParser.setDWCOptionMap(parser.getDWCConfMap)
    val existsExcludePackages = ServerConfiguration.BDP_SERVER_EXCLUDE_PACKAGES.getValue
    if(StringUtils.isEmpty(existsExcludePackages))
      DataWorkCloudApplication.setProperty(ServerConfiguration.BDP_SERVER_EXCLUDE_PACKAGES.key, "com.webank.wedatasphere.linkis.enginemanager")
    else
      DataWorkCloudApplication.setProperty(ServerConfiguration.BDP_SERVER_EXCLUDE_PACKAGES.key, existsExcludePackages + ",com.webank.wedatasphere.linkis.enginemanager")
    DataWorkCloudApplication.main(DWCArgumentsParser.formatSpringOptions(parser.getSpringConfMap))
  }
} 
Example 10
Source File: ApplicationUtil.scala    From Linkis   with Apache License 2.0
package com.webank.wedatasphere.linkis.application.util

import java.text.SimpleDateFormat
import java.util.Date

import com.webank.wedatasphere.linkis.application.conf.{ApplicationConfiguration, ApplicationScalaConfiguration}


object ApplicationUtil {
  def getFlowsJson(user:String,date:Date):String ={
    val initExamplePath = ApplicationScalaConfiguration.INIT_EXAMPLE_PATH.getValue.toString + user + "/application/dataStudio/"
    val sqlName = ApplicationScalaConfiguration.INIT_EXAMPLE_SQL_NAME.getValue.toString
    val scalaName = ApplicationScalaConfiguration.INIT_EXAMPLE_SCALA_NAME.getValue.toString
    val spyName = ApplicationScalaConfiguration.INIT_EXAMPLE_SPY_NAME.getValue.toString
    val hqlName = ApplicationScalaConfiguration.INIT_EXAMPLE_HQL_NAME.getValue.toString
    val pythonName = ApplicationScalaConfiguration.INIT_EXAMPLE_PYTHON_NAME.getValue.toString
    val formateDate =  new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(date)
    s"""[{"id":1,"name":"Default business process(默认业务流程)","createTime":"$formateDate","lastUpdateTime":"","description":"Default business process(默认业务流程)","version":"1.0.0","owner":"$user","canPublished":false,"params":{},"relations":[],"projectChildren":[],"flowChildren":[],"nodeChildren":{"dataExchange":[],"dataStudio":[{"id":1,"name":"$sqlName","type":"${sqlName.substring(sqlName.lastIndexOf(".")+1)}","createTime":"$formateDate","lastUpdateTime":"","description":"","version":"1.0.0","owner":"$user","content":{"scriptPath":"${initExamplePath + sqlName}"}},{"id":2,"name":"$scalaName","type":"${scalaName.substring(scalaName.lastIndexOf(".")+1)}","createTime":"$formateDate","lastUpdateTime":"","description":"","version":"1.0.0","owner":"$user","content":{"scriptPath":"${initExamplePath + scalaName}"}},{"id":3,"name":"$spyName","type":"${spyName.substring(spyName.lastIndexOf(".")+1)}","createTime":"$formateDate","lastUpdateTime":"","description":"","version":"1.0.0","owner":"$user","content":{"scriptPath":"${initExamplePath + spyName}"}},{"id":4,"name":"$hqlName","type":"${hqlName.substring(hqlName.lastIndexOf(".")+1)}","createTime":"$formateDate","lastUpdateTime":"","description":"","version":"1.0.0","owner":"$user","content":{"scriptPath":"${initExamplePath + hqlName}"}},{"id":5,"name":"$pythonName","type":"${pythonName.substring(pythonName.lastIndexOf(".")+1)}","createTime":"$formateDate","lastUpdateTime":"","description":"","version":"1.0.0","owner":"$user","content":{"scriptPath":"${initExamplePath + pythonName}"}}],"dataBI":[],"resources":[]}}]"""
  }
} 
Example 11
Source File: DWSHttpClient.scala    From Linkis   with Apache License 2.0
class DWSHttpClient(clientConfig: DWSClientConfig, clientName: String)
  extends AbstractHttpClient(clientConfig, clientName) {

  override protected def createDiscovery(): Discovery = new DWSGatewayDiscovery


  override protected def prepareAction(requestAction: HttpAction): HttpAction = {
    requestAction match {
      case dwsAction: DWSHttpAction => dwsAction.setDWSVersion(clientConfig.getDWSVersion)
      case _ =>
    }
    requestAction
  }

  override protected def httpResponseToResult(response: HttpResponse, requestAction: HttpAction, responseBody: String): Option[Result] = {
    var entity = response.getEntity
    val statusCode: Int = response.getStatusLine.getStatusCode
    val url: String = requestAction.getURL
    val contentType: String = entity.getContentType.getValue
    DWSHttpMessageFactory.getDWSHttpMessageResult(url).map { case DWSHttpMessageResultInfo(_, clazz) =>
      clazz match {
        case c if ClassUtils.isAssignable(c, classOf[DWSResult]) =>
          val dwsResult = clazz.getConstructor().newInstance().asInstanceOf[DWSResult]
          dwsResult.set(responseBody, statusCode, url, contentType)
          BeanUtils.populate(dwsResult, dwsResult.getData)
          return Some(dwsResult)
        case _ =>
      }

      def transfer(value: Result, map: Map[String, Object]): Unit = {
        value match {
          case httpResult: HttpResult =>
            httpResult.set(responseBody, statusCode, url, contentType)
          case _ =>
        }
        val javaMap = mapAsJavaMap(map)
        BeanUtils.populate(value, javaMap)
        fillResultFields(javaMap, value)
      }
      deserializeResponseBody(response) match {
        case map: Map[String, Object] =>
          val value = clazz.getConstructor().newInstance().asInstanceOf[Result]
          transfer(value, map)
          value
        case list: List[Map[String, Object]] =>
          val results = list.map { map =>
            val value = clazz.getConstructor().newInstance().asInstanceOf[Result]
            transfer(value, map)
            value
          }.toArray
          new ListResult(responseBody, results)
      }
    }.orElse(nonDWSResponseToResult(response, requestAction))
  }

  protected def nonDWSResponseToResult(response: HttpResponse, requestAction: HttpAction): Option[Result] = None

  protected def fillResultFields(responseMap: util.Map[String, Object], value: Result): Unit = {}

  //TODO Consistent with workspace, plus expiration time(与workspace保持一致,加上过期时间)
  override protected def getFsByUser(user: String, path: FsPath): Fs = FSFactory.getFsByProxyUser(path, user)

}
object DWSHttpClient {
  val jacksonJson = new ObjectMapper().setDateFormat(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ"))
} 
Example 12
Source File: TypeCast.scala    From spark-select   with Apache License 2.0
package io.minio.spark.select.util

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.{SimpleDateFormat, NumberFormat}
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try


object TypeCast {

  @throws[IllegalArgumentException]
  private[select] def toChar(str: String): Char = {
    if (str.charAt(0) == '\\') {
      str.charAt(1)
      match {
        case 't' => '\t'
        case 'r' => '\r'
        case 'b' => '\b'
        case 'f' => '\f'
        case '\"' => '\"' // In case user changes quote char and uses \" as delimiter in options
        case '\'' => '\''
        case 'u' if str == """\u0000""" => '\u0000'
        case _ =>
          throw new IllegalArgumentException(s"Unsupported special character for delimiter: $str")
      }
    } else if (str.length == 1) {
      str.charAt(0)
    } else {
      throw new IllegalArgumentException(s"Delimiter cannot be more than one character: $str")
    }
  }
} 
Example 13
Source File: Authorize.scala    From keycloak-benchmark   with Apache License 2.0
package io.gatling.keycloak

import java.text.SimpleDateFormat
import java.util.{Date, Collections}

import akka.actor.ActorDSL.actor
import akka.actor.ActorRef
import io.gatling.core.action.Interruptable
import io.gatling.core.action.builder.ActionBuilder
import io.gatling.core.config.Protocols
import io.gatling.core.result.writer.DataWriterClient
import io.gatling.core.session._
import io.gatling.core.validation._
import org.jboss.logging.Logger
import org.keycloak.adapters.spi.AuthOutcome
import org.keycloak.adapters.KeycloakDeploymentBuilder
import org.keycloak.adapters.spi.HttpFacade.Cookie
import org.keycloak.common.enums.SslRequired
import org.keycloak.representations.adapters.config.AdapterConfig

import scala.collection.JavaConverters._

case class AuthorizeAttributes(
  requestName: Expression[String],
  uri: Expression[String],
  cookies: Expression[List[Cookie]],
  sslRequired: SslRequired = SslRequired.EXTERNAL,
  resource: String = null,
  password: String = null,
  realm: String = null,
  realmKey: String = null,
  authServerUrl: Expression[String] = _ => Failure("no server url")
) {
  def toAdapterConfig(session: Session) = {
    val adapterConfig = new AdapterConfig
    adapterConfig.setSslRequired(sslRequired.toString)
    adapterConfig.setResource(resource)
    adapterConfig.setCredentials(Collections.singletonMap("secret", password))
    adapterConfig.setRealm(realm)
    adapterConfig.setRealmKey(realmKey)
    adapterConfig.setAuthServerUrl(authServerUrl(session).get)
    adapterConfig
  }
}

class AuthorizeActionBuilder(attributes: AuthorizeAttributes) extends ActionBuilder {
  def newInstance(attributes: AuthorizeAttributes) = new AuthorizeActionBuilder(attributes)

  def sslRequired(sslRequired: SslRequired) = newInstance(attributes.copy(sslRequired = sslRequired))
  def resource(resource: String) = newInstance(attributes.copy(resource = resource))
  def clientCredentials(password: String) = newInstance(attributes.copy(password = password))
  def realm(realm: String) = newInstance(attributes.copy(realm = realm))
  def realmKey(realmKey: String) = newInstance(attributes.copy(realmKey = realmKey))
  def authServerUrl(authServerUrl: Expression[String]) = newInstance(attributes.copy(authServerUrl = authServerUrl))

  override def build(next: ActorRef, protocols: Protocols): ActorRef = {
    actor(actorName("authorize"))(new AuthorizeAction(attributes, next))
  }
}

object AuthorizeAction {
  val logger = Logger.getLogger(classOf[AuthorizeAction])
}

class AuthorizeAction(
                       attributes: AuthorizeAttributes,
                       val next: ActorRef
                     ) extends Interruptable with ExitOnFailure with DataWriterClient {
  override def executeOrFail(session: Session): Validation[_] = {
    val facade = new MockHttpFacade()
    val deployment = KeycloakDeploymentBuilder.build(attributes.toAdapterConfig(session));
    facade.request.setURI(attributes.uri(session).get);
    facade.request.setCookies(attributes.cookies(session).get.map(c => (c.getName, c)).toMap.asJava)
    var nextSession = session
    val requestAuth: MockRequestAuthenticator = session(MockRequestAuthenticator.KEY).asOption[MockRequestAuthenticator] match {
      case Some(ra) => ra
      case None =>
        val tmp = new MockRequestAuthenticator(facade, deployment, new MockTokenStore, -1, session.userId)
        nextSession = session.set(MockRequestAuthenticator.KEY, tmp)
        tmp
    }

    Blocking(() => {
      AuthorizeAction.logger.debugf("%s: Authenticating %s%n", new SimpleDateFormat("HH:mm:ss,SSS").format(new Date()).asInstanceOf[Any], session("username").as[Any], Unit)
      Stopwatch(() => requestAuth.authenticate())
        .check(result => result == AuthOutcome.AUTHENTICATED, result => {
          AuthorizeAction.logger.warnf("%s: Failed auth %s%n", new SimpleDateFormat("HH:mm:ss,SSS").format(new Date()).asInstanceOf[Any], session("username").as[Any], Unit)
          result.toString
        })
        .recordAndContinue(AuthorizeAction.this, nextSession, attributes.requestName(session).get)
    })
  }
} 
Example 14
Source File: SessionServlet.scala    From jboss-wildfly-test   with Apache License 2.0
package servlet

import java.text.SimpleDateFormat
import java.util.Date
import javax.servlet.annotation._
import javax.servlet.http._

@WebServlet(value = Array("/SessionServlet"))
class SessionServlet extends HttpServlet {

  def formatTime(timestamp: Long): String = {
    val sdf = new SimpleDateFormat("yyyy-MM-dd'T'hh:mm:ss.SSS")
    sdf.format(new Date(timestamp))
  }

  override def doGet(request: HttpServletRequest, response: HttpServletResponse) {
    response.setContentType("text/html")
    response.setCharacterEncoding("UTF-8")

    val out = response.getWriter
    out.println("<h3>Session Test Example</h3>")

    val session = request.getSession(true)
    out.println(
      s"""
        |Session Id: ${session.getId} <br/>
        |Created: ${formatTime(session.getCreationTime)} <br/>
        |Last Accessed: ${formatTime(session.getLastAccessedTime)} <br/>
      """.stripMargin)

    Option(request.getParameter("dataname")).foreach { dataName ⇒
      Option(request.getParameter("datavalue")).foreach { dataValue ⇒
        session.setAttribute(dataName, dataValue);
      }
    }

    import scala.collection.JavaConversions._
    val xs = session.getAttributeNames
    val sessionDataString = xs.map(name ⇒ s"$name = ${session.getAttribute(name)}").mkString("<br/>")
    out.println(
      s"""
        |<p>
        |The following data is in your session: <br/><br/>
        |$sessionDataString
        |</p>
        |
        |<p>
        |POST based form <br/>
        |<form action='${response.encodeURL("SessionServlet")}' method='post'>
        | Name of session attribute: <input type='text' size='20' name='dataname'/><br/>
        | Value of session attribute: <input type='text' size='20' name='datavalue'/><br/>
        | <input type='submit'/>
        |</form>
        |</p>
        |
        |<p>
        |GET based form <br/>
        |<form action='${response.encodeURL("SessionServlet")}' method='get'>
        | Name of session attribute: <input type='text' size='20' name='dataname'/><br/>
        | Value of session attribute: <input type='text' size='20' name='datavalue'/><br/>
        | <input type='submit'/>
        |</form>
        |</p>
        |
        |<p><a href='${response.encodeURL("SessionServlet?dataname=foo&datavalue=bar")}'>URL encoded</a>
      """.stripMargin)

    out.close()
  }

  override def doPost(req: HttpServletRequest, resp: HttpServletResponse): Unit =
    doGet(req, resp)
} 
Example 15
Source File: Total.scala    From akka_streams_tutorial   with MIT License
package sample.stream_actor

import java.text.SimpleDateFormat
import java.util.{Date, TimeZone}

import akka.Done
import akka.actor.Actor
import sample.stream_actor.Total.Increment

object Total {
  case class Increment(value: Long, avg: Double, id: String)
}

class Total extends Actor {
  var total: Long = 0

  override def receive: Receive = {
    case Increment(value, avg, id) =>
      println(s"Received $value new measurements from turbine with id: $id -  Avg wind speed is: $avg")
      total = total + value

      val date = new Date()
      val df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
      df.setTimeZone(TimeZone.getTimeZone("Europe/Zurich"))

      println(s"${df.format(date) } - Current total of all measurements: $total")
      sender ! Done
  }
} 
Example 16
Source File: Warn.scala    From spatial   with MIT License
package emul

import java.text.SimpleDateFormat
import java.util.Calendar
import java.io.PrintStream

object Warn {
  val now = Calendar.getInstance().getTime
  val fmt = new SimpleDateFormat("dd_MM_yyyy_hh_mm_aa")
  val timestamp = fmt.format(now)
  var warns: Int = 0

  lazy val log = new PrintStream(timestamp + ".log")
  def apply(x: => String): Unit = {
    log.println(x)
    warns += 1
  }
  def close(): Unit = {
    if (warns > 0) {
      println(Warn.warns + " warnings occurred during program execution. See " + Warn.timestamp + ".log for details")
      log.close()
    }
  }
} 
Example 17
Source File: Worker.scala    From EncryCore   with GNU General Public License v3.0
package encry.local.miner

import java.util.Date

import akka.actor.{Actor, ActorRef}
import encry.EncryApp._

import scala.concurrent.duration._
import encry.consensus.{CandidateBlock, ConsensusSchemeReaders}
import encry.local.miner.Miner.MinedBlock
import encry.local.miner.Worker.{MineBlock, NextChallenge}
import java.text.SimpleDateFormat

import com.typesafe.scalalogging.StrictLogging
import org.encryfoundation.common.utils.constants.TestNetConstants

class Worker(myIdx: Int, numberOfWorkers: Int, miner: ActorRef) extends Actor with StrictLogging {

  val sdf: SimpleDateFormat = new SimpleDateFormat("HH:mm:ss")
  var challengeStartTime: Date = new Date(System.currentTimeMillis())

  val initialNonce: Long = Long.MaxValue / numberOfWorkers * myIdx

  override def preRestart(reason: Throwable, message: Option[Any]): Unit =
    logger.warn(s"Worker $myIdx is restarting because of: $reason")

  override def receive: Receive = {
    case MineBlock(candidate: CandidateBlock, nonce: Long) =>
      logger.info(s"Trying nonce: $nonce. Start nonce is: $initialNonce. " +
        s"Iter qty: ${nonce - initialNonce + 1} on worker: $myIdx with diff: ${candidate.difficulty}")
      ConsensusSchemeReaders
        .consensusScheme.verifyCandidate(candidate, nonce)
        .fold(
          e => {
            self ! MineBlock(candidate, nonce + 1)
            logger.info(s"Mining failed cause: $e")
          },
          block => {
            logger.info(s"New block is found: (${block.header.height}, ${block.header.encodedId}, ${block.payload.txs.size} " +
              s"on worker $self at ${sdf.format(new Date(System.currentTimeMillis()))}. Iter qty: ${nonce - initialNonce + 1}")
            miner ! MinedBlock(block, myIdx)
          })
    case NextChallenge(candidate: CandidateBlock) =>
      challengeStartTime = new Date(System.currentTimeMillis())
      logger.info(s"Start next challenge on worker: $myIdx at height " +
        s"${candidate.parentOpt.map(_.height + 1).getOrElse(TestNetConstants.PreGenesisHeight.toString)} at ${sdf.format(challengeStartTime)}")
      self ! MineBlock(candidate, Long.MaxValue / numberOfWorkers * myIdx)
  }

}

object Worker {

  case class NextChallenge(candidateBlock: CandidateBlock)

  case class MineBlock(candidateBlock: CandidateBlock, nonce: Long)

} 
Example 18
Source File: ISODateConverter.scala    From hydra   with Apache License 2.0
package hydra.avro.convert

import java.text.SimpleDateFormat
import java.time._

import hydra.common.logging.LoggingAdapter
import org.apache.avro.{Conversion, LogicalType, Schema}

import scala.util.Try


class ISODateConverter extends Conversion[ZonedDateTime] with LoggingAdapter {

  private val utc = ZoneOffset.UTC

  override def getLogicalTypeName: String = IsoDate.IsoDateLogicalTypeName

  override def getConvertedType: Class[ZonedDateTime] = classOf[ZonedDateTime]

  private val simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssX")

  override def fromCharSequence(
      value: CharSequence,
      schema: Schema,
      `type`: LogicalType
  ): ZonedDateTime = {
    Try(OffsetDateTime.parse(value).toInstant)
      .orElse {
        Try(LocalDateTime.parse(value).toInstant(ZoneOffset.UTC))
      }
      .orElse {
        Try(simpleDateFormat.parse(value.toString).toInstant)
      }
      .recover {
        case e: Throwable =>
          log.error(e.getMessage, e)
          Instant.EPOCH
      }
      .map(_.atZone(utc))
      .get
  }
}

object IsoDate extends LogicalType("iso-datetime") {
  val IsoDateLogicalTypeName = "iso-datetime"

  override def validate(schema: Schema): Unit = {
    if (schema.getType() != Schema.Type.STRING) {
      throw new IllegalArgumentException(
        "Iso-datetime can only be used with an underlying string type"
      )
    }
  }
} 
Example 19
Source File: Trip.scala    From gihyo-spark-book-example   with Apache License 2.0
package jp.gihyo.spark.ch05

import java.text.SimpleDateFormat

case class Trip(id: Int, duration: Int,
  startDate: java.sql.Timestamp, startStation: String, startTerminal: Int,
  endDate: java.sql.Timestamp, endStation: String, endTerminal: Int,
  bikeNum: Int, subscriberType: String, zipcode: String)

object Trip {

  def parse(line: String): Trip = {
    val dateFormat = new SimpleDateFormat("MM/dd/yyy HH:mm")
    val elms = line.split(",")

    val id = elms(0).toInt
    val duration = elms(1).toInt
    val startDate = new java.sql.Timestamp(dateFormat.parse(elms(2)).getTime)
    val startStation = elms(3)
    val startTerminal = elms(4).toInt
    val endDate = new java.sql.Timestamp(dateFormat.parse(elms(5)).getTime)
    val endStation = elms(6)
    val endTerminal = elms(7).toInt
    val bikeNum = elms(8).toInt
    val subscriberType = elms(9)
    val zipcode = elms(10)
    Trip(id, duration,
      startDate, startStation, startTerminal,
      endDate, endStation, endTerminal,
      bikeNum, subscriberType, zipcode)
  }
} 
Example 20
Source File: Station.scala    From gihyo-spark-book-example   with Apache License 2.0
package jp.gihyo.spark.ch05

import java.text.SimpleDateFormat

case class Station(id: Int, name: String, lat: Double, lon: Double,
    dockcount: Int, landmark: String, installation: java.sql.Date)

object Station {

  def parse(line: String): Station = {
    val dateFormat = new SimpleDateFormat("MM/dd/yyy")

    val elms = line.split(",")
    val id = elms(0).toInt
    val name = elms(1)
    val lat = elms(2).toDouble
    val lon = elms(3).toDouble
    val dockcount = elms(4).toInt
    val landmark = elms(5)
    val parsedInstallation = dateFormat.parse(elms(6))
    val installation = new java.sql.Date(parsedInstallation.getTime)
    Station(id, name, lat, lon, dockcount, landmark, installation)
  }
} 
Example 21
Source File: StationSuite.scala    From gihyo-spark-book-example   with Apache License 2.0
package jp.gihyo.spark.ch05

import java.sql.Timestamp
import java.text.SimpleDateFormat

import org.scalatest.FunSuite

class StationSuite extends FunSuite {

  test("should be parse") {
    val line = "2,San Jose Diridon Caltrain Station,37.329732,-121.901782,27,San Jose,8/6/2013"
    val station = Station.parse(line)

    val dateFormat = new SimpleDateFormat("MM/dd/yyy")
    assert(station.id === 2)
    assert(station.name === "San Jose Diridon Caltrain Station")
    assert(station.lat === 37.329732)
    assert(station.lon === -121.901782)
    assert(station.dockcount === 27)
    assert(station.landmark === "San Jose")
    assert(station.installation === new Timestamp(dateFormat.parse("8/6/2013").getTime))
  }
} 
Example 22
Source File: IOUtils.scala    From watr-works   with Apache License 2.0
package edu.umass.cs.iesl.watr
package utils

object PathUtils {

  import ammonite.{ops => fs}

  import java.nio.{file => nio}

  def appendTimestamp(path: String): String = {
    import java.text.SimpleDateFormat
    import java.util.Date
    val dateStamp = new SimpleDateFormat("yyyyMMddhhmmss").format(new Date())
    s"$path-$dateStamp"
  }

  def nioToAmm(nioPath: nio.Path): fs.Path = {
    fs.FilePath(nioPath) match {
      case p: fs.Path =>  p
      case p: fs.RelPath => fs.pwd / p
      case _ => ???
    }
  }

  def strToAmmPath(str: String): fs.Path = {
    fs.FilePath(str) match {
      case p: fs.Path =>  p
      case p: fs.RelPath => fs.pwd / p
      case _ => ???
    }
  }

  implicit class RicherPathUtils_String(val self: String) extends AnyVal {

    def toPath(): fs.Path = {
      strToAmmPath(self)
    }
  }

  implicit class RicherPathUtils_NioPath(val self: nio.Path) extends AnyVal {

    def toFsPath(): fs.Path = {
      nioToAmm(self)
    }
  }

} 
Example 23
Source File: GenericMainClass.scala    From darwin   with Apache License 2.0
package it.agilelab.darwin.app.spark

import java.text.SimpleDateFormat
import java.util.Date

import com.typesafe.config.{Config, ConfigFactory}
import org.apache.hadoop.fs.FileSystem
import org.apache.spark.sql.SparkSession
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.JavaConverters._

trait GenericMainClass {
  self: SparkManager =>

  val genericMainClassLogger: Logger = LoggerFactory.getLogger("SparkManager")

  private def makeFileSystem(session: SparkSession): FileSystem = {
    if (session.sparkContext.isLocal) {
      FileSystem.getLocal(session.sparkContext.hadoopConfiguration)
    }
    else {
      FileSystem.get(session.sparkContext.hadoopConfiguration)
    }
  }


  
  // scalastyle:off
  private def getGlobalConfig: Config = {
    genericMainClassLogger.debug("system environment vars")
    for ((k, v) <- System.getenv().asScala.toSeq.sortBy(_._1)) genericMainClassLogger.debug(s"$k -> $v")

    genericMainClassLogger.debug("system properties")
    for ((k, v) <- System.getProperties.asScala.toSeq.sortBy(_._1)) genericMainClassLogger.debug(s"$k -> $v")

    ConfigFactory.load()
  }

  // scalastyle:on

} 
Example 24
Source File: MetricsReporter.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.execution.streaming

import java.text.SimpleDateFormat

import com.codahale.metrics.{Gauge, MetricRegistry}

import org.apache.spark.internal.Logging
import org.apache.spark.metrics.source.{Source => CodahaleSource}
import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.streaming.StreamingQueryProgress


class MetricsReporter(
    stream: StreamExecution,
    override val sourceName: String) extends CodahaleSource with Logging {

  override val metricRegistry: MetricRegistry = new MetricRegistry

  // Metric names should not have . in them, so that all the metrics of a query are identified
  // together in Ganglia as a single metric group
  registerGauge("inputRate-total", _.inputRowsPerSecond, 0.0)
  registerGauge("processingRate-total", _.processedRowsPerSecond, 0.0)
  registerGauge("latency", _.durationMs.get("triggerExecution").longValue(), 0L)

  private val timestampFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'") // ISO8601
  timestampFormat.setTimeZone(DateTimeUtils.getTimeZone("UTC"))

  registerGauge("eventTime-watermark",
    progress => convertStringDateToMillis(progress.eventTime.get("watermark")), 0L)

  registerGauge("states-rowsTotal", _.stateOperators.map(_.numRowsTotal).sum, 0L)
  registerGauge("states-usedBytes", _.stateOperators.map(_.memoryUsedBytes).sum, 0L)

  private def convertStringDateToMillis(isoUtcDateStr: String) = {
    if (isoUtcDateStr != null) {
      timestampFormat.parse(isoUtcDateStr).getTime
    } else {
      0L
    }
  }

  private def registerGauge[T](
      name: String,
      f: StreamingQueryProgress => T,
      default: T): Unit = {
    synchronized {
      metricRegistry.register(name, new Gauge[T] {
        override def getValue: T = Option(stream.lastProgress).map(f).getOrElse(default)
      })
    }
  }
} 
Example 25
Source File: ExecutorNumListener.scala    From XSQL   with Apache License 2.0
package org.apache.spark.monitor

import java.text.SimpleDateFormat
import java.util
import java.util.Date
import java.util.concurrent.atomic.AtomicBoolean

import com.fasterxml.jackson.annotation.JsonIgnore

import org.apache.spark.SparkContext
import org.apache.spark.internal.Logging
import org.apache.spark.scheduler.{
  SparkListener,
  SparkListenerExecutorAdded,
  SparkListenerExecutorRemoved
}
import org.apache.spark.util.kvstore.KVIndex

class ExecutorNumListener extends SparkListener with Logging {

  lazy val kvstore = SparkContext.getActive.get.statusStore.store
  var initialized: AtomicBoolean = new AtomicBoolean(false)
  var lastPointTime: Long = new Date().getTime
  var recentEventTime: Long = new Date().getTime
  private val liveExecutors = new util.HashSet[String]()

  def initialize(): Unit = {
    SparkContext.getActive.map(_.ui).flatten.foreach {
      case ui =>
        ui.attachTab(new ExecutorNumTab(ui))
        ui.addStaticHandler("static", "/static/special")
    }
  }

  def maybeAddPoint(time: Long): Unit = {
    if (!initialized.get) {
      initialize()
      initialized.compareAndSet(false, true)
    }
    if (time - lastPointTime > 20 * 1000) {
      addPoint(recentEventTime)
      addPoint(time)
      lastPointTime = time
    }
    recentEventTime = time
  }
  def addPoint(time: Long): Unit = {
    val executorNum = liveExecutors.size
    kvstore.write(new ExecutorNumWrapper(new ExecutorNum(
      s"own ${executorNum} executors at ${new SimpleDateFormat("HH:mm:ss").format(new Date(time))}",
      IndexedSeq(time, executorNum))))
  }

  override def onExecutorAdded(event: SparkListenerExecutorAdded): Unit = {
    liveExecutors.add(event.executorId)
    maybeAddPoint(event.time)
  }

  override def onExecutorRemoved(event: SparkListenerExecutorRemoved): Unit = {
    liveExecutors.remove(event.executorId)
    maybeAddPoint(event.time)
  }

}

private[spark] class ExecutorNumWrapper(val point: ExecutorNum) {
  @JsonIgnore @KVIndex
  def id: Long = point.value(0)
}

private[spark] class ExecutorNum(val name: String, val value: IndexedSeq[Long]) 
Example 26
Source File: ApplicationMonitor.scala    From XSQL   with Apache License 2.0
package org.apache.spark.monitor.application

import java.sql.{Connection, Timestamp}
import java.text.SimpleDateFormat
import java.util.Date
import java.util.concurrent.TimeUnit

import scala.concurrent.duration.Duration

import org.apache.spark.alarm.AlertMessage
import org.apache.spark.alarm.AlertType._
import org.apache.spark.monitor.Monitor
import org.apache.spark.monitor.MonitorItem.MonitorItem

abstract class ApplicationMonitor extends Monitor {
  override val alertType = Seq(Application)
}

class ApplicationInfo(
    title: MonitorItem,
    appName: String,
    appId: String,
    md5: String,
    startTime: Date,
    duration: Long,
    appUiUrl: String,
    historyUrl: String,
    eventLogDir: String,
    minExecutor: Int,
    maxExecutor: Int,
    executorCore: Int,
    executorMemoryMB: Long,
    executorAccu: Double,
    user: String)
  extends AlertMessage(title) {
  override def toCsv(): String = {
    s"${user},${appId}," +
      s"${new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(startTime)}," +
      s"${Duration(duration, TimeUnit.MILLISECONDS).toSeconds}," +
      s"${executorMemoryMB},${executorCore},${executorAccu.formatted("%.2f")},${appName}"
  }
  // scalastyle:off
  override def toHtml(): String = {
    val html = <h1>任务完成! </h1>
        <h2>任务信息 </h2>
        <ul>
          <li>作业名:{appName}</li>
          <li>作业ID:{appId}</li>
          <li>开始时间:{new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(startTime)}</li>
          <li>任务用时:{Duration(duration, TimeUnit.MILLISECONDS).toSeconds} s</li>
        </ul>
        <h2>资源用量</h2>
        <ul>
          <li>Executor个数:{minExecutor}~{maxExecutor}</li>
          <li>Executor内存:{executorMemoryMB} MB</li>
          <li>Executor核数:{executorCore}</li>
          <li>Executor累积用量:{executorAccu.formatted("%.2f")} executor*min</li>
        </ul>
        <h2>调试信息</h2>
        <ul>
          <li>回看链接1:<a href={appUiUrl.split(",").head}>{appUiUrl.split(",").head}</a></li>
          <li>回看链接2:<a href={historyUrl}>{historyUrl}</a></li>
          <li>日志文件所在目录:{eventLogDir}</li>
        </ul>
    html.mkString
  }

  override def toJdbc(conn: Connection, appId: String): Unit = {
    val query = "INSERT INTO `xsql_monitor`.`spark_history`(" +
      "`user`, `md5`, `appId`, `startTime`, `duration`, " +
      "`yarnURL`, `sparkHistoryURL`, `eventLogDir`, `coresPerExecutor`, `memoryPerExecutorMB`," +
      " `executorAcc`, `appName`, `minExecutors`, `maxExecutors`)" +
      " SELECT ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? FROM DUAL" +
      " WHERE NOT EXISTS (SELECT * FROM `xsql_monitor`.`spark_history` WHERE `appId` = ?);"

    val preparedStmt = conn.prepareStatement(query)
    preparedStmt.setString(1, user)
    preparedStmt.setString(2, md5)
    preparedStmt.setString(3, appId)
    preparedStmt.setTimestamp(4, new Timestamp(startTime.getTime))
    preparedStmt.setLong(5, Duration(duration, TimeUnit.MILLISECONDS).toSeconds)
    preparedStmt.setString(6, appUiUrl)
    preparedStmt.setString(7, historyUrl)
    preparedStmt.setString(8, eventLogDir)
    preparedStmt.setInt(9, executorCore)
    preparedStmt.setLong(10, executorMemoryMB)
    preparedStmt.setDouble(11, executorAccu)
    preparedStmt.setString(12, appName)
    preparedStmt.setInt(13, minExecutor)
    preparedStmt.setInt(14, maxExecutor)
    preparedStmt.setString(15, appId)
    preparedStmt.execute
  }
} 
Example 27
Source File: SQLMonitor.scala    From XSQL   with Apache License 2.0
package org.apache.spark.monitor.sql

import java.text.SimpleDateFormat
import java.util.Date
import java.util.concurrent.TimeUnit

import scala.concurrent.duration.Duration

import org.apache.spark.alarm.AlertMessage
import org.apache.spark.alarm.AlertType._
import org.apache.spark.monitor.Monitor
import org.apache.spark.monitor.MonitorItem.MonitorItem

abstract class SQLMonitor extends Monitor {
  override val alertType = Seq(SQL)

}

class SQLInfo(
    title: MonitorItem,
    sqlId: String,
    aeFlag: Boolean,
    appId: String,
    executionId: Long,
    submissionTime: Date,
    duration: Long)
  extends AlertMessage(title) {
  override def toCsv(): String = {
    s"${sqlId},${aeFlag},${appId},${executionId}," +
      s"${new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(submissionTime)}," +
      s"${Duration(duration, TimeUnit.MILLISECONDS).toSeconds}"
  }

} 
Example 28
Source File: ExcelOutputWriter.scala    From spark-hadoopoffice-ds   with Apache License 2.0
package org.zuinnote.spark.office.excel

import java.math.BigDecimal
import java.sql.Date
import java.sql.Timestamp
import java.text.DateFormat
import java.text.SimpleDateFormat
import java.util.Calendar

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.NullWritable
import org.apache.hadoop.io.ArrayWritable
import org.apache.hadoop.mapreduce.RecordWriter
import org.apache.hadoop.mapreduce.TaskAttemptContext

import org.apache.hadoop.fs.Path

import org.apache.spark.sql.catalyst.{ CatalystTypeConverters, InternalRow }
import org.apache.spark.sql.Row
import org.apache.spark.sql.execution.datasources.OutputWriter
import org.apache.spark.sql.types._

import org.zuinnote.hadoop.office.format.common.dao.SpreadSheetCellDAO
import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration
import org.zuinnote.hadoop.office.format.common.util.msexcel.MSExcelUtil
import org.zuinnote.hadoop.office.format.mapreduce._

import org.apache.commons.logging.LogFactory
import org.apache.commons.logging.Log
import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration
import java.util.Locale
import java.text.DecimalFormat
import org.zuinnote.hadoop.office.format.common.converter.ExcelConverterSimpleSpreadSheetCellDAO
import java.text.NumberFormat

// NOTE: This class is instantiated and used on executor side only, no need to be serializable.
private[excel] class ExcelOutputWriter(
  path:       String,
  dataSchema: StructType,
  context:    TaskAttemptContext, options: Map[String, String]) extends OutputWriter {
  
  def write(row: Row): Unit = {
    // check useHeader
    if (useHeader) {
      val headers = row.schema.fieldNames
      var i = 0
      for (x <- headers) {
        val headerColumnSCD = new SpreadSheetCellDAO(x, "", "", MSExcelUtil.getCellAddressA1Format(currentRowNum, i), defaultSheetName)
        recordWriter.write(NullWritable.get(), headerColumnSCD)
        i += 1
      }
      currentRowNum += 1
      useHeader = false
    }
    // for each value in the row
    if (row.size>0) {
      var currentColumnNum = 0;
      val simpleObject = new Array[AnyRef](row.size)
      for (i <- 0 to row.size - 1) { // for each element of the row
        val obj = row.get(i)
        if ((obj.isInstanceOf[Seq[String]]) && (obj.asInstanceOf[Seq[String]].length==5)) {
          val formattedValue = obj.asInstanceOf[Seq[String]](0)
          val comment = obj.asInstanceOf[Seq[String]](1)
          val formula = obj.asInstanceOf[Seq[String]](2)
          val address = obj.asInstanceOf[Seq[String]](3)
          val sheetName = obj.asInstanceOf[Seq[String]](4)
          simpleObject(i) = new SpreadSheetCellDAO(formattedValue,comment,formula,address,sheetName)
        } else {
          simpleObject(i)=obj.asInstanceOf[AnyRef]
        }
      }
      // convert row to spreadsheetcellDAO
      val spreadSheetCellDAORow = simpleConverter.getSpreadSheetCellDAOfromSimpleDataType(simpleObject, defaultSheetName, currentRowNum)
      // write it
      for (x<- spreadSheetCellDAORow) {
        recordWriter.write(NullWritable.get(), x)
      }
    }
    currentRowNum += 1
  }

  override def close(): Unit = {
    recordWriter.close(context)
    currentRowNum = 0;
  }

} 
Example 29
Source File: BenchmarkUtil.scala    From CodeAnalyzerTutorial   with Apache License 2.0
package tutor.utils

import java.text.SimpleDateFormat
import java.util.Date

import com.typesafe.scalalogging.StrictLogging

object BenchmarkUtil extends StrictLogging {
  def record[T](actionDesc: String)(action: => T): T = {
    val beginTime = new Date
    logger.info(s"begin $actionDesc")
    val rs = action
    logger.info(s"end $actionDesc")
    val endTime = new Date
    val elapsed = new Date(endTime.getTime - beginTime.getTime)
    val sdf = new SimpleDateFormat("mm:ss.SSS")
    logger.info(s"$actionDesc total elapsed ${sdf.format(elapsed)}")
    rs
  }
  def recordStart(actionDesc: String):Date = {
    logger.info(s"$actionDesc begin")
    new Date
  }

  def recordElapse(actionDesc: String, beginFrom: Date):Unit = {
    logger.info(s"$actionDesc ended")
    val endTime = new Date
    val elapsed = new Date(endTime.getTime - beginFrom.getTime)
    val sdf = new SimpleDateFormat("mm:ss.SSS")
    logger.info(s"$actionDesc total elapsed ${sdf.format(elapsed)}")
  }
} 
Example 30
Source File: L3-DStreamMapping.scala    From prosparkstreaming   with Apache License 2.0
package org.apress.prospark

import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{ Milliseconds, Seconds, StreamingContext }
import org.apache.hadoop.io.{ Text, LongWritable, IntWritable }
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
import org.apache.spark.streaming.dstream.DStream
import org.apache.hadoop.mapred.TextOutputFormat
import org.apache.hadoop.mapreduce.lib.output.{ TextOutputFormat => NewTextOutputFormat }
import org.apache.spark.streaming.dstream.PairDStreamFunctions
import org.apache.log4j.LogManager
import org.json4s._
import org.json4s.native.JsonMethods._
import java.text.SimpleDateFormat
import java.util.Date

object RedditMappingApp {
  def main(args: Array[String]) {
    if (args.length != 2) {
      System.err.println(
        "Usage: RedditMappingApp <appname> <input_path>")
      System.exit(1)
    }
    val Seq(appName, inputPath) = args.toSeq
    val LOG = LogManager.getLogger(this.getClass)

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(1))
    LOG.info("Started at %d".format(ssc.sparkContext.startTime))

    val comments = ssc.fileStream[LongWritable, Text, TextInputFormat](inputPath, (f: Path) => true, newFilesOnly = false).map(pair => pair._2.toString)

    val sdf = new SimpleDateFormat("yyyy-MM-dd")
    val tsKey = "created_utc"
    val secs = 1000L
    val keyedByDay = comments.map(rec => {
      val ts = (parse(rec) \ tsKey).values
      (sdf.format(new Date(ts.toString.toLong * secs)), rec)
    })

    val keyedByDayPart = comments.mapPartitions(iter => {
      var ret = List[(String, String)]()
      while (iter.hasNext) {
        val rec = iter.next
        val ts = (parse(rec) \ tsKey).values
        ret.::=(sdf.format(new Date(ts.toString.toLong * secs)), rec)
      }
      ret.iterator
    })

    val wordTokens = comments.map(rec => {
      ((parse(rec) \ "body")).values.toString.split(" ")
    })

    val wordTokensFlat = comments.flatMap(rec => {
      ((parse(rec) \ "body")).values.toString.split(" ")
    })

    val filterSubreddit = comments.filter(rec =>
      (parse(rec) \ "subreddit").values.toString.equals("AskReddit"))

    val sortedByAuthor = comments.transform(rdd =>
      (rdd.sortBy(rec => (parse(rec) \ "author").values.toString)))

    ssc.start()
    ssc.awaitTermination()

  }
} 
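
The date-keying step above reduces to "epoch seconds times 1000, formatted with yyyy-MM-dd". A standalone sketch of just that step, with a made-up created_utc value; note that SimpleDateFormat uses the JVM default time zone, so the resulting day key depends on where the job runs.

import java.text.SimpleDateFormat
import java.util.Date

val sdf = new SimpleDateFormat("yyyy-MM-dd")
val createdUtcSeconds = 1431302400L                          // hypothetical created_utc (seconds since epoch)
val dayKey = sdf.format(new Date(createdUtcSeconds * 1000L)) // e.g. "2015-05-11" on a UTC JVM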
Example 31
Source File: L3-DStreamKeyValue.scala    From prosparkstreaming   with Apache License 2.0 5 votes vote down vote up
package org.apress.prospark

import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{ Milliseconds, Seconds, StreamingContext }
import org.apache.hadoop.io.{ Text, LongWritable, IntWritable }
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
import org.apache.spark.streaming.dstream.DStream
import org.apache.hadoop.mapred.TextOutputFormat
import org.apache.hadoop.mapreduce.lib.output.{ TextOutputFormat => NewTextOutputFormat }
import org.apache.spark.streaming.dstream.PairDStreamFunctions
import org.apache.log4j.LogManager
import org.json4s._
import org.json4s.native.JsonMethods._
import java.text.SimpleDateFormat
import java.util.Date
import org.apache.spark.HashPartitioner

object RedditKeyValueApp {
  def main(args: Array[String]) {
    if (args.length != 3) {
      System.err.println(
        "Usage: RedditKeyValueApp <appname> <input_path> <input_path_popular>")
      System.exit(1)
    }
    val Seq(appName, inputPath, inputPathPopular) = args.toSeq
    val LOG = LogManager.getLogger(this.getClass)

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(1))
    LOG.info("Started at %d".format(ssc.sparkContext.startTime))

    val comments = ssc.fileStream[LongWritable, Text, TextInputFormat](inputPath, (f: Path) => true, newFilesOnly = false).map(pair => pair._2.toString)

    val popular = ssc.fileStream[LongWritable, Text, TextInputFormat](inputPathPopular, (f: Path) => true, newFilesOnly = false).map(pair => pair._2.toString)

    val topAuthors = comments.map(rec => ((parse(rec) \ "author").values.toString, 1))
      .groupByKey()
      .map(r => (r._2.sum, r._1))
      .transform(rdd => rdd.sortByKey(ascending = false))

    val topAuthors2 = comments.map(rec => ((parse(rec) \ "author").values.toString, 1))
      .reduceByKey(_ + _)
      .map(r => (r._2, r._1))
      .transform(rdd => rdd.sortByKey(ascending = false))

    val topAuthorsByAvgContent = comments.map(rec => ((parse(rec) \ "author").values.toString, (parse(rec) \ "body").values.toString.split(" ").length))
      .combineByKey(
        (v) => (v, 1),
        (accValue: (Int, Int), v) => (accValue._1 + v, accValue._2 + 1),
        (accCombine1: (Int, Int), accCombine2: (Int, Int)) => (accCombine1._1 + accCombine2._1, accCombine1._2 + accCombine2._2),
        new HashPartitioner(ssc.sparkContext.defaultParallelism))
      .map({ case (k, v) => (k, v._1 / v._2.toFloat) })
      .map(r => (r._2, r._1))
      .transform(rdd => rdd.sortByKey(ascending = false))

    val keyedBySubreddit = comments.map(rec => (((parse(rec)) \ "subreddit").values.toString, rec))
    val keyedBySubreddit2 = popular.map(rec => ({
      val t = rec.split(",")
      (t(1).split("/")(4), t(0))
    }))
    val commentsWithIndustry = keyedBySubreddit.join(keyedBySubreddit2)

    val keyedBySubredditCo = comments.map(rec => (((parse(rec)) \ "subreddit").values.toString, rec))
    val keyedBySubredditCo2 = popular.map(rec => ({
      val t = rec.split(",")
      (t(1).split("/")(4), t(0))
    }))
    val commentsWithIndustryCo = keyedBySubredditCo.cogroup(keyedBySubredditCo2)

    val checkpointPath = "/tmp"
    ssc.checkpoint(checkpointPath)
    val updateFunc = (values: Seq[Int], state: Option[Int]) => {
      val currentCount = values.sum
      val previousCount = state.getOrElse(0)
      Some(currentCount + previousCount)
    }
    val keyedBySubredditState = comments.map(rec => (((parse(rec)) \ "subreddit").values.toString, 1))
    val globalCount = keyedBySubredditState.updateStateByKey(updateFunc)
      .map(r => (r._2, r._1))
      .transform(rdd => rdd.sortByKey(ascending = false))

    ssc.start()
    ssc.awaitTermination()

  }
} 
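
The combineByKey call above computes a per-author average with a (sum, count) accumulator. The same idea sketched on a plain collection, with made-up data:

val wordCountsByAuthor = Seq(("alice", 12), ("bob", 7), ("alice", 4))

val averages: Map[String, Float] = wordCountsByAuthor
  .groupBy { case (author, _) => author }
  .map { case (author, pairs) =>
    val (sum, count) = pairs.foldLeft((0, 0)) { case ((s, c), (_, v)) => (s + v, c + 1) }
    author -> sum / count.toFloat
  }
// Map(alice -> 8.0, bob -> 7.0)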
Example 32
Source File: L3-DStreamVariation.scala    From prosparkstreaming   with Apache License 2.0 5 votes vote down vote up
package org.apress.prospark

import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{ Milliseconds, Seconds, StreamingContext }
import org.apache.hadoop.io.{ Text, LongWritable, IntWritable }
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
import org.apache.spark.streaming.dstream.DStream
import org.apache.hadoop.mapred.TextOutputFormat
import org.apache.hadoop.mapreduce.lib.output.{ TextOutputFormat => NewTextOutputFormat }
import org.apache.spark.streaming.dstream.PairDStreamFunctions
import org.apache.log4j.LogManager
import org.json4s._
import org.json4s.native.JsonMethods._
import java.text.SimpleDateFormat
import java.util.Date

object RedditVariationApp {
  def main(args: Array[String]) {
    if (args.length != 2) {
      System.err.println(
        "Usage: RedditVariationApp <appname> <input_path>")
      System.exit(1)
    }
    val Seq(appName, inputPath) = args.toSeq
    val LOG = LogManager.getLogger(this.getClass)

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(1))
    LOG.info("Started at %d".format(ssc.sparkContext.startTime))

    val comments = ssc.fileStream[LongWritable, Text, TextInputFormat](inputPath, (f: Path) => true, newFilesOnly = false).map(pair => pair._2.toString)

    val merged = comments.union(comments)

    val repartitionedComments = comments.repartition(4)

    val rddMin = comments.glom().map(arr =>
      arr.minBy(rec => ((parse(rec) \ "created_utc").values.toString.toInt)))

    ssc.start()
    ssc.awaitTermination()

  }
} 
Example 33
Source File: L3-DStreamWindowAndAction.scala    From prosparkstreaming   with Apache License 2.0 5 votes vote down vote up
package org.apress.prospark

import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{ Milliseconds, Seconds, StreamingContext }
import org.apache.hadoop.io.{ Text, LongWritable, IntWritable }
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
import org.apache.spark.streaming.dstream.DStream
import org.apache.hadoop.mapred.TextOutputFormat
import org.apache.hadoop.mapreduce.lib.output.{ TextOutputFormat => NewTextOutputFormat }
import org.apache.spark.streaming.dstream.PairDStreamFunctions
import org.apache.log4j.LogManager
import org.json4s._
import org.json4s.native.JsonMethods._
import java.text.SimpleDateFormat
import java.util.Date
import org.apache.spark.HashPartitioner

object RedditWindowAndActionApp {
  def main(args: Array[String]) {
    if (args.length != 2) {
      System.err.println(
        "Usage: RedditWindowAndActionApp <appname> <input_path>")
      System.exit(1)
    }
    val Seq(appName, inputPath) = args.toSeq
    val LOG = LogManager.getLogger(this.getClass)

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(1))
    LOG.info("Started at %d".format(ssc.sparkContext.startTime))

    val comments = ssc.fileStream[LongWritable, Text, TextInputFormat](inputPath, (f: Path) => true, newFilesOnly = false).map(pair => pair._2.toString)

    val checkpointPath = "/tmp"
    ssc.checkpoint(checkpointPath)
    val updateFunc = (values: Seq[Int], state: Option[Int]) => {
      val currentCount = values.sum
      val previousCount = state.getOrElse(0)
      Some(currentCount + previousCount)
    }
    val keyedBySubredditState = comments.map(rec => (((parse(rec)) \ "subreddit").values.toString, 1))
    val globalCount = keyedBySubredditState.updateStateByKey(updateFunc)
      .map(r => (r._2, r._1))
      .transform(rdd => rdd.sortByKey(ascending = false))

    val distinctSubreddits = comments.map(rec => ((parse(rec)) \ "subreddit").values.toString)
    val windowedRecs = distinctSubreddits.window(Seconds(5), Seconds(5))
    val windowedCounts = windowedRecs.countByValue()

    windowedCounts.print(10)
    windowedCounts.saveAsObjectFiles("subreddit", "obj")
    windowedCounts.saveAsTextFiles("subreddit", "txt")

    globalCount.saveAsHadoopFiles("subreddit", "hadoop",
      classOf[IntWritable], classOf[Text], classOf[TextOutputFormat[IntWritable, Text]])
    globalCount.saveAsNewAPIHadoopFiles("subreddit", "newhadoop",
      classOf[IntWritable], classOf[Text], classOf[NewTextOutputFormat[IntWritable, Text]])
    comments.foreachRDD(rdd => {
      LOG.info("RDD: %s, Count: %d".format(rdd.id, rdd.count()))
    })

    ssc.start()
    ssc.awaitTermination()

  }
} 
Example 34
Source File: L3-DStreamAggregation.scala    From prosparkstreaming   with Apache License 2.0 5 votes vote down vote up
package org.apress.prospark

import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{ Milliseconds, Seconds, StreamingContext }
import org.apache.hadoop.io.{ Text, LongWritable, IntWritable }
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
import org.apache.spark.streaming.dstream.DStream
import org.apache.hadoop.mapred.TextOutputFormat
import org.apache.hadoop.mapreduce.lib.output.{ TextOutputFormat => NewTextOutputFormat }
import org.apache.spark.streaming.dstream.PairDStreamFunctions
import org.apache.log4j.LogManager
import org.json4s._
import org.json4s.native.JsonMethods._
import java.text.SimpleDateFormat
import java.util.Date

object RedditAggregationApp {
  def main(args: Array[String]) {
    if (args.length != 2) {
      System.err.println(
        "Usage: RedditAggregationApp <appname> <input_path>")
      System.exit(1)
    }
    val Seq(appName, inputPath) = args.toSeq
    val LOG = LogManager.getLogger(this.getClass)

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(1))
    LOG.info("Started at %d".format(ssc.sparkContext.startTime))

    val comments = ssc.fileStream[LongWritable, Text, TextInputFormat](inputPath, (f: Path) => true, newFilesOnly = false).map(pair => pair._2.toString)

    val recCount = comments.count()

    val recCountValue = comments.countByValue()

    val totalWords = comments.map(rec => ((parse(rec) \ "body").values.toString))
      .flatMap(body => body.split(" "))
      .map(word => 1)
      .reduce(_ + _)

    ssc.start()
    ssc.awaitTermination()

  }
} 
Example 35
Source File: package.scala    From sbt-flaky   with Apache License 2.0 5 votes vote down vote up
package flaky

import java.io.File
import java.text.SimpleDateFormat
import java.util.Date

import scalatags.Text
import scalatags.Text.all.{a, hr, href, p, _}

package object web {
  def footer(): Text.TypedTag[String] = {
    p(
      hr(),
      p(
        ReportCss.footer,
        "Created with ",
        a(href := "https://github.com/otrebski/sbt-flaky", "sbt-flaky plugin"), br,
        s"Report generated at ${new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date())}",
        s"Fugue icons are on Creative Common license"
      )
    )
  }

  def indexHtml(reportFile: File, historyFile: Option[File]): String = {
    val history = historyFile match {
      case Some(fileName) => a(href := fileName.getName, "History trends")
      case None =>
        p(
          "History trends report is not created. To enable history check documentation at ",
          a(href := "https://github.com/otrebski/sbt-flaky", "https://github.com/otrebski/sbt-flaky")
        )
    }

    html(
      head(link(rel := "stylesheet", href := "report.css")),
      body(
        h1(ReportCss.title, "Flaky test report"),
        h4(ReportCss.subtitle, a(href := reportFile.getName, "Report for last build")),
        h4(ReportCss.subtitle, history),
        footer()
      )
    ).render
  }

  def anchorTest(test: Test): String = s"${test.clazz}_${test.test}"

  def anchorClass(test: Test): String = test.clazz

  def anchorTestRun(testCase: TestCase): String = testCase.runName

  def singleTestDir(test: Test): String = test.clazz

  def singleTestFileName(test: Test): String = s"${test.test.replaceAll("/", "_")}.html"

  def linkToSingleTest(test: Test): String = singleTestDir(test) + "/" + singleTestFileName(test)

  def linkToSingleTestClass(clazz: String): String = s"flaky-report.html#$clazz"

  def linkToRunNameInSingleTest(test: Test,runName:String) = s"${linkToSingleTest(test)}#$runName"
} 
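
A small usage sketch for the helpers above; the file name is illustrative:

import java.io.File

// Render the landing page without a history trends link.
val page: String = flaky.web.indexHtml(new File("flaky-report.html"), None)
// `page` holds the scalatags-rendered HTML, ready to be written to disk.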
Example 36
Source File: History.scala    From sbt-flaky   with Apache License 2.0 5 votes vote down vote up
package flaky.history

import java.io.{File, FileFilter, InputStream}
import java.text.SimpleDateFormat
import java.util.Date

import flaky.{Flaky, FlakyTestReport, Io}
import org.apache.commons.vfs2.VFS

import scala.xml.XML

class History(project: String, historyDir: File, flakyReportDir: File, projectDir: File) {

  private val zipFileFilter = new FileFilter {
    override def accept(pathname: File): Boolean = pathname.getName.endsWith(".zip")
  }

  private def runFiles(historyDir: File): List[File] = historyDir.listFiles(zipFileFilter).toList

  def addCurrentToHistory(): Unit = {
    val timestamp = System.currentTimeMillis()

    val date = new SimpleDateFormat(History.dateFormat).format(new Date(timestamp))
    val gitCommit = Git(projectDir).currentId().toOption
    val historyReportDescription = HistoryReportDescription(timestamp, gitCommit)
    HistoryReportDescription.save(historyReportDescription, new File(flakyReportDir, History.descriptorFile))
    Zip.compressFolder(new File(historyDir, s"$date.zip"), flakyReportDir)
  }

  def removeToOldFromHistory(maxToKeep: Int): Unit = {
    runFiles(historyDir)
      .take(Math.max(runFiles(historyDir).size - maxToKeep, 0))
      .foreach(_.delete())
  }

  def createHistoryReport(): HistoryReport = {

    val historicalRuns: List[HistoricalRun] = runFiles(historyDir)
      .map(History.loadHistory)
    val date = new SimpleDateFormat("HH:mm dd-MM-yyyy").format(new Date())
    HistoryReport(project, date, historicalRuns)
  }


  def processHistory(): HistoryReport = {
    historyDir.mkdirs()
    addCurrentToHistory()
    removeToOldFromHistory(20)
    createHistoryReport()
  }
}


case class HistoryReportDescription(timestamp: Long, gitCommitHash: Option[String])

object HistoryReportDescription {

  def load(in: InputStream): HistoryReportDescription = {
    val descriptorXml = XML.load(in)
    val timestamp = (descriptorXml \ "timestamp").text.trim.toLong
    val gitHash = (descriptorXml \ "gitCommitHash").text.trim
    HistoryReportDescription(timestamp, Some(gitHash))
  }

  def save(historyReportDescription: HistoryReportDescription, file: File): Unit = {
    val xml =
      <HistoryReportDescription>
        <timestamp>
          {historyReportDescription.timestamp}
        </timestamp>
        <gitCommitHash>
          {historyReportDescription.gitCommitHash.getOrElse("")}
        </gitCommitHash>
      </HistoryReportDescription>
    val prettyXml = new scala.xml.PrettyPrinter(80, 2).format(xml)
    Io.writeToFile(file, prettyXml)
  }
}

object History {
  val descriptorFile = "descriptor.xml"
  val dateFormat = "yyyyMMdd-HHmmss"

  def loadHistory: (File) => HistoricalRun = {
    file => {
      val manager = VFS.getManager
      val uri = file.toURI.toString.replace("file:/", "zip:/")
      val fo = manager.resolveFile(uri)
      val report: FlakyTestReport = Flaky.createReportFromHistory(fo)
      val descriptorFile = Option(fo.getChild(History.descriptorFile))
      val dateFromFileName = file.getName.replace(".zip","")
      val hrd = descriptorFile
        .filter(_.exists())
        .map(f => HistoryReportDescription.load(f.getContent.getInputStream))
        .getOrElse(HistoryReportDescription(new SimpleDateFormat(dateFormat).parse(dateFromFileName).getTime, None))
      HistoricalRun(hrd, report)
    }
  }
} 
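
A sketch of how the pieces above fit together; the project name and directories are made up:

import java.io.File
import flaky.history.History

val history = new History(
  project = "my-project",
  historyDir = new File("target/flaky-history"),
  flakyReportDir = new File("target/flaky-report"),
  projectDir = new File("."))

// Archives the current report as <timestamp>.zip, keeps at most 20 runs, then builds the trend report.
val report = history.processHistory()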
Example 37
Source File: HistorySpec.scala    From sbt-flaky   with Apache License 2.0 5 votes vote down vote up
package flaky.history

import java.io.File
import java.text.SimpleDateFormat

import org.scalatest.{Matchers, WordSpec}

class HistorySpec extends WordSpec with Matchers {

  val fileWithDescriptor = "20170516-072750.zip"
  val fileWithoutDescriptor = "20170516-072825.zip"
  val dirWithReports = new File("./src/test/resources/history")


  "HistoryTest" should {

    "loadHistory with descriptor" in {
      val historicalRun: HistoricalRun = History.loadHistory.apply(new File(dirWithReports, fileWithDescriptor))
      historicalRun.historyReportDescription shouldBe HistoryReportDescription(123456L, Some("abcdefg"))
    }
    "loadHistory without descriptor" in {
      //Timestamp can't be hardcoded, because loadHistory tries to parse date from file name
      // with local time zone
      val timestamp = new SimpleDateFormat("yyyyMMdd-HHmmss").parse("20170516-072825").getTime
      val historicalRun: HistoricalRun = History.loadHistory.apply(new File(dirWithReports, fileWithoutDescriptor))
      historicalRun.historyReportDescription shouldBe HistoryReportDescription(timestamp, None)
    }

  }

} 
Example 38
Source File: FieldSequentialValue.scala    From schedoscope   with Apache License 2.0 5 votes vote down vote up
package org.schedoscope.test

import java.text.SimpleDateFormat
import java.util.Date

import org.schedoscope.dsl.{FieldLike, Structure}


object FieldSequentialValue {

  def get(f: FieldLike[_], i: Int, p: String): Any = {
    if (f.t == manifest[Int])
      i
    else if (f.t == manifest[Long])
      i.toLong
    else if (f.t == manifest[Byte])
      i.toByte
    else if (f.t == manifest[Boolean])
      i % 2 == 0
    else if (f.t == manifest[Double])
      i.toDouble
    else if (f.t == manifest[Float])
      i.toFloat
    else if (f.t == manifest[Date])
      new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX").format(new Date(i * 1000L))
    else if (f.t == manifest[String])
      f.n + "-" + p.format(i)
    else if (classOf[Structure].isAssignableFrom(f.t.runtimeClass)) {
      f.t.runtimeClass.newInstance().asInstanceOf[Structure].fields.map(sf => (sf.n, get(sf, i, p))).toMap
    } else if (f.t.runtimeClass == classOf[List[_]]) {
      List()
    } else if (f.t.runtimeClass == classOf[Map[_, _]])
      Map()
    else
      throw new RuntimeException("Cannot generate random values for: " + f.n + ", type is: " + f.t)
  }
} 
Example 39
Source File: Globals.scala    From schedoscope   with Apache License 2.0 5 votes vote down vote up
package schedoscope.example.osm

import java.text.SimpleDateFormat
import java.util.Date

import org.schedoscope.Settings
import org.schedoscope.dsl.View
import org.schedoscope.dsl.views.MonthlyParameterization

object Globals {
  def defaultHiveQlParameters(v: View) = {
    val baseParameters = Map(
      "env" -> v.env,
      "workflow_time" -> new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX").format(new Date),
      "workflow_name" -> v.getClass().getName())

    if (v.isInstanceOf[MonthlyParameterization])
      baseParameters ++ Map(
        "year" -> v.asInstanceOf[MonthlyParameterization].year.v.get,
        "month" -> v.asInstanceOf[MonthlyParameterization].month.v.get)
    else baseParameters
  }

  def defaultPigProperties(v: View) = Map(
    "exec.type" -> "MAPREDUCE",
    "mapred.job.tracker" -> Settings().jobTrackerOrResourceManager,
    "fs.default.name" -> Settings().nameNode,
    "workflow_time" -> new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX").format(new Date),
    "workflow_name" -> v.getClass().getName())
} 
Example 40
Source File: CustomTelemetryService.scala    From finagle-prometheus   with MIT License 5 votes vote down vote up
package com.samstarling.prometheusfinagle.examples

import java.text.SimpleDateFormat
import java.util.Calendar

import com.samstarling.prometheusfinagle.metrics.Telemetry
import com.twitter.finagle.Service
import com.twitter.finagle.http.{Request, Response, Status}
import com.twitter.util.Future

class CustomTelemetryService(telemetry: Telemetry)
    extends Service[Request, Response] {

  private val dayOfWeekFormat = new SimpleDateFormat("E")

  private val counter = telemetry.counter("requests_by_day_of_week",
                                          "Help text",
                                          Seq("day_of_week"))

  override def apply(request: Request): Future[Response] = {
    dayOfWeek
    counter.labels(dayOfWeek).inc()
    val rep = Response(request.version, Status.Ok)
    rep.setContentString("Your request was logged!")
    Future(rep)
  }

  private def dayOfWeek: String = {
    dayOfWeekFormat.format(Calendar.getInstance.getTime)
  }
} 
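
One caveat: SimpleDateFormat is not thread-safe, and a Finagle Service can be invoked from many threads concurrently. A minimal per-thread formatter that could be substituted for the shared field above (a suggested variation, not code from the project):

import java.text.SimpleDateFormat
import java.util.Date

object DayOfWeekFormat {
  // One SimpleDateFormat instance per calling thread.
  private val fmt = new ThreadLocal[SimpleDateFormat] {
    override def initialValue(): SimpleDateFormat = new SimpleDateFormat("E")
  }
  def format(date: Date): String = fmt.get().format(date)
}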
Example 41
Source File: DirectDataInjector.scala    From SparkOnKudu   with Apache License 2.0 5 votes vote down vote up
package org.kududb.spark.demo.gamer.cdc

import java.text.SimpleDateFormat
import java.util.Random

import org.kududb.client.{PartialRow, Operation, KuduClient}
import org.kududb.spark.demo.gamer.aggregates.GamerDataGenerator


object DirectDataInjector {
  val simpleDateFormat = new SimpleDateFormat("MM,dd,yyyy")
  val random = new Random
  def main(args: Array[String]): Unit = {

    if (args.length < 3) {
      println("<kuduMaster> <tableName> <numberOfRecords>")
      return
    }

    val kuduMaster = args(0)
    val tableName = args(1)
    val numberOfRecords = args(2).toInt


    val kuduClient = new KuduClient.KuduClientBuilder(kuduMaster).build()
    val table = kuduClient.openTable(tableName)
    val session = kuduClient.newSession()

    for (i <- 0 to numberOfRecords) {
      val record = GamerDataGenerator.makeNewGamerRecord(100000)




      val pr = new PartialRow(table.getSchema)
      pr.addString(0, record.gamerId)
      pr.addString(1, "")
      val scannerRows = kuduClient.newScannerBuilder(table).lowerBound(pr).limit(1).build().nextRows()
      val op:Operation = if (scannerRows.hasNext) {
        val oldRow = scannerRows.next()

        val oldRecordUpdateOp = table.newInsert()

        val row = oldRecordUpdateOp.getRow
        row.addString("gamer_id", oldRow.getString("gamer_id"))
        row.addString("eff_to", simpleDateFormat.format(System.currentTimeMillis()))
        row.addString("eff_from", oldRow.getString("eff_from"))
        row.addLong("last_time_played", oldRow.getLong("last_time_played"))
        row.addInt("games_played", oldRow.getInt("games_played"))
        row.addInt("games_won", oldRow.getInt("games_won"))
        row.addInt("oks", oldRow.getInt("oks"))
        row.addInt("deaths", oldRow.getInt("deaths"))
        row.addInt("damage_given", oldRow.getInt("damage_given"))
        row.addInt("damage_taken", oldRow.getInt("damage_taken"))
        row.addInt("max_oks_in_one_game", oldRow.getInt("max_oks_in_one_game"))
        row.addInt("max_deaths_in_one_game", oldRow.getInt("max_deaths_in_one_game"))

        session.apply(oldRecordUpdateOp)
        table.newUpdate()
      } else {
        table.newInsert()
      }

      val row = op.getRow
      row.addString("gamer_id", record.gamerId)
      row.addString("eff_to", "")
      row.addString("eff_from", simpleDateFormat.format(System.currentTimeMillis()))
      row.addLong("last_time_played", record.lastTimePlayed)
      row.addInt("games_played", record.gamesPlayed)
      row.addInt("games_won", record.gamesWon)
      row.addInt("oks", record.oks)
      row.addInt("deaths", record.deaths)
      row.addInt("damage_given", record.damageGiven)
      row.addInt("damage_taken", record.damageTaken)
      row.addInt("max_oks_in_one_game", record.maxOksInOneGame)
      row.addInt("max_deaths_in_one_game", record.maxDeathsInOneGame)

      session.apply(op)
    }
    session.flush()

    kuduClient.close()


  }
} 
Example 42
Source File: DirectDataMultiThreadedInjector.scala    From SparkOnKudu   with Apache License 2.0 5 votes vote down vote up
package org.kududb.spark.demo.gamer.cdc

import java.text.SimpleDateFormat
import java.util.Random
import java.util.concurrent.atomic.AtomicInteger
import java.util.concurrent.{TimeUnit, Executors}

import org.kududb.client.{Operation, PartialRow, KuduClient}
import org.kududb.spark.demo.gamer.aggregates.GamerDataGenerator

object DirectDataMultiThreadedInjector {
  val simpleDateFormat = new SimpleDateFormat("MM,dd,yyyy")
  val random = new Random
  def main(args:Array[String]): Unit = {

    if (args.length < 6) {
      println("<kuduMaster> <tableName> <numberOfRecords> <numberOfThreads> <numberOfGamers> <sleepTime>")
      return
    }

    val kuduMaster = args(0)
    val tableName = args(1)
    val numberOfRecords = args(2).toInt
    val executor = Executors.newFixedThreadPool(args(3).toInt)
    val numberOfGamers = args(4).toInt
    val sleepTime = args(5).toInt

    val kuduClient = new KuduClient.KuduClientBuilder(kuduMaster).build()
    val leftToRun = new AtomicInteger()

    for (i <- 0 to numberOfRecords) {
      leftToRun.incrementAndGet()
      executor.execute(new ApplyNewRecordRunnable(GamerDataGenerator.makeNewGamerRecord(numberOfGamers),
      kuduClient, tableName, leftToRun))
      println("Summited:" + i)

      Thread.sleep(sleepTime)
    }


    // Shut down the pool so awaitTermination can return once all tasks finish.
    executor.shutdown()
    val startTime = System.currentTimeMillis()
    while (!executor.awaitTermination(10000, TimeUnit.SECONDS)) {
      val newTime = System.currentTimeMillis()
      println("> Still Waiting: {Time:" + (newTime - startTime) + ", LeftToRun:" + leftToRun + "}" )
    }


    kuduClient.close()


  }
} 
Example 43
Source File: ApplyNewRecordRunnable.scala    From SparkOnKudu   with Apache License 2.0 5 votes vote down vote up
package org.kududb.spark.demo.gamer.cdc

import java.text.SimpleDateFormat
import java.util.concurrent.atomic.AtomicInteger

import org.kududb.client.{Operation, PartialRow, KuduClient}
import org.kududb.spark.demo.gamer.GamerEvent

class ApplyNewRecordRunnable(val gameEvent: GamerEvent,
                              val kuduClient: KuduClient,
                              val tableName: String,
                              val leftToRun:AtomicInteger) extends Runnable{
  override def run(): Unit = {
    val table = kuduClient.openTable(tableName)
    val session = kuduClient.newSession()
    val simpleDateFormat = new SimpleDateFormat("MM,dd,yyyy")

    val record = gameEvent

    val pr = new PartialRow(table.getSchema)
    pr.addString(0, record.gamerId)
    pr.addString(1, "")
    val scannerRows = kuduClient.newScannerBuilder(table).lowerBound(pr).limit(1).build().nextRows()
    val op:Operation = if (scannerRows.hasNext) {
      println(" >> had next")
      val oldRow = scannerRows.next()

      val oldRecordUpdateOp = table.newInsert()

      val row = oldRecordUpdateOp.getRow
      row.addString("gamer_id", oldRow.getString("gamer_id"))
      row.addString("eff_to", simpleDateFormat.format(record.lastTimePlayed))
      row.addString("eff_from", oldRow.getString("eff_from"))
      row.addLong("last_time_played", oldRow.getLong("last_time_played"))
      row.addInt("games_played", oldRow.getInt("games_played"))
      row.addInt("games_won", oldRow.getInt("games_won"))
      row.addInt("oks", oldRow.getInt("oks"))
      row.addInt("deaths", oldRow.getInt("deaths"))
      row.addInt("damage_given", oldRow.getInt("damage_given"))
      row.addInt("damage_taken", oldRow.getInt("damage_taken"))
      row.addInt("max_oks_in_one_game", oldRow.getInt("max_oks_in_one_game"))
      row.addInt("max_deaths_in_one_game", oldRow.getInt("max_deaths_in_one_game"))

      session.apply(oldRecordUpdateOp)
      table.newUpdate()
    } else {
      table.newInsert()
    }

    val row = op.getRow
    row.addString("gamer_id", record.gamerId)
    row.addString("eff_to", "")
    row.addString("eff_from", simpleDateFormat.format(record.lastTimePlayed))
    row.addLong("last_time_played", record.lastTimePlayed)
    row.addInt("games_played", record.gamesPlayed)
    row.addInt("games_won", record.gamesWon)
    row.addInt("oks", record.oks)
    row.addInt("deaths", record.deaths)
    row.addInt("damage_given", record.damageGiven)
    row.addInt("damage_taken", record.damageTaken)
    row.addInt("max_oks_in_one_game", record.maxOksInOneGame)
    row.addInt("max_deaths_in_one_game", record.maxDeathsInOneGame)

    session.apply(op)

    session.flush()
    leftToRun.decrementAndGet()
    println(" >> finished Submit")
  }
} 
Example 44
Source File: SequenceFileSink.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.streaming.hadoop

import java.text.SimpleDateFormat

import org.apache.hadoop.fs.Path
import org.apache.hadoop.hdfs.HdfsConfiguration
import org.apache.hadoop.io.SequenceFile
import org.apache.gearpump.Message
import org.apache.gearpump.cluster.UserConfig
import org.apache.gearpump.streaming.hadoop.lib.HadoopUtil
import org.apache.gearpump.streaming.hadoop.lib.format.{DefaultSequenceFormatter, OutputFormatter}
import org.apache.gearpump.streaming.hadoop.lib.rotation.{FileSizeRotation, Rotation}
import org.apache.gearpump.streaming.sink.DataSink
import org.apache.gearpump.streaming.task.{TaskContext, TaskId}

class SequenceFileSink(
    userConfig: UserConfig,
    basePath: String,
    rotation: Rotation = new FileSizeRotation(128 * Math.pow(2, 20).toLong),
    sequenceFormat: OutputFormatter = new DefaultSequenceFormatter)
  extends DataSink{
  @transient private lazy val configuration = new HdfsConfiguration()
  private val dateFormat = new SimpleDateFormat("yyyy_MM_dd-HH-mm-ss")
  private var writer: SequenceFile.Writer = null
  private var taskId: TaskId = null
  private var appName: String = null

  
  override def close(): Unit = {
    closeWriter()
  }

  private def closeWriter(): Unit = {
    Option(writer).foreach { w =>
      w.hflush()
      w.close()
    }
  }

  private def getNextWriter: SequenceFile.Writer = {
    SequenceFile.createWriter(
      configuration,
      SequenceFile.Writer.file(getNextFilePath),
      SequenceFile.Writer.keyClass(sequenceFormat.getKeyClass),
      SequenceFile.Writer.valueClass(sequenceFormat.getValueClass)
    )
  }

  private def getNextFilePath: Path = {
    val base = new Path(basePath, s"$appName-task${taskId.processorId}_${taskId.index}")
    new Path(base, dateFormat.format(new java.util.Date))
  }
} 
Example 45
Source File: ImageProcessing.scala    From 006877   with MIT License 5 votes vote down vote up
package aia.routing

import java.text.SimpleDateFormat
import java.util.Date

case class Photo(license: String, speed: Int)

object ImageProcessing {
  val dateFormat = new SimpleDateFormat("ddMMyyyy HH:mm:ss.SSS")
  def getSpeed(image: String): Option[Int] = {
    val attributes = image.split('|')
    if (attributes.size == 3)
      Some(attributes(1).toInt)
    else
      None
  }
  def getTime(image: String): Option[Date] = {
    val attributes = image.split('|')
    if (attributes.size == 3)
      Some(dateFormat.parse(attributes(0)))
    else
      None
  }
  def getLicense(image: String): Option[String] = {
    val attributes = image.split('|')
    if (attributes.size == 3)
      Some(attributes(2))
    else
      None
  }
  def createPhotoString(date: Date, speed: Int): String = {
    createPhotoString(date, speed, " ")
  }

  def createPhotoString(date: Date,
                        speed: Int,
                        license: String): String = {
    "%s|%s|%s".format(dateFormat.format(date), speed, license)
  }
} 
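
A round trip through the helpers above, with invented speed and license values:

import java.util.Date
import aia.routing.ImageProcessing

val photo = ImageProcessing.createPhotoString(new Date(), 60, "ABC-123")
val speed = ImageProcessing.getSpeed(photo)      // Some(60)
val license = ImageProcessing.getLicense(photo)  // Some("ABC-123")
val time = ImageProcessing.getTime(photo)        // Some(java.util.Date)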
Example 46
Source File: ThriftJsonServlet.scala    From incubator-livy   with Apache License 2.0 5 votes vote down vote up
package org.apache.livy.thriftserver.ui

import java.text.SimpleDateFormat

import org.apache.livy.server.JsonServlet
import org.apache.livy.thriftserver.LivyThriftServer


class ThriftJsonServlet(val basePath: String) extends JsonServlet {

  private val df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss z")

  case class SessionInfo(
      sessionId: String,
      livySessionId: String,
      owner: String,
      createdAt: String)

  get("/sessions") {
    val thriftSessions = LivyThriftServer.getInstance.map { server =>
      val sessionManager = server.getSessionManager
      sessionManager.getSessions.map { sessionHandle =>
        val info = sessionManager.getSessionInfo(sessionHandle)
        SessionInfo(sessionHandle.getSessionId.toString,
          sessionManager.livySessionId(sessionHandle).map(_.toString).getOrElse(""),
          info.username,
          df.format(info.creationTime))
      }.toSeq
    }.getOrElse(Seq.empty)
    val from = params.get("from").map(_.toInt).getOrElse(0)
    val size = params.get("size").map(_.toInt).getOrElse(100)

    Map(
      "from" -> from,
      "total" -> thriftSessions.length,
      "sessions" -> thriftSessions.view(from, from + size))
  }
} 
Example 47
package org.sparksamples.gmm

import java.text.SimpleDateFormat

import org.apache.spark.SparkConf
import org.apache.spark.ml.clustering.{GaussianMixture}
import org.apache.spark.sql.SparkSession


object GMMClusteringPersist {
  val PATH = "/home/ubuntu/work/spark-2.0.0-bin-hadoop2.7/"
  val BASE = "./data/movie_lens_libsvm_2f"

  val time = System.currentTimeMillis()
  val formatter = new SimpleDateFormat("dd_MM_yyyy_hh_mm_ss")

  import java.util.Calendar
  val calendar = Calendar.getInstance()
  calendar.setTimeInMillis(time)
  val date_time = formatter.format(calendar.getTime())

  def main(args: Array[String]): Unit = {

    val spConfig = (new SparkConf).setMaster("local[1]").setAppName("SparkApp").
      set("spark.driver.allowMultipleContexts", "true")

    val spark = SparkSession
      .builder()
      .appName("Spark SQL Example")
      .config(spConfig)
      .getOrCreate()

    val datasetUsers = spark.read.format("libsvm").load(
      BASE + "/movie_lens_2f_users_libsvm/part-00000")
    datasetUsers.show(3)

    val gmmUsers = new GaussianMixture().setK(5).setSeed(1L)
    gmmUsers.setMaxIter(20)
    val modelUsers = gmmUsers.fit(datasetUsers)

    val predictedDataSetUsers = modelUsers.transform(datasetUsers)
    val predictionsUsers = predictedDataSetUsers.select("prediction").rdd.map(x=> x(0))
    predictionsUsers.saveAsTextFile(BASE + "/prediction/" + date_time + "/gmm_2f_users")


    val dataSetItems = spark.read.format("libsvm").load(BASE +
      "/movie_lens_2f_items_libsvm/part-00000")


    val gmmItems = new GaussianMixture().setK(5).setSeed(1L)
    val modelItems = gmmItems.fit(dataSetItems)

    val predictedDataSetItems = modelItems.transform(dataSetItems)
    val predictionsItems = predictedDataSetItems.select("prediction").rdd.map(x=> x(0))
    predictionsItems.saveAsTextFile(BASE + "/prediction/" + date_time + "/gmm_2f_items")
    spark.stop()
  }
} 
Example 48
package org.sparksamples.kmeans

import java.text.SimpleDateFormat

import org.apache.spark.SparkConf
import org.apache.spark.ml.clustering.BisectingKMeans
import org.apache.spark.sql.SparkSession


object BisectingKMeansPersist {
  val PATH = "/home/ubuntu/work/spark-2.0.0-bin-hadoop2.7/"
  val BASE = "./data/movie_lens_libsvm_2f"

  val time = System.currentTimeMillis()
  val formatter = new SimpleDateFormat("dd_MM_yyyy_hh_mm_ss")

  import java.util.Calendar
  val calendar = Calendar.getInstance()
  calendar.setTimeInMillis(time)
  val date_time = formatter.format(calendar.getTime())

  def main(args: Array[String]): Unit = {

    val spConfig = (new SparkConf).setMaster("local[1]").setAppName("SparkApp").
      set("spark.driver.allowMultipleContexts", "true")

    val spark = SparkSession
      .builder()
      .appName("Spark SQL Example")
      .config(spConfig)
      .getOrCreate()

    val datasetUsers = spark.read.format("libsvm").load(
      BASE + "/movie_lens_2f_users_xy/part-00000")
    datasetUsers.show(3)
    val bKMeansUsers = new BisectingKMeans()
    bKMeansUsers.setMaxIter(10)
    bKMeansUsers.setMinDivisibleClusterSize(5)

    val modelUsers = bKMeansUsers.fit(datasetUsers)
    val predictedUserClusters = modelUsers.transform(datasetUsers)

    modelUsers.clusterCenters.foreach(println)
    val predictedDataSetUsers = modelUsers.transform(datasetUsers)
    val predictionsUsers = predictedDataSetUsers.select("prediction").rdd.map(x=> x(0))
    predictionsUsers.saveAsTextFile(BASE + "/prediction/" + date_time + "/bkmeans_2f_users")


    val datasetItems = spark.read.format("libsvm").load(BASE +
      "/movie_lens_2f_items_xy/part-00000")
    datasetItems.show(3)

    val kmeansItems = new BisectingKMeans().setK(5).setSeed(1L)
    val modelItems = kmeansItems.fit(datasetItems)


    val predictedDataSetItems = modelItems.transform(datasetItems)
    val predictionsItems = predictedDataSetItems.select("prediction").rdd.map(x=> x(0))
    predictionsItems.saveAsTextFile(BASE + "/prediction/" + date_time + "/bkmeans_2f_items")
    spark.stop()
  }
} 
Example 49
Source File: ScalaApp.scala    From Machine-Learning-with-Spark-Second-Edition   with MIT License 5 votes vote down vote up
import java.text.SimpleDateFormat
import java.util.Calendar

import org.apache.spark.SparkContext
import org.apache.spark.mllib.recommendation.{ALS, Rating}

//import org.apache.spark.

    val predictedRating = model.predict(789, 123)
    println(predictedRating)
    val userId = 789
    val K = 10
    val topKRecs = model.recommendProducts(userId, K)
    println(topKRecs.mkString("\n"))

    val movies = sc.textFile(PATH + "/ml-100k/u.item")
    val titles = movies.map(line => line.split("\\|").take(2)).map(array => (array(0).toInt, array(1))).collectAsMap()
    titles(123)
    // res68: String = Frighteners, The (1996)
    val moviesForUser = ratings.keyBy(_.user).lookup(789)
    // moviesForUser: Seq[org.apache.spark.mllib.recommendation.Rating] = WrappedArray(Rating(789,1012,4.0), Rating(789,127,5.0), Rating(789,475,5.0), Rating(789,93,4.0), ...
    // ...
    println(moviesForUser.size)
    moviesForUser.sortBy(-_.rating).take(10).map(rating => (titles(rating.product), rating.rating)).foreach(println)
    topKRecs.map(rating => (titles(rating.product), rating.rating)).foreach(println)
    sc.stop()
    //bw.close()
  }

  class Util {
    def getDate(): String = {
      val today = Calendar.getInstance().getTime()
      // (2) create a date "formatter" (the date format we want)
      val formatter = new SimpleDateFormat("yyyy-MM-dd-hh.mm.ss")
   
      // (3) create a new String using the date format we want
      val folderName = formatter.format(today)
      return folderName
    }
  }

} 
Example 50
Source File: Util.scala    From Machine-Learning-with-Spark-Second-Edition   with MIT License 5 votes vote down vote up
package com.sparksample

import java.text.SimpleDateFormat
import java.util.Calendar

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import org.jblas.DoubleMatrix

object Util {
  val PATH = "../.."
  val spConfig = (new SparkConf).setMaster("local").setAppName("SparkApp")
  var sc = new SparkContext(spConfig)

  def getMovieData() : RDD[String] = {
    val movie_data = sc.textFile(PATH + "/data/ml-100k/u.item")
    return movie_data
  }
  def getUserData() : RDD[String] = {
    val user_data = sc.textFile(PATH + "/data/ml-100k/u.data")
    return user_data
  }
  def getDate(): String = {
    val today = Calendar.getInstance().getTime()
    // (2) create a date "formatter" (the date format we want)
    val formatter = new SimpleDateFormat("yyyy-MM-dd-hh.mm.ss")

    // (3) create a new String using the date format we want
    val folderName = formatter.format(today)
    return folderName
  }

  def cosineSimilarity(vec1: DoubleMatrix, vec2: DoubleMatrix): Double = {
    vec1.dot(vec2) / (vec1.norm2() * vec2.norm2())
  }

  def avgPrecisionK(actual: Seq[Int], predicted: Seq[Int], k: Int): Double = {
    val predK = predicted.take(k)
    var score = 0.0
    var numHits = 0.0
    for ((p, i) <- predK.zipWithIndex) {
      if (actual.contains(p)) {
        numHits += 1.0
        score += numHits / (i.toDouble + 1.0)
      }
    }
    if (actual.isEmpty) {
      1.0
    } else {
      score / scala.math.min(actual.size, k).toDouble
    }
  }

} 
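
A worked example for avgPrecisionK above, with invented ids; note that touching object Util also initializes its SparkContext, so run it where that is acceptable:

import com.sparksample.Util

// predK = Seq(2, 1); both are hits, so score = 1/1 + 2/2 = 2.0, divided by min(3, 2) = 2 gives 1.0.
val apk = Util.avgPrecisionK(actual = Seq(1, 2, 3), predicted = Seq(2, 1, 4), k = 2)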
Example 51
Source File: PMMLModelExport.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.mllib.pmml.export

import java.text.SimpleDateFormat
import java.util.{Date, Locale}

import scala.beans.BeanProperty

import org.dmg.pmml.{Application, Header, PMML, Timestamp}

private[mllib] trait PMMLModelExport {

  
  @BeanProperty
  val pmml: PMML = {
    val version = getClass.getPackage.getImplementationVersion
    val app = new Application("Apache Spark MLlib").setVersion(version)
    val timestamp = new Timestamp()
      .addContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US).format(new Date()))
    val header = new Header()
      .setApplication(app)
      .setTimestamp(timestamp)
    new PMML("4.2", header, null)
  }
} 
Example 52
Source File: JacksonMessageWriter.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.status.api.v1

import java.io.OutputStream
import java.lang.annotation.Annotation
import java.lang.reflect.Type
import java.nio.charset.StandardCharsets
import java.text.SimpleDateFormat
import java.util.{Calendar, Locale, SimpleTimeZone}
import javax.ws.rs.Produces
import javax.ws.rs.core.{MediaType, MultivaluedMap}
import javax.ws.rs.ext.{MessageBodyWriter, Provider}

import com.fasterxml.jackson.annotation.JsonInclude
import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}


@Provider
@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{

  val mapper = new ObjectMapper() {
    override def writeValueAsString(t: Any): String = {
      super.writeValueAsString(t)
    }
  }
  mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule)
  mapper.enable(SerializationFeature.INDENT_OUTPUT)
  mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL)
  mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat)

  override def isWriteable(
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType): Boolean = {
      true
  }

  override def writeTo(
      t: Object,
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType,
      multivaluedMap: MultivaluedMap[String, AnyRef],
      outputStream: OutputStream): Unit = {
    t match {
      case ErrorWrapper(err) => outputStream.write(err.getBytes(StandardCharsets.UTF_8))
      case _ => mapper.writeValue(outputStream, t)
    }
  }

  override def getSize(
      t: Object,
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType): Long = {
    -1L
  }
}

private[spark] object JacksonMessageWriter {
  def makeISODateFormat: SimpleDateFormat = {
    val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'", Locale.US)
    val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT"))
    iso8601.setCalendar(cal)
    iso8601
  }
} 
Example 53
Source File: SimpleDateParam.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.status.api.v1

import java.text.{ParseException, SimpleDateFormat}
import java.util.{Locale, TimeZone}
import javax.ws.rs.WebApplicationException
import javax.ws.rs.core.Response
import javax.ws.rs.core.Response.Status

private[v1] class SimpleDateParam(val originalValue: String) {

  val timestamp: Long = {
    val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz", Locale.US)
    try {
      format.parse(originalValue).getTime()
    } catch {
      case _: ParseException =>
        val gmtDay = new SimpleDateFormat("yyyy-MM-dd", Locale.US)
        gmtDay.setTimeZone(TimeZone.getTimeZone("GMT"))
        try {
          gmtDay.parse(originalValue).getTime()
        } catch {
          case _: ParseException =>
            throw new WebApplicationException(
              Response
                .status(Status.BAD_REQUEST)
                .entity("Couldn't parse date: " + originalValue)
                .build()
            )
        }
    }
  }
} 
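
For illustration only (the class is package-private to org.apache.spark.status.api.v1), the two input shapes it accepts:

// Full timestamp with a time zone designator, per "yyyy-MM-dd'T'HH:mm:ss.SSSz".
val t1 = new SimpleDateParam("2015-02-10T00:00:00.000GMT").timestamp
// A bare day, interpreted in GMT.
val t2 = new SimpleDateParam("2015-02-10").timestamp
// Anything else results in a WebApplicationException carrying a 400 Bad Request response.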
Example 54
Source File: PlainText.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.eidos.utils.meta

import java.text.SimpleDateFormat
import java.util.Calendar
import java.util.TimeZone

import org.clulab.wm.eidos.utils.EidosException
import org.clulab.timenorm.scate.SimpleInterval
import org.clulab.wm.eidos.context.DCT
import org.clulab.wm.eidos.document.Metadata

class PlainText(text: String,
  titleOpt: Option[String] = None,
  idOpt: Option[String] = None,
  dateOpt: Option[String] = None,
  locationOpt: Option[String] = None
) extends EidosText {
  protected val metadata = {
    val dctOpt: Option[DCT] = {
      dateOpt.map { date =>
        val calendar = try {
          val parsed = PlainText.dateFormat.parse(date)
          val calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"))

          calendar.setTime(parsed)
          calendar
        }
        catch {
          case throwable: Throwable =>
            throw new EidosException(s"""Could not decipher "${date}" as a date""", throwable)
        }

        val simpleInterval = SimpleInterval.of(calendar.get(Calendar.YEAR), calendar.get(Calendar.MONTH) + 1, calendar.get(Calendar.DAY_OF_MONTH))
        DCT(simpleInterval, date)
      }
    }

    new Metadata(dctOpt, idOpt, titleOpt, locationOpt)
  }

  def getText: String = text

  def getMetadata: Metadata = metadata
}

object PlainText {
  protected val dateFormat: SimpleDateFormat = {
    val dateFormat = new SimpleDateFormat("yyyy-MM-dd")
    val timeZone = TimeZone.getTimeZone("UTC")

    dateFormat.setTimeZone(timeZone)
    dateFormat
  }
} 
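
A construction sketch for the class above; the text, title, and date are invented:

import org.clulab.wm.eidos.utils.meta.PlainText

val doc = new PlainText(
  "Rainfall was well below average during the 2011 planting season.",
  titleOpt = Some("Example report"),
  dateOpt = Some("2011-05-30"))

// The metadata carries a DCT covering the whole UTC day 2011-05-30.
val metadata = doc.getMetadata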
Example 55
Source File: CustomScalarSpec.scala    From sangria   with Apache License 2.0 5 votes vote down vote up
package sangria.schema

import java.text.SimpleDateFormat
import java.util.Date

import sangria.ast
import sangria.util.Pos
import sangria.util.SimpleGraphQlSupport._
import sangria.validation.ValueCoercionViolation

import scala.util.{Failure, Success, Try}
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

class CustomScalarSpec extends AnyWordSpec with Matchers {
  "Schema" should {
    "allow to define custom scalar types" in {
      val dateFormat = new SimpleDateFormat("yyyy-MM-dd")

      case object DateCoercionViolation extends ValueCoercionViolation("Date value expected")

      def parseDate(s: String) = Try(dateFormat.parse(s)) match {
        case Success(d) => Right(d)
        case Failure(error) => Left(DateCoercionViolation)
      }

      val DateType = ScalarType[Date]("Date",
        description = Some("An example of date scalar type"),
        coerceOutput = (d, _) => dateFormat.format(d),
        coerceUserInput = {
          case s: String => parseDate(s)
          case _ => Left(DateCoercionViolation)
        },
        coerceInput = {
          case ast.StringValue(s, _, _, _, _) => parseDate(s)
          case _ => Left(DateCoercionViolation)
        })

      val DateArg = Argument("dateInput", DateType)

      val QueryType = ObjectType("Query", fields[Unit, Unit](
        Field("foo", DateType,
          arguments = DateArg :: Nil,
          resolve = ctx => {
            val date: Date = ctx.arg(DateArg)
            new Date(date.getTime + 1000 * 60 * 60 * 24 * 5)
          })
      ))

      val schema = Schema(QueryType)

      check(schema, (),
        """
          {
            foo(dateInput: "2015-05-11")
          }
        """,
        Map("data" -> Map("foo" -> "2015-05-16"))
      )

      checkContainsErrors(schema, (),
        """
          {
            foo(dateInput: "2015-05-test")
          }
        """,
        null,
        List("""Expected type 'Date!', found '"2015-05-test"'. Date value expected""" -> List(Pos(3, 28)))
      )
    }
  }
} 
Example 56
Source File: CarbonLoadParams.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.command.management

import java.text.SimpleDateFormat
import java.util

import scala.collection.mutable

import org.apache.hadoop.conf.Configuration
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.execution.command.UpdateTableModel
import org.apache.spark.sql.execution.datasources.LogicalRelation

import org.apache.carbondata.core.indexstore.PartitionSpec
import org.apache.carbondata.core.statusmanager.SegmentStatus
import org.apache.carbondata.core.util.CarbonProperties
import org.apache.carbondata.events.OperationContext
import org.apache.carbondata.processing.loading.model.CarbonLoadModel


case class CarbonLoadParams(
    sparkSession: SparkSession,
    tableName: String,
    sizeInBytes: Long,
    isOverwriteTable: Boolean,
    carbonLoadModel: CarbonLoadModel,
    hadoopConf: Configuration,
    logicalPartitionRelation: LogicalRelation,
    dateFormat : SimpleDateFormat,
    timeStampFormat : SimpleDateFormat,
    optionsOriginal: Map[String, String],
    finalPartition : Map[String, Option[String]],
    currPartitions: util.List[PartitionSpec],
    partitionStatus : SegmentStatus,
    var dataFrame: Option[DataFrame],
    scanResultRDD : Option[RDD[InternalRow]],
    updateModel: Option[UpdateTableModel],
    operationContext: OperationContext) {
} 
Example 57
Source File: TestUpdateAndDeleteWithLargeData.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.carbondata.spark.testsuite.iud

import java.text.SimpleDateFormat

import org.apache.spark.sql.test.util.QueryTest
import org.apache.spark.sql.{DataFrame, Row, SaveMode}
import org.scalatest.BeforeAndAfterAll

import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.util.CarbonProperties

class TestUpdateAndDeleteWithLargeData extends QueryTest with BeforeAndAfterAll {
  var df: DataFrame = _

  override def beforeAll {
    dropTable()
    buildTestData()
  }

  private def buildTestData(): Unit = {

    CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "yyyy-MM-dd")

    // Simulate data and write to table orders
    import sqlContext.implicits._

    val sdf = new SimpleDateFormat("yyyy-MM-dd")
    df = sqlContext.sparkSession.sparkContext.parallelize(1 to 1500000)
      .map(value => (value, new java.sql.Date(sdf.parse("2015-07-" + (value % 10 + 10)).getTime),
        "china", "aaa" + value, "phone" + 555 * value, "ASD" + (60000 + value), 14999 + value,
        "ordersTable" + value))
      .toDF("o_id", "o_date", "o_country", "o_name",
        "o_phonetype", "o_serialname", "o_salary", "o_comment")
    createTable()

  }

  private def createTable(): Unit = {
    df.write
      .format("carbondata")
      .option("tableName", "orders")
      .option("tempCSV", "true")
      .option("compress", "true")
      .mode(SaveMode.Overwrite)
      .save()
  }

  private def dropTable() = {
    sql("DROP TABLE IF EXISTS orders")

  }

  test("test the update and delete delete functionality for large data") {

    sql(
      """
            update ORDERS set (o_comment) = ('yyy')""").show()
    checkAnswer(sql(
      """select o_comment from orders limit 2 """), Seq(Row("yyy"), Row("yyy")))

    sql("delete from orders where exists (select 1 from orders)")

    checkAnswer(sql(
      """
           SELECT count(*) FROM orders
           """), Row(0))
  }

} 
Example 58
Source File: RowStreamParserImp.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.carbondata.streaming.parser

import java.text.SimpleDateFormat
import java.util

import org.apache.hadoop.conf.Configuration
import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.StructType

import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.processing.loading.ComplexDelimitersEnum
import org.apache.carbondata.processing.loading.constants.DataLoadProcessorConstants


class RowStreamParserImp extends CarbonStreamParser {

  var configuration: Configuration = null
  var isVarcharTypeMapping: Array[Boolean] = null
  var structType: StructType = null
  var encoder: ExpressionEncoder[Row] = null

  var timeStampFormat: SimpleDateFormat = null
  var dateFormat: SimpleDateFormat = null
  var complexDelimiters: util.ArrayList[String] = new util.ArrayList[String]()
  var serializationNullFormat: String = null

  override def initialize(configuration: Configuration,
      structType: StructType, isVarcharTypeMapping: Array[Boolean]): Unit = {
    this.configuration = configuration
    this.structType = structType
    this.encoder = RowEncoder.apply(this.structType).resolveAndBind()
    this.isVarcharTypeMapping = isVarcharTypeMapping

    this.timeStampFormat = new SimpleDateFormat(
      this.configuration.get(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT))
    this.dateFormat = new SimpleDateFormat(
      this.configuration.get(CarbonCommonConstants.CARBON_DATE_FORMAT))
    this.complexDelimiters.add(this.configuration.get("carbon_complex_delimiter_level_1"))
    this.complexDelimiters.add(this.configuration.get("carbon_complex_delimiter_level_2"))
    this.complexDelimiters.add(this.configuration.get("carbon_complex_delimiter_level_3"))
    this.complexDelimiters.add(ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_4.value())
    this.serializationNullFormat =
      this.configuration.get(DataLoadProcessorConstants.SERIALIZATION_NULL_FORMAT)
  }

  override def parserRow(value: InternalRow): Array[Object] = {
    this.encoder.fromRow(value).toSeq.zipWithIndex.map { case (x, i) =>
      FieldConverter.objectToString(
        x, serializationNullFormat, complexDelimiters,
        timeStampFormat, dateFormat,
        isVarcharType = i < this.isVarcharTypeMapping.length && this.isVarcharTypeMapping(i),
        binaryCodec = null)
    }.toArray
  }

  override def close(): Unit = {
  }

} 
Example 59
Source File: Commons.scala    From spark-structured-streaming   with MIT License 5 votes vote down vote up
package com.kafkaToSparkToCass


import java.sql.Timestamp
import java.text.{DateFormat, SimpleDateFormat}

object Commons {

  case class UserEvent(user_id: String, time: Timestamp, event: String)
      extends Serializable

  def getTimeStamp(timeStr: String): Timestamp = {
    val dateFormat1: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    val dateFormat2: DateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss")

    val date: Option[Timestamp] = {
      try {
        Some(new Timestamp(dateFormat1.parse(timeStr).getTime))
      } catch {
        case e: java.text.ParseException =>
          Some(new Timestamp(dateFormat2.parse(timeStr).getTime))
      }
    }
    date.getOrElse(Timestamp.valueOf(timeStr))
  }

} 
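
The two timestamp layouts handled above, with made-up values:

import com.kafkaToSparkToCass.Commons

val t1 = Commons.getTimeStamp("2017-03-01 12:30:45")   // matches "yyyy-MM-dd HH:mm:ss"
val t2 = Commons.getTimeStamp("2017-03-01T12:30:45")   // falls back to "yyyy-MM-dd'T'HH:mm:ss"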
Example 60
Source File: DateUtils.scala    From common4s   with Apache License 2.0 5 votes vote down vote up
package commons.mapper.utils

import java.text.{ ParseException, ParsePosition, SimpleDateFormat }
import java.util.Date


	def parseDateWithLeniency(str : String, parsePatterns : Array[String], lenient : Boolean) : Date = {
		if (str == null || parsePatterns == null) {
			throw new IllegalArgumentException("Date and Patterns must not be null");
		}

		val parser = new SimpleDateFormat();
		parser.setLenient(lenient);
		val pos = new ParsePosition(0);

		for (parsePattern <- parsePatterns) {

			var pattern = parsePattern;

			// LANG-530 - need to make sure 'ZZ' output doesn't get passed to SimpleDateFormat
			if (parsePattern.endsWith("ZZ")) {
				pattern = pattern.substring(0, pattern.length() - 1);
			}

			parser.applyPattern(pattern);
			pos.setIndex(0);

			var str2 = str;
			// LANG-530 - need to make sure 'ZZ' output doesn't hit SimpleDateFormat as it will ParseException
			if (parsePattern.endsWith("ZZ")) {
				str2 = str.replaceAll("([-+][0-9][0-9]):([0-9][0-9])$", "$1$2");
			}

			val date = parser.parse(str2, pos);
			if (date != null && pos.getIndex() == str2.length()) {
				return date;
			}
		}

		throw new ParseException("Unable to parse the date: " + str, -1);
	}
} 
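A hedged usage sketch (patterns and inputs are illustrative): for a pattern ending in "ZZ", the method shortens it to a single 'Z' and rewrites a colon-separated offset such as "+01:00" into "+0100" before SimpleDateFormat sees it.

import commons.mapper.utils.DateUtils

object ParseDateWithLeniencyExample extends App {
  val patterns = Array("yyyy-MM-dd'T'HH:mm:ssZZ", "yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd")

  // Strict parsing; the ZZ pattern accepts the ISO-8601 style offset.
  println(DateUtils.parseDateWithLeniency("2016-12-01T09:28:01+01:00", patterns, lenient = false))
  // Falls through to the date-only pattern.
  println(DateUtils.parseDateWithLeniency("2016-12-01", patterns, lenient = false))
}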
Example 61
Source File: JsonUtil.scala    From ionroller   with MIT License 5 votes vote down vote up
package ionroller

import java.text.SimpleDateFormat

import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper
import play.api.libs.json.Json

object JsonUtil {

  object Implicits {
    implicit class Unmarshallable(unMarshallMe: String) {
      def toMap: Map[String, Any] = JsonUtil.toMap(unMarshallMe)

      def toMapOf[V]()(implicit m: Manifest[V]): Map[String, V] = JsonUtil.toMap[V](unMarshallMe)

      def fromJson[T]()(implicit m: Manifest[T]): T = JsonUtil.fromJson[T](unMarshallMe)
    }

    implicit class Marshallable[T](marshallMe: T) {
      def toJson: String = JsonUtil.toJson(marshallMe)

      def toJsonValue = Json.parse(JsonUtil.toJson(marshallMe))
    }
  }

  val mapper = new ObjectMapper() with ScalaObjectMapper
  mapper.registerModule(DefaultScalaModule)
  mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
  mapper.setDateFormat(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS"))

  def toJson(value: Map[Symbol, Any]): String = {
    toJson(value map { case (k, v) => k.name -> v })
  }

  def toJson(value: Any): String = {
    mapper.writeValueAsString(value)
  }

  def toMap[V](json: String)(implicit m: Manifest[V]) = fromJson[Map[String, V]](json)

  def fromJson[T](json: String)(implicit m: Manifest[T]): T = {
    mapper.readValue[T](json)
  }
} 
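A short, hedged sketch of the mapper above (the User case class is made up for illustration); Date fields are written with the configured "yyyy-MM-dd'T'HH:mm:ss.SSS" SimpleDateFormat and read back the same way:

import ionroller.JsonUtil.Implicits._

case class User(name: String, createdAt: java.util.Date)

object JsonUtilExample extends App {
  val json = User("alice", new java.util.Date()).toJson
  println(json)                    // e.g. {"name":"alice","createdAt":"2016-12-01T09:28:01.185"}
  println(json.fromJson[User]())   // round-trips through the same date format
}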
Example 62
Source File: DateUtil.scala    From real-time-stream-processing-engine   with Apache License 2.0 5 votes vote down vote up
package com.knoldus.streaming.util

import java.text.SimpleDateFormat
import java.util.Date


object DateUtil {


  private val dateFormats = List(
    "yyyyMMdd'T'HHmmss.SSSZ",
    "EEE, dd MMM yyyy HH:mm:ss Z",
    "yyyy-MM-dd HH:mm:ss",
    "EEE MMM dd HH:mm:ss Z yyyy",
    "MMM dd, yyyy, HH:mm a",
    "MMM dd, yyyy HH:mm a",
    "yyyy-MM-dd'T'HH:mm:ss",
    "dd MMM yyyy HH:mm:ss:S Z",
    "E MMM dd HH:mm:ss z yyyy",
    "dd MMM yyyy HH:mm:ss:SSS",
    "dd MMM yyyy H:mm:ss:SSS",
    "MM-dd-yyyy HH:mm:ss:SSS",
    "MM/dd/yyyy HH:mm:ss:SSS",
    "dd/MM/yyyy HH:mm:ss:SSS",
    "dd-MM-yyyy HH:mm:ss:SSS",
    "MMM/dd/yyyy HH:mm:ss:SSS",
    "MMM-dd-yyyy HH:mm:ss:SSS",
    "dd-MMM-yyyy HH:mm:ss:SSS",
    "MM-dd-yyyy H:mm:ss:SSS",
    "MM/dd/yyyy H:mm:ss:SSS",
    "dd/MM/yyyy H:mm:ss:SSS",
    "dd-MM-yyyy H:mm:ss:SSS",
    "MMM/dd/yyyy H:mm:ss:SSS",
    "MMM-dd-yyyy H:mm:ss:SSS",
    "dd-MMM-yyyy H:mm:ss:SSS",
    "MM-dd-yyyy HH:mm:ss",
    "MM/dd/yyyy HH:mm:ss",
    "dd/MM/yyyy HH:mm:ss",
    "dd-MM-yyyy HH:mm:ss",
    "MMM/dd/yyyy HH:mm:ss",
    "MMM-dd-yyyy HH:mm:ss",
    "dd-MMM-yyyy HH:mm:ss",
    "MM-dd-yyyy H:mm:ss",
    "MM/dd/yyyy H:mm:ss",
    "dd/MM/yyyy H:mm:ss",
    "dd-MM-yyyy H:mm:ss",
    "MMM/dd/yyyy H:mm:ss",
    "MMM-dd-yyyy H:mm:ss",
    "dd-MMM-yyyy H:mm:ss",
    "yyyy-MM-dd",
    "MM-dd-yyyy",
    "MM/dd/yyyy",
    "dd/MM/yyyy",
    "dd-MM-yyyy",
    "MMM/dd/yyyy",
    "MMM-dd-yyyy",
    "dd-MMM-yyyy")


  private val esDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ")

  def getESDateFormat(dateString: String): String = {
    def getDate(dateFormats: Seq[String], dateString: String): String =
      try {
        val dateFormat = new SimpleDateFormat(dateFormats.head)
        val date = dateFormat.parse(dateString)
        esDateFormat.format(date)
      } catch {
        case _ if (dateFormats.size > 1) => getDate(dateFormats.tail, dateString)
        case _: Exception => esDateFormat.format(new Date())
      }
    getDate(dateFormats, dateString)
  }

} 
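A minimal usage sketch (sample inputs are illustrative, and the name-based patterns assume an English default locale): each candidate pattern is tried in order, and unparseable input falls back to the current time in the Elasticsearch format.

import com.knoldus.streaming.util.DateUtil

object DateUtilExample extends App {
  // Matches "EEE MMM dd HH:mm:ss Z yyyy", the Twitter-style entry in the list.
  println(DateUtil.getESDateFormat("Tue Jul 22 11:24:24 +0000 2019"))
  // Matches the plain "yyyy-MM-dd" pattern.
  println(DateUtil.getESDateFormat("2019-07-22"))
  // No pattern matches, so the current time is returned instead.
  println(DateUtil.getESDateFormat("not a date"))
}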
Example 63
Source File: httpserverplugin_staticfile.scala    From scalabpe   with Apache License 2.0 5 votes vote down vote up
package scalabpe.plugin.http

import java.io.File
import java.net.URLEncoder
import java.text.SimpleDateFormat
import java.util.Calendar
import java.util.GregorianCalendar
import java.util.Locale
import java.util.TimeZone

import scala.collection.mutable.HashMap

import org.jboss.netty.handler.codec.http.HttpHeaders

import scalabpe.core.HashMapStringAny

class StaticFilePlugin extends HttpServerPlugin with HttpServerStaticFilePlugin {

    val ETAG_TAG = "etag"
    val EXPIRE_TAG = "expire"
    val ATTACHMENT = "attachment"
    val FILENAME = "filename"

    val HTTP_DATE_FORMAT = "EEE, dd MMM yyyy HH:mm:ss zzz";
    val HTTP_DATE_GMT_TIMEZONE = "GMT";

    val df_tl = new ThreadLocal[SimpleDateFormat]() {
        override def initialValue(): SimpleDateFormat = {
            val df = new SimpleDateFormat(HTTP_DATE_FORMAT, Locale.US)
            df.setTimeZone(TimeZone.getTimeZone(HTTP_DATE_GMT_TIMEZONE));
            df
        }
    }

    def generateStaticFile(serviceId: Int, msgId: Int, errorCode: Int, errorMessage: String, body: HashMapStringAny, pluginParam: String, headers: HashMap[String, String]): String = {

        if (body.ns(FILENAME) == "") {
            return null
        }

        val filename = body.ns(FILENAME)
        if (!new File(filename).exists()) {
            return null
        }
        if (body.ns(ETAG_TAG) != "") {
            headers.put("ETag", body.ns(ETAG_TAG))
        }

        if (body.ns(EXPIRE_TAG) != "") {
            body.i(EXPIRE_TAG) match {
                case 0 | -1 =>
                    headers.put(HttpHeaders.Names.CACHE_CONTROL, "no-cache")
                case n => // seconds
                    val time = new GregorianCalendar();
                    time.add(Calendar.SECOND, n);
                    headers.put(HttpHeaders.Names.EXPIRES, df_tl.get.format(time.getTime()));
                    headers.put(HttpHeaders.Names.CACHE_CONTROL, "max-age=" + n);
            }
        }

        val ext = parseExt(filename)
        if (ext != "")
            body.put("__file_ext__", ext)

        if (body.ns(ATTACHMENT, "1") == "1") {
            val filename = body.ns(FILENAME)
            val v = "attachment; filename=\"%s\"".format(URLEncoder.encode(parseFilename(filename), "UTF-8"))
            headers.put("Content-Disposition", v)
        }

        filename
    }

    def parseFilename(name: String): String = {
        val p = name.lastIndexOf("/")
        if (p < 0) return name
        name.substring(p + 1)
    }
    def parseExt(name: String): String = {
        val p = name.lastIndexOf(".")
        if (p < 0) return ""
        name.substring(p + 1).toLowerCase()
    }

} 
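The ThreadLocal above exists because SimpleDateFormat is not thread-safe; a standalone sketch of the same Expires-header pattern (names and the 600-second lifetime are illustrative):

import java.text.SimpleDateFormat
import java.util.{Calendar, GregorianCalendar, Locale, TimeZone}

object HttpDateHeaderExample extends App {
  private val df = new ThreadLocal[SimpleDateFormat]() {
    override def initialValue(): SimpleDateFormat = {
      val f = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss zzz", Locale.US)
      f.setTimeZone(TimeZone.getTimeZone("GMT"))
      f
    }
  }

  // Expire 600 seconds from now, rendered as an RFC 1123 HTTP date.
  val time = new GregorianCalendar()
  time.add(Calendar.SECOND, 600)
  println("Expires: " + df.get.format(time.getTime))
  println("Cache-Control: max-age=600")
}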
Example 64
Source File: IlluminaBasecallsToSam.scala    From dagr   with MIT License 5 votes vote down vote up
package dagr.tasks.picard

import java.text.SimpleDateFormat

import dagr.core.execsystem._
import dagr.core.tasksystem.{JvmRanOutOfMemory, VariableResources}
import dagr.tasks.DagrDef.{DirPath, FilePath}
import htsjdk.samtools.util.Iso8601Date
import picard.util.IlluminaUtil.IlluminaAdapterPair

import scala.collection.mutable.ListBuffer

class IlluminaBasecallsToSam(basecallsDir: DirPath,
                             lane: Int,
                             runBarcode: String,
                             readStructure: String,
                             libraryParamsFile: FilePath,
                             runDate: Option[Iso8601Date] = None,
                             sequencingCenter: Option[String] = None,
                             includeNonPfReads: Boolean = false,
                             ignoreUnexpectedBarcodes: Boolean = false,
                             minThreads: Int = 4,
                             maxThreads: Int = 16,
                             adapterPairs: Seq[IlluminaAdapterPair] = Seq(
                               IlluminaAdapterPair.INDEXED,
                               IlluminaAdapterPair.DUAL_INDEXED,
                               IlluminaAdapterPair.NEXTERA_V2,
                               IlluminaAdapterPair.FLUIDIGM
                             ),
                             barcodesDir: Option[DirPath] = None,
                             maxReadsInRamPerTile: Option[Int] = Some(500000),
                             firstTile: Option[Int] = None,
                             tileLimit: Option[Int] = None,
                             tmpDir: Option[DirPath] = None
                            ) extends PicardTask with VariableResources with JvmRanOutOfMemory {

  protected val byMemoryPerThread: Memory = Memory("1GB")
  protected var memoryPerThread: Memory = Memory("2GB")

  
  override def pickResources(resources: ResourceSet): Option[ResourceSet] = {
    Range.inclusive(start=maxThreads, end=minThreads, step= -1)
      .flatMap { cores =>
        resources.subset(Cores(cores), Memory(cores * memoryPerThread.value))
      }.headOption
  }

  override protected def addPicardArgs(buffer: ListBuffer[Any]): Unit = {
    buffer += "BASECALLS_DIR=" + basecallsDir
    buffer += "LANE=" + lane
    buffer += "RUN_BARCODE=" + runBarcode
    barcodesDir.foreach(dir => buffer += "BARCODES_DIR=" + dir)
    runDate.foreach(date => buffer += "RUN_START_DATE=" + new SimpleDateFormat("yyyy/MM/dd").format(date))
    buffer += "SEQUENCING_CENTER=" + sequencingCenter.getOrElse("null")
    buffer += "NUM_PROCESSORS=" + resources.cores.toInt
    buffer += "READ_STRUCTURE=" + readStructure.toString
    buffer += "LIBRARY_PARAMS=" + libraryParamsFile
    buffer += "INCLUDE_NON_PF_READS=" + includeNonPfReads
    if (ignoreUnexpectedBarcodes) buffer += "IGNORE_UNEXPECTED_BARCODES=true"
    if (adapterPairs.isEmpty) buffer += "ADAPTERS_TO_CHECK=null"
    else adapterPairs.foreach(buffer += "ADAPTERS_TO_CHECK=" + _)
    maxReadsInRamPerTile.foreach(n => buffer += "MAX_READS_IN_RAM_PER_TILE=" + n)
    firstTile.foreach(buffer += "FIRST_TILE=" + _) // If set, this is the first tile to be processed (used for debugging).
    tileLimit.foreach(buffer += "TILE_LIMIT=" + _) // If set, process no more than this many tiles (used for debugging).
    tmpDir.foreach(tmp => buffer += "TMP_DIR=" + tmp)
  }
} 
Example 65
Source File: StringToTimestampParser.scala    From bandar-log   with Apache License 2.0 5 votes vote down vote up
package com.aol.one.dwh.infra.parser

import java.text.{DateFormat, SimpleDateFormat}
import java.util.TimeZone

import com.aol.one.dwh.infra.util.{ExceptionPrinter, LogTrait}

import scala.util.control.NonFatal
import scala.util.{Failure, Try}


object StringToTimestampParser extends LogTrait with ExceptionPrinter {

  def parse(value: String, format: String): Option[Long] = {

    Try {
      val dateFormat: DateFormat = new SimpleDateFormat(format)
      dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"))
      dateFormat.parse(value).getTime
    }.recoverWith {
        case NonFatal(e) =>
          logger.error(s"Could not parse value:[$value] using format:[$format]. Catching exception {}", e.getStringStackTrace)
          Failure(e)
    }.toOption
  }
} 
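A hedged usage sketch (values are illustrative); because the parser pins the time zone to UTC, a given input string always maps to the same epoch value regardless of the machine's zone:

import com.aol.one.dwh.infra.parser.StringToTimestampParser

object StringToTimestampParserExample extends App {
  // Some(1480584481000): "2016-12-01 09:28:01" interpreted as UTC.
  println(StringToTimestampParser.parse("2016-12-01 09:28:01", "yyyy-MM-dd HH:mm:ss"))
  // None: the failure is logged and swallowed by recoverWith(...).toOption.
  println(StringToTimestampParser.parse("not-a-date", "yyyy-MM-dd HH:mm:ss"))
}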
Example 66
Source File: Bench.scala    From akka-nbench   with Apache License 2.0 5 votes vote down vote up
package bench

import akka.actor._
import akka.pattern.ask
import akka.util.Timeout

import scala.concurrent.duration._
import scala.reflect.runtime.universe._
import scala.concurrent.Await

import com.typesafe.config._
import net.ceedubs.ficus.Ficus._

import java.util.Properties
import java.nio.file._
import java.util.Date
import java.text.SimpleDateFormat

import Tapper._

object Bench extends App {

  def prepareOutputDirs(): String = {
    val csvDateTimeDir = FileSystems.getDefault().getPath(
      "tests/" + new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date()))
    Files.createDirectories(csvDateTimeDir)
    val csvSymlink = FileSystems.getDefault().getPath("tests/current")
    if(Files.isSymbolicLink(csvSymlink)){
      Files.delete(csvSymlink)
    } else if (Files.exists(csvSymlink)) {
      throw new NotASymbolicLinkException(s"test/current is not a symbolic link. Path: $csvSymlink")
    }
    Files.createSymbolicLink(csvSymlink, csvDateTimeDir.toAbsolutePath)
    csvDateTimeDir.toAbsolutePath.toString
  }

  def parseOptions(): String = {
     val usage = """
         Usage: activator -mem 4096 "run-main bench.Bench scenario_name"
     """
    if (args.length != 1) println(usage)
    return args(0)
  }

  val scenario = parseOptions
  val config = ConfigFactory.load().getConfig(scenario)
  val duration = config.getInt("duration")
  val concurrent = config.getInt("concurrent")
  val csvDateTimeDir = prepareOutputDirs

  val system = ActorSystem("bench")
  val actorProps = Props(classOf[StatsCollector], csvDateTimeDir, config)
  val statsCollector = system.actorOf(actorProps, name = "statscollector")

  val operationsWithRatio: Map[String, Int] = config.as[Map[String, Int]]("operations")
  val numer = operationsWithRatio.values.sum
  if (concurrent < numer){
    val msg = s"concurrent($concurrent) must greater than sum of operations ratio($numer)"
    System.err.println(msg)
    throw new ApplicationConfigException(msg)
  }
  val operations = for((key, value) <- operationsWithRatio) yield {
    List.range(0, concurrent * operationsWithRatio(key) / numer).map(_ => key)
  }

  implicit val timeout = Timeout(duration * 2, SECONDS)
  var driverClz = Class.forName(config.getString("driver"))
  val drivers = operations.flatten.zipWithIndex.map{ case (operation, i) =>
    system.actorOf(Props(driverClz, operation, statsCollector, config).withDispatcher("my-dispatcher"), name = s"driver_$i")
  }

  drivers.par.map(actor => actor ? Ready()).foreach{ f =>
    Await.result(f, timeout.duration).asInstanceOf[OK]
  }

  val startAt = new Date()
  val doUntil = new Date(startAt.getTime + duration * 1000)
  drivers.par.map(actor => actor ? Go(doUntil)).foreach { f =>
    Await.result(f, timeout.duration).asInstanceOf[OK]
  }

  (statsCollector ? TearDown()).tap { f =>
    Await.result(f, timeout.duration).asInstanceOf[OK]
  }

  drivers.par.map(actor => actor ? TearDown()).foreach { f =>
    Await.result(f, timeout.duration).asInstanceOf[OK]
  }

  (drivers.head ? TearDown()).tap { f =>
    Await.result(f, timeout.duration).asInstanceOf[OK]
  }

  system.awaitTermination()
} 
Example 67
Source File: Utils.scala    From graphcool-framework   with Apache License 2.0 5 votes vote down vote up
package cool.graph.rabbit

import java.text.SimpleDateFormat
import java.util.{Date, UUID}
import java.util.concurrent.ThreadFactory
import java.util.concurrent.atomic.AtomicLong

object Utils {
  def timestamp: String = {
    val formatter = new SimpleDateFormat("HH:mm:ss.SSS-dd.MM.yyyy")
    val now       = new Date()
    formatter.format(now)
  }

  def timestampWithRandom: String = timestamp + "-" + UUID.randomUUID()

  def newNamedThreadFactory(name: String): ThreadFactory = new ThreadFactory {
    val count = new AtomicLong(0)

    override def newThread(runnable: Runnable): Thread = {
      val thread = new Thread(runnable)
      thread.setName(s"$name-" + count.getAndIncrement)
      thread.setDaemon(true)
      thread
    }
  }
} 
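A brief usage sketch (the names are made up): the formatted timestamp keeps identifiers human-readable, while the UUID suffix keeps them unique.

import cool.graph.rabbit.Utils

object RabbitUtilsExample extends App {
  println(Utils.timestamp)             // e.g. 09:28:01.185-01.12.2016
  println(Utils.timestampWithRandom)   // same, with a UUID appended
  println("consumer-" + Utils.timestampWithRandom)

  // Threads from the named factory carry an indexed, recognizable name.
  val factory = Utils.newNamedThreadFactory("rabbit-consumer")
  val t = factory.newThread(new Runnable { def run(): Unit = println(Thread.currentThread().getName) })
  t.start()
  t.join()                             // prints e.g. rabbit-consumer-0
}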
Example 68
Source File: LDBCRouter.scala    From Raphtory   with Apache License 2.0 5 votes vote down vote up
package com.raphtory.examples.ldbc.routers

import java.text.SimpleDateFormat
import java.util.Date

import com.raphtory.core.components.Router.RouterWorker
import com.raphtory.core.model.communication.EdgeAdd
import com.raphtory.core.model.communication.EdgeDelete
import com.raphtory.core.model.communication.Type
import com.raphtory.core.model.communication.VertexAdd
import com.raphtory.core.model.communication.VertexDelete
import com.raphtory.examples.random.actors.RandomSpout

class LDBCRouter(override val routerId: Int,override val workerID:Int, override val initialManagerCount: Int) extends RouterWorker {
  override protected def parseTuple(value: Any): Unit = {

    val fileLine           = value.asInstanceOf[String].split("\\|")
    val date               = fileLine(1).substring(0, 10) + fileLine(1).substring(11, 23); //extract the day of the event
    val date2              = fileLine(2).substring(0, 10) + fileLine(1).substring(11, 23); //extract the day of the event
    val creationDate: Long = new SimpleDateFormat("yyyy-MM-ddHH:mm:ss.SSS").parse(date).getTime()
    val deletionDate: Long = new SimpleDateFormat("yyyy-MM-ddHH:mm:ss.SSS").parse(date2).getTime()
    val vertexDeletion = sys.env.getOrElse("LDBC_VERTEX_DELETION", "false").trim.toBoolean
    val edgeDeletion = sys.env.getOrElse("LDBC_EDGE_DELETION", "false").trim.toBoolean
    fileLine(0) match {
      case "person" =>
        sendGraphUpdate(VertexAdd(creationDate, assignID("person" + fileLine(3)), Type("person")))
        //sendGraphUpdate(VertexAdd(creationDate, fileLine(3).toLong,Type("person")))
        if(vertexDeletion)
          sendGraphUpdate(VertexDelete(deletionDate, assignID("person" + fileLine(3))))
      case "person_knows_person" =>
        //sendGraphUpdate(EdgeAdd(creationDate, fileLine(3).toLong,fileLine(4).toLong,Type("person_knows_person")))
        sendGraphUpdate(
                EdgeAdd(
                        creationDate,
                        assignID("person" + fileLine(3)),
                        assignID("person" + fileLine(4)),
                        Type("person_knows_person")
                )
        )
        if(edgeDeletion)
          sendGraphUpdate(EdgeDelete(deletionDate, assignID("person"+fileLine(3)),assignID("person"+fileLine(4))))
    }
  }
}
//2012-11-01T09:28:01.185+00:00|2019-07-22T11:24:24.362+00:00|35184372093644|Jose|Garcia|female|1988-05-20|111.68.47.44|Firefox 
Example 69
Source File: LDBCOldRouter.scala    From Raphtory   with Apache License 2.0 5 votes vote down vote up
package com.raphtory.examples.ldbc.routers

import java.text.SimpleDateFormat
import java.util.Date

import com.raphtory.core.components.Router.RouterWorker
import com.raphtory.core.model.communication.EdgeAdd
import com.raphtory.core.model.communication.EdgeDelete
import com.raphtory.core.model.communication.Type
import com.raphtory.core.model.communication.VertexAdd
import com.raphtory.core.model.communication.VertexDelete

class LDBCOldRouter(override val routerId: Int,override val workerID:Int, override val initialManagerCount: Int) extends RouterWorker {
  override protected def parseTuple(value: Any): Unit = {

    val fileLine = value.asInstanceOf[String].split("\\|")

    //val deletionDate:Long  = new SimpleDateFormat("yyyy-MM-ddHH:mm:ss.SSS").parse(date2).getTime()
    fileLine(0) match {
      case "person" =>
        val date = fileLine(6).substring(0, 10) + fileLine(5).substring(11, 23); //extract the day of the event
        //val date2 = fileLine(2).substring(0, 10) + fileLine(1).substring(11, 23); //extract the day of the event
        val creationDate: Long = new SimpleDateFormat("yyyy-MM-ddHH:mm:ss.SSS").parse(date).getTime()
        sendGraphUpdate(VertexAdd(creationDate, assignID("person" + fileLine(1)), Type("person")))
      //sendGraphUpdate(VertexAdd(creationDate, fileLine(3).toLong,Type("person")))
      //    sendGraphUpdate(VertexDelete(deletionDate, assignID("person"+fileLine(3))))
      case "person_knows_person" =>
        val date = fileLine(3).substring(0, 10) + fileLine(3).substring(11, 23); //extract the day of the event
        //val date2 = fileLine(2).substring(0, 10) + fileLine(1).substring(11, 23); //extract the day of the event
        val creationDate: Long = new SimpleDateFormat("yyyy-MM-ddHH:mm:ss.SSS").parse(date).getTime()
        //sendGraphUpdate(EdgeAdd(creationDate, fileLine(3).toLong,fileLine(4).toLong,Type("person_knows_person")))
        sendGraphUpdate(
                EdgeAdd(
                        creationDate,
                        assignID("person" + fileLine(1)),
                        assignID("person" + fileLine(2)),
                        Type("person_knows_person")
                )
        )
      //sendGraphUpdate(EdgeDelete(deletionDate, assignID("person"+fileLine(3)),assignID("person"+fileLine(4))))
    }
  }
}
//2012-11-01T09:28:01.185+00:00|2019-07-22T11:24:24.362+00:00|35184372093644|Jose|Garcia|female|1988-05-20|111.68.47.44|Firefox 
Example 70
Source File: rumourInteractRouter.scala    From Raphtory   with Apache License 2.0 5 votes vote down vote up
package com.raphtory.examples.twitterRumour

import java.text.SimpleDateFormat

import com.raphtory.core.components.Router.RouterWorker
import com.raphtory.core.model.communication._
import spray.json._

import scala.io.Source

class rumourInteractRouter(override val routerId: Int,override val workerID:Int, val initialManagerCount: Int) extends RouterWorker {

  override protected def parseTuple(cmd: Any): Unit = {
    //println("im at router top...")
    val List(r_status, tweet) = cmd.asInstanceOf[String].split("__").toList
    val json                  = Source.fromFile(tweet)
    for (line <- json.getLines) {
      // println("reading json"+cmd)
      var user = line.parseJson.asJsObject.fields("user").asJsObject
      val post = line.parseJson.asJsObject

      val replyTime = post.fields("created_at").toString.toString.split("\"")(1)
      val source    = user.fields("id").toString
      //      if (source.toLong <0){println("this is converting worng.."+source)
      //      sys.exit()}
      val dist = post.fields("in_reply_to_user_id").toString

      if (dist != "null")
        sendGraphUpdate(
                EdgeAddWithProperties(
                        getTwitterDate(replyTime),
                        source.toLong,
                        dist.toLong,
                        properties = Properties(ImmutableProperty("rumourStatus", r_status))
                )
        )
      else
        sendGraphUpdate(
                VertexAddWithProperties(
                        getTwitterDate(replyTime),
                        source.toLong,
                        properties = Properties(ImmutableProperty("rumourStatus", r_status))
                )
        )
    }
  }

  def getTwitterDate(date: String): Long = {
    // println(">>> converting time...")
    val twitter = "EEE MMM dd HH:mm:ss ZZZZZ yyyy"
    val sf      = new SimpleDateFormat(twitter)
    //println(date)
    try {
      // println("converted time ///"+t)
      return sf.parse(date).getTime()
    } catch {
      case e: Throwable =>
        println("-----time not properly converting" + date)
        sys.exit()
        return 0
    }

  }
} 
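getTwitterDate above relies on the JVM's default locale to resolve "EEE" and "MMM"; a hedged standalone sketch of the same conversion that pins Locale.US (the sample timestamp is illustrative):

import java.text.SimpleDateFormat
import java.util.Locale

object TwitterDateExample extends App {
  val twitterPattern = "EEE MMM dd HH:mm:ss ZZZZZ yyyy"
  // Locale.US keeps day and month names parseable on non-English machines.
  val sf = new SimpleDateFormat(twitterPattern, Locale.US)
  println(sf.parse("Tue Jul 22 11:24:24 +0000 2019").getTime)   // epoch millis
}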
Example 71
Source File: ChainalysisABRouter.scala    From Raphtory   with Apache License 2.0 5 votes vote down vote up
package com.raphtory.examples.blockchain.routers

import java.text.SimpleDateFormat

import com.raphtory.core.components.Router.RouterWorker
import com.raphtory.core.model.communication.Type
import com.raphtory.core.model.communication._

class ChainalysisABRouter(override val routerId: Int,override val workerID:Int, override val initialManagerCount: Int) extends RouterWorker {

  def parseTuple(record: Any): Unit = {
    val dp = formatLine(record.asInstanceOf[String].split(",").map(_.trim))
    val transactionTime = dp.time
    val srcClusterId = assignID(dp.srcCluster.toString)
    val dstClusterId = assignID(dp.dstCluster.toString)
    val transactionId = assignID(dp.txid.toString)
    val btcAmount = dp.amount
    val usdAmount = dp.usd

    sendGraphUpdate(VertexAdd(transactionTime, srcClusterId, Type("Cluster")))
    sendGraphUpdate(VertexAdd(transactionTime, dstClusterId, Type("Cluster")))
    sendGraphUpdate(VertexAdd(transactionTime, transactionId, Type("Transaction")))

    sendGraphUpdate(
      EdgeAddWithProperties(transactionTime,
        srcClusterId,
        transactionId,
        Properties(DoubleProperty("BitCoin", btcAmount), DoubleProperty("USD",usdAmount)),
        Type("Incoming Payment")
      )
    )
    sendGraphUpdate(
      EdgeAddWithProperties(transactionTime,
        transactionId,
        dstClusterId,
        Properties(DoubleProperty("BitCoin", btcAmount), DoubleProperty("USD",usdAmount)),
        Type("Outgoing Payment")
      )
    )

  }

  //converts the line into a case class which has all of the data via the correct name and type
  def formatLine(line: Array[String]): Datapoint =
    Datapoint(
      line(1).toDouble / 100000000, 			//Amount of transaction in BTC
      line(2).toLong, 			            //ID of destination cluster
      line(3).toLong, 			            //ID of source cluster
      line(4).toLong * 1000,            //Time of transaction in seconds (milli in Raph)
      line(5).toLong, 			            //ID of transaction, can be similar for many records
      line(6).toDouble / 100000 			    //Amount of transaction in USD

    )

  def longCheck(data: String): Option[Long] = if (data equals "") None else Some(data.toLong)


  case class Datapoint(
                        amount: Double,     //Amount of transaction in BTC (converted from Satoshi in formatLine)
                        dstCluster: Long,   //ID of destination cluster
                        srcCluster: Long,   //ID of source cluster
                        time: Long,         //Time of transaction in milliseconds
                        txid: Long,         //ID of transaction, can be similar for many records
                        usd: Double         //Amount of transaction in USD
                      )
} 
Example 72
Source File: OutDegree.scala    From Raphtory   with Apache License 2.0 5 votes vote down vote up
package com.raphtory.examples.random.depricated

import java.text.SimpleDateFormat
import java.util.Date

import com.raphtory.core.analysis.API.Analyser
import com.raphtory.core.utils.Utils

import scala.collection.mutable.ArrayBuffer

class OutDegree(args:Array[String]) extends Analyser(args){

  override def analyse(): Unit = {
    var results = ArrayBuffer[Int]()
    proxy.getVerticesSet().foreach { v =>
      val vertex     = proxy.getVertex(v._2)
      val totalEdges = vertex.getOutgoingNeighbors.size
      //  println("Total edges for V "+v+" "+vertex.getOutgoingNeighbors + " "+vertex.getIngoingNeighbors )
      results += totalEdges
    }
    // println("THIS IS HOW RESULTS LOOK: "+ results.groupBy(identity).mapValues(_.size))
    results.groupBy(identity).mapValues(_.size).toList
  }
  override def setup(): Unit = {}

  override def defineMaxSteps(): Int = 1

  override def processResults(results: ArrayBuffer[Any], timeStamp: Long, viewCompleteTime: Long): Unit = {}

  override def processViewResults(results: ArrayBuffer[Any], timestamp: Long, viewCompleteTime: Long): Unit = {
    val output_file = System.getenv().getOrDefault("GAB_PROJECT_OUTPUT", "/app/defout.csv").trim

    val inputFormat  = new SimpleDateFormat("E MMM dd HH:mm:ss z yyyy")
    val outputFormat = new SimpleDateFormat("dd/MM/yyyy HH:mm:ss")
    var finalResults = ArrayBuffer[(Int, Int)]()

    for (kv <- results)
      // println("KV RESULTS: " + kv)
      for (pair <- kv.asInstanceOf[List[(Int, Int)]])
        finalResults += pair

    val currentDate   = new Date(timestamp)
    val formattedDate = outputFormat.format(inputFormat.parse(currentDate.toString))
    var degrees       = finalResults.groupBy(_._1).mapValues(seq => seq.map(_._2).reduce(_ + _)).toList.sortBy(_._1) //.foreach(println)
    for ((degree, total) <- degrees) {
      var text = formattedDate + "," + degree + "," + total
      Utils.writeLines(output_file, text, "Date,OutDegree,Total")

    }
  }

  override def processWindowResults(
      results: ArrayBuffer[Any],
      timestamp: Long,
      windowSize: Long,
      viewCompleteTime: Long
  ): Unit = ???

  override def returnResults(): Any = ???
} 
Example 73
Source File: CitationRouter.scala    From Raphtory   with Apache License 2.0 5 votes vote down vote up
package com.raphtory.examples.citationNetwork

import java.text.SimpleDateFormat

import com.raphtory.core.components.Router.RouterWorker
import com.raphtory.core.model.communication._

class CitationRouter(override val routerId: Int,override val workerID:Int, override val initialManagerCount: Int) extends RouterWorker {

  def parseTuple(record: Any): Unit = {

    val fileLine = record.asInstanceOf[String].split(",").map(_.trim)
    //extract the values from the data source in the form of:
    // 0-sourceNode,1-targetNode,2-sourceCitedTargetOn,3-targetCreationDate,4-targetLastCitedOn
    val sourceNode          = fileLine(0).toInt
    val targetNode          = fileLine(1).toInt
    val sourceCitedTargetOn = dateToUnixTime(timestamp = fileLine(2))
    val targetCreationDate  = dateToUnixTime(timestamp = fileLine(3))
    val targetLastCitedOn   = dateToUnixTime(timestamp = fileLine(4))

    //create sourceNode
    sendGraphUpdate(VertexAdd(sourceCitedTargetOn, sourceNode))
    //create destinationNode
    sendGraphUpdate(VertexAdd(targetCreationDate, targetNode))
    //create edge
    sendGraphUpdate(EdgeAdd(sourceCitedTargetOn, sourceNode, targetNode))

    if (sourceCitedTargetOn == targetLastCitedOn)
      sendGraphUpdate(EdgeDelete(targetLastCitedOn, sourceNode, targetNode))

  }

  def dateToUnixTime(timestamp: => String): Long = {
    //if(timestamp == null) return null;
    println(timestamp)
    val sdf = new SimpleDateFormat("dd/MM/yyyy")
    println(sdf)
    val dt = sdf.parse(timestamp)
    println(dt)
    val epoch = dt.getTime()
    println(epoch)
    epoch / 1000

  }
} 
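A hedged sketch of the same day-to-epoch conversion in isolation (the date is illustrative); "dd/MM/yyyy" carries no zone information, so SimpleDateFormat resolves it to local midnight in the JVM's default time zone:

import java.text.SimpleDateFormat

object DateToUnixTimeExample extends App {
  def dateToUnixTime(timestamp: String): Long = {
    val sdf = new SimpleDateFormat("dd/MM/yyyy")
    sdf.parse(timestamp).getTime / 1000   // epoch seconds at local midnight
  }

  println(dateToUnixTime("01/12/2016"))
}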
Example 74
Source File: S3MigrationHandlerBase.scala    From flyway-awslambda   with MIT License 5 votes vote down vote up
package crossroad0201.aws.flywaylambda

import java.text.SimpleDateFormat
import java.util.Date

import com.amazonaws.services.lambda.runtime.Context
import com.amazonaws.services.s3.AmazonS3
import crossroad0201.aws.flywaylambda.deploy.{FlywayDeployment, S3SourceFlywayDeployer}
import crossroad0201.aws.flywaylambda.migration.{FlywayMigrator, MigrationInfo, MigrationResult}
import spray.json.DefaultJsonProtocol

import scala.util.Try

object MigrationResultProtocol extends DefaultJsonProtocol {
  import spray.json._

  implicit val DateFormat = new RootJsonFormat[Date] {
    override def write(value: Date): JsValue = if (value == null) JsNull else JsString(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'").format(value))
    override def read(json: JsValue): Date = ???
  }
  implicit val migrationInfoFormat = jsonFormat6(MigrationInfo.apply)
  implicit val migrationResultFormat = jsonFormat5(MigrationResult.apply)
}

trait S3MigrationHandlerBase extends FlywayMigrator {

  type ResultJson = String
  type ResultStoredPath = String

  protected def migrate(bucketName: String, prefix: String, flywayConfFileName: String = "flyway.conf")(implicit context: Context, s3Client: AmazonS3): Try[ResultJson] = {
    val logger = context.getLogger

    def resultJson(result: MigrationResult): ResultJson = {
      import MigrationResultProtocol._
      import spray.json._

      result.toJson.prettyPrint
    }

    def storeResult(deployment: FlywayDeployment, result: MigrationResult): ResultStoredPath = {
      val jsonPath = s"${deployment.sourcePrefix}/migration-result.json"
      s3Client.putObject(deployment.sourceBucket, jsonPath, resultJson(result))
      jsonPath
    }

    for {
      // Deploy Flyway resources.
      d <- new S3SourceFlywayDeployer(s3Client, bucketName, prefix, flywayConfFileName).deploy
      _ = {
        logger.log(
          s"""--- Flyway configuration ------------------------------------
             |flyway.url      = ${d.url}
             |flyway.user     = ****
             |flyway.password = ****
             |
             |SQL locations   = ${d.location}
             |SQL files       = ${d.sqlFiles.mkString(", ")}
             |-------------------------------------------------------------
              """.stripMargin)
      }

      // Migrate DB.
      r = migrate(d)
      _ = {
        logger.log(s"${r.message}!. ${r.appliedCount} applied.")
        r.infos.foreach { i =>
          logger.log(s"Version=${i.version}, Type=${i.`type`}, State=${i.state} InstalledAt=${i.installedAt} ExecutionTime=${i.execTime} Description=${i.description}")
        }
      }

      // Store migration result.
      storedPath = storeResult(d, r)
      _ = logger.log(s"Migration result stored to $bucketName/$storedPath.")

    } yield resultJson(r)
  }

} 
Example 75
Source File: CliLogger.scala    From Argus-SAF   with Apache License 2.0 5 votes vote down vote up
package org.argus.jnsaf.native_statistics

import java.io.{File, FileWriter, PrintWriter}
import java.text.SimpleDateFormat
import java.util.Date

 
object CliLogger {
  
  def timeStamp = new SimpleDateFormat("yyyyMMdd-HHmmss").format(new Date)
  
  def outPrint(s : String) {
    scala.Console.out.print(s)
    scala.Console.out.flush()
  }

  def outPrintln(s : String) {
    scala.Console.out.println(s)
    scala.Console.out.flush()
  }

  def outPrintln() {
    scala.Console.out.println()
    scala.Console.out.flush()
  }

  def errPrintln(s : String) {
    scala.Console.err.println(s)
    scala.Console.err.flush()
  }

  def errPrintln() {
    scala.Console.err.println()
    scala.Console.err.flush()
  }
  
  def logError(dir: File, text: String, e: Throwable) {
    outPrintln()
    errPrintln(text + e.getMessage)
    val f = new File(dir, ".errorlog")
    f.getParentFile.mkdirs
    val fw = new FileWriter(f)
    try {
      val pw = new PrintWriter(fw)
      pw.println("An error occurred on " + timeStamp)
      e.printStackTrace(pw)
      fw.close()
      outPrintln("Written: " + f.getAbsolutePath)
    } catch {
      case e : Throwable =>
        errPrintln("Error: " + e.getMessage)
    }
  }
} 
Example 76
Source File: CliLogger.scala    From Argus-SAF   with Apache License 2.0 5 votes vote down vote up
package org.argus.saf.cli.util

import java.io.{File, FileWriter, PrintWriter}
import java.text.SimpleDateFormat
import java.util.Date

 
object CliLogger {
  
  def timeStamp: String = new SimpleDateFormat("yyyyMMdd-HHmmss").format(new Date)
  
  def outPrint(s : String) {
    scala.Console.out.print(s)
    scala.Console.out.flush()
  }

  def outPrintln(s : String) {
    scala.Console.out.println(s)
    scala.Console.out.flush()
  }

  def outPrintln() {
    scala.Console.out.println()
    scala.Console.out.flush()
  }

  def errPrintln(s : String) {
    scala.Console.err.println(s)
    scala.Console.err.flush()
  }

  def errPrintln() {
    scala.Console.err.println()
    scala.Console.err.flush()
  }
  
  def logError(dir: File, text: String, e: Throwable) {
    outPrintln()
    errPrintln(text + e.getMessage)
    val f = new File(dir, ".errorlog")
    f.getParentFile.mkdirs
    val fw = new FileWriter(f)
    try {
      val pw = new PrintWriter(fw)
      pw.println("An error occurred on " + timeStamp)
      e.printStackTrace(pw)
      fw.close()
      outPrintln("Written: " + f.getAbsolutePath)
    } catch {
      case e : Throwable =>
        errPrintln("Error: " + e.getMessage)
    }
  }
} 
Example 77
Source File: StatsSender.scala    From CM-Well   with Apache License 2.0 5 votes vote down vote up
package cmwell.stats

import java.net.{DatagramPacket, DatagramSocket, InetAddress}
import java.util.Calendar
import java.text.SimpleDateFormat
import akka.actor.{Actor, ActorSystem, Props}
import akka.actor.Actor.Receive


case class Message(msg: String, host: String, port: Int)

class SenderActor extends Actor {
  private val dsocket = new DatagramSocket()

  sys.addShutdownHook {
    dsocket.close()
  }

  override def receive: Receive = {
    case Message(msg, host, port) =>
      val address = InetAddress.getByName(host)
      val packet = new DatagramPacket(msg.getBytes(), msg.length, address, port)
      dsocket.send(packet)
  }
}

class StatsSender(path: String, host: String = "localhost", port: Int = 8125) {

  object Sender {
    val system = ActorSystem("mySystem")
    val actor = system.actorOf(Props[SenderActor], "SenderActor")
    def send(message: String) {
      actor ! Message(message, host, port)
    }

  }

  private def getCurrentTimeStr: String = {
    val now = Calendar.getInstance().getTime()
    val dateFormat = new SimpleDateFormat("ddMMyyyy_hhmm")
    dateFormat.format(now)
  }

  private def getMachineName: String = {
    java.net.InetAddress.getLocalHost().getHostName().split('.')(0)
  }

  private def getName(p: String, action: String): String = {
    p.replace("{MachineName}", getMachineName).replace("{DateTime}", getCurrentTimeStr) + "." + action
      .replace(".", "-")
      .replace(" ", "_")
  }

  def sendCounts(action: String, num: Int) {
    val message = getName(path, action) + ":" + num + "|c"
    Sender.send(message)
  }

  def sendTimings(action: String, num: Int) {
    val message = getName(path, action) + ":" + num + "|ms"
    Sender.send(message)
  }

  def sendGauges(action: String, num: Int) {
    val message = getName(path, action) + ":" + num + "|g"
    Sender.send(message)
  }

  def sendSets(action: String) {
    val message = getName(path, action) + "|s"
    Sender.send(message)
  }
} 
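A brief, hedged usage sketch (the metric path is made up): the placeholders are filled with the host name and a ddMMyyyy_hhmm timestamp from SimpleDateFormat, and each call goes out as a StatsD-style UDP packet; note that the embedded ActorSystem keeps the JVM alive after main returns.

import cmwell.stats.StatsSender

object StatsSenderExample extends App {
  val stats = new StatsSender("cmwell.{MachineName}.{DateTime}.ingest")
  stats.sendCounts("docs indexed", 42)    // sent as <resolved path>.docs_indexed:42|c
  stats.sendTimings("bulk write", 130)    // sent as <resolved path>.bulk_write:130|ms
}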
Example 78
Source File: IotMessageConverterTest.scala    From toketi-kafka-connect-iothub   with MIT License 5 votes vote down vote up
// Copyright (c) Microsoft. All rights reserved.

package com.microsoft.azure.iot.kafka.connect.source

import java.text.SimpleDateFormat
import java.time.Instant

import com.microsoft.azure.eventhubs.impl.AmqpConstants
import com.microsoft.azure.iot.kafka.connect.source.testhelpers.DeviceTemperature
import org.apache.kafka.connect.data.Struct
import org.json4s.jackson.Serialization._
import org.scalatest.{FlatSpec, GivenWhenThen}

import scala.collection.mutable
import scala.util.Random

class IotMessageConverterTest extends FlatSpec with GivenWhenThen with JsonSerialization {

  private val random: Random = new Random

  "IotMessage Converter" should "populate right values for kafka message struct fields" in {

    Given("IotMessage object")
    val deviceTemp = DeviceTemperature(100.01, "F")
    val deviceTempStr = write(deviceTemp)

    val sequenceNumber = random.nextLong()
    val correlationId = random.nextString(10)
    val offset = random.nextString(10)
    val enqueuedDate = new SimpleDateFormat("MM/dd/yyyy").parse("12/01/2016")
    val systemProperties = mutable.Map[String, Object](
      "iothub-connection-device-id" → "device10",
      AmqpConstants.SEQUENCE_NUMBER_ANNOTATION_NAME → sequenceNumber.asInstanceOf[Object],
      AmqpConstants.AMQP_PROPERTY_CORRELATION_ID → correlationId,
      AmqpConstants.OFFSET_ANNOTATION_NAME → offset,
      AmqpConstants.ENQUEUED_TIME_UTC_ANNOTATION_NAME → enqueuedDate)

    val timestamp = Instant.now().toString
    val messageProperties = mutable.Map[String, Object](
      "timestamp" → timestamp,
      "contentType" → "temperature"
    )

    val iotMessage = IotMessage(deviceTempStr, systemProperties, messageProperties)

    When("getIotMessageStruct is called with IotMessage object")
    val kafkaMessageStruct: Struct = IotMessageConverter.getIotMessageStruct(iotMessage)

    Then("The struct has all the expected properties")
    assert(kafkaMessageStruct.getString("deviceId") == "device10")
    assert(kafkaMessageStruct.getString("offset") == offset)
    assert(kafkaMessageStruct.getString("contentType") == "temperature")
    assert(kafkaMessageStruct.getString("enqueuedTime") == enqueuedDate.toInstant.toString)
    assert(kafkaMessageStruct.getInt64("sequenceNumber") == sequenceNumber)
    assert(kafkaMessageStruct.getString("content") == deviceTempStr)

    val structSystemProperties = kafkaMessageStruct.getMap[String, String]("systemProperties")
    assert(structSystemProperties != null)
    assert(structSystemProperties.size == 1)
    assert(structSystemProperties.get(AmqpConstants.AMQP_PROPERTY_CORRELATION_ID) == correlationId)

    val structProperties = kafkaMessageStruct.getMap[String, String]("properties")
    assert(structProperties != null)
    assert(structProperties.size == 1)
    assert(structProperties.get("timestamp") == timestamp)
  }

  it should "use default values for missing properties" in {

    val deviceTemp = DeviceTemperature(100.01, "F")
    val deviceTempStr = write(deviceTemp)

    val systemProperties = mutable.Map.empty[String, Object]
    val messageProperties = mutable.Map.empty[String, Object]

    val iotMessage = IotMessage(deviceTempStr, systemProperties, messageProperties)

    When("getIotMessageStruct is called with IotMessage object")
    val kafkaMessageStruct: Struct = IotMessageConverter.getIotMessageStruct(iotMessage)

    Then("The struct has all the expected properties")
    assert(kafkaMessageStruct.getString("deviceId") == "")
    assert(kafkaMessageStruct.getString("offset") == "")
    assert(kafkaMessageStruct.getString("contentType") == "")
    assert(kafkaMessageStruct.getString("enqueuedTime") == "")
    assert(kafkaMessageStruct.getInt64("sequenceNumber") == 0)
    assert(kafkaMessageStruct.getString("content") == deviceTempStr)

    val structSystemProperties = kafkaMessageStruct.getMap[String, String]("systemProperties")
    assert(structSystemProperties != null)
    assert(structSystemProperties.size == 0)

    val structProperties = kafkaMessageStruct.getMap[String, String]("properties")
    assert(structProperties != null)
    assert(structProperties.size == 0)
  }
} 
Example 79
Source File: MockDataReceiver.scala    From toketi-kafka-connect-iothub   with MIT License 5 votes vote down vote up
// Copyright (c) Microsoft. All rights reserved.

package com.microsoft.azure.iot.kafka.connect.source.testhelpers

import java.text.SimpleDateFormat
import java.time.{Duration, Instant}

import com.microsoft.azure.eventhubs.impl.AmqpConstants
import com.microsoft.azure.iot.kafka.connect.source.{DataReceiver, IotMessage, JsonSerialization}
import org.json4s.jackson.Serialization.write

import scala.collection.mutable
import scala.util.Random

class MockDataReceiver(val connectionString: String, val receiverConsumerGroup: String, val partition: String,
    var offset: Option[String], val startTime: Option[Instant], val receiveTimeout: Duration
    ) extends DataReceiver with JsonSerialization {

  private val random: Random = new Random

  override def receiveData(batchSize: Int): Iterable[IotMessage] = {
    val list = scala.collection.mutable.ListBuffer.empty[IotMessage]
    for (i <- 0 until batchSize) {
      list += generateIotMessage(i)
    }
    list
  }

  def generateIotMessage(index: Int): IotMessage = {
    val temp = 70 + random.nextInt(10) + random.nextDouble()
    val deviceTemp = DeviceTemperature(temp, "F")
    val deviceTempStr = write(deviceTemp)

    val systemProperties = mutable.Map[String, Object](
      "iothub-connection-device-id" → s"device$index",
      AmqpConstants.SEQUENCE_NUMBER_ANNOTATION_NAME → index.toLong.asInstanceOf[Object],
      AmqpConstants.AMQP_PROPERTY_CORRELATION_ID → random.nextString(10),
      AmqpConstants.OFFSET_ANNOTATION_NAME → random.nextString(10),
      AmqpConstants.ENQUEUED_TIME_UTC_ANNOTATION_NAME → new SimpleDateFormat("MM/dd/yyyy").parse("12/01/2016"))

    val messageProperties = mutable.Map[String, Object](
      "timestamp" → Instant.now().toString,
      "contentType" → "temperature"
    )

    val iotMessage = IotMessage(deviceTempStr, systemProperties, messageProperties)
    iotMessage
  }

  override def close(): Unit = {}
} 
Example 80
Source File: PMMLModelExport.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.mllib.pmml.export

import java.text.SimpleDateFormat
import java.util.{Date, Locale}

import scala.beans.BeanProperty

import org.dmg.pmml.{Application, Header, PMML, Timestamp}

private[mllib] trait PMMLModelExport {

  
  @BeanProperty
  val pmml: PMML = {
    val version = getClass.getPackage.getImplementationVersion
    val app = new Application("Apache Spark MLlib").setVersion(version)
    val timestamp = new Timestamp()
      .addContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US).format(new Date()))
    val header = new Header()
      .setApplication(app)
      .setTimestamp(timestamp)
    new PMML("4.2", header, null)
  }
} 
Example 81
Source File: JacksonMessageWriter.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.status.api.v1

import java.io.OutputStream
import java.lang.annotation.Annotation
import java.lang.reflect.Type
import java.nio.charset.StandardCharsets
import java.text.SimpleDateFormat
import java.util.{Calendar, Locale, SimpleTimeZone}
import javax.ws.rs.Produces
import javax.ws.rs.core.{MediaType, MultivaluedMap}
import javax.ws.rs.ext.{MessageBodyWriter, Provider}

import com.fasterxml.jackson.annotation.JsonInclude
import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}


@Provider
@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{

  val mapper = new ObjectMapper() {
    override def writeValueAsString(t: Any): String = {
      super.writeValueAsString(t)
    }
  }
  mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule)
  mapper.enable(SerializationFeature.INDENT_OUTPUT)
  mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL)
  mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat)

  override def isWriteable(
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType): Boolean = {
      true
  }

  override def writeTo(
      t: Object,
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType,
      multivaluedMap: MultivaluedMap[String, AnyRef],
      outputStream: OutputStream): Unit = {
    t match {
      case ErrorWrapper(err) => outputStream.write(err.getBytes(StandardCharsets.UTF_8))
      case _ => mapper.writeValue(outputStream, t)
    }
  }

  override def getSize(
      t: Object,
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType): Long = {
    -1L
  }
}

private[spark] object JacksonMessageWriter {
  def makeISODateFormat: SimpleDateFormat = {
    val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'", Locale.US)
    val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT"))
    iso8601.setCalendar(cal)
    iso8601
  }
} 
Example 82
Source File: SimpleDateParam.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.status.api.v1

import java.text.{ParseException, SimpleDateFormat}
import java.util.{Locale, TimeZone}
import javax.ws.rs.WebApplicationException
import javax.ws.rs.core.Response
import javax.ws.rs.core.Response.Status

private[v1] class SimpleDateParam(val originalValue: String) {

  val timestamp: Long = {
    val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz", Locale.US)
    try {
      format.parse(originalValue).getTime()
    } catch {
      case _: ParseException =>
        val gmtDay = new SimpleDateFormat("yyyy-MM-dd", Locale.US)
        gmtDay.setTimeZone(TimeZone.getTimeZone("GMT"))
        try {
          gmtDay.parse(originalValue).getTime()
        } catch {
          case _: ParseException =>
            throw new WebApplicationException(
              Response
                .status(Status.BAD_REQUEST)
                .entity("Couldn't parse date: " + originalValue)
                .build()
            )
        }
    }
  }
} 
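A hedged illustration of the two accepted formats (values are illustrative); because the class is private[v1], the sketch has to live in the same package:

package org.apache.spark.status.api.v1

object SimpleDateParamExample extends App {
  // Full format: yyyy-MM-dd'T'HH:mm:ss.SSSz
  println(new SimpleDateParam("2015-02-10T00:00:00.000GMT").timestamp)
  // Date-only fallback, interpreted as midnight GMT.
  println(new SimpleDateParam("2015-02-10").timestamp)
  // Any other shape raises a WebApplicationException with HTTP 400.
}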
Example 83
Source File: AppAnalyzer.scala    From sparklens   with Apache License 2.0 5 votes vote down vote up
package com.qubole.sparklens.analyzer

import java.util.Date
import java.util.concurrent.TimeUnit

import com.qubole.sparklens.common.AppContext

import scala.collection.mutable.ListBuffer

trait AppAnalyzer {

  // Abstract entry point; the signature is inferred from the companion object below,
  // which calls analyze(appContext) on every registered analyzer and prints the result.
  def analyze(appContext: AppContext): String

  def pd(millis: Long) : String = {
    "%02dm %02ds".format(
      TimeUnit.MILLISECONDS.toMinutes(millis),
      TimeUnit.MILLISECONDS.toSeconds(millis) -
        TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(millis))
    )
  }

  def pcm(millis: Long) : String = {
    val millisForMinutes = millis % (60*60*1000)

    "%02dh %02dm".format(
      TimeUnit.MILLISECONDS.toHours(millis),
      TimeUnit.MILLISECONDS.toMinutes(millisForMinutes))
  }

  implicit class PrintlnStringBuilder(sb: StringBuilder) {
    def println(x: Any): StringBuilder = {
      sb.append(x).append("\n")
    }
    def print(x: Any): StringBuilder = {
      sb.append(x)
    }
  }
}

object AppAnalyzer {
  def startAnalyzers(appContext: AppContext): Unit = {
    val list = new ListBuffer[AppAnalyzer]
    list += new SimpleAppAnalyzer
    list += new HostTimelineAnalyzer
    list += new ExecutorTimelineAnalyzer
    list += new AppTimelineAnalyzer
    list += new JobOverlapAnalyzer
    list += new EfficiencyStatisticsAnalyzer
    list += new ExecutorWallclockAnalyzer
    list += new StageSkewAnalyzer


    list.foreach( x => {
      try {
        val output = x.analyze(appContext)
        println(output)
      } catch {
        case e:Throwable => {
          println(s"Failed in Analyzer ${x.getClass.getSimpleName}")
          e.printStackTrace()
        }
      }
    })
  }

} 
Example 84
Source File: PMMLModelExport.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.mllib.pmml.export

import java.text.SimpleDateFormat
import java.util.Date

import scala.beans.BeanProperty

import org.dmg.pmml.{Application, Header, PMML, Timestamp}

private[mllib] trait PMMLModelExport {

  
  @BeanProperty
  val pmml: PMML = new PMML

  setHeader(pmml)

  private def setHeader(pmml: PMML): Unit = {
    val version = getClass.getPackage.getImplementationVersion
    val app = new Application().withName("Apache Spark MLlib").withVersion(version)
    val timestamp = new Timestamp()
      .withContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(new Date()))
    val header = new Header()
      .withApplication(app)
      .withTimestamp(timestamp)
    pmml.setHeader(header)
  }
} 
Example 85
Source File: DateUtils.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.util

import java.sql.Date
import java.text.SimpleDateFormat
import java.util.{Calendar, TimeZone}

import org.apache.spark.sql.catalyst.expressions.Cast


object DateUtils {
  private val MILLIS_PER_DAY = 86400000

  // Java TimeZone has no mention of thread safety. Use thread local instance to be safe.
  private val LOCAL_TIMEZONE = new ThreadLocal[TimeZone] {
    override protected def initialValue: TimeZone = {
      Calendar.getInstance.getTimeZone
    }
  }

  private def javaDateToDays(d: Date): Int = {
    millisToDays(d.getTime)
  }

  // we should use the exact day as Int, for example, (year, month, day) -> day
  def millisToDays(millisLocal: Long): Int = {
    ((millisLocal + LOCAL_TIMEZONE.get().getOffset(millisLocal)) / MILLIS_PER_DAY).toInt
  }

  private def toMillisSinceEpoch(days: Int): Long = {
    val millisUtc = days.toLong * MILLIS_PER_DAY
    millisUtc - LOCAL_TIMEZONE.get().getOffset(millisUtc)
  }

  def fromJavaDate(date: java.sql.Date): Int = {
    javaDateToDays(date)
  }

  def toJavaDate(daysSinceEpoch: Int): java.sql.Date = {
    new java.sql.Date(toMillisSinceEpoch(daysSinceEpoch))
  }

  def toString(days: Int): String = Cast.threadLocalDateFormat.get.format(toJavaDate(days))

  def stringToTime(s: String): java.util.Date = {
    if (!s.contains('T')) {
      // JDBC escape string
      if (s.contains(' ')) {
        java.sql.Timestamp.valueOf(s)
      } else {
        java.sql.Date.valueOf(s)
      }
    } else if (s.endsWith("Z")) {
      // this is zero timezone of ISO8601
      stringToTime(s.substring(0, s.length - 1) + "GMT-00:00")
    } else if (s.indexOf("GMT") == -1) {
      // timezone with ISO8601
      val inset = "+00.00".length
      val s0 = s.substring(0, s.length - inset)
      val s1 = s.substring(s.length - inset, s.length)
      if (s0.substring(s0.lastIndexOf(':')).contains('.')) {
        stringToTime(s0 + "GMT" + s1)
      } else {
        stringToTime(s0 + ".0GMT" + s1)
      }
    } else {
      // ISO8601 with GMT insert
      val ISO8601GMT: SimpleDateFormat = new SimpleDateFormat( "yyyy-MM-dd'T'HH:mm:ss.SSSz" )
      ISO8601GMT.parse(s)
    }
  }
} 
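A hedged sketch of the three stringToTime branches (inputs are illustrative): JDBC-escape strings go through java.sql, a trailing 'Z' is rewritten to an explicit GMT offset, and other ISO 8601 offsets are folded into the final "yyyy-MM-dd'T'HH:mm:ss.SSSz" parse.

import org.apache.spark.sql.catalyst.util.DateUtils

object StringToTimeExample extends App {
  println(DateUtils.stringToTime("2015-02-10"))                      // java.sql.Date.valueOf
  println(DateUtils.stringToTime("2015-02-10 09:28:01"))             // java.sql.Timestamp.valueOf
  println(DateUtils.stringToTime("2015-02-10T09:28:01.185Z"))        // 'Z' becomes "GMT-00:00"
  println(DateUtils.stringToTime("2015-02-10T09:28:01.185+01:00"))   // offset appended after "GMT"
}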
Example 86
Source File: JacksonMessageWriter.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.status.api.v1

import java.io.OutputStream
import java.lang.annotation.Annotation
import java.lang.reflect.Type
import java.text.SimpleDateFormat
import java.util.{Calendar, SimpleTimeZone}
import javax.ws.rs.Produces
import javax.ws.rs.core.{MediaType, MultivaluedMap}
import javax.ws.rs.ext.{MessageBodyWriter, Provider}

import com.fasterxml.jackson.annotation.JsonInclude
import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}


@Provider
@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{

  val mapper = new ObjectMapper() {
    override def writeValueAsString(t: Any): String = {
      super.writeValueAsString(t)
    }
  }
  mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule)
  mapper.enable(SerializationFeature.INDENT_OUTPUT)
  mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL)
  mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat)

  override def isWriteable(
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType): Boolean = {
      true
  }

  override def writeTo(
      t: Object,
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType,
      multivaluedMap: MultivaluedMap[String, AnyRef],
      outputStream: OutputStream): Unit = {
    t match {
      case ErrorWrapper(err) => outputStream.write(err.getBytes("utf-8"))
      case _ => mapper.writeValue(outputStream, t)
    }
  }

  override def getSize(
      t: Object,
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType): Long = {
    -1L
  }
}

private[spark] object JacksonMessageWriter {
  def makeISODateFormat: SimpleDateFormat = {
    val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'")
    val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT"))
    iso8601.setCalendar(cal)
    iso8601
  }
} 
Example 87
Source File: SimpleDateParam.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.status.api.v1

import java.text.SimpleDateFormat
import java.util.TimeZone
import javax.ws.rs.WebApplicationException
import javax.ws.rs.core.Response
import javax.ws.rs.core.Response.Status

import scala.util.Try

private[v1] class SimpleDateParam(val originalValue: String) {
  val timestamp: Long = {
    SimpleDateParam.formats.collectFirst {
      case fmt if Try(fmt.parse(originalValue)).isSuccess =>
        fmt.parse(originalValue).getTime()
    }.getOrElse(
      throw new WebApplicationException(
        Response
          .status(Status.BAD_REQUEST)
          .entity("Couldn't parse date: " + originalValue)
          .build()
      )
    )
  }
}

private[v1] object SimpleDateParam {

  val formats: Seq[SimpleDateFormat] = {

    val gmtDay = new SimpleDateFormat("yyyy-MM-dd")
    gmtDay.setTimeZone(TimeZone.getTimeZone("GMT"))

    Seq(
      new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz"),
      gmtDay
    )
  }
} 
Example 88
Source File: DataFrameReportPerformanceSpec.scala    From seahorse   with Apache License 2.0 5 votes vote down vote up
package ai.deepsense.deeplang.doperables.dataframe

import java.sql.Timestamp
import java.text.{DateFormat, SimpleDateFormat}
import java.util.TimeZone

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{DoubleType, StructField, StructType, TimestampType}
import org.scalatest.{BeforeAndAfter, Ignore}

import ai.deepsense.commons.utils.{DoubleUtils, Logging}
import ai.deepsense.deeplang.{TestFiles, DeeplangIntegTestSupport}

// It is ignored because it has no assertions; it only prints the report generation time.
@Ignore
class DataFrameReportPerformanceSpec
    extends DeeplangIntegTestSupport
    with BeforeAndAfter
    with TestFiles
    with Logging {
  val testFile = absoluteTestsDirPath.pathWithoutScheme + "/demand_without_header.csv"

  "DataFrame" should {
    "generate report" when {
      "DataFrame has 17K of rows" in {
        val numberOfTries = 10
        var results: Seq[Double] = Seq()
        for (i <- 1 to numberOfTries) {
          val dataFrame: DataFrame = demandDataFrame()
          val start = System.nanoTime()
          val report = dataFrame.report()
          val end = System.nanoTime()
          val time1: Double = (end - start).toDouble / 1000000000.0
          results = results :+ time1
          logger.debug("Report generation time: {}", DoubleUtils.double2String(time1))
        }
        logger.debug(
          "Mean report generation time: {}",
          DoubleUtils.double2String(results.fold(0D)(_ + _) / numberOfTries.toDouble))
      }
    }
  }

  private def demandDataFrame(): DataFrame = {
    val rddString: RDD[String] = executionContext.sparkContext.textFile(testFile)
    val data: RDD[Row] = rddString.map(DataFrameHelpers.demandString2Row)
    executionContext.dataFrameBuilder.buildDataFrame(demandSchema, data)
  }

  private def demandSchema: StructType = StructType(Seq(
    StructField("datetime", TimestampType),
    StructField("log_count", DoubleType),
    StructField("workingday", DoubleType),
    StructField("holiday", DoubleType),
    StructField("season2", DoubleType),
    StructField("season3", DoubleType),
    StructField("season4", DoubleType)))

  private def timestamp(s: String): Timestamp = {
    val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    format.setTimeZone(TimeZone.getTimeZone("UTC"))
    new Timestamp(format.parse(s).getTime)
  }
}

private object DataFrameHelpers {
  def demandString2Row(s: String): Row = {
    val split = s.split(",")
    Row(
      timestamp(split(0)),
      split(1).toDouble,
      split(2).toDouble,
      split(3).toDouble,
      split(4).toDouble,
      split(5).toDouble,
      split(6).toDouble
    )
  }

  private def timestamp(s: String): Timestamp = {
    val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    format.setTimeZone(TimeZone.getTimeZone("UTC"))
    new Timestamp(format.parse(s).getTime)
  }
} 
Example 89
Source File: OutputInterceptorFactory.scala    From seahorse   with Apache License 2.0 5 votes vote down vote up
package ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.outputintercepting

import java.io.File
import java.text.SimpleDateFormat
import java.util.logging._
import java.util.{Calendar, UUID}

import com.google.inject.Inject
import com.google.inject.name.Named
import org.apache.spark.launcher.SparkLauncher

import ai.deepsense.commons.models.ClusterDetails



case class OutputInterceptorHandle private [outputintercepting] (
    private val logger: Logger,
    private val childProcLoggerName: String,
    private val loggerFileHandler: FileHandler ) {

  def attachTo(sparkLauncher: SparkLauncher): Unit = {
    sparkLauncher.setConf(
      "spark.launcher.childProcLoggerName", childProcLoggerName
    )
  }

  def writeOutput(text: String): Unit = {
    logger.info(text)
  }

  def close(): Unit = {
    loggerFileHandler.close()
  }

}

class OutputInterceptorFactory @Inject()(
  @Named("session-executor.spark-applications-logs-dir") val executorsLogDirectory: String
) {

  def prepareInterceptorWritingToFiles(clusterDetails: ClusterDetails): OutputInterceptorHandle = {
    new File(executorsLogDirectory).mkdirs()

    val childProcLoggerName = s"WE-app-${UUID.randomUUID()}"
    val logger = Logger.getLogger(childProcLoggerName)

    val fileName = {
      val time = Calendar.getInstance().getTime()
      // Colons are not allowed in Windows filenames
      val format = new SimpleDateFormat("yyyy-MM-dd_HH-mm-ss")
      val formattedTime = format.format(time)
      val illegalFileNameCharactersRegExp = "[^a-zA-Z0-9.-]"
      s"$formattedTime-${clusterDetails.name.replaceAll(illegalFileNameCharactersRegExp, "_")}.log"
    }
    val fileHandler = new FileHandler(s"$executorsLogDirectory/$fileName")
    fileHandler.setFormatter(new SimpleFormaterWithoutOutputRedirectorNoise)
    logger.addHandler(fileHandler)
    sys.addShutdownHook {
      fileHandler.close()
    }
    OutputInterceptorHandle(logger, childProcLoggerName, fileHandler)
  }

  class SimpleFormaterWithoutOutputRedirectorNoise extends Formatter {

    val simpleFormatter = new SimpleFormatter

    override def format(logRecord: LogRecord): String = {
      val formatted = simpleFormatter.format(logRecord)
      val redirectorNoise = "org.apache.spark.launcher.OutputRedirector redirect\nINFO: "
      val beginningOfRedirectorNoise = formatted.indexOf(redirectorNoise)

      val endOfRedirectorNoise = if (beginningOfRedirectorNoise > 0) {
        beginningOfRedirectorNoise + redirectorNoise.length
      } else {
        0
      }

      formatted.substring(endOfRedirectorNoise)
    }
  }

} 
Example 90
Source File: GeneratorTest.scala    From kafka-connect-kcql-smt   with Apache License 2.0 5 votes vote down vote up
package com.landoop.connect.sql

import java.text.SimpleDateFormat
import java.util.Date

import com.landoop.json.sql.JacksonJson
import com.sksamuel.avro4s.SchemaFor
import org.scalatest.{Matchers, WordSpec}

import scala.util.Random


class GeneratorTest extends WordSpec with Matchers {
  "Generator" should {
    "generate schema" in {
      val sql = Sql.parse("SELECT * FROM `order-topic`")
      val schema = SchemaFor[Product]()
      val str = schema.toString
      println(str)
    }

    "generate data" in {
      val rnd = new Random(System.currentTimeMillis())
      val f = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss.FFF")
      val products = (1 to 4).map { i =>
        Product(i, f.format(new Date()), s"product_$i", Payment(rnd.nextDouble(), i * rnd.nextInt(3), "GBP"))
      }.map(JacksonJson.toJson).mkString(s"${System.lineSeparator()}")
      println(products)
    }
  }

}


case class Product(id: Int,
                   created: String,
                   name: String,
                   payment: Payment)

case class Payment(price: Double,
                   quantity: Int,
                   currency: String) 
Example 91
Source File: RandomListTest.scala    From TransmogrifAI   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package com.salesforce.op.testkit

import java.text.SimpleDateFormat

import com.salesforce.op.features.types._
import com.salesforce.op.test.TestCommon
import com.salesforce.op.testkit.RandomList.{NormalGeolocation, UniformGeolocation}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.{Assertions, FlatSpec}

import scala.language.postfixOps


@RunWith(classOf[JUnitRunner])
class RandomListTest extends FlatSpec with TestCommon with Assertions {
  private val numTries = 10000
  private val rngSeed = 314159214142136L

  private def check[D, T <: OPList[D]](
    g: RandomList[D, T],
    minLen: Int, maxLen: Int,
    predicate: (D => Boolean) = (_: D) => true
  ) = {
    g reset rngSeed

    def segment = g limit numTries

    segment count (_.value.length < minLen) shouldBe 0
    segment count (_.value.length > maxLen) shouldBe 0
    segment foreach (list => list.value foreach { x =>
      predicate(x) shouldBe true
    })
  }

  private val df = new SimpleDateFormat("dd/MM/yy")

  Spec[Text, RandomList[String, TextList]] should "generate lists of strings" in {
    val sut = RandomList.ofTexts(RandomText.countries, 0, 4)
    check[String, TextList](sut, 0, 4, _.length > 0)

    (sut limit 7 map (_.value.toList)) shouldBe
      List(
        List("Madagascar", "Gondal", "Zephyria"),
        List("Holy Alliance"),
        List("North American Union"),
        List("Guatemala", "Estonia", "Kolechia"),
        List(),
        List("Myanmar", "Bhutan"),
        List("Equatorial Guinea")
      )
  }

  Spec[Date, RandomList[Long, DateList]] should "generate lists of dates" in {
    val dates = RandomIntegral.dates(df.parse("01/01/2017"), 1000, 1000000)
    val sut = RandomList.ofDates(dates, 11, 22)
    var d0 = 0L
    check[Long, DateList](sut, 11, 22, d => {
      val d1 = d0
      d0 = d
      d > d1
    })
  }

  Spec[DateTimeList, RandomList[Long, DateTimeList]] should "generate lists of datetimes" in {
    val datetimes = RandomIntegral.datetimes(df.parse("01/01/2017"), 1000, 1000000)
    val sut = RandomList.ofDateTimes(datetimes, 11, 22)
    var d0 = 0L
    check[Long, DateTimeList](sut, 11, 22, d => {
      val d1 = d0
      d0 = d
      d > d1
    })
  }

  Spec[UniformGeolocation] should "generate uniformly distributed geolocations" in {
    val sut = RandomList.ofGeolocations
    val segment = sut limit numTries
    segment foreach (_.value.length shouldBe 3)
  }

  Spec[NormalGeolocation] should "generate geolocations around given point" in {
    for {accuracy <- GeolocationAccuracy.values} {
      val geolocation = RandomList.ofGeolocationsNear(37.444136, 122.163160, accuracy)
      val segment = geolocation limit numTries
      segment foreach (_.value.length shouldBe 3)
    }
  }
} 
Example 92
Source File: RandomIntegralTest.scala    From TransmogrifAI   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package com.salesforce.op.testkit

import java.text.SimpleDateFormat

import com.salesforce.op.features.types._
import com.salesforce.op.test.TestCommon
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.{Assertions, FlatSpec}

import scala.language.postfixOps

@RunWith(classOf[JUnitRunner])
class RandomIntegralTest extends FlatSpec with TestCommon with Assertions {
  private val numTries = 10000
  private val rngSeed = 314159214142135L

  private def check[T <: Integral](
    g: RandomIntegral[T],
    predicate: Long => Boolean = _ => true
  ) = {
    g reset rngSeed

    def segment = g limit numTries

    val numberOfEmpties = segment count (_.isEmpty)

    val expectedNumberOfEmpties = g.probabilityOfEmpty * numTries

    withClue(s"numEmpties = $numberOfEmpties, expected $expectedNumberOfEmpties") {
      math.abs(numberOfEmpties - expectedNumberOfEmpties) < 2 * math.sqrt(numTries) shouldBe true
    }

    val maybeValues = segment filterNot (_.isEmpty) map (_.value)
    val values = maybeValues collect { case Some(s) => s }

    values foreach (x => predicate(x) shouldBe true)

    withClue(s"number of distinct values = ${values.size}, expected:") {
      math.abs(maybeValues.size - values.toSet.size) < maybeValues.size / 20
    }

  }

  private val df = new SimpleDateFormat("dd/MM/yy")

  Spec[RandomIntegral[Integral]] should "generate empties and distinct numbers" in {
    val sut0 = RandomIntegral.integrals
    val sut = sut0.withProbabilityOfEmpty(0.3)
    check(sut)
    sut.probabilityOfEmpty shouldBe 0.3
  }

  Spec[RandomIntegral[Integral]] should "generate empties and distinct numbers in some range" in {
    val sut0 = RandomIntegral.integrals(100, 200)
    val sut = sut0.withProbabilityOfEmpty(0.3)
    check(sut, i => i >= 100 && i < 200)
    sut.probabilityOfEmpty shouldBe 0.3
  }

  Spec[RandomIntegral[Date]] should "generate dates" in {
    val sut = RandomIntegral.dates(df.parse("01/01/2017"), 1000, 1000000)
    var d0 = 0L
    check(sut withProbabilityOfEmpty 0.01, d => {
      val d1 = d0
      d0 = d
      d0 > d1
    })
  }

  Spec[RandomIntegral[DateTime]] should "generate dates with times" in {
    val sut = RandomIntegral.datetimes(df.parse("08/24/2017"), 1000, 1000000)
    var d0 = 0L
    check(sut withProbabilityOfEmpty 0.001, d => {
      val d1 = d0
      d0 = d
      d0 > d1
    })
  }
} 
Example 93
package co.ledger.wallet.app.ui.m2fa

import java.text.SimpleDateFormat
import java.util.Locale

import android.content.DialogInterface
import android.os.Bundle
import android.view.{View, ViewGroup, LayoutInflater}

import co.ledger.wallet.R
import co.ledger.wallet.app.base.BaseDialogFragment
import co.ledger.wallet.core.bitcoin.AmountFormatter
import co.ledger.wallet.app.api.m2fa.IncomingTransactionAPI
import co.ledger.wallet.core.utils.TR
import co.ledger.wallet.core.view.DialogActionBarController
import co.ledger.wallet.core.widget.TextView

class IncomingTransactionDialogFragment extends BaseDialogFragment {

  lazy val actions = DialogActionBarController(R.id.dialog_action_bar).noNeutralButton
  lazy val amount = TR(R.id.amount).as[TextView]
  lazy val address = TR(R.id.address).as[TextView]
  lazy val date = TR(R.id.date).as[TextView]
  lazy val name = TR(R.id.dongle_name).as[TextView]

  private[this] var _transaction: Option[IncomingTransactionAPI#IncomingTransaction] = None

  def this(tx: IncomingTransactionAPI#IncomingTransaction) {
    this()
    _transaction = Option(tx)
    setCancelable(false)
  }

  override def onCreateView(inflater: LayoutInflater, container: ViewGroup, savedInstanceState: Bundle): View = {
    inflater.inflate(R.layout.incoming_transaction_dialog_fragment, container, false)
  }

  override def onResume(): Unit = {
    super.onResume()
    if (_transaction.isEmpty || _transaction.get.isDone)
      dismiss()
    _transaction.foreach(_.onCancelled(dismiss))
  }

  override def onPause(): Unit = {
    super.onPause()
    _transaction.foreach(_.onCancelled(null))
    dismissAllowingStateLoss()
  }

  override def onViewCreated(view: View, savedInstanceState: Bundle): Unit = {
    super.onViewCreated(view, savedInstanceState)
    actions onPositiveClick {
      _transaction.foreach(_.accept())
      _transaction = None
      dismiss()
    }
    actions onNegativeClick {
      _transaction.foreach(_.reject())
      _transaction = None
      dismiss()
    }
    _transaction match {
      case Some(transaction) =>
        amount.setText(AmountFormatter.Bitcoin.format(transaction.amount))
        address.setText(transaction.address)
        name.setText(transaction.dongle.name.get)
        val df = android.text.format.DateFormat.getDateFormat(getActivity)
        val hf = android.text.format.DateFormat.getTimeFormat(getActivity)
        date.setText(TR(R.string.incoming_tx_date).as[String].format(df.format(transaction.date), hf.format(transaction.date)))
      case _ =>
    }
  }

  override def onDismiss(dialog: DialogInterface): Unit = {
    super.onDismiss(dialog)
    _transaction.foreach(_.cancel())
  }
}

object IncomingTransactionDialogFragment {
  val DefaultTag = "IncomingTransactionDialogFragment"

} 
Example 94
Source File: TimerSchedule.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
package scalaDemo

import java.text.SimpleDateFormat
import java.util.{Timer, TimerTask}

object TimerSchedule {

  def main(args: Array[String]): Unit = {
    val fTime = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")
    val d1 = fTime.parse("2005/12/30 14:10:00")
    val timer: Timer = new Timer()
    timer.scheduleAtFixedRate(new TimerTask() {
      override def run(): Unit = {
        System.out.println("this is task you do6")
      }
    }, d1, 3 * 60 * 1000)

  }
}
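Because the start date above is hard-coded in the past, Timer.scheduleAtFixedRate fires immediately and then runs catch-up executions in rapid succession until the schedule is back on track (the documented fixed-rate behaviour). A variation that schedules relative to the current time avoids this; the snippet is my own sketch, not part of the original file:

import java.util.{Date, Timer, TimerTask}

object TimerScheduleNowDemo extends App {
  val timer = new Timer()
  // first run 5 seconds from now, then every 3 minutes
  val firstRun = new Date(System.currentTimeMillis() + 5 * 1000L)
  timer.scheduleAtFixedRate(new TimerTask {
    override def run(): Unit = println("periodic task")
  }, firstRun, 3 * 60 * 1000L)
}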
Example 95
Source File: PMMLModelExport.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.mllib.pmml.export

import java.text.SimpleDateFormat
import java.util.Date

import scala.beans.BeanProperty

import org.dmg.pmml.{Application, Header, PMML, Timestamp}

private[mllib] trait PMMLModelExport {

  
  @BeanProperty
  val pmml: PMML = new PMML

  setHeader(pmml)

  private def setHeader(pmml: PMML): Unit = {
    val version = getClass.getPackage.getImplementationVersion
    val app = new Application().withName("Apache Spark MLlib").withVersion(version)
    val timestamp = new Timestamp()
      .withContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(new Date()))
    val header = new Header()
      .withApplication(app)
      .withTimestamp(timestamp)
    pmml.setHeader(header)
  }
} 
Example 96
Source File: JacksonMessageWriter.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.status.api.v1

import java.io.OutputStream
import java.lang.annotation.Annotation
import java.lang.reflect.Type
import java.text.SimpleDateFormat
import java.util.{Calendar, SimpleTimeZone}
import javax.ws.rs.Produces
import javax.ws.rs.core.{MediaType, MultivaluedMap}
import javax.ws.rs.ext.{MessageBodyWriter, Provider}

import com.fasterxml.jackson.annotation.JsonInclude
import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}


@Provider
@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{

  val mapper = new ObjectMapper() {
    override def writeValueAsString(t: Any): String = {
      super.writeValueAsString(t)
    }
  }
  mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule)
  mapper.enable(SerializationFeature.INDENT_OUTPUT)
  mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL)
  mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat)

  override def isWriteable(
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType): Boolean = {
      true
  }

  override def writeTo(
      t: Object,
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType,
      multivaluedMap: MultivaluedMap[String, AnyRef],
      outputStream: OutputStream): Unit = {
    t match {
      case ErrorWrapper(err) => outputStream.write(err.getBytes("utf-8"))
      case _ => mapper.writeValue(outputStream, t)
    }
  }

  override def getSize(
      t: Object,
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType): Long = {
    -1L
  }
}

private[spark] object JacksonMessageWriter {
  def makeISODateFormat: SimpleDateFormat = {
    val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'")
    val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT"))
    iso8601.setCalendar(cal)
    iso8601
  }
} 
Example 97
Source File: SimpleDateParam.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.status.api.v1

import java.text.{ParseException, SimpleDateFormat}
import java.util.TimeZone
import javax.ws.rs.WebApplicationException
import javax.ws.rs.core.Response
import javax.ws.rs.core.Response.Status

private[v1] class SimpleDateParam(val originalValue: String) {

  val timestamp: Long = {
    val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz")
    try {
      format.parse(originalValue).getTime()
    } catch {
      case _: ParseException =>
        val gmtDay = new SimpleDateFormat("yyyy-MM-dd")
        gmtDay.setTimeZone(TimeZone.getTimeZone("GMT"))
        try {
          gmtDay.parse(originalValue).getTime()
        } catch {
          case _: ParseException =>
            throw new WebApplicationException(
              Response
                .status(Status.BAD_REQUEST)
                .entity("Couldn't parse date: " + originalValue)
                .build()
            )
        }
    }
  }
} 
Example 98
Source File: BusinessLogger.scala    From languagedetector   with MIT License 5 votes vote down vote up
package biz.meetmatch.logging

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}

import org.slf4j.LoggerFactory

object BusinessLogger {
  def getDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
}

class BusinessLogger(module: String) {
  private val logger = LoggerFactory.getLogger("businessLogger")

  def calcStarted(options: String, sparkAppId: String): Unit = {
    log(s"CALC\tSTART\t$options\t$sparkAppId")
  }

  def calcStopped(result: String): Unit = {
    log(s"CALC\tSTOP\t$result")
  }

  def jobStarted(jobId: Int, jobDescription: String, stageCount: Int, executionId: Option[String]): Unit = {
    log(s"JOB\t$jobId\tSTART\t${jobDescription.replace("\n", " ").replace("\t", " ")}\t$stageCount\t${executionId.getOrElse("")}")
  }

  def jobStopped(jobId: Int, result: String): Unit = {
    log(s"JOB\t$jobId\tSTOP\t$result")
  }

  def transactionStarted(category: String, id: String, stageId: Int = -1, partitionId: Int = -1, taskId: Long = -1, message: String = ""): Unit = {
    log(s"TRANSACTION\t$category\t$id\tSTART\t$stageId\t$partitionId\t$taskId\t${message.replace("\n", " ").replace("\t", " ")}")
  }

  def transactionStopped(category: String, id: String): Unit = {
    log(s"TRANSACTION\t$category\t$id\tSTOP")
  }

  def dataParquetRead(tableName: String, count: Long = -1): Unit = {
    log(s"DATA\tPARQUET\tREAD\t${tableName.replace("\n", " ").replace("\t", " ")}\t$count")
  }

  def dataParquetWritten(tableName: String, countBefore: Long, countAfter: Long): Unit = {
    log(s"DATA\tPARQUET\tWRITE\t${tableName.replace("\n", " ").replace("\t", " ")}\t$countBefore\t$countAfter")
  }

  def dataJdbcRead(tableName: String, count: Long = -1): Unit = {
    log(s"DATA\tJDBC\tREAD\t${tableName.replace("\n", " ").replace("\t", " ")}\t$count")
  }

  def dataJdbcWritten(tableName: String, countBefore: Long = -1, countAfter: Long = -1, countUpdated: Long = -1): Unit = {
    log(s"DATA\tJDBC\tWRITE\t${tableName.replace("\n", " ").replace("\t", " ")}\t$countBefore\t$countAfter\t$countUpdated")
  }

  def info(subject: String, message: String): Unit = {
    log(s"MESSAGE\tINFO\t${subject.replace("\n", " ").replace("\t", " ")}\t${message.replace("\n", " ").replace("\t", " ")}")
  }

  def warn(subject: String, message: String): Unit = {
    log(s"MESSAGE\tWARN\t${subject.replace("\n", " ").replace("\t", " ")}\t${message.replace("\n", " ").replace("\t", " ")}")
  }

  def error(subject: String, message: String): Unit = {
    log(s"MESSAGE\tERROR\t${subject.replace("\n", " ").replace("\t", " ")}\t${message.replace("\n", " ").replace("\t", " ")}")
  }

  private def log(line: String) = {
    logger.info(s"${BusinessLogger.getDateFormat.format(Calendar.getInstance.getTime)}\t$module\t$line")
  }
}

case class LogLineWorkflow(message: String, startDate: String, stopDate: Date, duration: String, state: String, options: Array[Array[String]], sparkAppId: String, calcs: Array[LogLineCalc], warnings: Int, errors: Int)

case class LogLineCalc(module: String, startDate: String, stopDate: Date, duration: String, state: String, options: Array[Array[String]], sparkAppId: String, jobs: Array[LogLineJob], transactionCategories: Array[LogLineTransactionCategory], dataReads: Array[LogLineDataRead], dataWrites: Array[LogLineDataWrite], messages: Array[LogLineMessage])
case class LogLineJob(id: Int, startDate: String, duration: String, state: String, description: String, stageCount: Int, executionId: Int = -1)

case class LogLineTransactionCategory(category: String, transactions: Array[LogLineTransaction], numberOfTransactions: Int, averageFinishedTransactionDuration: String)
case class LogLineTransaction(category: String, id: String, stageId: Int, partitionId: Int, taskId: Long, message: String, startDate: String, duration: String, state: String)

case class LogLineDataRead(storage: String, tableName: String, count: Int, date: String)

case class LogLineDataWrite(storage: String, tableName: String, countBefore: Int, countAfter: Int, countUpdated: Int, date: String)

case class LogLineMessage(category: String, subject: String, message: String, date: String) 
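Note that getDateFormat is a def, so each call builds a fresh SimpleDateFormat; since SimpleDateFormat is mutable and not thread-safe, that keeps log-line formatting safe when the logger is called from many threads. A minimal usage sketch (my own, not part of the original file; the module name and arguments are made up):

object BusinessLoggerDemo extends App {
  val log = new BusinessLogger("demo-module")
  log.calcStarted(options = "--input /tmp/data", sparkAppId = "app-0001")
  log.info("demo", "started processing")
  log.calcStopped("SUCCESS")
}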
Example 99
Source File: PMMLModelExport.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.mllib.pmml.export

import java.text.SimpleDateFormat
import java.util.{Date, Locale}

import scala.beans.BeanProperty

import org.dmg.pmml.{Application, Header, PMML, Timestamp}

private[mllib] trait PMMLModelExport {

  
  @BeanProperty
  val pmml: PMML = {
    val version = getClass.getPackage.getImplementationVersion
    val app = new Application("Apache Spark MLlib").setVersion(version)
    val timestamp = new Timestamp()
      .addContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US).format(new Date()))
    val header = new Header()
      .setApplication(app)
      .setTimestamp(timestamp)
    new PMML("4.2", header, null)
  }
} 
Example 100
Source File: JacksonMessageWriter.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.status.api.v1

import java.io.OutputStream
import java.lang.annotation.Annotation
import java.lang.reflect.Type
import java.nio.charset.StandardCharsets
import java.text.SimpleDateFormat
import java.util.{Calendar, Locale, SimpleTimeZone}
import javax.ws.rs.Produces
import javax.ws.rs.core.{MediaType, MultivaluedMap}
import javax.ws.rs.ext.{MessageBodyWriter, Provider}

import com.fasterxml.jackson.annotation.JsonInclude
import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}


@Provider
@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{

  val mapper = new ObjectMapper() {
    override def writeValueAsString(t: Any): String = {
      super.writeValueAsString(t)
    }
  }
  mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule)
  mapper.enable(SerializationFeature.INDENT_OUTPUT)
  mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL)
  mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat)

  override def isWriteable(
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType): Boolean = {
      true
  }

  override def writeTo(
      t: Object,
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType,
      multivaluedMap: MultivaluedMap[String, AnyRef],
      outputStream: OutputStream): Unit = {
    t match {
      case ErrorWrapper(err) => outputStream.write(err.getBytes(StandardCharsets.UTF_8))
      case _ => mapper.writeValue(outputStream, t)
    }
  }

  override def getSize(
      t: Object,
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType): Long = {
    -1L
  }
}

private[spark] object JacksonMessageWriter {
  def makeISODateFormat: SimpleDateFormat = {
    val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'", Locale.US)
    val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT"))
    iso8601.setCalendar(cal)
    iso8601
  }
} 
Example 101
Source File: SimpleDateParam.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.status.api.v1

import java.text.{ParseException, SimpleDateFormat}
import java.util.{Locale, TimeZone}
import javax.ws.rs.WebApplicationException
import javax.ws.rs.core.Response
import javax.ws.rs.core.Response.Status

private[v1] class SimpleDateParam(val originalValue: String) {

  val timestamp: Long = {
    val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz", Locale.US)
    try {
      format.parse(originalValue).getTime()
    } catch {
      case _: ParseException =>
        val gmtDay = new SimpleDateFormat("yyyy-MM-dd", Locale.US)
        gmtDay.setTimeZone(TimeZone.getTimeZone("GMT"))
        try {
          gmtDay.parse(originalValue).getTime()
        } catch {
          case _: ParseException =>
            throw new WebApplicationException(
              Response
                .status(Status.BAD_REQUEST)
                .entity("Couldn't parse date: " + originalValue)
                .build()
            )
        }
    }
  }
} 
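Compared with the spark1.52 version of this class shown earlier, this variant passes Locale.US explicitly. That matters for any pattern with text fields such as month names, because SimpleDateFormat parsing and formatting are locale-dependent. A small illustration (my own sketch, not from the Spark sources):

import java.text.SimpleDateFormat
import java.util.Locale
import scala.util.Try

object LocaleSensitivityDemo extends App {
  val pattern = "dd-MMM-yyyy"
  val us = new SimpleDateFormat(pattern, Locale.US)
  val fr = new SimpleDateFormat(pattern, Locale.FRANCE)

  // "Aug" is an English month abbreviation, so only the US-locale parser accepts it.
  println(Try(us.parse("10-Aug-2015"))) // Success(...)
  println(Try(fr.parse("10-Aug-2015"))) // Failure(ParseException) with typical locale data
}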
Example 102
Source File: StreamingDemo.scala    From flink-demos   with Apache License 2.0 5 votes vote down vote up
package com.dataartisans.flink.example.eventpattern

import java.text.SimpleDateFormat
import java.util
import java.util.{Calendar, Properties, UUID}

import com.dataartisans.flink.example.eventpattern.kafka.EventDeSerializer

import org.apache.flink.api.common.functions.{RuntimeContext, RichFlatMapFunction}
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.elasticsearch.{IndexRequestBuilder, ElasticsearchSink}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer08
import org.apache.flink.util.Collector

import org.elasticsearch.action.index.IndexRequest
import org.elasticsearch.client.Requests


class StateMachineMapper extends RichFlatMapFunction[Event, Alert] {
  
  private[this] var currentState: ValueState[State] = _
    
  override def open(config: Configuration): Unit = {
    currentState = getRuntimeContext.getState(
      new ValueStateDescriptor("state", classOf[State], InitialState))
  }
  
  override def flatMap(t: Event, out: Collector[Alert]): Unit = {
    val state = currentState.value()
    val nextState = state.transition(t.event)
    
    nextState match {
      case InvalidTransition =>
        out.collect(Alert(t.sourceAddress, state, t.event))
      case x if x.terminal =>
        currentState.clear()
      case x =>
        currentState.update(nextState)
    }
  }
} 
Example 103
Source File: FECData.scala    From s4ds   with Apache License 2.0 5 votes vote down vote up
import java.io.File
import java.sql.Date
import java.text.SimpleDateFormat
import com.github.tototoshi.csv._

object FECData {

  val DataDirectory = "./data/"

  private val dateParser = new SimpleDateFormat("DD-MMM-YY")

  private def load(fileName:String):FECData = {
    val reader = CSVReader.open(new File(DataDirectory + fileName))
    val transactions = for { 
      row <- reader.iteratorWithHeaders
      id = None
      candidate = row("candidate")
      contributor = row("contributor_name")
      state = row("contributor_state")
      occupation = row("contributor_occupation") match {
        case "" => None
        case v => Some(v)
      }
      amount = (row("amount").toDouble*100).toInt
      date = new Date(dateParser.parse(row("date")).getTime)
    } yield Transaction(id, candidate, contributor, state, occupation, amount, date)
    
    new FECData(transactions)
  }

  def loadAll:FECData = load("us.csv")

  def loadOhio:FECData = load("ohio.csv")


}

class FECData(val transactions:Iterator[Transaction]) 
Example 104
Source File: PMMLModelExport.scala    From BigDatalog   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.mllib.pmml.export

import java.text.SimpleDateFormat
import java.util.Date

import scala.beans.BeanProperty

import org.dmg.pmml.{Application, Header, PMML, Timestamp}

private[mllib] trait PMMLModelExport {

  
  @BeanProperty
  val pmml: PMML = new PMML

  pmml.setVersion("4.2")
  setHeader(pmml)

  private def setHeader(pmml: PMML): Unit = {
    val version = getClass.getPackage.getImplementationVersion
    val app = new Application().withName("Apache Spark MLlib").withVersion(version)
    val timestamp = new Timestamp()
      .withContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(new Date()))
    val header = new Header()
      .withApplication(app)
      .withTimestamp(timestamp)
    pmml.setHeader(header)
  }
} 
Example 105
Source File: JacksonMessageWriter.scala    From BigDatalog   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.status.api.v1

import java.io.OutputStream
import java.lang.annotation.Annotation
import java.lang.reflect.Type
import java.text.SimpleDateFormat
import java.util.{Calendar, SimpleTimeZone}
import javax.ws.rs.Produces
import javax.ws.rs.core.{MediaType, MultivaluedMap}
import javax.ws.rs.ext.{MessageBodyWriter, Provider}

import com.fasterxml.jackson.annotation.JsonInclude
import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}


@Provider
@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{

  val mapper = new ObjectMapper() {
    override def writeValueAsString(t: Any): String = {
      super.writeValueAsString(t)
    }
  }
  mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule)
  mapper.enable(SerializationFeature.INDENT_OUTPUT)
  mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL)
  mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat)

  override def isWriteable(
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType): Boolean = {
      true
  }

  override def writeTo(
      t: Object,
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType,
      multivaluedMap: MultivaluedMap[String, AnyRef],
      outputStream: OutputStream): Unit = {
    t match {
      case ErrorWrapper(err) => outputStream.write(err.getBytes("utf-8"))
      case _ => mapper.writeValue(outputStream, t)
    }
  }

  override def getSize(
      t: Object,
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType): Long = {
    -1L
  }
}

private[spark] object JacksonMessageWriter {
  def makeISODateFormat: SimpleDateFormat = {
    val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'")
    val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT"))
    iso8601.setCalendar(cal)
    iso8601
  }
} 
Example 106
Source File: SimpleDateParam.scala    From BigDatalog   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.status.api.v1

import java.text.{ParseException, SimpleDateFormat}
import java.util.TimeZone
import javax.ws.rs.WebApplicationException
import javax.ws.rs.core.Response
import javax.ws.rs.core.Response.Status

private[v1] class SimpleDateParam(val originalValue: String) {

  val timestamp: Long = {
    val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz")
    try {
      format.parse(originalValue).getTime()
    } catch {
      case _: ParseException =>
        val gmtDay = new SimpleDateFormat("yyyy-MM-dd")
        gmtDay.setTimeZone(TimeZone.getTimeZone("GMT"))
        try {
          gmtDay.parse(originalValue).getTime()
        } catch {
          case _: ParseException =>
            throw new WebApplicationException(
              Response
                .status(Status.BAD_REQUEST)
                .entity("Couldn't parse date: " + originalValue)
                .build()
            )
        }
    }
  }
} 
Example 107
Source File: JavaJsonUtils.scala    From asura   with MIT License 5 votes vote down vote up
package asura.common.util

import java.text.SimpleDateFormat

import com.fasterxml.jackson.annotation.JsonInclude
import com.fasterxml.jackson.core.JsonParser
import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}

object JavaJsonUtils extends JsonUtils {

  val mapper: ObjectMapper = new ObjectMapper()
  mapper.setDateFormat(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"))
  mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL)
  mapper.configure(DeserializationFeature.ACCEPT_EMPTY_ARRAY_AS_NULL_OBJECT, true)
  mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
  mapper.configure(DeserializationFeature.FAIL_ON_IGNORED_PROPERTIES, false)
  mapper.configure(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY, true)
  mapper.configure(JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES, true)
  mapper.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, true)
} 
Example 108
Source File: JsonUtils.scala    From asura   with MIT License 5 votes vote down vote up
package asura.common.util

import java.io.InputStream
import java.text.SimpleDateFormat

import com.fasterxml.jackson.annotation.JsonInclude
import com.fasterxml.jackson.core.JsonParser
import com.fasterxml.jackson.core.`type`.TypeReference
import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper

object JsonUtils extends JsonUtils {

  val mapper: ObjectMapper with ScalaObjectMapper = new ObjectMapper() with ScalaObjectMapper
  mapper.registerModule(DefaultScalaModule)
  mapper.setDateFormat(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"))
  mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL)
  mapper.configure(DeserializationFeature.ACCEPT_EMPTY_ARRAY_AS_NULL_OBJECT, true)
  mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
  mapper.configure(DeserializationFeature.FAIL_ON_IGNORED_PROPERTIES, false)
  mapper.configure(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY, true)
  mapper.configure(JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES, true)
  mapper.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, true)

}

trait JsonUtils {
  val mapper: ObjectMapper

  def stringify(obj: AnyRef): String = {
    mapper.writeValueAsString(obj)
  }

  def parse[T <: AnyRef](content: String, c: Class[T]): T = {
    mapper.readValue(content, c)
  }

  def parse[T <: AnyRef](input: InputStream, c: Class[T]): T = {
    mapper.readValue(input, c)
  }

  def parse[T <: AnyRef](content: String, typeReference: TypeReference[T]): T = {
    mapper.readValue(content, typeReference)
  }
} 
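Because setDateFormat is applied to the mapper, java.util.Date fields are written and read back with the yyyy-MM-dd HH:mm:ss pattern (so millisecond precision is lost on a round trip). A short usage sketch, assuming the JsonUtils object above is on the classpath; the Event case class is only an illustration:

import java.util.Date
import asura.common.util.JsonUtils

case class Event(name: String, created: Date)

object JsonUtilsDemo extends App {
  val json = JsonUtils.stringify(Event("started", new Date()))
  println(json) // e.g. {"name":"started","created":"2020-01-01 12:00:00"}
  val back = JsonUtils.parse(json, classOf[Event])
  println(back.name + " / " + back.created)
}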
Example 109
Source File: MongodbSchemaIT.scala    From Spark-MongoDB   with Apache License 2.0 5 votes vote down vote up
package com.stratio.datasource.mongodb.schema

import java.text.SimpleDateFormat
import java.util.Locale

import com.stratio.datasource.MongodbTestConstants
import com.stratio.datasource.mongodb.config.{MongodbConfig, MongodbConfigBuilder}
import com.stratio.datasource.mongodb.partitioner.MongodbPartitioner
import com.stratio.datasource.mongodb.rdd.MongodbRDD
import com.stratio.datasource.mongodb._
import org.apache.spark.sql.mongodb.{TemporaryTestSQLContext, TestSQLContext}
import org.apache.spark.sql.types.{ArrayType, StringType, StructField, TimestampType}
import org.junit.runner.RunWith
import org.scalatest._
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class MongodbSchemaIT extends FlatSpec
with Matchers
with MongoEmbedDatabase
with TestBsonData
with MongodbTestConstants {

  private val host: String = "localhost"
  private val collection: String = "testCol"
  private val readPreference = "secondaryPreferred"

  val testConfig = MongodbConfigBuilder()
    .set(MongodbConfig.Host,List(host + ":" + mongoPort))
    .set(MongodbConfig.Database,db)
    .set(MongodbConfig.Collection,collection)
    .set(MongodbConfig.SamplingRatio,1.0)
    .set(MongodbConfig.ReadPreference, readPreference)
    .build()

  val mongodbPartitioner = new MongodbPartitioner(testConfig)

  val mongodbRDD = new MongodbRDD(TemporaryTestSQLContext, testConfig, mongodbPartitioner)

  behavior of "A schema"

  it should "be inferred from rdd with primitives" + scalaBinaryVersion in {
    withEmbedMongoFixture(primitiveFieldAndType) { mongodProc =>
      val schema = MongodbSchema(mongodbRDD, 1.0).schema()

      schema.fields should have size 7
      schema.fieldNames should contain allOf("string", "integer", "long", "double", "boolean", "null")

      schema.printTreeString()
    }
  }

  it should "be inferred from rdd with complex fields" + scalaBinaryVersion in {
    withEmbedMongoFixture(complexFieldAndType1) { mongodProc =>
      val schema = MongodbSchema(mongodbRDD, 1.0).schema()

      schema.fields should have size 13

      schema.fields filter {
        case StructField(name, ArrayType(StringType, _), _, _) => Set("arrayOfNull", "arrayEmpty") contains name
        case _ => false
      } should have size 2

      schema.printTreeString()
    }
  }

  it should "resolve type conflicts between fields" + scalaBinaryVersion in {
    withEmbedMongoFixture(primitiveFieldValueTypeConflict) { mongodProc =>
      val schema = MongodbSchema(mongodbRDD, 1.0).schema()

      schema.fields should have size 7

      schema.printTreeString()
    }
  }

  it should "be inferred from rdd with more complex fields" + scalaBinaryVersion in {
    withEmbedMongoFixture(complexFieldAndType2) { mongodProc =>
      val schema = MongodbSchema(mongodbRDD, 1.0).schema()

      schema.fields should have size 5

      schema.printTreeString()
    }
  }

  it should "read java.util.Date fields as timestamptype" + scalaBinaryVersion in {
    val dfunc = (s: String) => new SimpleDateFormat("EEE MMM dd HH:mm:ss Z yyyy", Locale.ENGLISH).parse(s)
    import com.mongodb.casbah.Imports.DBObject
    val stringAndDate = List(DBObject("string" -> "this is a simple string.", "date" -> dfunc("Mon Aug 10 07:52:49 EDT 2015")))
    withEmbedMongoFixture(stringAndDate) { mongodProc =>
      val schema = MongodbSchema(mongodbRDD, 1.0).schema()

      schema.fields should have size 3
      schema.fields.filter(_.name == "date").head.dataType should equal(TimestampType)
      schema.printTreeString()
    }
  }
} 
Example 110
Source File: TSQR.scala    From SparkAndMPIFactorizations   with MIT License 5 votes vote down vote up
package edu.berkeley.cs.amplab.mlmatrix

import java.util.concurrent.ThreadLocalRandom
import scala.collection.mutable.ArrayBuffer

import breeze.linalg._

import edu.berkeley.cs.amplab.mlmatrix.util.QRUtils
import edu.berkeley.cs.amplab.mlmatrix.util.Utils

import org.apache.spark.rdd.RDD
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.Accumulator
import org.apache.spark.SparkContext._

import java.util.Calendar
import java.text.SimpleDateFormat

class modifiedTSQR extends Serializable {

    def report(message: String, verbose: Boolean = true) = {
        val now = Calendar.getInstance().getTime()
        val formatter = new SimpleDateFormat("H:m:s")
        if (verbose) {
            println("STATUS REPORT (" + formatter.format(now) + "): " + message)
        }
    }

  
  private def reduceQR(
      acc: Accumulator[Double],
      a: Tuple2[DenseVector[Double], DenseMatrix[Double]],
      b: Tuple2[DenseVector[Double], DenseMatrix[Double]]): Tuple2[DenseVector[Double], DenseMatrix[Double]] = {
    val begin = System.nanoTime
    val outmat = QRUtils.qrR(DenseMatrix.vertcat(a._2, b._2), false)
    val outcolnorms = a._1 + b._1
    acc += ((System.nanoTime - begin) / 1e6)
    (outcolnorms, outmat)
  }

} 
Example 111
Source File: HtmlFetcher.scala    From Mastering-Spark-for-Data-Science   with MIT License 5 votes vote down vote up
package io.gzet

import java.text.SimpleDateFormat

import com.gravity.goose.{Configuration, Goose}
import io.gzet.HtmlFetcher.Content
import org.apache.commons.lang.StringUtils
import org.apache.spark.rdd.RDD

class HtmlFetcher(
                   connectionTimeout: Int = 10000,
                   socketTimeout: Int = 10000
                 ) extends Serializable {

  final val USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36"
  final val ISO_SDF = "yyyy-MM-dd'T'HH:mm:ssZ"

  def fetchWithContext(urlRdd: RDD[(Long, String)]): RDD[(Long, Content)] = {
    urlRdd mapPartitions { urls =>
      val sdf = new SimpleDateFormat(ISO_SDF)
      val goose = getGooseScraper
      urls map { case (id, url) =>
        (id, fetchUrl(goose, url, sdf))
      }
    }
  }

  def fetch(urlRdd: RDD[String]): RDD[Content] = {
    urlRdd mapPartitions { urls =>
      val sdf = new SimpleDateFormat(ISO_SDF)
      val goose = getGooseScraper
      urls map(url => fetchUrl(goose, url, sdf))
    }
  }

  private def getGooseScraper: Goose = {
    val conf: Configuration = new Configuration
    conf.setEnableImageFetching(false)
    conf.setBrowserUserAgent(USER_AGENT)
    conf.setConnectionTimeout(connectionTimeout)
    conf.setSocketTimeout(socketTimeout)
    new Goose(conf)
  }

  private def fetchUrl(goose: Goose, url: String, sdf: SimpleDateFormat) : Content = {

    try {

      val article = goose.extractContent(url)
      var body = None: Option[String]
      var title = None: Option[String]
      var description = None: Option[String]
      var publishDate = None: Option[String]

      if (StringUtils.isNotEmpty(article.cleanedArticleText))
        body = Some(article.cleanedArticleText)

      if (StringUtils.isNotEmpty(article.title))
        title = Some(article.title)

      if (StringUtils.isNotEmpty(article.metaDescription))
        description = Some(article.metaDescription)

      if (article.publishDate != null)
        publishDate = Some(sdf.format(article.publishDate))

      Content(url, title, description, body, publishDate)

    } catch {
      case e: Throwable =>
        Content(url, None, None, None, None)
    }
  }
}


object HtmlFetcher {

  case class Content(
                      url: String,
                      title: Option[String],
                      description: Option[String],
                      body: Option[String],
                      publishedDate: Option[String]
                    )

} 
Example 112
Source File: GdeltTagger.scala    From Mastering-Spark-for-Data-Science   with MIT License 5 votes vote down vote up
package io.gzet.tagging.gdelt

import java.text.SimpleDateFormat
import java.util.Date

import com.typesafe.config.ConfigFactory
import io.gzet.tagging.classifier.Classifier
import io.gzet.tagging.html.HtmlHandler
import io.gzet.tagging.html.HtmlHandler.Content
import org.apache.spark.Accumulator
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.util.LongAccumulator
import org.elasticsearch.spark._

class GdeltTagger() extends Serializable {

  val config = ConfigFactory.load().getConfig("io.gzet.kappa")
  val isoSdf = "yyyy-MM-dd HH:mm:ss"
  val esIndex = config.getString("gdeltIndex")
  val vectorSize = config.getInt("vectorSize")
  val minProba = config.getDouble("minProba")

  def predict(gdeltStream: DStream[String], batchId: LongAccumulator) = {

    // Extract HTML content
    val gdeltContent = fetchHtmlContent(gdeltStream)

    // Predict each RDD
    gdeltContent foreachRDD { batch =>

      batch.cache()
      val count = batch.count()

      if (count > 0) {

        if (Classifier.model.isDefined) {
          val labels = Classifier.model.get.labels

          // Predict HashTags using latest Twitter model
          val textRdd = batch.map(_.body.get)
          val predictions = Classifier.predictProbabilities(textRdd)
          val taggedGdelt = batch.zip(predictions) map { case (content, probabilities) =>
            val validLabels = probabilities filter { case (label, probability) =>
              probability > minProba
            }

            val labels = validLabels.toSeq
              .sortBy(_._2)
              .reverse
              .map(_._1)

            (content, labels)
          }

          // Saving articles to Elasticsearch
          taggedGdelt map { case (content, hashTags) =>
            gdeltToJson(content, hashTags.toArray)
          } saveToEs esIndex

        } else {

          // Saving articles to Elasticsearch
          batch map { content =>
            gdeltToJson(content, Array())
          } saveToEs esIndex
        }

      }

      batch.unpersist(blocking = false)
    }
  }

  private def gdeltToJson(content: Content, hashTags: Array[String]) = {
    val sdf = new SimpleDateFormat(isoSdf)
    Map(
      "time" -> sdf.format(new Date()),
      "body" -> content.body.get,
      "url" -> content.url,
      "tags" -> hashTags,
      "title" -> content.title
    )
  }

  private def fetchHtmlContent(urlStream: DStream[String]) = {
    urlStream.map(_ -> 1).groupByKey().map(_._1) mapPartitions { urls =>
      val sdf = new SimpleDateFormat(isoSdf)
      val htmlHandler = new HtmlHandler()
      val goose = htmlHandler.getGooseScraper
      urls map { url =>
        htmlHandler.fetchUrl(goose, url, sdf)
      }
    } filter { content =>
      content.isDefined &&
        content.get.body.isDefined &&
        content.get.body.get.length > 255
    } map { content =>
      content.get
    }
  }
} 
Example 113
Source File: OilPriceFunc.scala    From Mastering-Spark-for-Data-Science   with MIT License 5 votes vote down vote up
package io.gzet.geomesa

import java.text.SimpleDateFormat
import java.util.Calendar

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.{udf, window, last, col, lag}

object OilPriceFunc {

    // use this if the window function misbehaves due to timezone e.g. BST
    // ./spark-shell --driver-java-options "-Duser.timezone=UTC"
    // ./spark-submit --conf 'spark.driver.extraJavaOptions=-Duser.timezone=UTC'

    // define a function to reformat the date field
    def convert(date:String) : String = {
      val df1 = new SimpleDateFormat("dd/MM/yyyy")
      val dt = df1.parse(date)
      val df2 = new SimpleDateFormat("yyyy-MM-dd")
      df2.format(dt)
    }

    // create and save oil price changes
    def createOilPriceDF(inputfile: String, outputfile: String, spark: SparkSession) = {

      val oilPriceDF = spark.
        read.
        option("header", "true").
        option("inferSchema", "true").
        csv(inputfile)

      val convertDateUDF = udf { (Date: String) => convert(Date) }

      val oilPriceDatedDF = oilPriceDF.withColumn("DATE", convertDateUDF(oilPriceDF("DATE")))

      // offset to start at beginning of week
      val windowDF = oilPriceDatedDF.groupBy(window(oilPriceDatedDF.col("DATE"), "7 days", "7 days", "4 days"))

      val windowLastDF = windowDF.agg(last("PRICE") as "last(PRICE)").sort("window")

//      windowLastDF.show(20, false)

      val sortedWindow = Window.orderBy("window.start")

      val lagLastCol = lag(col("last(PRICE)"), 1).over(sortedWindow)
      val lagLastColDF = windowLastDF.withColumn("lastPrev(PRICE)", lagLastCol)

//      lagLastColDF.show(20, false)

      val simplePriceChangeFunc = udf { (last: Double, prevLast: Double) =>
        var change = ((last - prevLast) compare 0).signum
        if (change == -1)
          change = 0
        change.toDouble
      }

      val findDateTwoDaysAgoUDF = udf { (date: String) =>
        val dateFormat = new SimpleDateFormat("yyyy-MM-dd")
        val cal = Calendar.getInstance
        cal.setTime(dateFormat.parse(date))
        cal.add(Calendar.DATE, -3)
        dateFormat.format(cal.getTime)
      }

      val oilPriceChangeDF = lagLastColDF.withColumn("label", simplePriceChangeFunc(
        lagLastColDF("last(PRICE)"),
        lagLastColDF("lastPrev(PRICE)")
      )).withColumn("commonFriday", findDateTwoDaysAgoUDF(lagLastColDF("window.end")))

//      oilPriceChangeDF.show(20, false)

      oilPriceChangeDF.select("label", "commonFriday").
        write.
        format("com.databricks.spark.csv").
        option("header", "true").
        //.option("codec", "org.apache.hadoop.io.compress.GzipCodec")
        save(outputfile)
    }
} 
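The convert helper simply re-parses a dd/MM/yyyy string and re-emits it as yyyy-MM-dd, which is the shape the window and date functions above expect. A one-line check (my own sketch, not part of the original file):

object ConvertCheck extends App {
  // 1 May 2015 written as dd/MM/yyyy comes back in ISO-style yyyy-MM-dd
  println(io.gzet.geomesa.OilPriceFunc.convert("01/05/2015")) // 2015-05-01
}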
Example 114
Source File: PoFile.scala    From scalingua   with Apache License 2.0 5 votes vote down vote up
package ru.makkarpov.scalingua.pofile

import java.io._
import java.nio.charset.StandardCharsets
import java.text.SimpleDateFormat
import java.util.Date

import ru.makkarpov.scalingua.StringUtils
import ru.makkarpov.scalingua.pofile.parse.{ErrorReportingParser, PoLexer}

object PoFile {
  

  val encoding = StandardCharsets.UTF_8

  val GeneratedPrefix = "!Generated:"
  private def headerComment(s: String) = s"#  $GeneratedPrefix $s"

  def apply(f: File): Seq[Message] = apply(new FileInputStream(f), f.getName)

  def apply(is: InputStream, filename: String = "<unknown>"): Seq[Message] = {
    val parser = new ErrorReportingParser(new PoLexer(new InputStreamReader(is, StandardCharsets.UTF_8), filename))
    parser.parse().value.asInstanceOf[Seq[Message]]
  }

  def update(f: File, messages: Seq[Message], escapeUnicode: Boolean = true): Unit = {
    val output = new NewLinePrintWriter(new OutputStreamWriter(new FileOutputStream(f), encoding), false)
    try {
      output.println(headerComment(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date())))
      output.println()

      def printEntry(s: String, m: MultipartString): Unit = {
        output.print(s + " ")

        if (m.parts.isEmpty) output.println("\"\"")
        else for (p <- m.parts) output.println("\"" + StringUtils.escape(p, escapeUnicode) + "\"")
      }

      for (m <- messages) {
        for (s <- m.header.comments)
          output.println(s"#  $s")

        for (s <- m.header.extractedComments)
          output.println(s"#. $s")

        for (s <- m.header.locations.sorted)
          if (s.line < 0)
            output.println(s"#: ${s.fileString}")
          else
            output.println(s"#: ${s.fileString}:${s.line}")

        if (m.header.flags.nonEmpty)
          output.println(s"#, " + m.header.flags.map(_.toString).mkString(", "))

        for (t <- m.header.tag)
          output.println(s"#~ $t")

        for (c <- m.context)
          printEntry("msgctxt", c)

        printEntry("msgid", m.message)

        m match {
          case Message.Singular(_, _, _, tr) =>
            printEntry("msgstr", tr)

          case Message.Plural(_, _, _, id, trs) =>
            printEntry("msgid_plural", id)

            for ((m, i) <- trs.zipWithIndex)
              printEntry(s"msgstr[$i]", m)
        }

        output.println()
      }
    } finally output.close()
  }
} 
Example 115
Source File: Utils.scala    From lemon-schedule   with GNU General Public License v2.0 5 votes vote down vote up
package com.gabry.job.utils

import java.sql.Timestamp
import java.text.SimpleDateFormat
import java.util.Date
import java.util.concurrent.TimeUnit

import scala.collection.mutable.ArrayBuffer


object Utils {

  // Reflective handle on ClassLoader's private "classes" field (a java.util.Vector[Class[_]]).
  // The enclosing object and this field are assumed here; only the method below appears in the excerpt.
  private val classesField = classOf[ClassLoader].getDeclaredField("classes")
  classesField.setAccessible(true)

  def getLoadedClass(classLoader: ClassLoader): Array[Class[_]] = {
    val loadedClass = ArrayBuffer.empty[Class[_]]

    val loadedClassEnum = classesField.get(classLoader).asInstanceOf[java.util.Vector[Class[_]]].elements()

    while(loadedClassEnum.hasMoreElements){
      val nextElement = loadedClassEnum.nextElement()
      loadedClass.append(nextElement)
    }
    loadedClass.toArray
  }
} 
Example 116
Source File: Output.scala    From Clustering4Ever   with Apache License 2.0 5 votes vote down vote up
package org.clustering4ever.spark.clustering.mtm
import java.io._
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.linalg.DenseVector
import scala.sys.process._
import java.util.Calendar
import java.text.SimpleDateFormat
import java.io.File
import java.io.FileWriter

object Output extends Serializable
{

  def saveStr(savingPath: String, value: String, fileName: String = "") =
  {
    s"mkdir -p ${savingPath}".!
    val finalPath = savingPath + fileName
    val fw = new FileWriter(finalPath, true)
    fw.write(value + "\n")
    fw.close()
  }

  def write(outputDir: String, datas: RDD[Array[Double]], model: AbstractModel, nbRowSOM:Int, nbColSOM: Int): String =
  {
      val now = Calendar.getInstance().getTime()
      val format = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss")
      val time = format.format(now)
      val dim = datas.first.size
      val datasWithIndex = datas.zipWithIndex.map(_.swap)

      val path: String = outputDir + "/EXP-" + time + "/"
      s"mkdir -p ${path}".!
    
      val mapMin = Array.fill[Byte](dim)(0).mkString(",")
      var header = "# mapDim=2 mapSize={"+ nbRowSOM +"," + nbColSOM + "}"
      header += " pointDim=" + dim + " pointRealDim=" + dim + " mapMin={" + mapMin + "}"
    
      val prototypes = model.prototypes.map( d => (d.id, d.point)).sortBy(_._1).map(_._2)
      println("Write Prototypes...")
      val protosString = prototypes.map( d => d.toArray.mkString(",")).mkString("\n")

      // Use a FileWriter here
      saveStr(path, header + "\n" + protosString, "maps")

      val sumAffectedDatas = datas.map( d => (model.findClosestPrototype(d).id, 1)).reduceByKey{ case (sum1, sum2) => sum1 + sum2 }.collectAsMap 
    
      // fill in all the prototypes that have 0 observations
      val card = (0 until prototypes.length).map( d => if (sumAffectedDatas.contains(d)) sumAffectedDatas(d) + "" else "0" )
    
      println("Write Cardinalities...")
      var cardHeader = "# mapDim=2 mapSize={"+ nbRowSOM +"," + nbColSOM + "}" 
      cardHeader +=  "pointDim=1 pointRealDim=0 mapMin={0} mapMax={0}"
      val cardStr = card.mkString("\n")
      saveStr(path, cardHeader + "\n" + cardStr, "cards")

      val affHeader = "# mapDim=1 mapSize={" + datas.count() + "} pointDim=1 pointRealDim=0 mapMin={0} mapMax={0}"
      val aff = datasWithIndex.map(d => (d._1, model.findClosestPrototype(d._2).id + "")).sortByKey().values.collect.mkString("\n")

      println("Write Affiliate...")
      saveStr(path, affHeader + "\n" + aff, "affs")    
      println("Write Maps...")

      val maps = prototypes.zip(card).map(d => d._1.toArray.mkString(",") + "," + d._2).mkString("\n")
      saveStr(path, maps, "mapscard")
      println("Write successfully...")
      time
  }
} 
Example 117
Source File: KeysExtractor.scala    From stream-reactor   with Apache License 2.0 5 votes vote down vote up
package com.datamountaineer.streamreactor.connect.azure.documentdb.sink

import java.text.SimpleDateFormat
import java.util.TimeZone

import org.apache.kafka.common.config.ConfigException
import org.apache.kafka.connect.data._
import org.json4s.JsonAST._


object KeysExtractor {

  private val ISO_DATE_FORMAT: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'")
  private val TIME_FORMAT: SimpleDateFormat = new SimpleDateFormat("HH:mm:ss.SSSZ")

  ISO_DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"))

  def fromStruct(struct: Struct, keys: Set[String]): Set[(String, Any)] = {
    keys.map { key =>
      val schema = struct.schema().field(key).schema()
      val value = struct.get(key)

      val v = schema.`type`() match {
        case Schema.Type.INT32 =>
          if (schema != null && Date.LOGICAL_NAME == schema.name) ISO_DATE_FORMAT.format(Date.toLogical(schema, value.asInstanceOf[Int]))
          else if (schema != null && Time.LOGICAL_NAME == schema.name) TIME_FORMAT.format(Time.toLogical(schema, value.asInstanceOf[Int]))
          else value

        case Schema.Type.INT64 =>
          if (Timestamp.LOGICAL_NAME == schema.name) Timestamp.fromLogical(schema, value.asInstanceOf[(java.util.Date)])
          else value

        case Schema.Type.STRING => value.asInstanceOf[CharSequence].toString

        case Schema.Type.BYTES =>
          if (Decimal.LOGICAL_NAME == schema.name) value.asInstanceOf[BigDecimal].toDouble
          else throw new ConfigException(s"Schema.Type.BYTES is not supported for $key.")

        case Schema.Type.ARRAY =>
          throw new ConfigException(s"Schema.Type.ARRAY is not supported for $key.")

        case Schema.Type.MAP => throw new ConfigException(s"Schema.Type.MAP is not supported for $key.")
        case Schema.Type.STRUCT => throw new ConfigException(s"Schema.Type.STRUCT is not supported for $key.")
        case other => throw new ConfigException(s"$other is not supported for $key.")
      }
      key -> v
    }
  }

  def fromMap(map: java.util.Map[String, Any], keys: Set[String]): Set[(String, Any)] = {
    keys.map { key =>
      if (!map.containsKey(key)) throw new ConfigException(s"The key $key can't be found")
      val value = map.get(key) match {
        case t: String => t
        case t: Boolean => t
        case t: Int => t
        case t: Long => t
        case t: Double => t
        case t: BigInt => t.toLong
        case t: BigDecimal => t.toDouble
        case other => throw new ConfigException(s"The key $key is not supported for type ${Option(other).map(_.getClass.getName).getOrElse("NULL")}")
      }
      key -> value
    }
  }

  def fromJson(jvalue: JValue, keys: Set[String]): List[(String, Any)] = {
    jvalue match {
      case JObject(children) =>
        children.collect {
          case JField(name, value) if keys.contains(name) =>
            val v = value match {
              case JBool(b) => b
              case JDecimal(d) => d.toDouble
              case JDouble(d) => d
              case JInt(i) => i.toLong
              case JLong(l) => l
              case JString(s) => s
              case other => throw new ConfigException(s"Field $name is not handled as a key (${other.getClass}). It needs to be an int, long, string, double or decimal")
            }
            name -> v
        }
      case other => throw new ConfigException(s"${other.getClass} is not supported")
    }
  }
} 
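
A small, hypothetical usage sketch for KeysExtractor.fromStruct above; the schema and the field names ("id", "age") are made up for illustration and are not part of the connector:

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}

object KeysExtractorUsage {
  def main(args: Array[String]): Unit = {
    // Illustrative Connect schema with a string key field and an int field.
    val schema: Schema = SchemaBuilder.struct()
      .field("id", Schema.STRING_SCHEMA)
      .field("age", Schema.INT32_SCHEMA)
      .build()

    val struct = new Struct(schema)
      .put("id", "user-1")
      .put("age", 42)

    // Extract only the "id" key; yields Set(("id", "user-1")).
    println(KeysExtractor.fromStruct(struct, Set("id")))
  }
}
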
Example 118
Source File: StructFieldsExtractorBytes.scala    From stream-reactor   with Apache License 2.0 5 votes vote down vote up
package com.datamountaineer.streamreactor.connect.hbase

import java.text.SimpleDateFormat
import java.util.TimeZone

import com.datamountaineer.streamreactor.connect.hbase.BytesHelper._
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.connect.data._

import scala.collection.JavaConverters._

trait FieldsValuesExtractor {
  def get(struct: Struct): Seq[(String, Array[Byte])]
}

case class StructFieldsExtractorBytes(includeAllFields: Boolean, fieldsAliasMap: Map[String, String]) extends FieldsValuesExtractor with StrictLogging {

  def get(struct: Struct): Seq[(String, Array[Byte])] = {
    val schema = struct.schema()
    val fields: Seq[Field] = if (includeAllFields) {
      schema.fields().asScala
    }
    else {
      val selectedFields = schema.fields().asScala.filter(f => fieldsAliasMap.contains(f.name()))
      val diffSet = fieldsAliasMap.keySet.diff(selectedFields.map(_.name()).toSet)
      if (diffSet.nonEmpty) {
        val errMsg = s"Following columns ${diffSet.mkString(",")} have not been found. Available columns:${fieldsAliasMap.keys.mkString(",")}"
        logger.error(errMsg)
        sys.error(errMsg)
      }
      selectedFields
    }

    val fieldsAndValues = fields.flatMap(field =>
      getFieldBytes(field, struct).map(bytes => fieldsAliasMap.getOrElse(field.name(), field.name()) -> bytes))

    fieldsAndValues
  }

  private def getFieldBytes(field: Field, struct: Struct): Option[Array[Byte]] = {
    Option(struct.get(field))
      .map { value =>
        Option(field.schema().name()).collect {
          case Decimal.LOGICAL_NAME =>
            value.asInstanceOf[Any] match {
              case _:java.math.BigDecimal => value.fromBigDecimal()
              case arr: Array[Byte] => Decimal.toLogical(field.schema, arr).asInstanceOf[Any].fromBigDecimal()
              case _ => throw new IllegalArgumentException(s"${field.name()} is not handled for value:$value")
            }
          case Time.LOGICAL_NAME =>
            value.asInstanceOf[Any] match {
              case i: Int => StructFieldsExtractorBytes.TimeFormat.format(Time.toLogical(field.schema, i)).asInstanceOf[Any].fromString()
              case d:java.util.Date => StructFieldsExtractorBytes.TimeFormat.format(d).asInstanceOf[Any].fromString()
              case _ => throw new IllegalArgumentException(s"${field.name()} is not handled for value:$value")
            }

          case Timestamp.LOGICAL_NAME =>
            value.asInstanceOf[Any] match {
              case d:java.util.Date => StructFieldsExtractorBytes.DateFormat.format(d).asInstanceOf[Any].fromString()
              case l: Long => StructFieldsExtractorBytes.DateFormat.format(Timestamp.toLogical(field.schema, l)).asInstanceOf[Any].fromString()
              case _ => throw new IllegalArgumentException(s"${field.name()} is not handled for value:$value")
            }
        }.getOrElse {

          field.schema().`type`() match {
            case Schema.Type.BOOLEAN => value.fromBoolean()
            case Schema.Type.BYTES => value.fromBytes()
            case Schema.Type.FLOAT32 => value.fromFloat()
            case Schema.Type.FLOAT64 => value.fromDouble()
            case Schema.Type.INT8 => value.fromByte()
            case Schema.Type.INT16 => value.fromShort()
            case Schema.Type.INT32 => value.fromInt()
            case Schema.Type.INT64 => value.fromLong()
            case Schema.Type.STRING => value.fromString()
            case other => sys.error(s"$other is not a recognized schema!")
          }
        }
      }
  }
}


object StructFieldsExtractorBytes {
  val DateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'")
  val TimeFormat = new SimpleDateFormat("HH:mm:ss.SSSZ")

  DateFormat.setTimeZone(TimeZone.getTimeZone("UTC"))
} 
Example 119
Source File: StructFieldsExtractor.scala    From stream-reactor   with Apache License 2.0 5 votes vote down vote up
package com.datamountaineer.streamreactor.connect.voltdb

import java.text.SimpleDateFormat
import java.util.TimeZone

import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.connect.data.{Field, Struct, _}

import scala.collection.JavaConverters._

trait FieldsValuesExtractor {
  def get(struct: Struct): Map[String, Any]
}

case class StructFieldsExtractor(targetTable: String,
                                 includeAllFields: Boolean,
                                 fieldsAliasMap: Map[String, String],
                                 isUpsert: Boolean = false) extends FieldsValuesExtractor with StrictLogging {
  require(targetTable != null && targetTable.trim.length > 0)

  def get(struct: Struct): Map[String, Any] = {
    val schema = struct.schema()
    val fields: Seq[Field] = {
      if (includeAllFields) {
        schema.fields().asScala
      } else {
        val selectedFields = schema.fields().asScala.filter(f => fieldsAliasMap.contains(f.name()))
        val diffSet = fieldsAliasMap.keySet.diff(selectedFields.map(_.name()).toSet)
        if (diffSet.nonEmpty) {
          val errMsg = s"Following columns ${diffSet.mkString(",")} have not been found. Available columns:${fieldsAliasMap.keys.mkString(",")}"
          logger.error(errMsg)
          sys.error(errMsg)
        }
        selectedFields
      }
    }

    //need to select all fields including null. the stored proc needs a fixed set of params
    fields.map { field =>
      val schema = field.schema()
      val value = Option(struct.get(field))
        .map { value =>
          //handle specific schema
          schema.name() match {
            case Decimal.LOGICAL_NAME =>
              value.asInstanceOf[Any] match {
                case _:java.math.BigDecimal => value
                case arr: Array[Byte] => Decimal.toLogical(schema, arr)
                case _ => throw new IllegalArgumentException(s"${field.name()} is not handled for value:$value")
              }
            case Time.LOGICAL_NAME =>
              value.asInstanceOf[Any] match {
                case i: Int => StructFieldsExtractor.TimeFormat.format(Time.toLogical(schema, i))
                case d:java.util.Date => StructFieldsExtractor.TimeFormat.format(d)
                case _ => throw new IllegalArgumentException(s"${field.name()} is not handled for value:$value")
              }

            case Timestamp.LOGICAL_NAME =>
              value.asInstanceOf[Any] match {
                case d:java.util.Date => StructFieldsExtractor.DateFormat.format(d)
                case l: Long => StructFieldsExtractor.DateFormat.format(Timestamp.toLogical(schema, l))
                case _ => throw new IllegalArgumentException(s"${field.name()} is not handled for value:$value")
              }

            case _ => value
          }
        }.orNull

      fieldsAliasMap.getOrElse(field.name(), field.name()) -> value
    }.toMap
  }
}


object StructFieldsExtractor {
  val DateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'")
  val TimeFormat: SimpleDateFormat = new SimpleDateFormat("HH:mm:ss.SSSZ")
  DateFormat.setTimeZone(TimeZone.getTimeZone("UTC"))
} 
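
The shared DateFormat/TimeFormat singletons used by the two extractors above are convenient, but SimpleDateFormat is not thread-safe. If an extractor is called from several sink tasks concurrently, one common workaround (a sketch, not taken from the original project) is to keep one formatter per thread:

import java.text.SimpleDateFormat
import java.util.TimeZone

object ThreadSafeDateFormat {
  // SimpleDateFormat holds mutable parse/format state, so give each thread its own instance.
  private val isoFormat = new ThreadLocal[SimpleDateFormat] {
    override def initialValue(): SimpleDateFormat = {
      val f = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'")
      f.setTimeZone(TimeZone.getTimeZone("UTC"))
      f
    }
  }

  def format(date: java.util.Date): String = isoFormat.get().format(date)
}
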
Example 120
Source File: DateFormatConstraint.scala    From drunken-data-quality   with Apache License 2.0 5 votes vote down vote up
package de.frosner.ddq.constraints

import java.text.SimpleDateFormat

import org.apache.spark.sql.functions._
import org.apache.spark.sql.{Column, DataFrame}

import scala.util.Try

case class DateFormatConstraint(columnName: String,
                                formatString: String) extends Constraint {

  val fun = (df: DataFrame) => {
    val cannotBeDate = udf((column: String) =>
      column != null && Try {
        val format = new SimpleDateFormat(formatString)
        format.setLenient(false)
        format.parse(column)
      }.isFailure)
    val maybeCannotBeDateCount = Try(df.filter(cannotBeDate(new Column(columnName))).count)
    DateFormatConstraintResult(
      this,
      data = maybeCannotBeDateCount.toOption.map(DateFormatConstraintResultData),
      status = ConstraintUtil.tryToStatus[Long](maybeCannotBeDateCount, _ == 0)
    )
  }

}

case class DateFormatConstraintResult(constraint: DateFormatConstraint,
                                      data: Option[DateFormatConstraintResultData],
                                      status: ConstraintStatus) extends ConstraintResult[DateFormatConstraint] {

  val message: String = {
    val format = constraint.formatString
    val columnName = constraint.columnName
    val maybeFailedRows = data.map(_.failedRows)
    val maybePluralS = maybeFailedRows.map(failedRows => if (failedRows == 1) "" else "s")
    val maybeVerb = maybeFailedRows.map(failedRows => if (failedRows == 1) "is" else "are")
    (status, maybeFailedRows, maybePluralS, maybeVerb) match {
      case (ConstraintSuccess, Some(0), _, _) =>
        s"Column $columnName is formatted by $format."
      case (ConstraintFailure, Some(failedRows), Some(pluralS), Some(verb)) =>
        s"Column $columnName contains $failedRows row$pluralS that $verb not formatted by $format."
      case (ConstraintError(throwable), None, None, None) =>
        s"Checking whether column $columnName is formatted by $format failed: $throwable"
      case default => throw IllegalConstraintResultException(this)
    }

  }

}

case class DateFormatConstraintResultData(failedRows: Long) 
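
The heart of the constraint above is the lenient-disabled parse; a standalone sketch of that predicate, stripped of the DDQ and Spark plumbing, looks like this:

import java.text.SimpleDateFormat
import scala.util.Try

object DateFormatCheck {
  // True when the value is non-null and does NOT parse with the given pattern.
  def cannotBeDate(value: String, formatString: String): Boolean =
    value != null && Try {
      val format = new SimpleDateFormat(formatString)
      format.setLenient(false) // reject out-of-range dates instead of rolling them over
      format.parse(value)
    }.isFailure

  def main(args: Array[String]): Unit = {
    println(cannotBeDate("2015-11-18", "yyyy-MM-dd")) // false: valid date
    println(cannotBeDate("not-a-date", "yyyy-MM-dd")) // true: parse fails
  }
}
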
Example 121
Source File: TimeHelper.scala    From ez-framework   with Apache License 2.0 5 votes vote down vote up
package com.ecfront.ez.framework.core.helper

import java.text.SimpleDateFormat
import java.time.ZonedDateTime
import java.util.{Calendar, Date, TimeZone}


object TimeHelper {

  val msf = new SimpleDateFormat("yyyyMMddHHmmssSSS")
  val sf = new SimpleDateFormat("yyyyMMddHHmmss")
  val mf = new SimpleDateFormat("yyyyMMddHHmm")
  val hf = new SimpleDateFormat("yyyyMMddHH")
  val df = new SimpleDateFormat("yyyyMMdd")
  val Mf = new SimpleDateFormat("yyyyMM")
  val yf = new SimpleDateFormat("yyyy")

  val yyyy_MM_dd_HH_mm_ss_SSS = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS")
  val yyyy_MM_dd_HH_mm_ss = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
  val yyyy_MM_dd = new SimpleDateFormat("yyyy-MM-dd")

  def dateOffset(offsetValue: Int, offsetUnit: Int, currentTime: Long): Long = {
    val format = currentTime.toString.length match {
      case 8 => df
      case 10 => hf
      case 12 => mf
      case 14 => sf
      case 17 => msf
    }
    val calendar = Calendar.getInstance()
    calendar.setTime(format.parse(currentTime + ""))
    calendar.add(offsetUnit, offsetValue)
    format.format(calendar.getTime).toLong
  }

  def dateOffset(offsetValue: Int, offsetUnit: Int, currentDate: Date): Date = {
    val calendar = Calendar.getInstance()
    calendar.setTime(currentDate)
    calendar.add(offsetUnit, offsetValue)
    calendar.getTime
  }

  def utc2Local(utcTime: String, localTimePatten: String = "yyyy-MM-dd'T'HH:mm:ss"): String = {
    val utcDate = Date.from(ZonedDateTime.parse(utcTime).toInstant)
    val localF = new SimpleDateFormat(localTimePatten)
    localF.setTimeZone(TimeZone.getDefault)
    localF.format(utcDate.getTime)
  }

} 
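
A short, illustrative call sequence for dateOffset above (not part of the original helper), showing how the formatter is picked from the digit count of the numeric timestamp:

import java.util.Calendar

object TimeHelperUsage {
  def main(args: Array[String]): Unit = {
    // 8 digits selects the "yyyyMMdd" formatter; add one day.
    println(TimeHelper.dateOffset(1, Calendar.DATE, 20240301L)) // 20240302
    // 14 digits selects the "yyyyMMddHHmmss" formatter; go back two hours.
    println(TimeHelper.dateOffset(-2, Calendar.HOUR_OF_DAY, 20240301120000L)) // 20240301100000
  }
}
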
Example 122
Source File: TaskMonitor.scala    From ez-framework   with Apache License 2.0 5 votes vote down vote up
package com.ecfront.ez.framework.core.monitor

import java.text.SimpleDateFormat
import java.util.Date
import java.util.concurrent.ConcurrentHashMap

import com.ecfront.ez.framework.core.EZ
import com.ecfront.ez.framework.core.helper.{TimeHelper, TimerHelper}
import com.ecfront.ez.framework.core.logger.Logging

import scala.collection.JavaConversions._

object TaskMonitor extends Logging {

  private val tasks = new ConcurrentHashMap[String, (String, Date)]()

  def add(taskName: String): String = {
    val taskId = EZ.createUUID
    tasks += taskId -> (taskName, new Date())
    taskId
  }

  def get(taskId:String):(String,Date)={
    tasks.get(taskId)
  }

  def poll(taskId:String):(String,Date)={
    val d=tasks.get(taskId)
    tasks -= taskId
    d
  }

  def remove(taskId: String): Unit = {
    tasks -= taskId
  }

  def hasTask(): Boolean = {
    tasks.nonEmpty
  }

  
  def waitFinish(timeout: Long = Long.MaxValue): Unit = {
    logger.info("[Monitor]waiting task finish...")
    val waitStart = new Date().getTime
    while (tasks.nonEmpty && new Date().getTime - waitStart < timeout) {
      Thread.sleep(500)
      if (new Date().getTime - waitStart > 60 * 1000) {
        var warn = "[Monitor]has some unfinished tasks:\r\n"
        warn += tasks.map(task => s" > id:${task._1} name:${task._2._1} start time:${TimeHelper.yyyy_MM_dd_HH_mm_ss_SSS.format(task._2._2)}").mkString("\r\n")
        logger.warn(warn)
      }
    }
    if (tasks.nonEmpty) {
      var error = "[Monitor]has some unfinished tasks,but time is out:\r\n"
      error += tasks.map(task => s" > id:${task._1} name:${task._2._1} start time:${TimeHelper.yyyy_MM_dd_HH_mm_ss_SSS.format(task._2._2)}").mkString("\r\n")
      logger.error(error)
    }
    // wait one more second
    Thread.sleep(1000)
  }

  private val yyyy_MM_dd_HH_mm_ss_SSS = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS")
  TimerHelper.periodic(60L, {
    if (tasks.nonEmpty) {
      val info = new StringBuffer(s"\r\n--------------Current Execute Tasks : (${tasks.size()}) --------------\r\n")
      tasks.foreach {
            i =>
              info.append(s"------ ${yyyy_MM_dd_HH_mm_ss_SSS.format(i._2._2)} : [${i._1}]${i._2._1}\r\n")
      }
      logger.trace(info.toString)
    }
  })

} 
Example 123
Source File: LateDataMaker.scala    From flink-rookie   with Apache License 2.0 5 votes vote down vote up
package com.venn.stream.api.sideoutput.lateDataProcess

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}

import com.venn.common.Common
import com.venn.util.MathUtil
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.parsing.json.JSONObject


object LateDataMaker {

  val calendar: Calendar = Calendar.getInstance()

  def getCreateTime(): String = {
//    minute = minute + 1
    calendar.add(Calendar.SECOND, 10)
    sdf.format(calendar.getTime)
  }
  val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

  def main(args: Array[String]): Unit = {
    val producer = new KafkaProducer[String, String](Common.getProp)
    calendar.setTime(new Date())
    println(sdf.format(calendar.getTime))
    var i =74540;
    while (true) {

//      val map = Map("id"-> i, "createTime"-> sdf.format(System.currentTimeMillis()))
      val map = Map("id"-> i, "createTime"-> getCreateTime(), "amt"-> (MathUtil.random.nextInt(10) +"." + MathUtil.random.nextInt(10)))
      val jsonObject: JSONObject = new JSONObject(map)
      println(jsonObject.toString())
      // topic current_day
      val msg = new ProducerRecord[String, String]("late_data", jsonObject.toString())
      producer.send(msg)
      producer.flush()
      Thread.sleep(200)
      i = i + 1
//      System.exit(-1)
    }
  }

} 
Example 124
Source File: ErrorHandler.scala    From kafka-connect-common   with Apache License 2.0 5 votes vote down vote up
package com.datamountaineer.streamreactor.connect.errors

import java.text.SimpleDateFormat
import java.util.Date

import com.typesafe.scalalogging.StrictLogging

import scala.util.{Failure, Success, Try}


trait ErrorHandler extends StrictLogging {
  var errorTracker: Option[ErrorTracker] = None
  private val dateFormatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS'Z'")

  def initialize(maxRetries: Int, errorPolicy: ErrorPolicy): Unit = {
    errorTracker = Some(ErrorTracker(maxRetries, maxRetries, "", new Date(), errorPolicy))
  }

  def getErrorTrackerRetries() : Int = {
    errorTracker.get.retries
  }

  def errored() : Boolean = {
    errorTracker.get.retries != errorTracker.get.maxRetries
  }

  def handleTry[A](t : Try[A]) : Option[A] = {
    require(errorTracker.isDefined, "ErrorTracker is not set. Call initialize() first.")
    t
    match {
      case Success(s) => {
        //success, check if we had previous errors.
        if (errorTracker.get.retries != errorTracker.get.maxRetries) {
          logger.info(s"Recovered from error ${errorTracker.get.lastErrorMessage} at " +
            s"${dateFormatter.format(errorTracker.get.lastErrorTimestamp)}")
        }
        //cleared error
        resetErrorTracker()
        Some(s)
      }
      case Failure(f) =>
        //decrement the retry count
        logger.error(s"Encountered error ${f.getMessage}", f)
        this.errorTracker = Some(decrementErrorTracker(errorTracker.get, f.getMessage))
        handleError(f, errorTracker.get.retries, errorTracker.get.policy)
        None
    }
  }

  def resetErrorTracker() = {
    errorTracker = Some(ErrorTracker(errorTracker.get.maxRetries, errorTracker.get.maxRetries, "", new Date(),
      errorTracker.get.policy))
  }

  private def decrementErrorTracker(errorTracker: ErrorTracker, msg: String): ErrorTracker = {
    if (errorTracker.maxRetries == -1) {
      ErrorTracker(errorTracker.retries, errorTracker.maxRetries, msg, new Date(), errorTracker.policy)
    } else {
      ErrorTracker(errorTracker.retries - 1, errorTracker.maxRetries, msg, new Date(), errorTracker.policy)
    }
  }

  private def handleError(f: Throwable, retries: Int, policy: ErrorPolicy): Unit = {
    policy.handle(f, true, retries)
  }
} 
Example 125
Source File: DataFrameReportPerformanceSpec.scala    From seahorse-workflow-executor   with Apache License 2.0 5 votes vote down vote up
package io.deepsense.deeplang.doperables.dataframe

import java.sql.Timestamp
import java.text.{DateFormat, SimpleDateFormat}
import java.util.TimeZone

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{DoubleType, StructField, StructType, TimestampType}
import org.scalatest.{BeforeAndAfter, Ignore}

import io.deepsense.commons.utils.{DoubleUtils, Logging}
import io.deepsense.deeplang.{TestFiles, DeeplangIntegTestSupport}

// It's ignored because it does not have assertions; it only prints report generation time.
@Ignore
class DataFrameReportPerformanceSpec
    extends DeeplangIntegTestSupport
    with BeforeAndAfter
    with TestFiles
    with Logging {
  val testFile = absoluteTestsDirPath.pathWithoutScheme + "/demand_without_header.csv"

  "DataFrame" should {
    "generate report" when {
      "DataFrame has 17K of rows" in {
        val numberOfTries = 10
        var results: Seq[Double] = Seq()
        for (i <- 1 to numberOfTries) {
          val dataFrame: DataFrame = demandDataFrame()
          val start = System.nanoTime()
          val report = dataFrame.report
          val end = System.nanoTime()
          val time1: Double = (end - start).toDouble / 1000000000.0
          results = results :+ time1
          logger.debug("Report generation time: {}", DoubleUtils.double2String(time1))
        }
        logger.debug(
          "Mean report generation time: {}",
          DoubleUtils.double2String(results.fold(0D)(_ + _) / numberOfTries.toDouble))
      }
    }
  }

  private def demandDataFrame(): DataFrame = {
    val rddString: RDD[String] = executionContext.sparkContext.textFile(testFile)
    val data: RDD[Row] = rddString.map(DataFrameHelpers.demandString2Row)
    executionContext.dataFrameBuilder.buildDataFrame(demandSchema, data)
  }

  private def demandSchema: StructType = StructType(Seq(
    StructField("datetime", TimestampType),
    StructField("log_count", DoubleType),
    StructField("workingday", DoubleType),
    StructField("holiday", DoubleType),
    StructField("season2", DoubleType),
    StructField("season3", DoubleType),
    StructField("season4", DoubleType)))

  private def timestamp(s: String): Timestamp = {
    val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    format.setTimeZone(TimeZone.getTimeZone("UTC"))
    new Timestamp(format.parse(s).getTime)
  }
}

private object DataFrameHelpers {
  def demandString2Row(s: String): Row = {
    val split = s.split(",")
    Row(
      timestamp(split(0)),
      split(1).toDouble,
      split(2).toDouble,
      split(3).toDouble,
      split(4).toDouble,
      split(5).toDouble,
      split(6).toDouble
    )
  }

  private def timestamp(s: String): Timestamp = {
    val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    format.setTimeZone(TimeZone.getTimeZone("UTC"))
    new Timestamp(format.parse(s).getTime)
  }
} 
Example 126
Source File: ReceiptRenderer.scala    From apple-of-my-iap   with MIT License 5 votes vote down vote up
package com.meetup.iap.receipt

import java.text.SimpleDateFormat

import com.meetup.iap.AppleApi
import AppleApi.{ReceiptResponse, ReceiptInfo}

import java.util.{Date, TimeZone}

import org.json4s.JsonDSL._
import org.json4s.native.JsonMethods._
import org.json4s.JsonAST.JValue
import org.slf4j.LoggerFactory

object ReceiptRenderer {
  val log = LoggerFactory.getLogger(ReceiptRenderer.getClass)

  private def appleDateFormat(date: Date): String = {
    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss 'Etc/GMT'")
    sdf.setTimeZone(TimeZone.getTimeZone("UTC"))
    sdf.format(date)
  }

  def apply(response: ReceiptResponse): String = {
    pretty(render(
      ("status" -> response.statusCode) ~
        ("latest_receipt_info" -> response.latestReceiptInfo.reverse.map(renderReceipt)) ~
        ("latest_receipt" -> response.latestReceipt)))
  }

  private def renderReceipt(receiptInfo: ReceiptInfo): JValue = {
    val origPurchaseDate = receiptInfo.originalPurchaseDate
    val origPurchaseDateStr = appleDateFormat(origPurchaseDate)
    val origPurchaseDateMs = origPurchaseDate.getTime

    val purchaseDate = receiptInfo.purchaseDate
    val purchaseDateStr = appleDateFormat(purchaseDate)
    val purchaseDateMs = purchaseDate.getTime

    val expiresDate = receiptInfo.expiresDate
    val expiresDateStr = appleDateFormat(expiresDate)
    val expiresDateMs = expiresDate.getTime

    val cancellationDate = receiptInfo.cancellationDate.map { date =>
      appleDateFormat(date)
    }
    ("quantity" -> "1") ~
      ("product_id" -> receiptInfo.productId) ~
      ("transaction_id" -> receiptInfo.transactionId) ~
      ("original_transaction_id" -> receiptInfo.originalTransactionId) ~
      ("purchase_date" -> purchaseDateStr) ~
      ("purchase_date_ms" -> purchaseDateMs.toString) ~
      ("original_purchase_date" -> origPurchaseDateStr) ~
      ("original_purchase_date_ms" -> origPurchaseDateMs.toString) ~
      ("expires_date" -> expiresDateStr) ~
      ("expires_date_ms" -> expiresDateMs.toString) ~
      ("is_trial_period" -> receiptInfo.isTrialPeriod.toString) ~ //We mimic Apple's weird json here by converting the boolean type to a string
      ("is_in_intro_offer_period" -> receiptInfo.isInIntroOfferPeriod.map(_.toString)) ~
      ("cancellation_date" -> cancellationDate)
  }
} 
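
appleDateFormat above renders dates the way Apple's verify-receipt JSON does: a UTC wall-clock time followed by a literal "Etc/GMT" suffix. A tiny standalone check (not part of the original object):

import java.text.SimpleDateFormat
import java.util.{Date, TimeZone}

object AppleDateFormatDemo {
  def main(args: Array[String]): Unit = {
    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss 'Etc/GMT'")
    sdf.setTimeZone(TimeZone.getTimeZone("UTC"))
    // The Unix epoch formats as "1970-01-01 00:00:00 Etc/GMT".
    println(sdf.format(new Date(0L)))
  }
}
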
Example 127
Source File: KafkaOffsetRevertTest.scala    From flink-rookie   with Apache License 2.0 5 votes vote down vote up
package com.venn.kafka

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}

import com.venn.common.Common
import com.venn.util.MathUtil
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.parsing.json.JSONObject


object KafkaOffsetRevertTest {

  val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

  def main(args: Array[String]): Unit = {
    val producer = new KafkaProducer[String, String](Common.getProp(true))
    var i = 0;
    while (true) {

      //      val map = Map("id"-> i, "createTime"-> sdf.format(System.currentTimeMillis()))
      val map = Map("id" -> i, "createTime" -> sdf.format(System.currentTimeMillis()), "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10)))
      val jsonObject: JSONObject = new JSONObject(map)
      println(jsonObject.toString())
      // topic current_day
      val msg = new ProducerRecord[String, String]("kafka_offset", jsonObject.toString())
      producer.send(msg)
      producer.flush()
      Thread.sleep(1000)
      i = i + 1
      //      System.exit(-1)
    }
  }

} 
Example 128
Source File: SlotPartitionMaker.scala    From flink-rookie   with Apache License 2.0 5 votes vote down vote up
package com.venn.demo

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}

import com.venn.common.Common
import com.venn.util.MathUtil
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.parsing.json.JSONObject


object SlotPartitionMaker {

  val calendar: Calendar = Calendar.getInstance()

  def getCreateTime(): String = {
    //    minute = minute + 1
    calendar.add(Calendar.MILLISECOND, 10)
    sdf.format(calendar.getTime)
  }

  val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

  def main(args: Array[String]): Unit = {

    val prop = Common.getProp
    prop.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    prop.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](Common.getProp)
    calendar.setTime(new Date())
    println(sdf.format(calendar.getTime))
    var i = 0;
    while (true) {
      val map = Map("id" -> i, "createTime" -> getCreateTime(), "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10)))
      val jsonObject: JSONObject = new JSONObject(map)
      println(jsonObject.toString())
      // topic current_day
      val msg = new ProducerRecord[String, String]("slot_partition", jsonObject.toString())
      producer.send(msg)
      producer.flush()
      if (MathUtil.random.nextBoolean()) {
        Thread.sleep(1500)
      } else {
        Thread.sleep(500)

      }
      i = i + 1
      //      System.exit(-1)
    }
  }

} 
Example 129
Source File: IntervalJoinKafkaKeyMaker.scala    From flink-rookie   with Apache License 2.0 5 votes vote down vote up
package com.venn.stream.api.intervalJoin

import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.parsing.json.JSONObject



object IntervalJoinKafkaKeyMaker {
  val topic = "async"

  def main(args: Array[String]): Unit = {

    while (true) {

      left("topic_left")
      right("topic_right")
      Thread.sleep(500)
    }
  }

  val sdf = new SimpleDateFormat("yyyyMMddHHmmss")

  var idLeft = 0

  def left(topic: String) = {
    val producer = new KafkaProducer[String, String](Common.getProp)
    idLeft = idLeft + 1
    val map = Map("id" -> idLeft, "name" -> ("venn" + System.currentTimeMillis()), "date" -> sdf.format(System.currentTimeMillis()))
    val jsonObject: JSONObject = new JSONObject(map)
    println("left : " + jsonObject.toString())
    val msg = new ProducerRecord[String, String](topic, jsonObject.toString())
    producer.send(msg)
    producer.flush()
  }

  var idRight = 0

  def right(topic: String) = {
    val producer = new KafkaProducer[String, String](Common.getProp)
    idRight = idRight + 1
    val map = Map("id" -> idRight,  "phone" -> ("17713333333" + idRight), "date" -> sdf.format(System.currentTimeMillis()))
    val jsonObject: JSONObject = new JSONObject(map)
    println("right : \t\t\t\t\t\t\t\t" + jsonObject.toString())
    val msg = new ProducerRecord[String, String](topic, jsonObject.toString())
    producer.send(msg)
    producer.flush()
  }

} 
Example 130
Source File: CacheFile.scala    From flink-rookie   with Apache License 2.0 5 votes vote down vote up
package com.venn.stream.api.tableJoin

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.api.scala._
import org.apache.flink.api.common.functions.RichMapFunction
import org.apache.flink.configuration.Configuration
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer

import scala.io.Source


object CacheFile {

  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
      env.registerCachedFile("/opt/flink1.7/data/tablejoin.txt", "tablejoin.txt")
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
      // file and register name
      env.registerCachedFile("C:\\Users\\venn\\git\\venn\\flinkDemo\\src\\main\\resources\\data\\tablejoin.txt", "tablejoin.txt")
    }
    // cache table


    val sdf = new SimpleDateFormat("yyyyMMddHHmmss")
    val source = new FlinkKafkaConsumer[ObjectNode]("table_join", new JsonNodeDeserializationSchema, Common.getProp)


    env.addSource(source)
      .map(json => {

        val id = json.get("id").asText()
        val phone = json.get("phone").asText()

        Tuple2(id, phone)
      })
      .map(new RichMapFunction[(String, String), String] {

        var cache = Map("" -> "")

        override def open(parameters: Configuration): Unit = {

          // read cache file
          val file = getRuntimeContext.getDistributedCache.getFile("tablejoin.txt")
          if (file.canRead) {
            val context = Source.fromFile(file, "utf-8").getLines().toArray

           context.foreach(line => {
             val tmp = line.split(",")
             cache += (tmp(0) -> tmp(1))
           })
          }
        }

        override def map(value: (String, String)): String = {
          // look up the cached value for this id (an Option, so missing keys print as None)
          val name = cache.get(value._1)

          value._1 + "," + value._2 + "," + name
        }

      })
      .print()

    env.execute("cacheFile")

  }

} 
Example 131
Source File: StreamingFileSinkDemo.scala    From flink-rookie   with Apache License 2.0 5 votes vote down vote up
package com.venn.stream.api.filesink

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.api.common.serialization.{BulkWriter, SimpleStringEncoder}
import org.apache.flink.api.scala._
import org.apache.flink.core.fs.Path
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer

object StreamingFileSinkDemo {

  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
    }

    val sdf = new SimpleDateFormat("yyyyMMddHHmmss")
    val source = new FlinkKafkaConsumer[ObjectNode]("roll_file_sink", new JsonNodeDeserializationSchema, Common.getProp)
    // row format
    val sinkRow = StreamingFileSink
      .forRowFormat(new Path("D:\\idea_out\\rollfilesink"), new SimpleStringEncoder[ObjectNode]("UTF-8"))
      .withBucketAssigner(new DayBucketAssigner)
      .withBucketCheckInterval(60 * 60 * 1000l) // 1 hour
      .build()

    // use define BulkWriterFactory and DayBucketAssinger
    val sinkBuck = StreamingFileSink
      .forBulkFormat(new Path("D:\\idea_out\\rollfilesink"), new DayBulkWriterFactory)
      .withBucketAssigner(new DayBucketAssigner())
      .withBucketCheckInterval(60 * 60 * 1000l) // 1 hour
      .build()


    env.addSource(source)
      .assignAscendingTimestamps(json => {
        sdf.parse(json.get("date").asText()).getTime
      })
      .map(json => {
//        json.get("date") + "-" + json.toString
        json
      })
      .addSink(sinkBuck)

    env.execute("StreamingFileSink")
  }

} 
Example 132
Source File: FileSinkMaker.scala    From flink-rookie   with Apache License 2.0 5 votes vote down vote up
package com.venn.stream.api.filesink

import java.text.SimpleDateFormat
import java.util.Calendar

import com.venn.common.Common
import com.venn.stream.api.dayWindow.CurrentDayMaker.{calendar, getCreateTime, sdf}
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.parsing.json.JSONObject



object FileSinkMaker {
  val topic = "async"

  def main(args: Array[String]): Unit = {

    while (true) {

      left("roll_file_sink")
      Thread.sleep(100)
    }
  }

  val sdf = new SimpleDateFormat("yyyyMMddHHmmss")

  var idLeft = 0

  def left(topic: String) = {
    val producer = new KafkaProducer[String, String](Common.getProp)
    idLeft = idLeft + 1
    val map = Map("id" -> idLeft, "name" -> ("venn" + System.currentTimeMillis()), "date" -> getCreateTime)
    val jsonObject: JSONObject = new JSONObject(map)
    println("left : " + jsonObject.toString())
    val msg = new ProducerRecord[String, String](topic, jsonObject.toString())
//    producer.send(msg)
//    producer.flush()
  }

  var minute : Int = 1
  val calendar: Calendar = Calendar.getInstance()
  def getCreateTime(): String = {
    //    minute = minute + 1
    calendar.add(Calendar.MINUTE, 10)
    sdf.format(calendar.getTime)
  }

} 
Example 133
Source File: RollingFileSinkDemo.scala    From flink-rookie   with Apache License 2.0 5 votes vote down vote up
package com.venn.stream.api.filesink

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.fs.StringWriter
import org.apache.flink.streaming.connectors.fs.bucketing.BucketingSink
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.api.scala._


object RollingFileSinkDemo {

  def main(args: Array[String]): Unit = {

    // same environment and kafka source setup as StreamingFileSinkDemo above
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
    }

    val sdf = new SimpleDateFormat("yyyyMMddHHmmss")
    val source = new FlinkKafkaConsumer[ObjectNode]("roll_file_sink", new JsonNodeDeserializationSchema, Common.getProp)

    val sink = new BucketingSink[String]("D:\\idea_out\\rollfilesink")
    sink.setBucketer(new DayBasePathBucketer)
    sink.setWriter(new StringWriter[String])
    sink.setBatchSize(1024 * 1024 * 400) // this is 400 MB,
    //    sink.setBatchRolloverInterval(24 * 60 * 60 * 1000) // this is 24 hour
//    sink.setInProgressPrefix("inProcessPre")
//    sink.setPendingPrefix("pendingpre")
//    sink.setPartPrefix("partPre")

    env.addSource(source)
      .assignAscendingTimestamps(json => {
        sdf.parse(json.get("date").asText()).getTime
      })
      .map(json => {
        json.get("date") + "-" + json.toString
      })
      .addSink(sink)

    env.execute("rollingFileSink")
  }

} 
Example 134
Source File: ProcessWindowForTrigger.scala    From flink-rookie   with Apache License 2.0 5 votes vote down vote up
package com.venn.stream.api.trigger

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.contrib.streaming.state.RocksDBStateBackend
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.scala.function.ProcessAllWindowFunction
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector
import org.slf4j.LoggerFactory


object ProcessWindowDemoForTrigger {
  val logger = LoggerFactory.getLogger(this.getClass)

  def main(args: Array[String]): Unit = {
    // environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    if ("\\".equals(File.pathSeparator)) {
      val rock = new RocksDBStateBackend(Common.CHECK_POINT_DATA_DIR)
      env.setStateBackend(rock)
      // checkpoint interval
      env.enableCheckpointing(10000)
    }

    val topic = "current_day"
    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

    val kafkaSource = new FlinkKafkaConsumer[String](topic, new SimpleStringSchema(), Common.getProp)
    val stream = env.addSource(kafkaSource)
      .map(s => {
        s
      })
      .windowAll(TumblingProcessingTimeWindows.of(Time.seconds(60)))
      .trigger(CountAndTimeTrigger.of(10, Time.seconds(10)))
      .process(new ProcessAllWindowFunction[String, String, TimeWindow] {
        override def process(context: Context, elements: Iterable[String], out: Collector[String]): Unit = {

          var count = 0

          elements.iterator.foreach(s => {
            count += 1
          })
          logger.info("this trigger have : {} item", count)
        }
      })

    // execute job
    env.execute(this.getClass.getName)
  }

} 
Example 135
Source File: WindowDemoMaker.scala    From flink-rookie   with Apache License 2.0 5 votes vote down vote up
package com.venn.stream.api.trigger

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}

import com.venn.common.Common
import com.venn.util.MathUtil
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.parsing.json.JSONObject


object WindowDemoMaker {

  val calendar: Calendar = Calendar.getInstance()

  def getCreateTime(): String = {
    //    minute = minute + 1
    calendar.add(Calendar.MILLISECOND, 10)
    sdf.format(calendar.getTime)
  }

  val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

  def main(args: Array[String]): Unit = {
    val producer = new KafkaProducer[String, String](Common.getProp)
    calendar.setTime(new Date())
    println(sdf.format(calendar.getTime))
    var i = 0;
    while (true) {
      val map = Map("id" -> i, "createTime" -> getCreateTime(), "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10)))
      val jsonObject: JSONObject = new JSONObject(map)
      println(jsonObject.toString())
      // topic current_day
      val msg = new ProducerRecord[String, String]("current_day", jsonObject.toString())
      producer.send(msg)
      producer.flush()
      if (MathUtil.random.nextBoolean()) {
        Thread.sleep(1500)
      } else {
        Thread.sleep(500)

      }
      i = i + 1
      //      System.exit(-1)
    }
  }

} 
Example 136
Source File: TimeSpec.scala    From ez-framework   with Apache License 2.0 5 votes vote down vote up
package com.ecfront.ez.framework.core.misc

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}

import com.ecfront.ez.framework.core.BasicSpec

class TimeSpec extends BasicSpec {

  test("ZeroTimeOffset Test") {

    val dfd = new SimpleDateFormat("yyyyMMdd")

    def getZeroTimeOffset = {
      val currentTime = new Date()
      val currentDay = dfd.parse(dfd.format(currentTime))
      val calendar = Calendar.getInstance()
      calendar.setTime(currentDay)
      calendar.add(Calendar.DATE, 1)
      calendar.getTime.getTime - currentTime.getTime
    }

    println(getZeroTimeOffset)
    Thread.sleep(10000)
    println(getZeroTimeOffset)
    val calendar = Calendar.getInstance()
    calendar.setTimeInMillis(new Date().getTime + getZeroTimeOffset)
    println(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS").format(calendar.getTime))

  }

} 
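
getZeroTimeOffset in the test above computes the milliseconds from now until the next local midnight: format today as yyyyMMdd, parse it back to get today's 00:00, add one day, and subtract the current time. A sketch of how such an offset is typically used, for example to fire a task at midnight (illustrative only, not part of the original test):

import java.text.SimpleDateFormat
import java.util.{Calendar, Date, Timer, TimerTask}

object MidnightScheduler {
  private val dfd = new SimpleDateFormat("yyyyMMdd")

  def zeroTimeOffset(): Long = {
    val now = new Date()
    val today = dfd.parse(dfd.format(now)) // today at 00:00:00 local time
    val calendar = Calendar.getInstance()
    calendar.setTime(today)
    calendar.add(Calendar.DATE, 1)         // tomorrow at 00:00:00
    calendar.getTime.getTime - now.getTime
  }

  def main(args: Array[String]): Unit = {
    val timer = new Timer(true)
    timer.schedule(new TimerTask {
      override def run(): Unit = println("it is midnight")
    }, zeroTimeOffset())
  }
}
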
Example 137
Source File: CurrentDayPvCountWaterMark.scala    From flink-rookie   with Apache License 2.0 5 votes vote down vote up
package com.venn.stream.api.dayWindow

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import com.venn.source.TumblingEventTimeWindows
import org.apache.flink.api.common.functions.ReduceFunction
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.contrib.streaming.state.RocksDBStateBackend
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.triggers.{ContinuousEventTimeTrigger, ContinuousProcessingTimeTrigger, CountTrigger}
import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer, FlinkKafkaProducer}


      .assignAscendingTimestamps(event => sdf.parse(event.createTime).getTime)
      .windowAll(TumblingEventTimeWindows.of(Time.days(1), Time.hours(-8)))
      .reduce(new ReduceFunction[Eventx] {
        override def reduce(event1: Eventx, event2: Eventx): Eventx = {

//          println("reduce event : " +  event2.toString)
          //            val minId:String = if (event1.id.compareTo(event2.id) >= 0 ) event2.id else event1.id
          //            val maxId = if (event1.id.compareTo(event2.id) < 0 ) event1.id else event2.id
          //            val minCreateTime = if ( event1.createTime.compareTo(event2.createTime) >= 0 ) event2.createTime else event1.createTime
          //            val maxCreateTime = if ( event1.createTime.compareTo(event2.createTime) < 0 ) event1.createTime else event2.createTime
          //            val count = event1.count + event2.count
          //            new EventResult(minId, maxId, minCreateTime, maxCreateTime, count)
          new Eventx(event1.id , event2.id , event1.amt + event2.amt)
        }
      })
      // format output even, connect min max id, add current timestamp
//      .map(event => Event(event.id + "-" + event.createTime, sdf.format(System.currentTimeMillis()), event.count))
    stream.print("result : ")
    // execute job
    env.execute("CurrentDayCount")
  }

} 
Example 138
Source File: CurrentDayMaker.scala    From flink-rookie   with Apache License 2.0 5 votes vote down vote up
package com.venn.stream.api.dayWindow

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}

import com.venn.common.Common
import com.venn.util.MathUtil
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.parsing.json.JSONObject


object CurrentDayMaker {

  val calendar: Calendar = Calendar.getInstance()

  def getCreateTime(): String = {
//    minute = minute + 1
    calendar.add(Calendar.MINUTE, 10)
    sdf.format(calendar.getTime)
  }
  val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

  def main(args: Array[String]): Unit = {
    val producer = new KafkaProducer[String, String](Common.getProp)
    calendar.setTime(new Date())
    println(sdf.format(calendar.getTime))
    var i =0;
    while (true) {

//      val map = Map("id"-> i, "createTime"-> sdf.format(System.currentTimeMillis()))
      val map = Map("id"-> i, "createTime"-> getCreateTime(), "amt"-> (MathUtil.random.nextInt(10) +"." + MathUtil.random.nextInt(10)))
      val jsonObject: JSONObject = new JSONObject(map)
      println(jsonObject.toString())
      // topic current_day
      val msg = new ProducerRecord[String, String]("current_day", jsonObject.toString())
      producer.send(msg)
      producer.flush()
      Thread.sleep(1000)
      i = i + 1
//      System.exit(-1)
    }
  }

} 
Example 139
Source File: PackageSpec.scala    From sparkpipe-core   with Apache License 2.0 5 votes vote down vote up
package software.uncharted.sparkpipe.ops.core.dataframe.temporal

import org.scalatest._
import software.uncharted.sparkpipe.Spark
import software.uncharted.sparkpipe.ops.core.rdd.toDF

import java.text.SimpleDateFormat
import java.sql.Timestamp

class PackageSpec extends FunSpec {
  describe("ops.core.dataframe.temporal") {
    val rdd = Spark.sc.parallelize(Seq(
      (new Timestamp(new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-18").getTime), "2015-11-18", 1),
      (new Timestamp(new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-19").getTime), "2015-11-19", 2),
      (new Timestamp(new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-20").getTime), "2015-11-20", 3),
      (new Timestamp(new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-21").getTime), "2015-11-21", 4)
    ))
    val df = toDF(Spark.sparkSession)(rdd)

    describe("#dateFilter()") {
      it("should support filtering rows in an input DataFrame with a String timestamp column, based on a date range") {
        val df2 = dateFilter(
          new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-19"),
          new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-21"),
          "yyyy-MM-dd",
          "_2"
        )(df)
        assert(df2.count == 3)
      }

      it("should support filtering rows in an input DataFrame with a String timestamp column, based on a date range, specified using strings") {
        val df2 = dateFilter(
          "2015-11-19",
          "2015-11-20",
          "yyyy-MM-dd",
          "_2"
        )(df)
        assert(df2.count == 2)
      }

      it("should support filtering rows in an input DataFrame with a Timestamp timestamp column, based on a date range") {
        val df2 = dateFilter(
          new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-17"),
          new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-18"),
          "_1"
        )(df)
        assert(df2.count == 1)
      }
    }

    describe("#parseDate()") {
      it("should facilitate converting a string timestamp column into a TimestampType and adding it as a new column") {
        val df2 = parseDate("_2", "new", "yyyy-MM-dd")(df)
        assert(df2.filter("new = _1").count == df.count)
        assert(df2.schema.size == df.schema.size+1)
      }
    }

    describe("#dateField()") {
      it("should facilitate extracting a single field from a Timestamp column, and placing it a new column") {
        val df2 = dateField("_1", "new", java.util.Calendar.YEAR)(df)
        assert(df2.filter("new = 2015").count == df.count)
        assert(df2.schema.size == df.schema.size+1)
      }
    }
  }
} 
Example 140
Source File: DateUtilsTest.scala    From bigdata-examples   with Apache License 2.0 5 votes vote down vote up
package com.timeyang.common.util

import java.text.SimpleDateFormat
import java.util.Calendar

import org.junit.Test


@Test
class DateUtilsTest {

  @Test
  def test(): Unit = {
    // scalastyle:off
    println(DateUtils.current())

    val formatter = new SimpleDateFormat("yyyyMMddHH")
    println(formatter.format(System.currentTimeMillis()))
    // scalastyle:on
  }

  @Test
  def testTime(): Unit = {
    val calendar = Calendar.getInstance()
    val hour = calendar.get(Calendar.HOUR_OF_DAY)
    // scalastyle:off println
    println(hour)
    // scalastyle:on println
    calendar.add(Calendar.MILLISECOND, 60 * 60 * 1000)
    val hourOfNext = calendar.get(Calendar.HOUR_OF_DAY)
    print(hourOfNext)
  }

} 
Example 141
Source File: RequestDSL.scala    From twitter4s   with Apache License 2.0 5 votes vote down vote up
package com.danielasfregola.twitter4s.helpers

import java.text.SimpleDateFormat
import java.util.Locale

import akka.http.scaladsl.model._
import akka.http.scaladsl.model.headers.RawHeader
import akka.testkit.TestProbe
import com.danielasfregola.twitter4s.entities.RateLimit
import org.specs2.specification.AfterEach

import scala.concurrent.duration._
import scala.concurrent.{Await, Future}

abstract class RequestDSL extends TestActorSystem with FixturesSupport with AfterEach {

  def after = system.terminate

  private val timeout = 10 seconds

  val headers = List(RawHeader("x-rate-limit-limit", "15"),
                     RawHeader("x-rate-limit-remaining", "14"),
                     RawHeader("x-rate-limit-reset", "1445181993"))

  val rateLimit = {
    val dateFormatter = new SimpleDateFormat("EEE MMM dd HH:mm:ss ZZZZ yyyy", Locale.ENGLISH)
    val resetDate = dateFormatter.parse("Sun Oct 18 15:26:33 +0000 2015").toInstant
    new RateLimit(limit = 15, remaining = 14, reset = resetDate)
  }

  protected val transport = TestProbe()

  def when[T](future: Future[T]): RequestMatcher[T] = new RequestMatcher(future)

  class RequestMatcher[T](future: Future[T]) {
    protected def responder = new Responder(future)

    def expectRequest(req: HttpRequest): Responder[T] = {
      transport.expectMsg(timeout, req)
      responder
    }

    def expectRequest(fn: HttpRequest => Unit) = {
      transport.expectMsgPF(timeout) {
        case req: HttpRequest => fn(req)
      }
      responder
    }
  }

  class Responder[T](future: Future[T]) {
    def respondWith(response: HttpResponse): Await[T] = {
      transport.reply(response)
      new Await(future)
    }

    def respondWith(resourcePath: String): Await[T] =
      respondWith(HttpResponse(StatusCodes.OK, entity = HttpEntity(MediaTypes.`application/json`, load(resourcePath))))

    def respondWithRated(resourcePath: String): Await[T] =
      respondWith(
        HttpResponse(StatusCodes.OK,
                     headers = headers,
                     entity = HttpEntity(MediaTypes.`application/json`, load(resourcePath))))

    def respondWithOk: Await[Unit] = {
      val response =
        HttpResponse(StatusCodes.OK, entity = HttpEntity(MediaTypes.`application/json`, """{"code": "OK"}"""))
      transport.reply(response)
      new Await(Future.successful((): Unit))
    }
  }

  class Await[T](future: Future[T]) {
    private[helpers] val underlyingFuture = future

    def await(implicit duration: FiniteDuration = 20 seconds) =
      Await.result(future, duration)
  }

  implicit def awaitToReqMatcher[T](await: Await[T]) =
    new RequestMatcher(await.underlyingFuture)

} 
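
The rate-limit reset date above is parsed from Twitter's created_at-style timestamp; the Locale.ENGLISH argument matters because the day and month names in the pattern are English. A minimal standalone version of that parse:

import java.text.SimpleDateFormat
import java.util.Locale

object TwitterDateParse {
  def main(args: Array[String]): Unit = {
    val dateFormatter = new SimpleDateFormat("EEE MMM dd HH:mm:ss ZZZZ yyyy", Locale.ENGLISH)
    val resetDate = dateFormatter.parse("Sun Oct 18 15:26:33 +0000 2015").toInstant
    println(resetDate) // 2015-10-18T15:26:33Z
  }
}
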
Example 142
Source File: NetflixPrizeUtils.scala    From zen   with Apache License 2.0 5 votes vote down vote up
package com.github.cloudml.zen.examples.ml

import java.text.SimpleDateFormat
import java.util.{Locale, TimeZone}

import breeze.linalg.{SparseVector => BSV}
import org.apache.spark.SparkContext
import org.apache.spark.mllib.linalg.{SparseVector => SSV}
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel

import scala.collection.mutable.ArrayBuffer

object NetflixPrizeUtils {

  def genSamplesWithTime(
    sc: SparkContext,
    input: String,
    numPartitions: Int = -1,
    newLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK):
  (RDD[(Long, LabeledPoint)], RDD[(Long, LabeledPoint)], Array[Long]) = {

    val probeFile = s"$input/probe.txt"
    val dataSetFile = s"$input/training_set"
    // ... (the loading of probeFile/dataSetFile that builds maxUserId, maxMovieId,
    // numFeatures, trainSet and testSet is omitted in this excerpt)
    val views = Array(maxUserId, maxMovieId + maxUserId, numFeatures).map(_.toLong)

    (trainSet, testSet, views)

  }
} 
Example 143
Source File: SparkStreamingAkkaTest.scala    From apache-spark-test   with Apache License 2.0 5 votes vote down vote up
package com.github.dnvriend.spark.streaming.akka

import java.text.SimpleDateFormat

import akka.actor.Props
import com.github.dnvriend.TestSpec
import org.apache.spark.streaming.akka.{ ActorReceiver, AkkaUtils }

import scala.concurrent.duration._

class CustomActor extends ActorReceiver {
  import context.dispatcher
  def ping() = context.system.scheduler.scheduleOnce(200.millis, self, "foo")
  def today: String = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS").format(new java.util.Date)
  def receive = counter(0)
  def counter(x: Long): Receive = {
    case _ =>
      store(s"counter: $x, msg: Today is $today, have a nice day!")
      context.become(counter(x + 1))
      ping()
  }
  ping()
}

class SparkStreamingAkkaTest extends TestSpec {
  it should "stream from an actor" in withStreamingContext() { spark => ssc =>
    import spark.implicits._
    val lines = AkkaUtils.createStream[String](ssc, Props[CustomActor](), "CustomReceiver")
    lines.foreachRDD { rdd =>
      rdd.toDF.show(truncate = false)
    }

    ssc.start()
    advanceClockOneBatch(ssc)
    sleep()
    advanceClockOneBatch(ssc)
    sleep()
    advanceClockOneBatch(ssc)
    sleep()
    advanceClockOneBatch(ssc)
    sleep()
    advanceClockOneBatch(ssc)
    sleep()
  }
} 
Example 144
Source File: CreatePosts.scala    From apache-spark-test   with Apache License 2.0 5 votes vote down vote up
package com.github.dnvriend

import java.nio.file.Paths
import java.nio.file.StandardOpenOption._
import java.text.SimpleDateFormat
import java.util.Date

import akka.actor.{ ActorSystem, Terminated }
import akka.stream.scaladsl.{ FileIO, Source }
import akka.stream.{ ActorMaterializer, Materializer }
import akka.util.ByteString
import play.api.libs.json.Json

import scala.concurrent.{ ExecutionContext, Future }
import scala.util.Random

object CreatePosts extends App {
  implicit val system: ActorSystem = ActorSystem()
  implicit val mat: Materializer = ActorMaterializer()
  implicit val ec: ExecutionContext = system.dispatcher

  def terminate: Future[Terminated] =
    system.terminate()

  sys.addShutdownHook {
    terminate
  }

  object Post {
    implicit val format = Json.format[Post]
  }

  final case class Post(
    commentCount: Int,
    lastActivityDate: String,
    ownerUserId: Long,
    body: String,
    score: Int,
    creationDate: String,
    viewCount: Int,
    title: String,
    tags: String,
    answerCount: Int,
    acceptedAnswerId: Long,
    postTypeId: Long,
    id: Long
  )

  def rng = Random.nextInt(20000)

  def now: String = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX").format(new Date())

  val lorem = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nam fringilla magna et pharetra vestibulum."
  val title = " Ut id placerat sapien. Aliquam vel metus orci."
  Source.fromIterator(() => Iterator from 0).map { id =>
    Post(rng, now, rng, List.fill(Random.nextInt(5))(lorem).mkString("\n"), rng, now, rng, s"$rng - $title", title, rng, rng, rng, id)
  }.map(Json.toJson(_).toString)
    .map(json => ByteString(json + "\n"))
    .take(1000000)
    .via(LogProgress.flow())
    .runWith(FileIO.toPath(Paths.get("/tmp/posts.json"), Set(WRITE, TRUNCATE_EXISTING, CREATE)))
    .flatMap { done =>
      println(done)
      terminate
    }.recoverWith {
      case cause: Throwable =>
        cause.printStackTrace()
        terminate
    }

} 
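Each emitted line is a standalone JSON document, so the output can be read back line by line. A small sketch (the demo object name is invented; the path matches the generator above):

import scala.io.Source
import play.api.libs.json.Json

object ReadPostsDemo extends App {
  // Read the first line of the newline-delimited JSON written by CreatePosts
  // and extract a couple of fields.
  val firstLine = Source.fromFile("/tmp/posts.json").getLines().next()
  val js = Json.parse(firstLine)
  println(s"id=${(js \ "id").as[Long]}, created=${(js \ "creationDate").as[String]}")
}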
Example 145
Source File: Implicits.scala    From activemq-cli   with Apache License 2.0 5 votes vote down vote up
package activemq.cli.util

import com.typesafe.config.Config
import java.util.Date
import java.util.Locale
import java.text.SimpleDateFormat
import javax.jms.Message
import javax.jms.TextMessage
import scala.collection.JavaConversions._

object Implicits {

  implicit class RichConfig(val underlying: Config) extends AnyVal {
    def getOptionalString(path: String): Option[String] = if (underlying.hasPath(path)) {
      Some(underlying.getString(path))
    } else {
      None
    }
  }

  
  implicit def optionStringToBoolean(o: Option[String]): Boolean = {
    !o.getOrElse("").isEmpty
  }

  // Assumption: the original file also defines a String-to-Boolean conversion that
  // was elided from this excerpt; without it, `if (regex)` below and the addOptional
  // guard on the message body would not compile.
  implicit def stringToBoolean(s: String): Boolean = {
    !Option(s).getOrElse("").isEmpty
  }

  implicit class MessageImprovements(val message: Message) {

    val prettyPrinter = new scala.xml.PrettyPrinter(100000, 2) //scalastyle:ignore

    def toXML(timestampFormat: Option[String] = None): String = {

      val addOptional = (condition: Boolean, xml: scala.xml.Elem) ⇒ if (condition) xml else scala.xml.NodeSeq.Empty

      prettyPrinter.format(<jms-message>
                             <header>
                               <message-id>{ message.getJMSMessageID }</message-id>
                               { addOptional(Option(message.getJMSCorrelationID).isDefined, <correlation-id>{ message.getJMSCorrelationID }</correlation-id>) }
                               <delivery-mode>{ message.getJMSDeliveryMode }</delivery-mode>
                               <destination>{ message.getJMSDestination }</destination>
                               <expiration>{ message.getJMSExpiration }</expiration>
                               <priority>{ message.getJMSPriority }</priority>
                               <redelivered>{ message.getJMSRedelivered }</redelivered>
                               { addOptional(Option(message.getJMSReplyTo).isDefined, <reply-to>{ message.getJMSReplyTo }</reply-to>) }
                               <timestamp>{
                                 timestampFormat match {
                                   case Some(matched) ⇒ new SimpleDateFormat(matched).format(new Date(message.getJMSTimestamp))
                                   case _ ⇒ message.getJMSTimestamp
                                 }
                               }</timestamp>
                               { addOptional(Option(message.getJMSType).isDefined, <type>{ message.getJMSType }</type>) }
                             </header>
                             {
                               addOptional(message.getPropertyNames.hasMoreElements, <properties> {
                                 message.getPropertyNames.map(name ⇒
                                   <property><name>{ name }</name><value>{ message.getStringProperty(name.toString) }</value></property>)
                               } </properties>)
                             }
                             {
                               message match {
                                 case textMessage: TextMessage if Option(textMessage.getText).isDefined ⇒ addOptional(
                                   textMessage.getText,
                                   <body>{ scala.xml.PCData(textMessage.getText.replaceAll("]]>", "]]]]><![CDATA[>")) }</body>
                                 )
                                  case _ ⇒ scala.xml.NodeSeq.Empty
                               }
                             }
                           </jms-message>)
    }

    def textMatches(regex: String): Boolean = {
      if (regex) {
        message match {
          case textMessage: TextMessage ⇒ (regex.r findFirstIn textMessage.getText)
          case _                        ⇒ false
        }
      } else {
        true
      }
    }
  }
} 
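A hedged usage sketch of toXML and textMatches. Building an ActiveMQTextMessage by hand (rather than receiving one from a broker) is an assumption made to keep the example self-contained; most JMS headers stay at their defaults and are simply omitted from the rendered XML:

import org.apache.activemq.command.ActiveMQTextMessage
import activemq.cli.util.Implicits._

object MessageXmlDemo extends App {
  val message = new ActiveMQTextMessage()
  message.setText("<order><id>42</id></order>")

  // Render the message as XML; the JMS timestamp is formatted with the given pattern.
  println(message.toXML(Some("yyyy-MM-dd HH:mm:ss")))

  // True when the regex is empty or matches the message body.
  println(message.textMatches("order"))
}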
Example 146
Source File: DateUtils.scala    From sundial   with MIT License 5 votes vote down vote up
package util

import java.text.SimpleDateFormat
import java.util.Date
import java.util.concurrent.TimeUnit

object DateUtils {

  val basicDateTimeFormat = new SimpleDateFormat("MMM d, H:mm z")

  def prettyRelativeTime(when: Date, now: Date): String = {
    s"${prettyDuration(when, now)} ago"
  }

  def prettyDuration(start: Date, end: Date): String = {
    val diff = end.getTime - start.getTime
    prettyDuration(diff, TimeUnit.MILLISECONDS)
  }

  def prettyDuration(amount: Long, unit: TimeUnit): String = {
    if (unit.toSeconds(amount) < 120) {
      s"${unit.toSeconds(amount)} seconds"
    } else if (unit.toMinutes(amount) < 180) {
      s"${unit.toMinutes(amount)} minutes"
    } else if (unit.toHours(amount) < 72) {
      s"${unit.toHours(amount)} hours"
    } else {
      s"${unit.toDays(amount)} days"
    }
  }

} 
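A short sketch of how the helpers above render durations; the dates are arbitrary and the demo object name is invented:

import java.util.Date
import java.util.concurrent.TimeUnit
import _root_.util.DateUtils // _root_ avoids a clash with scala.util

object DateUtilsDemo extends App {
  val now = new Date()
  val tenMinutesAgo = new Date(now.getTime - TimeUnit.MINUTES.toMillis(10))

  println(DateUtils.prettyRelativeTime(tenMinutesAgo, now)) // "10 minutes ago"
  println(DateUtils.prettyDuration(90, TimeUnit.SECONDS))   // "90 seconds" (below the 120-second cut-off)
  println(DateUtils.prettyDuration(5, TimeUnit.DAYS))       // "5 days"
}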
Example 147
Source File: Util.scala    From aardpfark   with Apache License 2.0 5 votes vote down vote up
package com.ibm.aardpfark.pfa.utils

import java.text.SimpleDateFormat
import java.util.{Date, TimeZone}

object Utils {
  def getCurrentDate = {
    val fmt = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'")
    fmt.setTimeZone(TimeZone.getTimeZone("UTC"))
    fmt.format(new Date())
  }

  def getCurrentTs = {
    new Date().getTime
  }
} 
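A one-line usage sketch; because the formatter is pinned to UTC, the output does not depend on the JVM's default time zone:

import com.ibm.aardpfark.pfa.utils.Utils

object UtilsDemo extends App {
  println(Utils.getCurrentDate) // e.g. 2024-05-01T12:34:56Z, always UTC
  println(Utils.getCurrentTs)   // epoch milliseconds
}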
Example 148
Source File: StreamingProducer.scala    From Scala-Programming-Projects   with MIT License 4 votes vote down vote up
package coinyser

import java.sql.Timestamp
import java.text.SimpleDateFormat
import java.util.TimeZone

import cats.effect.IO
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.pusher.client.Client
import com.pusher.client.channel.SubscriptionEventListener
import com.typesafe.scalalogging.StrictLogging

object StreamingProducer extends StrictLogging {

  def subscribe(pusher: Client)(onTradeReceived: String => Unit): IO[Unit] =
    for {
      _ <- IO(pusher.connect())
      channel <- IO(pusher.subscribe("live_trades"))

      _ <- IO(channel.bind("trade", new SubscriptionEventListener() {
        override def onEvent(channel: String, event: String, data: String): Unit = {
          logger.info(s"Received event: $event with data: $data")
          onTradeReceived(data)
        }
      }))
    } yield ()


  val mapper: ObjectMapper = {
    val m = new ObjectMapper()
    m.registerModule(DefaultScalaModule)
    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    // Very important: the storage must be in UTC
    sdf.setTimeZone(TimeZone.getTimeZone("UTC"))
    m.setDateFormat(sdf)
  }

  def deserializeWebsocketTransaction(s: String): WebsocketTransaction =
    mapper.readValue(s, classOf[WebsocketTransaction])

  def convertWsTransaction(wsTx: WebsocketTransaction): Transaction =
    Transaction(
      timestamp = new Timestamp(wsTx.timestamp.toLong * 1000), tid = wsTx.id,
      price = wsTx.price, sell = wsTx.`type` == 1, amount = wsTx.amount)

  def serializeTransaction(tx: Transaction): String =
    mapper.writeValueAsString(tx)

}
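A small sketch of the mapper's date handling, which is the point of the UTC configuration above: java.util.Date values (and java.sql.Timestamp, which Jackson serializes the same way) are written with the "yyyy-MM-dd HH:mm:ss" pattern in UTC, so a given instant serializes identically on every machine. The demo object name is invented:

import java.util.Date
import coinyser.StreamingProducer

object MapperDemo extends App {
  // Epoch 0 renders as "1970-01-01 00:00:00" regardless of the local time zone.
  println(StreamingProducer.mapper.writeValueAsString(new Date(0L)))
}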