java.text.SimpleDateFormat Scala Examples
The following examples show how to use java.text.SimpleDateFormat.
Each example is taken from an open-source project; the source file, project, and license are noted above each listing.
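Before the project examples, here is a minimal stand-alone sketch (not taken from any of the projects below) of the two operations the examples rely on: formatting a java.util.Date into a String and parsing it back. The object name, pattern string, and time zone are illustrative choices only.

import java.text.SimpleDateFormat
import java.util.{Date, TimeZone}

object SimpleDateFormatDemo {
  def main(args: Array[String]): Unit = {
    // Pattern letters: yyyy = year, MM = month, dd = day, HH = 24-hour clock, mm = minutes, ss = seconds
    val fmt = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss")
    fmt.setTimeZone(TimeZone.getTimeZone("UTC"))

    val formatted: String = fmt.format(new Date()) // Date -> String
    val parsed: Date = fmt.parse(formatted)        // String -> Date (throws ParseException on bad input)

    println(s"formatted: $formatted, parsed epoch millis: ${parsed.getTime}")
    // Note: SimpleDateFormat is not thread-safe; several of the examples below
    // create a fresh instance per call for this reason.
  }
}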
Example 1
Source File: JacksonMessageWriter.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.status.api.v1

import java.io.OutputStream
import java.lang.annotation.Annotation
import java.lang.reflect.Type
import java.nio.charset.StandardCharsets
import java.text.SimpleDateFormat
import java.util.{Calendar, SimpleTimeZone}
import javax.ws.rs.Produces
import javax.ws.rs.core.{MediaType, MultivaluedMap}
import javax.ws.rs.ext.{MessageBodyWriter, Provider}

import com.fasterxml.jackson.annotation.JsonInclude
import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}

@Provider
@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{

  val mapper = new ObjectMapper() {
    override def writeValueAsString(t: Any): String = {
      super.writeValueAsString(t)
    }
  }
  mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule)
  mapper.enable(SerializationFeature.INDENT_OUTPUT)
  mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL)
  mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat)

  override def isWriteable(
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType): Boolean = {
    true
  }

  override def writeTo(
      t: Object,
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType,
      multivaluedMap: MultivaluedMap[String, AnyRef],
      outputStream: OutputStream): Unit = {
    t match {
      case ErrorWrapper(err) => outputStream.write(err.getBytes(StandardCharsets.UTF_8))
      case _ => mapper.writeValue(outputStream, t)
    }
  }

  override def getSize(
      t: Object,
      aClass: Class[_],
      `type`: Type,
      annotations: Array[Annotation],
      mediaType: MediaType): Long = {
    -1L
  }
}

private[spark] object JacksonMessageWriter {
  def makeISODateFormat: SimpleDateFormat = {
    val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'")
    val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT"))
    iso8601.setCalendar(cal)
    iso8601
  }
}
Example 2
Source File: PMMLModelExport.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.mllib.pmml.export

import java.text.SimpleDateFormat
import java.util.Date

import scala.beans.BeanProperty

import org.dmg.pmml.{Application, Header, PMML, Timestamp}

private[mllib] trait PMMLModelExport {

  @BeanProperty
  val pmml: PMML = {
    val version = getClass.getPackage.getImplementationVersion
    val app = new Application("Apache Spark MLlib").setVersion(version)
    val timestamp = new Timestamp()
      .addContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(new Date()))
    val header = new Header()
      .setApplication(app)
      .setTimestamp(timestamp)
    new PMML("4.2", header, null)
  }
}
Example 3
Source File: SimpleDateParam.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.status.api.v1

import java.text.{ParseException, SimpleDateFormat}
import java.util.TimeZone
import javax.ws.rs.WebApplicationException
import javax.ws.rs.core.Response
import javax.ws.rs.core.Response.Status

private[v1] class SimpleDateParam(val originalValue: String) {

  val timestamp: Long = {
    val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz")
    try {
      format.parse(originalValue).getTime()
    } catch {
      case _: ParseException =>
        val gmtDay = new SimpleDateFormat("yyyy-MM-dd")
        gmtDay.setTimeZone(TimeZone.getTimeZone("GMT"))
        try {
          gmtDay.parse(originalValue).getTime()
        } catch {
          case _: ParseException =>
            throw new WebApplicationException(
              Response
                .status(Status.BAD_REQUEST)
                .entity("Couldn't parse date: " + originalValue)
                .build()
            )
        }
    }
  }
}
Example 4
Source File: Train.scala From BigDL with Apache License 2.0
package com.intel.analytics.bigdl.models.vgg import java.text.SimpleDateFormat import java.util.Date import com.intel.analytics.bigdl._ import com.intel.analytics.bigdl.dataset.DataSet import com.intel.analytics.bigdl.dataset.image._ import com.intel.analytics.bigdl.nn.{ClassNLLCriterion, Module} import com.intel.analytics.bigdl.optim._ import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric._ import com.intel.analytics.bigdl.utils.{Engine, LoggerFilter, OptimizerV1, OptimizerV2, T, Table} import com.intel.analytics.bigdl.visualization.{TrainSummary, ValidationSummary} import org.apache.log4j.{Level, Logger} import org.apache.spark.SparkContext object Train { LoggerFilter.redirectSparkInfoLogs() import Utils._ def main(args: Array[String]): Unit = { trainParser.parse(args, new TrainParams()).map(param => { val conf = Engine.createSparkConf().setAppName("Train Vgg on Cifar10") // Will throw exception without this config when has only one executor .set("spark.rpc.message.maxSize", "200") val sc = new SparkContext(conf) Engine.init val trainDataSet = DataSet.array(Utils.loadTrain(param.folder), sc) -> BytesToBGRImg() -> BGRImgNormalizer(trainMean, trainStd) -> BGRImgToBatch(param.batchSize) val model = if (param.modelSnapshot.isDefined) { Module.load[Float](param.modelSnapshot.get) } else { if (param.graphModel) VggForCifar10.graph(classNum = 10) else VggForCifar10(classNum = 10) } if (param.optimizerVersion.isDefined) { param.optimizerVersion.get.toLowerCase match { case "optimizerv1" => Engine.setOptimizerVersion(OptimizerV1) case "optimizerv2" => Engine.setOptimizerVersion(OptimizerV2) } } val optimMethod = if (param.stateSnapshot.isDefined) { OptimMethod.load[Float](param.stateSnapshot.get) } else { new SGD[Float](learningRate = param.learningRate, learningRateDecay = 0.0, weightDecay = param.weightDecay, momentum = 0.9, dampening = 0.0, nesterov = false, learningRateSchedule = SGD.EpochStep(25, 0.5)) } val optimizer = Optimizer( model = model, dataset = trainDataSet, criterion = new ClassNLLCriterion[Float]() ) val validateSet = DataSet.array(Utils.loadTest(param.folder), sc) -> BytesToBGRImg() -> BGRImgNormalizer(testMean, testStd) -> BGRImgToBatch(param.batchSize) if (param.checkpoint.isDefined) { optimizer.setCheckpoint(param.checkpoint.get, Trigger.everyEpoch) } if (param.overWriteCheckpoint) { optimizer.overWriteCheckpoint() } if (param.summaryPath.isDefined) { val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") val timeStamp = sdf.format(new Date()) val trainSummry = new TrainSummary(param.summaryPath.get, s"vgg-on-cifar10-train-$timeStamp") optimizer.setTrainSummary(trainSummry) val validationSummary = new ValidationSummary(param.summaryPath.get, s"vgg-on-cifar10-val-$timeStamp") optimizer.setValidationSummary(validationSummary) } optimizer .setValidation(Trigger.everyEpoch, validateSet, Array(new Top1Accuracy[Float])) .setOptimMethod(optimMethod) .setEndWhen(Trigger.maxEpoch(param.maxEpoch)) .optimize() sc.stop() }) } }
Example 5
Source File: WikiETL.scala From CarbonDataLearning with GNU General Public License v3.0
package org.github.xubo245.carbonDataLearning.etl import java.io.{File, PrintWriter} import java.text.SimpleDateFormat import java.util.Date import scala.io.Source import scala.util.Random object WikiETL { def main(args: Array[String]): Unit = { val directory = "/root/xubo/data" val files = new File(directory) val out = new PrintWriter("/root/xubo/data/pageviews-20150505time") var flag:Int = 10000000; var typeMap= Map (("b","wikibooks") ,("d","wiktionary") ,("m","wikimedia") ,("mw","wikipedia mobile") ,("n","wikinews") ,("q","wikiquote") ,("s","wikisource") ,("v","wikiversity") ,("w","mediawiki")) for (file <- files.listFiles().sorted.filter(_.getCanonicalFile.getName.contains("pageviews-20150505-"))) { val filePath = file.getCanonicalPath println(filePath) // val out = new PrintWriter(filePath + "WithTime") val reader = Source.fromFile(filePath) val fileName = file.getCanonicalFile.getName val delimiter = "\t" for (line <- reader.getLines()) { val stringBuffer = new StringBuffer() val random = new Random() val id = flag+random.nextInt(1000000) stringBuffer .append(id).append(delimiter) .append(fileName.substring(10, 14)).append(delimiter) .append(fileName.substring(14, 16)).append(delimiter) .append(fileName.substring(16, 18)).append(delimiter) .append(fileName.substring(19, 21)).append(delimiter) val array=line.mkString.split("\\s+") if (array.length == 4 && array(2).matches("[0-9]*") && !array(1).contains("\"")) { val domain = array(0).split('.') stringBuffer.append(domain(0)).append(delimiter) if (domain.length > 1) { var value: String = typeMap.getOrElse(domain(1), "wiki") stringBuffer.append(value).append(delimiter) } else { stringBuffer.append("wiki").append(delimiter) } val time = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date()); val tid= id*10+random.nextInt(5) stringBuffer.append(array(1).replace('_',' ')).append(delimiter) .append(tid).append(delimiter) .append(array(2)).append(delimiter) .append(random.nextInt(100000)).append(delimiter) .append(time) // for (i <- 0 until array.length-1){ // stringBuffer.append(array(i)).append(delimiter) // } // stringBuffer.append(array(array.length-1)) // if (array.length == 4 && array(2).matches("[0-9]*")) { // id = id + 1 out.println(stringBuffer.toString) } } } out.close() } }
Example 6
Source File: DataConverter.scala From spark-cdm with MIT License
package com.microsoft.cdm.utils import java.text.SimpleDateFormat import java.util.{Locale, TimeZone} import java.sql.Timestamp import org.apache.commons.lang.time.DateUtils import org.apache.spark.sql.catalyst.util.TimestampFormatter import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String class DataConverter() extends Serializable { val dateFormatter = new SimpleDateFormat(Constants.SINGLE_DATE_FORMAT) val timestampFormatter = TimestampFormatter(Constants.TIMESTAMP_FORMAT, TimeZone.getTimeZone("UTC")) val toSparkType: Map[CDMDataType.Value, DataType] = Map( CDMDataType.int64 -> LongType, CDMDataType.dateTime -> DateType, CDMDataType.string -> StringType, CDMDataType.double -> DoubleType, CDMDataType.decimal -> DecimalType(Constants.DECIMAL_PRECISION,0), CDMDataType.boolean -> BooleanType, CDMDataType.dateTimeOffset -> TimestampType ) def jsonToData(dt: DataType, value: String): Any = { return dt match { case LongType => value.toLong case DoubleType => value.toDouble case DecimalType() => Decimal(value) case BooleanType => value.toBoolean case DateType => dateFormatter.parse(value) case TimestampType => timestampFormatter.parse(value) case _ => UTF8String.fromString(value) } } def toCdmType(dt: DataType): CDMDataType.Value = { return dt match { case IntegerType => CDMDataType.int64 case LongType => CDMDataType.int64 case DateType => CDMDataType.dateTime case StringType => CDMDataType.string case DoubleType => CDMDataType.double case DecimalType() => CDMDataType.decimal case BooleanType => CDMDataType.boolean case TimestampType => CDMDataType.dateTimeOffset } } def dataToString(data: Any, dataType: DataType): String = { (dataType, data) match { case (_, null) => null case (DateType, _) => dateFormatter.format(data) case (TimestampType, v: Number) => timestampFormatter.format(data.asInstanceOf[Long]) case _ => data.toString } } }
Example 7
Source File: CORSFilter.scala From daf-semantics with Apache License 2.0
package it.almawave.kb.http.providers

import javax.ws.rs.container.ContainerRequestContext
import javax.ws.rs.container.ContainerResponseContext
import javax.ws.rs.container.ContainerResponseFilter
import javax.ws.rs.ext.Provider
import java.text.SimpleDateFormat
import java.util.Date
import java.net.URI

@Provider
class CORSFilter extends ContainerResponseFilter {

  override def filter(request: ContainerRequestContext, response: ContainerResponseContext) {

    val headers = response.getHeaders()
    headers.add("Access-Control-Allow-Origin", "*")
    headers.add("Access-Control-Allow-Headers", "origin, content-type, accept, authorization")
    headers.add("Access-Control-Allow-Credentials", "true")
    headers.add("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS, HEAD")

    // custom headers
    // headers.add("Server", "Simple Jersey/Jetty HTTP server for RDF")
    // headers.add("Pragma", "Pragma: no-cache")
    // headers.add("Link", new URI("http://almawave.it"))
  }
}
Example 8
Source File: EachRunNewFileAppender.scala From sddf with GNU General Public License v3.0
package de.unihamburg.vsis.sddf.logging

import java.io.File
import java.text.SimpleDateFormat
import java.util.Date

import org.apache.log4j.FileAppender

import de.unihamburg.vsis.sddf.config.Config

class EachRunNewFileAppender extends FileAppender {
  override def setFile(fileName: String, append: Boolean, bufferedIO: Boolean, bufferSize: Int) = {
    val oldFile = new File(fileName)
    val dir = if (oldFile.isDirectory()) oldFile else oldFile.getParentFile
    val fileSuffix = if (oldFile.isDirectory()) ".log" else oldFile.getName
    val newFileName = EachRunNewFileAppender.runUuid + fileSuffix
    val newLogFile = new File(dir, newFileName)
    super.setFile(newLogFile.getPath, append, bufferedIO, bufferSize)
  }
}

object EachRunNewFileAppender {
  val dateFormat = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss");
  val runUuid = System.currentTimeMillis().toString() + "-" + dateFormat.format(new Date())
}
Example 9
Source File: DataWorkCloudEngineApplication.scala From Linkis with Apache License 2.0
package com.webank.wedatasphere.linkis.engine import java.text.SimpleDateFormat import java.util.Date import com.webank.wedatasphere.linkis.DataWorkCloudApplication import com.webank.wedatasphere.linkis.common.conf.DWCArgumentsParser import com.webank.wedatasphere.linkis.common.utils.Utils import com.webank.wedatasphere.linkis.engine.conf.EngineConfiguration import com.webank.wedatasphere.linkis.server.conf.ServerConfiguration import org.apache.commons.lang.StringUtils import org.slf4j.LoggerFactory object DataWorkCloudEngineApplication { val userName:String = System.getProperty("user.name") val hostName:String = Utils.getComputerName val appName:String = EngineConfiguration.ENGINE_SPRING_APPLICATION_NAME.getValue val prefixName:String = EngineConfiguration.ENGINE_LOG_PREFIX.getValue val timeStamp:Long = System.currentTimeMillis() private val timeFormat = new SimpleDateFormat("yyyy-MM-dd_HH:mm:ss") private val dateFormat = new SimpleDateFormat("yyyy-MM-dd") val time:String = timeFormat.format(new Date(timeStamp)) val date:String = dateFormat.format(new Date(timeStamp)) val isTimeStampSuffix:Boolean = "true".equalsIgnoreCase(EngineConfiguration.ENGINE_LOG_TIME_STAMP_SUFFIX.getValue) val shortLogFile:String = if (isTimeStampSuffix) appName + "_" + hostName + "_" + userName + "_" + time + ".log" else appName + "_" + hostName + "_" + userName + ".log" val logName:String = if(isTimeStampSuffix) prefixName + "/" + userName + "/" + shortLogFile else prefixName + "/" + shortLogFile System.setProperty("engineLogFile", logName) System.setProperty("shortEngineLogFile", shortLogFile) // System.setProperty("engineLogFile", logName) // val context:LoggerContext = LogManager.getContext(false).asInstanceOf[LoggerContext] // val path:String = getClass.getResource("/").getPath // val log4j2XMLFile:File = new File(path + "/log4j2-engine.xml") // val configUri:URI = log4j2XMLFile.toURI // context.setConfigLocation(configUri) private val logger = LoggerFactory.getLogger(getClass) logger.info(s"Now log4j2 Rolling File is set to be $logName") logger.info(s"Now shortLogFile is set to be $shortLogFile") def main(args: Array[String]): Unit = { val parser = DWCArgumentsParser.parse(args) DWCArgumentsParser.setDWCOptionMap(parser.getDWCConfMap) val existsExcludePackages = ServerConfiguration.BDP_SERVER_EXCLUDE_PACKAGES.getValue if(StringUtils.isEmpty(existsExcludePackages)) DataWorkCloudApplication.setProperty(ServerConfiguration.BDP_SERVER_EXCLUDE_PACKAGES.key, "com.webank.wedatasphere.linkis.enginemanager") else DataWorkCloudApplication.setProperty(ServerConfiguration.BDP_SERVER_EXCLUDE_PACKAGES.key, existsExcludePackages + ",com.webank.wedatasphere.linkis.enginemanager") DataWorkCloudApplication.main(DWCArgumentsParser.formatSpringOptions(parser.getSpringConfMap)) } }
Example 10
Source File: ApplicationUtil.scala From Linkis with Apache License 2.0
package com.webank.wedatasphere.linkis.application.util import java.text.SimpleDateFormat import java.util.Date import com.webank.wedatasphere.linkis.application.conf.{ApplicationConfiguration, ApplicationScalaConfiguration} object ApplicationUtil { def getFlowsJson(user:String,date:Date):String ={ val initExamplePath = ApplicationScalaConfiguration.INIT_EXAMPLE_PATH.getValue.toString + user + "/application/dataStudio/" val sqlName = ApplicationScalaConfiguration.INIT_EXAMPLE_SQL_NAME.getValue.toString val scalaName = ApplicationScalaConfiguration.INIT_EXAMPLE_SCALA_NAME.getValue.toString val spyName = ApplicationScalaConfiguration.INIT_EXAMPLE_SPY_NAME.getValue.toString val hqlName = ApplicationScalaConfiguration.INIT_EXAMPLE_HQL_NAME.getValue.toString val pythonName = ApplicationScalaConfiguration.INIT_EXAMPLE_PYTHON_NAME.getValue.toString val formateDate = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(date) s"""[{"id":1,"name":"Default business process(默认业务流程)","createTime":"$formateDate","lastUpdateTime":"","description":"Default business process(默认业务流程)","version":"1.0.0","owner":"$user","canPublished":false,"params":{},"relations":[],"projectChildren":[],"flowChildren":[],"nodeChildren":{"dataExchange":[],"dataStudio":[{"id":1,"name":"$sqlName","type":"${sqlName.substring(sqlName.lastIndexOf(".")+1)}","createTime":"$formateDate","lastUpdateTime":"","description":"","version":"1.0.0","owner":"$user","content":{"scriptPath":"${initExamplePath + sqlName}"}},{"id":2,"name":"$scalaName","type":"${scalaName.substring(scalaName.lastIndexOf(".")+1)}","createTime":"$formateDate","lastUpdateTime":"","description":"","version":"1.0.0","owner":"$user","content":{"scriptPath":"${initExamplePath + scalaName}"}},{"id":3,"name":"$spyName","type":"${spyName.substring(spyName.lastIndexOf(".")+1)}","createTime":"$formateDate","lastUpdateTime":"","description":"","version":"1.0.0","owner":"$user","content":{"scriptPath":"${initExamplePath + spyName}"}},{"id":4,"name":"$hqlName","type":"${hqlName.substring(hqlName.lastIndexOf(".")+1)}","createTime":"$formateDate","lastUpdateTime":"","description":"","version":"1.0.0","owner":"$user","content":{"scriptPath":"${initExamplePath + hqlName}"}},{"id":5,"name":"$pythonName","type":"${pythonName.substring(pythonName.lastIndexOf(".")+1)}","createTime":"$formateDate","lastUpdateTime":"","description":"","version":"1.0.0","owner":"$user","content":{"scriptPath":"${initExamplePath + pythonName}"}}],"dataBI":[],"resources":[]}}]""" } }
Example 11
Source File: DWSHttpClient.scala From Linkis with Apache License 2.0
class DWSHttpClient(clientConfig: DWSClientConfig, clientName: String) extends AbstractHttpClient(clientConfig, clientName) { override protected def createDiscovery(): Discovery = new DWSGatewayDiscovery override protected def prepareAction(requestAction: HttpAction): HttpAction = { requestAction match { case dwsAction: DWSHttpAction => dwsAction.setDWSVersion(clientConfig.getDWSVersion) case _ => } requestAction } override protected def httpResponseToResult(response: HttpResponse, requestAction: HttpAction, responseBody: String): Option[Result] = { var entity = response.getEntity val statusCode: Int = response.getStatusLine.getStatusCode val url: String = requestAction.getURL val contentType: String = entity.getContentType.getValue DWSHttpMessageFactory.getDWSHttpMessageResult(url).map { case DWSHttpMessageResultInfo(_, clazz) => clazz match { case c if ClassUtils.isAssignable(c, classOf[DWSResult]) => val dwsResult = clazz.getConstructor().newInstance().asInstanceOf[DWSResult] dwsResult.set(responseBody, statusCode, url, contentType) BeanUtils.populate(dwsResult, dwsResult.getData) return Some(dwsResult) case _ => } def transfer(value: Result, map: Map[String, Object]): Unit = { value match { case httpResult: HttpResult => httpResult.set(responseBody, statusCode, url, contentType) case _ => } val javaMap = mapAsJavaMap(map) BeanUtils.populate(value, javaMap) fillResultFields(javaMap, value) } deserializeResponseBody(response) match { case map: Map[String, Object] => val value = clazz.getConstructor().newInstance().asInstanceOf[Result] transfer(value, map) value case list: List[Map[String, Object]] => val results = list.map { map => val value = clazz.getConstructor().newInstance().asInstanceOf[Result] transfer(value, map) value }.toArray new ListResult(responseBody, results) } }.orElse(nonDWSResponseToResult(response, requestAction)) } protected def nonDWSResponseToResult(response: HttpResponse, requestAction: HttpAction): Option[Result] = None protected def fillResultFields(responseMap: util.Map[String, Object], value: Result): Unit = {} //TODO Consistent with workspace, plus expiration time(与workspace保持一致,加上过期时间) override protected def getFsByUser(user: String, path: FsPath): Fs = FSFactory.getFsByProxyUser(path, user) } object DWSHttpClient { val jacksonJson = new ObjectMapper().setDateFormat(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ")) }
Example 12
Source File: TypeCast.scala From spark-select with Apache License 2.0
package io.minio.spark.select.util

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.{SimpleDateFormat, NumberFormat}
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try

// The enclosing object declaration was truncated in the original listing; it is restored here
// (named after the source file) so the snippet compiles.
object TypeCast {

  @throws[IllegalArgumentException]
  private[select] def toChar(str: String): Char = {
    if (str.charAt(0) == '\\') {
      str.charAt(1) match {
        case 't' => '\t'
        case 'r' => '\r'
        case 'b' => '\b'
        case 'f' => '\f'
        case '\"' => '\"' // In case user changes quote char and uses \" as delimiter in options
        case '\'' => '\''
        case 'u' if str == """\u0000""" => '\u0000'
        case _ =>
          throw new IllegalArgumentException(s"Unsupported special character for delimiter: $str")
      }
    } else if (str.length == 1) {
      str.charAt(0)
    } else {
      throw new IllegalArgumentException(s"Delimiter cannot be more than one character: $str")
    }
  }
}
Example 13
Source File: Authorize.scala From keycloak-benchmark with Apache License 2.0
package io.gatling.keycloak import java.text.SimpleDateFormat import java.util.{Date, Collections} import akka.actor.ActorDSL.actor import akka.actor.ActorRef import io.gatling.core.action.Interruptable import io.gatling.core.action.builder.ActionBuilder import io.gatling.core.config.Protocols import io.gatling.core.result.writer.DataWriterClient import io.gatling.core.session._ import io.gatling.core.validation._ import org.jboss.logging.Logger import org.keycloak.adapters.spi.AuthOutcome import org.keycloak.adapters.KeycloakDeploymentBuilder import org.keycloak.adapters.spi.HttpFacade.Cookie import org.keycloak.common.enums.SslRequired import org.keycloak.representations.adapters.config.AdapterConfig import scala.collection.JavaConverters._ case class AuthorizeAttributes( requestName: Expression[String], uri: Expression[String], cookies: Expression[List[Cookie]], sslRequired: SslRequired = SslRequired.EXTERNAL, resource: String = null, password: String = null, realm: String = null, realmKey: String = null, authServerUrl: Expression[String] = _ => Failure("no server url") ) { def toAdapterConfig(session: Session) = { val adapterConfig = new AdapterConfig adapterConfig.setSslRequired(sslRequired.toString) adapterConfig.setResource(resource) adapterConfig.setCredentials(Collections.singletonMap("secret", password)) adapterConfig.setRealm(realm) adapterConfig.setRealmKey(realmKey) adapterConfig.setAuthServerUrl(authServerUrl(session).get) adapterConfig } } class AuthorizeActionBuilder(attributes: AuthorizeAttributes) extends ActionBuilder { def newInstance(attributes: AuthorizeAttributes) = new AuthorizeActionBuilder(attributes) def sslRequired(sslRequired: SslRequired) = newInstance(attributes.copy(sslRequired = sslRequired)) def resource(resource: String) = newInstance(attributes.copy(resource = resource)) def clientCredentials(password: String) = newInstance(attributes.copy(password = password)) def realm(realm: String) = newInstance(attributes.copy(realm = realm)) def realmKey(realmKey: String) = newInstance(attributes.copy(realmKey = realmKey)) def authServerUrl(authServerUrl: Expression[String]) = newInstance(attributes.copy(authServerUrl = authServerUrl)) override def build(next: ActorRef, protocols: Protocols): ActorRef = { actor(actorName("authorize"))(new AuthorizeAction(attributes, next)) } } object AuthorizeAction { val logger = Logger.getLogger(classOf[AuthorizeAction]) } class AuthorizeAction( attributes: AuthorizeAttributes, val next: ActorRef ) extends Interruptable with ExitOnFailure with DataWriterClient { override def executeOrFail(session: Session): Validation[_] = { val facade = new MockHttpFacade() val deployment = KeycloakDeploymentBuilder.build(attributes.toAdapterConfig(session)); facade.request.setURI(attributes.uri(session).get); facade.request.setCookies(attributes.cookies(session).get.map(c => (c.getName, c)).toMap.asJava) var nextSession = session val requestAuth: MockRequestAuthenticator = session(MockRequestAuthenticator.KEY).asOption[MockRequestAuthenticator] match { case Some(ra) => ra case None => val tmp = new MockRequestAuthenticator(facade, deployment, new MockTokenStore, -1, session.userId) nextSession = session.set(MockRequestAuthenticator.KEY, tmp) tmp } Blocking(() => { AuthorizeAction.logger.debugf("%s: Authenticating %s%n", new SimpleDateFormat("HH:mm:ss,SSS").format(new Date()).asInstanceOf[Any], session("username").as[Any], Unit) Stopwatch(() => requestAuth.authenticate()) .check(result => result == AuthOutcome.AUTHENTICATED, result => { 
AuthorizeAction.logger.warnf("%s: Failed auth %s%n", new SimpleDateFormat("HH:mm:ss,SSS").format(new Date()).asInstanceOf[Any], session("username").as[Any], Unit) result.toString }) .recordAndContinue(AuthorizeAction.this, nextSession, attributes.requestName(session).get) }) } }
Example 14
Source File: SessionServlet.scala From jboss-wildfly-test with Apache License 2.0
package servlet import java.text.SimpleDateFormat import java.util.Date import javax.servlet.annotation._ import javax.servlet.http._ @WebServlet(value = Array("/SessionServlet")) class SessionServlet extends HttpServlet { def formatTime(timestamp: Long): String = { val sdf = new SimpleDateFormat("yyyy-MM-dd'T'hh:mm:ss.SSS") sdf.format(new Date(timestamp)) } override def doGet(request: HttpServletRequest, response: HttpServletResponse) { response.setContentType("text/html") response.setCharacterEncoding("UTF-8") val out = response.getWriter out.println("<h3>Session Test Example</h3>") val session = request.getSession(true) out.println( s""" |Session Id: ${session.getId} <br/> |Created: ${formatTime(session.getCreationTime)} <br/> |Last Accessed: ${formatTime(session.getLastAccessedTime)} <br/> """.stripMargin) Option(request.getParameter("dataname")).foreach { dataName ⇒ Option(request.getParameter("datavalue")).foreach { dataValue ⇒ session.setAttribute(dataName, dataValue); } } import scala.collection.JavaConversions._ val xs = session.getAttributeNames val sessionDataString = xs.map(name ⇒ s"$name = ${session.getAttribute(name)}").mkString("<br/>") out.println( s""" |<p> |The following data is in your session: <br/><br/> |$sessionDataString |</p> | |<p> |POST based form <br/> |<form action='${response.encodeURL("SessionServlet")}' method='post'> | Name of session attribute: <input type='text' size='20' name='dataname'/><br/> | Value of session attribute: <input type='text' size='20' name='datavalue'/><br/> | <input type='submit'/> |</form> |</p> | |<p> |GET based form <br/> |<form action='${response.encodeURL("SessionServlet")}' method='get'> | Name of session attribute: <input type='text' size='20' name='dataname'/><br/> | Value of session attribute: <input type='text' size='20' name='datavalue'/><br/> | <input type='submit'/> |</form> |</p> | |<p><a href='${response.encodeURL("SessionServlet?dataname=foo&datavalue=bar")}'>URL encoded</a> """.stripMargin) out.close() } override def doPost(req: HttpServletRequest, resp: HttpServletResponse): Unit = doGet(req, resp) }
Example 15
Source File: Total.scala From akka_streams_tutorial with MIT License
package sample.stream_actor

import java.text.SimpleDateFormat
import java.util.{Date, TimeZone}

import akka.Done
import akka.actor.Actor
import sample.stream_actor.Total.Increment

object Total {
  case class Increment(value: Long, avg: Double, id: String)
}

class Total extends Actor {
  var total: Long = 0

  override def receive: Receive = {
    case Increment(value, avg, id) =>
      println(s"Received $value new measurements from turbine with id: $id - Avg wind speed is: $avg")
      total = total + value

      val date = new Date()
      val df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
      df.setTimeZone(TimeZone.getTimeZone("Europe/Zurich"))

      println(s"${df.format(date)} - Current total of all measurements: $total")
      sender ! Done
  }
}
Example 16
Source File: Warn.scala From spatial with MIT License
package emul

import java.text.SimpleDateFormat
import java.util.Calendar
import java.io.PrintStream

object Warn {
  val now = Calendar.getInstance().getTime
  val fmt = new SimpleDateFormat("dd_MM_yyyy_hh_mm_aa")
  val timestamp = fmt.format(now)
  var warns: Int = 0

  lazy val log = new PrintStream(timestamp + ".log")

  def apply(x: => String): Unit = {
    log.println(x)
    warns += 1
  }

  def close(): Unit = {
    if (warns > 0) {
      println(Warn.warns + " warnings occurred during program execution. See " + Warn.timestamp + ".log for details")
      log.close()
    }
  }
}
Example 17
Source File: Worker.scala From EncryCore with GNU General Public License v3.0
package encry.local.miner import java.util.Date import akka.actor.{Actor, ActorRef} import encry.EncryApp._ import scala.concurrent.duration._ import encry.consensus.{CandidateBlock, ConsensusSchemeReaders} import encry.local.miner.Miner.MinedBlock import encry.local.miner.Worker.{MineBlock, NextChallenge} import java.text.SimpleDateFormat import com.typesafe.scalalogging.StrictLogging import org.encryfoundation.common.utils.constants.TestNetConstants class Worker(myIdx: Int, numberOfWorkers: Int, miner: ActorRef) extends Actor with StrictLogging { val sdf: SimpleDateFormat = new SimpleDateFormat("HH:mm:ss") var challengeStartTime: Date = new Date(System.currentTimeMillis()) val initialNonce: Long = Long.MaxValue / numberOfWorkers * myIdx override def preRestart(reason: Throwable, message: Option[Any]): Unit = logger.warn(s"Worker $myIdx is restarting because of: $reason") override def receive: Receive = { case MineBlock(candidate: CandidateBlock, nonce: Long) => logger.info(s"Trying nonce: $nonce. Start nonce is: $initialNonce. " + s"Iter qty: ${nonce - initialNonce + 1} on worker: $myIdx with diff: ${candidate.difficulty}") ConsensusSchemeReaders .consensusScheme.verifyCandidate(candidate, nonce) .fold( e => { self ! MineBlock(candidate, nonce + 1) logger.info(s"Mining failed cause: $e") }, block => { logger.info(s"New block is found: (${block.header.height}, ${block.header.encodedId}, ${block.payload.txs.size} " + s"on worker $self at ${sdf.format(new Date(System.currentTimeMillis()))}. Iter qty: ${nonce - initialNonce + 1}") miner ! MinedBlock(block, myIdx) }) case NextChallenge(candidate: CandidateBlock) => challengeStartTime = new Date(System.currentTimeMillis()) logger.info(s"Start next challenge on worker: $myIdx at height " + s"${candidate.parentOpt.map(_.height + 1).getOrElse(TestNetConstants.PreGenesisHeight.toString)} at ${sdf.format(challengeStartTime)}") self ! MineBlock(candidate, Long.MaxValue / numberOfWorkers * myIdx) } } object Worker { case class NextChallenge(candidateBlock: CandidateBlock) case class MineBlock(candidateBlock: CandidateBlock, nonce: Long) }
Example 18
Source File: ISODateConverter.scala From hydra with Apache License 2.0
package hydra.avro.convert

import java.text.SimpleDateFormat
import java.time._

import hydra.common.logging.LoggingAdapter
import org.apache.avro.{Conversion, LogicalType, Schema}

import scala.util.Try

class ISODateConverter extends Conversion[ZonedDateTime] with LoggingAdapter {

  private val utc = ZoneOffset.UTC

  override def getLogicalTypeName: String = IsoDate.IsoDateLogicalTypeName

  override def getConvertedType: Class[ZonedDateTime] = classOf[ZonedDateTime]

  private val simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssX")

  override def fromCharSequence(
      value: CharSequence,
      schema: Schema,
      `type`: LogicalType
  ): ZonedDateTime = {
    Try(OffsetDateTime.parse(value).toInstant)
      .orElse {
        Try(LocalDateTime.parse(value).toInstant(ZoneOffset.UTC))
      }
      .orElse {
        Try(simpleDateFormat.parse(value.toString).toInstant)
      }
      .recover {
        case e: Throwable =>
          log.error(e.getMessage, e)
          Instant.EPOCH
      }
      .map(_.atZone(utc))
      .get
  }
}

object IsoDate extends LogicalType("iso-datetime") {
  val IsoDateLogicalTypeName = "iso-datetime"

  override def validate(schema: Schema): Unit = {
    if (schema.getType() != Schema.Type.STRING) {
      throw new IllegalArgumentException(
        "Iso-datetime can only be used with an underlying string type"
      )
    }
  }
}
Example 19
Source File: Trip.scala From gihyo-spark-book-example with Apache License 2.0
package jp.gihyo.spark.ch05

import java.text.SimpleDateFormat

case class Trip(
    id: Int,
    duration: Int,
    startDate: java.sql.Timestamp,
    startStation: String,
    startTerminal: Int,
    endDate: java.sql.Timestamp,
    endStation: String,
    endTerminal: Int,
    bikeNum: Int,
    subscriberType: String,
    zipcode: String)

object Trip {

  def parse(line: String): Trip = {
    val dateFormat = new SimpleDateFormat("MM/dd/yyy HH:mm")

    val elms = line.split(",")
    val id = elms(0).toInt
    val duration = elms(1).toInt
    val startDate = new java.sql.Timestamp(dateFormat.parse(elms(2)).getTime)
    val startStation = elms(3)
    val startTerminal = elms(4).toInt
    val endDate = new java.sql.Timestamp(dateFormat.parse(elms(5)).getTime)
    val endStation = elms(6)
    val endTerminal = elms(7).toInt
    val bikeNum = elms(8).toInt
    val subscriberType = elms(9)
    val zipcode = elms(10)

    Trip(id, duration, startDate, startStation, startTerminal,
      endDate, endStation, endTerminal, bikeNum, subscriberType, zipcode)
  }
}
Example 20
Source File: Station.scala From gihyo-spark-book-example with Apache License 2.0
package jp.gihyo.spark.ch05

import java.text.SimpleDateFormat

case class Station(
    id: Int,
    name: String,
    lat: Double,
    lon: Double,
    dockcount: Int,
    landmark: String,
    installation: java.sql.Date)

object Station {

  def parse(line: String): Station = {
    val dateFormat = new SimpleDateFormat("MM/dd/yyy")

    val elms = line.split(",")
    val id = elms(0).toInt
    val name = elms(1)
    val lat = elms(2).toDouble
    val lon = elms(3).toDouble
    val dockcount = elms(4).toInt
    val landmark = elms(5)
    val parsedInstallation = dateFormat.parse(elms(6))
    val installation = new java.sql.Date(parsedInstallation.getTime)

    Station(id, name, lat, lon, dockcount, landmark, installation)
  }
}
Example 21
Source File: StationSuite.scala From gihyo-spark-book-example with Apache License 2.0
package jp.gihyo.spark.ch05

import java.sql.Timestamp
import java.text.SimpleDateFormat

import org.scalatest.FunSuite

class StationSuite extends FunSuite {

  test("should be parse") {
    val line = "2,San Jose Diridon Caltrain Station,37.329732,-121.901782,27,San Jose,8/6/2013"
    val station = Station.parse(line)
    val dateFormat = new SimpleDateFormat("MM/dd/yyy")

    assert(station.id === 2)
    assert(station.name === "San Jose Diridon Caltrain Station")
    assert(station.lat === 37.329732)
    assert(station.lon === -121.901782)
    assert(station.dockcount === 27)
    assert(station.landmark === "San Jose")
    assert(station.installation === new Timestamp(dateFormat.parse("8/6/2013").getTime))
  }
}
Example 22
Source File: IOUtils.scala From watr-works with Apache License 2.0
package edu.umass.cs.iesl.watr package utils object PathUtils { import ammonite.{ops => fs} import java.nio.{file => nio} def appendTimestamp(path: String): String = { import java.text.SimpleDateFormat import java.util.Date val dateStamp = new SimpleDateFormat("yyyyMMddhhmmss").format(new Date()) s"$path-$dateStamp" } def nioToAmm(nioPath: nio.Path): fs.Path = { fs.FilePath(nioPath) match { case p: fs.Path => p case p: fs.RelPath => fs.pwd / p case _ => ??? } } def strToAmmPath(str: String): fs.Path = { fs.FilePath(str) match { case p: fs.Path => p case p: fs.RelPath => fs.pwd / p case _ => ??? } } implicit class RicherPathUtils_String(val self: String) extends AnyVal { def toPath(): fs.Path = { strToAmmPath(self) } } implicit class RicherPathUtils_NioPath(val self: nio.Path) extends AnyVal { def toFsPath(): fs.Path = { nioToAmm(self) } } }
Example 23
Source File: GenericMainClass.scala From darwin with Apache License 2.0
package it.agilelab.darwin.app.spark import java.text.SimpleDateFormat import java.util.Date import com.typesafe.config.{Config, ConfigFactory} import org.apache.hadoop.fs.FileSystem import org.apache.spark.sql.SparkSession import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ trait GenericMainClass { self: SparkManager => val genericMainClassLogger: Logger = LoggerFactory.getLogger("SparkManager") private def makeFileSystem(session: SparkSession): FileSystem = { if (session.sparkContext.isLocal) { FileSystem.getLocal(session.sparkContext.hadoopConfiguration) } else { FileSystem.get(session.sparkContext.hadoopConfiguration) } } // scalastyle:off private def getGlobalConfig: Config = { genericMainClassLogger.debug("system environment vars") for ((k, v) <- System.getenv().asScala.toSeq.sortBy(_._1)) genericMainClassLogger.debug(s"$k -> $v") genericMainClassLogger.debug("system properties") for ((k, v) <- System.getProperties.asScala.toSeq.sortBy(_._1)) genericMainClassLogger.debug(s"$k -> $v") ConfigFactory.load() } // scalastyle:on }
Example 24
Source File: MetricsReporter.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.execution.streaming import java.text.SimpleDateFormat import com.codahale.metrics.{Gauge, MetricRegistry} import org.apache.spark.internal.Logging import org.apache.spark.metrics.source.{Source => CodahaleSource} import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.streaming.StreamingQueryProgress class MetricsReporter( stream: StreamExecution, override val sourceName: String) extends CodahaleSource with Logging { override val metricRegistry: MetricRegistry = new MetricRegistry // Metric names should not have . in them, so that all the metrics of a query are identified // together in Ganglia as a single metric group registerGauge("inputRate-total", _.inputRowsPerSecond, 0.0) registerGauge("processingRate-total", _.processedRowsPerSecond, 0.0) registerGauge("latency", _.durationMs.get("triggerExecution").longValue(), 0L) private val timestampFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'") // ISO8601 timestampFormat.setTimeZone(DateTimeUtils.getTimeZone("UTC")) registerGauge("eventTime-watermark", progress => convertStringDateToMillis(progress.eventTime.get("watermark")), 0L) registerGauge("states-rowsTotal", _.stateOperators.map(_.numRowsTotal).sum, 0L) registerGauge("states-usedBytes", _.stateOperators.map(_.memoryUsedBytes).sum, 0L) private def convertStringDateToMillis(isoUtcDateStr: String) = { if (isoUtcDateStr != null) { timestampFormat.parse(isoUtcDateStr).getTime } else { 0L } } private def registerGauge[T]( name: String, f: StreamingQueryProgress => T, default: T): Unit = { synchronized { metricRegistry.register(name, new Gauge[T] { override def getValue: T = Option(stream.lastProgress).map(f).getOrElse(default) }) } } }
Example 25
Source File: ExecutorNumListener.scala From XSQL with Apache License 2.0
package org.apache.spark.monitor import java.text.SimpleDateFormat import java.util import java.util.Date import java.util.concurrent.atomic.AtomicBoolean import com.fasterxml.jackson.annotation.JsonIgnore import org.apache.spark.SparkContext import org.apache.spark.internal.Logging import org.apache.spark.scheduler.{ SparkListener, SparkListenerExecutorAdded, SparkListenerExecutorRemoved } import org.apache.spark.util.kvstore.KVIndex class ExecutorNumListener extends SparkListener with Logging { lazy val kvstore = SparkContext.getActive.get.statusStore.store var initialized: AtomicBoolean = new AtomicBoolean(false) var lastPointTime: Long = new Date().getTime var recentEventTime: Long = new Date().getTime private val liveExecutors = new util.HashSet[String]() def initialize(): Unit = { SparkContext.getActive.map(_.ui).flatten.foreach { case ui => ui.attachTab(new ExecutorNumTab(ui)) ui.addStaticHandler("static", "/static/special") } } def maybeAddPoint(time: Long): Unit = { if (!initialized.get) { initialize() initialized.compareAndSet(false, true) } if (time - lastPointTime > 20 * 1000) { addPoint(recentEventTime) addPoint(time) lastPointTime = time } recentEventTime = time } def addPoint(time: Long): Unit = { val executorNum = liveExecutors.size kvstore.write(new ExecutorNumWrapper(new ExecutorNum( s"own ${executorNum} executors at ${new SimpleDateFormat("HH:mm:ss").format(new Date(time))}", IndexedSeq(time, executorNum)))) } override def onExecutorAdded(event: SparkListenerExecutorAdded): Unit = { liveExecutors.add(event.executorId) maybeAddPoint(event.time) } override def onExecutorRemoved(event: SparkListenerExecutorRemoved): Unit = { liveExecutors.remove(event.executorId) maybeAddPoint(event.time) } } private[spark] class ExecutorNumWrapper(val point: ExecutorNum) { @JsonIgnore @KVIndex def id: Long = point.value(0) } private[spark] class ExecutorNum(val name: String, val value: IndexedSeq[Long])
Example 26
Source File: ApplicationMonitor.scala From XSQL with Apache License 2.0
package org.apache.spark.monitor.application import java.sql.{Connection, Timestamp} import java.text.SimpleDateFormat import java.util.Date import java.util.concurrent.TimeUnit import scala.concurrent.duration.Duration import org.apache.spark.alarm.AlertMessage import org.apache.spark.alarm.AlertType._ import org.apache.spark.monitor.Monitor import org.apache.spark.monitor.MonitorItem.MonitorItem abstract class ApplicationMonitor extends Monitor { override val alertType = Seq(Application) } class ApplicationInfo( title: MonitorItem, appName: String, appId: String, md5: String, startTime: Date, duration: Long, appUiUrl: String, historyUrl: String, eventLogDir: String, minExecutor: Int, maxExecutor: Int, executorCore: Int, executorMemoryMB: Long, executorAccu: Double, user: String) extends AlertMessage(title) { override def toCsv(): String = { s"${user},${appId}," + s"${new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(startTime)}," + s"${Duration(duration, TimeUnit.MILLISECONDS).toSeconds}," + s"${executorMemoryMB},${executorCore},${executorAccu.formatted("%.2f")},${appName}" } // scalastyle:off override def toHtml(): String = { val html = <h1>任务完成! </h1> <h2>任务信息 </h2> <ul> <li>作业名:{appName}</li> <li>作业ID:{appId}</li> <li>开始时间:{new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(startTime)}</li> <li>任务用时:{Duration(duration, TimeUnit.MILLISECONDS).toSeconds} s</li> </ul> <h2>资源用量</h2> <ul> <li>Executor个数:{minExecutor}~{maxExecutor}</li> <li>Executor内存:{executorMemoryMB} MB</li> <li>Executor核数:{executorCore}</li> <li>Executor累积用量:{executorAccu.formatted("%.2f")} executor*min</li> </ul> <h2>调试信息</h2> <ul> <li>回看链接1:<a href={appUiUrl.split(",").head}>{appUiUrl.split(",").head}</a></li> <li>回看链接2:<a href={historyUrl}>{historyUrl}</a></li> <li>日志文件所在目录:{eventLogDir}</li> </ul> html.mkString } override def toJdbc(conn: Connection, appId: String): Unit = { val query = "INSERT INTO `xsql_monitor`.`spark_history`(" + "`user`, `md5`, `appId`, `startTime`, `duration`, " + "`yarnURL`, `sparkHistoryURL`, `eventLogDir`, `coresPerExecutor`, `memoryPerExecutorMB`," + " `executorAcc`, `appName`, `minExecutors`, `maxExecutors`)" + " SELECT ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? FROM DUAL" + " WHERE NOT EXISTS (SELECT * FROM `xsql_monitor`.`spark_history` WHERE `appId` = ?);" val preparedStmt = conn.prepareStatement(query) preparedStmt.setString(1, user) preparedStmt.setString(2, md5) preparedStmt.setString(3, appId) preparedStmt.setTimestamp(4, new Timestamp(startTime.getTime)) preparedStmt.setLong(5, Duration(duration, TimeUnit.MILLISECONDS).toSeconds) preparedStmt.setString(6, appUiUrl) preparedStmt.setString(7, historyUrl) preparedStmt.setString(8, eventLogDir) preparedStmt.setInt(9, executorCore) preparedStmt.setLong(10, executorMemoryMB) preparedStmt.setDouble(11, executorAccu) preparedStmt.setString(12, appName) preparedStmt.setInt(13, minExecutor) preparedStmt.setInt(14, maxExecutor) preparedStmt.setString(15, appId) preparedStmt.execute } }
Example 27
Source File: SQLMonitor.scala From XSQL with Apache License 2.0
package org.apache.spark.monitor.sql

import java.text.SimpleDateFormat
import java.util.Date
import java.util.concurrent.TimeUnit

import scala.concurrent.duration.Duration

import org.apache.spark.alarm.AlertMessage
import org.apache.spark.alarm.AlertType._
import org.apache.spark.monitor.Monitor
import org.apache.spark.monitor.MonitorItem.MonitorItem

abstract class SQLMonitor extends Monitor {
  override val alertType = Seq(SQL)
}

class SQLInfo(
    title: MonitorItem,
    sqlId: String,
    aeFlag: Boolean,
    appId: String,
    executionId: Long,
    submissionTime: Date,
    duration: Long)
  extends AlertMessage(title) {

  override def toCsv(): String = {
    s"${sqlId},${aeFlag},${appId},${executionId}," +
      s"${new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(submissionTime)}," +
      s"${Duration(duration, TimeUnit.MILLISECONDS).toSeconds}"
  }
}
Example 28
Source File: ExcelOutputWriter.scala From spark-hadoopoffice-ds with Apache License 2.0
package org.zuinnote.spark.office.excel import java.math.BigDecimal import java.sql.Date import java.sql.Timestamp import java.text.DateFormat import java.text.SimpleDateFormat import java.util.Calendar import org.apache.hadoop.conf.Configuration import org.apache.hadoop.io.NullWritable import org.apache.hadoop.io.ArrayWritable import org.apache.hadoop.mapreduce.RecordWriter import org.apache.hadoop.mapreduce.TaskAttemptContext import org.apache.hadoop.fs.Path import org.apache.spark.sql.catalyst.{ CatalystTypeConverters, InternalRow } import org.apache.spark.sql.Row import org.apache.spark.sql.execution.datasources.OutputWriter import org.apache.spark.sql.types._ import org.zuinnote.hadoop.office.format.common.dao.SpreadSheetCellDAO import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration import org.zuinnote.hadoop.office.format.common.util.msexcel.MSExcelUtil import org.zuinnote.hadoop.office.format.mapreduce._ import org.apache.commons.logging.LogFactory import org.apache.commons.logging.Log import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration import java.util.Locale import java.text.DecimalFormat import org.zuinnote.hadoop.office.format.common.converter.ExcelConverterSimpleSpreadSheetCellDAO import java.text.NumberFormat // NOTE: This class is instantiated and used on executor side only, no need to be serializable. private[excel] class ExcelOutputWriter( path: String, dataSchema: StructType, context: TaskAttemptContext, options: Map[String, String]) extends OutputWriter { def write(row: Row): Unit = { // check useHeader if (useHeader) { val headers = row.schema.fieldNames var i = 0 for (x <- headers) { val headerColumnSCD = new SpreadSheetCellDAO(x, "", "", MSExcelUtil.getCellAddressA1Format(currentRowNum, i), defaultSheetName) recordWriter.write(NullWritable.get(), headerColumnSCD) i += 1 } currentRowNum += 1 useHeader = false } // for each value in the row if (row.size>0) { var currentColumnNum = 0; val simpleObject = new Array[AnyRef](row.size) for (i <- 0 to row.size - 1) { // for each element of the row val obj = row.get(i) if ((obj.isInstanceOf[Seq[String]]) && (obj.asInstanceOf[Seq[String]].length==5)) { val formattedValue = obj.asInstanceOf[Seq[String]](0) val comment = obj.asInstanceOf[Seq[String]](1) val formula = obj.asInstanceOf[Seq[String]](2) val address = obj.asInstanceOf[Seq[String]](3) val sheetName = obj.asInstanceOf[Seq[String]](4) simpleObject(i) = new SpreadSheetCellDAO(formattedValue,comment,formula,address,sheetName) } else { simpleObject(i)=obj.asInstanceOf[AnyRef] } } // convert row to spreadsheetcellDAO val spreadSheetCellDAORow = simpleConverter.getSpreadSheetCellDAOfromSimpleDataType(simpleObject, defaultSheetName, currentRowNum) // write it for (x<- spreadSheetCellDAORow) { recordWriter.write(NullWritable.get(), x) } } currentRowNum += 1 } override def close(): Unit = { recordWriter.close(context) currentRowNum = 0; } }
Example 29
Source File: BenchmarkUtil.scala From CodeAnalyzerTutorial with Apache License 2.0
package tutor.utils

import java.text.SimpleDateFormat
import java.util.Date

import com.typesafe.scalalogging.StrictLogging

object BenchmarkUtil extends StrictLogging {

  def record[T](actionDesc: String)(action: => T): T = {
    val beginTime = new Date
    logger.info(s"begin $actionDesc")
    val rs = action
    logger.info(s"end $actionDesc")
    val endTime = new Date
    val elapsed = new Date(endTime.getTime - beginTime.getTime)
    val sdf = new SimpleDateFormat("mm:ss.SSS")
    logger.info(s"$actionDesc total elapsed ${sdf.format(elapsed)}")
    rs
  }

  def recordStart(actionDesc: String): Date = {
    logger.info(s"$actionDesc begin")
    new Date
  }

  def recordElapse(actionDesc: String, beginFrom: Date): Unit = {
    logger.info(s"$actionDesc ended")
    val endTime = new Date
    val elapsed = new Date(endTime.getTime - beginFrom.getTime)
    val sdf = new SimpleDateFormat("mm:ss.SSS")
    logger.info(s"$actionDesc total elapsed ${sdf.format(elapsed)}")
  }
}
Example 30
Source File: L3-DStreamMapping.scala From prosparkstreaming with Apache License 2.0
package org.apress.prospark import org.apache.spark.SparkContext import org.apache.spark.SparkConf import org.apache.spark.streaming.{ Milliseconds, Seconds, StreamingContext } import org.apache.hadoop.io.{ Text, LongWritable, IntWritable } import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.lib.input.TextInputFormat import org.apache.spark.streaming.dstream.DStream import org.apache.hadoop.mapred.TextOutputFormat import org.apache.hadoop.mapreduce.lib.output.{ TextOutputFormat => NewTextOutputFormat } import org.apache.spark.streaming.dstream.PairDStreamFunctions import org.apache.log4j.LogManager import org.json4s._ import org.json4s.native.JsonMethods._ import java.text.SimpleDateFormat import java.util.Date object RedditMappingApp { def main(args: Array[String]) { if (args.length != 2) { System.err.println( "Usage: RedditMappingApp <appname> <input_path>") System.exit(1) } val Seq(appName, inputPath) = args.toSeq val LOG = LogManager.getLogger(this.getClass) val conf = new SparkConf() .setAppName(appName) .setJars(SparkContext.jarOfClass(this.getClass).toSeq) val ssc = new StreamingContext(conf, Seconds(1)) LOG.info("Started at %d".format(ssc.sparkContext.startTime)) val comments = ssc.fileStream[LongWritable, Text, TextInputFormat](inputPath, (f: Path) => true, newFilesOnly = false).map(pair => pair._2.toString) val sdf = new SimpleDateFormat("yyyy-MM-dd") val tsKey = "created_utc" val secs = 1000L val keyedByDay = comments.map(rec => { val ts = (parse(rec) \ tsKey).values (sdf.format(new Date(ts.toString.toLong * secs)), rec) }) val keyedByDayPart = comments.mapPartitions(iter => { var ret = List[(String, String)]() while (iter.hasNext) { val rec = iter.next val ts = (parse(rec) \ tsKey).values ret.::=(sdf.format(new Date(ts.toString.toLong * secs)), rec) } ret.iterator }) val wordTokens = comments.map(rec => { ((parse(rec) \ "body")).values.toString.split(" ") }) val wordTokensFlat = comments.flatMap(rec => { ((parse(rec) \ "body")).values.toString.split(" ") }) val filterSubreddit = comments.filter(rec => (parse(rec) \ "subreddit").values.toString.equals("AskReddit")) val sortedByAuthor = comments.transform(rdd => (rdd.sortBy(rec => (parse(rec) \ "author").values.toString))) ssc.start() ssc.awaitTermination() } }
Example 31
Source File: L3-DStreamKeyValue.scala From prosparkstreaming with Apache License 2.0
package org.apress.prospark import org.apache.spark.SparkContext import org.apache.spark.SparkConf import org.apache.spark.streaming.{ Milliseconds, Seconds, StreamingContext } import org.apache.hadoop.io.{ Text, LongWritable, IntWritable } import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.lib.input.TextInputFormat import org.apache.spark.streaming.dstream.DStream import org.apache.hadoop.mapred.TextOutputFormat import org.apache.hadoop.mapreduce.lib.output.{ TextOutputFormat => NewTextOutputFormat } import org.apache.spark.streaming.dstream.PairDStreamFunctions import org.apache.log4j.LogManager import org.json4s._ import org.json4s.native.JsonMethods._ import java.text.SimpleDateFormat import java.util.Date import org.apache.spark.HashPartitioner object RedditKeyValueApp { def main(args: Array[String]) { if (args.length != 3) { System.err.println( "Usage: RedditKeyValueApp <appname> <input_path> <input_path_popular>") System.exit(1) } val Seq(appName, inputPath, inputPathPopular) = args.toSeq val LOG = LogManager.getLogger(this.getClass) val conf = new SparkConf() .setAppName(appName) .setJars(SparkContext.jarOfClass(this.getClass).toSeq) val ssc = new StreamingContext(conf, Seconds(1)) LOG.info("Started at %d".format(ssc.sparkContext.startTime)) val comments = ssc.fileStream[LongWritable, Text, TextInputFormat](inputPath, (f: Path) => true, newFilesOnly = false).map(pair => pair._2.toString) val popular = ssc.fileStream[LongWritable, Text, TextInputFormat](inputPathPopular, (f: Path) => true, newFilesOnly = false).map(pair => pair._2.toString) val topAuthors = comments.map(rec => ((parse(rec) \ "author").values.toString, 1)) .groupByKey() .map(r => (r._2.sum, r._1)) .transform(rdd => rdd.sortByKey(ascending = false)) val topAuthors2 = comments.map(rec => ((parse(rec) \ "author").values.toString, 1)) .reduceByKey(_ + _) .map(r => (r._2, r._1)) .transform(rdd => rdd.sortByKey(ascending = false)) val topAuthorsByAvgContent = comments.map(rec => ((parse(rec) \ "author").values.toString, (parse(rec) \ "body").values.toString.split(" ").length)) .combineByKey( (v) => (v, 1), (accValue: (Int, Int), v) => (accValue._1 + v, accValue._2 + 1), (accCombine1: (Int, Int), accCombine2: (Int, Int)) => (accCombine1._1 + accCombine2._1, accCombine1._2 + accCombine2._2), new HashPartitioner(ssc.sparkContext.defaultParallelism)) .map({ case (k, v) => (k, v._1 / v._2.toFloat) }) .map(r => (r._2, r._1)) .transform(rdd => rdd.sortByKey(ascending = false)) val keyedBySubreddit = comments.map(rec => (((parse(rec)) \ "subreddit").values.toString, rec)) val keyedBySubreddit2 = popular.map(rec => ({ val t = rec.split(",") (t(1).split("/")(4), t(0)) })) val commentsWithIndustry = keyedBySubreddit.join(keyedBySubreddit2) val keyedBySubredditCo = comments.map(rec => (((parse(rec)) \ "subreddit").values.toString, rec)) val keyedBySubredditCo2 = popular.map(rec => ({ val t = rec.split(",") (t(1).split("/")(4), t(0)) })) val commentsWithIndustryCo = keyedBySubreddit.cogroup(keyedBySubreddit2) val checkpointPath = "/tmp" ssc.checkpoint(checkpointPath) val updateFunc = (values: Seq[Int], state: Option[Int]) => { val currentCount = values.sum val previousCount = state.getOrElse(0) Some(currentCount + previousCount) } val keyedBySubredditState = comments.map(rec => (((parse(rec)) \ "subreddit").values.toString, 1)) val globalCount = keyedBySubredditState.updateStateByKey(updateFunc) .map(r => (r._2, r._1)) .transform(rdd => rdd.sortByKey(ascending = false)) ssc.start() ssc.awaitTermination() } }
Example 32
Source File: L3-DStreamVariation.scala From prosparkstreaming with Apache License 2.0
package org.apress.prospark import org.apache.spark.SparkContext import org.apache.spark.SparkConf import org.apache.spark.streaming.{ Milliseconds, Seconds, StreamingContext } import org.apache.hadoop.io.{ Text, LongWritable, IntWritable } import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.lib.input.TextInputFormat import org.apache.spark.streaming.dstream.DStream import org.apache.hadoop.mapred.TextOutputFormat import org.apache.hadoop.mapreduce.lib.output.{ TextOutputFormat => NewTextOutputFormat } import org.apache.spark.streaming.dstream.PairDStreamFunctions import org.apache.log4j.LogManager import org.json4s._ import org.json4s.native.JsonMethods._ import java.text.SimpleDateFormat import java.util.Date object RedditVariationApp { def main(args: Array[String]) { if (args.length != 2) { System.err.println( "Usage: RedditVariationApp <appname> <input_path>") System.exit(1) } val Seq(appName, inputPath) = args.toSeq val LOG = LogManager.getLogger(this.getClass) val conf = new SparkConf() .setAppName(appName) .setJars(SparkContext.jarOfClass(this.getClass).toSeq) val ssc = new StreamingContext(conf, Seconds(1)) LOG.info("Started at %d".format(ssc.sparkContext.startTime)) val comments = ssc.fileStream[LongWritable, Text, TextInputFormat](inputPath, (f: Path) => true, newFilesOnly = false).map(pair => pair._2.toString) val merged = comments.union(comments) val repartitionedComments = comments.repartition(4) val rddMin = comments.glom().map(arr => arr.minBy(rec => ((parse(rec) \ "created_utc").values.toString.toInt))) ssc.start() ssc.awaitTermination() } }
Example 33
Source File: L3-DStreamWindowAndAction.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark import org.apache.spark.SparkContext import org.apache.spark.SparkConf import org.apache.spark.streaming.{ Milliseconds, Seconds, StreamingContext } import org.apache.hadoop.io.{ Text, LongWritable, IntWritable } import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.lib.input.TextInputFormat import org.apache.spark.streaming.dstream.DStream import org.apache.hadoop.mapred.TextOutputFormat import org.apache.hadoop.mapreduce.lib.output.{ TextOutputFormat => NewTextOutputFormat } import org.apache.spark.streaming.dstream.PairDStreamFunctions import org.apache.log4j.LogManager import org.json4s._ import org.json4s.native.JsonMethods._ import java.text.SimpleDateFormat import java.util.Date import org.apache.spark.HashPartitioner object RedditWindowAndActionApp { def main(args: Array[String]) { if (args.length != 2) { System.err.println( "Usage: RedditWindowAndActionApp <appname> <input_path>") System.exit(1) } val Seq(appName, inputPath) = args.toSeq val LOG = LogManager.getLogger(this.getClass) val conf = new SparkConf() .setAppName(appName) .setJars(SparkContext.jarOfClass(this.getClass).toSeq) val ssc = new StreamingContext(conf, Seconds(1)) LOG.info("Started at %d".format(ssc.sparkContext.startTime)) val comments = ssc.fileStream[LongWritable, Text, TextInputFormat](inputPath, (f: Path) => true, newFilesOnly = false).map(pair => pair._2.toString) val checkpointPath = "/tmp" ssc.checkpoint(checkpointPath) val updateFunc = (values: Seq[Int], state: Option[Int]) => { val currentCount = values.sum val previousCount = state.getOrElse(0) Some(currentCount + previousCount) } val keyedBySubredditState = comments.map(rec => (((parse(rec)) \ "subreddit").values.toString, 1)) val globalCount = keyedBySubredditState.updateStateByKey(updateFunc) .map(r => (r._2, r._1)) .transform(rdd => rdd.sortByKey(ascending = false)) val distinctSubreddits = comments.map(rec => ((parse(rec)) \ "subreddit").values.toString) val windowedRecs = distinctSubreddits.window(Seconds(5), Seconds(5)) val windowedCounts = windowedRecs.countByValue() windowedCounts.print(10) windowedCounts.saveAsObjectFiles("subreddit", "obj") windowedCounts.saveAsTextFiles("subreddit", "txt") globalCount.saveAsHadoopFiles("subreddit", "hadoop", classOf[IntWritable], classOf[Text], classOf[TextOutputFormat[IntWritable, Text]]) globalCount.saveAsNewAPIHadoopFiles("subreddit", "newhadoop", classOf[IntWritable], classOf[Text], classOf[NewTextOutputFormat[IntWritable, Text]]) comments.foreachRDD(rdd => { LOG.info("RDD: %s, Count: %d".format(rdd.id, rdd.count())) }) ssc.start() ssc.awaitTermination() } }
Example 34
Source File: L3-DStreamAggregation.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark import org.apache.spark.SparkContext import org.apache.spark.SparkConf import org.apache.spark.streaming.{ Milliseconds, Seconds, StreamingContext } import org.apache.hadoop.io.{ Text, LongWritable, IntWritable } import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.lib.input.TextInputFormat import org.apache.spark.streaming.dstream.DStream import org.apache.hadoop.mapred.TextOutputFormat import org.apache.hadoop.mapreduce.lib.output.{ TextOutputFormat => NewTextOutputFormat } import org.apache.spark.streaming.dstream.PairDStreamFunctions import org.apache.log4j.LogManager import org.json4s._ import org.json4s.native.JsonMethods._ import java.text.SimpleDateFormat import java.util.Date object RedditAggregationApp { def main(args: Array[String]) { if (args.length != 2) { System.err.println( "Usage: RedditAggregationApp <appname> <input_path>") System.exit(1) } val Seq(appName, inputPath) = args.toSeq val LOG = LogManager.getLogger(this.getClass) val conf = new SparkConf() .setAppName(appName) .setJars(SparkContext.jarOfClass(this.getClass).toSeq) val ssc = new StreamingContext(conf, Seconds(1)) LOG.info("Started at %d".format(ssc.sparkContext.startTime)) val comments = ssc.fileStream[LongWritable, Text, TextInputFormat](inputPath, (f: Path) => true, newFilesOnly = false).map(pair => pair._2.toString) val recCount = comments.count() val recCountValue = comments.countByValue() val totalWords = comments.map(rec => ((parse(rec) \ "body").values.toString)) .flatMap(body => body.split(" ")) .map(word => 1) .reduce(_ + _) ssc.start() ssc.awaitTermination() } }
Example 35
Source File: package.scala From sbt-flaky with Apache License 2.0 | 5 votes |
package flaky import java.io.File import java.text.SimpleDateFormat import java.util.Date import scalatags.Text import scalatags.Text.all.{a, hr, href, p, _} package object web { def footer(): Text.TypedTag[String] = { p( hr(), p( ReportCss.footer, "Created with ", a(href := "https://github.com/otrebski/sbt-flaky", "sbt-flaky plugin"), br, s"Report generated at ${new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date())}", s"Fugue icons are on Creative Common license" ) ) } def indexHtml(reportFile: File, historyFile: Option[File]): String = { val history = historyFile match { case Some(fileName) => a(href := fileName.getName, "History trends") case None => p( "History trends report is not created. To enable history check documentation at ", a(href := "https://github.com/otrebski/sbt-flaky", "https://github.com/otrebski/sbt-flaky") ) } html( head(link(rel := "stylesheet", href := "report.css")), body( h1(ReportCss.title, "Flaky test report"), h4(ReportCss.subtitle, a(href := reportFile.getName, "Report for last build")), h4(ReportCss.subtitle, history), footer() ) ).render } def anchorTest(test: Test): String = s"${test.clazz}_${test.test}" def anchorClass(test: Test): String = test.clazz def anchorTestRun(testCase: TestCase): String = testCase.runName def singleTestDir(test: Test): String = test.clazz def singleTestFileName(test: Test): String = s"${test.test.replaceAll("/", "_")}.html" def linkToSingleTest(test: Test): String = singleTestDir(test) + "/" + singleTestFileName(test) def linkToSingleTestClass(clazz: String): String = s"flaky-report.html#$clazz" def linkToRunNameInSingleTest(test: Test,runName:String) = s"${linkToSingleTest(test)}#$runName" }
Example 36
Source File: History.scala From sbt-flaky with Apache License 2.0 | 5 votes |
package flaky.history import java.io.{File, FileFilter, InputStream} import java.text.SimpleDateFormat import java.util.Date import flaky.{Flaky, FlakyTestReport, Io} import org.apache.commons.vfs2.VFS import scala.xml.XML class History(project: String, historyDir: File, flakyReportDir: File, projectDir: File) { private val zipFileFilter = new FileFilter { override def accept(pathname: File): Boolean = pathname.getName.endsWith(".zip") } private def runFiles(historyDir: File): List[File] = historyDir.listFiles(zipFileFilter).toList def addCurrentToHistory(): Unit = { val timestamp = System.currentTimeMillis() val date = new SimpleDateFormat(History.dateFormat).format(new Date(timestamp)) val gitCommit = Git(projectDir).currentId().toOption val historyReportDescription = HistoryReportDescription(timestamp, gitCommit) HistoryReportDescription.save(historyReportDescription, new File(flakyReportDir, History.descriptorFile)) Zip.compressFolder(new File(historyDir, s"$date.zip"), flakyReportDir) } def removeToOldFromHistory(maxToKeep: Int): Unit = { runFiles(historyDir) .take(Math.max(runFiles(historyDir).size - maxToKeep, 0)) .foreach(_.delete()) } def createHistoryReport(): HistoryReport = { val historicalRuns: List[HistoricalRun] = runFiles(historyDir) .map(History.loadHistory) val date = new SimpleDateFormat("HH:mm dd-MM-YYYY").format(new Date()) HistoryReport(project, date, historicalRuns) } def processHistory(): HistoryReport = { historyDir.mkdirs() addCurrentToHistory() removeToOldFromHistory(20) createHistoryReport() } } case class HistoryReportDescription(timestamp: Long, gitCommitHash: Option[String]) object HistoryReportDescription { def load(in: InputStream): HistoryReportDescription = { val descriptorXml = XML.load(in) val timestamp = (descriptorXml \ "timestamp").text.trim.toLong val gitHash = (descriptorXml \ "gitCommitHash").text.trim HistoryReportDescription(timestamp, Some(gitHash)) } def save(historyReportDescription: HistoryReportDescription, file: File): Unit = { val xml = <HistoryReportDescription> <timestamp> {historyReportDescription.timestamp} </timestamp> <gitCommitHash> {historyReportDescription.gitCommitHash.getOrElse("")} </gitCommitHash> </HistoryReportDescription> val prettyXml = new scala.xml.PrettyPrinter(80, 2).format(xml) Io.writeToFile(file, prettyXml) } } object History { val descriptorFile = "descriptor.xml" val dateFormat = "yyyyMMdd-HHmmss" def loadHistory: (File) => HistoricalRun = { file => { val manager = VFS.getManager val uri = file.toURI.toString.replace("file:/", "zip:/") val fo = manager.resolveFile(uri) val report: FlakyTestReport = Flaky.createReportFromHistory(fo) val descriptorFile = Option(fo.getChild(History.descriptorFile)) val dateFromFileName = file.getName.replace(".zip","") val hrd = descriptorFile .filter(_.exists()) .map(f => HistoryReportDescription.load(f.getContent.getInputStream)) .getOrElse(HistoryReportDescription(new SimpleDateFormat(dateFormat).parse(dateFromFileName).getTime, None)) HistoricalRun(hrd, report) } } }
Example 37
Source File: HistorySpec.scala From sbt-flaky with Apache License 2.0 | 5 votes |
package flaky.history

import java.io.File
import java.text.SimpleDateFormat

import org.scalatest.{Matchers, WordSpec}

class HistorySpec extends WordSpec with Matchers {

  val fileWithDescriptor = "20170516-072750.zip"
  val fileWithoutDescriptor = "20170516-072825.zip"
  val dirWithReports = new File("./src/test/resources/history")

  "HistoryTest" should {
    "loadHistory with descriptor" in {
      val historicalRun: HistoricalRun = History.loadHistory.apply(new File(dirWithReports, fileWithDescriptor))
      historicalRun.historyReportDescription shouldBe HistoryReportDescription(123456L, Some("abcdefg"))
    }

    "loadHistory without descriptor" in {
      // Timestamp can't be hardcoded, because loadHistory tries to parse date from file name
      // with local time zone
      val timestamp = new SimpleDateFormat("yyyyMMdd-HHmmss").parse("20170516-072825").getTime
      val historicalRun: HistoricalRun = History.loadHistory.apply(new File(dirWithReports, fileWithoutDescriptor))
      historicalRun.historyReportDescription shouldBe HistoryReportDescription(timestamp, None)
    }
  }
}
Example 38
Source File: FieldSequentialValue.scala From schedoscope with Apache License 2.0 | 5 votes |
package org.schedoscope.test

import java.text.SimpleDateFormat
import java.util.Date

import org.schedoscope.dsl.{FieldLike, Structure}

object FieldSequentialValue {

  def get(f: FieldLike[_], i: Int, p: String): Any = {
    if (f.t == manifest[Int])
      i
    else if (f.t == manifest[Long])
      i.toLong
    else if (f.t == manifest[Byte])
      i.toByte
    else if (f.t == manifest[Boolean])
      i % 2 == 0
    else if (f.t == manifest[Double])
      i.toDouble
    else if (f.t == manifest[Float])
      i.toFloat
    else if (f.t == manifest[Date])
      new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX").format(new Date(i * 1000L))
    else if (f.t == manifest[String])
      f.n + "-" + p.format(i)
    else if (classOf[Structure].isAssignableFrom(f.t.runtimeClass)) {
      f.t.runtimeClass.newInstance().asInstanceOf[Structure].fields.map(sf => (sf.n, get(sf, i, p))).toMap
    } else if (f.t.runtimeClass == classOf[List[_]]) {
      List()
    } else if (f.t.runtimeClass == classOf[Map[_, _]])
      Map()
    else
      throw new RuntimeException("Cannot generate random values for: " + f.n + ", type is: " + f.t)
  }
}
Example 39
Source File: Globals.scala From schedoscope with Apache License 2.0 | 5 votes |
package schedoscope.example.osm

import java.text.SimpleDateFormat
import java.util.Date

import org.schedoscope.Settings
import org.schedoscope.dsl.View
import org.schedoscope.dsl.views.MonthlyParameterization

object Globals {

  def defaultHiveQlParameters(v: View) = {
    val baseParameters = Map(
      "env" -> v.env,
      "workflow_time" -> new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX").format(new Date),
      "workflow_name" -> v.getClass().getName())

    if (v.isInstanceOf[MonthlyParameterization])
      baseParameters ++ Map(
        "year" -> v.asInstanceOf[MonthlyParameterization].year.v.get,
        "month" -> v.asInstanceOf[MonthlyParameterization].month.v.get)
    else
      baseParameters
  }

  def defaultPigProperties(v: View) = Map(
    "exec.type" -> "MAPREDUCE",
    "mapred.job.tracker" -> Settings().jobTrackerOrResourceManager,
    "fs.default.name" -> Settings().nameNode,
    "workflow_time" -> new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX").format(new Date),
    "workflow_name" -> v.getClass().getName())
}
Example 40
Source File: CustomTelemetryService.scala From finagle-prometheus with MIT License | 5 votes |
package com.samstarling.prometheusfinagle.examples

import java.text.SimpleDateFormat
import java.util.Calendar

import com.samstarling.prometheusfinagle.metrics.Telemetry
import com.twitter.finagle.Service
import com.twitter.finagle.http.{Request, Response, Status}
import com.twitter.util.Future

class CustomTelemetryService(telemetry: Telemetry) extends Service[Request, Response] {

  private val dayOfWeekFormat = new SimpleDateFormat("E")

  private val counter = telemetry.counter("requests_by_day_of_week",
    "Help text",
    Seq("day_of_week"))

  override def apply(request: Request): Future[Response] = {
    dayOfWeek
    counter.labels(dayOfWeek).inc()
    val rep = Response(request.version, Status.Ok)
    rep.setContentString("Your request was logged!")
    Future(rep)
  }

  private def dayOfWeek: String = {
    dayOfWeekFormat.format(Calendar.getInstance.getTime)
  }
}
Example 41
Source File: DirectDataInjector.scala From SparkOnKudu with Apache License 2.0 | 5 votes |
package org.kududb.spark.demo.gamer.cdc import java.text.SimpleDateFormat import java.util.Random import org.kududb.client.{PartialRow, Operation, KuduClient} import org.kududb.spark.demo.gamer.aggregates.GamerDataGenerator class DirectDataInjector { val simpleDateFormat = new SimpleDateFormat("MM,dd,yyyy") val random = new Random def main(args:Array[String]): Unit = { if (args.length == 0) { println("<kuduMaster> <tableName> <numberOfRecords>") return } val kuduMaster = args(0) val tableName = args(1) val numberOfRecords = args(2).toInt val kuduClient = new KuduClient.KuduClientBuilder(kuduMaster).build() val table = kuduClient.openTable(tableName) val session = kuduClient.newSession() for (i <- 0 to numberOfRecords) { val record = GamerDataGenerator.makeNewGamerRecord(100000) val pr = new PartialRow(table.getSchema) pr.addString(0, "record.gamerId") pr.addString(1, "") val scannerRows = kuduClient.newScannerBuilder(table).lowerBound(null).limit(1).build().nextRows() val op:Operation = if (scannerRows.hasNext) { val oldRow = scannerRows.next() val oldRecordUpdateOp = table.newInsert() val row = oldRecordUpdateOp.getRow row.addString("gamer_id", oldRow.getString("gamer_id")) row.addString("eff_to", simpleDateFormat.format(System.currentTimeMillis())) row.addString("eff_from", oldRow.getString("eff_from")) row.addLong("last_time_played", oldRow.getLong("last_time_played")) row.addInt("games_played", oldRow.getInt("games_played")) row.addInt("games_won", oldRow.getInt("games_won")) row.addInt("oks", oldRow.getInt("oks")) row.addInt("deaths", oldRow.getInt("deaths")) row.addInt("damage_given", oldRow.getInt("damage_given")) row.addInt("damage_taken", oldRow.getInt("damage_taken")) row.addInt("max_oks_in_one_game", oldRow.getInt("max_oks_in_one_game")) row.addInt("max_deaths_in_one_game", oldRow.getInt("max_deaths_in_one_game")) session.apply(oldRecordUpdateOp) table.newUpdate() } else { table.newInsert() } val row = op.getRow row.addString("gamer_id", record.gamerId) row.addString("eff_to", "") row.addString("eff_from", simpleDateFormat.format(System.currentTimeMillis())) row.addLong("last_time_played", record.lastTimePlayed) row.addInt("games_played", record.gamesPlayed) row.addInt("games_won", record.gamesWon) row.addInt("oks", record.oks) row.addInt("deaths", record.deaths) row.addInt("damage_given", record.damageGiven) row.addInt("damage_taken", record.damageTaken) row.addInt("max_oks_in_one_game", record.maxOksInOneGame) row.addInt("max_deaths_in_one_game", record.maxDeathsInOneGame) session.apply(op) } session.flush() kuduClient.close() } }
Example 42
Source File: DirectDataMultiThreadedInjector.scala From SparkOnKudu with Apache License 2.0 | 5 votes |
package org.kududb.spark.demo.gamer.cdc

import java.text.SimpleDateFormat
import java.util.Random
import java.util.concurrent.atomic.AtomicInteger
import java.util.concurrent.{TimeUnit, Executors}

import org.kududb.client.{Operation, PartialRow, KuduClient}
import org.kududb.spark.demo.gamer.aggregates.GamerDataGenerator

object DirectDataMultiThreadedInjector {
  val simpleDateFormat = new SimpleDateFormat("MM,dd,yyyy")
  val random = new Random

  def main(args: Array[String]): Unit = {
    if (args.length == 0) {
      println("<kuduMaster> <tableName> <numberOfRecords> <numberOfThreads> <numberOfGamers> <sleepTime>")
      return
    }

    val kuduMaster = args(0)
    val tableName = args(1)
    val numberOfRecords = args(2).toInt
    val executor = Executors.newFixedThreadPool(args(3).toInt)
    val numberOfGamers = args(4).toInt
    val sleepTime = args(5).toInt

    val kuduClient = new KuduClient.KuduClientBuilder(kuduMaster).build()
    val leftToRun = new AtomicInteger()

    for (i <- 0 to numberOfRecords) {
      leftToRun.incrementAndGet()
      executor.execute(new ApplyNewRecordRunnable(GamerDataGenerator.makeNewGamerRecord(numberOfGamers),
        kuduClient, tableName, leftToRun))
      println("Submitted:" + i)
      Thread.sleep(sleepTime)
    }

    val startTime = System.currentTimeMillis()
    while (!executor.awaitTermination(10000, TimeUnit.SECONDS)) {
      val newTime = System.currentTimeMillis()
      println("> Still Waiting: {Time:" + (newTime - startTime) + ", LeftToRun:" + leftToRun + "}")
    }

    kuduClient.close()
  }
}
Example 43
Source File: ApplyNewRecordRunnable.scala From SparkOnKudu with Apache License 2.0 | 5 votes |
package org.kududb.spark.demo.gamer.cdc import java.text.SimpleDateFormat import java.util.concurrent.atomic.AtomicInteger import org.kududb.client.{Operation, PartialRow, KuduClient} import org.kududb.spark.demo.gamer.GamerEvent class ApplyNewRecordRunnable(val gameEvent: GamerEvent, val kuduClient: KuduClient, val tableName: String, val leftToRun:AtomicInteger) extends Runnable{ override def run(): Unit = { val table = kuduClient.openTable(tableName) val session = kuduClient.newSession() val simpleDateFormat = new SimpleDateFormat("MM,dd,yyyy") val record = gameEvent val pr = new PartialRow(table.getSchema) pr.addString(0, record.gamerId) pr.addString(1, "") val scannerRows = kuduClient.newScannerBuilder(table).lowerBound(pr).limit(1).build().nextRows() val op:Operation = if (scannerRows.hasNext) { println(" >> had next") val oldRow = scannerRows.next() val oldRecordUpdateOp = table.newInsert() val row = oldRecordUpdateOp.getRow row.addString("gamer_id", oldRow.getString("gamer_id")) row.addString("eff_to", simpleDateFormat.format(record.lastTimePlayed)) row.addString("eff_from", oldRow.getString("eff_from")) row.addLong("last_time_played", oldRow.getLong("last_time_played")) row.addInt("games_played", oldRow.getInt("games_played")) row.addInt("games_won", oldRow.getInt("games_won")) row.addInt("oks", oldRow.getInt("oks")) row.addInt("deaths", oldRow.getInt("deaths")) row.addInt("damage_given", oldRow.getInt("damage_given")) row.addInt("damage_taken", oldRow.getInt("damage_taken")) row.addInt("max_oks_in_one_game", oldRow.getInt("max_oks_in_one_game")) row.addInt("max_deaths_in_one_game", oldRow.getInt("max_deaths_in_one_game")) session.apply(oldRecordUpdateOp) table.newUpdate() } else { table.newInsert() } val row = op.getRow row.addString("gamer_id", record.gamerId) row.addString("eff_to", "") row.addString("eff_from", simpleDateFormat.format(record.lastTimePlayed)) row.addLong("last_time_played", record.lastTimePlayed) row.addInt("games_played", record.gamesPlayed) row.addInt("games_won", record.gamesWon) row.addInt("oks", record.oks) row.addInt("deaths", record.deaths) row.addInt("damage_given", record.damageGiven) row.addInt("damage_taken", record.damageTaken) row.addInt("max_oks_in_one_game", record.maxOksInOneGame) row.addInt("max_deaths_in_one_game", record.maxDeathsInOneGame) session.apply(op) session.flush() leftToRun.decrementAndGet() println(" >> finished Submit") } }
Example 44
Source File: SequenceFileSink.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.hadoop import java.text.SimpleDateFormat import org.apache.hadoop.fs.Path import org.apache.hadoop.hdfs.HdfsConfiguration import org.apache.hadoop.io.SequenceFile import org.apache.gearpump.Message import org.apache.gearpump.cluster.UserConfig import org.apache.gearpump.streaming.hadoop.lib.HadoopUtil import org.apache.gearpump.streaming.hadoop.lib.format.{DefaultSequenceFormatter, OutputFormatter} import org.apache.gearpump.streaming.hadoop.lib.rotation.{FileSizeRotation, Rotation} import org.apache.gearpump.streaming.sink.DataSink import org.apache.gearpump.streaming.task.{TaskContext, TaskId} class SequenceFileSink( userConfig: UserConfig, basePath: String, rotation: Rotation = new FileSizeRotation(128 * Math.pow(2, 20).toLong), sequenceFormat: OutputFormatter = new DefaultSequenceFormatter) extends DataSink{ @transient private lazy val configuration = new HdfsConfiguration() private val dateFormat = new SimpleDateFormat("yyyy_MM_dd-HH-mm-ss") private var writer: SequenceFile.Writer = null private var taskId: TaskId = null private var appName: String = null override def close(): Unit = { closeWriter() } private def closeWriter(): Unit = { Option(writer).foreach { w => w.hflush() w.close() } } private def getNextWriter: SequenceFile.Writer = { SequenceFile.createWriter( configuration, SequenceFile.Writer.file(getNextFilePath), SequenceFile.Writer.keyClass(sequenceFormat.getKeyClass), SequenceFile.Writer.valueClass(sequenceFormat.getValueClass) ) } private def getNextFilePath: Path = { val base = new Path(basePath, s"$appName-task${taskId.processorId}_${taskId.index}") new Path(base, dateFormat.format(new java.util.Date)) } }
Example 45
Source File: ImageProcessing.scala From 006877 with MIT License | 5 votes |
package aia.routing

import java.text.SimpleDateFormat
import java.util.Date

case class Photo(license: String, speed: Int)

object ImageProcessing {
  val dateFormat = new SimpleDateFormat("ddMMyyyy HH:mm:ss.SSS")

  def getSpeed(image: String): Option[Int] = {
    val attributes = image.split('|')
    if (attributes.size == 3)
      Some(attributes(1).toInt)
    else
      None
  }

  def getTime(image: String): Option[Date] = {
    val attributes = image.split('|')
    if (attributes.size == 3)
      Some(dateFormat.parse(attributes(0)))
    else
      None
  }

  def getLicense(image: String): Option[String] = {
    val attributes = image.split('|')
    if (attributes.size == 3)
      Some(attributes(2))
    else
      None
  }

  def createPhotoString(date: Date, speed: Int): String = {
    createPhotoString(date, speed, " ")
  }

  def createPhotoString(date: Date, speed: Int, license: String): String = {
    "%s|%s|%s".format(dateFormat.format(date), speed, license)
  }
}
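Because createPhotoString and the extractors above share the same pipe-delimited layout and SimpleDateFormat pattern, an entry round-trips cleanly. A minimal sketch, not part of the original listing (the demo object and values are made up):

import java.util.Date
import aia.routing.ImageProcessing

object ImageProcessingDemo extends App {
  // Builds an entry such as "01062021 12:00:00.000|60|XY-123" and reads it back.
  val entry = ImageProcessing.createPhotoString(new Date(), 60, "XY-123")
  println(ImageProcessing.getSpeed(entry))   // Some(60)
  println(ImageProcessing.getLicense(entry)) // Some(XY-123)
  println(ImageProcessing.getTime(entry))    // Some(<capture time, millisecond precision>)
}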
Example 46
Source File: ThriftJsonServlet.scala From incubator-livy with Apache License 2.0 | 5 votes |
package org.apache.livy.thriftserver.ui

import java.text.SimpleDateFormat

import org.apache.livy.server.JsonServlet
import org.apache.livy.thriftserver.LivyThriftServer

class ThriftJsonServlet(val basePath: String) extends JsonServlet {

  private val df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss z")

  case class SessionInfo(
      sessionId: String,
      livySessionId: String,
      owner: String,
      createdAt: String)

  get("/sessions") {
    val thriftSessions = LivyThriftServer.getInstance.map { server =>
      val sessionManager = server.getSessionManager
      sessionManager.getSessions.map { sessionHandle =>
        val info = sessionManager.getSessionInfo(sessionHandle)
        SessionInfo(sessionHandle.getSessionId.toString,
          sessionManager.livySessionId(sessionHandle).map(_.toString).getOrElse(""),
          info.username,
          df.format(info.creationTime))
      }.toSeq
    }.getOrElse(Seq.empty)
    val from = params.get("from").map(_.toInt).getOrElse(0)
    val size = params.get("size").map(_.toInt).getOrElse(100)

    Map(
      "from" -> from,
      "total" -> thriftSessions.length,
      "sessions" -> thriftSessions.view(from, from + size))
  }
}
Example 47
Source File: GMMClusteringPersist.scala From Machine-Learning-with-Spark-Second-Edition with MIT License | 5 votes |
package org.sparksamples.gmm import java.text.SimpleDateFormat import org.apache.spark.SparkConf import org.apache.spark.ml.clustering.{GaussianMixture} import org.apache.spark.sql.SparkSession object GMMClusteringPersist { val PATH = "/home/ubuntu/work/spark-2.0.0-bin-hadoop2.7/" val BASE = "./data/movie_lens_libsvm_2f" val time = System.currentTimeMillis() val formatter = new SimpleDateFormat("dd_MM_yyyy_hh_mm_ss") import java.util.Calendar val calendar = Calendar.getInstance() calendar.setTimeInMillis(time) val date_time = formatter.format(calendar.getTime()) def main(args: Array[String]): Unit = { val spConfig = (new SparkConf).setMaster("local[1]").setAppName("SparkApp"). set("spark.driver.allowMultipleContexts", "true") val spark = SparkSession .builder() .appName("Spark SQL Example") .config(spConfig) .getOrCreate() val datasetUsers = spark.read.format("libsvm").load( BASE + "/movie_lens_2f_users_libsvm/part-00000") datasetUsers.show(3) val gmmUsers = new GaussianMixture().setK(5).setSeed(1L) gmmUsers.setMaxIter(20) val modelUsers = gmmUsers.fit(datasetUsers) val predictedDataSetUsers = modelUsers.transform(datasetUsers) val predictionsUsers = predictedDataSetUsers.select("prediction").rdd.map(x=> x(0)) predictionsUsers.saveAsTextFile(BASE + "/prediction/" + date_time + "/gmm_2f_users") val dataSetItems = spark.read.format("libsvm").load(BASE + "/movie_lens_2f_items_libsvm/part-00000") val gmmItems = new GaussianMixture().setK(5).setSeed(1L) val modelItems = gmmItems.fit(dataSetItems) val predictedDataSetItems = modelItems.transform(dataSetItems) val predictionsItems = predictedDataSetItems.select("prediction").rdd.map(x=> x(0)) predictionsItems.saveAsTextFile(BASE + "/prediction/" + date_time + "/gmm_2f_items") spark.stop() } }
Example 48
Source File: BisectingKMeansPersist.scala From Machine-Learning-with-Spark-Second-Edition with MIT License | 5 votes |
package org.sparksamples.kmeans import java.text.SimpleDateFormat import org.apache.spark.SparkConf import org.apache.spark.ml.clustering.BisectingKMeans import org.apache.spark.sql.SparkSession object BisectingKMeansPersist { val PATH = "/home/ubuntu/work/spark-2.0.0-bin-hadoop2.7/" val BASE = "./data/movie_lens_libsvm_2f" val time = System.currentTimeMillis() val formatter = new SimpleDateFormat("dd_MM_yyyy_hh_mm_ss") import java.util.Calendar val calendar = Calendar.getInstance() calendar.setTimeInMillis(time) val date_time = formatter.format(calendar.getTime()) def main(args: Array[String]): Unit = { val spConfig = (new SparkConf).setMaster("local[1]").setAppName("SparkApp"). set("spark.driver.allowMultipleContexts", "true") val spark = SparkSession .builder() .appName("Spark SQL Example") .config(spConfig) .getOrCreate() val datasetUsers = spark.read.format("libsvm").load( BASE + "/movie_lens_2f_users_xy/part-00000") datasetUsers.show(3) val bKMeansUsers = new BisectingKMeans() bKMeansUsers.setMaxIter(10) bKMeansUsers.setMinDivisibleClusterSize(5) val modelUsers = bKMeansUsers.fit(datasetUsers) val predictedUserClusters = modelUsers.transform(datasetUsers) modelUsers.clusterCenters.foreach(println) val predictedDataSetUsers = modelUsers.transform(datasetUsers) val predictionsUsers = predictedDataSetUsers.select("prediction").rdd.map(x=> x(0)) predictionsUsers.saveAsTextFile(BASE + "/prediction/" + date_time + "/bkmeans_2f_users") val datasetItems = spark.read.format("libsvm").load(BASE + "/movie_lens_2f_items_xy/part-00000") datasetItems.show(3) val kmeansItems = new BisectingKMeans().setK(5).setSeed(1L) val modelItems = kmeansItems.fit(datasetItems) val predictedDataSetItems = modelItems.transform(datasetItems) val predictionsItems = predictedDataSetItems.select("prediction").rdd.map(x=> x(0)) predictionsItems.saveAsTextFile(BASE + "/prediction/" + date_time + "/bkmeans_2f_items") spark.stop() } }
Example 49
Source File: ScalaApp.scala From Machine-Learning-with-Spark-Second-Edition with MIT License | 5 votes |
import java.text.SimpleDateFormat import java.util.Calendar import org.apache.spark.SparkContext import org.apache.spark.mllib.recommendation.{ALS, Rating} //import org.apache.spark. val predictedRating = model.predict(789, 123) println(predictedRating) val userId = 789 val K = 10 val topKRecs = model.recommendProducts(userId, K) println(topKRecs.mkString("\n")) val movies = sc.textFile(PATH + "/ml-100k/u.item") val titles = movies.map(line => line.split("\\|").take(2)).map(array => (array(0).toInt, array(1))).collectAsMap() titles(123) // res68: String = Frighteners, The (1996) val moviesForUser = ratings.keyBy(_.user).lookup(789) // moviesForUser: Seq[org.apache.spark.mllib.recommendation.Rating] = WrappedArray(Rating(789,1012,4.0), Rating(789,127,5.0), Rating(789,475,5.0), Rating(789,93,4.0), ... // ... println(moviesForUser.size) moviesForUser.sortBy(-_.rating).take(10).map(rating => (titles(rating.product), rating.rating)).foreach(println) topKRecs.map(rating => (titles(rating.product), rating.rating)).foreach(println) sc.stop() //bw.close() } class Util { def getDate(): String = { val today = Calendar.getInstance().getTime() // (2) create a date "formatter" (the date format we want) val formatter = new SimpleDateFormat("yyyy-MM-dd-hh.mm.ss") // (3) create a new String using the date format we want val folderName = formatter.format(today) return folderName } } }
Example 50
Source File: Util.scala From Machine-Learning-with-Spark-Second-Edition with MIT License | 5 votes |
package com.sparksample

import java.text.SimpleDateFormat
import java.util.Calendar

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD
// DoubleMatrix is the dense vector/matrix type from the jblas linear algebra library
import org.jblas.DoubleMatrix

object Util {
  val PATH = "../.."
  val spConfig = (new SparkConf).setMaster("local").setAppName("SparkApp")
  var sc = new SparkContext(spConfig)

  def getMovieData() : RDD[String] = {
    val movie_data = sc.textFile(PATH + "/data/ml-100k/u.item")
    return movie_data
  }

  def getUserData() : RDD[String] = {
    val user_data = sc.textFile(PATH + "/data/ml-100k/u.data")
    return user_data
  }

  def getDate(): String = {
    val today = Calendar.getInstance().getTime()
    // (2) create a date "formatter" (the date format we want)
    val formatter = new SimpleDateFormat("yyyy-MM-dd-hh.mm.ss")
    // (3) create a new String using the date format we want
    val folderName = formatter.format(today)
    return folderName
  }

  def cosineSimilarity(vec1: DoubleMatrix, vec2: DoubleMatrix): Double = {
    vec1.dot(vec2) / (vec1.norm2() * vec2.norm2())
  }

  def avgPrecisionK(actual: Seq[Int], predicted: Seq[Int], k: Int): Double = {
    val predK = predicted.take(k)
    var score = 0.0
    var numHits = 0.0
    for ((p, i) <- predK.zipWithIndex) {
      if (actual.contains(p)) {
        numHits += 1.0
        score += numHits / (i.toDouble + 1.0)
      }
    }
    if (actual.isEmpty) {
      1.0
    } else {
      score / scala.math.min(actual.size, k).toDouble
    }
  }
}
Example 51
Source File: PMMLModelExport.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export

import java.text.SimpleDateFormat
import java.util.{Date, Locale}

import scala.beans.BeanProperty

import org.dmg.pmml.{Application, Header, PMML, Timestamp}

private[mllib] trait PMMLModelExport {

  @BeanProperty
  val pmml: PMML = {
    val version = getClass.getPackage.getImplementationVersion
    val app = new Application("Apache Spark MLlib").setVersion(version)
    val timestamp = new Timestamp()
      .addContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US).format(new Date()))
    val header = new Header()
      .setApplication(app)
      .setTimestamp(timestamp)
    new PMML("4.2", header, null)
  }
}
Example 52
Source File: JacksonMessageWriter.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.status.api.v1 import java.io.OutputStream import java.lang.annotation.Annotation import java.lang.reflect.Type import java.nio.charset.StandardCharsets import java.text.SimpleDateFormat import java.util.{Calendar, Locale, SimpleTimeZone} import javax.ws.rs.Produces import javax.ws.rs.core.{MediaType, MultivaluedMap} import javax.ws.rs.ext.{MessageBodyWriter, Provider} import com.fasterxml.jackson.annotation.JsonInclude import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} @Provider @Produces(Array(MediaType.APPLICATION_JSON)) private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{ val mapper = new ObjectMapper() { override def writeValueAsString(t: Any): String = { super.writeValueAsString(t) } } mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule) mapper.enable(SerializationFeature.INDENT_OUTPUT) mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL) mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat) override def isWriteable( aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType): Boolean = { true } override def writeTo( t: Object, aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType, multivaluedMap: MultivaluedMap[String, AnyRef], outputStream: OutputStream): Unit = { t match { case ErrorWrapper(err) => outputStream.write(err.getBytes(StandardCharsets.UTF_8)) case _ => mapper.writeValue(outputStream, t) } } override def getSize( t: Object, aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType): Long = { -1L } } private[spark] object JacksonMessageWriter { def makeISODateFormat: SimpleDateFormat = { val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'", Locale.US) val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT")) iso8601.setCalendar(cal) iso8601 } }
Example 53
Source File: SimpleDateParam.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.status.api.v1

import java.text.{ParseException, SimpleDateFormat}
import java.util.{Locale, TimeZone}
import javax.ws.rs.WebApplicationException
import javax.ws.rs.core.Response
import javax.ws.rs.core.Response.Status

private[v1] class SimpleDateParam(val originalValue: String) {

  val timestamp: Long = {
    val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz", Locale.US)
    try {
      format.parse(originalValue).getTime()
    } catch {
      case _: ParseException =>
        val gmtDay = new SimpleDateFormat("yyyy-MM-dd", Locale.US)
        gmtDay.setTimeZone(TimeZone.getTimeZone("GMT"))
        try {
          gmtDay.parse(originalValue).getTime()
        } catch {
          case _: ParseException =>
            throw new WebApplicationException(
              Response
                .status(Status.BAD_REQUEST)
                .entity("Couldn't parse date: " + originalValue)
                .build()
            )
        }
    }
  }
}
Example 54
Source File: PlainText.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.eidos.utils.meta import java.text.SimpleDateFormat import java.util.Calendar import java.util.TimeZone import org.clulab.wm.eidos.utils.EidosException import org.clulab.timenorm.scate.SimpleInterval import org.clulab.wm.eidos.context.DCT import org.clulab.wm.eidos.document.Metadata class PlainText(text: String, titleOpt: Option[String] = None, idOpt: Option[String] = None, dateOpt: Option[String] = None, locationOpt: Option[String] = None ) extends EidosText { protected val metadata = { val dctOpt: Option[DCT] = { dateOpt.map { date => val calendar = try { val parsed = PlainText.dateFormat.parse(date) val calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC")) calendar.setTime(parsed) calendar } catch { case throwable: Throwable => throw new EidosException(s"""Could not decipher "${date}" as a date""", throwable) } val simpleInterval = SimpleInterval.of(calendar.get(Calendar.YEAR), calendar.get(Calendar.MONTH) + 1, calendar.get(Calendar.DAY_OF_MONTH)) DCT(simpleInterval, date) } } new Metadata(dctOpt, idOpt, titleOpt, locationOpt) } def getText: String = text def getMetadata: Metadata = metadata } object PlainText { protected val dateFormat: SimpleDateFormat = { val dateFormat = new SimpleDateFormat("yyyy-MM-dd") val timeZone = TimeZone.getTimeZone("UTC") dateFormat.setTimeZone(timeZone) dateFormat } }
Example 55
Source File: CustomScalarSpec.scala From sangria with Apache License 2.0 | 5 votes |
package sangria.schema import java.text.SimpleDateFormat import java.util.Date import sangria.ast import sangria.util.Pos import sangria.util.SimpleGraphQlSupport._ import sangria.validation.ValueCoercionViolation import scala.util.{Failure, Success, Try} import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class CustomScalarSpec extends AnyWordSpec with Matchers { "Schema" should { "allow to define custom scalar types" in { val dateFormat = new SimpleDateFormat("yyyy-MM-dd") case object DateCoercionViolation extends ValueCoercionViolation("Date value expected") def parseDate(s: String) = Try(dateFormat.parse(s)) match { case Success(d) => Right(d) case Failure(error) => Left(DateCoercionViolation) } val DateType = ScalarType[Date]("Date", description = Some("An example of date scalar type"), coerceOutput = (d, _) => dateFormat.format(d), coerceUserInput = { case s: String => parseDate(s) case _ => Left(DateCoercionViolation) }, coerceInput = { case ast.StringValue(s, _, _, _, _) => parseDate(s) case _ => Left(DateCoercionViolation) }) val DateArg = Argument("dateInput", DateType) val QueryType = ObjectType("Query", fields[Unit, Unit]( Field("foo", DateType, arguments = DateArg :: Nil, resolve = ctx => { val date: Date = ctx.arg(DateArg) new Date(date.getTime + 1000 * 60 * 60 * 24 * 5) }) )) val schema = Schema(QueryType) check(schema, (), """ { foo(dateInput: "2015-05-11") } """, Map("data" -> Map("foo" -> "2015-05-16")) ) checkContainsErrors(schema, (), """ { foo(dateInput: "2015-05-test") } """, null, List("""Expected type 'Date!', found '"2015-05-test"'. Date value expected""" -> List(Pos(3, 28))) ) } } }
Example 56
Source File: CarbonLoadParams.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command.management import java.text.SimpleDateFormat import java.util import scala.collection.mutable import org.apache.hadoop.conf.Configuration import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.command.UpdateTableModel import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.carbondata.core.indexstore.PartitionSpec import org.apache.carbondata.core.statusmanager.SegmentStatus import org.apache.carbondata.core.util.CarbonProperties import org.apache.carbondata.events.OperationContext import org.apache.carbondata.processing.loading.model.CarbonLoadModel case class CarbonLoadParams( sparkSession: SparkSession, tableName: String, sizeInBytes: Long, isOverwriteTable: Boolean, carbonLoadModel: CarbonLoadModel, hadoopConf: Configuration, logicalPartitionRelation: LogicalRelation, dateFormat : SimpleDateFormat, timeStampFormat : SimpleDateFormat, optionsOriginal: Map[String, String], finalPartition : Map[String, Option[String]], currPartitions: util.List[PartitionSpec], partitionStatus : SegmentStatus, var dataFrame: Option[DataFrame], scanResultRDD : Option[RDD[InternalRow]], updateModel: Option[UpdateTableModel], operationContext: OperationContext) { }
Example 57
Source File: TestUpdateAndDeleteWithLargeData.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.spark.testsuite.iud import java.text.SimpleDateFormat import org.apache.spark.sql.test.util.QueryTest import org.apache.spark.sql.{DataFrame, Row, SaveMode} import org.scalatest.BeforeAndAfterAll import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.util.CarbonProperties class TestUpdateAndDeleteWithLargeData extends QueryTest with BeforeAndAfterAll { var df: DataFrame = _ override def beforeAll { dropTable() buildTestData() } private def buildTestData(): Unit = { CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "yyyy-MM-dd") // Simulate data and write to table orders import sqlContext.implicits._ val sdf = new SimpleDateFormat("yyyy-MM-dd") df = sqlContext.sparkSession.sparkContext.parallelize(1 to 1500000) .map(value => (value, new java.sql.Date(sdf.parse("2015-07-" + (value % 10 + 10)).getTime), "china", "aaa" + value, "phone" + 555 * value, "ASD" + (60000 + value), 14999 + value, "ordersTable" + value)) .toDF("o_id", "o_date", "o_country", "o_name", "o_phonetype", "o_serialname", "o_salary", "o_comment") createTable() } private def createTable(): Unit = { df.write .format("carbondata") .option("tableName", "orders") .option("tempCSV", "true") .option("compress", "true") .mode(SaveMode.Overwrite) .save() } private def dropTable() = { sql("DROP TABLE IF EXISTS orders") } test("test the update and delete delete functionality for large data") { sql( """ update ORDERS set (o_comment) = ('yyy')""").show() checkAnswer(sql( """select o_comment from orders limit 2 """), Seq(Row("yyy"), Row("yyy"))) sql("delete from orders where exists (select 1 from orders)") checkAnswer(sql( """ SELECT count(*) FROM orders """), Row(0)) } }
Example 58
Source File: RowStreamParserImp.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.streaming.parser import java.text.SimpleDateFormat import java.util import org.apache.hadoop.conf.Configuration import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.Row import org.apache.spark.sql.types.StructType import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.processing.loading.ComplexDelimitersEnum import org.apache.carbondata.processing.loading.constants.DataLoadProcessorConstants class RowStreamParserImp extends CarbonStreamParser { var configuration: Configuration = null var isVarcharTypeMapping: Array[Boolean] = null var structType: StructType = null var encoder: ExpressionEncoder[Row] = null var timeStampFormat: SimpleDateFormat = null var dateFormat: SimpleDateFormat = null var complexDelimiters: util.ArrayList[String] = new util.ArrayList[String]() var serializationNullFormat: String = null override def initialize(configuration: Configuration, structType: StructType, isVarcharTypeMapping: Array[Boolean]): Unit = { this.configuration = configuration this.structType = structType this.encoder = RowEncoder.apply(this.structType).resolveAndBind() this.isVarcharTypeMapping = isVarcharTypeMapping this.timeStampFormat = new SimpleDateFormat( this.configuration.get(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT)) this.dateFormat = new SimpleDateFormat( this.configuration.get(CarbonCommonConstants.CARBON_DATE_FORMAT)) this.complexDelimiters.add(this.configuration.get("carbon_complex_delimiter_level_1")) this.complexDelimiters.add(this.configuration.get("carbon_complex_delimiter_level_2")) this.complexDelimiters.add(this.configuration.get("carbon_complex_delimiter_level_3")) this.complexDelimiters.add(ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_4.value()) this.serializationNullFormat = this.configuration.get(DataLoadProcessorConstants.SERIALIZATION_NULL_FORMAT) } override def parserRow(value: InternalRow): Array[Object] = { this.encoder.fromRow(value).toSeq.zipWithIndex.map { case (x, i) => FieldConverter.objectToString( x, serializationNullFormat, complexDelimiters, timeStampFormat, dateFormat, isVarcharType = i < this.isVarcharTypeMapping.length && this.isVarcharTypeMapping(i), binaryCodec = null) } }.toArray override def close(): Unit = { } }
Example 59
Source File: Commons.scala From spark-structured-streaming with MIT License | 5 votes |
package com.kafkaToSparkToCass

import java.sql.Timestamp
import java.text.{DateFormat, SimpleDateFormat}

object Commons {

  case class UserEvent(user_id: String, time: Timestamp, event: String) extends Serializable

  def getTimeStamp(timeStr: String): Timestamp = {
    val dateFormat1: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    val dateFormat2: DateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss")

    val date: Option[Timestamp] = {
      try {
        Some(new Timestamp(dateFormat1.parse(timeStr).getTime))
      } catch {
        case e: java.text.ParseException =>
          Some(new Timestamp(dateFormat2.parse(timeStr).getTime))
      }
    }
    date.getOrElse(Timestamp.valueOf(timeStr))
  }
}
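A quick illustration of the fallback above, not taken from the project: the first pattern covers space-separated timestamps, and the ParseException branch covers ISO-style 'T'-separated ones. The demo object and values are invented:

import com.kafkaToSparkToCass.Commons

object CommonsDemo extends App {
  // Matches dateFormat1 ("yyyy-MM-dd HH:mm:ss") directly
  println(Commons.getTimeStamp("2021-06-01 12:30:00"))
  // dateFormat1 fails on the 'T', so dateFormat2 ("yyyy-MM-dd'T'HH:mm:ss") handles it
  println(Commons.getTimeStamp("2021-06-01T12:30:00"))
}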
Example 60
Source File: DateUtils.scala From common4s with Apache License 2.0 | 5 votes |
package commons.mapper.utils

import java.text.{ ParseException, ParsePosition, SimpleDateFormat }
import java.util.Date

object DateUtils {

  def parseDateWithLeniency(str : String, parsePatterns : Array[String], lenient : Boolean) : Date = {
    if (str == null || parsePatterns == null) {
      throw new IllegalArgumentException("Date and Patterns must not be null");
    }

    val parser = new SimpleDateFormat();
    parser.setLenient(lenient);
    val pos = new ParsePosition(0);

    for (parsePattern <- parsePatterns) {
      var pattern = parsePattern;

      // LANG-530 - need to make sure 'ZZ' output doesn't get passed to SimpleDateFormat
      if (parsePattern.endsWith("ZZ")) {
        pattern = pattern.substring(0, pattern.length() - 1);
      }

      parser.applyPattern(pattern);
      pos.setIndex(0);

      var str2 = str;
      // LANG-530 - need to make sure 'ZZ' output doesn't hit SimpleDateFormat as it will ParseException
      if (parsePattern.endsWith("ZZ")) {
        str2 = str.replaceAll("([-+][0-9][0-9]):([0-9][0-9])$", "$1$2");
      }

      val date = parser.parse(str2, pos);
      if (date != null && pos.getIndex() == str2.length()) {
        return date;
      }
    }

    throw new ParseException("Unable to parse the date: " + str, -1);
  }
}
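A hypothetical call against the method above (assuming the enclosing DateUtils object as shown; the demo object and patterns are made up): the loop returns on the first pattern that consumes the entire input.

import commons.mapper.utils.DateUtils

object DateUtilsDemo extends App {
  val patterns = Array("yyyy-MM-dd'T'HH:mm:ss", "yyyy-MM-dd")
  // The first pattern fails on a date-only string; the second parses it completely,
  // so parseDateWithLeniency returns on the second iteration.
  println(DateUtils.parseDateWithLeniency("2022-03-15", patterns, lenient = false))
}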
Example 61
Source File: JsonUtil.scala From ionroller with MIT License | 5 votes |
package ionroller

import java.text.SimpleDateFormat

import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper
import play.api.libs.json.Json

object JsonUtil {

  object Implicits {

    implicit class Unmarshallable(unMarshallMe: String) {
      def toMap: Map[String, Any] = JsonUtil.toMap(unMarshallMe)
      def toMapOf[V]()(implicit m: Manifest[V]): Map[String, V] = JsonUtil.toMap[V](unMarshallMe)
      def fromJson[T]()(implicit m: Manifest[T]): T = JsonUtil.fromJson[T](unMarshallMe)
    }

    implicit class Marshallable[T](marshallMe: T) {
      def toJson: String = JsonUtil.toJson(marshallMe)
      def toJsonValue = Json.parse(JsonUtil.toJson(marshallMe))
    }
  }

  val mapper = new ObjectMapper() with ScalaObjectMapper
  mapper.registerModule(DefaultScalaModule)
  mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
  mapper.setDateFormat(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS"))

  def toJson(value: Map[Symbol, Any]): String = {
    toJson(value map { case (k, v) => k.name -> v })
  }

  def toJson(value: Any): String = {
    mapper.writeValueAsString(value)
  }

  def toMap[V](json: String)(implicit m: Manifest[V]) = fromJson[Map[String, V]](json)

  def fromJson[T](json: String)(implicit m: Manifest[T]): T = {
    mapper.readValue[T](json)
  }
}
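A small round-trip sketch using the helpers above; the Deployment case class and demo object are made up for illustration, and jackson-module-scala is assumed to be on the classpath:

import ionroller.JsonUtil

case class Deployment(service: String, replicas: Int)

object JsonUtilDemo extends App {
  val json = JsonUtil.toJson(Deployment("api", 3))   // e.g. {"service":"api","replicas":3}
  println(JsonUtil.fromJson[Deployment](json))       // Deployment(api,3)
}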
Example 62
Source File: DateUtil.scala From real-time-stream-processing-engine with Apache License 2.0 | 5 votes |
package com.knoldus.streaming.util import java.text.SimpleDateFormat import java.util.Date object DateUtil { private val dateFormats = List( "yyyyMMdd'T'HHmmss.SSSZ", "EEE, dd MMM yyyy HH:mm:ss Z", "yyyy-MM-dd HH:mm:ss", "EEE MMM dd HH:mm:ss Z yyyy", "MMM dd, yyyy, HH:mm a", "MMM dd, yyyy HH:mm a", "yyyy-MM-dd'T'HH:mm:ss", "dd MMM yyyy HH:mm:ss:S Z", "E MMM dd HH:mm:ss z yyyy", "dd MMM yyyy HH:mm:ss:SSS", "dd MMM yyyy H:mm:ss:SSS", "MM-dd-yyyy HH:mm:ss:SSS", "MM/dd/yyyy HH:mm:ss:SSS", "dd/MM/yyyy HH:mm:ss:SSS", "dd-MM-yyyy HH:mm:ss:SSS", "MMM/dd/yyyy HH:mm:ss:SSS", "MMM-dd-yyyy HH:mm:ss:SSS", "dd-MMM-yyyy HH:mm:ss:SSS", "MM-dd-yyyy H:mm:ss:SSS", "MM/dd/yyyy H:mm:ss:SSS", "dd/MM/yyyy H:mm:ss:SSS", "dd-MM-yyyy H:mm:ss:SSS", "MMM/dd/yyyy H:mm:ss:SSS", "MMM-dd-yyyy H:mm:ss:SSS", "dd-MMM-yyyy H:mm:ss:SSS", "MM-dd-yyyy HH:mm:ss", "MM/dd/yyyy HH:mm:ss", "dd/MM/yyyy HH:mm:ss", "dd-MM-yyyy HH:mm:ss", "MMM/dd/yyyy HH:mm:ss", "MMM-dd-yyyy HH:mm:ss", "dd-MMM-yyyy HH:mm:ss", "MM-dd-yyyy H:mm:ss", "MM/dd/yyyy H:mm:ss", "dd/MM/yyyy H:mm:ss", "dd-MM-yyyy H:mm:ss", "MMM/dd/yyyy H:mm:ss", "MMM-dd-yyyy H:mm:ss", "dd-MMM-yyyy H:mm:ss", "yyyy-MM-dd", "MM-dd-yyyy", "MM/dd/yyyy", "dd/MM/yyyy", "dd-MM-yyyy", "MMM/dd/yyyy", "MMM-dd-yyyy", "dd-MMM-yyyy") private val esDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ") def getESDateFormat(dateString: String): String = { def getDate(dateFormats: Seq[String], dateString: String): String = try { val dateFormat = new SimpleDateFormat(dateFormats.head) val date = dateFormat.parse(dateString) esDateFormat.format(date) } catch { case _ if (dateFormats.size > 1) => getDate(dateFormats.tail, dateString) case _: Exception => esDateFormat.format(new Date()) } getDate(dateFormats, dateString) } }
Example 63
Source File: httpserverplugin_staticfile.scala From scalabpe with Apache License 2.0 | 5 votes |
package scalabpe.plugin.http import java.io.File import java.net.URLEncoder import java.text.SimpleDateFormat import java.util.Calendar import java.util.GregorianCalendar import java.util.Locale import java.util.TimeZone import scala.collection.mutable.HashMap import org.jboss.netty.handler.codec.http.HttpHeaders import scalabpe.core.HashMapStringAny class StaticFilePlugin extends HttpServerPlugin with HttpServerStaticFilePlugin { val ETAG_TAG = "etag" val EXPIRE_TAG = "expire" val ATTACHMENT = "attachment" val FILENAME = "filename" val HTTP_DATE_FORMAT = "EEE, dd MMM yyyy HH:mm:ss zzz"; val HTTP_DATE_GMT_TIMEZONE = "GMT"; val df_tl = new ThreadLocal[SimpleDateFormat]() { override def initialValue(): SimpleDateFormat = { val df = new SimpleDateFormat(HTTP_DATE_FORMAT, Locale.US) df.setTimeZone(TimeZone.getTimeZone(HTTP_DATE_GMT_TIMEZONE)); df } } def generateStaticFile(serviceId: Int, msgId: Int, errorCode: Int, errorMessage: String, body: HashMapStringAny, pluginParam: String, headers: HashMap[String, String]): String = { if (body.ns(FILENAME) == "") { return null } val filename = body.ns(FILENAME) if (!new File(filename).exists()) { return null } if (body.ns(ETAG_TAG) != "") { headers.put("ETag", body.ns(ETAG_TAG)) } if (body.ns(EXPIRE_TAG) != "") { body.i(EXPIRE_TAG) match { case 0 | -1 => headers.put(HttpHeaders.Names.CACHE_CONTROL, "no-cache") case n => // seconds val time = new GregorianCalendar(); time.add(Calendar.SECOND, n); headers.put(HttpHeaders.Names.EXPIRES, df_tl.get.format(time.getTime())); headers.put(HttpHeaders.Names.CACHE_CONTROL, "max-age=" + n); } } val ext = parseExt(filename) if (ext != "") body.put("__file_ext__", ext) if (body.ns(ATTACHMENT, "1") == "1") { val filename = body.ns(FILENAME) val v = "attachment; filename=\"%s\"".format(URLEncoder.encode(parseFilename(filename), "UTF-8")) headers.put("Content-Disposition", v) } filename } def parseFilename(name: String): String = { val p = name.lastIndexOf("/") if (p < 0) return name name.substring(p + 1) } def parseExt(name: String): String = { val p = name.lastIndexOf(".") if (p < 0) return "" name.substring(p + 1).toLowerCase() } }
Example 64
Source File: IlluminaBasecallsToSam.scala From dagr with MIT License | 5 votes |
package dagr.tasks.picard import java.text.SimpleDateFormat import dagr.core.execsystem._ import dagr.core.tasksystem.{JvmRanOutOfMemory, VariableResources} import dagr.tasks.DagrDef.{DirPath, FilePath} import htsjdk.samtools.util.Iso8601Date import picard.util.IlluminaUtil.IlluminaAdapterPair import scala.collection.mutable.ListBuffer class IlluminaBasecallsToSam(basecallsDir: DirPath, lane: Int, runBarcode: String, readStructure: String, libraryParamsFile: FilePath, runDate: Option[Iso8601Date] = None, sequencingCenter: Option[String] = None, includeNonPfReads: Boolean = false, ignoreUnexpectedBarcodes: Boolean = false, minThreads: Int = 4, maxThreads: Int = 16, adapterPairs: Seq[IlluminaAdapterPair] = Seq( IlluminaAdapterPair.INDEXED, IlluminaAdapterPair.DUAL_INDEXED, IlluminaAdapterPair.NEXTERA_V2, IlluminaAdapterPair.FLUIDIGM ), barcodesDir: Option[DirPath] = None, maxReadsInRamPerTile: Option[Int] = Some(500000), firstTile: Option[Int] = None, tileLimit: Option[Int] = None, tmpDir: Option[DirPath] = None ) extends PicardTask with VariableResources with JvmRanOutOfMemory { protected val byMemoryPerThread: Memory = Memory("1GB") protected var memoryPerThread: Memory = Memory("2GB") override def pickResources(resources: ResourceSet): Option[ResourceSet] = { Range.inclusive(start=maxThreads, end=minThreads, step= -1) .flatMap { cores => resources.subset(Cores(cores), Memory(cores * memoryPerThread.value)) }.headOption } override protected def addPicardArgs(buffer: ListBuffer[Any]): Unit = { buffer += "BASECALLS_DIR=" + basecallsDir buffer += "LANE=" + lane buffer += "RUN_BARCODE=" + runBarcode barcodesDir.foreach(dir => buffer += "BARCODES_DIR=" + dir) runDate.foreach(date => buffer += "RUN_START_DATE=" + new SimpleDateFormat("yyyy/MM/dd").format(date)) buffer += "SEQUENCING_CENTER=" + sequencingCenter.getOrElse("null") buffer += "NUM_PROCESSORS=" + resources.cores.toInt buffer += "READ_STRUCTURE=" + readStructure.toString buffer += "LIBRARY_PARAMS=" + libraryParamsFile buffer += "INCLUDE_NON_PF_READS=" + includeNonPfReads if (ignoreUnexpectedBarcodes) buffer += "IGNORE_UNEXPECTED_BARCODES=true" if (adapterPairs.isEmpty) buffer += "ADAPTERS_TO_CHECK=null" else adapterPairs.foreach(buffer += "ADAPTERS_TO_CHECK=" + _) maxReadsInRamPerTile.foreach(n => buffer += "MAX_READS_IN_RAM_PER_TILE=" + n) firstTile.foreach(buffer += "FIRST_TILE=" + _) // If set, this is the first tile to be processed (used for debugging). tileLimit.foreach(buffer += "TILE_LIMIT=" + _) // If set, process no more than this many tiles (used for debugging). tmpDir.foreach(tmp => buffer += "TMP_DIR=" + tmp) } }
Example 65
Source File: StringToTimestampParser.scala From bandar-log with Apache License 2.0 | 5 votes |
package com.aol.one.dwh.infra.parser

import java.text.{DateFormat, SimpleDateFormat}
import java.util.TimeZone

import com.aol.one.dwh.infra.util.{ExceptionPrinter, LogTrait}

import scala.util.control.NonFatal
import scala.util.{Failure, Try}

object StringToTimestampParser extends LogTrait with ExceptionPrinter {

  def parse(value: String, format: String): Option[Long] = {
    Try {
      val dateFormat: DateFormat = new SimpleDateFormat(format)
      dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"))
      dateFormat.parse(value).getTime
    }.recoverWith {
      case NonFatal(e) =>
        logger.error(s"Could not parse value:[$value] using format:[$format]. Catching exception {}", e.getStringStackTrace)
        Failure(e)
    }.toOption
  }
}
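An illustrative use of the parser above (the values are made up): a match yields the epoch milliseconds interpreted in UTC, and a mismatch is logged and returned as None.

// 2021-01-01 00:00:00 UTC => Some(1609459200000)
StringToTimestampParser.parse("2021-01-01 00:00:00", "yyyy-MM-dd HH:mm:ss")

// Pattern mismatch: the ParseException is logged and converted to None
StringToTimestampParser.parse("not-a-date", "yyyy-MM-dd HH:mm:ss")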
Example 66
Source File: Bench.scala From akka-nbench with Apache License 2.0 | 5 votes |
package bench import akka.actor._ import akka.pattern.ask import akka.util.Timeout import scala.concurrent.duration._ import scala.reflect.runtime.universe._ import scala.concurrent.Await import com.typesafe.config._ import net.ceedubs.ficus.Ficus._ import java.util.Properties import java.nio.file._ import java.util.Date import java.text.SimpleDateFormat import java.util.Date import Tapper._ object Bench extends App { def prepareOutputDirs(): String = { val csvDateTimeDir = FileSystems.getDefault().getPath( "tests/" + new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date())) Files.createDirectories(csvDateTimeDir) val csvSymlink = FileSystems.getDefault().getPath("tests/current") if(Files.isSymbolicLink(csvSymlink)){ Files.delete(csvSymlink) } else if (Files.exists(csvSymlink)) { throw new NotASymbolicLinkException(s"test/current is not a symbolic link. Path: $csvSymlink") } Files.createSymbolicLink(csvSymlink, csvDateTimeDir.toAbsolutePath) csvDateTimeDir.toAbsolutePath.toString } def parseOptions(): String = { val usage = """ Usage: activator -mem 4096 "run-main bench.Bench scenario_name" """ if (args.length != 1) println(usage) return args(0) } val scenario = parseOptions val config = ConfigFactory.load().getConfig(scenario) val duration = config.getInt("duration") val concurrent = config.getInt("concurrent") val csvDateTimeDir = prepareOutputDirs val system = ActorSystem("bench") val actorProps = Props(classOf[StatsCollector], csvDateTimeDir, config) val statsCollector = system.actorOf(actorProps, name = "statscollector") val operationsWithRatio: Map[String, Int] = config.as[Map[String, Int]]("operations") val numer = operationsWithRatio.values.sum if (concurrent < numer){ val msg = s"concurrent($concurrent) must greater than sum of operations ratio($numer)" System.err.println(msg) throw new ApplicationConfigException(msg) } val operations = for((key, value) <- operationsWithRatio) yield { List.range(0, concurrent * operationsWithRatio(key) / numer).map(_ => key) } implicit val timeout = Timeout(duration * 2, SECONDS) var driverClz = Class.forName(config.getString("driver")) val drivers = operations.flatten.zipWithIndex.map{ case (operation, i) => system.actorOf(Props(driverClz, operation, statsCollector, config).withDispatcher("my-dispatcher"), name = s"driver_$i") } drivers.par.map(actor => actor ? Ready()).foreach{ f => Await.result(f, timeout.duration).asInstanceOf[OK] } val startAt = new Date() val doUntil = new Date(startAt.getTime + duration * 1000) drivers.par.map(actor => actor ? Go(doUntil)).foreach { f => Await.result(f, timeout.duration).asInstanceOf[OK] } (statsCollector ? TearDown()).tap { f => Await.result(f, timeout.duration).asInstanceOf[OK] } drivers.par.map(actor => actor ? TearDown()).foreach { f => Await.result(f, timeout.duration).asInstanceOf[OK] } (drivers.head ? TearDown()).tap { f => Await.result(f, timeout.duration).asInstanceOf[OK] } system.awaitTermination() }
Example 67
Source File: Utils.scala From graphcool-framework with Apache License 2.0 | 5 votes |
package cool.graph.rabbit import java.text.SimpleDateFormat import java.util.{Date, UUID} import java.util.concurrent.ThreadFactory import java.util.concurrent.atomic.AtomicLong object Utils { def timestamp: String = { val formatter = new SimpleDateFormat("HH:mm:ss.SSS-dd.MM.yyyy") val now = new Date() formatter.format(now) } def timestampWithRandom: String = timestamp + "-" + UUID.randomUUID() def newNamedThreadFactory(name: String): ThreadFactory = new ThreadFactory { val count = new AtomicLong(0) override def newThread(runnable: Runnable): Thread = { val thread = new Thread(runnable) thread.setName(s"$name-" + count.getAndIncrement) thread.setDaemon(true) thread } } }
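A short usage sketch for the helpers above, assuming the Utils object shown is on the classpath; the pool size and thread-pool name are made up for illustration:

import java.util.concurrent.Executors

Utils.timestamp            // e.g. "14:10:00.123-30.12.2005" (pattern "HH:mm:ss.SSS-dd.MM.yyyy")
Utils.timestampWithRandom  // the same timestamp with a random UUID appended
val pool = Executors.newFixedThreadPool(4, Utils.newNamedThreadFactory("rabbit-consumer"))
// threads created by this pool are daemons named "rabbit-consumer-0", "rabbit-consumer-1", ...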
Example 68
Source File: LDBCRouter.scala From Raphtory with Apache License 2.0 | 5 votes |
package com.raphtory.examples.ldbc.routers import java.text.SimpleDateFormat import java.util.Date import com.raphtory.core.components.Router.RouterWorker import com.raphtory.core.model.communication.EdgeAdd import com.raphtory.core.model.communication.EdgeDelete import com.raphtory.core.model.communication.Type import com.raphtory.core.model.communication.VertexAdd import com.raphtory.core.model.communication.VertexDelete import com.raphtory.examples.random.actors.RandomSpout class LDBCRouter(override val routerId: Int,override val workerID:Int, override val initialManagerCount: Int) extends RouterWorker { override protected def parseTuple(value: Any): Unit = { val fileLine = value.asInstanceOf[String].split("\\|") val date = fileLine(1).substring(0, 10) + fileLine(1).substring(11, 23); //extract the day of the event val date2 = fileLine(2).substring(0, 10) + fileLine(1).substring(11, 23); //extract the day of the event val creationDate: Long = new SimpleDateFormat("yyyy-MM-ddHH:mm:ss.SSS").parse(date).getTime() val deletionDate: Long = new SimpleDateFormat("yyyy-MM-ddHH:mm:ss.SSS").parse(date2).getTime() val vertexDeletion = sys.env.getOrElse("LDBC_VERTEX_DELETION", "false").trim.toBoolean val edgeDeletion = sys.env.getOrElse("LDBC_EDGE_DELETION", "false").trim.toBoolean fileLine(0) match { case "person" => sendGraphUpdate(VertexAdd(creationDate, assignID("person" + fileLine(3)), Type("person"))) //sendGraphUpdate(VertexAdd(creationDate, fileLine(3).toLong,Type("person"))) if(vertexDeletion) sendGraphUpdate(VertexDelete(deletionDate, assignID("person" + fileLine(3)))) case "person_knows_person" => //sendGraphUpdate(EdgeAdd(creationDate, fileLine(3).toLong,fileLine(4).toLong,Type("person_knows_person"))) sendGraphUpdate( EdgeAdd( creationDate, assignID("person" + fileLine(3)), assignID("person" + fileLine(4)), Type("person_knows_person") ) ) if(edgeDeletion) sendGraphUpdate(EdgeDelete(deletionDate, assignID("person"+fileLine(3)),assignID("person"+fileLine(4)))) } } } //2012-11-01T09:28:01.185+00:00|2019-07-22T11:24:24.362+00:00|35184372093644|Jose|Garcia|female|1988-05-20|111.68.47.44|Firefox
Example 69
Source File: LDBCOldRouter.scala From Raphtory with Apache License 2.0 | 5 votes |
package com.raphtory.examples.ldbc.routers import java.text.SimpleDateFormat import java.util.Date import com.raphtory.core.components.Router.RouterWorker import com.raphtory.core.model.communication.EdgeAdd import com.raphtory.core.model.communication.EdgeDelete import com.raphtory.core.model.communication.Type import com.raphtory.core.model.communication.VertexAdd import com.raphtory.core.model.communication.VertexDelete class LDBCOldRouter(override val routerId: Int,override val workerID:Int, override val initialManagerCount: Int) extends RouterWorker { override protected def parseTuple(value: Any): Unit = { val fileLine = value.asInstanceOf[String].split("\\|") //val deletionDate:Long = new SimpleDateFormat("yyyy-MM-ddHH:mm:ss.SSS").parse(date2).getTime() fileLine(0) match { case "person" => val date = fileLine(6).substring(0, 10) + fileLine(5).substring(11, 23); //extract the day of the event //val date2 = fileLine(2).substring(0, 10) + fileLine(1).substring(11, 23); //extract the day of the event val creationDate: Long = new SimpleDateFormat("yyyy-MM-ddHH:mm:ss.SSS").parse(date).getTime() sendGraphUpdate(VertexAdd(creationDate, assignID("person" + fileLine(1)), Type("person"))) //sendGraphUpdate(VertexAdd(creationDate, fileLine(3).toLong,Type("person"))) // sendGraphUpdate(VertexDelete(deletionDate, assignID("person"+fileLine(3)))) case "person_knows_person" => val date = fileLine(3).substring(0, 10) + fileLine(3).substring(11, 23); //extract the day of the event //val date2 = fileLine(2).substring(0, 10) + fileLine(1).substring(11, 23); //extract the day of the event val creationDate: Long = new SimpleDateFormat("yyyy-MM-ddHH:mm:ss.SSS").parse(date).getTime() //sendGraphUpdate(EdgeAdd(creationDate, fileLine(3).toLong,fileLine(4).toLong,Type("person_knows_person"))) sendGraphUpdate( EdgeAdd( creationDate, assignID("person" + fileLine(1)), assignID("person" + fileLine(2)), Type("person_knows_person") ) ) //sendGraphUpdate(EdgeDelete(deletionDate, assignID("person"+fileLine(3)),assignID("person"+fileLine(4)))) } } } //2012-11-01T09:28:01.185+00:00|2019-07-22T11:24:24.362+00:00|35184372093644|Jose|Garcia|female|1988-05-20|111.68.47.44|Firefox
Example 70
Source File: rumourInteractRouter.scala From Raphtory with Apache License 2.0 | 5 votes |
package com.raphtory.examples.twitterRumour import java.text.SimpleDateFormat import com.raphtory.core.components.Router.RouterWorker import com.raphtory.core.model.communication._ import spray.json._ import scala.io.Source class rumourInteractRouter(override val routerId: Int,override val workerID:Int, val initialManagerCount: Int) extends RouterWorker { override protected def parseTuple(cmd: Any): Unit = { //println("im at router top...") val List(r_status, tweet) = cmd.asInstanceOf[String].split("__").toList val json = Source.fromFile(tweet) for (line <- json.getLines) { // println("reading json"+cmd) var user = line.parseJson.asJsObject.fields("user").asJsObject val post = line.parseJson.asJsObject val replyTime = post.fields("created_at").toString.toString.split("\"")(1) val source = user.fields("id").toString // if (source.toLong <0){println("this is converting worng.."+source) // sys.exit()} val dist = post.fields("in_reply_to_user_id").toString if (dist != "null") sendGraphUpdate( EdgeAddWithProperties( getTwitterDate(replyTime), source.toLong, dist.toLong, properties = Properties(ImmutableProperty("rumourStatus", r_status)) ) ) else sendGraphUpdate( VertexAddWithProperties( getTwitterDate(replyTime), source.toLong, properties = Properties(ImmutableProperty("rumourStatus", r_status)) ) ) } } def getTwitterDate(date: String): Long = { // println(">>> converting time...") val twitter = "EEE MMM dd HH:mm:ss ZZZZZ yyyy" val sf = new SimpleDateFormat(twitter) //println(date) try //println("converted time ///"+t) return sf.parse(date).getTime() catch { case e: Throwable => println("-----time not properly converting" + date) sys.exit() return 0 } } }
Example 71
Source File: ChainalysisABRouter.scala From Raphtory with Apache License 2.0 | 5 votes |
package com.raphtory.examples.blockchain.routers import java.text.SimpleDateFormat import com.raphtory.core.components.Router.RouterWorker import com.raphtory.core.model.communication.Type import com.raphtory.core.model.communication._ class ChainalysisABRouter(override val routerId: Int,override val workerID:Int, override val initialManagerCount: Int) extends RouterWorker { def parseTuple(record: Any): Unit = { val dp = formatLine(record.asInstanceOf[String].split(",").map(_.trim)) val transactionTime = dp.time val srcClusterId = assignID(dp.srcCluster.toString) val dstClusterId = assignID(dp.dstCluster.toString) val transactionId = assignID(dp.txid.toString) val btcAmount = dp.amount val usdAmount = dp.usd sendGraphUpdate(VertexAdd(transactionTime, srcClusterId, Type("Cluster"))) sendGraphUpdate(VertexAdd(transactionTime, dstClusterId, Type("Cluster"))) sendGraphUpdate(VertexAdd(transactionTime, transactionId, Type("Transaction"))) sendGraphUpdate( EdgeAddWithProperties(transactionTime, srcClusterId, transactionId, Properties(DoubleProperty("BitCoin", btcAmount), DoubleProperty("USD",usdAmount)), Type("Incoming Payment") ) ) sendGraphUpdate( EdgeAddWithProperties(transactionTime, transactionId, dstClusterId, Properties(DoubleProperty("BitCoin", btcAmount), DoubleProperty("USD",usdAmount)), Type("Outgoing Payment") ) ) } //converts the line into a case class which has all of the data via the correct name and type def formatLine(line: Array[String]): Datapoint = Datapoint( line(1).toDouble / 100000000, //Amount of transaction in BTC line(2).toLong, //ID of destination cluster line(3).toLong, //ID of source cluster line(4).toLong * 1000, //Time of transaction in seconds (milli in Raph) line(5).toLong, //ID of transaction, can be similar for many records line(6).toDouble / 100000 //Amount of transaction in USD ) def longCheck(data: String): Option[Long] = if (data equals "") None else Some(data.toLong) case class Datapoint( amount: Double, //Amount of transaction in Satoshi dstCluster: Long, //ID of destination cluster srcCluster: Long, //ID of source cluster time: Long, //Time of transaction in seconds txid: Long, //ID of transaction, can be similar for many records usd: Double //Amount of transaction in USD ) }
Example 72
Source File: OutDegree.scala From Raphtory with Apache License 2.0 | 5 votes |
package com.raphtory.examples.random.depricated import java.text.SimpleDateFormat import java.util.Date import com.raphtory.core.analysis.API.Analyser import com.raphtory.core.utils.Utils import scala.collection.mutable.ArrayBuffer class OutDegree(args:Array[String]) extends Analyser(args){ override def analyse(): Unit = { var results = ArrayBuffer[Int]() proxy.getVerticesSet().foreach { v => val vertex = proxy.getVertex(v._2) val totalEdges = vertex.getOutgoingNeighbors.size // println("Total edges for V "+v+" "+vertex.getOutgoingNeighbors + " "+vertex.getIngoingNeighbors ) results += totalEdges } // println("THIS IS HOW RESULTS LOOK: "+ results.groupBy(identity).mapValues(_.size)) results.groupBy(identity).mapValues(_.size).toList } override def setup(): Unit = {} override def defineMaxSteps(): Int = 1 override def processResults(results: ArrayBuffer[Any], timeStamp: Long, viewCompleteTime: Long): Unit = {} override def processViewResults(results: ArrayBuffer[Any], timestamp: Long, viewCompleteTime: Long): Unit = { val output_file = System.getenv().getOrDefault("GAB_PROJECT_OUTPUT", "/app/defout.csv").trim val inputFormat = new SimpleDateFormat("E MMM dd HH:mm:ss z yyyy") val outputFormat = new SimpleDateFormat("dd/MM/yyyy HH:mm:ss") var finalResults = ArrayBuffer[(Int, Int)]() for (kv <- results) // println("KV RESULTS: " + kv) for (pair <- kv.asInstanceOf[List[(Int, Int)]]) finalResults += pair val currentDate = new Date(timestamp) val formattedDate = outputFormat.format(inputFormat.parse(currentDate.toString)) var degrees = finalResults.groupBy(_._1).mapValues(seq => seq.map(_._2).reduce(_ + _)).toList.sortBy(_._1) //.foreach(println) for ((degree, total) <- degrees) { var text = formattedDate + "," + degree + "," + total Utils.writeLines(output_file, text, "Date,OutDegree,Total") } } override def processWindowResults( results: ArrayBuffer[Any], timestamp: Long, windowSize: Long, viewCompleteTime: Long ): Unit = ??? override def returnResults(): Any = ??? }
Example 73
Source File: CitationRouter.scala From Raphtory with Apache License 2.0 | 5 votes |
package com.raphtory.examples.citationNetwork import java.text.SimpleDateFormat import com.raphtory.core.components.Router.RouterWorker import com.raphtory.core.model.communication._ class CitationRouter(override val routerId: Int,override val workerID:Int, override val initialManagerCount: Int) extends RouterWorker { def parseTuple(record: Any): Unit = { val fileLine = record.asInstanceOf[String].split(",").map(_.trim) //extract the values from the data source in the form of: // 0-sourceNode,1-targetNode,2-sourceCitedTargetOn,3-targetCreationDate,4-targetLastCitedOn val sourceNode = fileLine(0).toInt val targetNode = fileLine(1).toInt val sourceCitedTargetOn = dateToUnixTime(timestamp = fileLine(2)) val targetCreationDate = dateToUnixTime(timestamp = fileLine(3)) val targetLastCitedOn = dateToUnixTime(timestamp = fileLine(4)) //create sourceNode sendGraphUpdate(VertexAdd(sourceCitedTargetOn, sourceNode)) //create destinationNode sendGraphUpdate(VertexAdd(targetCreationDate, targetNode)) //create edge sendGraphUpdate(EdgeAdd(sourceCitedTargetOn, sourceNode, targetNode)) if (sourceCitedTargetOn == targetLastCitedOn) sendGraphUpdate(EdgeDelete(targetLastCitedOn, sourceNode, targetNode)) } def dateToUnixTime(timestamp: => String): Long = { //if(timestamp == null) return null; println(timestamp) val sdf = new SimpleDateFormat("dd/MM/yyyy") println(sdf) val dt = sdf.parse(timestamp) println(dt) val epoch = dt.getTime() println(epoch) epoch / 1000 } }
Example 74
Source File: S3MigrationHandlerBase.scala From flyway-awslambda with MIT License | 5 votes |
package crossroad0201.aws.flywaylambda import java.text.SimpleDateFormat import java.util.Date import com.amazonaws.services.lambda.runtime.Context import com.amazonaws.services.s3.AmazonS3 import crossroad0201.aws.flywaylambda.deploy.{FlywayDeployment, S3SourceFlywayDeployer} import crossroad0201.aws.flywaylambda.migration.{FlywayMigrator, MigrationInfo, MigrationResult} import spray.json.DefaultJsonProtocol import scala.util.Try object MigrationResultProtocol extends DefaultJsonProtocol { import spray.json._ implicit val DateFormat = new RootJsonFormat[Date] { override def write(value: Date): JsValue = if (value == null) JsNull else JsString(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'").format(value)) override def read(json: JsValue): Date = ??? } implicit val migrationInfoFormat = jsonFormat6(MigrationInfo.apply) implicit val migrationResultFormat = jsonFormat5(MigrationResult.apply) } trait S3MigrationHandlerBase extends FlywayMigrator { type ResultJson = String type ResultStoredPath = String protected def migrate(bucketName: String, prefix: String, flywayConfFileName: String = "flyway.conf")(implicit context: Context, s3Client: AmazonS3): Try[ResultJson] = { val logger = context.getLogger def resultJson(result: MigrationResult): ResultJson = { import MigrationResultProtocol._ import spray.json._ result.toJson.prettyPrint } def storeResult(deployment: FlywayDeployment, result: MigrationResult): ResultStoredPath = { val jsonPath = s"${deployment.sourcePrefix}/migration-result.json" s3Client.putObject(deployment.sourceBucket, jsonPath, resultJson(result)) jsonPath } for { // Deploy Flyway resources. d <- new S3SourceFlywayDeployer(s3Client, bucketName, prefix, flywayConfFileName).deploy _ = { logger.log( s"""--- Flyway configuration ------------------------------------ |flyway.url = ${d.url} |flyway.user = **** |flyway.password = **** | |SQL locations = ${d.location} |SQL files = ${d.sqlFiles.mkString(", ")} |------------------------------------------------------------- """.stripMargin) } // Migrate DB. r = migrate(d) _ = { logger.log(s"${r.message}!. ${r.appliedCount} applied.") r.infos.foreach { i => logger.log(s"Version=${i.version}, Type=${i.`type`}, State=${i.state} InstalledAt=${i.installedAt} ExecutionTime=${i.execTime} Description=${i.description}") } } // Store migration result. storedPath = storeResult(d, r) _ = logger.log(s"Migration result stored to $bucketName/$storedPath.") } yield resultJson(r) } }
Example 75
Source File: CliLogger.scala From Argus-SAF with Apache License 2.0 | 5 votes |
package org.argus.jnsaf.native_statistics import java.io.{File, FileWriter, PrintWriter} import java.text.SimpleDateFormat import java.util.Date object CliLogger { def timeStamp = new SimpleDateFormat("yyyyMMdd-HHmmss").format(new Date) def outPrint(s : String) { scala.Console.out.print(s) scala.Console.out.flush() } def outPrintln(s : String) { scala.Console.out.println(s) scala.Console.out.flush() } def outPrintln() { scala.Console.out.println() scala.Console.out.flush() } def errPrintln(s : String) { scala.Console.err.println(s) scala.Console.err.flush() } def errPrintln() { scala.Console.err.println() scala.Console.err.flush() } def logError(dir: File, text: String, e: Throwable) { outPrintln() errPrintln(text + e.getMessage) val f = new File(dir, ".errorlog") f.getParentFile.mkdirs val fw = new FileWriter(f) try { val pw = new PrintWriter(fw) pw.println("An error occurred on " + timeStamp) e.printStackTrace(pw) fw.close() outPrintln("Written: " + f.getAbsolutePath) } catch { case e : Throwable => errPrintln("Error: " + e.getMessage) } } }
Example 76
Source File: CliLogger.scala From Argus-SAF with Apache License 2.0 | 5 votes |
package org.argus.saf.cli.util import java.io.{File, FileWriter, PrintWriter} import java.text.SimpleDateFormat import java.util.Date object CliLogger { def timeStamp: String = new SimpleDateFormat("yyyyMMdd-HHmmss").format(new Date) def outPrint(s : String) { scala.Console.out.print(s) scala.Console.out.flush() } def outPrintln(s : String) { scala.Console.out.println(s) scala.Console.out.flush() } def outPrintln() { scala.Console.out.println() scala.Console.out.flush() } def errPrintln(s : String) { scala.Console.err.println(s) scala.Console.err.flush() } def errPrintln() { scala.Console.err.println() scala.Console.err.flush() } def logError(dir: File, text: String, e: Throwable) { outPrintln() errPrintln(text + e.getMessage) val f = new File(dir, ".errorlog") f.getParentFile.mkdirs val fw = new FileWriter(f) try { val pw = new PrintWriter(fw) pw.println("An error occurred on " + timeStamp) e.printStackTrace(pw) fw.close() outPrintln("Written: " + f.getAbsolutePath) } catch { case e : Throwable => errPrintln("Error: " + e.getMessage) } } }
Example 77
Source File: StatsSender.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.stats import java.net.{DatagramPacket, DatagramSocket, InetAddress} import java.util.Calendar import java.text.SimpleDateFormat import akka.actor.{Actor, ActorSystem, Props} import akka.actor.Actor.Receive case class Message(msg: String, host: String, port: Int) class SenderActor extends Actor { private val dsocket = new DatagramSocket() sys.addShutdownHook { dsocket.close() } override def receive: Receive = { case Message(msg, host, port) => val address = InetAddress.getByName(host) val packet = new DatagramPacket(msg.getBytes(), msg.length, address, port) dsocket.send(packet) } } class StatsSender(path: String, host: String = "localhost", port: Int = 8125) { object Sender { val system = ActorSystem("mySystem") val actor = system.actorOf(Props[SenderActor], "SenderActor") def send(message: String) { actor ! Message(message, host, port) } } private def getCurrentTimeStr: String = { val now = Calendar.getInstance().getTime() val dateFormat = new SimpleDateFormat("ddMMyyyy_hhmm") dateFormat.format(now) } private def getMachineName: String = { java.net.InetAddress.getLocalHost().getHostName().split('.')(0) } private def getName(p: String, action: String): String = { p.replace("{MachineName}", getMachineName).replace("{DateTime}", getCurrentTimeStr) + "." + action .replace(".", "-") .replace(" ", "_") } def sendCounts(action: String, num: Int) { val message = getName(path, action) + ":" + num + "|c" Sender.send(message) } def sendTimings(action: String, num: Int) { val message = getName(path, action) + ":" + num + "|ms" Sender.send(message) } def sendGauges(action: String, num: Int) { val message = getName(path, action) + ":" + num + "|g" Sender.send(message) } def sendSets(action: String) { val message = getName(path, action) + "|s" Sender.send(message) } }
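A hedged usage sketch for the sender above; the metric path and action names are invented, and the calls assume a statsd-style UDP listener on the default localhost:8125:

val stats = new StatsSender("cmwell.{MachineName}.{DateTime}.ingest")
stats.sendCounts("documents indexed", 42)   // sends "...documents_indexed:42|c"
stats.sendTimings("index latency", 180)     // sends "...index_latency:180|ms"
stats.sendGauges("queue size", 7)           // sends "...queue_size:7|g"
stats.sendSets("unique users")              // sends "...unique_users|s"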
Example 78
Source File: IotMessageConverterTest.scala From toketi-kafka-connect-iothub with MIT License | 5 votes |
// Copyright (c) Microsoft. All rights reserved. package com.microsoft.azure.iot.kafka.connect.source import java.text.SimpleDateFormat import java.time.Instant import com.microsoft.azure.eventhubs.impl.AmqpConstants import com.microsoft.azure.iot.kafka.connect.source.testhelpers.DeviceTemperature import org.apache.kafka.connect.data.Struct import org.json4s.jackson.Serialization._ import org.scalatest.{FlatSpec, GivenWhenThen} import scala.collection.mutable import scala.util.Random class IotMessageConverterTest extends FlatSpec with GivenWhenThen with JsonSerialization { private val random: Random = new Random "IotMessage Converter" should "populate right values for kafka message struct fields" in { Given("IotMessage object") val deviceTemp = DeviceTemperature(100.01, "F") val deviceTempStr = write(deviceTemp) val sequenceNumber = random.nextLong() val correlationId = random.nextString(10) val offset = random.nextString(10) val enqueuedDate = new SimpleDateFormat("MM/dd/yyyy").parse("12/01/2016") val systemProperties = mutable.Map[String, Object]( "iothub-connection-device-id" → "device10", AmqpConstants.SEQUENCE_NUMBER_ANNOTATION_NAME → sequenceNumber.asInstanceOf[Object], AmqpConstants.AMQP_PROPERTY_CORRELATION_ID → correlationId, AmqpConstants.OFFSET_ANNOTATION_NAME → offset, AmqpConstants.ENQUEUED_TIME_UTC_ANNOTATION_NAME → enqueuedDate) val timestamp = Instant.now().toString val messageProperties = mutable.Map[String, Object]( "timestamp" → timestamp, "contentType" → "temperature" ) val iotMessage = IotMessage(deviceTempStr, systemProperties, messageProperties) When("getIotMessageStruct is called with IotMessage object") val kafkaMessageStruct: Struct = IotMessageConverter.getIotMessageStruct(iotMessage) Then("The struct has all the expected properties") assert(kafkaMessageStruct.getString("deviceId") == "device10") assert(kafkaMessageStruct.getString("offset") == offset) assert(kafkaMessageStruct.getString("contentType") == "temperature") assert(kafkaMessageStruct.getString("enqueuedTime") == enqueuedDate.toInstant.toString) assert(kafkaMessageStruct.getInt64("sequenceNumber") == sequenceNumber) assert(kafkaMessageStruct.getString("content") == deviceTempStr) val structSystemProperties = kafkaMessageStruct.getMap[String, String]("systemProperties") assert(structSystemProperties != null) assert(structSystemProperties.size == 1) assert(structSystemProperties.get(AmqpConstants.AMQP_PROPERTY_CORRELATION_ID) == correlationId) val structProperties = kafkaMessageStruct.getMap[String, String]("properties") assert(structProperties != null) assert(structProperties.size == 1) assert(structProperties.get("timestamp") == timestamp) } it should "use default values for missing properties" in { val deviceTemp = DeviceTemperature(100.01, "F") val deviceTempStr = write(deviceTemp) val systemProperties = mutable.Map.empty[String, Object] val messageProperties = mutable.Map.empty[String, Object] val iotMessage = IotMessage(deviceTempStr, systemProperties, messageProperties) When("getIotMessageStruct is called with IotMessage object") val kafkaMessageStruct: Struct = IotMessageConverter.getIotMessageStruct(iotMessage) Then("The struct has all the expected properties") assert(kafkaMessageStruct.getString("deviceId") == "") assert(kafkaMessageStruct.getString("offset") == "") assert(kafkaMessageStruct.getString("contentType") == "") assert(kafkaMessageStruct.getString("enqueuedTime") == "") assert(kafkaMessageStruct.getInt64("sequenceNumber") == 0) assert(kafkaMessageStruct.getString("content") == 
deviceTempStr) val structSystemProperties = kafkaMessageStruct.getMap[String, String]("systemProperties") assert(structSystemProperties != null) assert(structSystemProperties.size == 0) val structProperties = kafkaMessageStruct.getMap[String, String]("properties") assert(structProperties != null) assert(structProperties.size == 0) } }
Example 79
Source File: MockDataReceiver.scala From toketi-kafka-connect-iothub with MIT License | 5 votes |
// Copyright (c) Microsoft. All rights reserved. package com.microsoft.azure.iot.kafka.connect.source.testhelpers import java.text.SimpleDateFormat import java.time.{Duration, Instant} import com.microsoft.azure.eventhubs.impl.AmqpConstants import com.microsoft.azure.iot.kafka.connect.source.{DataReceiver, IotMessage, JsonSerialization} import org.json4s.jackson.Serialization.write import scala.collection.mutable import scala.util.Random class MockDataReceiver(val connectionString: String, val receiverConsumerGroup: String, val partition: String, var offset: Option[String], val startTime: Option[Instant], val receiveTimeout: Duration ) extends DataReceiver with JsonSerialization { private val random: Random = new Random override def receiveData(batchSize: Int): Iterable[IotMessage] = { val list = scala.collection.mutable.ListBuffer.empty[IotMessage] for (i <- 0 until batchSize) { list += generateIotMessage(i) } list } def generateIotMessage(index: Int): IotMessage = { val temp = 70 + random.nextInt(10) + random.nextDouble() val deviceTemp = DeviceTemperature(temp, "F") val deviceTempStr = write(deviceTemp) val systemProperties = mutable.Map[String, Object]( "iothub-connection-device-id" → s"device$index", AmqpConstants.SEQUENCE_NUMBER_ANNOTATION_NAME → index.toLong.asInstanceOf[Object], AmqpConstants.AMQP_PROPERTY_CORRELATION_ID → random.nextString(10), AmqpConstants.OFFSET_ANNOTATION_NAME → random.nextString(10), AmqpConstants.ENQUEUED_TIME_UTC_ANNOTATION_NAME → new SimpleDateFormat("MM/dd/yyyy").parse("12/01/2016")) val messageProperties = mutable.Map[String, Object]( "timestamp" → Instant.now().toString, "contentType" → "temperature" ) val iotMessage = IotMessage(deviceTempStr, systemProperties, messageProperties) iotMessage } override def close(): Unit = {} }
Example 80
Source File: PMMLModelExport.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export import java.text.SimpleDateFormat import java.util.{Date, Locale} import scala.beans.BeanProperty import org.dmg.pmml.{Application, Header, PMML, Timestamp} private[mllib] trait PMMLModelExport { @BeanProperty val pmml: PMML = { val version = getClass.getPackage.getImplementationVersion val app = new Application("Apache Spark MLlib").setVersion(version) val timestamp = new Timestamp() .addContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US).format(new Date())) val header = new Header() .setApplication(app) .setTimestamp(timestamp) new PMML("4.2", header, null) } }
Example 81
Source File: JacksonMessageWriter.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.status.api.v1 import java.io.OutputStream import java.lang.annotation.Annotation import java.lang.reflect.Type import java.nio.charset.StandardCharsets import java.text.SimpleDateFormat import java.util.{Calendar, Locale, SimpleTimeZone} import javax.ws.rs.Produces import javax.ws.rs.core.{MediaType, MultivaluedMap} import javax.ws.rs.ext.{MessageBodyWriter, Provider} import com.fasterxml.jackson.annotation.JsonInclude import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} @Provider @Produces(Array(MediaType.APPLICATION_JSON)) private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{ val mapper = new ObjectMapper() { override def writeValueAsString(t: Any): String = { super.writeValueAsString(t) } } mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule) mapper.enable(SerializationFeature.INDENT_OUTPUT) mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL) mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat) override def isWriteable( aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType): Boolean = { true } override def writeTo( t: Object, aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType, multivaluedMap: MultivaluedMap[String, AnyRef], outputStream: OutputStream): Unit = { t match { case ErrorWrapper(err) => outputStream.write(err.getBytes(StandardCharsets.UTF_8)) case _ => mapper.writeValue(outputStream, t) } } override def getSize( t: Object, aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType): Long = { -1L } } private[spark] object JacksonMessageWriter { def makeISODateFormat: SimpleDateFormat = { val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'", Locale.US) val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT")) iso8601.setCalendar(cal) iso8601 } }
Example 82
Source File: SimpleDateParam.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.status.api.v1 import java.text.{ParseException, SimpleDateFormat} import java.util.{Locale, TimeZone} import javax.ws.rs.WebApplicationException import javax.ws.rs.core.Response import javax.ws.rs.core.Response.Status private[v1] class SimpleDateParam(val originalValue: String) { val timestamp: Long = { val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz", Locale.US) try { format.parse(originalValue).getTime() } catch { case _: ParseException => val gmtDay = new SimpleDateFormat("yyyy-MM-dd", Locale.US) gmtDay.setTimeZone(TimeZone.getTimeZone("GMT")) try { gmtDay.parse(originalValue).getTime() } catch { case _: ParseException => throw new WebApplicationException( Response .status(Status.BAD_REQUEST) .entity("Couldn't parse date: " + originalValue) .build() ) } } } }
Example 83
Source File: AppAnalyzer.scala From sparklens with Apache License 2.0 | 5 votes |
package com.qubole.sparklens.analyzer import java.util.Date import java.util.concurrent.TimeUnit import com.qubole.sparklens.common.AppContext import scala.collection.mutable.ListBuffer def pd(millis: Long) : String = { "%02dm %02ds".format( TimeUnit.MILLISECONDS.toMinutes(millis), TimeUnit.MILLISECONDS.toSeconds(millis) - TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(millis)) ) } def pcm(millis: Long) : String = { val millisForMinutes = millis % (60*60*1000) "%02dh %02dm".format( TimeUnit.MILLISECONDS.toHours(millis), TimeUnit.MILLISECONDS.toMinutes(millisForMinutes)) } implicit class PrintlnStringBuilder(sb: StringBuilder) { def println(x: Any): StringBuilder = { sb.append(x).append("\n") } def print(x: Any): StringBuilder = { sb.append(x) } } } object AppAnalyzer { def startAnalyzers(appContext: AppContext): Unit = { val list = new ListBuffer[AppAnalyzer] list += new SimpleAppAnalyzer list += new HostTimelineAnalyzer list += new ExecutorTimelineAnalyzer list += new AppTimelineAnalyzer list += new JobOverlapAnalyzer list += new EfficiencyStatisticsAnalyzer list += new ExecutorWallclockAnalyzer list += new StageSkewAnalyzer list.foreach( x => { try { val output = x.analyze(appContext) println(output) } catch { case e:Throwable => { println(s"Failed in Analyzer ${x.getClass.getSimpleName}") e.printStackTrace() } } }) } }
Example 84
Source File: PMMLModelExport.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export import java.text.SimpleDateFormat import java.util.Date import scala.beans.BeanProperty import org.dmg.pmml.{Application, Header, PMML, Timestamp} private[mllib] trait PMMLModelExport { @BeanProperty val pmml: PMML = new PMML setHeader(pmml) private def setHeader(pmml: PMML): Unit = { val version = getClass.getPackage.getImplementationVersion val app = new Application().withName("Apache Spark MLlib").withVersion(version) val timestamp = new Timestamp() .withContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(new Date())) val header = new Header() .withApplication(app) .withTimestamp(timestamp) pmml.setHeader(header) } }
Example 85
Source File: DateUtils.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.util import java.sql.Date import java.text.SimpleDateFormat import java.util.{Calendar, TimeZone} import org.apache.spark.sql.catalyst.expressions.Cast object DateUtils { private val MILLIS_PER_DAY = 86400000 // Java TimeZone has no mention of thread safety. Use thread local instance to be safe. private val LOCAL_TIMEZONE = new ThreadLocal[TimeZone] { override protected def initialValue: TimeZone = { Calendar.getInstance.getTimeZone } } private def javaDateToDays(d: Date): Int = { millisToDays(d.getTime) } // we should use the exact day as Int, for example, (year, month, day) -> day def millisToDays(millisLocal: Long): Int = { ((millisLocal + LOCAL_TIMEZONE.get().getOffset(millisLocal)) / MILLIS_PER_DAY).toInt } private def toMillisSinceEpoch(days: Int): Long = { val millisUtc = days.toLong * MILLIS_PER_DAY millisUtc - LOCAL_TIMEZONE.get().getOffset(millisUtc) } def fromJavaDate(date: java.sql.Date): Int = { javaDateToDays(date) } def toJavaDate(daysSinceEpoch: Int): java.sql.Date = { new java.sql.Date(toMillisSinceEpoch(daysSinceEpoch)) } def toString(days: Int): String = Cast.threadLocalDateFormat.get.format(toJavaDate(days)) def stringToTime(s: String): java.util.Date = { if (!s.contains('T')) { // JDBC escape string if (s.contains(' ')) { java.sql.Timestamp.valueOf(s) } else { java.sql.Date.valueOf(s) } } else if (s.endsWith("Z")) { // this is zero timezone of ISO8601 stringToTime(s.substring(0, s.length - 1) + "GMT-00:00") } else if (s.indexOf("GMT") == -1) { // timezone with ISO8601 val inset = "+00.00".length val s0 = s.substring(0, s.length - inset) val s1 = s.substring(s.length - inset, s.length) if (s0.substring(s0.lastIndexOf(':')).contains('.')) { stringToTime(s0 + "GMT" + s1) } else { stringToTime(s0 + ".0GMT" + s1) } } else { // ISO8601 with GMT insert val ISO8601GMT: SimpleDateFormat = new SimpleDateFormat( "yyyy-MM-dd'T'HH:mm:ss.SSSz" ) ISO8601GMT.parse(s) } } }
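A few illustrative calls against the helpers above (the values are made up; millisToDays adjusts by the local time zone offset, so the exact day number depends on where it runs):

DateUtils.stringToTime("2015-06-01")                 // no 'T': treated as a JDBC date escape -> java.sql.Date
DateUtils.stringToTime("2015-06-01 12:30:00")        // no 'T' but a space: JDBC timestamp escape -> java.sql.Timestamp
DateUtils.stringToTime("2015-06-01T12:30:00.000Z")   // trailing 'Z' is rewritten to an explicit GMT offset, then parsed as ISO 8601

val days = DateUtils.millisToDays(System.currentTimeMillis())  // days since epoch for the local day
DateUtils.toJavaDate(days)                                     // and back to a java.sql.Date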
Example 86
Source File: JacksonMessageWriter.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.status.api.v1 import java.io.OutputStream import java.lang.annotation.Annotation import java.lang.reflect.Type import java.text.SimpleDateFormat import java.util.{Calendar, SimpleTimeZone} import javax.ws.rs.Produces import javax.ws.rs.core.{MediaType, MultivaluedMap} import javax.ws.rs.ext.{MessageBodyWriter, Provider} import com.fasterxml.jackson.annotation.JsonInclude import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} @Provider @Produces(Array(MediaType.APPLICATION_JSON)) private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{ val mapper = new ObjectMapper() { override def writeValueAsString(t: Any): String = { super.writeValueAsString(t) } } mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule) mapper.enable(SerializationFeature.INDENT_OUTPUT) mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL) mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat) override def isWriteable( aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType): Boolean = { true } override def writeTo( t: Object, aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType, multivaluedMap: MultivaluedMap[String, AnyRef], outputStream: OutputStream): Unit = { t match { case ErrorWrapper(err) => outputStream.write(err.getBytes("utf-8")) case _ => mapper.writeValue(outputStream, t) } } override def getSize( t: Object, aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType): Long = { -1L } } private[spark] object JacksonMessageWriter { def makeISODateFormat: SimpleDateFormat = { val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'") val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT")) iso8601.setCalendar(cal) iso8601 } }
Example 87
Source File: SimpleDateParam.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.status.api.v1 import java.text.SimpleDateFormat import java.util.TimeZone import javax.ws.rs.WebApplicationException import javax.ws.rs.core.Response import javax.ws.rs.core.Response.Status import scala.util.Try private[v1] class SimpleDateParam(val originalValue: String) { val timestamp: Long = { SimpleDateParam.formats.collectFirst { case fmt if Try(fmt.parse(originalValue)).isSuccess => fmt.parse(originalValue).getTime() }.getOrElse( throw new WebApplicationException( Response .status(Status.BAD_REQUEST) .entity("Couldn't parse date: " + originalValue) .build() ) ) } } private[v1] object SimpleDateParam { val formats: Seq[SimpleDateFormat] = { val gmtDay = new SimpleDateFormat("yyyy-MM-dd") gmtDay.setTimeZone(TimeZone.getTimeZone("GMT")) Seq( new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz"), gmtDay ) } }
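An illustrative use of the parameter class above; both inputs are made-up values that happen to match one of the two formats it tries:

new SimpleDateParam("2015-02-10T00:00:00.000GMT").timestamp  // matches the full timestamp format
new SimpleDateParam("2015-02-10").timestamp                  // falls back to the date-only format, read as GMT midnight
// any other shape raises a WebApplicationException carrying a 400 Bad Request response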
Example 88
Source File: DataFrameReportPerformanceSpec.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.deeplang.doperables.dataframe import java.sql.Timestamp import java.text.{DateFormat, SimpleDateFormat} import java.util.TimeZone import org.apache.spark.rdd.RDD import org.apache.spark.sql.Row import org.apache.spark.sql.types.{DoubleType, StructField, StructType, TimestampType} import org.scalatest.{BeforeAndAfter, Ignore} import ai.deepsense.commons.utils.{DoubleUtils, Logging} import ai.deepsense.deeplang.{TestFiles, DeeplangIntegTestSupport} // It's ignored because it does not have got assertions, it only prints report generation time. @Ignore class DataFrameReportPerformanceSpec extends DeeplangIntegTestSupport with BeforeAndAfter with TestFiles with Logging { val testFile = absoluteTestsDirPath.pathWithoutScheme + "/demand_without_header.csv" "DataFrame" should { "generate report" when { "DataFrame has 17K of rows" in { val numberOfTries = 10 var results: Seq[Double] = Seq() for (i <- 1 to numberOfTries) { val dataFrame: DataFrame = demandDataFrame() val start = System.nanoTime() val report = dataFrame.report() val end = System.nanoTime() val time1: Double = (end - start).toDouble / 1000000000.0 results = results :+ time1 logger.debug("Report generation time: {}", DoubleUtils.double2String(time1)) } logger.debug( "Mean report generation time: {}", DoubleUtils.double2String(results.fold(0D)(_ + _) / numberOfTries.toDouble)) } } } private def demandDataFrame(): DataFrame = { val rddString: RDD[String] = executionContext.sparkContext.textFile(testFile) val data: RDD[Row] = rddString.map(DataFrameHelpers.demandString2Row) executionContext.dataFrameBuilder.buildDataFrame(demandSchema, data) } private def demandSchema: StructType = StructType(Seq( StructField("datetime", TimestampType), StructField("log_count", DoubleType), StructField("workingday", DoubleType), StructField("holiday", DoubleType), StructField("season2", DoubleType), StructField("season3", DoubleType), StructField("season4", DoubleType))) private def timestamp(s: String): Timestamp = { val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") format.setTimeZone(TimeZone.getTimeZone("UTC")) new Timestamp(format.parse(s).getTime) } } private object DataFrameHelpers { def demandString2Row(s: String): Row = { val split = s.split(",") Row( timestamp(split(0)), split(1).toDouble, split(2).toDouble, split(3).toDouble, split(4).toDouble, split(5).toDouble, split(6).toDouble ) } private def timestamp(s: String): Timestamp = { val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") format.setTimeZone(TimeZone.getTimeZone("UTC")) new Timestamp(format.parse(s).getTime) } }
Example 89
Source File: OutputInterceptorFactory.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.outputintercepting import java.io.File import java.text.SimpleDateFormat import java.util.logging._ import java.util.{Calendar, UUID} import com.google.inject.Inject import com.google.inject.name.Named import org.apache.spark.launcher.SparkLauncher import ai.deepsense.commons.models.ClusterDetails case class OutputInterceptorHandle private [outputintercepting] ( private val logger: Logger, private val childProcLoggerName: String, private val loggerFileHandler: FileHandler ) { def attachTo(sparkLauncher: SparkLauncher): Unit = { sparkLauncher.setConf( "spark.launcher.childProcLoggerName", childProcLoggerName ) } def writeOutput(text: String): Unit = { logger.info(text) } def close(): Unit = { loggerFileHandler.close() } } class OutputInterceptorFactory @Inject()( @Named("session-executor.spark-applications-logs-dir") val executorsLogDirectory: String ) { def prepareInterceptorWritingToFiles(clusterDetails: ClusterDetails): OutputInterceptorHandle = { new File(executorsLogDirectory).mkdirs() val childProcLoggerName = s"WE-app-${UUID.randomUUID()}" val logger = Logger.getLogger(childProcLoggerName) val fileName = { val time = Calendar.getInstance().getTime() // Colons are not allowed in Windows filenames val format = new SimpleDateFormat("yyyy-MM-dd_HH-mm-ss") val formattedTime = format.format(time) val illegalFileNameCharactersRegExp = "[^a-zA-Z0-9.-]" s"$formattedTime-${clusterDetails.name.replaceAll(illegalFileNameCharactersRegExp, "_")}.log" } val fileHandler = new FileHandler(s"$executorsLogDirectory/$fileName") fileHandler.setFormatter(new SimpleFormaterWithoutOutputRedirectorNoise) logger.addHandler(fileHandler) sys.addShutdownHook { fileHandler.close() } OutputInterceptorHandle(logger, childProcLoggerName, fileHandler) } class SimpleFormaterWithoutOutputRedirectorNoise extends Formatter { val simpleFormatter = new SimpleFormatter override def format(logRecord: LogRecord): String = { val formatted = simpleFormatter.format(logRecord) val redirectorNoise = "org.apache.spark.launcher.OutputRedirector redirect\nINFO: " val beginningOfRedirectorNoise = formatted.indexOf(redirectorNoise) val endOfRedirectorNoise = if (beginningOfRedirectorNoise > 0) { beginningOfRedirectorNoise + redirectorNoise.length } else { 0 } formatted.substring(endOfRedirectorNoise) } } }
Example 90
Source File: GeneratorTest.scala From kafka-connect-kcql-smt with Apache License 2.0 | 5 votes |
package com.landoop.connect.sql import java.text.SimpleDateFormat import java.util.Date import com.landoop.json.sql.JacksonJson import com.sksamuel.avro4s.SchemaFor import org.scalatest.{Matchers, WordSpec} import scala.util.Random class GeneratorTest extends WordSpec with Matchers { "Generator" should { "generate schema" in { val sql = Sql.parse("SELECT * FROM `order-topic`") val schema = SchemaFor[Product]() val str = schema.toString println(str) } "generate data" in { val rnd = new Random(System.currentTimeMillis()) val f = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss.FFF") val products = (1 to 4).map { i => Product(i, f.format(new Date()), s"product_$i", Payment(rnd.nextDouble(), i * rnd.nextInt(3), "GBP")) }.map(JacksonJson.toJson).mkString(s"${System.lineSeparator()}") println(products) } } } case class Product(id: Int, created: String, name: String, payment: Payment) case class Payment(price: Double, quantity: Int, currency: String)
Example 91
Source File: RandomListTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.testkit import java.text.SimpleDateFormat import com.salesforce.op.features.types._ import com.salesforce.op.test.TestCommon import com.salesforce.op.testkit.RandomList.{NormalGeolocation, UniformGeolocation} import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import org.scalatest.{Assertions, FlatSpec} import scala.language.postfixOps @RunWith(classOf[JUnitRunner]) class RandomListTest extends FlatSpec with TestCommon with Assertions { private val numTries = 10000 private val rngSeed = 314159214142136L private def check[D, T <: OPList[D]]( g: RandomList[D, T], minLen: Int, maxLen: Int, predicate: (D => Boolean) = (_: D) => true ) = { g reset rngSeed def segment = g limit numTries segment count (_.value.length < minLen) shouldBe 0 segment count (_.value.length > maxLen) shouldBe 0 segment foreach (list => list.value foreach { x => predicate(x) shouldBe true }) } private val df = new SimpleDateFormat("dd/MM/yy") Spec[Text, RandomList[String, TextList]] should "generate lists of strings" in { val sut = RandomList.ofTexts(RandomText.countries, 0, 4) check[String, TextList](sut, 0, 4, _.length > 0) (sut limit 7 map (_.value.toList)) shouldBe List( List("Madagascar", "Gondal", "Zephyria"), List("Holy Alliance"), List("North American Union"), List("Guatemala", "Estonia", "Kolechia"), List(), List("Myanmar", "Bhutan"), List("Equatorial Guinea") ) } Spec[Date, RandomList[Long, DateList]] should "generate lists of dates" in { val dates = RandomIntegral.dates(df.parse("01/01/2017"), 1000, 1000000) val sut = RandomList.ofDates(dates, 11, 22) var d0 = 0L check[Long, DateList](sut, 11, 22, d => { val d1 = d0 d0 = d d > d1 }) } Spec[DateTimeList, RandomList[Long, DateTimeList]] should "generate lists of datetimes" in { val datetimes = RandomIntegral.datetimes(df.parse("01/01/2017"), 1000, 1000000) val sut = RandomList.ofDateTimes(datetimes, 11, 22) var d0 = 0L check[Long, DateTimeList](sut, 11, 22, d => { val d1 = d0 d0 = d d > d1 }) } Spec[UniformGeolocation] should "generate uniformly distributed geolocations" in { val sut = RandomList.ofGeolocations val segment = sut limit numTries segment foreach (_.value.length shouldBe 3) } Spec[NormalGeolocation] should "generate geolocations around given point" in { for {accuracy <- GeolocationAccuracy.values} { val geolocation = RandomList.ofGeolocationsNear(37.444136, 122.163160, accuracy) val segment = geolocation limit numTries segment foreach (_.value.length shouldBe 3) } } }
Example 92
Source File: RandomIntegralTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.testkit import java.text.SimpleDateFormat import com.salesforce.op.features.types._ import com.salesforce.op.test.TestCommon import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import org.scalatest.{Assertions, FlatSpec} import scala.language.postfixOps @RunWith(classOf[JUnitRunner]) class RandomIntegralTest extends FlatSpec with TestCommon with Assertions { private val numTries = 10000 private val rngSeed = 314159214142135L private def check[T <: Integral]( g: RandomIntegral[T], predicate: Long => Boolean = _ => true ) = { g reset rngSeed def segment = g limit numTries val numberOfEmpties = segment count (_.isEmpty) val expectedNumberOfEmpties = g.probabilityOfEmpty * numTries withClue(s"numEmpties = $numberOfEmpties, expected $expectedNumberOfEmpties") { math.abs(numberOfEmpties - expectedNumberOfEmpties) < 2 * math.sqrt(numTries) shouldBe true } val maybeValues = segment filterNot (_.isEmpty) map (_.value) val values = maybeValues collect { case Some(s) => s } values foreach (x => predicate(x) shouldBe true) withClue(s"number of distinct values = ${values.size}, expected:") { math.abs(maybeValues.size - values.toSet.size) < maybeValues.size / 20 } } private val df = new SimpleDateFormat("dd/MM/yy") Spec[RandomIntegral[Integral]] should "generate empties and distinct numbers" in { val sut0 = RandomIntegral.integrals val sut = sut0.withProbabilityOfEmpty(0.3) check(sut) sut.probabilityOfEmpty shouldBe 0.3 } Spec[RandomIntegral[Integral]] should "generate empties and distinct numbers in some range" in { val sut0 = RandomIntegral.integrals(100, 200) val sut = sut0.withProbabilityOfEmpty(0.3) check(sut, i => i >= 100 && i < 200) sut.probabilityOfEmpty shouldBe 0.3 } Spec[RandomIntegral[Date]] should "generate dates" in { val sut = RandomIntegral.dates(df.parse("01/01/2017"), 1000, 1000000) var d0 = 0L check(sut withProbabilityOfEmpty 0.01, d => { val d1 = d0 d0 = d d0 > d1 }) } Spec[RandomIntegral[DateTime]] should "generate dates with times" in { val sut = RandomIntegral.datetimes(df.parse("08/24/2017"), 1000, 1000000) var d0 = 0L check(sut withProbabilityOfEmpty 0.001, d => { val d1 = d0 d0 = d d0 > d1 }) } }
Example 93
Source File: IncomingTransactionDialogFragment.scala From OUTDATED_ledger-wallet-android with MIT License | 5 votes |
package co.ledger.wallet.app.ui.m2fa import java.text.SimpleDateFormat import java.util.Locale import android.content.DialogInterface import android.os.Bundle import android.view.{View, ViewGroup, LayoutInflater} import co.ledger.wallet.R import co.ledger.wallet.app.base.BaseDialogFragment import co.ledger.wallet.core.bitcoin.AmountFormatter import co.ledger.wallet.app.api.m2fa.IncomingTransactionAPI import co.ledger.wallet.core.utils.TR import co.ledger.wallet.core.view.DialogActionBarController import co.ledger.wallet.core.widget.TextView class IncomingTransactionDialogFragment extends BaseDialogFragment { lazy val actions = DialogActionBarController(R.id.dialog_action_bar).noNeutralButton lazy val amount = TR(R.id.amount).as[TextView] lazy val address = TR(R.id.address).as[TextView] lazy val date = TR(R.id.date).as[TextView] lazy val name = TR(R.id.dongle_name).as[TextView] private[this] var _transaction: Option[IncomingTransactionAPI#IncomingTransaction] = None def this(tx: IncomingTransactionAPI#IncomingTransaction) { this() _transaction = Option(tx) setCancelable(false) } override def onCreateView(inflater: LayoutInflater, container: ViewGroup, savedInstanceState: Bundle): View = { inflater.inflate(R.layout.incoming_transaction_dialog_fragment, container, false) } override def onResume(): Unit = { super.onResume() if (_transaction.isEmpty || _transaction.get.isDone) dismiss() _transaction.foreach(_.onCancelled(dismiss)) } override def onPause(): Unit = { super.onPause() _transaction.foreach(_.onCancelled(null)) dismissAllowingStateLoss() } override def onViewCreated(view: View, savedInstanceState: Bundle): Unit = { super.onViewCreated(view, savedInstanceState) actions onPositiveClick { _transaction.foreach(_.accept()) _transaction = None dismiss() } actions onNegativeClick { _transaction.foreach(_.reject()) _transaction = None dismiss() } _transaction match { case Some(transaction) => amount.setText(AmountFormatter.Bitcoin.format(transaction.amount)) address.setText(transaction.address) name.setText(transaction.dongle.name.get) val df = android.text.format.DateFormat.getDateFormat(getActivity) val hf = android.text.format.DateFormat.getTimeFormat(getActivity) date.setText(TR(R.string.incoming_tx_date).as[String].format(df.format(transaction.date), hf.format(transaction.date))) case _ => } } override def onDismiss(dialog: DialogInterface): Unit = { super.onDismiss(dialog) _transaction.foreach(_.cancel()) } } object IncomingTransactionDialogFragment { val DefaultTag = "IncomingTransactionDialogFragment" }
Example 94
Source File: TimerSchedule.scala From spark1.52 with Apache License 2.0 | 5 votes |
package scalaDemo import java.text.SimpleDateFormat import java.util.{Timer, TimerTask} object TimerSchedule { val fTime = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss") val d1 = fTime.parse("2005/12/30 14:10:00"); val timer: Timer = new Timer(); timer.scheduleAtFixedRate(new TimerTask() { override def run(): Unit = { System.out.println("this is task you do6"); } }, d1, 3 * 60 * 1000); }
Example 95
Source File: PMMLModelExport.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export import java.text.SimpleDateFormat import java.util.Date import scala.beans.BeanProperty import org.dmg.pmml.{Application, Header, PMML, Timestamp} private[mllib] trait PMMLModelExport { @BeanProperty val pmml: PMML = new PMML setHeader(pmml) private def setHeader(pmml: PMML): Unit = { val version = getClass.getPackage.getImplementationVersion val app = new Application().withName("Apache Spark MLlib").withVersion(version) val timestamp = new Timestamp() .withContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(new Date())) val header = new Header() .withApplication(app) .withTimestamp(timestamp) pmml.setHeader(header) } }
Example 96
Source File: JacksonMessageWriter.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.status.api.v1 import java.io.OutputStream import java.lang.annotation.Annotation import java.lang.reflect.Type import java.text.SimpleDateFormat import java.util.{Calendar, SimpleTimeZone} import javax.ws.rs.Produces import javax.ws.rs.core.{MediaType, MultivaluedMap} import javax.ws.rs.ext.{MessageBodyWriter, Provider} import com.fasterxml.jackson.annotation.JsonInclude import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} @Provider @Produces(Array(MediaType.APPLICATION_JSON)) private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{ val mapper = new ObjectMapper() { override def writeValueAsString(t: Any): String = { super.writeValueAsString(t) } } mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule) mapper.enable(SerializationFeature.INDENT_OUTPUT) mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL) mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat) override def isWriteable( aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType): Boolean = { true } override def writeTo( t: Object, aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType, multivaluedMap: MultivaluedMap[String, AnyRef], outputStream: OutputStream): Unit = { t match { case ErrorWrapper(err) => outputStream.write(err.getBytes("utf-8")) case _ => mapper.writeValue(outputStream, t) } } override def getSize( t: Object, aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType): Long = { -1L } } private[spark] object JacksonMessageWriter { def makeISODateFormat: SimpleDateFormat = { val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'") val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT")) iso8601.setCalendar(cal) iso8601 } }
Example 97
Source File: SimpleDateParam.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.status.api.v1 import java.text.{ParseException, SimpleDateFormat} import java.util.TimeZone import javax.ws.rs.WebApplicationException import javax.ws.rs.core.Response import javax.ws.rs.core.Response.Status private[v1] class SimpleDateParam(val originalValue: String) { val timestamp: Long = { val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz") try { format.parse(originalValue).getTime() } catch { case _: ParseException => val gmtDay = new SimpleDateFormat("yyyy-MM-dd") gmtDay.setTimeZone(TimeZone.getTimeZone("GMT")) try { gmtDay.parse(originalValue).getTime() } catch { case _: ParseException => throw new WebApplicationException( Response .status(Status.BAD_REQUEST) .entity("Couldn't parse date: " + originalValue) .build() ) } } } }
Example 98
Source File: BusinessLogger.scala From languagedetector with MIT License | 5 votes |
package biz.meetmatch.logging import java.text.SimpleDateFormat import java.util.{Calendar, Date} import org.slf4j.LoggerFactory object BusinessLogger { def getDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") } class BusinessLogger(module: String) { private val logger = LoggerFactory.getLogger("businessLogger") def calcStarted(options: String, sparkAppId: String): Unit = { log(s"CALC\tSTART\t$options\t$sparkAppId") } def calcStopped(result: String): Unit = { log(s"CALC\tSTOP\t$result") } def jobStarted(jobId: Int, jobDescription: String, stageCount: Int, executionId: Option[String]): Unit = { log(s"JOB\t$jobId\tSTART\t${jobDescription.replace("\n", " ").replace("\t", " ")}\t$stageCount\t${executionId.getOrElse("")}") } def jobStopped(jobId: Int, result: String): Unit = { log(s"JOB\t$jobId\tSTOP\t$result") } def transactionStarted(category: String, id: String, stageId: Int = -1, partitionId: Int = -1, taskId: Long = -1, message: String = ""): Unit = { log(s"TRANSACTION\t$category\t$id\tSTART\t$stageId\t$partitionId\t$taskId\t${message.replace("\n", " ").replace("\t", " ")}") } def transactionStopped(category: String, id: String): Unit = { log(s"TRANSACTION\t$category\t$id\tSTOP") } def dataParquetRead(tableName: String, count: Long = -1): Unit = { log(s"DATA\tPARQUET\tREAD\t${tableName.replace("\n", " ").replace("\t", " ")}\t$count") } def dataParquetWritten(tableName: String, countBefore: Long, countAfter: Long): Unit = { log(s"DATA\tPARQUET\tWRITE\t${tableName.replace("\n", " ").replace("\t", " ")}\t$countBefore\t$countAfter") } def dataJdbcRead(tableName: String, count: Long = -1): Unit = { log(s"DATA\tJDBC\tREAD\t${tableName.replace("\n", " ").replace("\t", " ")}\t$count") } def dataJdbcWritten(tableName: String, countBefore: Long = -1, countAfter: Long = -1, countUpdated: Long = -1): Unit = { log(s"DATA\tJDBC\tWRITE\t${tableName.replace("\n", " ").replace("\t", " ")}\t$countBefore\t$countAfter\t$countUpdated") } def info(subject: String, message: String): Unit = { log(s"MESSAGE\tINFO\t${subject.replace("\n", " ").replace("\t", " ")}\t${message.replace("\n", " ").replace("\t", " ")}") } def warn(subject: String, message: String): Unit = { log(s"MESSAGE\tWARN\t${subject.replace("\n", " ").replace("\t", " ")}\t${message.replace("\n", " ").replace("\t", " ")}") } def error(subject: String, message: String): Unit = { log(s"MESSAGE\tERROR\t${subject.replace("\n", " ").replace("\t", " ")}\t${message.replace("\n", " ").replace("\t", " ")}") } private def log(line: String) = { logger.info(s"${BusinessLogger.getDateFormat.format(Calendar.getInstance.getTime)}\t$module\t$line") } } case class LogLineWorkflow(message: String, startDate: String, stopDate: Date, duration: String, state: String, options: Array[Array[String]], sparkAppId: String, calcs: Array[LogLineCalc], warnings: Int, errors: Int) case class LogLineCalc(module: String, startDate: String, stopDate: Date, duration: String, state: String, options: Array[Array[String]], sparkAppId: String, jobs: Array[LogLineJob], transactionCategories: Array[LogLineTransactionCategory], dataReads: Array[LogLineDataRead], dataWrites: Array[LogLineDataWrite], messages: Array[LogLineMessage]) case class LogLineJob(id: Int, startDate: String, duration: String, state: String, description: String, stageCount: Int, executionId: Int = -1) case class LogLineTransactionCategory(category: String, transactions: Array[LogLineTransaction], numberOfTransactions: Int, averageFinishedTransactionDuration: String) case class LogLineTransaction(category: String, 
id: String, stageId: Int, partitionId: Int, taskId: Long, message: String, startDate: String, duration: String, state: String) case class LogLineDataRead(storage: String, tableName: String, count: Int, date: String) case class LogLineDataWrite(storage: String, tableName: String, countBefore: Int, countAfter: Int, countUpdated: Int, date: String) case class LogLineMessage(category: String, subject: String, message: String, date: String)
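A note on the formatter above: BusinessLogger.getDateFormat builds a fresh SimpleDateFormat on every call, which sidesteps the class's lack of thread safety at the cost of a small allocation per log line. A minimal usage sketch, assuming an SLF4J binding with a logger named businessLogger is configured; the module name, options and ids below are made up for illustration:

import biz.meetmatch.logging.BusinessLogger

object BusinessLoggerUsageSketch {
  def main(args: Array[String]): Unit = {
    val log = new BusinessLogger("demo-module")          // hypothetical module name
    log.calcStarted("--date 2020-01-01", "app-00000001") // hypothetical options and Spark app id
    log.jobStarted(1, "load and\tparse input", 3, Some("exec-1"))
    log.dataParquetRead("input_table", 42L)
    log.jobStopped(1, "SUCCESS")
    log.calcStopped("SUCCESS")
  }
}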
Example 99
Source File: PMMLModelExport.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export import java.text.SimpleDateFormat import java.util.{Date, Locale} import scala.beans.BeanProperty import org.dmg.pmml.{Application, Header, PMML, Timestamp} private[mllib] trait PMMLModelExport { @BeanProperty val pmml: PMML = { val version = getClass.getPackage.getImplementationVersion val app = new Application("Apache Spark MLlib").setVersion(version) val timestamp = new Timestamp() .addContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US).format(new Date())) val header = new Header() .setApplication(app) .setTimestamp(timestamp) new PMML("4.2", header, null) } }
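The only difference from the earlier Spark variant is the explicit Locale.US. Even for an all-numeric pattern this matters, because a few default locales use a non-Gregorian calendar (the Thai Buddhist-calendar locale, for example, shifts the year digits). A small standalone sketch of the idea:

import java.text.SimpleDateFormat
import java.util.{Date, Locale}

object LocalePinningSketch {
  def main(args: Array[String]): Unit = {
    val now = new Date()
    // Pinned to Locale.US: same output on every JVM, whatever the default locale is.
    val pinned = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US)
    // Default-locale formatter: usually identical, but locale-dependent calendars
    // (e.g. the Thai Buddhist calendar) can change even the year field.
    val defaultLocale = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss")
    println(pinned.format(now))
    println(defaultLocale.format(now))
  }
}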
Example 100
Source File: JacksonMessageWriter.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.status.api.v1 import java.io.OutputStream import java.lang.annotation.Annotation import java.lang.reflect.Type import java.nio.charset.StandardCharsets import java.text.SimpleDateFormat import java.util.{Calendar, Locale, SimpleTimeZone} import javax.ws.rs.Produces import javax.ws.rs.core.{MediaType, MultivaluedMap} import javax.ws.rs.ext.{MessageBodyWriter, Provider} import com.fasterxml.jackson.annotation.JsonInclude import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} @Provider @Produces(Array(MediaType.APPLICATION_JSON)) private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{ val mapper = new ObjectMapper() { override def writeValueAsString(t: Any): String = { super.writeValueAsString(t) } } mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule) mapper.enable(SerializationFeature.INDENT_OUTPUT) mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL) mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat) override def isWriteable( aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType): Boolean = { true } override def writeTo( t: Object, aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType, multivaluedMap: MultivaluedMap[String, AnyRef], outputStream: OutputStream): Unit = { t match { case ErrorWrapper(err) => outputStream.write(err.getBytes(StandardCharsets.UTF_8)) case _ => mapper.writeValue(outputStream, t) } } override def getSize( t: Object, aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType): Long = { -1L } } private[spark] object JacksonMessageWriter { def makeISODateFormat: SimpleDateFormat = { val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'", Locale.US) val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT")) iso8601.setCalendar(cal) iso8601 } }
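In makeISODateFormat the trailing 'GMT' is quoted, so it is emitted as a literal suffix rather than interpreted as a time-zone field; the SimpleTimeZone(0, "GMT") calendar is what actually pins the rendered fields to UTC. A standalone sketch of what the mapper's date format produces:

import java.text.SimpleDateFormat
import java.util.{Calendar, Date, Locale, SimpleTimeZone}

object IsoGmtFormatSketch {
  def main(args: Array[String]): Unit = {
    // 'GMT' is quoted, so it is printed literally; the GMT calendar below is what
    // shifts the wall-clock fields to UTC.
    val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'", Locale.US)
    iso8601.setCalendar(Calendar.getInstance(new SimpleTimeZone(0, "GMT")))
    println(iso8601.format(new Date(0L))) // 1970-01-01T00:00:00.000GMT
  }
}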
Example 101
Source File: SimpleDateParam.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.status.api.v1 import java.text.{ParseException, SimpleDateFormat} import java.util.{Locale, TimeZone} import javax.ws.rs.WebApplicationException import javax.ws.rs.core.Response import javax.ws.rs.core.Response.Status private[v1] class SimpleDateParam(val originalValue: String) { val timestamp: Long = { val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz", Locale.US) try { format.parse(originalValue).getTime() } catch { case _: ParseException => val gmtDay = new SimpleDateFormat("yyyy-MM-dd", Locale.US) gmtDay.setTimeZone(TimeZone.getTimeZone("GMT")) try { gmtDay.parse(originalValue).getTime() } catch { case _: ParseException => throw new WebApplicationException( Response .status(Status.BAD_REQUEST) .entity("Couldn't parse date: " + originalValue) .build() ) } } } }
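Because SimpleDateParam is private[v1] it cannot be exercised from outside Spark, but the two-stage parse it implements (full timestamp first, then a GMT day-only fallback) is easy to reproduce. A minimal sketch of the same logic, with the hypothetical helper name toMillis:

import java.text.{ParseException, SimpleDateFormat}
import java.util.{Locale, TimeZone}

object TwoStageParseSketch {
  // Mirrors SimpleDateParam: try the full ISO-like pattern first, then fall back
  // to a date-only pattern interpreted in GMT.
  def toMillis(s: String): Long = {
    val full = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz", Locale.US)
    try full.parse(s).getTime
    catch {
      case _: ParseException =>
        val dayOnly = new SimpleDateFormat("yyyy-MM-dd", Locale.US)
        dayOnly.setTimeZone(TimeZone.getTimeZone("GMT"))
        dayOnly.parse(s).getTime // still throws ParseException for unparseable input
    }
  }

  def main(args: Array[String]): Unit = {
    println(toMillis("2015-02-10T00:00:00.000GMT")) // full timestamp branch
    println(toMillis("2015-02-10"))                 // midnight GMT fallback branch
  }
}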
Example 102
Source File: StreamingDemo.scala From flink-demos with Apache License 2.0 | 5 votes |
package com.dataartisans.flink.example.eventpattern import java.text.SimpleDateFormat import java.util import java.util.{Calendar, Properties, UUID} import com.dataartisans.flink.example.eventpattern.kafka.EventDeSerializer import org.apache.flink.api.common.functions.{RuntimeContext, RichFlatMapFunction} import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor} import org.apache.flink.configuration.Configuration import org.apache.flink.streaming.api.scala._ import org.apache.flink.streaming.connectors.elasticsearch.{IndexRequestBuilder, ElasticsearchSink} import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer08 import org.apache.flink.util.Collector import org.elasticsearch.action.index.IndexRequest import org.elasticsearch.client.Requests class StateMachineMapper extends RichFlatMapFunction[Event, Alert] { private[this] var currentState: ValueState[State] = _ override def open(config: Configuration): Unit = { currentState = getRuntimeContext.getState( new ValueStateDescriptor("state", classOf[State], InitialState)) } override def flatMap(t: Event, out: Collector[Alert]): Unit = { val state = currentState.value() val nextState = state.transition(t.event) nextState match { case InvalidTransition => out.collect(Alert(t.sourceAddress, state, t.event)) case x if x.terminal => currentState.clear() case x => currentState.update(nextState) } } }
Example 103
Source File: FECData.scala From s4ds with Apache License 2.0 | 5 votes |
import java.io.File import java.sql.Date import java.text.SimpleDateFormat import com.github.tototoshi.csv._ object FECData { val DataDirectory = "./data/" private val dateParser = new SimpleDateFormat("DD-MMM-YY") private def load(fileName:String):FECData = { val reader = CSVReader.open(new File(DataDirectory + fileName)) val transactions = for { row <- reader.iteratorWithHeaders id = None candidate = row("candidate") contributor = row("contributor_name") state = row("contributor_state") occupation = row("contributor_occupation") match { case "" => None case v => Some(v) } amount = (row("amount").toDouble*100).toInt date = new Date(dateParser.parse(row("date")).getTime) } yield Transaction(id, candidate, contributor, state, occupation, amount, date) new FECData(transactions) } def loadAll:FECData = load("us.csv") def loadOhio:FECData = load("ohio.csv") } class FECData(val transactions:Iterator[Transaction])
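One caveat on the pattern above: in SimpleDateFormat, uppercase D is day-of-year and uppercase Y is week-based year, so "DD-MMM-YY" can resolve values such as 10-Feb-15 to an unexpected date. If the input really is day-month-year, the conventional pattern is probably the one sketched below; Locale.US keeps the English month abbreviation parseable under any default locale:

import java.text.SimpleDateFormat
import java.util.Locale

object FecDatePatternSketch {
  def main(args: Array[String]): Unit = {
    // dd = day of month, yy = calendar year.
    val parser = new SimpleDateFormat("dd-MMM-yy", Locale.US)
    println(parser.parse("10-Feb-15")) // Tue Feb 10 00:00:00 ... 2015
  }
}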
Example 104
Source File: PMMLModelExport.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export import java.text.SimpleDateFormat import java.util.Date import scala.beans.BeanProperty import org.dmg.pmml.{Application, Header, PMML, Timestamp} private[mllib] trait PMMLModelExport { @BeanProperty val pmml: PMML = new PMML pmml.setVersion("4.2") setHeader(pmml) private def setHeader(pmml: PMML): Unit = { val version = getClass.getPackage.getImplementationVersion val app = new Application().withName("Apache Spark MLlib").withVersion(version) val timestamp = new Timestamp() .withContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(new Date())) val header = new Header() .withApplication(app) .withTimestamp(timestamp) pmml.setHeader(header) } }
Example 105
Source File: JacksonMessageWriter.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.status.api.v1 import java.io.OutputStream import java.lang.annotation.Annotation import java.lang.reflect.Type import java.text.SimpleDateFormat import java.util.{Calendar, SimpleTimeZone} import javax.ws.rs.Produces import javax.ws.rs.core.{MediaType, MultivaluedMap} import javax.ws.rs.ext.{MessageBodyWriter, Provider} import com.fasterxml.jackson.annotation.JsonInclude import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} @Provider @Produces(Array(MediaType.APPLICATION_JSON)) private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{ val mapper = new ObjectMapper() { override def writeValueAsString(t: Any): String = { super.writeValueAsString(t) } } mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule) mapper.enable(SerializationFeature.INDENT_OUTPUT) mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL) mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat) override def isWriteable( aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType): Boolean = { true } override def writeTo( t: Object, aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType, multivaluedMap: MultivaluedMap[String, AnyRef], outputStream: OutputStream): Unit = { t match { case ErrorWrapper(err) => outputStream.write(err.getBytes("utf-8")) case _ => mapper.writeValue(outputStream, t) } } override def getSize( t: Object, aClass: Class[_], `type`: Type, annotations: Array[Annotation], mediaType: MediaType): Long = { -1L } } private[spark] object JacksonMessageWriter { def makeISODateFormat: SimpleDateFormat = { val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'") val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT")) iso8601.setCalendar(cal) iso8601 } }
Example 106
Source File: SimpleDateParam.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.status.api.v1 import java.text.{ParseException, SimpleDateFormat} import java.util.TimeZone import javax.ws.rs.WebApplicationException import javax.ws.rs.core.Response import javax.ws.rs.core.Response.Status private[v1] class SimpleDateParam(val originalValue: String) { val timestamp: Long = { val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz") try { format.parse(originalValue).getTime() } catch { case _: ParseException => val gmtDay = new SimpleDateFormat("yyyy-MM-dd") gmtDay.setTimeZone(TimeZone.getTimeZone("GMT")) try { gmtDay.parse(originalValue).getTime() } catch { case _: ParseException => throw new WebApplicationException( Response .status(Status.BAD_REQUEST) .entity("Couldn't parse date: " + originalValue) .build() ) } } } }
Example 107
Source File: JavaJsonUtils.scala From asura with MIT License | 5 votes |
package asura.common.util import java.text.SimpleDateFormat import com.fasterxml.jackson.annotation.JsonInclude import com.fasterxml.jackson.core.JsonParser import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} object JavaJsonUtils extends JsonUtils { val mapper: ObjectMapper = new ObjectMapper() mapper.setDateFormat(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")) mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL) mapper.configure(DeserializationFeature.ACCEPT_EMPTY_ARRAY_AS_NULL_OBJECT, true) mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) mapper.configure(DeserializationFeature.FAIL_ON_IGNORED_PROPERTIES, false) mapper.configure(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY, true) mapper.configure(JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES, true) mapper.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, true) }
Example 108
Source File: JsonUtils.scala From asura with MIT License | 5 votes |
package asura.common.util import java.io.InputStream import java.text.SimpleDateFormat import com.fasterxml.jackson.annotation.JsonInclude import com.fasterxml.jackson.core.JsonParser import com.fasterxml.jackson.core.`type`.TypeReference import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} import com.fasterxml.jackson.module.scala.DefaultScalaModule import com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper object JsonUtils extends JsonUtils { val mapper: ObjectMapper with ScalaObjectMapper = new ObjectMapper() with ScalaObjectMapper mapper.registerModule(DefaultScalaModule) mapper.setDateFormat(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")) mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL) mapper.configure(DeserializationFeature.ACCEPT_EMPTY_ARRAY_AS_NULL_OBJECT, true) mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) mapper.configure(DeserializationFeature.FAIL_ON_IGNORED_PROPERTIES, false) mapper.configure(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY, true) mapper.configure(JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES, true) mapper.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, true) } trait JsonUtils { val mapper: ObjectMapper def stringify(obj: AnyRef): String = { mapper.writeValueAsString(obj) } def parse[T <: AnyRef](content: String, c: Class[T]): T = { mapper.readValue(content, c) } def parse[T <: AnyRef](input: InputStream, c: Class[T]): T = { mapper.readValue(input, c) } def parse[T <: AnyRef](content: String, typeReference: TypeReference[T]): T = { mapper.readValue(content, typeReference) } }
Example 109
Source File: MongodbSchemaIT.scala From Spark-MongoDB with Apache License 2.0 | 5 votes |
package com.stratio.datasource.mongodb.schema import java.text.SimpleDateFormat import java.util.Locale import com.stratio.datasource.MongodbTestConstants import com.stratio.datasource.mongodb.config.{MongodbConfig, MongodbConfigBuilder} import com.stratio.datasource.mongodb.partitioner.MongodbPartitioner import com.stratio.datasource.mongodb.rdd.MongodbRDD import com.stratio.datasource.mongodb._ import org.apache.spark.sql.mongodb.{TemporaryTestSQLContext, TestSQLContext} import org.apache.spark.sql.types.{ArrayType, StringType, StructField, TimestampType} import org.junit.runner.RunWith import org.scalatest._ import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class MongodbSchemaIT extends FlatSpec with Matchers with MongoEmbedDatabase with TestBsonData with MongodbTestConstants { private val host: String = "localhost" private val collection: String = "testCol" private val readPreference = "secondaryPreferred" val testConfig = MongodbConfigBuilder() .set(MongodbConfig.Host,List(host + ":" + mongoPort)) .set(MongodbConfig.Database,db) .set(MongodbConfig.Collection,collection) .set(MongodbConfig.SamplingRatio,1.0) .set(MongodbConfig.ReadPreference, readPreference) .build() val mongodbPartitioner = new MongodbPartitioner(testConfig) val mongodbRDD = new MongodbRDD(TemporaryTestSQLContext, testConfig, mongodbPartitioner) behavior of "A schema" it should "be inferred from rdd with primitives" + scalaBinaryVersion in { withEmbedMongoFixture(primitiveFieldAndType) { mongodProc => val schema = MongodbSchema(mongodbRDD, 1.0).schema() schema.fields should have size 7 schema.fieldNames should contain allOf("string", "integer", "long", "double", "boolean", "null") schema.printTreeString() } } it should "be inferred from rdd with complex fields" + scalaBinaryVersion in { withEmbedMongoFixture(complexFieldAndType1) { mongodProc => val schema = MongodbSchema(mongodbRDD, 1.0).schema() schema.fields should have size 13 schema.fields filter { case StructField(name, ArrayType(StringType, _), _, _) => Set("arrayOfNull", "arrayEmpty") contains name case _ => false } should have size 2 schema.printTreeString() } } it should "resolve type conflicts between fields" + scalaBinaryVersion in { withEmbedMongoFixture(primitiveFieldValueTypeConflict) { mongodProc => val schema = MongodbSchema(mongodbRDD, 1.0).schema() schema.fields should have size 7 schema.printTreeString() } } it should "be inferred from rdd with more complex fields" + scalaBinaryVersion in { withEmbedMongoFixture(complexFieldAndType2) { mongodProc => val schema = MongodbSchema(mongodbRDD, 1.0).schema() schema.fields should have size 5 schema.printTreeString() } } it should "read java.util.Date fields as timestamptype" + scalaBinaryVersion in { val dfunc = (s: String) => new SimpleDateFormat("EEE MMM dd HH:mm:ss Z yyyy", Locale.ENGLISH).parse(s) import com.mongodb.casbah.Imports.DBObject val stringAndDate = List(DBObject("string" -> "this is a simple string.", "date" -> dfunc("Mon Aug 10 07:52:49 EDT 2015"))) withEmbedMongoFixture(stringAndDate) { mongodProc => val schema = MongodbSchema(mongodbRDD, 1.0).schema() schema.fields should have size 3 schema.fields.filter(_.name == "date").head.dataType should equal(TimestampType) schema.printTreeString() } } }
Example 110
Source File: TSQR.scala From SparkAndMPIFactorizations with MIT License | 5 votes |
package edu.berkeley.cs.amplab.mlmatrix import java.util.concurrent.ThreadLocalRandom import scala.collection.mutable.ArrayBuffer import breeze.linalg._ import edu.berkeley.cs.amplab.mlmatrix.util.QRUtils import edu.berkeley.cs.amplab.mlmatrix.util.Utils import org.apache.spark.rdd.RDD import org.apache.spark.SparkConf import org.apache.spark.SparkContext import org.apache.spark.Accumulator import org.apache.spark.SparkContext._ import java.util.Calendar import java.text.SimpleDateFormat class modifiedTSQR extends Serializable { def report(message: String, verbose: Boolean = true) = { val now = Calendar.getInstance().getTime() val formatter = new SimpleDateFormat("H:m:s") if (verbose) { println("STATUS REPORT (" + formatter.format(now) + "): " + message) } } private def reduceQR( acc: Accumulator[Double], a: Tuple2[DenseVector[Double], DenseMatrix[Double]], b: Tuple2[DenseVector[Double], DenseMatrix[Double]]): Tuple2[DenseVector[Double], DenseMatrix[Double]] = { val begin = System.nanoTime val outmat = QRUtils.qrR(DenseMatrix.vertcat(a._2, b._2), false) val outcolnorms = a._1 + b._1 acc += ((System.nanoTime - begin) / 1e6) (outcolnorms, outmat) } }
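The report helper uses the pattern "H:m:s", whose single-letter fields are not zero-padded, so 09:05:03 prints as 9:5:3; that is harmless for a status line, but "HH:mm:ss" gives fixed-width output. A quick comparison:

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}

object PaddingSketch {
  def main(args: Array[String]): Unit = {
    val cal = Calendar.getInstance()
    cal.set(2020, Calendar.JANUARY, 1, 9, 5, 3) // 09:05:03
    val t: Date = cal.getTime
    println(new SimpleDateFormat("H:m:s").format(t))    // 9:5:3
    println(new SimpleDateFormat("HH:mm:ss").format(t)) // 09:05:03
  }
}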
Example 111
Source File: HtmlFetcher.scala From Mastering-Spark-for-Data-Science with MIT License | 5 votes |
package io.gzet import java.text.SimpleDateFormat import com.gravity.goose.{Configuration, Goose} import io.gzet.HtmlFetcher.Content import org.apache.commons.lang.StringUtils import org.apache.spark.rdd.RDD class HtmlFetcher( connectionTimeout: Int = 10000, socketTimeout: Int = 10000 ) extends Serializable { final val USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36" final val ISO_SDF = "yyyy-MM-dd'T'HH:mm:ssZ" def fetchWithContext(urlRdd: RDD[(Long, String)]): RDD[(Long, Content)] = { urlRdd mapPartitions { urls => val sdf = new SimpleDateFormat(ISO_SDF) val goose = getGooseScraper urls map { case (id, url) => (id, fetchUrl(goose, url, sdf)) } } } def fetch(urlRdd: RDD[String]): RDD[Content] = { urlRdd mapPartitions { urls => val sdf = new SimpleDateFormat(ISO_SDF) val goose = getGooseScraper urls map(url => fetchUrl(goose, url, sdf)) } } private def getGooseScraper: Goose = { val conf: Configuration = new Configuration conf.setEnableImageFetching(false) conf.setBrowserUserAgent(USER_AGENT) conf.setConnectionTimeout(connectionTimeout) conf.setSocketTimeout(socketTimeout) new Goose(conf) } private def fetchUrl(goose: Goose, url: String, sdf: SimpleDateFormat) : Content = { try { val article = goose.extractContent(url) var body = None: Option[String] var title = None: Option[String] var description = None: Option[String] var publishDate = None: Option[String] if (StringUtils.isNotEmpty(article.cleanedArticleText)) body = Some(article.cleanedArticleText) if (StringUtils.isNotEmpty(article.title)) title = Some(article.title) if (StringUtils.isNotEmpty(article.metaDescription)) description = Some(article.metaDescription) if (article.publishDate != null) publishDate = Some(sdf.format(article.publishDate)) Content(url, title, description, body, publishDate) } catch { case e: Throwable => Content(url, None, None, None, None) } } } object HtmlFetcher { case class Content( url: String, title: Option[String], description: Option[String], body: Option[String], publishedDate: Option[String] ) }
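Note how both fetch methods build the SimpleDateFormat (and the Goose scraper) inside mapPartitions, so each task gets its own instance: SimpleDateFormat is mutable and not thread-safe, and per-partition construction also keeps the formatter out of the closure shipped from the driver. The same pattern in isolation, as a generic sketch not tied to Goose:

import java.text.SimpleDateFormat
import java.util.{Date, Locale}
import org.apache.spark.rdd.RDD

object PerPartitionFormatterSketch {
  // One formatter per partition: never shared across tasks, never serialized from the driver.
  def formatTimestamps(input: RDD[Long]): RDD[String] =
    input.mapPartitions { millis =>
      val sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US)
      millis.map(m => sdf.format(new Date(m)))
    }
}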
Example 112
Source File: GdeltTagger.scala From Mastering-Spark-for-Data-Science with MIT License | 5 votes |
package io.gzet.tagging.gdelt import java.text.SimpleDateFormat import java.util.Date import com.typesafe.config.ConfigFactory import io.gzet.tagging.classifier.Classifier import io.gzet.tagging.html.HtmlHandler import io.gzet.tagging.html.HtmlHandler.Content import org.apache.spark.Accumulator import org.apache.spark.streaming.dstream.DStream import org.apache.spark.util.LongAccumulator import org.elasticsearch.spark._ class GdeltTagger() extends Serializable { val config = ConfigFactory.load().getConfig("io.gzet.kappa") val isoSdf = "yyyy-MM-dd HH:mm:ss" val esIndex = config.getString("gdeltIndex") val vectorSize = config.getInt("vectorSize") val minProba = config.getDouble("minProba") def predict(gdeltStream: DStream[String], batchId: LongAccumulator) = { // Extract HTML content val gdeltContent = fetchHtmlContent(gdeltStream) // Predict each RDD gdeltContent foreachRDD { batch => batch.cache() val count = batch.count() if (count > 0) { if (Classifier.model.isDefined) { val labels = Classifier.model.get.labels // Predict HashTags using latest Twitter model val textRdd = batch.map(_.body.get) val predictions = Classifier.predictProbabilities(textRdd) val taggedGdelt = batch.zip(predictions) map { case (content, probabilities) => val validLabels = probabilities filter { case (label, probability) => probability > minProba } val labels = validLabels.toSeq .sortBy(_._2) .reverse .map(_._1) (content, labels) } // Saving articles to Elasticsearch taggedGdelt map { case (content, hashTags) => gdeltToJson(content, hashTags.toArray) } saveToEs esIndex } else { // Saving articles to Elasticsearch batch map { content => gdeltToJson(content, Array()) } saveToEs esIndex } } batch.unpersist(blocking = false) } } private def gdeltToJson(content: Content, hashTags: Array[String]) = { val sdf = new SimpleDateFormat(isoSdf) Map( "time" -> sdf.format(new Date()), "body" -> content.body.get, "url" -> content.url, "tags" -> hashTags, "title" -> content.title ) } private def fetchHtmlContent(urlStream: DStream[String]) = { urlStream.map(_ -> 1).groupByKey().map(_._1) mapPartitions { urls => val sdf = new SimpleDateFormat(isoSdf) val htmlHandler = new HtmlHandler() val goose = htmlHandler.getGooseScraper urls map { url => htmlHandler.fetchUrl(goose, url, sdf) } } filter { content => content.isDefined && content.get.body.isDefined && content.get.body.get.length > 255 } map { content => content.get } } }
Example 113
Source File: OilPriceFunc.scala From Mastering-Spark-for-Data-Science with MIT License | 5 votes |
package io.gzet.geomesa import java.text.SimpleDateFormat import java.util.Calendar import org.apache.spark.sql.SparkSession import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions.{udf, window, last, col, lag} object OilPriceFunc { // use this if the window function misbehaves due to timezone e.g. BST // ./spark-shell --driver-java-options "-Duser.timezone=UTC" // ./spark-submit --conf 'spark.driver.extraJavaOptions=-Duser.timezone=UTC' // define a function to reformat the date field def convert(date:String) : String = { val df1 = new SimpleDateFormat("dd/MM/yyyy") val dt = df1.parse(date) val df2 = new SimpleDateFormat("yyyy-MM-dd") df2.format(dt) } // create and save oil price changes def createOilPriceDF(inputfile: String, outputfile: String, spark: SparkSession) = { val oilPriceDF = spark. read. option("header", "true"). option("inferSchema", "true"). csv(inputfile) val convertDateUDF = udf { (Date: String) => convert(Date) } val oilPriceDatedDF = oilPriceDF.withColumn("DATE", convertDateUDF(oilPriceDF("DATE"))) // offset to start at beginning of week val windowDF = oilPriceDatedDF.groupBy(window(oilPriceDatedDF.col("DATE"), "7 days", "7 days", "4 days")) val windowLastDF = windowDF.agg(last("PRICE") as "last(PRICE)").sort("window") // windowLastDF.show(20, false) val sortedWindow = Window.orderBy("window.start") val lagLastCol = lag(col("last(PRICE)"), 1).over(sortedWindow) val lagLastColDF = windowLastDF.withColumn("lastPrev(PRICE)", lagLastCol) // lagLastColDF.show(20, false) val simplePriceChangeFunc = udf { (last: Double, prevLast: Double) => var change = ((last - prevLast) compare 0).signum if (change == -1) change = 0 change.toDouble } val findDateTwoDaysAgoUDF = udf { (date: String) => val dateFormat = new SimpleDateFormat("yyyy-MM-dd") val cal = Calendar.getInstance cal.setTime(dateFormat.parse(date)) cal.add(Calendar.DATE, -3) dateFormat.format(cal.getTime) } val oilPriceChangeDF = lagLastColDF.withColumn("label", simplePriceChangeFunc( lagLastColDF("last(PRICE)"), lagLastColDF("lastPrev(PRICE)") )).withColumn("commonFriday", findDateTwoDaysAgoUDF(lagLastColDF("window.end"))) // oilPriceChangeDF.show(20, false) oilPriceChangeDF.select("label", "commonFriday"). write. format("com.databricks.spark.csv"). option("header", "true"). //.option("codec", "org.apache.hadoop.io.compress.GzipCodec") save(outputfile) } }
Example 114
Source File: PoFile.scala From scalingua with Apache License 2.0 | 5 votes |
package ru.makkarpov.scalingua.pofile import java.io._ import java.nio.charset.StandardCharsets import java.text.SimpleDateFormat import java.util.Date import ru.makkarpov.scalingua.StringUtils import ru.makkarpov.scalingua.pofile.parse.{ErrorReportingParser, PoLexer} object PoFile { val encoding = StandardCharsets.UTF_8 val GeneratedPrefix = "!Generated:" private def headerComment(s: String) = s"# $GeneratedPrefix $s" def apply(f: File): Seq[Message] = apply(new FileInputStream(f), f.getName) def apply(is: InputStream, filename: String = "<unknown>"): Seq[Message] = { val parser = new ErrorReportingParser(new PoLexer(new InputStreamReader(is, StandardCharsets.UTF_8), filename)) parser.parse().value.asInstanceOf[Seq[Message]] } def update(f: File, messages: Seq[Message], escapeUnicode: Boolean = true): Unit = { val output = new NewLinePrintWriter(new OutputStreamWriter(new FileOutputStream(f), encoding), false) try { output.println(headerComment(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date()))) output.println() def printEntry(s: String, m: MultipartString): Unit = { output.print(s + " ") if (m.parts.isEmpty) output.println("\"\"") else for (p <- m.parts) output.println("\"" + StringUtils.escape(p, escapeUnicode) + "\"") } for (m <- messages) { for (s <- m.header.comments) output.println(s"# $s") for (s <- m.header.extractedComments) output.println(s"#. $s") for (s <- m.header.locations.sorted) if (s.line < 0) output.println(s"#: ${s.fileString}") else output.println(s"#: ${s.fileString}:${s.line}") if (m.header.flags.nonEmpty) output.println(s"#, " + m.header.flags.map(_.toString).mkString(", ")) for (t <- m.header.tag) output.println(s"#~ $t") for (c <- m.context) printEntry("msgctxt", c) printEntry("msgid", m.message) m match { case Message.Singular(_, _, _, tr) => printEntry("msgstr", tr) case Message.Plural(_, _, _, id, trs) => printEntry("msgid_plural", id) for ((m, i) <- trs.zipWithIndex) printEntry(s"msgstr[$i]", m) } output.println() } } finally output.close() } }
Example 115
Source File: Utils.scala From lemon-schedule with GNU General Public License v2.0 | 5 votes |
package com.gabry.job.utils import java.sql.Timestamp import java.text.SimpleDateFormat import java.util.Date import java.util.concurrent.TimeUnit import scala.collection.mutable.ArrayBuffer def getLoadedClass(classLoader: ClassLoader):Array[Class[_]] = { val loadedClass = ArrayBuffer.empty[Class[_]] val loadedClassEnum = classesField.get(classLoader).asInstanceOf[java.util.Vector[Class[_]]].elements() while(loadedClassEnum.hasMoreElements){ val nextElement = loadedClassEnum.nextElement() loadedClass.append(nextElement) } loadedClass.toArray } }
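The excerpt above is truncated: it references a classesField that is never defined, and the enclosing object declaration is missing. Presumably classesField is a reflective handle on the private classes vector that java.lang.ClassLoader keeps for every class it defines. A self-contained sketch under that assumption (this reflection works on JDK 8; newer JDKs restrict it and need --add-opens java.base/java.lang=ALL-UNNAMED):

import scala.collection.mutable.ArrayBuffer

object LoadedClassesSketch {
  // Assumption: this is the elided classesField from the original project.
  private val classesField = classOf[ClassLoader].getDeclaredField("classes")
  classesField.setAccessible(true)

  def getLoadedClass(classLoader: ClassLoader): Array[Class[_]] = {
    val loadedClass = ArrayBuffer.empty[Class[_]]
    val loadedClassEnum =
      classesField.get(classLoader).asInstanceOf[java.util.Vector[Class[_]]].elements()
    while (loadedClassEnum.hasMoreElements) {
      loadedClass.append(loadedClassEnum.nextElement())
    }
    loadedClass.toArray
  }
}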
Example 116
Source File: Output.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.spark.clustering.mtm import java.io._ import org.apache.spark.rdd.RDD import org.apache.spark.mllib.linalg.DenseVector import scala.sys.process._ import java.util.Calendar import java.text.SimpleDateFormat import java.io.File import java.io.FileWriter object Output extends Serializable { def saveStr(savingPath: String, value: String, fileName: String = "") = { s"mkdir -p ${savingPath}".! val finalPath = savingPath + fileName val fw = new FileWriter(finalPath, true) fw.write(value + "\n") fw.close() } def write(outputDir: String, datas: RDD[Array[Double]], model: AbstractModel, nbRowSOM:Int, nbColSOM: Int): String = { val now = Calendar.getInstance().getTime() val format = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss") val time = format.format(now) val dim = datas.first.size val datasWithIndex = datas.zipWithIndex.map(_.swap) val path: String = outputDir + "/EXP-" + time + "/" s"mkdir -p ${path}".! val mapMin = Array.fill[Byte](dim)(0).mkString(",") var header = "# mapDim=2 mapSize={"+ nbRowSOM +"," + nbColSOM + "}" header += " pointDim=" + dim + " pointRealDim=" + dim + " mapMin={" + mapMin + "}" val prototypes = model.prototypes.map( d => (d.id, d.point)).sortBy(_._1).map(_._2) println("Write Prototypes...") val protosString = prototypes.map( d => d.toArray.mkString(",")).mkString("\n") // Utiliser fileWriter saveStr(path, header + "\n" + protosString, "maps") val sumAffectedDatas = datas.map( d => (model.findClosestPrototype(d).id, 1)).reduceByKey{ case (sum1, sum2) => sum1 + sum2 }.collectAsMap // fill in all the prototypes that have 0 observations val card = (0 until prototypes.length).map( d => if (sumAffectedDatas.contains(d)) sumAffectedDatas(d) + "" else "0" ) println("Write Cardinalities...") var cardHeader = "# mapDim=2 mapSize={"+ nbRowSOM +"," + nbColSOM + "}" cardHeader += "pointDim=1 pointRealDim=0 mapMin={0} mapMax={0}" val cardStr = card.mkString("\n") saveStr(path, cardHeader + "\n" + cardStr, "cards") val affHeader = "# mapDim=1 mapSize={" + datas.count() + "} pointDim=1 pointRealDim=0 mapMin={0} mapMax={0}" val aff = datasWithIndex.map(d => (d._1, model.findClosestPrototype(d._2).id + "")).sortByKey().values.collect.mkString("\n") println("Write Affiliate...") saveStr(path, affHeader + "\n" + aff, "affs") println("Write Maps...") val maps = prototypes.zip(card).map(d => d._1.toArray.mkString(",") + "," + d._2).mkString("\n") saveStr(path, maps, "mapscard") println("Write successfully...") time } }
Example 117
Source File: KeysExtractor.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.azure.documentdb.sink import java.text.SimpleDateFormat import java.util.TimeZone import org.apache.kafka.common.config.ConfigException import org.apache.kafka.connect.data._ import org.json4s.JsonAST._ object KeysExtractor { private val ISO_DATE_FORMAT: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'") private val TIME_FORMAT: SimpleDateFormat = new SimpleDateFormat("HH:mm:ss.SSSZ") ISO_DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC")) def fromStruct(struct: Struct, keys: Set[String]): Set[(String, Any)] = { keys.map { key => val schema = struct.schema().field(key).schema() val value = struct.get(key) val v = schema.`type`() match { case Schema.Type.INT32 => if (schema != null && Date.LOGICAL_NAME == schema.name) ISO_DATE_FORMAT.format(Date.toLogical(schema, value.asInstanceOf[Int])) else if (schema != null && Time.LOGICAL_NAME == schema.name) TIME_FORMAT.format(Time.toLogical(schema, value.asInstanceOf[Int])) else value case Schema.Type.INT64 => if (Timestamp.LOGICAL_NAME == schema.name) Timestamp.fromLogical(schema, value.asInstanceOf[(java.util.Date)]) else value case Schema.Type.STRING => value.asInstanceOf[CharSequence].toString case Schema.Type.BYTES => if (Decimal.LOGICAL_NAME == schema.name) value.asInstanceOf[BigDecimal].toDouble else throw new ConfigException(s"Schema.Type.BYTES is not supported for $key.") case Schema.Type.ARRAY => throw new ConfigException(s"Schema.Type.ARRAY is not supported for $key.") case Schema.Type.MAP => throw new ConfigException(s"Schema.Type.MAP is not supported for $key.") case Schema.Type.STRUCT => throw new ConfigException(s"Schema.Type.STRUCT is not supported for $key.") case other => throw new ConfigException(s"$other is not supported for $key.") } key -> v } } def fromMap(map: java.util.Map[String, Any], keys: Set[String]): Set[(String, Any)] = { keys.map { key => if (!map.containsKey(key)) throw new ConfigException(s"The key $key can't be found") val value = map.get(key) match { case t: String => t case t: Boolean => t case t: Int => t case t: Long => t case t: Double => t case t: BigInt => t.toLong case t: BigDecimal => t.toDouble case other => throw new ConfigException(s"The key $key is not supported for type ${Option(other).map(_.getClass.getName).getOrElse("NULL")}") } key -> value } } def fromJson(jvalue: JValue, keys: Set[String]): List[(String, Any)] = { jvalue match { case JObject(children) => children.collect { case JField(name, value) if keys.contains(name) => val v = value match { case JBool(b) => b case JDecimal(d) => d.toDouble case JDouble(d) => d case JInt(i) => i.toLong case JLong(l) => l case JString(s) => s case other => throw new ConfigException(s"Field $name is not handled as a key (${other.getClass}). it needs to be a int, long, string, double or decimal") } name -> v } case other => throw new ConfigException(s"${other.getClass} is not supported") } } }
Example 118
Source File: StructFieldsExtractorBytes.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hbase import java.text.SimpleDateFormat import java.util.TimeZone import com.datamountaineer.streamreactor.connect.hbase.BytesHelper._ import com.typesafe.scalalogging.StrictLogging import org.apache.kafka.connect.data._ import scala.collection.JavaConverters._ trait FieldsValuesExtractor { def get(struct: Struct): Seq[(String, Array[Byte])] } case class StructFieldsExtractorBytes(includeAllFields: Boolean, fieldsAliasMap: Map[String, String]) extends FieldsValuesExtractor with StrictLogging { def get(struct: Struct): Seq[(String, Array[Byte])] = { val schema = struct.schema() val fields: Seq[Field] = if (includeAllFields) { schema.fields().asScala } else { val selectedFields = schema.fields().asScala.filter(f => fieldsAliasMap.contains(f.name())) val diffSet = fieldsAliasMap.keySet.diff(selectedFields.map(_.name()).toSet) if (diffSet.nonEmpty) { val errMsg = s"Following columns ${diffSet.mkString(",")} have not been found. Available columns:${fieldsAliasMap.keys.mkString(",")}" logger.error(errMsg) sys.error(errMsg) } selectedFields } val fieldsAndValues = fields.flatMap(field => getFieldBytes(field, struct).map(bytes => fieldsAliasMap.getOrElse(field.name(), field.name()) -> bytes)) fieldsAndValues } private def getFieldBytes(field: Field, struct: Struct): Option[Array[Byte]] = { Option(struct.get(field)) .map { value => Option(field.schema().name()).collect { case Decimal.LOGICAL_NAME => value.asInstanceOf[Any] match { case _:java.math.BigDecimal => value.fromBigDecimal() case arr: Array[Byte] => Decimal.toLogical(field.schema, arr).asInstanceOf[Any].fromBigDecimal() case _ => throw new IllegalArgumentException(s"${field.name()} is not handled for value:$value") } case Time.LOGICAL_NAME => value.asInstanceOf[Any] match { case i: Int => StructFieldsExtractorBytes.TimeFormat.format(Time.toLogical(field.schema, i)).asInstanceOf[Any].fromString() case d:java.util.Date => StructFieldsExtractorBytes.TimeFormat.format(d).asInstanceOf[Any].fromString() case _ => throw new IllegalArgumentException(s"${field.name()} is not handled for value:$value") } case Timestamp.LOGICAL_NAME => value.asInstanceOf[Any] match { case d:java.util.Date => StructFieldsExtractorBytes.DateFormat.format(d).asInstanceOf[Any].fromString() case l: Long => StructFieldsExtractorBytes.DateFormat.format(Timestamp.toLogical(field.schema, l)).asInstanceOf[Any].fromString() case _ => throw new IllegalArgumentException(s"${field.name()} is not handled for value:$value") } }.getOrElse { field.schema().`type`() match { case Schema.Type.BOOLEAN => value.fromBoolean() case Schema.Type.BYTES => value.fromBytes() case Schema.Type.FLOAT32 => value.fromFloat() case Schema.Type.FLOAT64 => value.fromDouble() case Schema.Type.INT8 => value.fromByte() case Schema.Type.INT16 => value.fromShort() case Schema.Type.INT32 => value.fromInt() case Schema.Type.INT64 => value.fromLong() case Schema.Type.STRING => value.fromString() case other => sys.error(s"$other is not a recognized schema!") } } } } } object StructFieldsExtractorBytes { val DateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'") val TimeFormat = new SimpleDateFormat("HH:mm:ss.SSSZ") DateFormat.setTimeZone(TimeZone.getTimeZone("UTC")) }
Example 119
Source File: StructFieldsExtractor.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.voltdb import java.text.SimpleDateFormat import java.util.TimeZone import com.typesafe.scalalogging.StrictLogging import org.apache.kafka.connect.data.{Field, Struct, _} import scala.collection.JavaConverters._ trait FieldsValuesExtractor { def get(struct: Struct): Map[String, Any] } case class StructFieldsExtractor(targetTable: String, includeAllFields: Boolean, fieldsAliasMap: Map[String, String], isUpsert: Boolean = false) extends FieldsValuesExtractor with StrictLogging { require(targetTable != null && targetTable.trim.length > 0) def get(struct: Struct): Map[String, Any] = { val schema = struct.schema() val fields: Seq[Field] = { if (includeAllFields) { schema.fields().asScala } else { val selectedFields = schema.fields().asScala.filter(f => fieldsAliasMap.contains(f.name())) val diffSet = fieldsAliasMap.keySet.diff(selectedFields.map(_.name()).toSet) if (diffSet.nonEmpty) { val errMsg = s"Following columns ${diffSet.mkString(",")} have not been found. Available columns:${fieldsAliasMap.keys.mkString(",")}" logger.error(errMsg) sys.error(errMsg) } selectedFields } } //need to select all fields including null. the stored proc needs a fixed set of params fields.map { field => val schema = field.schema() val value = Option(struct.get(field)) .map { value => //handle specific schema schema.name() match { case Decimal.LOGICAL_NAME => value.asInstanceOf[Any] match { case _:java.math.BigDecimal => value case arr: Array[Byte] => Decimal.toLogical(schema, arr) case _ => throw new IllegalArgumentException(s"${field.name()} is not handled for value:$value") } case Time.LOGICAL_NAME => value.asInstanceOf[Any] match { case i: Int => StructFieldsExtractor.TimeFormat.format(Time.toLogical(schema, i)) case d:java.util.Date => StructFieldsExtractor.TimeFormat.format(d) case _ => throw new IllegalArgumentException(s"${field.name()} is not handled for value:$value") } case Timestamp.LOGICAL_NAME => value.asInstanceOf[Any] match { case d:java.util.Date => StructFieldsExtractor.DateFormat.format(d) case l: Long => StructFieldsExtractor.DateFormat.format(Timestamp.toLogical(schema, l)) case _ => throw new IllegalArgumentException(s"${field.name()} is not handled for value:$value") } case _ => value } }.orNull fieldsAliasMap.getOrElse(field.name(), field.name()) -> value }.toMap } } object StructFieldsExtractor { val DateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'") val TimeFormat: SimpleDateFormat = new SimpleDateFormat("HH:mm:ss.SSSZ") DateFormat.setTimeZone(TimeZone.getTimeZone("UTC")) }
Example 120
Source File: DateFormatConstraint.scala From drunken-data-quality with Apache License 2.0 | 5 votes |
package de.frosner.ddq.constraints import java.text.SimpleDateFormat import org.apache.spark.sql.functions._ import org.apache.spark.sql.{Column, DataFrame} import scala.util.Try case class DateFormatConstraint(columnName: String, formatString: String) extends Constraint { val fun = (df: DataFrame) => { val cannotBeDate = udf((column: String) => column != null && Try { val format = new SimpleDateFormat(formatString) format.setLenient(false) format.parse(column) }.isFailure) val maybeCannotBeDateCount = Try(df.filter(cannotBeDate(new Column(columnName))).count) DateFormatConstraintResult( this, data = maybeCannotBeDateCount.toOption.map(DateFormatConstraintResultData), status = ConstraintUtil.tryToStatus[Long](maybeCannotBeDateCount, _ == 0) ) } } case class DateFormatConstraintResult(constraint: DateFormatConstraint, data: Option[DateFormatConstraintResultData], status: ConstraintStatus) extends ConstraintResult[DateFormatConstraint] { val message: String = { val format = constraint.formatString val columnName = constraint.columnName val maybeFailedRows = data.map(_.failedRows) val maybePluralS = maybeFailedRows.map(failedRows => if (failedRows == 1) "" else "s") val maybeVerb = maybeFailedRows.map(failedRows => if (failedRows == 1) "is" else "are") (status, maybeFailedRows, maybePluralS, maybeVerb) match { case (ConstraintSuccess, Some(0), _, _) => s"Column $columnName is formatted by $format." case (ConstraintFailure, Some(failedRows), Some(pluralS), Some(verb)) => s"Column $columnName contains $failedRows row$pluralS that $verb not formatted by $format." case (ConstraintError(throwable), None, None, None) => s"Checking whether column $columnName is formatted by $format failed: $throwable" case default => throw IllegalConstraintResultException(this) } } } case class DateFormatConstraintResultData(failedRows: Long)
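setLenient(false) is the important detail here: with the default lenient mode, SimpleDateFormat silently normalises impossible dates instead of rejecting them, so the constraint would miss malformed values. A standalone illustration:

import java.text.SimpleDateFormat
import scala.util.Try

object LenientParsingSketch {
  def main(args: Array[String]): Unit = {
    val strict = new SimpleDateFormat("yyyy-MM-dd")
    strict.setLenient(false)
    val lenient = new SimpleDateFormat("yyyy-MM-dd")

    // Lenient (default) parsing rolls the invalid 2015-02-30 forward to March 2nd;
    // the strict parser throws a ParseException instead.
    println(Try(lenient.parse("2015-02-30"))) // Success(Mon Mar 02 ...)
    println(Try(strict.parse("2015-02-30")))  // Failure(ParseException)
  }
}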
Example 121
Source File: TimeHelper.scala From ez-framework with Apache License 2.0 | 5 votes |
package com.ecfront.ez.framework.core.helper import java.text.SimpleDateFormat import java.time.ZonedDateTime import java.util.{Calendar, Date, TimeZone} object TimeHelper { val msf = new SimpleDateFormat("yyyyMMddHHmmssSSS") val sf = new SimpleDateFormat("yyyyMMddHHmmss") val mf = new SimpleDateFormat("yyyyMMddHHmm") val hf = new SimpleDateFormat("yyyyMMddHH") val df = new SimpleDateFormat("yyyyMMdd") val Mf = new SimpleDateFormat("yyyyMM") val yf = new SimpleDateFormat("yyyy") val yyyy_MM_dd_HH_mm_ss_SSS = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS") val yyyy_MM_dd_HH_mm_ss = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") val yyyy_MM_dd = new SimpleDateFormat("yyyy-MM-dd") def dateOffset(offsetValue: Int, offsetUnit: Int, currentTime: Long): Long = { val format = currentTime.toString.length match { case 8 => df case 10 => hf case 12 => mf case 14 => sf case 17 => msf } val calendar = Calendar.getInstance() calendar.setTime(format.parse(currentTime + "")) calendar.add(offsetUnit, offsetValue) format.format(calendar.getTime).toLong } def dateOffset(offsetValue: Int, offsetUnit: Int, currentDate: Date): Date = { val calendar = Calendar.getInstance() calendar.setTime(currentDate) calendar.add(offsetUnit, offsetValue) calendar.getTime } def utc2Local(utcTime: String, localTimePatten: String = "yyyy-MM-dd'T'HH:mm:ss"): String = { val utcDate = Date.from(ZonedDateTime.parse(utcTime).toInstant) val localF = new SimpleDateFormat(localTimePatten) localF.setTimeZone(TimeZone.getDefault) localF.format(utcDate.getTime) } }
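One caution about this helper: the formatter vals (msf, sf, yyyy_MM_dd, ...) are shared singletons, and SimpleDateFormat keeps mutable parsing state, so concurrent use from multiple threads can corrupt results. A hedged alternative sketch that keeps the same call style but gives each thread its own instance:

import java.text.SimpleDateFormat
import java.util.Date

object ThreadSafeTimeFormats {
  // One SimpleDateFormat per thread: same pattern, no shared mutable state.
  private val sf = new ThreadLocal[SimpleDateFormat] {
    override def initialValue(): SimpleDateFormat = new SimpleDateFormat("yyyyMMddHHmmss")
  }

  def format(d: Date): String = sf.get().format(d)
  def parse(s: String): Date = sf.get().parse(s)
}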
Example 122
Source File: TaskMonitor.scala From ez-framework with Apache License 2.0 | 5 votes |
package com.ecfront.ez.framework.core.monitor import java.text.SimpleDateFormat import java.util.Date import java.util.concurrent.ConcurrentHashMap import com.ecfront.ez.framework.core.EZ import com.ecfront.ez.framework.core.helper.{TimeHelper, TimerHelper} import com.ecfront.ez.framework.core.logger.Logging import scala.collection.JavaConversions._ object TaskMonitor extends Logging { private val tasks = new ConcurrentHashMap[String, (String, Date)]() def add(taskName: String): String = { val taskId = EZ.createUUID tasks += taskId -> (taskName, new Date()) taskId } def get(taskId:String):(String,Date)={ tasks.get(taskId) } def poll(taskId:String):(String,Date)={ val d=tasks.get(taskId) tasks -= taskId d } def remove(taskId: String): Unit = { tasks -= taskId } def hasTask(): Boolean = { tasks.nonEmpty } def waitFinish(timeout: Long = Long.MaxValue): Unit = { logger.info("[Monitor]waiting task finish...") val waitStart = new Date().getTime while (tasks.nonEmpty && waitStart + timeout < new Date().getTime) { Thread.sleep(500) if (new Date().getTime - waitStart > 60 * 1000) { var warn = "[Monitor]has some unfinished tasks:\r\n" warn += tasks.map(task => s" > id:${task._1} name:${task._2._1} start time:${TimeHelper.yyyy_MM_dd_HH_mm_ss_SSS.format(task._2._2)}").mkString("\r\n") logger.warn(warn) } } if (tasks.nonEmpty) { var error = "[Monitor]has some unfinished tasks,but time is out:\r\n" error += tasks.map(task => s" > id:${task._1} name:${task._2._1} start time:${TimeHelper.yyyy_MM_dd_HH_mm_ss_SSS.format(task._2._2)}").mkString("\r\n") logger.error(error) } // 再等1秒 Thread.sleep(1000) } private val yyyy_MM_dd_HH_mm_ss_SSS = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS") TimerHelper.periodic(60L, { if (tasks.nonEmpty) { val info = new StringBuffer(s"\r\n--------------Current Execute Tasks : (${tasks.size()}) --------------\r\n") tasks.foreach { i => info.append(s"------ ${yyyy_MM_dd_HH_mm_ss_SSS.format(i._2._2)} : [${i._1}]${i._2._1}\r\n") } logger.trace(info.toString) } }) }
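The loop guard in waitFinish reads `waitStart + timeout < new Date().getTime`, which only stays true for the default Long.MaxValue timeout because the addition overflows; for any finite timeout the loop exits immediately. A sketch of the guard the surrounding log messages suggest was intended:

object WaitGuardSketch {
  // Presumably intended condition: keep polling while tasks remain and the elapsed
  // time is still below the timeout (no Long overflow involved).
  def stillWaiting(tasksNonEmpty: Boolean, waitStart: Long, timeout: Long): Boolean =
    tasksNonEmpty && (System.currentTimeMillis() - waitStart) < timeout
}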
Example 123
Source File: LateDataMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.sideoutput.lateDataProcess import java.text.SimpleDateFormat import java.util.{Calendar, Date} import com.venn.common.Common import com.venn.util.MathUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject def getCreateTime(): String = { // minute = minute + 1 calendar.add(Calendar.SECOND, 10) sdf.format(calendar.getTime) } val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") def main(args: Array[String]): Unit = { val producer = new KafkaProducer[String, String](Common.getProp) calendar.setTime(new Date()) println(sdf.format(calendar.getTime)) var i =74540; while (true) { // val map = Map("id"-> i, "createTime"-> sdf.format(System.currentTimeMillis())) val map = Map("id"-> i, "createTime"-> getCreateTime(), "amt"-> (MathUtil.random.nextInt(10) +"." + MathUtil.random.nextInt(10))) val jsonObject: JSONObject = new JSONObject(map) println(jsonObject.toString()) // topic current_day val msg = new ProducerRecord[String, String]("late_data", jsonObject.toString()) producer.send(msg) producer.flush() Thread.sleep(200) i = i + 1 // System.exit(-1) } } }
Example 124
Source File: ErrorHandler.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.errors import java.text.SimpleDateFormat import java.util.Date import com.typesafe.scalalogging.StrictLogging import scala.util.{Failure, Success, Try} trait ErrorHandler extends StrictLogging { var errorTracker: Option[ErrorTracker] = None private val dateFormatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS'Z'") def initialize(maxRetries: Int, errorPolicy: ErrorPolicy): Unit = { errorTracker = Some(ErrorTracker(maxRetries, maxRetries, "", new Date(), errorPolicy)) } def getErrorTrackerRetries() : Int = { errorTracker.get.retries } def errored() : Boolean = { errorTracker.get.retries != errorTracker.get.maxRetries } def handleTry[A](t : Try[A]) : Option[A] = { require(errorTracker.isDefined, "ErrorTracker is not set call. Initialize.") t match { case Success(s) => { //success, check if we had previous errors. if (errorTracker.get.retries != errorTracker.get.maxRetries) { logger.info(s"Recovered from error ${errorTracker.get.lastErrorMessage} at " + s"${dateFormatter.format(errorTracker.get.lastErrorTimestamp)}") } //cleared error resetErrorTracker() Some(s) } case Failure(f) => //decrement the retry count logger.error(s"Encountered error ${f.getMessage}", f) this.errorTracker = Some(decrementErrorTracker(errorTracker.get, f.getMessage)) handleError(f, errorTracker.get.retries, errorTracker.get.policy) None } } def resetErrorTracker() = { errorTracker = Some(ErrorTracker(errorTracker.get.maxRetries, errorTracker.get.maxRetries, "", new Date(), errorTracker.get.policy)) } private def decrementErrorTracker(errorTracker: ErrorTracker, msg: String): ErrorTracker = { if (errorTracker.maxRetries == -1) { ErrorTracker(errorTracker.retries, errorTracker.maxRetries, msg, new Date(), errorTracker.policy) } else { ErrorTracker(errorTracker.retries - 1, errorTracker.maxRetries, msg, new Date(), errorTracker.policy) } } private def handleError(f: Throwable, retries: Int, policy: ErrorPolicy): Unit = { policy.handle(f, true, retries) } }
Example 125
Source File: DataFrameReportPerformanceSpec.scala From seahorse-workflow-executor with Apache License 2.0 | 5 votes |
package io.deepsense.deeplang.doperables.dataframe import java.sql.Timestamp import java.text.{DateFormat, SimpleDateFormat} import java.util.TimeZone import org.apache.spark.rdd.RDD import org.apache.spark.sql.Row import org.apache.spark.sql.types.{DoubleType, StructField, StructType, TimestampType} import org.scalatest.{BeforeAndAfter, Ignore} import io.deepsense.commons.utils.{DoubleUtils, Logging} import io.deepsense.deeplang.{TestFiles, DeeplangIntegTestSupport} // It's ignored because it does not have got assertions, it only prints report generation time. @Ignore class DataFrameReportPerformanceSpec extends DeeplangIntegTestSupport with BeforeAndAfter with TestFiles with Logging { val testFile = absoluteTestsDirPath.pathWithoutScheme + "/demand_without_header.csv" "DataFrame" should { "generate report" when { "DataFrame has 17K of rows" in { val numberOfTries = 10 var results: Seq[Double] = Seq() for (i <- 1 to numberOfTries) { val dataFrame: DataFrame = demandDataFrame() val start = System.nanoTime() val report = dataFrame.report val end = System.nanoTime() val time1: Double = (end - start).toDouble / 1000000000.0 results = results :+ time1 logger.debug("Report generation time: {}", DoubleUtils.double2String(time1)) } logger.debug( "Mean report generation time: {}", DoubleUtils.double2String(results.fold(0D)(_ + _) / numberOfTries.toDouble)) } } } private def demandDataFrame(): DataFrame = { val rddString: RDD[String] = executionContext.sparkContext.textFile(testFile) val data: RDD[Row] = rddString.map(DataFrameHelpers.demandString2Row) executionContext.dataFrameBuilder.buildDataFrame(demandSchema, data) } private def demandSchema: StructType = StructType(Seq( StructField("datetime", TimestampType), StructField("log_count", DoubleType), StructField("workingday", DoubleType), StructField("holiday", DoubleType), StructField("season2", DoubleType), StructField("season3", DoubleType), StructField("season4", DoubleType))) private def timestamp(s: String): Timestamp = { val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") format.setTimeZone(TimeZone.getTimeZone("UTC")) new Timestamp(format.parse(s).getTime) } } private object DataFrameHelpers { def demandString2Row(s: String): Row = { val split = s.split(",") Row( timestamp(split(0)), split(1).toDouble, split(2).toDouble, split(3).toDouble, split(4).toDouble, split(5).toDouble, split(6).toDouble ) } private def timestamp(s: String): Timestamp = { val format: DateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") format.setTimeZone(TimeZone.getTimeZone("UTC")) new Timestamp(format.parse(s).getTime) } }
Example 126
Source File: ReceiptRenderer.scala From apple-of-my-iap with MIT License | 5 votes |
package com.meetup.iap.receipt import java.text.SimpleDateFormat import com.meetup.iap.AppleApi import AppleApi.{ReceiptResponse, ReceiptInfo} import java.util.{Date, TimeZone} import org.json4s.JsonDSL._ import org.json4s.native.JsonMethods._ import org.json4s.JsonAST.JValue import org.slf4j.LoggerFactory object ReceiptRenderer { val log = LoggerFactory.getLogger(ReceiptRenderer.getClass) private def appleDateFormat(date: Date): String = { val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss 'Etc/GMT'") sdf.setTimeZone(TimeZone.getTimeZone("UTC")) sdf.format(date) } def apply(response: ReceiptResponse): String = { pretty(render( ("status" -> response.statusCode) ~ ("latest_receipt_info" -> response.latestReceiptInfo.reverse.map(renderReceipt)) ~ ("latest_receipt" -> response.latestReceipt))) } private def renderReceipt(receiptInfo: ReceiptInfo): JValue = { val origPurchaseDate = receiptInfo.originalPurchaseDate val origPurchaseDateStr = appleDateFormat(origPurchaseDate) val origPurchaseDateMs = origPurchaseDate.getTime val purchaseDate = receiptInfo.purchaseDate val purchaseDateStr = appleDateFormat(purchaseDate) val purchaseDateMs = purchaseDate.getTime val expiresDate = receiptInfo.expiresDate val expiresDateStr = appleDateFormat(expiresDate) val expiresDateMs = expiresDate.getTime val cancellationDate = receiptInfo.cancellationDate.map { date => appleDateFormat(date) } ("quantity" -> "1") ~ ("product_id" -> receiptInfo.productId) ~ ("transaction_id" -> receiptInfo.transactionId) ~ ("original_transaction_id" -> receiptInfo.originalTransactionId) ~ ("purchase_date" -> purchaseDateStr) ~ ("purchase_date_ms" -> purchaseDateMs.toString) ~ ("original_purchase_date" -> origPurchaseDateStr) ~ ("original_purchase_date_ms" -> origPurchaseDateMs.toString) ~ ("expires_date" -> expiresDateStr) ~ ("expires_date_ms" -> expiresDateMs.toString) ~ ("is_trial_period" -> receiptInfo.isTrialPeriod.toString) ~ //We mimic Apple's weird json here by converting the boolean type to a string ("is_in_intro_offer_period" -> receiptInfo.isInIntroOfferPeriod.map(_.toString)) ~ ("cancellation_date" -> cancellationDate) } }
Example 127
Source File: KafkaOffsetRevertTest.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.kafka import java.text.SimpleDateFormat import java.util.{Calendar, Date} import com.venn.common.Common import com.venn.util.MathUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") def main(args: Array[String]): Unit = { val producer = new KafkaProducer[String, String](Common.getProp(true)) var i = 0; while (true) { // val map = Map("id"-> i, "createTime"-> sdf.format(System.currentTimeMillis())) val map = Map("id" -> i, "createTime" -> sdf.format(System.currentTimeMillis()), "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10))) val jsonObject: JSONObject = new JSONObject(map) println(jsonObject.toString()) // topic current_day val msg = new ProducerRecord[String, String]("kafka_offset", jsonObject.toString()) producer.send(msg) producer.flush() Thread.sleep(1000) i = i + 1 // System.exit(-1) } } }
Example 128
Source File: SlotPartitionMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.demo import java.text.SimpleDateFormat import java.util.{Calendar, Date} import com.venn.common.Common import com.venn.util.MathUtil import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject def getCreateTime(): String = { // minute = minute + 1 calendar.add(Calendar.MILLISECOND, 10) sdf.format(calendar.getTime) } val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") def main(args: Array[String]): Unit = { val prop = Common.getProp prop.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") prop.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") val producer = new KafkaProducer[String, String](Common.getProp) calendar.setTime(new Date()) println(sdf.format(calendar.getTime)) var i = 0; while (true) { val map = Map("id" -> i, "createTime" -> getCreateTime(), "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10))) val jsonObject: JSONObject = new JSONObject(map) println(jsonObject.toString()) // topic current_day val msg = new ProducerRecord[String, String]("slot_partition", jsonObject.toString()) producer.send(msg) producer.flush() if (MathUtil.random.nextBoolean()) { Thread.sleep(1500) } else { Thread.sleep(500) } i = i + 1 // System.exit(-1) } } }
Example 129
Source File: IntervalJoinKafkaKeyMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.intervalJoin import java.text.SimpleDateFormat import com.venn.common.Common import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.util.parsing.json.JSONObject object IntervalJoinKafkaKeyMaker { val topic = "async" def main(args: Array[String]): Unit = { while (true) { left("topic_left") right("topic_right") Thread.sleep(500) } } val sdf = new SimpleDateFormat("yyyyMMddHHmmss") var idLeft = 0 def left(topic: String) = { val producer = new KafkaProducer[String, String](Common.getProp) idLeft = idLeft + 1 val map = Map("id" -> idLeft, "name" -> ("venn" + System.currentTimeMillis()), "date" -> sdf.format(System.currentTimeMillis())) val jsonObject: JSONObject = new JSONObject(map) println("left : " + jsonObject.toString()) val msg = new ProducerRecord[String, String](topic, jsonObject.toString()) producer.send(msg) producer.flush() } var idRight = 0 def right(topic: String) = { val producer = new KafkaProducer[String, String](Common.getProp) idRight = idRight + 1 val map = Map("id" -> idRight, "phone" -> ("17713333333" + idRight), "date" -> sdf.format(System.currentTimeMillis())) val jsonObject: JSONObject = new JSONObject(map) println("right : \t\t\t\t\t\t\t\t" + jsonObject.toString()) val msg = new ProducerRecord[String, String](topic, jsonObject.toString()) producer.send(msg) producer.flush() } }
Example 130
Source File: CacheFile.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.tableJoin

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.api.scala._
import org.apache.flink.api.common.functions.RichMapFunction
import org.apache.flink.configuration.Configuration
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer

import scala.io.Source

object CacheFile {

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    if ("/".equals(File.separator)) {
      // on Linux: enable checkpointing and register the cache file from the server path
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
      env.registerCachedFile("/opt/flink1.7/data/tablejoin.txt", "tablejoin.txt")
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
      // file and register name
      env.registerCachedFile("C:\\Users\\venn\\git\\venn\\flinkDemo\\src\\main\\resources\\data\\tablejoin.txt", "tablejoin.txt")
    }
    // cache table
    val sdf = new SimpleDateFormat("yyyyMMddHHmmss")
    val source = new FlinkKafkaConsumer[ObjectNode]("table_join", new JsonNodeDeserializationSchema, Common.getProp)

    env.addSource(source)
      .map(json => {
        val id = json.get("id").asText()
        val phone = json.get("phone").asText()
        Tuple2(id, phone)
      })
      .map(new RichMapFunction[(String, String), String] {

        var cache = Map("" -> "")

        override def open(parameters: Configuration): Unit = {
          // read cache file
          val file = getRuntimeContext.getDistributedCache.getFile("tablejoin.txt")
          if (file.canRead) {
            val context = Source.fromFile(file, "utf-8").getLines().toArray
            context.foreach(line => {
              val tmp = line.split(",")
              cache += (tmp(0) -> tmp(1))
            })
          }
        }

        override def map(value: (String, String)): String = {
          // look the key up in the cached table; empty string when it is missing
          val name = cache.getOrElse(value._1, "")
          value._1 + "," + value._2 + "," + name
        }
      })
      .print()

    env.execute("cacheFile")
  }
}
Example 131
Source File: StreamingFileSinkDemo.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.filesink

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.api.common.serialization.{BulkWriter, SimpleStringEncoder}
import org.apache.flink.api.scala._
import org.apache.flink.core.fs.Path
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer

object StreamingFileSinkDemo {

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
    }
    val sdf = new SimpleDateFormat("yyyyMMddHHmmss")
    val source = new FlinkKafkaConsumer[ObjectNode]("roll_file_sink", new JsonNodeDeserializationSchema, Common.getProp)

    // row format
    val sinkRow = StreamingFileSink
      .forRowFormat(new Path("D:\\idea_out\\rollfilesink"), new SimpleStringEncoder[ObjectNode]("UTF-8"))
      .withBucketAssigner(new DayBucketAssigner)
      .withBucketCheckInterval(60 * 60 * 1000l) // 1 hour
      .build()

    // use the user-defined BulkWriterFactory and DayBucketAssigner
    val sinkBuck = StreamingFileSink
      .forBulkFormat(new Path("D:\\idea_out\\rollfilesink"), new DayBulkWriterFactory)
      .withBucketAssigner(new DayBucketAssigner())
      .withBucketCheckInterval(60 * 60 * 1000l) // 1 hour
      .build()

    env.addSource(source)
      .assignAscendingTimestamps(json => {
        sdf.parse(json.get("date").asText()).getTime
      })
      .map(json => {
        // json.get("date") + "-" + json.toString
        json
      })
      .addSink(sinkBuck)

    env.execute("StreamingFileSink")
  }
}
Example 132
Source File: FileSinkMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.filesink

import java.text.SimpleDateFormat
import java.util.Calendar

import com.venn.common.Common
import com.venn.stream.api.dayWindow.CurrentDayMaker.{calendar, getCreateTime, sdf}
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.parsing.json.JSONObject

object FileSinkMaker {

  val topic = "async"

  def main(args: Array[String]): Unit = {
    while (true) {
      left("roll_file_sink")
      Thread.sleep(100)
    }
  }

  val sdf = new SimpleDateFormat("yyyyMMddHHmmss")

  var idLeft = 0

  def left(topic: String) = {
    val producer = new KafkaProducer[String, String](Common.getProp)
    idLeft = idLeft + 1
    val map = Map(
      "id" -> idLeft,
      "name" -> ("venn" + System.currentTimeMillis()),
      "date" -> getCreateTime)
    val jsonObject: JSONObject = new JSONObject(map)
    println("left : " + jsonObject.toString())
    val msg = new ProducerRecord[String, String](topic, jsonObject.toString())
    // producer.send(msg)
    // producer.flush()
  }

  var minute: Int = 1
  val calendar: Calendar = Calendar.getInstance()

  def getCreateTime(): String = {
    // minute = minute + 1
    calendar.add(Calendar.MINUTE, 10)
    sdf.format(calendar.getTime)
  }
}
Example 133
Source File: RollingFileSinkDemo.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.filesink

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.fs.StringWriter
import org.apache.flink.streaming.connectors.fs.bucketing.BucketingSink
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.api.scala._

object RollingFileSinkDemo {

  def main(args: Array[String]): Unit = {
    // The original listing starts at the sink definition below; the environment, date format
    // and Kafka source set-up here are reconstructed following the sibling StreamingFileSinkDemo
    // and may differ in detail from the original source.
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
    }
    val sdf = new SimpleDateFormat("yyyyMMddHHmmss")
    val source = new FlinkKafkaConsumer[ObjectNode]("roll_file_sink", new JsonNodeDeserializationSchema, Common.getProp)

    val sink = new BucketingSink[String]("D:\\idea_out\\rollfilesink")
    sink.setBucketer(new DayBasePathBucketer)
    sink.setWriter(new StringWriter[String])
    sink.setBatchSize(1024 * 1024 * 400) // this is 400 MB
    // sink.setBatchRolloverInterval(24 * 60 * 60 * 1000) // this is 24 hours
    // sink.setInProgressPrefix("inProcessPre")
    // sink.setPendingPrefix("pendingpre")
    // sink.setPartPrefix("partPre")

    env.addSource(source)
      .assignAscendingTimestamps(json => {
        sdf.parse(json.get("date").asText()).getTime
      })
      .map(json => {
        json.get("date") + "-" + json.toString
      })
      .addSink(sink)

    env.execute("rollingFileSink")
  }
}
Example 134
Source File: ProcessWindowForTrigger.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.trigger

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.contrib.streaming.state.RocksDBStateBackend
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.scala.function.ProcessAllWindowFunction
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector
import org.slf4j.LoggerFactory

object ProcessWindowDemoForTrigger {

  val logger = LoggerFactory.getLogger(this.getClass)

  def main(args: Array[String]): Unit = {
    // environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    if ("\\".equals(File.pathSeparator)) {
      val rock = new RocksDBStateBackend(Common.CHECK_POINT_DATA_DIR)
      env.setStateBackend(rock)
      // checkpoint interval
      env.enableCheckpointing(10000)
    }

    val topic = "current_day"
    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

    val kafkaSource = new FlinkKafkaConsumer[String](topic, new SimpleStringSchema(), Common.getProp)
    val stream = env.addSource(kafkaSource)
      .map(s => {
        s
      })
      .windowAll(TumblingProcessingTimeWindows.of(Time.seconds(60)))
      .trigger(CountAndTimeTrigger.of(10, Time.seconds(10)))
      .process(new ProcessAllWindowFunction[String, String, TimeWindow] {
        override def process(context: Context, elements: Iterable[String], out: Collector[String]): Unit = {
          var count = 0
          elements.iterator.foreach(s => {
            count += 1
          })
          logger.info("this trigger have : {} item", count)
        }
      })

    // execute job
    env.execute(this.getClass.getName)
  }
}
Example 135
Source File: WindowDemoMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.trigger

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}

import com.venn.common.Common
import com.venn.util.MathUtil
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.parsing.json.JSONObject

object WindowDemoMaker {

  // The calendar declaration was stripped together with the object header; reconstructed here.
  val calendar: Calendar = Calendar.getInstance()

  def getCreateTime(): String = {
    // minute = minute + 1
    calendar.add(Calendar.MILLISECOND, 10)
    sdf.format(calendar.getTime)
  }

  val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

  def main(args: Array[String]): Unit = {
    val producer = new KafkaProducer[String, String](Common.getProp)
    calendar.setTime(new Date())
    println(sdf.format(calendar.getTime))
    var i = 0
    while (true) {
      val map = Map(
        "id" -> i,
        "createTime" -> getCreateTime(),
        "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10)))
      val jsonObject: JSONObject = new JSONObject(map)
      println(jsonObject.toString())
      // topic current_day
      val msg = new ProducerRecord[String, String]("current_day", jsonObject.toString())
      producer.send(msg)
      producer.flush()
      if (MathUtil.random.nextBoolean()) {
        Thread.sleep(1500)
      } else {
        Thread.sleep(500)
      }
      i = i + 1
      // System.exit(-1)
    }
  }
}
Example 136
Source File: TimeSpec.scala From ez-framework with Apache License 2.0 | 5 votes |
package com.ecfront.ez.framework.core.misc

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}

import com.ecfront.ez.framework.core.BasicSpec

class TimeSpec extends BasicSpec {

  test("ZeroTimeOffset Test") {
    val dfd = new SimpleDateFormat("yyyyMMdd")

    def getZeroTimeOffset = {
      val currentTime = new Date()
      val currentDay = dfd.parse(dfd.format(currentTime))
      val calendar = Calendar.getInstance()
      calendar.setTime(currentDay)
      calendar.add(Calendar.DATE, 1)
      calendar.getTime.getTime - currentTime.getTime
    }

    println(getZeroTimeOffset)
    Thread.sleep(10000)
    println(getZeroTimeOffset)

    val calendar = Calendar.getInstance()
    calendar.setTimeInMillis(new Date().getTime + getZeroTimeOffset)
    println(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS").format(calendar.getTime))
  }
}
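For comparison, the same "milliseconds until the next midnight" offset can be computed without SimpleDateFormat by using java.time. This is a minimal sketch, not part of the ez-framework test, and it assumes the default system time zone is the zone the offset should be relative to.

import java.time.{Duration, LocalDate, LocalDateTime, ZoneId}

object ZeroTimeOffsetSketch {

  // Milliseconds from now until the next local midnight, mirroring getZeroTimeOffset above.
  def untilNextMidnightMillis(zone: ZoneId = ZoneId.systemDefault()): Long = {
    val now = LocalDateTime.now(zone)
    val nextMidnight = LocalDate.now(zone).plusDays(1).atStartOfDay()
    Duration.between(now, nextMidnight).toMillis
  }

  def main(args: Array[String]): Unit = {
    println(untilNextMidnightMillis())
  }
}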
Example 137
Source File: CurrentDayPvCountWaterMark.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.dayWindow

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import com.venn.source.TumblingEventTimeWindows
import org.apache.flink.api.common.functions.ReduceFunction
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.contrib.streaming.state.RocksDBStateBackend
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.triggers.{ContinuousEventTimeTrigger, ContinuousProcessingTimeTrigger, CountTrigger}
import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer, FlinkKafkaProducer}

// Note: this listing is a fragment. The object header plus the declarations of `env`, `sdf`,
// the Kafka source and the `Eventx` stream are not included here.

      .assignAscendingTimestamps(event => sdf.parse(event.createTime).getTime)
      // one-day event-time window, shifted by -8 hours so it rolls over at UTC+8 midnight
      .windowAll(TumblingEventTimeWindows.of(Time.days(1), Time.hours(-8)))
      .reduce(new ReduceFunction[Eventx] {
        override def reduce(event1: Eventx, event2: Eventx): Eventx = {
          // println("reduce event : " + event2.toString)
          // val minId: String = if (event1.id.compareTo(event2.id) >= 0) event2.id else event1.id
          // val maxId = if (event1.id.compareTo(event2.id) < 0) event1.id else event2.id
          // val minCreateTime = if (event1.createTime.compareTo(event2.createTime) >= 0) event2.createTime else event1.createTime
          // val maxCreateTime = if (event1.createTime.compareTo(event2.createTime) < 0) event1.createTime else event2.createTime
          // val count = event1.count + event2.count
          // new EventResult(minId, maxId, minCreateTime, maxCreateTime, count)
          new Eventx(event1.id, event2.id, event1.amt + event2.amt)
        }
      })
      // format output event, connect min/max id, add current timestamp
      // .map(event => Event(event.id + "-" + event.createTime, sdf.format(System.currentTimeMillis()), event.count))

    stream.print("result : ")

    // execute job
    env.execute("CurrentDayCount")
  }
}
Example 138
Source File: CurrentDayMaker.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.dayWindow

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}

import com.venn.common.Common
import com.venn.util.MathUtil
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.util.parsing.json.JSONObject

object CurrentDayMaker {

  // The calendar declaration was stripped together with the object header; reconstructed here
  // (FileSinkMaker above imports CurrentDayMaker.calendar, so the member does exist).
  val calendar: Calendar = Calendar.getInstance()

  def getCreateTime(): String = {
    // minute = minute + 1
    calendar.add(Calendar.MINUTE, 10)
    sdf.format(calendar.getTime)
  }

  val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

  def main(args: Array[String]): Unit = {
    val producer = new KafkaProducer[String, String](Common.getProp)
    calendar.setTime(new Date())
    println(sdf.format(calendar.getTime))
    var i = 0
    while (true) {
      // val map = Map("id" -> i, "createTime" -> sdf.format(System.currentTimeMillis()))
      val map = Map(
        "id" -> i,
        "createTime" -> getCreateTime(),
        "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10)))
      val jsonObject: JSONObject = new JSONObject(map)
      println(jsonObject.toString())
      // topic current_day
      val msg = new ProducerRecord[String, String]("current_day", jsonObject.toString())
      producer.send(msg)
      producer.flush()
      Thread.sleep(1000)
      i = i + 1
      // System.exit(-1)
    }
  }
}
Example 139
Source File: PackageSpec.scala From sparkpipe-core with Apache License 2.0 | 5 votes |
package software.uncharted.sparkpipe.ops.core.dataframe.temporal

import org.scalatest._
import software.uncharted.sparkpipe.Spark
import software.uncharted.sparkpipe.ops.core.rdd.toDF
import java.text.SimpleDateFormat
import java.sql.Timestamp

class PackageSpec extends FunSpec {
  describe("ops.core.dataframe.temporal") {

    val rdd = Spark.sc.parallelize(Seq(
      (new Timestamp(new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-18").getTime), "2015-11-18", 1),
      (new Timestamp(new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-19").getTime), "2015-11-19", 2),
      (new Timestamp(new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-20").getTime), "2015-11-20", 3),
      (new Timestamp(new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-21").getTime), "2015-11-21", 4)
    ))
    val df = toDF(Spark.sparkSession)(rdd)

    describe("#dateFilter()") {
      it("should support filtering rows in an input DataFrame with a String timestamp column, based on a date range") {
        val df2 = dateFilter(
          new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-19"),
          new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-21"),
          "yyyy-MM-dd",
          "_2"
        )(df)
        assert(df2.count == 3)
      }

      it("should support filtering rows in an input DataFrame with a String timestamp column, based on a date range, specified using strings") {
        val df2 = dateFilter(
          "2015-11-19",
          "2015-11-20",
          "yyyy-MM-dd",
          "_2"
        )(df)
        assert(df2.count == 2)
      }

      it("should support filtering rows in an input DataFrame with a Timestamp timestamp column, based on a date range") {
        val df2 = dateFilter(
          new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-17"),
          new SimpleDateFormat("yyyy-MM-dd").parse("2015-11-18"),
          "_1"
        )(df)
        assert(df2.count == 1)
      }
    }

    describe("#parseDate()") {
      it("should facilitate converting a string timestamp column into a TimestampType and adding it as a new column") {
        val df2 = parseDate("_2", "new", "yyyy-MM-dd")(df)
        assert(df2.filter("new = _1").count == df.count)
        assert(df2.schema.size == df.schema.size + 1)
      }
    }

    describe("#dateField()") {
      it("should facilitate extracting a single field from a Timestamp column, and placing it in a new column") {
        val df2 = dateField("_1", "new", java.util.Calendar.YEAR)(df)
        assert(df2.filter("new = 2015").count == df.count)
        assert(df2.schema.size == df.schema.size + 1)
      }
    }
  }
}
Example 140
Source File: DateUtilsTest.scala From bigdata-examples with Apache License 2.0 | 5 votes |
package com.timeyang.common.util

import java.text.SimpleDateFormat
import java.util.Calendar

import org.junit.Test

@Test
class DateUtilsTest {

  @Test
  def test(): Unit = {
    // scalastyle:off
    println(DateUtils.current())

    val formatter = new SimpleDateFormat("yyyyMMddHH")
    println(formatter.format(System.currentTimeMillis()))
    // scalastyle:on
  }

  @Test
  def testTime(): Unit = {
    val calendar = Calendar.getInstance()
    val hour = calendar.get(Calendar.HOUR_OF_DAY)
    // scalastyle:off println
    println(hour)
    // scalastyle:on println
    println

    calendar.add(Calendar.MILLISECOND, 60 * 60 * 1000)
    val hourOfNext = calendar.get(Calendar.HOUR_OF_DAY)
    print(hourOfNext)
  }
}
Example 141
Source File: RequestDSL.scala From twitter4s with Apache License 2.0 | 5 votes |
package com.danielasfregola.twitter4s.helpers

import java.text.SimpleDateFormat
import java.util.Locale

import akka.http.scaladsl.model._
import akka.http.scaladsl.model.headers.RawHeader
import akka.testkit.TestProbe
import com.danielasfregola.twitter4s.entities.RateLimit
import org.specs2.specification.AfterEach

import scala.concurrent.duration._
import scala.concurrent.{Await, Future}

abstract class RequestDSL extends TestActorSystem with FixturesSupport with AfterEach {

  def after = system.terminate

  private val timeout = 10 seconds

  val headers = List(RawHeader("x-rate-limit-limit", "15"),
                     RawHeader("x-rate-limit-remaining", "14"),
                     RawHeader("x-rate-limit-reset", "1445181993"))

  val rateLimit = {
    val dateFormatter = new SimpleDateFormat("EEE MMM dd HH:mm:ss ZZZZ yyyy", Locale.ENGLISH)
    val resetDate = dateFormatter.parse("Sun Oct 18 15:26:33 +0000 2015").toInstant
    new RateLimit(limit = 15, remaining = 14, reset = resetDate)
  }

  protected val transport = TestProbe()

  def when[T](future: Future[T]): RequestMatcher[T] = new RequestMatcher(future)

  class RequestMatcher[T](future: Future[T]) {
    protected def responder = new Responder(future)

    def expectRequest(req: HttpRequest): Responder[T] = {
      transport.expectMsg(timeout, req)
      responder
    }

    def expectRequest(fn: HttpRequest => Unit) = {
      transport.expectMsgPF(timeout) {
        case req: HttpRequest => fn(req)
      }
      responder
    }
  }

  class Responder[T](future: Future[T]) {
    def respondWith(response: HttpResponse): Await[T] = {
      transport.reply(response)
      new Await(future)
    }

    def respondWith(resourcePath: String): Await[T] =
      respondWith(HttpResponse(StatusCodes.OK, entity = HttpEntity(MediaTypes.`application/json`, load(resourcePath))))

    def respondWithRated(resourcePath: String): Await[T] =
      respondWith(
        HttpResponse(StatusCodes.OK,
                     headers = headers,
                     entity = HttpEntity(MediaTypes.`application/json`, load(resourcePath))))

    def respondWithOk: Await[Unit] = {
      val response =
        HttpResponse(StatusCodes.OK, entity = HttpEntity(MediaTypes.`application/json`, """{"code": "OK"}"""))
      transport.reply(response)
      new Await(Future.successful((): Unit))
    }
  }

  class Await[T](future: Future[T]) {
    private[helpers] val underlyingFuture = future
    def await(implicit duration: FiniteDuration = 20 seconds) = Await.result(future, duration)
  }

  implicit def awaitToReqMatcher[T](await: Await[T]) = new RequestMatcher(await.underlyingFuture)
}
Example 142
Source File: NetflixPrizeUtils.scala From zen with Apache License 2.0 | 5 votes |
package com.github.cloudml.zen.examples.ml

import java.text.SimpleDateFormat
import java.util.{Locale, TimeZone}

import breeze.linalg.{SparseVector => BSV}
import org.apache.spark.SparkContext
import org.apache.spark.mllib.linalg.{SparseVector => SSV}
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel

import scala.collection.mutable.ArrayBuffer

object NetflixPrizeUtils {

  def genSamplesWithTime(
      sc: SparkContext,
      input: String,
      numPartitions: Int = -1,
      newLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK): (RDD[(Long, LabeledPoint)], RDD[(Long, LabeledPoint)], Array[Long]) = {
    val probeFile = s"$input/probe.txt"
    val dataSetFile = s"$input/training_set
    // ... the remainder of this method (loading and splitting the Netflix data set) is truncated in this listing ...
    val views = Array(maxUserId, maxMovieId + maxUserId, numFeatures).map(_.toLong)

    (trainSet, testSet, views)
  }
}
Example 143
Source File: SparkStreamingAkkaTest.scala From apache-spark-test with Apache License 2.0 | 5 votes |
package com.github.dnvriend.spark.streaming.akka

import java.text.SimpleDateFormat

import akka.actor.Props
import com.github.dnvriend.TestSpec
import org.apache.spark.streaming.akka.{ ActorReceiver, AkkaUtils }

import scala.concurrent.duration._

class CustomActor extends ActorReceiver {
  import context.dispatcher
  def ping() = context.system.scheduler.scheduleOnce(200.millis, self, "foo")
  def today: String = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS").format(new java.util.Date)

  def receive = counter(0)

  def counter(x: Long): Receive = {
    case _ =>
      store(s"counter: $x, msg: Today is $today, have a nice day!")
      context.become(counter(x + 1))
      ping()
  }

  ping()
}

class SparkStreamingAkkaTest extends TestSpec {
  it should "stream from an actor" in withStreamingContext() { spark => ssc =>
    import spark.implicits._
    val lines = AkkaUtils.createStream[String](ssc, Props[CustomActor](), "CustomReceiver")
    lines.foreachRDD { rdd =>
      rdd.toDF.show(truncate = false)
    }

    ssc.start()

    advanceClockOneBatch(ssc)
    sleep()
    advanceClockOneBatch(ssc)
    sleep()
    advanceClockOneBatch(ssc)
    sleep()
    advanceClockOneBatch(ssc)
    sleep()
    advanceClockOneBatch(ssc)
    sleep()
  }
}
Example 144
Source File: CreatePosts.scala From apache-spark-test with Apache License 2.0 | 5 votes |
package com.github.dnvriend

import java.nio.file.Paths
import java.nio.file.StandardOpenOption._
import java.text.SimpleDateFormat
import java.util.Date

import akka.actor.{ ActorSystem, Terminated }
import akka.stream.scaladsl.{ FileIO, Source }
import akka.stream.{ ActorMaterializer, Materializer }
import akka.util.ByteString
import play.api.libs.json.Json

import scala.concurrent.{ ExecutionContext, Future }
import scala.util.Random

object CreatePosts extends App {
  implicit val system: ActorSystem = ActorSystem()
  implicit val mat: Materializer = ActorMaterializer()
  implicit val ec: ExecutionContext = system.dispatcher

  def terminate: Future[Terminated] = system.terminate()

  sys.addShutdownHook {
    terminate
  }

  object Post {
    implicit val format = Json.format[Post]
  }

  final case class Post(
    commentCount: Int,
    lastActivityDate: String,
    ownerUserId: Long,
    body: String,
    score: Int,
    creationDate: String,
    viewCount: Int,
    title: String,
    tags: String,
    answerCount: Int,
    acceptedAnswerId: Long,
    postTypeId: Long,
    id: Long
  )

  def rng = Random.nextInt(20000)

  def now: String = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX").format(new Date())

  val lorem = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nam fringilla magna et pharetra vestibulum."
  val title = " Ut id placerat sapien. Aliquam vel metus orci."

  Source.fromIterator(() => Iterator from 0).map { id =>
    Post(rng, now, rng, List.fill(Random.nextInt(5))(lorem).mkString("\n"), rng, now, rng, s"$rng - $title", title, rng, rng, rng, id)
  }.map(Json.toJson(_).toString)
    .map(json => ByteString(json + "\n"))
    .take(1000000)
    .via(LogProgress.flow())
    .runWith(FileIO.toPath(Paths.get("/tmp/posts.json"), Set(WRITE, TRUNCATE_EXISTING, CREATE)))
    .flatMap { done =>
      println(done)
      terminate
    }.recoverWith {
      case cause: Throwable =>
        cause.printStackTrace()
        terminate
    }
}
Example 145
Source File: Implicits.scala From activemq-cli with Apache License 2.0 | 5 votes |
package activemq.cli.util

import com.typesafe.config.Config
import java.util.Date
import java.util.Locale
import java.text.SimpleDateFormat
import javax.jms.Message
import javax.jms.TextMessage
import scala.collection.JavaConversions._

object Implicits {

  implicit class RichConfig(val underlying: Config) extends AnyVal {
    def getOptionalString(path: String): Option[String] = if (underlying.hasPath(path)) {
      Some(underlying.getString(path))
    } else {
      None
    }
  }

  implicit def optionStringToBoolean(o: Option[String]): Boolean = {
    !o.getOrElse("").isEmpty
  }

  implicit class MessageImprovements(val message: Message) {

    val prettyPrinter = new scala.xml.PrettyPrinter(100000, 2) //scalastyle:ignore

    def toXML(timestampFormat: Option[String] = None): String = {

      val addOptional = (condition: Boolean, xml: scala.xml.Elem) ⇒ if (condition) xml else scala.xml.NodeSeq.Empty

      prettyPrinter.format(<jms-message>
        <header>
          <message-id>{ message.getJMSMessageID }</message-id>
          { addOptional(Option(message.getJMSCorrelationID).isDefined, <correlation-id>{ message.getJMSCorrelationID }</correlation-id>) }
          <delivery-mode>{ message.getJMSDeliveryMode }</delivery-mode>
          <destination>{ message.getJMSDestination }</destination>
          <expiration>{ message.getJMSExpiration }</expiration>
          <priority>{ message.getJMSPriority }</priority>
          <redelivered>{ message.getJMSRedelivered }</redelivered>
          { addOptional(Option(message.getJMSReplyTo).isDefined, <reply-to>{ message.getJMSReplyTo }</reply-to>) }
          <timestamp>{
            timestampFormat match {
              case Some(matched) ⇒ new SimpleDateFormat(matched).format(new Date(message.getJMSTimestamp))
              case _ ⇒ message.getJMSTimestamp
            }
          }</timestamp>
          { addOptional(Option(message.getJMSType).isDefined, <type>{ message.getJMSType }</type>) }
        </header>
        {
          addOptional(message.getPropertyNames.hasMoreElements, <properties>
            { message.getPropertyNames.map(name ⇒ <property><name>{ name }</name><value>{ message.getStringProperty(name.toString) }</value></property>) }
          </properties>)
        }
        {
          message match {
            case textMessage: TextMessage if Option(textMessage.getText).isDefined ⇒ addOptional(
              textMessage.getText,
              <body>{ scala.xml.PCData(textMessage.getText.replaceAll("]]>", "]]]]><![CDATA[>")) }</body>
            )
            case _ ⇒ scala.xml.NodeSeq.Empty
          }
        }
      </jms-message>)
    }

    def textMatches(regex: String): Boolean = {
      if (regex) {
        message match {
          case textMessage: TextMessage ⇒ (regex.r findFirstIn textMessage.getText)
          case _ ⇒ false
        }
      } else {
        true
      }
    }
  }
}
Example 146
Source File: DateUtils.scala From sundial with MIT License | 5 votes |
package util

import java.text.SimpleDateFormat
import java.util.Date
import java.util.concurrent.TimeUnit

object DateUtils {

  val basicDateTimeFormat = new SimpleDateFormat("MMM d, H:mm z")

  def prettyRelativeTime(when: Date, now: Date): String = {
    s"${prettyDuration(when, now)} ago"
  }

  def prettyDuration(start: Date, end: Date): String = {
    val diff = end.getTime - start.getTime
    prettyDuration(diff, TimeUnit.MILLISECONDS)
  }

  def prettyDuration(amount: Long, unit: TimeUnit): String = {
    if (unit.toSeconds(amount) < 120) {
      s"${unit.toSeconds(amount)} seconds"
    } else if (unit.toMinutes(amount) < 180) {
      s"${unit.toMinutes(amount)} minutes"
    } else if (unit.toHours(amount) < 72) {
      s"${unit.toHours(amount)} hours"
    } else {
      s"${unit.toDays(amount)} days"
    }
  }
}
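A rough usage sketch for the helpers above; the DateUtilsUsage object is hypothetical (not part of sundial) and only illustrates the unit thresholds in prettyDuration.

package util

import java.util.Date
import java.util.concurrent.TimeUnit

// Hypothetical driver, placed in the same `util` package for brevity.
object DateUtilsUsage {
  def main(args: Array[String]): Unit = {
    println(DateUtils.prettyDuration(90, TimeUnit.SECONDS))  // "90 seconds"  (below the 120-second cutoff)
    println(DateUtils.prettyDuration(150, TimeUnit.MINUTES)) // "150 minutes" (below the 180-minute cutoff)

    val now = new Date()
    val anHourAgo = new Date(now.getTime - TimeUnit.HOURS.toMillis(1))
    println(DateUtils.prettyRelativeTime(anHourAgo, now))    // "60 minutes ago"
  }
}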
Example 147
Source File: Util.scala From aardpfark with Apache License 2.0 | 5 votes |
package com.ibm.aardpfark.pfa.utils

import java.text.SimpleDateFormat
import java.util.{Date, TimeZone}

object Utils {

  def getCurrentDate = {
    val fmt = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'")
    fmt.setTimeZone(TimeZone.getTimeZone("UTC"))
    fmt.format(new Date())
  }

  def getCurrentTs = {
    new Date().getTime
  }
}
Example 148
Source File: StreamingProducer.scala From Scala-Programming-Projects with MIT License | 4 votes |
package coinyser

import java.sql.Timestamp
import java.text.SimpleDateFormat
import java.util.TimeZone

import cats.effect.IO
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.pusher.client.Client
import com.pusher.client.channel.SubscriptionEventListener
import com.typesafe.scalalogging.StrictLogging

object StreamingProducer extends StrictLogging {

  def subscribe(pusher: Client)(onTradeReceived: String => Unit): IO[Unit] =
    for {
      _ <- IO(pusher.connect())
      channel <- IO(pusher.subscribe("live_trades"))
      _ <- IO(channel.bind("trade", new SubscriptionEventListener() {
        override def onEvent(channel: String, event: String, data: String): Unit = {
          logger.info(s"Received event: $event with data: $data")
          onTradeReceived(data)
        }
      }))
    } yield ()

  val mapper: ObjectMapper = {
    val m = new ObjectMapper()
    m.registerModule(DefaultScalaModule)
    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    // Very important: the storage must be in UTC
    sdf.setTimeZone(TimeZone.getTimeZone("UTC"))
    m.setDateFormat(sdf)
  }

  def deserializeWebsocketTransaction(s: String): WebsocketTransaction =
    mapper.readValue(s, classOf[WebsocketTransaction])

  def convertWsTransaction(wsTx: WebsocketTransaction): Transaction =
    Transaction(
      timestamp = new Timestamp(wsTx.timestamp.toLong * 1000),
      tid = wsTx.id,
      price = wsTx.price,
      sell = wsTx.`type` == 1,
      amount = wsTx.amount)

  def serializeTransaction(tx: Transaction): String = mapper.writeValueAsString(tx)
}
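A minimal round-trip sketch for the mappers above. The WebsocketTransaction and Transaction case classes are not included in this listing, so the JSON field names below are inferred from convertWsTransaction and should be treated as assumptions rather than the real Bitstamp payload.

package coinyser

// Hypothetical driver, not part of the original project.
object StreamingProducerRoundTrip {
  def main(args: Array[String]): Unit = {
    // Field names inferred from convertWsTransaction; the real payload may carry more fields.
    val sample = """{"timestamp":"1530796800","id":42,"price":6820.5,"type":1,"amount":0.25}"""
    val wsTx = StreamingProducer.deserializeWebsocketTransaction(sample)
    val tx = StreamingProducer.convertWsTransaction(wsTx)
    // The mapper serializes the Timestamp as "yyyy-MM-dd HH:mm:ss" in UTC, as configured above.
    println(StreamingProducer.serializeTransaction(tx))
  }
}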