scala.util.matching.Regex Scala Examples
The following examples show how to use scala.util.matching.Regex.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: TemplatesEngine.scala From sbt-org-policies with Apache License 2.0 | 6 votes |
package sbtorgpolicies.templates

import cats.syntax.either._
import sbtorgpolicies.io._
import sbtorgpolicies.exceptions._
import sbtorgpolicies.templates.utils._

import scala.util.matching.Regex

/**
 * Fills `{{key}}` placeholders in template text and inserts sections into existing content.
 *
 * Every public operation wraps its work in `Either.catchNonFatal` and maps a failure to an
 * `IOException` carrying the original cause.
 */
class TemplatesEngine(fileReader: FileReader = FileReader, fileWriter: FileWriter = FileWriter) {

  /** Reads `inputPath` through `fileReader` and applies [[replaceWith]] to its content. */
  def replaceFileContentsWith(inputPath: String, replacements: Replacements): IOResult[String] =
    fileReader.withFileContent(inputPath, replaceWith(_, replacements))

  /**
   * Replaces every `{{key}}` occurrence in `content` with the string form of the matching
   * replacement, applying the replacements one after another.
   */
  def replaceWith(content: String, replacements: Replacements): IOResult[String] =
    Either
      .catchNonFatal {
        replacements.foldLeft(content) {
          case (str, (key, replaceable)) =>
            replacementPattern(key).replaceAllIn(str, escapeGroupRef(replaceable.asString))
        }
      }
      .leftMap(e => IOException(s"Error replacing content", Some(e)))

  /**
   * Inserts `section` into `content` at the place described by `appendPosition`.
   *
   * @param content        the text to modify
   * @param appendPosition where the section goes (beginning, end, after a match, or replacing
   *                       the span between two matches)
   * @param section        the text to insert
   */
  def insertIn(
      content: String,
      appendPosition: AppendPosition,
      section: String
  ): IOResult[String] = {

    // Replaces everything from the first `from` match up to the end of the first following
    // `to` match with `section`; None when either delimiter is missing.
    def insertBetween(content: String, from: Regex, to: Regex, section: String): Option[String] =
      for {
        startMatch <- from.findFirstMatchIn(content)
        endContent = safeSubStr(content, startMatch.end)
        endMatch <- to.findFirstMatchIn(endContent)
      } yield content.substring(0, startMatch.start) + section + safeSubStr(
        endContent,
        endMatch.end
      )

    // Falls back to prepending/appending the section (or leaving the content untouched) when
    // the delimiters are not found.
    def replaceSection(
        content: String,
        from: Regex,
        to: Regex,
        section: String,
        insertIfNotFound: Boolean,
        defaultTop: Boolean
    ): String =
      insertBetween(content, from, to, section) match {
        case Some(s)                             => s
        case _ if insertIfNotFound && defaultTop => section + "\n" + content
        case _ if insertIfNotFound               => content + "\n" + section
        case _                                   => content
      }

    Either
      .catchNonFatal {
        appendPosition match {
          case AppendAtTheBeginning => section + "\n" + content
          case AppendAtTheEnd       => content + "\n" + section
          case AppendAfter(regex)   =>
            // "$0" is the matched text; the section itself must be escaped so that a literal
            // '$' in it is not interpreted as a group reference by `replaceFirstIn`.
            regex.replaceFirstIn(content, "$0\n" + escapeGroupRef(section))
          case ReplaceSection(from, to, insert, top) =>
            replaceSection(content, from, to, section, insert, top)
        }
      }
      .leftMap(e => IOException(s"Error inserting content", Some(e)))
  }

  // The key is quoted so that regex metacharacters in it are matched literally.
  private[this] def replacementPattern(key: String): Regex =
    s"\\{\\{${Regex.quote(key)}\\}\\}".r

  // The '$' char is interpreted by the `replaceAll` method as a substitution of a group
  // Since we're not using groups in the regex above, it'll fail. If we found a '$' we want literally that char
  private[this] def escapeGroupRef(str: String): String = {
    val first = str.headOption
      .map {
        case '$' => "\\$"
        case s   => s.toString
      }
      .getOrElse("")
    // Walk the string in overlapping pairs: escape a '$' unless it is already preceded by '\'.
    val tail = str.sliding(2, 1).map(_.splitAt(1)).map {
      case (s, "$") if s != "\\" => "\\$"
      case (_, s)                => s
    }
    first ++ tail.mkString("")
  }
}
Example 2
Source File: InputValidator.scala From iotchain with MIT License | 5 votes |
package jbok.app.helper

import jbok.core.models.Account
import jbok.evm.solidity.ABIDescription.ParameterType
import jbok.evm.solidity.{AddressType, BoolType, BytesNType, BytesType, IntType, SolidityParser, StringType, UIntType}

import scala.util.matching.Regex

/** String-level validators for user input (addresses, numbers, hex data, network endpoints). */
object InputValidator {
  val number   = "0123456789"
  val alpha    = new Regex("[a-z]+")
  val hex      = "0123456789abcdef"
  val ipv4Item = "([01]?\\d\\d?|2[0-4]\\d|25[0-5])"
  val ipv4     = new Regex(s"^$ipv4Item\\.$ipv4Item\\.$ipv4Item\\.$ipv4Item" + "$")

  /** Strips a leading "0x" if present. */
  private def getHexValue(data: String): String = data match {
    case d if d.startsWith("0x") => d.substring(2)
    case _                       => data
  }

  /** True for exactly 40 lowercase hex digits, with or without a "0x" prefix. */
  def isValidAddress(address: String): Boolean = {
    val value = getHexValue(address)
    value.length == 40 && value.forall(hex.contains(_))
  }

  /** True for a non-empty string of decimal digits. */
  def isValidNumber(n: String): Boolean = n.length > 0 && n.forall(number.contains(_))

  /**
   * True when `value` is a non-empty decimal number and, if an account is given, does not
   * exceed that account's balance.
   *
   * An empty `value` is rejected up front; previously it reached `BigInt("")` and threw a
   * `NumberFormatException` whenever an account was supplied.
   */
  def isValidValue(value: String, account: Option[Account]): Boolean =
    isValidNumber(value) && account.forall(_.balance.toN >= BigInt(value))

  /** True for a non-empty, even-length lowercase hex string, with or without "0x". */
  def isValidData(data: String): Boolean = {
    val value = getHexValue(data)
    value.length > 0 && value.length % 2 == 0 && value.forall(hex.contains(_))
  }

  def isValidIPv4(data: String): Boolean = ipv4.pattern.matcher(data).matches

  /**
   * True for a decimal port strictly between 1000 and 65535.
   * NOTE(review): both bounds are exclusive, so 65535 itself is rejected — confirm intended.
   */
  def isValidPort(data: String): Boolean =
    // The length check runs before `toInt` so the conversion cannot overflow.
    data.nonEmpty && data.forall(number.contains(_)) && data.length <= 5 && {
      val port = data.toInt
      port > 1000 && port < 65535
    }

  def isValidCode(data: String): Boolean = SolidityParser.parseSource(data).isSuccess

  def isValidBool(data: String): Boolean = data == "true" || data == "false"

  /**
   * True when `data` (optionally "0x"-prefixed) is lowercase hex of the expected size.
   *
   * @param length expected number of bytes; when absent any even number of hex digits passes
   */
  def isValidBytes(data: String, length: Option[Int] = None): Boolean = {
    val value = getHexValue(data)
    // Validate the prefix-stripped value: checking `data` rejected every "0x…" input
    // because 'x' is not a hex digit.
    val dataValid = value.forall(hex.contains(_))
    dataValid && length.map(value.length == _ * 2).getOrElse(value.length % 2 == 0)
  }

  /** True when `data` is wrapped in double quotes. */
  def isValidString(data: String): Boolean =
    data.length >= 2 && data.startsWith("\"") && data.endsWith("\"")

  /** Picks the validator for a non-array Solidity parameter type; None for arrays and unknown types. */
  def getValidator(parameterType: ParameterType): Option[String => Boolean] =
    if (parameterType.arrayList.isEmpty) {
      parameterType.solidityType match {
        case AddressType()            => Some(isValidAddress)
        case UIntType(_) | IntType(_) => Some(isValidNumber)
        case StringType()             => Some(isValidString)
        case BoolType()               => Some(isValidBool)
        case BytesType() =>
          // Bytes are entered as a quoted string; validate the text between the quotes.
          def isValidBytesNone(data: String) =
            isValidString(data) && isValidBytes(data.substring(1, data.length - 1))
          Some(isValidBytesNone)
        case BytesNType(n) =>
          def isValidBytesN(data: String) =
            isValidString(data) && isValidBytes(data.substring(1, data.length - 1), Some(n))
          Some(isValidBytesN)
        case _ => None
      }
    } else {
      None
    }

  /** Returns the (prefix, suffix) to wrap raw input in: quotes for a plain address, nothing otherwise. */
  def getInputPrefixAndSuffix(parameterType: ParameterType): (String, String) = {
    val fix   = "\""
    val empty = ""
    if (parameterType.arrayList.isEmpty) {
      parameterType.solidityType match {
        case AddressType() => fix -> fix
        case _             => empty -> empty
      }
    } else {
      empty -> empty
    }
  }
}
Example 3
Source File: ResourceFileGoldenCodecLaws.scala From circe-golden with Apache License 2.0 | 5 votes |
package io.circe.testing.golden

import cats.instances.list._, cats.instances.try_._
import cats.syntax.apply._, cats.syntax.traverse._
import io.circe.{ Decoder, Encoder, Printer }
import java.io.{ File, PrintWriter }
import org.scalacheck.{ Arbitrary, Gen }
import scala.reflect.runtime.universe.TypeTag
import scala.util.{ Failure, Try }
import scala.util.matching.Regex

/**
 * Golden-file codec laws whose examples live as `<name>-<seed>.json` files in a resource
 * directory. Existing files are loaded; when none exist, `count` new ones are generated.
 *
 * @param name            file-name prefix of the golden files
 * @param resourceRootDir directory under which the resource package directories are created
 * @param resourcePackage package path segments of the resource directory
 * @param size            generator size
 * @param count           required number of golden files
 * @param printer         JSON printer
 */
abstract class ResourceFileGoldenCodecLaws[A](
  name: String,
  resourceRootDir: File,
  resourcePackage: List[String],
  val size: Int,
  count: Int,
  override protected val printer: Printer
) extends GoldenCodecLaws[A]
    with ExampleGeneration[A] {

  private[this] val resourceRootPath: String = "/" + resourcePackage.mkString("/") + "/"
  private[this] val resourceDir: File = resourcePackage.foldLeft(resourceRootDir) {
    case (acc, p) => new File(acc, p)
  }
  // Matches the part of a file name that follows `name`: "-" + 44-character seed + ".json".
  private[this] val GoldenFilePattern: Regex = "^-(.{44})\\.json$".r

  private[this] lazy val loadGoldenFiles: Try[List[(A, String)]] =
    Resources.open(resourceRootPath).flatMap { dirSource =>
      // The directory listing is closed even when reading or decoding throws; the enclosing
      // `Try.flatMap` turns such an exception into a Failure.
      val files =
        try {
          dirSource.getLines.flatMap {
            case fileName if fileName.startsWith(name) =>
              fileName.drop(name.length) match {
                case GoldenFilePattern(seed) => Some((seed, fileName))
                case _                       => None
              }
            case _ => None
          }.toList.traverse[Try, (A, String)] {
            case (seed, name) =>
              val contents = Resources.open(resourceRootPath + name).map { source =>
                try source.getLines.mkString("\n")
                finally source.close()
              }
              (getValueFromBase64Seed(seed), contents).tupled
          }
        } finally dirSource.close()

      // Fail if we don't have either zero golden files or the required number.
      files.flatMap { values =>
        if (values.size == 0 || values.size == count) files
        else Failure(new IllegalStateException(s"Expected 0 or $count golden files, got ${values.size}"))
      }
    }

  private[this] def generateGoldenFiles: Try[List[(A, String)]] =
    generateRandomGoldenExamples(count).traverse {
      case (seed, value, encoded) =>
        Try {
          resourceDir.mkdirs()
          val file   = new File(resourceDir, s"$name-${seed.toBase64}.json")
          val writer = new PrintWriter(file)
          // Close the writer even if printing fails.
          try writer.print(encoded)
          finally writer.close()
          (value, encoded)
        }
    }

  /** The loaded golden examples, or freshly generated ones when no golden file exists yet. */
  protected lazy val goldenExamples: Try[List[(A, String)]] =
    loadGoldenFiles.flatMap(fs => if (fs.isEmpty) generateGoldenFiles else loadGoldenFiles)
}

object ResourceFileGoldenCodecLaws {

  /** Builds the laws for an explicit resource location. */
  def apply[A](
    name: String,
    resourceRootDir: File,
    resourcePackage: List[String],
    size: Int,
    count: Int,
    printer: Printer
  )(implicit decodeA: Decoder[A], encodeA: Encoder[A], arbitraryA: Arbitrary[A]): GoldenCodecLaws[A] =
    new ResourceFileGoldenCodecLaws[A](name, resourceRootDir, resourcePackage, size, count, printer) {
      val decode: Decoder[A] = decodeA
      val encode: Encoder[A] = encodeA
      val gen: Gen[A]        = arbitraryA.arbitrary
    }

  /** Builds the laws with name, root directory and package inferred through `Resources`. */
  def apply[A](
    size: Int = 100,
    count: Int = 1,
    printer: Printer = Printer.spaces2
  )(
    implicit decodeA: Decoder[A],
    encodeA: Encoder[A],
    arbitraryA: Arbitrary[A],
    typeTagA: TypeTag[A]
  ): GoldenCodecLaws[A] =
    apply[A](Resources.inferName[A], Resources.inferRootDir, Resources.inferPackage[A], size, count, printer)
}
Example 4
Source File: Translator.scala From slick-jdbc-extension-scala with MIT License | 5 votes |
package com.github.tarao
package slickjdbc
package query

import slick.jdbc.SQLActionBuilder

/** Locates the stack frame from which a query was issued. */
trait Context {
  import java.lang.StackTraceElement

  /**
   * Scans the current stack from the outermost frame inwards, stops at the first frame that
   * belongs to this object's class, and returns the last frame seen before it. A placeholder
   * element is returned when no such frame exists.
   */
  def caller =
    Thread.currentThread.getStackTrace.reverse.takeWhile { trace =>
      trace.getClassName != getClass.getName
    }.lastOption getOrElse new StackTraceElement("Unknown", "method", null, -1)
}

/** A rewriting step applied to a query string before it is executed. */
trait Translator {
  def apply(query: String, context: Context): String
}

/** Applies `stripMargin` to the query. */
object MarginStripper extends Translator {
  def apply(query: String, context: Context) = query.stripMargin
}

/** Embeds the caller's stack frame into the query as an SQL comment. */
object CallerCommenter extends Translator {
  def apply(query: String, context: Context) =
    new SQLComment(context.caller).embedTo(query)
}

/** An arbitrary value rendered as an SQL block comment. */
case class SQLComment(comment: Any) {
  import scala.util.matching.Regex

  /** The comment text with any comment terminator rewritten so it cannot close the block early. */
  def escaped = comment.toString.replaceAllLiterally("*/", """*\\/""")

  /**
   * Replaces the first space of `query` with the comment surrounded by spaces. A query
   * without a space is returned unchanged.
   *
   * The replacement previously was a bare space, which made this method a no-op and left
   * `escaped` unused; the comment is now actually embedded.
   */
  def embedTo(query: String) =
    // quoteReplacement keeps '$' and '\' in the comment from being read as group references.
    query.replaceFirst(" ", Regex.quoteReplacement(s" /* ${escaped} */ "))
}

object Translator extends Context {
  implicit val default: Iterable[Translator] =
    Seq(MarginStripper, CallerCommenter)

  /** Runs the query through every translator in order. */
  def translate(query: String)(implicit
    translators: Iterable[Translator]
  ) = translators.foldLeft(query) { (q, translate) => translate(q, this) }

  /** Translates the concatenated parts of `builder` and returns a builder holding the result. */
  def translateBuilder(builder: SQLActionBuilder)(implicit
    translators: Iterable[Translator]
  ): SQLActionBuilder = {
    val query = builder.queryParts.iterator.map(String.valueOf).mkString
    SQLActionBuilder(Seq(translate(query)(translators)), builder.unitPConv)
  }
}
Example 5
Source File: MetricsConfig.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics

import java.io.{FileInputStream, InputStream}
import java.util.Properties

import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.util.matching.Regex

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.util.Utils

/** Holds the metrics properties and the helpers that populate them. */
private[spark] class MetricsConfig(conf: SparkConf) extends Logging {

  private val DEFAULT_PREFIX = "*"
  private val INSTANCE_REGEX = "^(\\*|[a-zA-Z]+)\\.(.+)".r
  private val DEFAULT_METRICS_CONF_FILENAME = "metrics.properties"

  private[metrics] val properties = new Properties()
  private[metrics] var perInstanceSubProperties: mutable.HashMap[String, Properties] = null

  /** Writes the built-in servlet sink settings into `prop`. */
  private def setDefaultProperties(prop: Properties): Unit = {
    val defaults = Seq(
      "*.sink.servlet.class" -> "org.apache.spark.metrics.sink.MetricsServlet",
      "*.sink.servlet.path" -> "/metrics/json",
      "master.sink.servlet.path" -> "/metrics/master/json",
      "applications.sink.servlet.path" -> "/metrics/applications/json"
    )
    defaults.foreach { case (key, value) => prop.setProperty(key, value) }
  }

  /**
   * Loads `properties` from the given file, or from the default resource on the Spark class
   * loader when no path is given. A missing resource is ignored; any exception is logged.
   */
  private[this] def loadPropertiesFromFile(path: Option[String]): Unit = {
    var stream: InputStream = null
    try {
      stream = path.fold(
        Utils.getSparkClassLoader.getResourceAsStream(DEFAULT_METRICS_CONF_FILENAME)
      )(new FileInputStream(_))

      if (stream != null) {
        properties.load(stream)
      }
    } catch {
      case e: Exception =>
        val file = path.getOrElse(DEFAULT_METRICS_CONF_FILENAME)
        logError(s"Error loading configuration file $file", e)
    } finally {
      if (stream != null) {
        stream.close()
      }
    }
  }
}
Example 6
Source File: ConfigReader.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.internal.config import java.util.{Map => JMap} import java.util.regex.Pattern import scala.collection.mutable.HashMap import scala.util.matching.Regex private object ConfigReader { private val REF_RE = "\\$\\{(?:(\\w+?):)?(\\S+?)\\}".r } def substitute(input: String): String = substitute(input, Set()) private def substitute(input: String, usedRefs: Set[String]): String = { if (input != null) { ConfigReader.REF_RE.replaceAllIn(input, { m => val prefix = m.group(1) val name = m.group(2) val ref = if (prefix == null) name else s"$prefix:$name" require(!usedRefs.contains(ref), s"Circular reference in $input: $ref") val replacement = bindings.get(prefix) .flatMap(_.get(name)) .map { v => substitute(v, usedRefs + ref) } .getOrElse(m.matched) Regex.quoteReplacement(replacement) }) } else { input } } }
Example 7
Source File: FileReader.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.visualization.tensorboard

import java.io.{BufferedInputStream}
import java.nio.ByteBuffer

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.tensorflow.util.Event

import scala.collection.mutable.ArrayBuffer
import scala.util.matching.Regex

private[bigdl] object FileReader {
  val fileNameRegex = """bigdl.tfevents.*""".r

  /**
   * Reads the scalar values recorded under `tag` from an event file.
   *
   * Each record is read as: 8-byte length, 4-byte checksum, the serialized event, 4-byte
   * checksum. Checksums are skipped, not verified.
   *
   * @param file event file; must be a regular file on `fs`
   * @param tag  summary tag to select
   * @param fs   file system used to open `file`
   * @return (step, value, wall time) triples sorted by step
   */
  def readScalar(file: Path, tag: String, fs: FileSystem): Array[(Long, Float, Double)] = {
    require(fs.isFile(file), s"FileReader: ${file} should be a file")
    val bis = new BufferedInputStream(fs.open(file))
    // The stream is closed in `finally` so a malformed record does not leak the file handle.
    try {
      val longBuffer = new Array[Byte](8)
      val crcBuffer = new Array[Byte](4)
      val bf = new ArrayBuffer[(Long, Float, Double)]
      while (bis.read(longBuffer) > 0) {
        // The length bytes are reversed before being decoded by the (big-endian) ByteBuffer.
        val l = ByteBuffer.wrap(longBuffer.reverse).getLong()
        bis.read(crcBuffer)
        // TODO: checksum
        // val crc1 = ByteBuffer.wrap(crcBuffer.reverse).getInt()
        val eventBuffer = new Array[Byte](l.toInt)
        bis.read(eventBuffer)
        val e = Event.parseFrom(eventBuffer)
        // Only events carrying exactly one summary value with the requested tag are kept.
        if (e.getSummary.getValueCount == 1 &&
          tag.equals(e.getSummary.getValue(0).getTag())) {
          bf.append((e.getStep, e.getSummary.getValue(0).getSimpleValue, e.getWallTime))
        }
        bis.read(crcBuffer)
        // val crc2 = ByteBuffer.wrap(crcBuffer.reverse).getInt()
      }
      bf.toArray.sortWith(_._1 < _._1)
    } finally {
      bis.close()
    }
  }
}
Example 8
Source File: UrlUtils.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.s2jobs.wal.utils

import java.net.{URI, URLDecoder}

import scala.util.matching.Regex

/** Helpers for decoding URLs and extracting domains and Korean keywords from them. */
object UrlUtils {
  // Three consecutive literal "\xHH" escapes, i.e. one 3-byte UTF-8 sequence written as text.
  val pattern = new Regex("""(\\x[0-9A-Fa-f]{2}){3}""")
  val koreanPattern = new scala.util.matching.Regex("([가-힣]+[\\-_a-zA-Z 0-9]*)+|([\\-_a-zA-Z 0-9]+[가-힣]+)")

  // url extraction functions

  /**
   * URL-decodes `url` as UTF-8.
   *
   * @return (whether decoding changed the string, decoded string); (false, url) on failure
   */
  def urlDecode(url: String): (Boolean, String) = {
    try {
      val decoded = URLDecoder.decode(url, "UTF-8")
      (url != decoded, decoded)
    } catch {
      case e: Exception => (false, url)
    }
  }

  /**
   * Replaces every run of three literal `\xHH` escapes with the UTF-8 text those bytes encode.
   *
   * The decoded text is passed through `Regex.quoteReplacement`: `replaceAllIn` treats `$`
   * and `\` in a replacement as group references / escapes, so unquoted decoded text such as
   * "$$$" used to throw.
   */
  def hex2String(url: String): String =
    pattern.replaceAllIn(
      url,
      m => {
        val bytes = m.matched
          .replaceAll("[^0-9A-Fa-f]", "") // keep only the hex digits
          .grouped(2)
          .map(Integer.parseInt(_, 16).toByte)
          .toArray
        Regex.quoteReplacement(new String(bytes, "utf-8"))
      }
    )

  /**
   * Lists the host followed by host + leading path prefixes.
   *
   * @param maxDepth maximum number of entries returned
   * @return Nil when the URL has no host
   */
  def toDomains(url: String, maxDepth: Int = 3): Seq[String] = {
    val uri = new URI(url)
    val domain = uri.getHost
    if (domain == null) Nil
    else {
      // For "/a/b" the split yields ("", "a", "b"), so the first prefix is the bare host.
      val paths = uri.getPath.split("/")
      if (paths.isEmpty) Seq(domain)
      else {
        val depth = Math.min(maxDepth, paths.size)
        (1 to depth).map { ith =>
          domain + paths.take(ith).mkString("/")
        }
      }
    }
  }

  /**
   * Decodes `_url` and extracts its domains and first Korean keyword.
   *
   * @return (decoded url, domains, keyword with whitespace removed); (_url, Nil, None) when
   *         anything throws
   */
  def extract(_url: String): (String, Seq[String], Option[String]) = {
    try {
      val url = hex2String(_url)
      val (_, decodedUrl) = urlDecode(url)

      val kwdOpt = koreanPattern.findFirstIn(decodedUrl).map(_.replaceAll("\\s", ""))
      val domains = toDomains(url.replaceAll(" ", ""))
      (decodedUrl, domains, kwdOpt)
    } catch {
      case e: Exception => (_url, Nil, None)
    }
  }
}
Example 9
Source File: ProduceError.scala From matcher with MIT License | 5 votes |
package com.wavesplatform.dex.test.matchers

import org.scalatest.matchers.{MatchResult, Matcher}

import scala.util.matching.Regex
import scala.util.{Left, Right}

/** Matchers asserting that an `Either` is a `Left` carrying an expected error. */
object ProduceError {

  /** Matches a `Left` whose value's string form contains a match of `errorPattern`. */
  def produce(errorPattern: Regex): Matcher[Either[_, _]] = { either =>
    either match {
      case Left(error) =>
        val found = errorPattern.findFirstIn(error.toString).isDefined
        MatchResult(
          matches = found,
          rawFailureMessage = "expecting {0} to match: {1}",
          rawNegatedFailureMessage = "got expected error",
          args = IndexedSeq(error, errorPattern)
        )
      case success @ Right(_) =>
        MatchResult(
          matches = false,
          rawFailureMessage = "expecting {0} to be Left and match: {1}",
          rawNegatedFailureMessage = "got expected error",
          args = IndexedSeq(success, errorPattern)
        )
    }
  }

  /** Matches a `Left` whose own string form contains `errorMessage`. */
  def produce(errorMessage: String): Matcher[Either[_, _]] = { either =>
    either match {
      case failure @ Left(_) =>
        val found = failure.toString.contains(errorMessage)
        MatchResult(
          matches = found,
          rawFailureMessage = "expecting Left(...{0}...) but got {1}",
          rawNegatedFailureMessage = "got expected error",
          args = IndexedSeq(errorMessage, failure)
        )
      case success @ Right(_) =>
        MatchResult(
          matches = false,
          rawFailureMessage = "expecting Left(...{0}...) but got {1}",
          rawNegatedFailureMessage = "got expected error",
          args = IndexedSeq(errorMessage, success)
        )
    }
  }
}
Example 10
Source File: XPath.scala From xtract with Apache License 2.0 | 5 votes |
package com.lucidchart.open.xtract

import scala.util.matching.Regex
import scala.xml.{Node, NodeSeq}

/** One step of a path through an XML document: a function from nodes to selected nodes. */
sealed trait XPathNode extends Function[NodeSeq, NodeSeq]

/** Selects the node at position `idx`. */
case class IdxXPathNode(idx: Int) extends XPathNode {
  def apply(xml: NodeSeq): NodeSeq = xml(idx)
  override def toString = s"[$idx]"
}

/** Selects direct children labelled `key`. */
case class KeyXPathNode(key: String) extends XPathNode {
  def apply(xml: NodeSeq): NodeSeq = xml \ key
  override def toString = s"/$key"
}

/** Selects descendants labelled `key`. */
case class RecursiveXPathNode(key: String) extends XPathNode {
  def apply(xml: NodeSeq): NodeSeq = xml \\ key
  override def toString = s"//$key"
}

/** Selects descendants whose whole label matches `regex`. */
case class RecursiveWildCardXPathNode(regex: Regex) extends XPathNode {
  def apply(xml: NodeSeq): NodeSeq = {
    val descendants = xml \\ "_"
    descendants.filter(_.label.matches(regex.regex))
  }
  override def toString = s"//?$regex"
}

/** Selects direct children whose whole label matches `regex`. */
case class WildCardXPathNode(regex: Regex) extends XPathNode {
  def apply(xml: NodeSeq): NodeSeq = {
    val children = xml \ "_"
    children.filter(_.label.matches(regex.regex))
  }
  override def toString = s"/?$regex"
}

/** Keeps nodes that carry attribute `attr`, optionally requiring it to equal `value`. */
case class AttributedXPathNode(attr: String, value: Option[String]) extends XPathNode {
  def apply(xml: NodeSeq): NodeSeq =
    xml.filter { node =>
      // No attribute -> rejected; attribute present -> accepted unless an expected value
      // is given and differs.
      node.attribute(attr).exists(found => value.forall(_ == found.toString))
    }

  override def toString =
    value.fold(s"[@$attr]")(v => s"[@$attr=$v]")
}

object XPath extends XPath(Nil) {
}
Example 11
Source File: patch.scala From sonar-scala with GNU Lesser General Public License v3.0 | 5 votes |
package com.mwz.sonar.scala
package pr

import scala.util.Try
import scala.util.matching.Regex

import cats.instances.either._
import cats.instances.list._
import cats.syntax.foldable._

/** A line position inside a patch. */
final case class PatchLine(value: Int) {
  def inc: PatchLine = PatchLine(value + 1)
}

/** A line position inside the patched file. */
final case class FileLine(value: Int) {
  def inc: FileLine = FileLine(value + 1)
}

/** Parser state: the current file line, the current patch line and the mapping built so far. */
final case class Patch(fileLine: FileLine, patchLine: PatchLine, fileToPatch: Map[FileLine, PatchLine])

/** A parse failure, carrying the text that could not be parsed. */
final case class PatchError(text: String)

object Patch {
  // Hunk header of the form "@@ -origStart,origSize +newStart,newSize @@".
  private val PatchChunkStartRegex: Regex =
    new Regex("""@@ \-(\d+),(\d+) \+(\d+),(\d+) @@""", "origStart", "origSize", "newStart", "newSize")

  // TODO: Maybe it would be more practical if the mapping was reversed: PatchLine -> FileLine?
  /**
   * Maps file lines to patch lines for every added or context line of `patch`.
   *
   * Line breaks (including the literal two-character sequence `\n`) are normalised before
   * the patch is split into lines.
   *
   * @return the mapping, or a `PatchError` when a hunk header cannot be parsed (error text is
   *         the offending line) or the resulting mapping is empty (error text is the whole patch)
   */
  def parse(patch: String): Either[PatchError, Map[FileLine, PatchLine]] =
    patch
      .replaceAll("(\r\n)|\r|\n|\\\\n", "\n")
      .linesIterator
      .toList
      .foldLeftM[Either[PatchError, ?], Patch](Patch(FileLine(0), PatchLine(0), Map.empty)) {
        case (patch, line) =>
          line match {
            // Start of a hunk.
            case l if l.startsWith("@@") =>
              // Parse the start of the hunk & get the starting line of the file.
              PatchChunkStartRegex
                .findFirstMatchIn(l)
                .flatMap(regexMatch => Try(regexMatch.group("newStart").toInt).toOption)
                .fold[Either[PatchError, Patch]](Left(PatchError(l)))(start =>
                  Right(Patch(FileLine(start), patch.patchLine.inc, patch.fileToPatch))
                )

            // Keep track of added and context (unmodified) lines.
            case l if l.startsWith("+") || l.startsWith(" ") =>
              Right(
                Patch(
                  patch.fileLine.inc,
                  patch.patchLine.inc,
                  patch.fileToPatch + (patch.fileLine -> patch.patchLine)
                )
              )

            // Skip removed and new lines.
            case _ =>
              Right(Patch(patch.fileLine, patch.patchLine.inc, patch.fileToPatch))
          }
      }
      .map(_.fileToPatch)
      .filterOrElse(_.nonEmpty, PatchError(patch))
}
Example 12
Source File: Measures.scala From sonar-scala with GNU Lesser General Public License v3.0 | 5 votes |
package com.mwz.sonar.scala
package sensor

import scala.annotation.tailrec
import scala.util.matching.Regex

import cats.instances.char._
import cats.kernel.Eq
import cats.syntax.eq._
import scalariform.lexer.{Token, TokenType, Tokens}

/** Source-code size measures computed over a scalariform token list. */
object Measures {
  implicit val tokenTypeEq: Eq[TokenType] = Eq.fromUniversalEquals[TokenType]
  val NewLineRegex: Regex = "(\r\n)|\r|\n".r

  /** Counts `class` and `object` keyword tokens. */
  def countClasses(tokens: List[Token]): Int =
    tokens.count { token =>
      val tokenType = token.tokenType
      tokenType === Tokens.CLASS || tokenType === Tokens.OBJECT
    }

  /** Counts `def` keyword tokens. */
  def countMethods(tokens: List[Token]): Int =
    tokens.count(_.tokenType === Tokens.DEF)

  /**
   * Counts comment lines: one per line comment, and one per line spanned by a multi-line
   * comment.
   *
   * Other comment tokens (e.g. XML comments) add nothing to the count, but scanning continues
   * past them. Previously such a token ended the scan, dropping every later comment, after
   * constructing a `NotImplementedError` that was never thrown.
   *
   * @param i running total; callers normally leave the default
   */
  @tailrec
  def countCommentLines(tokens: List[Token], i: Int = 0): Int =
    tokens match {
      case Nil => i
      case token :: tail if token.tokenType.isComment =>
        token.tokenType match {
          case Tokens.LINE_COMMENT =>
            countCommentLines(tail, i + 1)
          case Tokens.MULTILINE_COMMENT =>
            countCommentLines(tail, i + token.rawText.count(_ === '\n') + 1)
          case _ =>
            // NOTE(review): XML comments are skipped rather than counted — confirm intended.
            countCommentLines(tail, i)
        }
      case _ :: tail => countCommentLines(tail, i)
    }

  /**
   * Counts lines that contain at least one token which is neither whitespace, a newline nor
   * a comment. Counting stops at the EOF token.
   *
   * @param i running total; callers normally leave the default
   */
  @tailrec
  def countNonCommentLines(tokens: List[Token], i: Int = 0): Int = {
    // Drops tokens up to and including the one that ends the current line: whitespace
    // containing a line break, or a line comment.
    @tailrec
    def getNextLine(tokens: List[Token]): List[Token] =
      tokens match {
        case Nil => Nil
        case token :: tail
            if token.tokenType === Tokens.WS &&
              NewLineRegex.findFirstIn(token.text).nonEmpty =>
          tail
        case token :: tail if token.tokenType === Tokens.LINE_COMMENT =>
          tail
        case _ :: tail => getNextLine(tail)
      }

    tokens match {
      case Nil => i
      case token :: tail if token.tokenType === Tokens.WS =>
        countNonCommentLines(tail, i)
      case token :: _ if token.tokenType === Tokens.EOF => i
      case token :: tail =>
        if (!token.tokenType.isNewline && !token.tokenType.isComment)
          countNonCommentLines(getNextLine(tail), i + 1)
        else countNonCommentLines(tail, i)
    }
  }
}
Example 13
Source File: CommentInterceptor.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.entrance.interceptor.impl import java.lang import java.util.regex.Pattern import com.webank.wedatasphere.linkis.entrance.interceptor.EntranceInterceptor import com.webank.wedatasphere.linkis.protocol.query.RequestPersistTask import com.webank.wedatasphere.linkis.protocol.task.Task import org.slf4j.{Logger, LoggerFactory} import scala.util.matching.Regex " override def dealComment(code: String): String = { val p = Pattern.compile(scalaCommentPattern) p.matcher(code).replaceAll("$1") } } object CommentMain{ def main(args: Array[String]): Unit = { val sqlCode = "select * from default.user;--你好;show tables" val sqlCode1 = "select * from default.user--你好;show tables" println(SQLCommentHelper.dealComment(sqlCode)) } }
Example 14
Source File: _14_RegularExpressionPatternMatching.scala From LearningScala with Apache License 2.0 | 5 votes |
package _040_pattern_matching

import scala.util.matching.Regex

/**
 * Extractor that pulls every run of digits out of a string and combines them with `f`.
 *
 * @param f binary operation used to reduce the extracted numbers from left to right
 */
class AllInt(val f: (Int, Int) => Int) {
  private val regex: Regex = """\d+""".r

  /** Returns the reduced value, or None when the string contains no digits. */
  def unapply(s: String): Option[Int] =
    regex.findAllIn(s).map(_.toInt).reduceOption(f)
}

/** Extractor yielding every run of digits in a string, in order of appearance. */
object WordNumbers {
  private val regex: Regex = """\d+""".r

  /** Returns the digit runs, or None when there are none (so `WordNumbers()` never matches). */
  def unapplySeq(x: String): Option[Seq[String]] = {
    val items = regex.findAllIn(x).toList
    if (items.isEmpty) None else Some(items)
  }
}

object _14_RegularExpressionPatternMatching extends App {
  println("AllInt Example:")
  val WordNumbers(n1) = "That person is 44 years old"
  println(s"n1: $n1")

  val WordNumbers(o1, o2) = "The score in yesterdays game was 100 to 33, it was a blowout!"
  println(s"o1: $o1, o2: $o2")

  val WordNumbers(m1, m2, rest@_*) = "The lottery numbers were 100, 33, 12, 19, 10, 6 it was a blowout!"
  println(s"m1: $m1, m2: $m2, rest: $rest")

  val string = "The lucky numbers are 100, 56, 44, 33, and 220"
  // Every branch that mentions a variable uses the `s` interpolator; without it the text
  // "$n" is printed literally.
  val wordNumberMatchResult = string match {
    case WordNumbers() => "No numbers"
    case WordNumbers(n) => s"One number $n"
    case WordNumbers(n1, n2) => s"Two numbers: $n1 and $n2"
    case WordNumbers(n1, n2, n3) => s"Three numbers: $n1, $n2, $n3"
    case WordNumbers(n1, n2, n3, rest@_*) => s"First three numbers: $n1, $n2, $n3, and the rest is ${rest.mkString(",")}"
  }
  println(wordNumberMatchResult)

  println("\n=====================\nA Regular Pattern Expression Example:")
  val AmericanTelephoneNumberRegex: Regex = """1?\s*\((\d{3})\)\s*(\d{3})-(\d{4})""".r
  // `\d` (a digit), not `d` (the letter): the groups are 2, 4 and 4 digits long.
  val UKTelephoneNumberRegex: Regex = """\+44\s*(\d{2})\s*(\d{4})\s*(\d{4})""".r
  val telephoneResult = "1(505) 240-3023" match {
    case AmericanTelephoneNumberRegex(ac, pre, suf) => s"American(ac: $ac, pre: $pre, suf: $suf)"
    case UKTelephoneNumberRegex(ac, pre, suf) => s"UK(ac: $ac, pre: $pre, suf: $suf)"
    // Keeps the match total so an unrecognised number does not raise a MatchError.
    case other => s"Unknown($other)"
  }
  println(s"telephoneResult ==> $telephoneResult")
}
Example 15
Source File: DTVersions.scala From Converter with GNU General Public License v3.0 | 5 votes |
package org.scalablytyped.converter.internal
package importer

import java.net.URI
import java.time.{Instant, ZonedDateTime}

import ammonite.ops.%%
import org.scalablytyped.converter.internal.ts.{CalculateLibraryVersion, PackageJsonDeps}

import scala.util.matching.Regex
import scala.util.{Success, Try}

/** Calculates a library version from package.json, header comments and the git checkout. */
class DTVersions(lastChangedIndex: DTLastChangedIndex) extends CalculateLibraryVersion {
  // SSH-style GitHub remote ("git@github.com:<path>"). The literal had been mangled into
  // "[email protected]:(.*)", which is a character class and never matches such a remote.
  val GitAtGithubDotCom: Regex = "git@github.com:(.*)".r

  /** Parses a remote into a URI, rewriting SSH-style GitHub remotes to https URLs. */
  def uri(uriString: String): URI =
    uriString match {
      case GitAtGithubDotCom(path) => new URI(s"https://github.com/$path")
      case other                   => new URI(other)
    }

  /**
   * @param sourceFolder   folder of the library; also the working directory for the git call
   * @param isStdLib       passed through to the result
   * @param packageJsonOpt package.json contents, if any; its version takes precedence
   * @param comments       header comments, used as a fallback source for the version
   */
  override def apply(
      sourceFolder:   InFolder,
      isStdLib:       Boolean,
      packageJsonOpt: Option[PackageJsonDeps],
      comments:       Comments,
  ): LibraryVersion = {
    implicit val wd = sourceFolder.path

    val libraryVersion = packageJsonOpt.flatMap(_.version) orElse DefinitelyTypedVersion.from(comments)

    // No git information when the remote is the converter's own repo or cannot be determined.
    val inGit: Option[InGit] =
      Try(uri((%% git ('remote, "get-url", 'origin)).out.string.trim)) match {
        case Success(constants.ConverterRepo) => None
        case Success(uri) =>
          val lastModified = ZonedDateTime.ofInstant(
            Instant.ofEpochSecond(lastChangedIndex(sourceFolder.path.toIO)),
            constants.TimeZone,
          )
          Some(InGit(uri, uri === constants.DefinitelyTypedRepo, lastModified))
        case _ => None
      }

    LibraryVersion(isStdLib, libraryVersion, inGit)
  }

  private object DefinitelyTypedVersion {
    // "// Type definitions for <anything> <version>", where the version is the last token.
    private val Version = "^\\/\\/ Type definitions for .+ ([a-zA-Z\\d][a-zA-Z\\d.\\-]*)$".r

    /** First header line whose trailing token contains at least one digit. */
    def from(comments: Comments): Option[String] = {
      val lines = comments.rawCs.flatMap(_.split("\n"))
      lines.collectFirst {
        case Version(v) if v.exists(_.isDigit) => v
      }
    }
  }
}
Example 16
Source File: ProfessionInfo.scala From redrock with Apache License 2.0 | 5 votes |
package com.decahose

import org.slf4j.LoggerFactory

import scala.io.Source._
import scala.util.matching.Regex

/** Profession keyword table loaded from the bundled `professions.csv`. */
object ProfessionInfo {

  val logger = LoggerFactory.getLogger(this.getClass)

  val professions = loadProfessionsTable()

  /**
   * Reads the professions CSV (ISO-8859-1), skipping the first line and blank lines, and maps
   * each remaining line with [[mapProfession]].
   */
  def loadProfessionsTable(): Array[(Regex, String)] = {
    val professionsPath = ApplicationContext.Config.appConf.getString("homePath") +
      "/twitter-decahose/src/main/resources/Profession/professions.csv"
    val source = fromFile(professionsPath)("ISO-8859-1")
    // `toArray` forces the lazy line iterator before the source is closed.
    val professionsList =
      try {
        source
          .getLines()
          .drop(1)
          .filter(line => line.trim().length > 0)
          .map(line => mapProfession(line))
          .toArray
      } finally {
        source.close()
      }

    logger.info(s"Professions Loaded ==> ${professionsList.size}")
    professionsList
  }

  // (RegexSubgroup, SuperGroup)
  /**
   * Turns one CSV line into (word-bounded regex built from field 0, field 1).
   *
   * The regex is case-sensitive only when the line has exactly four fields and the fourth
   * is "1"; otherwise it is case-insensitive.
   */
  def mapProfession(line: String): (Regex, String) = {
    val fields = line.trim().split(",")
    val regStr = "\\b" + fields(0).trim() + "\\b"
    val caseSensitive = fields.size == 4 && fields(3) == "1"
    val pattern = if (caseSensitive) regStr.r else ("(?i)" + regStr).r
    (pattern, fields(1))
  }
}
Example 17
Source File: model.scala From sbt-org-policies with Apache License 2.0 | 5 votes |
package sbtorgpolicies.templates

import org.joda.time.DateTime
import sbtorgpolicies.rules.ValidationFunction
import sbtorgpolicies.templates.syntax._

import scala.util.matching.Regex

/** A value that can be rendered as the text substituted for a template placeholder. */
trait Replaceable {
  def asString: String
}

/** Renders a single value through its `toString`. */
case class ReplaceableT[T](t: T) extends Replaceable {
  override def asString: String = t.toString
}

/** Renders a list as one "* item" line per element. */
case class ReplaceableList[T](list: List[T]) extends Replaceable {
  override def asString: String = list.map(elem => s"* ${elem.asReplaceable.asString}").mkString("\n")
}

/** Describes a file produced from a template: its paths, replacements, sections and validations. */
case class FileType(
    mandatory: Boolean,
    overWritable: Boolean,
    finalVersionOnly: Boolean,
    templatePath: String,
    outputPath: String,
    replacements: Replacements,
    fileSections: List[FileSection] = Nil,
    validations: List[ValidationFunction] = Nil
)

/** Describes a templated section and where it goes; `shouldAppend` defaults to always true. */
case class FileSection(
    appendPosition: AppendPosition,
    template: String,
    replacements: Replacements,
    shouldAppend: (String) => Boolean = _ => true
)

/** Where a section is placed relative to existing content. */
sealed trait AppendPosition
case object AppendAtTheBeginning       extends AppendPosition
case object AppendAtTheEnd             extends AppendPosition
case class AppendAfter(line: Regex)    extends AppendPosition
case class ReplaceSection(
    from: Regex,
    to: Regex,
    insertIfNotFound: Boolean = true,
    defaultTop: Boolean = true
) extends AppendPosition

/** Date, version and change text of a release section. */
case class NewReleaseSection(date: DateTime, version: String, changes: String)
Example 18
Source File: ValidationFunctions.scala From sbt-org-policies with Apache License 2.0 | 5 votes |
package sbtorgpolicies.rules

import cats.instances.list._
import cats.kernel.instances.unit._
import cats.syntax.foldable._
import cats.syntax.validated._
import sbtorgpolicies.exceptions.ValidationException
import sbtorgpolicies.rules.syntax._

import scala.util.matching.Regex

/** Factory methods that build `ValidationFunction`s over a file's textual content. */
trait ValidationFunctions {

  /**
   * Builds a validation that checks every string of `list` occurs in the content.
   * Each missing string contributes a "`<string>` not found" error; the individual
   * results are merged with `combineAll`.
   */
  def requiredStrings(list: List[String]): ValidationFunction = {

    def validateList(content: String, list: List[String])(
        validateString: (String) => ValidationResult
    ): ValidationResult =
      list.map(validateString).combineAll

    content: String =>
      validateList(content, list) { string =>
        if (content.contains(string)) ().valid
        else ValidationException(s"$string not found").invalidNel
      }
  }

  /**
   * Builds a validation that extracts a section of the content and hands it to `validation`.
   *
   * The content is scanned line by line: the first line matching `startRegExp` opens the
   * section (and is part of it); the next line matching `endRegExp` closes it (and is not
   * part of it). If no line opened a section the result is a "Section not found" error.
   */
  def requiredSection(
      startRegExp: Regex,
      endRegExp: Regex,
      validation: ValidationFunction
  ): ValidationFunction = {

    // Scan state: whether the section has been opened / closed, and the lines collected so far.
    case class Section(started: Boolean = false, ended: Boolean = false, lines: List[String] = Nil)

    // True when the regex matches anywhere in the string.
    def matches(r: Regex, s: String): Boolean = r.findFirstIn(s).isDefined

    content: String =>
      val sectionLines = content
        .split("\n")
        .foldLeft(Section()) {
          // The case order matters: the first matching guard wins.
          // Section already closed: ignore every remaining line.
          case (section, _) if section.ended => section
          // Inside the section and the end marker is hit: close it, dropping the marker line.
          case (section, l) if section.started && matches(endRegExp, l) =>
            section.copy(ended = true)
          // Inside the section: collect the line.
          case (section, l) if section.started => section.copy(lines = section.lines :+ l)
          // Not started yet and the start marker is hit: open it, keeping the marker line.
          case (section, l) if matches(startRegExp, l) =>
            section.copy(started = true, lines = List(l))
          // Before the section: skip.
          case (section, _) => section
        }

      if (sectionLines.lines.isEmpty) {
        ValidationException("Section not found").invalidNel
      } else {
        validation(sectionLines.lines.mkString("\n"))
      }
  }

  /**
   * Builds a validation for a `.travis.yml` content: the `scala` field must list exactly
   * `crossScalaVersions` (compared after sorting), and the `script` / `after_success`
   * fields must each contain an entry for every expected task.
   */
  def validTravisFile(
      crossScalaVersions: Seq[String],
      scriptExpectedTasks: Seq[String],
      afterSuccessTasks: Seq[String]
  ): ValidationFunction = {

    def validateCrossScalaVersions(content: String): ValidationResult = {
      val travisCrossScalaVersion: List[String] = YamlOps.getFields(content, "scala").toList.sorted
      if (travisCrossScalaVersion == crossScalaVersions.sorted) ().valid
      else
        ValidationException(
          s".travis.yml is not valid, it doesn't contain all the " +
            s"cross scala versions for this project: $crossScalaVersions"
        ).invalidNel
    }

    // An expected task is satisfied when some entry of the section contains it as a substring.
    def validateTasks(
        content: String,
        section: String,
        expectedTasks: Seq[String]
    ): ValidationResult = {
      val tasksInTravisFile: List[String] = YamlOps.getFields(content, section).toList
      if (expectedTasks.forall(expectedTsk => tasksInTravisFile.exists(_.contains(expectedTsk))))
        ().valid
      else
        ValidationException(
          s".travis.yml is not valid, it doesn't contain all the " +
            s"expected tasks in the $section section: $expectedTasks"
        ).invalidNel
    }

    content: String =>
      validateCrossScalaVersions(content) combine
        validateTasks(content, "script", scriptExpectedTasks) combine
        validateTasks(content, "after_success", afterSuccessTasks)
  }
}

object ValidationFunctions extends ValidationFunctions
Example 19
Source File: BazelBuildFile.scala From exodus with MIT License | 5 votes |
package com.wixpress.build.bazel

import com.wixpress.build.maven.Exclusion

import scala.util.matching.Regex
import scala.util.matching.Regex.Match

/**
 * Immutable view over the text of a Bazel BUILD file that can look up, replace or
 * append library rule targets by name.
 *
 * @param content the full text of the BUILD file
 */
class BazelBuildFile(val content: String) {

  /** Finds the library rule named `name` and parses its attributes, if it is present. */
  def ruleByName(name: String): Option[LibraryRule] =
    findTargetWithSameNameAs(name = name, within = content)
      .map(extractFullMatchText)
      .flatMap(parseTargetText(name))

  /**
   * Returns a new build file in which `rule` replaces the existing target of the same
   * name, or is appended at the end when no such target exists.
   */
  def withTarget(rule: LibraryRule): BazelBuildFile = {
    findTargetWithSameNameAs(name = rule.name, within = content) match {
      case Some(matched) => replacedMatchedWithTarget(matched, rule)
      case None => appendTarget(rule)
    }
  }

  // The margin-stripped literal must span several lines; on a single line `stripMargin`
  // removes nothing and the '|' characters would end up in the generated file.
  private def appendTarget(rule: LibraryRule) = {
    new BazelBuildFile(
      s"""$content
         |
         |${rule.serialized}
         |""".stripMargin)
  }

  private def replacedMatchedWithTarget(matched: Match, rule: LibraryRule): BazelBuildFile = {
    new BazelBuildFile(content.take(matched.start) + rule.serialized + content.drop(matched.end))
  }

  private def findTargetWithSameNameAs(name: String, within: String) = {
    regexOfScalaLibraryRuleWithNameMatching(name).findFirstMatchIn(within)
  }

  private def extractFullMatchText(aMatch: Match) = aMatch.group(0)

  // `pattern` is spliced into the regex unquoted, so regex metacharacters in it are interpreted.
  private def regexOfScalaLibraryRuleWithNameMatching(pattern: String) =
    (s"(?s)${LibraryRule.RuleType}" + """\(\s*?name\s*?=\s*?"""" + pattern +"""".*?\)""").r

  private def parseTargetText(ruleName:String)(ruleText: String): Option[LibraryRule] = {
    Some(LibraryRule(
      name = ruleName,
      sources = extractListByAttribute(SrcsFilter, ruleText),
      jars = extractListByAttribute(JarsFilter, ruleText),
      exports = extractListByAttribute(ExportsFilter, ruleText),
      runtimeDeps = extractListByAttribute(RunTimeDepsFilter, ruleText),
      compileTimeDeps = extractListByAttribute(CompileTimeDepsFilter, ruleText),
      exclusions = extractExclusions(ruleText)))
  }

  // Collects every `# EXCLUDES group:artifact` comment line of the rule text.
  private def extractExclusions(ruleText: String) = {
    ExclusionsFilter
      .findAllMatchIn(ruleText)
      .map(m => Exclusion(m.group("groupId"), m.group("artifactId")))
      .toSet
  }

  // Takes the bracketed list of the first occurrence of the attribute and returns the
  // quoted strings found inside it (empty set when the attribute is absent).
  private def extractListByAttribute(filter: Regex, ruleText: String) = {
    val bracketsContentOrEmpty = filter.findFirstMatchIn(ruleText).map(_.group(BracketsContentGroup)).getOrElse("")
    listOfStringsFilter.findAllMatchIn(bracketsContentOrEmpty).map(_.group(StringsGroup)).toSet
  }

  private val BracketsContentGroup = "bracketsContent"
  private val ExportsFilter = """(?s)exports\s*?=\s*?\[(.+?)\]""".r(BracketsContentGroup)
  private val SrcsFilter = """(?s)srcs\s*?=\s*?\[(.+?)\]""".r(BracketsContentGroup)
  private val JarsFilter = """(?s)jars\s*?=\s*?\[(.+?)\]""".r(BracketsContentGroup)
  private val RunTimeDepsFilter = """(?s)runtime_deps\s*?=\s*?\[(.+?)\]""".r(BracketsContentGroup)
  // Requires a preceding newline so that `runtime_deps` is not matched as `deps`.
  private val CompileTimeDepsFilter = """(?s)\n\s*?deps\s*?=\s*?\[(.+?)\]""".r(BracketsContentGroup)
  private val ExclusionsFilter = "(?m)^\\s*#\\s*EXCLUDES\\s+(.*?):(.*?)\\s*$".r("groupId", "artifactId")
  private val StringsGroup = "Strings"
  private val listOfStringsFilter = """"(.+?)"""".r(StringsGroup)
}

object BazelBuildFile {

  def apply(content:String = ""):BazelBuildFile = new BazelBuildFile(content)

  /** Header used when a BUILD file does not exist yet. */
  val DefaultHeader: String =
    """licenses(["reciprocal"])
      |package(default_visibility = ["//visibility:public"])
      |""".stripMargin

  /**
   * Writes the rule into the BUILD file at its target locator, starting from
   * [[DefaultHeader]] when the workspace has no content for that locator.
   * Rules that are not `LibraryRule`s are ignored.
   */
  def persistTarget(ruleToPersist: RuleToPersist, localWorkspace: BazelLocalWorkspace) = {
    ruleToPersist.rule match {
      case rule: LibraryRule =>
        val buildFileContent = localWorkspace.buildFileContent(ruleToPersist.ruleTargetLocator).getOrElse(BazelBuildFile.DefaultHeader)
        val buildFileBuilder = BazelBuildFile(buildFileContent).withTarget(rule)
        localWorkspace.overwriteBuildFile(ruleToPersist.ruleTargetLocator, buildFileBuilder.content)
      case _ =>
    }
  }
}
Example 20
Source File: imageFeatureExtractor.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package Yelp.Preprocessor

import java.io.File
import javax.imageio.ImageIO

import scala.util.matching.Regex

import imageUtils._

object imageFeatureExtractor {

  /**
   * Loads each image, squares it, resizes it to `resizeImgDim` x `resizeImgDim`, converts it
   * to gray scale and keys the resulting pixel vector by an integer parsed from the path.
   *
   * The key is whatever `patt_get_jpg_name` matches in the path, concatenated and parsed
   * as an `Int` (presumably the numeric file name; the pattern is defined in `imageUtils`,
   * verify there). Paths that map to the same key overwrite each other.
   *
   * @param imgs         paths of the image files to read
   * @param resizeImgDim side length, in pixels, of the resized square image
   * @param nPixels      when different from -1, only the first `nPixels` values are kept
   * @return pixel vectors keyed by the integer extracted from each path
   */
  def processImages(imgs: List[String], resizeImgDim: Int = 128, nPixels: Int = -1): Map[Int, Vector[Int]] =
    imgs.map { path =>
      val imageId = patt_get_jpg_name.findAllIn(path).mkString.toInt
      val gray = ImageIO.read(new File(path))
        .makeSquare
        .resizeImg(resizeImgDim, resizeImgDim) // (128, 128)
        .image2gray
      // The former `.filter(x => x._2 != ())` compared a Vector with Unit, which is
      // always true, so it never removed anything and has been dropped.
      imageId -> (if (nPixels != -1) gray.slice(0, nPixels) else gray)
    }.toMap
}
Example 21
Source File: SqlBuilderSuiteBase.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.sources.sql

import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.sources.Filter
import org.apache.spark.sql.types.DataType
import org.scalatest.FunSuite

import scala.util.matching.Regex

/**
 * Helpers that register one test per call, comparing the SQL generated by `sqlBuilder`
 * against an expected statement whose whitespace has been normalised.
 */
trait SqlBuilderSuiteBase {
  self: FunSuite =>

  val sqlBuilder: SqlBuilder // scalastyle:ignore

  /** Collapses every whitespace run into a single space and trims both ends. */
  private def cleanUpSql(q: String): String = q.replaceAll("\\s+", " ").trim

  /** Registers a test asserting `expressionToSql(expr)` yields the normalised `sql`. */
  def testExpressionToSql(sql: String)(expr: Expression): Unit = {
    val expected = cleanUpSql(sql)
    test(s"expressionToSql: $expected | with $expr") {
      val actual = sqlBuilder.expressionToSql(expr)
      assertResult(expected)(actual)
    }
  }

  /** Registers a test asserting `buildSelect(i1, i2, i3)` yields the normalised `sql`. */
  def testBuildSelect(sql: String)
                     (i1: SqlLikeRelation, i2: Seq[String], i3: Seq[Filter]): Unit = {
    val expected = cleanUpSql(sql)
    test(s"buildSelect: $expected | with $i1 $i2 $i3") {
      val actual = sqlBuilder.buildSelect(i1, i2, i3)
      assertResult(expected)(actual)
    }
  }

  /** Registers a test asserting `logicalPlanToSql(plan)` yields the normalised `sql`. */
  def testLogicalPlan(sql: String)(plan: LogicalPlan): Unit = {
    val expected = cleanUpSql(sql)
    test(s"logical plan: $expected | with $plan") {
      val actual = sqlBuilder.logicalPlanToSql(plan)
      assertResult(expected)(actual)
    }
  }

  /** Same as [[testLogicalPlan]] but through `internalLogicalPlanToSql` with `noProject = true`. */
  def testLogicalPlanInternal(sql: String)(plan: LogicalPlan): Unit = {
    val expected = cleanUpSql(sql)
    test(s"logical plan (internal): $expected | with $plan") {
      val actual = sqlBuilder.internalLogicalPlanToSql(plan, noProject = true)
      assertResult(expected)(actual)
    }
  }

  /** Registers a test expecting `logicalPlanToSql(plan)` to throw a `RuntimeException`. */
  def testUnsupportedLogicalPlan(plan: LogicalPlan): Unit =
    test(s"invalid logical plan: $plan") {
      intercept[RuntimeException] {
        sqlBuilder.logicalPlanToSql(plan)
      }
    }

  /** Registers a test expecting `internalLogicalPlanToSql(plan)` to throw a `RuntimeException`. */
  def testUnsupportedLogicalPlanInternal(plan: LogicalPlan): Unit =
    test(s"invalid logical plan (internal): $plan") {
      intercept[RuntimeException] {
        sqlBuilder.internalLogicalPlanToSql(plan)
      }
    }

  /** Registers a test asserting `typeToSql(dataType)` equals `expected`. */
  def testGeneratedSqlDataType(expected: String)(dataType: DataType): Unit =
    test(s"The generated sql type for ${dataType.simpleString} is $expected") {
      assertResult(expected)(sqlBuilder.typeToSql(dataType))
    }
}
Example 22
Source File: TestUtils.scala From rug with GNU General Public License v3.0 | 5 votes |
package com.atomist.rug

import com.atomist.param.{ParameterValues, SimpleParameterValues}
import com.atomist.project.archive._
import com.atomist.project.edit.{ModificationAttempt, ProjectEditor, SuccessfulModification}
import com.atomist.rug.ts.TypeScriptBuilder
import com.atomist.source._
import com.atomist.source.file.ClassPathArtifactSource
import org.scalatest.Matchers

import scala.util.matching.Regex

/** Resolves the rugs contained in an artifact source. */
object RugArchiveReader {

  def apply(as: ArtifactSource): Rugs = {
    val resolver = SimpleRugResolver(as)
    resolver.resolvedDependencies.rugs
  }
}

/** Builds an archive-based resolver whose only dependency is the given artifact source. */
object SimpleRugResolver {

  def apply(as: ArtifactSource) : RugResolver = {
    val dependency = Dependency(as)
    new ArchiveRugResolver(dependency)
  }
}

/** Helpers for loading test resources that live in the caller's package. */
object TestUtils extends Matchers {

  val atomistConfig: AtomistConfig = DefaultAtomistConfig

  /**
   * Looks for a file called `name` among the caller's package resources and, when found,
   * re-roots it at `pathAbove/name`.
   */
  def fileInPackage(caller: AnyRef, name: String, pathAbove: String = ""): Option[FileArtifact] = {
    val candidates = resourcesInPackage(caller).allFiles
    candidates.find(_.name == name).map(found => found.withPath(pathAbove + "/" + name))
  }

  /** Like [[fileInPackage]] but throws `IllegalArgumentException` when the file is missing. */
  def requiredFileInPackage(caller: AnyRef, name: String, pathAbove: String = ""): FileArtifact =
    fileInPackage(caller, name, pathAbove) match {
      case Some(file) => file
      case None =>
        throw new IllegalArgumentException(s"Cannot find file [$name] in [${caller.getClass.getPackage.getName}]")
    }

  /** Content of the required file `name` from the caller's package. */
  def contentOf(caller: AnyRef, name: String): String = {
    val file = requiredFileInPackage(caller, name)
    file.content
  }

  /** Loads the named resource files as rugs. */
  def rugsInSideFile(caller: AnyRef, names: String*): Rugs =
    RugArchiveReader(rugsInSideFileAsArtifactSource(caller, names: _*))

  /**
   * Selects the named files from the caller's package, moves them under
   * `.atomist/editors/` and compiles the result with the TypeScript builder.
   * Fails the test when none of the names can be found.
   */
  def rugsInSideFileAsArtifactSource(caller: AnyRef, names: String*): ArtifactSource = {
    val packageResources = resourcesInPackage(caller)
    val selected = packageResources.filter(_ => true, f => names.contains(f.name))
    if (selected.empty) {
      fail(s"Can't load ANYTHING from resources named [${names.mkString(",")}] at class path resource in package [${caller.getClass.getPackage.getName}]")
    }
    val relocator = new FileEditor {
      override def canAffect(f: FileArtifact) = true

      // Put the editor in the .atomist directory so it's found
      override def edit(f: FileArtifact) = f.withPath(".atomist/editors/" + f.path)
    }
    val withAtomistDir = selected.edit(relocator)
    TypeScriptBuilder.compileWithModel(withAtomistDir)
  }
}
Example 23
Source File: UrlInterpolation.scala From vat-api with Apache License 2.0 | 5 votes |
package uk.gov.hmrc.support

import scala.collection.mutable
import scala.util.matching.Regex

/**
 * Substitutes the `%sourceLocation%` and `%periodLocation%` placeholders of a URL path
 * with the values found in an implicit map of path variables.
 */
trait UrlInterpolation {

  /** Interpolates the textual form of the given regex. */
  def interpolated(path: Regex)(implicit urlPathVariables: mutable.Map[String, String]): String =
    interpolated(path.regex)

  /**
   * Replaces `%sourceLocation%` first and `%periodLocation%` second.
   * A placeholder whose variable is absent from the map is left untouched.
   */
  def interpolated(path: String)(implicit urlPathVariables: mutable.Map[String, String]): String =
    Seq("sourceLocation", "periodLocation").foldLeft(path) { (current, variable) =>
      interpolate(current, variable)
    }

  // Replaces every occurrence of %pathVariable% when the variable has a value.
  private def interpolate(path: String, pathVariable: String)(implicit pathVariablesValues: mutable.Map[String, String]): String =
    pathVariablesValues
      .get(pathVariable)
      .fold(path)(variableValue => path.replace(s"%$pathVariable%", variableValue))
}
Example 24
Source File: TestRoutingRegistry.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.testing

import io.udash._

import scala.collection.mutable
import scala.util.matching.Regex

/**
 * Routing registry for tests: translates between URLs and `TestState`s and records
 * every URL and state it was asked to translate.
 */
class TestRoutingRegistry extends RoutingRegistry[TestState] {
  val classStatePattern: Regex = "/(\\w+)/(\\d+)".r

  var urlsHistory: mutable.ArrayBuffer[Url] = mutable.ArrayBuffer.empty
  var statesHistory: mutable.ArrayBuffer[TestState] = mutable.ArrayBuffer.empty

  /** Records `url`, then maps it to a state; unknown URLs map to `ErrorState`. */
  override def matchUrl(url: Url): TestState = {
    urlsHistory += url
    val path = url.value
    path match {
      case "/" => ObjectState
      case "/root" => RootState(None)
      case "/root" / v => RootState(Some(v.toInt))
      case "/exception" => ThrowExceptionState
      case "/next" => NextObjectState
      case classStatePattern(arg: String, arg2: String) => ClassState(arg, arg2.toInt)
      case _ => ErrorState
    }
  }

  /** Records `state`, then maps it to a URL; unknown states map to the empty URL. */
  override def matchState(state: TestState): Url = {
    statesHistory += state
    val path = state match {
      case ObjectState => "/"
      case RootState(None) => "/root"
      case RootState(Some(v)) => s"/root/$v"
      case ThrowExceptionState => "/exception"
      case NextObjectState => "/next"
      case ClassState(arg, arg2) => s"/$arg/$arg2"
      case _ => ""
    }
    Url(path)
  }
}
Example 25
Source File: TranslationProvider.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.i18n

import scala.concurrent.Future
import scala.util.Try
import scala.util.matching.Regex

/** Source of translations with support for `{}` / `{n}` argument placeholders. */
trait TranslationProvider {
  import TranslationProvider._

  def translate(key: String, argv: Any*)(implicit lang: Lang): Future[Translated]

  /** Called when a template uses both indexed (`{0}`) and unindexed (`{}`) placeholders. */
  protected def handleMixedPlaceholders(template: String): Unit

  /**
   * Fills the placeholders of `template` with the string form of `argv`.
   *
   * `{n}` selects the n-th argument; `{}` selects the argument following the one used by
   * the previous placeholder (the first one when there was none). Placeholders pointing
   * outside `argv` are left as they are.
   */
  protected def putArgs(template: String, argv: Any*): Translated = {
    // Arguments are quoted so that '$' and '\' in them are not treated as group references.
    val argumentAt: Int => Option[String] =
      argv.map(arg => Regex.quoteReplacement(arg.toString)).lift

    var lastIndex = -1
    var sawIndexed = false
    var sawUnindexed = false

    // Resolves the argument index of one placeholder and notes which kind it was.
    def resolveIndex(m: Regex.Match): Int =
      Try(m.group(1).toInt).toOption match {
        case Some(explicit) =>
          sawIndexed = true
          explicit
        case None =>
          sawUnindexed = true
          lastIndex + 1
      }

    val rendered = argRegex.replaceSomeIn(template, m => {
      val index = resolveIndex(m)
      if (sawIndexed && sawUnindexed) handleMixedPlaceholders(template)
      lastIndex = index
      argumentAt(index)
    })

    Translated(rendered)
  }
}

object TranslationProvider {
  val argRegex = """\{(\d*)\}""".r
  val indexedArgRegex = """\{(\d+)\}""".r
  val unindexedArgRegex = """\{\}""".r
}
Example 26
Source File: CSVFile.scala From random-projections-at-berlinbuzzwords with Apache License 2.0 | 5 votes |
package com.stefansavev.randomprojections.file

import scala.io.Source
import scala.util.control.NonFatal
import scala.util.matching.Regex

/**
 * Options controlling how a CSV file is read.
 *
 * @param sep            column separator, matched literally
 * @param hasHeader      whether the first line holds the column names
 * @param quote          quote string; not used by the code in this file
 * @param onlyTopRecords when set, the maximum number of data records to read
 */
case class CSVFileOptions(sep: String = ",", hasHeader: Boolean = true, quote: Option[String] = None, onlyTopRecords: Option[Int] = None)

/**
 * A CSV file opened for a single sequential pass. Create instances with [[CSVFile.read]]
 * and release the underlying source with [[close]].
 */
class CSVFile private (_header: Option[Array[String]], source: Source, iter: Iterator[String], opt: CSVFileOptions) {

  /** Number of header columns, when the file has a header. */
  def numColumns: Option[Int] = _header.map(_.length)

  def header: Option[Array[String]] = _header

  def processLine(line: String): Array[String] = {
    CSVFile.processLine(opt, line) //TODO: verify num columns
  }

  /** Remaining data records, each split into columns. */
  def getLines(): Iterator[Array[String]] = iter.map(line => processLine(line))

  def close(): Unit = source.close()
}

object CSVFile {

  /** Splits a line on the literal separator; trailing empty columns are dropped by `split`. */
  def processLine(opt: CSVFileOptions, line: String): Array[String] = {
    line.split(Regex.quote(opt.sep))
  }

  /**
   * Opens `fileName` and consumes the header line when `opt.hasHeader` is set.
   *
   * With `onlyTopRecords = Some(n)` at most `n` data records are returned; the extra
   * line for the header is only accounted for when the file actually has one.
   * If reading the header fails the source is closed before the exception propagates.
   *
   * @throws java.util.NoSuchElementException when a header is expected but the file is empty
   */
  def read(fileName: String, opt: CSVFileOptions): CSVFile = {
    val source = Source.fromFile(fileName)
    try {
      val linesIterator = source.getLines()
      val headerLines = if (opt.hasHeader) 1 else 0
      val iterator = opt.onlyTopRecords match {
        case None => linesIterator
        case Some(n) => linesIterator.take(n + headerLines)
      }
      val header = if (opt.hasHeader) Some(processLine(opt, iterator.next())) else None
      new CSVFile(header, source, iterator, opt)
    } catch {
      case NonFatal(e) =>
        source.close()
        throw e
    }
  }
}
Example 27
Source File: EnronEmail.scala From Mastering-Scala-Machine-Learning with MIT License | 5 votes |
package org.akozlov.chapter07

import scala.io.Source
import scala.util.hashing.{MurmurHash3 => Hash}
import scala.util.matching.Regex

import java.util.{Date => javaDateTime}
import java.io.File

import net.liftweb.json._
import Extraction._
import Serialization.{read, write}

/**
 * Walks a directory of e-mail message files and prints one JSON `Relation` per
 * distinct (sender, other address) pair found in each message.
 */
object EnronEmail {

  // NOTE(review): the address patterns in this file were mangled to "[email protected]" by the
  // page the code was copied from. They are restored here as "+@enron\.com"; the domain is
  // an assumption based on the object name and must be confirmed against the original.
  val emailRe = """[a-zA-Z0-9_.+\-]+@enron\.com""".r.unanchored

  /** Iterates over every address in `s` that matches `emailRe`. */
  def emails(s: String) = {
    for (email <- emailRe findAllIn s) yield email
  }

  /** String hash shifted by `Int.MaxValue` and widened to `Long`. */
  def hash(s: String) = {
    java.lang.Integer.MAX_VALUE.toLong + Hash.stringHash(s)
  }

  // Captures, in order: message id, date, sender address, subject.
  // The literal must span several lines for `stripMargin` to remove the '|' margins.
  val messageRe =
    """(?:Message-ID:\s+)(<[A-Za-z0-9_.+\-@]+>)(?s)(?:.*?)(?m)
      |(?:Date:\s+)(.*?)$(?:.*?)
      |(?:From:\s+)([a-zA-Z0-9_.+\-]+@enron\.com)(?:.*?)
      |(?:Subject: )(.*?)$""".stripMargin.r.unanchored

  case class Relation(from: String, fromId: Long, to: String, toId: Long, source: String, messageId: String, date: javaDateTime, subject: String)

  implicit val formats = Serialization.formats(NoTypeHints)

  /** Lazily lists `f` followed by everything below it when it is a directory. */
  def getFileTree(f: File): Stream[File] =
    f #:: (if (f.isDirectory) f.listFiles().toStream.flatMap(getFileTree) else Stream.empty)

  /**
   * Processes, in parallel, every file under `args(0)` whose name ends with a dot.
   * Files that cannot be read or parsed are reported on stderr and skipped.
   */
  def main(args: Array[String]): Unit = {
    getFileTree(new File(args(0))).par.map { file =>
      {
        "\\.$".r findFirstIn file.getName match {
          case Some(x) =>
            try {
              val src = Source.fromFile(file, "us-ascii")
              val message = try src.mkString finally src.close()
              message match {
                case messageRe(messageId, date, from , subject) =>
                  val fromLower = from.toLowerCase
                  // One relation per distinct address in the message other than the sender.
                  for (to <- emails(message).filter(_ != fromLower).toList.distinct)
                    println(write(Relation(fromLower, hash(fromLower), to, hash(to), file.toString, messageId, new javaDateTime(date), subject)))
                case _ =>
              }
            } catch {
              case e: Exception => System.err.println(e)
            }
          case _ =>
        }
      }
    }
  }
}
Example 28
Source File: RegexTokenizerOp.scala From mleap with Apache License 2.0 | 5 votes |
package ml.combust.mleap.bundle.ops.feature

import ml.combust.bundle.BundleContext
import ml.combust.bundle.dsl._
import ml.combust.bundle.op.OpModel
import ml.combust.mleap.bundle.ops.MleapOp
import ml.combust.mleap.core.feature.RegexTokenizerModel
import ml.combust.mleap.runtime.MleapContext
import ml.combust.mleap.runtime.transformer.feature.RegexTokenizer

import scala.util.matching.Regex

/** Bundle operation that stores and loads a `RegexTokenizerModel`. */
class RegexTokenizerOp extends MleapOp[RegexTokenizer, RegexTokenizerModel] {

  override val Model: OpModel[MleapContext, RegexTokenizerModel] = new OpModel[MleapContext, RegexTokenizerModel] {
    // Attribute names used in the serialized model.
    val RegexIdentifier = "regex"
    val MatchGapsIdentifier = "match_gaps"
    val MinTokenLengthIdentifer = "token_min_length"
    val LowercaseText = "lowercase_text"

    override val klazz: Class[RegexTokenizerModel] = classOf[RegexTokenizerModel]

    override def opName: String = Bundle.BuiltinOps.feature.regex_tokenizer

    /** Writes the four tokenizer settings onto the bundle model. */
    override def store(model: Model, obj: RegexTokenizerModel)
                      (implicit context: BundleContext[MleapContext]): Model = {
      val withRegex = model.withValue(RegexIdentifier, Value.string(obj.regex.toString()))
      val withGaps = withRegex.withValue(MatchGapsIdentifier, Value.boolean(obj.matchGaps))
      val withMinLength = withGaps.withValue(MinTokenLengthIdentifer, Value.int(obj.tokenMinLength))
      withMinLength.withValue(LowercaseText, Value.boolean(obj.lowercaseText))
    }

    /** Rebuilds the tokenizer model from the four stored settings. */
    override def load(model: Model)
                     (implicit context: BundleContext[MleapContext]): RegexTokenizerModel = {
      val pattern = model.value(RegexIdentifier).getString.r
      val gaps = model.value(MatchGapsIdentifier).getBoolean
      val minLength = model.value(MinTokenLengthIdentifer).getInt
      val lowercase = model.value(LowercaseText).getBoolean
      RegexTokenizerModel(
        regex = pattern,
        matchGaps = gaps,
        tokenMinLength = minLength,
        lowercaseText = lowercase
      )
    }
  }

  override def model(node: RegexTokenizer): RegexTokenizerModel = node.model
}
Example 29
Source File: RegexIndexerModel.scala From mleap with Apache License 2.0 | 5 votes |
package ml.combust.mleap.core.feature

import ml.combust.mleap.core.Model
import ml.combust.mleap.core.types.{ScalarType, StructType}

import scala.util.matching.Regex

/**
 * Maps a string to the index paired with the first regex that matches it.
 *
 * @param lookup       (regex, index) pairs, tried in order
 * @param defaultIndex index returned when no regex matches
 */
case class RegexIndexerModel(lookup: Seq[(Regex, Int)], defaultIndex: Option[Int]) extends Model {

  /**
   * Returns the index of the first regex found anywhere in `value`, else the default.
   *
   * @throws java.util.NoSuchElementException when nothing matches and no default is set
   */
  def apply(value: String): Int =
    lookup
      .collectFirst { case (r, index) if r.findFirstMatchIn(value).isDefined => index }
      .orElse(defaultIndex)
      .getOrElse(
        // Same exception type as the former bare `.get`, but with a message that names the input.
        throw new NoSuchElementException(
          s"No regex in the lookup matches '$value' and no default index is configured"))

  override def inputSchema: StructType = StructType("input" -> ScalarType.String).get

  override def outputSchema: StructType = StructType("output" -> ScalarType.Int.nonNullable).get
}
Example 30
Source File: AirSpecSbtRunner.scala From airframe with Apache License 2.0 | 5 votes |
package wvlet.airspec.runner

import sbt.testing.{Task, TaskDef}
import wvlet.airspec.runner.AirSpecSbtRunner.AirSpecConfig
import wvlet.log.{LogSupport, Logger}

import scala.util.control.NonFatal
import scala.util.matching.Regex

/** sbt test runner that creates one `AirSpecTask` per task definition. */
private[airspec] class AirSpecSbtRunner(config: AirSpecConfig, val remoteArgs: Array[String], classLoader: ClassLoader)
    extends sbt.testing.Runner {
  private lazy val taskLogger = new AirSpecLogger()

  override def args: Array[String] = config.args

  override def tasks(taskDefs: Array[TaskDef]): Array[Task] = {
    taskDefs
      .map(t => new AirSpecTask(config, taskLogger, t, classLoader))
  }

  override def done(): String = {
    // sbt 1.3.x's layered class loader will not clean up LogHandlers
    // registered at java.util.logging, so we need to unregister all LogHandlers implementations that
    // use airframe's code before sbt detaches the class loader
    taskLogger.clearHandlers
    Logger.clearAllHandlers
    ""
  }

  // The following methods are defined for Scala.js support:
  def receiveMessage(msg: String): Option[String] = None

  def deserializeTask(task: String, deserializer: String => sbt.testing.TaskDef): sbt.testing.Task = {
    new AirSpecTask(config, taskLogger, deserializer(task), classLoader)
  }

  def serializeTask(task: sbt.testing.Task, serializer: sbt.testing.TaskDef => String): String = {
    serializer(task.taskDef())
  }
}

private[airspec] object AirSpecSbtRunner extends LogSupport {

  def newRunner(args: Array[String], remoteArgs: Array[String], testClassLoader: ClassLoader): AirSpecSbtRunner = {
    new AirSpecSbtRunner(AirSpecConfig(args), remoteArgs, testClassLoader)
  }

  /**
   * Runner configuration built from the command line arguments.
   *
   * @param args raw arguments passed to the test framework
   */
  case class AirSpecConfig(args: Array[String]) {

    /**
     * Case-insensitive test name pattern taken from the first argument that does not
     * start with `-`, or `None` when there is no such argument or it is not a valid regex.
     */
    lazy val pattern: Option[Regex] = {
      // For now, we only support regex-based test name matcher using the first argument
      args.find(x => !x.startsWith("-")).flatMap { p =>
        try {
          // Support wildcard (*) for convenience
          val wildcardExpanded = p.replaceAll("\\*", ".*")
          Some(s"(?i)$wildcardExpanded".r)
        } catch {
          // Only recoverable failures are turned into "no pattern"; fatal errors propagate.
          case NonFatal(e) =>
            logger.warn(s"Invalid regular expression ${p}: ${e.getMessage}")
            None
        }
      }
    }
  }
}
Example 31
Source File: Common.scala From Cortex with GNU Affero General Public License v3.0 | 5 votes |
import scala.util.matching.Regex

import sbt.Keys._
import sbt._

/** Build settings and version patterns shared by the build definition. */
object Common {

  val projectSettings = Seq(
    organizationName := "TheHive-Project",
    organization := "org.thehive-project",
    licenses += "AGPL-V3" → url("https://www.gnu.org/licenses/agpl-3.0.html"),
    organizationHomepage := Some(url("http://thehive-project.org/")),
    resolvers += Resolver.bintrayRepo("thehive-project", "maven"),
    resolvers += "elasticsearch-releases" at "https://artifacts.elastic.co/maven",
    scalaVersion := Dependencies.scalaVersion,
    scalacOptions ++= Seq(
      "-deprecation", // Emit warning and location for usages of deprecated APIs.
      "-feature", // Emit warning and location for usages of features that should be imported explicitly.
      "-unchecked", // Enable additional warnings where generated code depends on assumptions.
      //"-Xfatal-warnings", // Fail the compilation if there are any warnings.
      "-Xlint", // Enable recommended additional warnings.
      "-Ywarn-adapted-args", // Warn if an argument list is modified to match the receiver.
      "-Ywarn-dead-code", // Warn when dead code is identified.
      "-Ywarn-inaccessible", // Warn about inaccessible types in method signatures.
      "-Ywarn-nullary-override", // Warn when non-nullary overrides nullary, e.g. def foo() over def foo.
      "-Ywarn-numeric-widen" // Warn when numerics are widened.
    ),
    scalacOptions in Test ~= { options ⇒
      options filterNot (_ == "-Ywarn-dead-code") // Allow dead code in tests (to support using mockito).
    },
    parallelExecution in Test := false,
    fork in Test := true,
    javaOptions += "-Xmx1G",
    // Redirect logs from ElasticSearch (which uses log4j2) to slf4j
    libraryDependencies += "org.apache.logging.log4j" % "log4j-to-slf4j" % "2.9.1",
    excludeDependencies += "org.apache.logging.log4j" % "log4j-core"
  )

  // "x.y.z-n": captures the version and the trailing number.
  val stableVersion: Regex = "(\\d+\\.\\d+\\.\\d+)-(\\d+)".r
  // "x.y.z-RCn" (any letter case): captures the version and the RC number.
  val betaVersion: Regex = "(\\d+\\.\\d+\\.\\d+)-[Rr][Cc](\\d+)".r

  /**
   * Extractor for versions ending in "-SNAPSHOT": the suffix is removed and the rest is
   * matched against [[stableVersion]] first, then [[betaVersion]].
   */
  object snapshotVersion {
    private val SnapshotSuffix = "-SNAPSHOT"

    def unapplySeq(version: String): Option[List[String]] =
      Some(version)
        .filter(_.endsWith(SnapshotSuffix))
        .map(_.stripSuffix(SnapshotSuffix))
        .flatMap(base => stableVersion.unapplySeq(base) orElse betaVersion.unapplySeq(base))
  }
}
Example 32
Source File: PostcodeClient.scala From akka-http-test with Apache License 2.0 | 5 votes |
package com.github.dnvriend.component.webservices.postcode

import akka.NotUsed
import akka.actor.ActorSystem
import akka.event.LoggingAdapter
import akka.http.scaladsl.model.{ HttpRequest, HttpResponse }
import akka.stream.Materializer
import akka.stream.scaladsl.Flow
import com.github.dnvriend.component.webservices.generic.HttpClient
import spray.json.DefaultJsonProtocol

import scala.concurrent.{ ExecutionContext, Future }
import scala.util.Try
import scala.util.matching.Regex

/** Address record as returned by the postcode service. */
case class Address(
    street: String,
    houseNumber: Int,
    houseNumberAddition: String,
    postcode: String,
    city: String,
    municipality: String,
    province: String,
    rdX: Option[Int],
    rdY: Option[Int],
    latitude: Double,
    longitude: Double,
    bagNumberDesignationId: String,
    bagAddressableObjectId: String,
    addressType: String,
    purposes: Option[List[String]],
    surfaceArea: Int,
    houseNumberAdditions: List[String]
)

/** JSON format for [[Address]]. */
trait Marshallers extends DefaultJsonProtocol {
  implicit val addressJsonFormat = jsonFormat17(Address)
}

case class GetAddressRequest(zip: String, houseNumber: String)

/** Looks up addresses by postcode and house number, one at a time or as a stream stage. */
trait PostcodeClient {
  def address(postcode: String, houseNumber: Int): Future[Option[Address]]

  def address[T](implicit system: ActorSystem, mat: Materializer, ec: ExecutionContext): Flow[(GetAddressRequest, T), (Option[Address], T), NotUsed]
}

object PostcodeClient {
  import spray.json._

  // Four digits (the first one non-zero) followed by two letters, without / with one whitespace.
  val ZipcodeWithoutSpacePattern: Regex = """([1-9][0-9]{3})([A-Za-z]{2})""".r
  val ZipcodeWithSpacePattern: Regex = """([1-9][0-9]{3})[\s]([A-Za-z]{2})""".r

  /** Parses the JSON into an address; any parse or conversion failure yields `None`. */
  def mapToAddress(json: String)(implicit reader: JsonReader[Address]): Option[Address] = {
    val parsed = Try(json.parseJson.convertTo[Address])
    parsed.toOption
  }

  def responseToString(resp: HttpResponse)(implicit system: ActorSystem, mat: Materializer, ec: ExecutionContext): Future[String] =
    HttpClient.responseToString(resp)

  /** Turns each request into a GET on `/rest/addresses/<zip>/<houseNumber>/`, keeping the id. */
  def getAddressRequestFlow[T]: Flow[(GetAddressRequest, T), (HttpRequest, T), NotUsed] =
    Flow[(GetAddressRequest, T)].map {
      case (request, id) =>
        val httpRequest = HttpClient.mkGetRequest(s"/rest/addresses/${request.zip}/${request.houseNumber}/")
        (httpRequest, id)
    }

  /** Reads each response body and parses it into an optional address, keeping the id. */
  def mapResponseToAddressFlow[T](implicit system: ActorSystem, mat: Materializer, ec: ExecutionContext, reader: JsonReader[Address]): Flow[(Try[HttpResponse], T), (Option[Address], T), NotUsed] =
    HttpClient.responseToString[T].map {
      case (json, id) => (mapToAddress(json), id)
    }

  /**
   * Upper-cases the zipcode and removes the optional single whitespace between digits and
   * letters; returns `None` when the input matches neither zipcode pattern.
   */
  def normalizeZipcode(zipcode: String): Option[String] = {
    val upperCased = zipcode.toUpperCase
    upperCased match {
      case ZipcodeWithoutSpacePattern(digits, suffix) => Some(s"$digits$suffix")
      case ZipcodeWithSpacePattern(digits, suffix) => Some(s"$digits$suffix")
      case _ => None
    }
  }

  def apply()(implicit system: ActorSystem, mat: Materializer, ec: ExecutionContext, log: LoggingAdapter) = new PostcodeClientImpl
}

class PostcodeClientImpl()(implicit val system: ActorSystem, val mat: Materializer, val ec: ExecutionContext, val log: LoggingAdapter) extends PostcodeClient with Marshallers {
  import PostcodeClient._

  private val client = HttpClient("postcode")

  /** Completes with `None`, without calling the service, when the postcode is not valid. */
  override def address(postcode: String, houseNumber: Int): Future[Option[Address]] =
    normalizeZipcode(postcode).fold(Future.successful(Option.empty[Address])) { zip =>
      client.get(s"/rest/addresses/$zip/$houseNumber/")
        .flatMap(responseToString).map(mapToAddress)
    }

  override def address[T](implicit system: ActorSystem, mat: Materializer, ec: ExecutionContext): Flow[(GetAddressRequest, T), (Option[Address], T), NotUsed] = {
    val requests = getAddressRequestFlow[T]
    val responses = requests.via(client.cachedHostConnectionFlow[T])
    responses.via(mapResponseToAddressFlow[T])
  }
}
Example 33
Source File: EnvironmentConfig.scala From perf_tester with Apache License 2.0 | 5 votes |
package org.perftester

import ammonite.ops.Path

import scala.util.matching.Regex

/**
 * Settings of a performance test run. Every field has a default; the directory fields
 * default to the user's home directory and `scalacBuildCache` to a folder below it.
 */
case class EnvironmentConfig(
    // the name of the user running the test - used in results directory
    username: String = "",
    checkoutDir: Path = Path.home,
    testDir: Path = Path.home,
    outputDir: Path = Path.home,
    scalacBuildCache: Path = Path.home / "scalacBuildCache",
    iterations: Int = 30,
    processes: Int = 1,
    // the name of the test config to run
    config: String = "",
    // NOTE(review): defaults to null rather than an Option; callers must check before use.
    configString: String = null,
    analyseOnly: Boolean = false,
    runWithDebug: Boolean = false,
    overwriteResults: Boolean = true,
    summaryFile: Option[Path] = None,
    // By default a single pattern that matches everything.
    summaryPhases: List[Regex] = List(".*".r),
    // By default 1 followed by 5, 10, ... 100 (ascending, no duplicates).
    summaryPercent: List[Int] = ((100 until (0, -5)).toList ::: List(1)).distinct.sorted,
    summaryBaseline: Boolean = false)
Example 34
Source File: Commit.scala From sbt-autoversion with Apache License 2.0 | 5 votes |
package autoversion.model

import sbtrelease.Version.Bump

import scala.util.matching.Regex

object Commit {

  /**
   * Parses a commit line of the form "<sha> <message>": the text before the first
   * space is the sha, everything after it is the message.
   */
  def apply(commitLine: String): Commit = {
    val tokens = commitLine.split(" ")
    val sha = tokens(0)
    val message = tokens.drop(1).mkString(" ")
    Commit(sha, message)
  }
}

/**
 * A commit identified by its sha, together with its message.
 */
case class Commit(sha: String, msg: String) {

  /**
   * Suggests a version bump from the commit message. Major patterns are checked first,
   * then minor, then bugfix; the first group with a pattern matching the whole message
   * wins. Returns `None` when nothing matches.
   */
  def suggestedBump(majorRegexes: Seq[Regex], minorRegexes: Seq[Regex], bugfixRegexes: Seq[Regex]): Option[Bump] = {
    def anyMatches(regexes: Seq[Regex]): Boolean = regexes.exists(regex => matches(regex, msg))

    if (anyMatches(majorRegexes)) Some(Bump.Major)
    else if (anyMatches(minorRegexes)) Some(Bump.Minor)
    else if (anyMatches(bugfixRegexes)) Some(Bump.Bugfix)
    else None
  }

  // True only when the regex matches the entire string.
  private def matches(regex: Regex, s: String): Boolean = {
    val matcher = regex.pattern.matcher(s)
    matcher.matches
  }
}
Example 35
Source File: Plugin.scala From sbt-javacpp with MIT License | 5 votes |
package org.bytedeco.sbt.javacpp

import scala.language.postfixOps

import sbt._
import sbt.Keys._

import scala.util.Try
import scala.util.matching.Regex

/** sbt auto-plugin that adds the JavaCPP jar plus the configured JavaCPP preset
  * artifacts (one generic and one per platform) to `libraryDependencies`.
  */
object Plugin extends AutoPlugin {

  override def projectSettings: Seq[Setting[_]] = {
    import autoImport._
    Seq(
      autoCompilerPlugins := true,
      javaCppPlatform := Platform.current,
      javaCppVersion := Versions.javaCppVersion,
      javaCppPresetLibs := Seq.empty,
      libraryDependencies += { "org.bytedeco" % "javacpp" % javaCppVersion.value jar },
      javaCppPresetDependencies
    )
  }

  object Versions {
    val javaCppVersion = "1.5.3"
  }

  object autoImport {
    val javaCppPlatform = SettingKey[Seq[String]]("javaCppPlatform", """The platform that you want to compile for (defaults to the platform of the current computer). You can also set this via the "sbt.javacpp.platform" System Property """)
    val javaCppVersion = SettingKey[String]("javaCppVersion", s"Version of Java CPP that you want to use, defaults to ${Versions.javaCppVersion}")
    val javaCppPresetLibs = SettingKey[Seq[(String, String)]]("javaCppPresetLibs", "List of additional JavaCPP presets that you would wish to bind lazily, defaults to an empty list")
  }

  override def requires: Plugins = plugins.JvmPlugin

  override def trigger: PluginTrigger = allRequirements

  /** For every `(name, version)` in `javaCppPresetLibs`, emits one module with an empty
    * classifier followed by one module per entry of `javaCppPlatform`.
    */
  private def javaCppPresetDependencies: Def.Setting[Seq[ModuleID]] = {
    import autoImport._
    libraryDependencies ++= {
      val (cppPresetVersion, groupId) = buildPresetVersion(javaCppVersion.value)
      javaCppPresetLibs.value.flatMap {
        case (libName, libVersion) =>
          // Enables `r"..."` patterns that bind each interpolated name to a regex group.
          implicit class RegexOps(sc: StringContext) {
            def r = new Regex(sc.parts.mkString, sc.parts.tail.map(_ => "x"): _*)
          }
          // "name-suffix" splits at the first '-'; the suffix (dash included) goes onto
          // the platform classifier rather than the artifact name.
          val (libNamePrefix, libNamePostfix) = libName match {
            case r"([^-]+)$prefix(-.+)$postfix" => (prefix, postfix)
            case _ => (libName, "")
          }
          val presetModule = groupId % libNamePrefix % s"$libVersion-$cppPresetVersion"
          val perPlatform = javaCppPlatform.value.map { platform =>
            presetModule classifier s"$platform$libNamePostfix"
          }
          (presetModule classifier "") +: perPlatform
      }
    }
  }

  /** Maps a JavaCPP version to `(presetVersionSuffix, groupId)`.
    *
    * Versions 0.x and 1.0 to 1.3 use only "major.minor" and the old group id; 1.4.x keeps
    * the full version with the old group id; everything else uses "org.bytedeco".
    */
  private def buildPresetVersion(version: String): (String, String) = {
    val legacyGroupId = "org.bytedeco.javacpp-presets"
    version match {
      case VersionSplit(major :: minor :: _) if major == 0 || (major == 1 && minor <= 3) =>
        (s"$major.$minor", legacyGroupId)
      case VersionSplit(1 :: 4 :: _) =>
        (version, legacyGroupId)
      case _ =>
        (version, "org.bytedeco")
    }
  }

  // Extractor: dot-separated version string -> list of ints; no match if any part is non-numeric.
  private object VersionSplit {
    def unapply(arg: String): Option[List[Int]] =
      Try(arg.split('.').map(_.toInt).toList).toOption
  }
}
Example 36
Source File: ExasolConfiguration.scala From spark-exasol-connector with Apache License 2.0 | 5 votes |
package com.exasol.spark.util

import java.net.InetAddress

import scala.util.matching.Regex

/** Settings read from the user-supplied options map; see the companion `apply(opts)`
  * for the option keys and their defaults.
  */
final case class ExasolConfiguration(
  host: String,
  port: Int,
  username: String,
  password: String,
  max_nodes: Int,
  create_table: Boolean,
  batch_size: Int
)

object ExasolConfiguration {

  val IPv4_DIGITS: String = "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"
  val IPv4_REGEX: Regex = raw"""^$IPv4_DIGITS\.$IPv4_DIGITS\.$IPv4_DIGITS\.$IPv4_DIGITS$$""".r

  /** Address of the local host as reported by `InetAddress.getLocalHost`. */
  def getLocalHost(): String = InetAddress.getLocalHost.getHostAddress

  /** Returns `host` unchanged when it is a dotted IPv4 address.
    *
    * @throws IllegalArgumentException when `host` does not match [[IPv4_REGEX]]
    */
  def checkHost(host: String): String =
    if (IPv4_REGEX.unapplySeq(host).isDefined) host
    else
      throw new IllegalArgumentException(
        "The host value should be an ip address of the first Exasol data node!"
      )

  /** Builds a configuration from string options, falling back to a default per key.
    * The local host address is only looked up when no "host" option is given.
    */
  @SuppressWarnings(
    Array("org.wartremover.warts.Overloading", "org.danielnixon.extrawarts.StringOpsPartial")
  )
  def apply(opts: Map[String, String]): ExasolConfiguration = {
    def setting(key: String, default: String): String = opts.getOrElse(key, default)

    ExasolConfiguration(
      host = checkHost(opts.getOrElse("host", getLocalHost())),
      port = setting("port", "8888").toInt,
      username = setting("username", "sys"),
      password = setting("password", "exasol"),
      max_nodes = setting("max_nodes", "200").toInt,
      create_table = setting("create_table", "false").toBoolean,
      batch_size = setting("batch_size", "1000").toInt
    )
  }
}
Example 37
Source File: package.scala From scala-steward with Apache License 2.0 | 5 votes |
// NOTE(review): this listing looks truncated right after `package object edit {` -- the
// enclosing method signature and the definitions of `buffer`, `on`, `off`, `target` and
// `regexIgnoreMultiLinesBegins` are not visible; restore them from upstream before compiling.
// What the visible code does: `flush` appends a non-empty builder's text to `buffer` together
// with the `canReplace` flag and clears the builder. Each line of `target` goes to `off` while
// an off-block is open (until a line containing "scala-steward:on"), otherwise to `on`; a line
// containing "scala-steward:off" that does not open a multi-line block is stored as
// non-replaceable on its own. Both builders are flushed at the end.
package org.scalasteward.core import cats.implicits._ import org.scalasteward.core.util.Change.Unchanged import org.scalasteward.core.util.{Change, Nel} import scala.collection.mutable import scala.util.matching.Regex package object edit { /\\s*scala-steward:off".r def flush(builder: StringBuilder, canReplace: Boolean): Unit = if (builder.nonEmpty) { buffer.append((builder.toString(), canReplace)) builder.clear() } target.linesWithSeparators.foreach { line => if (off.nonEmpty) if (line.contains("scala-steward:on")) { flush(off, false) on.append(line) } else off.append(line) else if (line.contains("scala-steward:off")) { flush(on, true) if (regexIgnoreMultiLinesBegins.findFirstIn(line).isDefined) off.append(line) else // single line off buffer.append((line, false)) } else on.append(line) } flush(on, true) flush(off, false) Nel.fromListUnsafe(buffer.toList) } }
Example 38
Source File: string.scala From scala-steward with Apache License 2.0 | 5 votes |
package org.scalasteward.core.util

import cats.Foldable
import cats.implicits._
import eu.timepit.refined.api.Refined
import eu.timepit.refined.collection.MinSize
import eu.timepit.refined.refineV
import eu.timepit.refined.types.numeric.NonNegBigInt
import org.scalasteward.core.util.Change.{Changed, Unchanged}
import scala.util.Try
import scala.util.matching.Regex
import shapeless.Witness

object string {
  type MinLengthString[N] = String Refined MinSize[N]

  /** Splits `s` between every lower-case character that is directly followed by an
    * upper-case character, e.g. "fooBarBaz" becomes List("foo", "Bar", "Baz").
    */
  def splitBetweenLowerAndUpperChars(s: String): List[String] =
    splitBetween2CharMatches("\\p{javaLowerCase}\\p{javaUpperCase}".r)(s)

  /** Cuts `s` one character after the start of each match of `regex`
    * (i.e. in the middle of a two-character match).
    */
  private def splitBetween2CharMatches(regex: Regex)(s: String): List[String] = {
    val cutPoints = regex.findAllMatchIn(s).map(_.start + 1).toList
    // Surround the cut points with the string's start and end so each adjacent pair is a slice.
    val boundaries = (0 :: cutPoints) :+ s.length
    boundaries.zip(boundaries.tail).map { case (from, until) => s.substring(from, until) }
  }
}
Example 39
Source File: AddScalaFixCompilerPlugin.scala From Soteria with MIT License | 5 votes |
package com.leobenkel.soteria.Transformations

import com.leobenkel.soteria.SoteriaPluginKeys
import sbt._

import scala.util.matching.Regex

private[Transformations] trait AddScalaFixCompilerPlugin {

  /** True when `scalaVersion` is a Scala 2.10.x, 2.11.x or 2.12.x version string. */
  private[Transformations] def shouldAddCompilerPlugin(scalaVersion: String): Boolean = {
    val supportedVersions: Regex = "^2\\.1[012]\\..*$".r
    supportedVersions.findFirstIn(scalaVersion).isDefined
  }

  /** Default for the "add semantic" switch, derived from the build's `scalaVersion`. */
  def getDefaultAddSemanticValue: Def.Initialize[Boolean] =
    Def.settingDyn {
      val currentScalaVersion = Keys.scalaVersion.value
      Def.setting(shouldAddCompilerPlugin(currentScalaVersion))
    }

  /** Library dependencies, with the scalafix semanticdb compiler plugin appended when
    * `soteriaAddSemantic` is enabled.
    */
  def addScalaFixCompilerPlugin(): Def.Initialize[Seq[ModuleID]] =
    Def.settingDyn {
      val addSemantic = SoteriaPluginKeys.soteriaAddSemantic.value
      val currentLibraries = Keys.libraryDependencies.value
      Def.setting {
        if (!addSemantic) currentLibraries
        else currentLibraries :+ compilerPlugin(scalafix.sbt.ScalafixPlugin.autoImport.scalafixSemanticdb)
      }
    }
}
Example 40
Source File: ScalaSig.scala From lagom with Apache License 2.0 | 5 votes |
package com.lightbend.lagom.internal.javadsl.api

import scala.util.matching.Regex

// https://github.com/retronym/scalac-survival-guide/blob/master/src/main/scala/guide/_19_ScalaSig.scala
// Jason warned me it may not be robust, but it seems to work fine for the specific purpose we have (i.e.,
// checking if a top-level Class was created with Scala).
object ScalaSig {
  private val ModuleClassName: Regex = """(.*)\$""".r
  private val ImplClassName: Regex   = """(.*)\$class""".r

  /** Reports whether the top-level class behind `cls` carries a Scala signature annotation. */
  def isScala(cls: Class[_]) = {
    import scala.reflect.ScalaLongSignature
    import scala.reflect.ScalaSignature

    // True when either signature annotation is declared directly on the class.
    def hasAnn(candidate: Class[_]): Boolean =
      List(classOf[ScalaSignature], classOf[ScalaLongSignature])
        .exists(annotation => Option(candidate.getDeclaredAnnotation(annotation)).isDefined)

    // Class lookup that turns "not found" into None; other failures still propagate.
    def classForName(name: String, init: Boolean, loader: ClassLoader): Option[Class[_]] =
      try Some(Class.forName(name, init, loader))
      catch { case _: ClassNotFoundException => None }

    // Walks out of nested classes, then maps `Foo$` / `Foo$class` back to `Foo` when it can be loaded.
    def topLevelClass(current: Class[_]): Class[_] =
      Option(current.getEnclosingClass) match {
        case Some(outer) => topLevelClass(outer)
        case None =>
          def loadOrKeep(name: String): Class[_] =
            classForName(name, init = false, current.getClassLoader).getOrElse(current)

          current.getName match {
            case ModuleClassName(companionClassName) => loadOrKeep(companionClassName)
            case ImplClassName(interfaceName)        => loadOrKeep(interfaceName)
            case _                                   => current
          }
      }

    hasAnn(topLevelClass(cls))
  }
}
Example 41
Source File: ConfigReader.scala From sparkoscope with Apache License 2.0 | 5 votes |
// NOTE(review): this listing looks incomplete -- the two `substitute` methods appear after the
// closing brace of `object ConfigReader` with no enclosing class visible, and `bindings` is
// never defined here; restore the missing class header from upstream before compiling.
// What the visible code does: REF_RE matches references of the form `${name}` or
// `${prefix:name}`. `substitute` replaces each reference with the value looked up through
// `bindings` for that prefix (recursively substituting inside the value), leaves the reference
// text untouched when no value is found, fails via `require` on a circular reference, and
// returns a null input unchanged.
package org.apache.spark.internal.config import java.util.{Map => JMap} import java.util.regex.Pattern import scala.collection.mutable.HashMap import scala.util.matching.Regex private object ConfigReader { private val REF_RE = "\\$\\{(?:(\\w+?):)?(\\S+?)\\}".r } def substitute(input: String): String = substitute(input, Set()) private def substitute(input: String, usedRefs: Set[String]): String = { if (input != null) { ConfigReader.REF_RE.replaceAllIn(input, { m => val prefix = m.group(1) val name = m.group(2) val ref = if (prefix == null) name else s"$prefix:$name" require(!usedRefs.contains(ref), s"Circular reference in $input: $ref") val replacement = bindings.get(prefix) .flatMap(_.get(name)) .map { v => substitute(v, usedRefs + ref) } .getOrElse(m.matched) Regex.quoteReplacement(replacement) }) } else { input } } }
Example 42
Source File: ZeppelinRDisplay.scala From uberdata with Apache License 2.0 | 5 votes |
package org.apache.zeppelin.spark

import org.apache.zeppelin.interpreter.InterpreterResult.Code
import org.apache.zeppelin.interpreter.InterpreterResult.Code.{SUCCESS}
import org.apache.zeppelin.interpreter.InterpreterResult.Type
import org.apache.zeppelin.interpreter.InterpreterResult.Type.{TEXT, HTML, TABLE, IMG}

import org.jsoup.Jsoup
import org.jsoup.nodes.Element
import org.jsoup.nodes.Document.OutputSettings
import org.jsoup.safety.Whitelist

// Explicit `.asScala` conversions replace the deprecated implicit JavaConversions.
import scala.collection.JavaConverters._
import scala.util.matching.Regex

/** Rendered output: the text to show, how to interpret it, and the result code. */
case class RDisplay(content: String, `type`: Type, code: Code)

object ZeppelinRDisplay {

  // Leading "[n] " index marker (optionally preceded by spaces).
  val pattern = new Regex("""^ *\[\d*\] """)

  /** Converts an HTML fragment into an [[RDisplay]].
    *
    * Without any `<p>` element the body is returned as HTML untouched. Otherwise the body
    * is shown as plain text unless it contains an image, a script or one of the
    * `%html` / `%table` / `%img` directives, which select the matching display.
    *
    * @param html       HTML to parse
    * @param imageWidth value set as the `width` attribute of every `<img>` in HTML output
    */
  def render(html: String, imageWidth: String): RDisplay = {
    val document = Jsoup.parse(html)
    document.outputSettings().prettyPrint(false)
    val body = document.body()

    if (body.getElementsByTag("p").isEmpty) RDisplay(body.html(), HTML, SUCCESS)
    else {
      val bodyHtml = body.html()
      val richMarkers = Seq("<img", "<script", "%html ", "%table ", "%img ")

      if (!richMarkers.exists(marker => bodyHtml.contains(marker))) textDisplay(body)
      else if (bodyHtml.contains("%table")) tableDisplay(body)
      else if (bodyHtml.contains("%img")) imgDisplay(body)
      else htmlDisplay(body, imageWidth)
    }
  }

  private def textDisplay(body: Element): RDisplay = {
    // remove HTML tag while preserving whitespaces and newlines
    val text = Jsoup.clean(body.html(), "", Whitelist.none(), new OutputSettings().prettyPrint(false))
    RDisplay(text, TEXT, SUCCESS)
  }

  // Uses the first <p>: strips the directive, the index marker, and unescapes \t and \n.
  private def tableDisplay(body: Element): RDisplay = {
    val p = body.getElementsByTag("p").first().html.replace("“%table " , "").replace("”", "")
    val r = (pattern findFirstIn p).getOrElse("")
    val table = p.replace(r, "").replace("\\t", "\t").replace("\\n", "\n")
    RDisplay(table, TABLE, SUCCESS)
  }

  // Uses the first <p>: strips the directive and the index marker.
  private def imgDisplay(body: Element): RDisplay = {
    val p = body.getElementsByTag("p").first().html.replace("“%img " , "").replace("”", "")
    val r = (pattern findFirstIn p).getOrElse("")
    val img = p.replace(r, "")
    RDisplay(img, IMG, SUCCESS)
  }

  // Rebuilds the body from its children with directive and index markers removed, makes
  // protocol-relative src/href absolute, and applies `imageWidth` to every image.
  private def htmlDisplay(body: Element, imageWidth: String): RDisplay = {
    val div = body.children.asScala.map { element =>
      val eHtml = element.html()
      val eOuterHtml = element.outerHtml().replace("“%html " , "").replace("”", "")
      val r = (pattern findFirstIn eHtml).getOrElse("")
      eOuterHtml.replace(r, "")
    }.mkString

    val content = div
      .replaceAll("src=\"//", "src=\"http://")
      .replaceAll("href=\"//", "href=\"http://")

    body.html(content)
    body.getElementsByTag("img").asScala.foreach(_.attr("width", imageWidth))

    RDisplay(body.html, HTML, SUCCESS)
  }
}
Example 43
Source File: EntityConstraints.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.eidos.extraction

import com.typesafe.scalalogging.LazyLogging
import org.clulab.odin.Mention
import org.clulab.wm.eidos.utils.TagSet

import scala.annotation.tailrec
import scala.util.matching.Regex

/** Predicates used to accept or reject candidate entity mentions. */
object EntityConstraints extends LazyLogging {

  val COORD_DEPS: Set[Regex] = Set("^conj_".r, "^cc".r)

  // Ensure final token of mention span is valid
  def validFinalTag(mention: Mention, tagSet: TagSet): Boolean =
    mention.tags.isEmpty || tagSet.isValidFinal(mention.tags.get.last)

  // Limit entity mentions to at most n tokens
  def withinMaxLength(mention: Mention, n: Int): Boolean = mention.words.size <= n

  // Check if brackets and braces match
  def matchingBrackets(mention: Mention): Boolean = matchingBrackets(mention.words)

  def matchingBrackets(words: Seq[String]): Boolean =
    TagSet.BRACKETS.forall(bracketPair => matchingBrackets(words, bracketPair._1, bracketPair._2))

  // Each of the brackets is on a different "channel" so that ([)] is valid.
  // Otherwise, a stack of outstanding unmatched brackets is required.
  def matchingBrackets(words: Seq[String], opening: String, closing: String): Boolean = {
    // `depth` counts openings not yet closed; going negative means a closing came first.
    @tailrec
    def balanced(position: Int, depth: Int): Boolean =
      if (depth < 0) false
      else if (position >= words.length) depth == 0
      else {
        val delta = words(position) match {
          case `opening` => 1
          case `closing` => -1
          case _         => 0
        }
        balanced(position + 1, depth + delta)
      }

    balanced(0, 0)
  }

  // Decide if the sentence element is a conjunction using just the POS tag
  def isConjunction(i: Int, mention: Mention, tagSet: TagSet): Boolean = {
    val followsAdjective = i > 0 && tagSet.isAnyAdjective(mention.sentenceObj.tags.get(i - 1))
    !followsAdjective && tagSet.isCoordinating(mention.sentenceObj.tags.get(i))
  }
}
Example 44
Source File: Expander.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.eidos.expansion

import ai.lum.common.ConfigUtils._
import com.typesafe.config.Config
import org.clulab.odin.{Mention, State}
import org.clulab.wm.eidos.utils.TagSet

import scala.util.matching.Regex

/** Sets of dependency-label patterns, grouped by direction and by valid/invalid. */
case class Dependencies(
    validIncoming: Set[Regex],
    invalidIncoming: Set[Regex],
    validOutgoing: Set[Regex],
    invalidOutgoing: Set[Regex]
)

trait Expander {
  def expand(ms: Seq[Mention], avoidState: State = new State()): Seq[Mention]
}

object Expander {

  /** Selects the expander implementation named by the `expansionType` config entry;
    * any other value is left unimplemented (`???`).
    */
  def fromConfig(config: Config, tagSet: TagSet): Expander =
    // fixme
    config[String]("expansionType") match {
      case "textbound" => TextBoundExpander.fromConfig(config) // todo: check about scoping with these nested configs
      case "argument"  => ArgumentExpander.fromConfig(config, tagSet)
      case _           => ???
    }
}
Example 45
Source File: MetricsConfig.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics

import java.io.{FileInputStream, InputStream}
import java.util.Properties

// Explicit `.asScala` conversions replace the deprecated implicit JavaConversions.
import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.util.matching.Regex

import org.apache.spark.Logging
import org.apache.spark.util.Utils

/** Loads metrics properties and groups them per instance prefix.
  *
  * @param configFile optional path of the properties file; when absent the default
  *                   `metrics.properties` resource is looked up on the class path
  */
private[spark] class MetricsConfig(val configFile: Option[String]) extends Logging {

  private val DEFAULT_PREFIX = "*"
  private val INSTANCE_REGEX = "^(\\*|[a-zA-Z]+)\\.(.+)".r
  private val DEFAULT_METRICS_CONF_FILENAME = "metrics.properties"

  private[metrics] val properties = new Properties()
  private[metrics] var propertyCategories: mutable.HashMap[String, Properties] = null

  private def setDefaultProperties(prop: Properties): Unit = {
    prop.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet")
    prop.setProperty("*.sink.servlet.path", "/metrics/json")
    prop.setProperty("master.sink.servlet.path", "/metrics/master/json")
    prop.setProperty("applications.sink.servlet.path", "/metrics/applications/json")
  }

  /** Loads the properties, splits them by instance prefix, and copies every `*` (default)
    * entry into each instance that does not define that key itself.
    */
  def initialize(): Unit = {
    // Add default properties in case there's no properties file
    setDefaultProperties(properties)

    // If spark.metrics.conf is not set, try to get file in class path
    val isOpt: Option[InputStream] = configFile.map(new FileInputStream(_)).orElse {
      try {
        Option(Utils.getSparkClassLoader.getResourceAsStream(DEFAULT_METRICS_CONF_FILENAME))
      } catch {
        case e: Exception =>
          logError("Error loading default configuration file", e)
          None
      }
    }

    isOpt.foreach { is =>
      try {
        properties.load(is)
      } finally {
        is.close()
      }
    }

    propertyCategories = subProperties(properties, INSTANCE_REGEX)
    propertyCategories.get(DEFAULT_PREFIX).foreach { defaultProperty =>
      for {
        (inst, prop) <- propertyCategories if inst != DEFAULT_PREFIX
        (k, v) <- defaultProperty.asScala if prop.getProperty(k) == null
      } {
        prop.setProperty(k, v)
      }
    }
  }

  /** Groups `prop` by the first capture group of `regex`, storing the second capture
    * group as the key inside that group.
    *
    * Keys that `regex` does not match in full are skipped. Previously a key that matched
    * only as a prefix passed the `findPrefixOf` check and then failed the full-match
    * extraction with a `MatchError`.
    */
  def subProperties(prop: Properties, regex: Regex): mutable.HashMap[String, Properties] = {
    val subProperties = new mutable.HashMap[String, Properties]
    prop.asScala.foreach {
      case (regex(prefix, suffix), value) =>
        subProperties.getOrElseUpdate(prefix, new Properties).setProperty(suffix, value)
      case _ => // key does not follow the "<instance>.<rest>" layout
    }
    subProperties
  }

  /** Properties of `inst`, else the `*` defaults, else a fresh empty `Properties`. */
  def getInstance(inst: String): Properties =
    propertyCategories.getOrElse(inst, propertyCategories.getOrElse(DEFAULT_PREFIX, new Properties))
}
Example 46
Source File: TokenizeRule.scala From Adenium with Apache License 2.0 | 5 votes |
package com.adenium.parser.structs

import scala.util.matching.Regex

/** A tokenizing regular expression bound to a sensor. */
case class TokenizeRule(id: Int, sensorId: Long, sensorType: String, regEx: Regex)

object TokenizeRule {

  /** Convenience constructor that compiles `regex` from its string form. */
  def apply(id: Int, sensorId: Long, sensorType: String, regex: String): TokenizeRule =
    TokenizeRule(id, sensorId, sensorType, regex.r)

  /** Collects the rules registered for the sensors of `agents`, looking each sensor id up
    * once. Returns an empty array when `rules` is `None`.
    */
  def filter(agents: Array[Agent], rules: Option[Map[Long, Array[TokenizeRule]]]): Array[TokenizeRule] = {
    // agentTypeID de-duplication : to be deleted later.. after DB refining
    val onePerSensor = agents.groupBy(_.sensorId).values.map(_.head).toArray

    onePerSensor
      .flatMap(agent => rules.flatMap(bySensor => bySensor.get(agent.sensorId)))
      .flatten
  }
}
Example 47
Source File: DDLParser.scala From redshift-fake-driver with Apache License 2.0 | 5 votes |
package jp.ne.opt.redshiftfake.parse

import scala.language.implicitConversions
import scala.util.matching.Regex

/** Rewrites Redshift DDL by removing or replacing Redshift-only clauses. */
class DDLParser extends BaseParser {

  private[this] val alterTableHandler = new AlterTableHandler

  private[this] val distStyleRegex = s"(?i)DISTSTYLE$space(EVEN|KEY|ALL)".r

  private[this] val distKeyRegex = s"(?i)DISTKEY$space\\($space$quotedIdentifier$space\\)".r

  private[this] val sortKeyRegex = {
    val columnsRegex = s"$space$quotedIdentifier($space,$space$quotedIdentifier)*$space"
    s"(?i)(COMPOUND|INTERLEAVED)?${space}SORTKEY$space\\($columnsRegex\\)".r
  }

  private[this] val encodeRegex = s"(?i)${space}ENCODE$space$identifier$space".r

  private[this] val identityRegex = s"(?i)(BIGINT|INT)${space}IDENTITY\\([0-9]*$space,$space[0-9]*\\)".r

  /** Drops DISTSTYLE / DISTKEY / SORTKEY / ENCODE clauses and turns IDENTITY columns into
    * BIGSERIAL. ALTER TABLE statements are then delegated to the alter-table handler; for
    * anything else each `DEFAULT <function>` found in the original `ddl` is converted.
    */
  def sanitize(ddl: String): String = {
    val clausesRemoved =
      Seq(distStyleRegex, distKeyRegex, sortKeyRegex, encodeRegex)
        .foldLeft(ddl)((text, clause) => clause.replaceAllIn(text, ""))
    val sanitized = identityRegex.replaceAllIn(clausesRemoved, "BIGSERIAL")

    if (alterTableHandler.matches(sanitized)) {
      val alterTableParsed = alterTableHandler.handle(sanitized)
      System.out.println("Alter table parsed " + alterTableParsed)
      alterTableParsed
    } else {
      //Replace default functions in create table statement
      val defaultRegex = "(?i)DEFAULT ([_a-zA-Z]\\w*(\\(\\))?)".r
      defaultRegex.findAllMatchIn(ddl).foldLeft(sanitized) { (statement: String, found: Regex.Match) =>
        val (original, parsedDefaultValue) = new DefaultParser().convert(found.group(1)).get
        if (parsedDefaultValue.nonEmpty) {
          val replaced = statement.replace(original, parsedDefaultValue.get)
          System.out.println("Sanitised function with default: " + replaced)
          replaced
        } else statement
      }
    }
  }
}
Example 48
Source File: Scalac.scala From borer with Mozilla Public License 2.0 | 5 votes |
package io.bullet.borer

import scala.reflect.macros.blackbox
import scala.util.control.NonFatal
import scala.util.matching.Regex

object Scalac {

  /** Outcome of type-checking a code fragment at compile time. */
  sealed trait TypeCheck {
    def assertErrorMsgMatches(string: String): Unit
    def assertErrorMsgMatches(regex: Regex): Unit
  }

  object TypeCheck {

    /** The fragment compiled; both assertions therefore always fail. */
    final case class Result(code: String, tpe: String) extends TypeCheck {
      def assertErrorMsgMatches(string: String): Unit = failBecauseItCompiled()
      def assertErrorMsgMatches(regex: Regex): Unit = failBecauseItCompiled()

      private def failBecauseItCompiled(): Nothing =
        sys.error(s"Code Fragment compiled without error to an expression of type `$tpe`:\n\n$code")
    }

    /** The fragment failed to compile with message `msg`. */
    final case class Error(msg: String) extends TypeCheck {
      def assertErrorMsgMatches(string: String): Unit = assert(msg == string, string)
      def assertErrorMsgMatches(regex: Regex): Unit = assert(regex.findFirstIn(msg).isDefined, regex)

      private def assert(value: Boolean, expected: Any): Unit =
        if (!value) sys.error(s"Expected compiler error matching [$expected] but got [$msg]")
    }
  }

  /** Type-checks the literal `codeFragment` during compilation and reports the outcome. */
  def typecheck(codeFragment: String): TypeCheck = macro Macro.typecheck

  private object Macro {

    def typecheck(c: blackbox.Context)(codeFragment: c.Tree): c.Tree = {
      import c.universe._

      val fragment = codeFragment match {
        case Literal(Constant(literal: String)) => literal
        case _ => c.abort(c.enclosingPosition, "`codeFragment` argument must be a literal string")
      }

      try {
        // Wrap the fragment in a throw-away object so that it can be type-checked on its own.
        val objectName = TermName(c.freshName)
        val valueName = TermName(c.freshName)
        val wrapped = c.parse(s"object $objectName { val $valueName = { $fragment } }")
        c.typecheck(wrapped) match {
          case ModuleDef(_, _, Template(_, _, List(_, valDef: ValDef, defDef: DefDef))) =>
            val tpe = defDef.symbol.asMethod.returnType.toString
            q"_root_.io.bullet.borer.Scalac.TypeCheck.Result(${showCode(valDef.rhs)}, $tpe)"
          case other =>
            c.abort(c.enclosingPosition, s"Unexpected scalac result:\n\n${showCode(other)}")
        }
      } catch {
        case NonFatal(e) => q"_root_.io.bullet.borer.Scalac.TypeCheck.Error(${e.getMessage})"
      }
    }
  }
}
Example 49
Source File: index_jawiki.scala From attic-nlp4l with Apache License 2.0 | 5 votes |
import collection.JavaConversions._
import it.cnr.isti.hpc.io.reader.JsonRecordParser
import it.cnr.isti.hpc.io.reader.RecordReader
import it.cnr.isti.hpc.wikipedia.article.Article
import it.cnr.isti.hpc.wikipedia.article.Link
import it.cnr.isti.hpc.wikipedia.reader.filter.TypeFilter
import org.nlp4l.core._
import org.nlp4l.core.analysis.Analyzer

import scala.util.matching.Regex

// Script: indexes the articles of /tmp/jawiki.json into /tmp/index-jawiki.

val index = "/tmp/index-jawiki"

val schema = SchemaLoader.loadFile("examples/schema/jawiki.conf")

val writer = IWriter(index, schema)

// Writes one article; title, body and categories are each stored under two field names.
def addDocument(id: Int, title: String, body: String, cat: List[String]): Unit = {
  val fields = Set(
    Field("id", id.toString),
    Field("title", title),
    Field("title_ja", title),
    Field("body", body),
    Field("ka_pair", body),
    Field("cat", cat),
    Field("cat_ja", cat)
  )
  writer.write(Document(fields))
}

val reader = new RecordReader("/tmp/jawiki.json", new JsonRecordParser[Article](classOf[Article])).filter(TypeFilter.STD_FILTER)

val ite = reader.iterator

val pattern: Regex = """Category:(.+)""".r

var id: Int = 0

// Articles whose title contains "曖昧さ回避" are skipped.
ite.filterNot(_.getTitle().contains("曖昧さ回避")).foreach { article =>
  id += 1
  val title = article.getTitle()
  val body = article.getText()
  // Keep only link ids of the form "Category:<name>", reduced to <name>.
  val cat = article.getCategories().map(_.getId()).collect { case pattern(name) => name }.toList
  addDocument(id, title, body, cat)
}

writer.close()

// print end time
new java.util.Date()
Example 50
Source File: trans_katakana_alpha.scala From attic-nlp4l with Apache License 2.0 | 5 votes |
import java.io.File

import org.nlp4l.lm._

import scala.io._
import scala.util.matching.Regex
import scalax.file.Path

// Script: builds an HMM model index from aligned katakana/alphabet training data,
// then runs the resulting tokenizer on a few sample words.

val index = "/tmp/index-transliteration"

// remove index directory before creating it
val p = Path(new File(index))
p.deleteRecursively()

val indexer = new HmmModelIndexer(index)

val file = Source.fromFile("train_data/alpha_katakana_aligned.txt", "UTF-8")

val pattern: Regex = """([\u30A0-\u30FF]+)([a-zA-Z]+)(.*)""".r

// Peels (katakana, latin) pairs off the front of `str` until it no longer starts with such
// a pair, appending each one to `result`.
def align(result: List[(String, String)], str: String): List[(String, String)] =
  str match {
    case pattern(katakana, latin, remainder) => align(result :+ ((katakana, latin)), remainder)
    case _                                   => result
  }

// create hmm model index
file.getLines.foreach { line: String =>
  indexer.addDocument(align(List.empty[(String, String)], line))
}

file.close

indexer.close

// read the model index
val model = HmmModel(index)

println("\n=== tokenizer test ===")
val tokenizer = HmmTokenizer(model)

tokenizer.tokens("アルゴリズム")
tokenizer.tokens("プログラム")
tokenizer.tokens("ケミカル")
tokenizer.tokens("ダイニング")
tokenizer.tokens("コミッター")
tokenizer.tokens("エントリー")
Example 51
Source File: hmm_postagger.scala From attic-nlp4l with Apache License 2.0 | 5 votes |
import java.io.File
import java.nio.file.FileSystems

import org.apache.lucene.index._
import org.apache.lucene.search.TermQuery
import org.nlp4l.core.analysis.Analyzer
import org.nlp4l.core._
import org.nlp4l.lm.{HmmTagger, HmmModel, HmmModelIndexer}

import scala.io._
import scala.util.matching.Regex
import scalax.file.Path
import scalax.file.PathSet

// Script: builds an HMM part-of-speech model index from the corpus files under
// corpora/brown/brown, then tags a few sample sentences.

val index = "/tmp/index-brown-hmm"

// delete existing Lucene index
val p = Path(new File(index))
p.deleteRecursively()

// create HMM model index
val c: PathSet[Path] = Path("corpora", "brown", "brown").children()
val indexer = HmmModelIndexer(index)

// Only files whose name ends in a digit are read.
c.filter { e =>
  val s = e.name
  val c = s.charAt(s.length - 1)
  c >= '0' && c <= '9'
}.toList.sorted.foreach { f =>
  val source = Source.fromFile(f.path, "UTF-8")
  // Close every file once it has been read; previously each Source was left open.
  try {
    source.getLines().map(_.trim).filter(_.length > 0).foreach { g =>
      val pairs = g.split("\\s+")
      // Keep only well-formed "word/tag" tokens; words are lower-cased.
      val doc = pairs.map(_.split("/")).collect { case Array(word, tag) => (word.toLowerCase(), tag) }
      indexer.addDocument(doc)
    }
  } finally {
    source.close()
  }
}

indexer.close()

// read the model index
val model = HmmModel(index)

println("\n=== tagger test ===")
val tagger = HmmTagger(model)

tagger.tokens("i like to go to france .")
tagger.tokens("you executed lucene program .")
tagger.tokens("nlp4l development members may be able to present better keywords .")
Example 52
Source File: LoanWordsProcessor.scala From attic-nlp4l with Apache License 2.0 | 5 votes |
package org.nlp4l.syn

import org.apache.lucene.search.spell.LuceneLevenshteinDistance
import org.nlp4l.core.RawReader
import org.nlp4l.framework.models._
import org.nlp4l.framework.processors.{Processor, ProcessorFactory, DictionaryAttributeFactory}
import org.nlp4l.lm.{HmmTokenizer, HmmModel}
import org.slf4j.LoggerFactory

import scala.collection.mutable.ListBuffer
import scala.util.matching.Regex

/** Declares the two columns ("word", "synonym") of the loan-words dictionary. */
class LoanWordsDictionaryAttributeFactory(settings: Map[String, String]) extends DictionaryAttributeFactory(settings) {
  override def getInstance: DictionaryAttribute = {
    val list = Seq[CellAttribute](
      CellAttribute("word", CellType.StringType, true, true),
      CellAttribute("synonym", CellType.StringType, false, true)
    )
    new DictionaryAttribute("loanWords", list)
  }
}

/** Builds a [[LoanWordsProcessor]] from settings: "index", "field" and "modelIndex" are
  * required; "threshold" and "minDocFreq" fall back to the defaults below.
  */
class LoanWordsProcessorFactory(settings: Map[String, String]) extends ProcessorFactory(settings) {

  val DEF_THRESHOLD = 0.8F
  val DEF_MIN_DOCFREQ = 3

  override def getInstance: Processor = {
    val index = getStrParamRequired("index")
    val field = getStrParamRequired("field")
    val modelIndex = getStrParamRequired("modelIndex")
    val threshold = getFloatParam("threshold", DEF_THRESHOLD)
    val minDocFreq = getIntParam("minDocFreq", DEF_MIN_DOCFREQ)
    new LoanWordsProcessor(index, field, modelIndex, threshold, minDocFreq)
  }
}

/** Scans the terms of `field` in `index` for "<latin> <katakana>" pairs and records a pair
  * when the predicted transliteration of the katakana part is close enough to the latin part.
  */
class LoanWordsProcessor(val index: String, val field: String, val modelIndex: String,
                         val threshold: Float, val minDocFreq: Int) extends Processor {

  /** @throws RuntimeException when `field` does not exist in the index */
  override def execute(data: Option[Dictionary]): Option[Dictionary] = {
    val logger = LoggerFactory.getLogger(this.getClass)
    val reader = RawReader(index)
    try {
      // Built inside the try so the reader is still closed if loading the model fails.
      val trModel = new TransliterationModelIndex(modelIndex)
      val pattern: Regex = """([a-z]+) ([\u30A0-\u30FF]+)""".r
      val lld = new LuceneLevenshteinDistance()
      val records = ListBuffer.empty[Record]
      var progress = 0

      reader.field(field) match {
        case Some(f) =>
          val len = f.uniqTerms
          f.terms.foreach { t =>
            progress = progress + 1
            // Log progress once every 10000 terms.
            if ((progress % 10000) == 0) {
              val percent = ((progress.toFloat / len) * 100).toInt
              logger.info(s"$percent % done ($progress / $len) term is ${t.text}")
            }
            if (t.docFreq >= minDocFreq) {
              t.text match {
                case pattern(a, b) =>
                  val predWord = trModel.predict(b)
                  if (lld.getDistance(a, predWord) > threshold) {
                    records += Record(Seq(Cell("word", a), Cell("synonym", b)))
                  }
                case _ => // not a "<latin> <katakana>" term
              }
            }
          }
          Some(Dictionary(records))
        case _ =>
          // The closing quote after the index name was missing from the original message.
          throw new RuntimeException(s"""field "$field" you specified in conf file doesn't exist in the index "$index".""")
      }
    } finally {
      reader.close
    }
  }
}

/** Predicts a transliteration for a katakana word using the HMM model stored at `index`. */
class TransliterationModelIndex(index: String) {

  private val model = HmmModel(index)

  private val tokenizer = HmmTokenizer(model)

  /** Concatenates the `cls` value of every token produced for `katakana`. */
  def predict(katakana: String): String =
    tokenizer.tokens(katakana).map(_.cls).mkString
}
Example 53
Source File: HttpVerb.scala From http-verbs with Apache License 2.0 | 5 votes |
package uk.gov.hmrc.http

import java.net.{ConnectException, URL}
import java.util.concurrent.TimeoutException

import com.typesafe.config.Config

import scala.collection.JavaConverters.iterableAsScalaIterableConverter
import scala.concurrent.{ExecutionContext, Future}
import scala.util.matching.Regex

trait HttpVerb extends Request {

  protected def configuration: Option[Config]

  /** Translates low-level failures of `f`: a timeout becomes a `GatewayTimeoutException`,
    * a connection failure a `BadGatewayException`. Other failures pass through unchanged.
    */
  def mapErrors(httpMethod: String, url: String, f: Future[HttpResponse])(
    implicit ec: ExecutionContext): Future[HttpResponse] =
    f.recoverWith {
      case timeout: TimeoutException =>
        Future.failed(new GatewayTimeoutException(gatewayTimeoutMessage(httpMethod, url, timeout)))
      case refused: ConnectException =>
        Future.failed(new BadGatewayException(badGatewayMessage(httpMethod, url, refused)))
    }

  def badGatewayMessage(verbName: String, url: String, e: Exception): String =
    s"$verbName of '$url' failed. Caused by: '${e.getMessage}'"

  def gatewayTimeoutMessage(verbName: String, url: String, e: Exception): String =
    s"$verbName of '$url' timed out with message '${e.getMessage}'"

  /** Host patterns treated as internal: taken from `internalServiceHostPatterns` when that
    * config path is present, otherwise hosts ending in ".service" or ".mdtp".
    */
  lazy val internalHostPatterns: Seq[Regex] =
    configuration
      .filter(_.hasPathOrNull("internalServiceHostPatterns"))
      .map(_.getStringList("internalServiceHostPatterns").asScala.map(_.r).toSeq)
      .getOrElse(Seq("^.*\\.service$".r, "^.*\\.mdtp$".r))

  /** A single `User-Agent` header carrying `appName` when configured, otherwise empty. */
  lazy val userAgentHeader: Seq[(String, String)] =
    configuration
      .filter(_.hasPathOrNull("appName"))
      .map(config => Seq("User-Agent" -> config.getString("appName")))
      .getOrElse(Seq.empty)

  /** All carrier headers for internal hosts; for any other host the carrier's
    * `otherHeaders` are left out. The user-agent header is always appended.
    */
  override def applicableHeaders(url: String)(implicit hc: HeaderCarrier): Seq[(String, String)] = {
    val isInternalHost = internalHostPatterns.exists(_.pattern.matcher(new URL(url).getHost).matches())
    val forwarded =
      if (isInternalHost) hc.headers
      else hc.headers.filterNot(hc.otherHeaders.contains(_))

    forwarded ++ userAgentHeader
  }
}
Example 54
Source File: package.scala From sbt-header with Apache License 2.0 | 5 votes |
package de.heikoseeberger

import java.io.File

import scala.util.matching.Regex

package object sbtheader {

  // Pin the collection aliases to their immutable variants inside this package.
  type Traversable[+A] = scala.collection.immutable.Traversable[A]
  type Iterable[+A]    = scala.collection.immutable.Iterable[A]
  type Seq[+A]         = scala.collection.immutable.Seq[A]
  type IndexedSeq[+A]  = scala.collection.immutable.IndexedSeq[A]

  final object FileOps {
    // At least one character, a dot, then the extension (everything after the last dot).
    val extensionPattern: Regex = """.+\.(.+)""".r
  }

  /** Adds `extension` to `java.io.File`. */
  final implicit class FileOps(val file: File) extends AnyVal {

    /** Text after the last dot of the file name, or `None` when the name has no dot with
      * at least one character on each side of it.
      */
    def extension: Option[String] =
      PartialFunction.condOpt(file.getName) {
        case FileOps.extensionPattern(ext) => ext
      }
  }

  val newLine: String = System.lineSeparator()
}
Example 55
Source File: LoggingSpec.scala From cluster-broccoli with Apache License 2.0 | 5 votes |
/*
 * LoggingSpec: specs2 specification (with Mockito and ScalaCheck mixed in) for
 * `logExecutionTime`, which is brought into scope by `import logging._`.
 *
 * The local trait `F[T]` exists only to be mocked: `body()` stands for the timed block and
 * `log(message)` for the logging callback, so each example can verify how often either is called.
 *
 * Examples:
 *  - "execute the block just once": the block passed to `logExecutionTime` is invoked exactly
 *    once and, with a constant no-op callback, `log` is never invoked.
 *  - "invokes the log function": for generated identifier labels, the callback is invoked once
 *    with a message matching "<label> took <digits> ms" (the label is regex-quoted).
 *  - "returns the result of the body": the value produced by the block is returned unchanged.
 */
package de.frosner.broccoli import org.mockito.{ArgumentCaptor, Matchers} import org.scalacheck.Gen import org.specs2.ScalaCheck import org.specs2.mock.Mockito import org.specs2.mutable.Specification import scala.util.matching.Regex class LoggingSpec extends Specification with Mockito with ScalaCheck { import logging._ trait F[T] { def body(): T def log(message: String): Unit } "logging the execution time" should { "execute the block just once" in { val f = mock[F[Unit]] logExecutionTime("foo") { f.body() }(Function.const(())) there was one(f).body() there was no(f).log(Matchers.any[String]()) } "invokes the log function" in prop { label: String => val f = mock[F[Int]] logExecutionTime(label) { 42 }(f.log(_)) val message = ArgumentCaptor.forClass(classOf[String]) there was one(f).log(message.capture()) message.getValue must beMatching(s"${Regex.quote(label)} took \\d+ ms") there was no(f).body() }.setGen(Gen.identifier.label("label")) "returns the result of the body" in prop { ret: Int => logExecutionTime("foo") { ret }(Function.const(())) === ret } } }
Example 56
Source File: BaseFiltering.scala From scala-commons with MIT License | 5 votes |
package com.avsystem.commons
package mongo.core.ops

import java.util.regex.Pattern

import com.avsystem.commons.mongo.text.TextSearchLanguage
import com.google.common.collect.ImmutableList
import com.mongodb.client.model.geojson.{Geometry, Point}
import com.mongodb.client.model.{Filters, TextSearchOptions}
import org.bson.BsonType
import org.bson.conversions.Bson

import scala.util.matching.Regex

/** Builders for MongoDB filter documents on a single key.
  *
  * Every method delegates to the matching `com.mongodb.client.model.Filters` factory, passing
  * `key` as the field name. Methods taking a `T` go through `use`/`encode`.
  */
trait BaseFiltering[T] extends Any with KeyValueHandling[T] {
  // --- comparison -------------------------------------------------------------------------
  def equal(t: T): Bson = use(t)(Filters.eq)
  def notEqual(t: T): Bson = use(t)(Filters.ne)

  def gt(t: T): Bson = use(t)(Filters.gt)
  def lt(t: T): Bson = use(t)(Filters.lt)
  def gte(t: T): Bson = use(t)(Filters.gte)
  def lte(t: T): Bson = use(t)(Filters.lte)

  def in(ts: T*): Bson = Filters.in(key, ts.map(encode).asJava)
  def nin(ts: T*): Bson = Filters.nin(key, ts.map(encode).asJava)

  // --- element / evaluation ---------------------------------------------------------------
  def exists(exists: Boolean = true): Bson = Filters.exists(key, exists)

  def ofType(bsonType: BsonType): Bson = Filters.`type`(key, bsonType)
  def ofType(typeName: String): Bson = Filters.`type`(key, typeName)

  def mod(divisor: Long, remainder: Long): Bson = Filters.mod(key, divisor, remainder)

  def regex(re: Regex): Bson = regex(re.pattern)
  def regex(pattern: Pattern): Bson = Filters.regex(key, pattern)
  def regex(patternStr: String): Bson = Filters.regex(key, patternStr)
  def regex(patternStr: String, options: String): Bson = Filters.regex(key, patternStr, options)

  /** Text-search filter; only the options that are actually supplied are set on
    * `TextSearchOptions`.
    */
  def text(
    str: String,
    caseSensitive: OptArg[Boolean] = OptArg.Empty,
    language: OptArg[TextSearchLanguage] = OptArg.Empty,
    diacriticSensitive: OptArg[Boolean] = OptArg.Empty): Bson = {
    val searchOptions = new TextSearchOptions().setup { options =>
      caseSensitive.foreach(b => options.caseSensitive(b))
      language.foreach(l => options.language(l.code))
      diacriticSensitive.foreach(b => options.diacriticSensitive(b))
    }
    Filters.text(str, searchOptions)
  }

  // --- bitwise ----------------------------------------------------------------------------
  def bitsAllClear(bitMask: Long): Bson = Filters.bitsAllClear(key, bitMask)
  def bitsAllSet(bitMask: Long): Bson = Filters.bitsAllSet(key, bitMask)
  def bitsAnyClear(bitMask: Long): Bson = Filters.bitsAnyClear(key, bitMask)
  def bitsAnySet(bitMask: Long): Bson = Filters.bitsAnySet(key, bitMask)

  // --- geospatial -------------------------------------------------------------------------
  def geoWithinBson(geometryBson: Bson): Bson = Filters.geoWithin(key, geometryBson)
  def geoWithin(geometry: Geometry): Bson = Filters.geoWithin(key, geometry)

  def geoWithinBox(lowerLeftX: Double, lowerLeftY: Double, upperRightX: Double, upperRightY: Double): Bson = {
    Filters.geoWithinBox(key, lowerLeftX, lowerLeftY, upperRightX, upperRightY)
  }

  def geoWithinPolygon(points: (Double, Double)*): Bson = {
    val javaPoints = points.map {
      case (x, y) => ImmutableList.of(x: JDouble, y: JDouble): JList[JDouble]
    }.asJava
    Filters.geoWithinPolygon(key, javaPoints)
  }

  def geoWithinCenter(x: Double, y: Double, radius: Double): Bson = Filters.geoWithinCenter(key, x, y, radius)
  def geoWithinCenterSphere(x: Double, y: Double, radius: Double): Bson = Filters.geoWithinCenterSphere(key, x, y, radius)

  def geoIntersectsBson(geometryBson: Bson): Bson = Filters.geoIntersects(key, geometryBson)
  def geoIntersects(geometry: Geometry): Bson = Filters.geoIntersects(key, geometry)

  /** Boxes an optional distance; an empty option becomes `null`, which the driver treats as "no bound". */
  private def jDouble(doubleOpt: Opt[Double]): JDouble = doubleOpt.map(d => d: JDouble).orNull

  /** Applies `f` to the boxed distances.
    *
    * `f` receives `(maxDistance, minDistance)` in that order, because that is the order of the
    * two trailing parameters of `Filters.near` / `Filters.nearSphere`. Previously the values were
    * handed over as `(min, max)`, which silently swapped the two bounds in every `near*` filter.
    */
  private def useMinMax(min: Opt[Double], max: Opt[Double])(f: (JDouble, JDouble) => Bson): Bson = {
    f(jDouble(max), jDouble(min))
  }

  def nearBson(geometryBson: Bson, maxDistance: Opt[Double] = Opt.empty, minDistance: Opt[Double] = Opt.empty): Bson = {
    useMinMax(minDistance, maxDistance)(Filters.near(key, geometryBson, _, _))
  }

  def nearPoint(point: Point, maxDistance: Opt[Double] = Opt.empty, minDistance: Opt[Double] = Opt.empty): Bson = {
    useMinMax(minDistance, maxDistance)(Filters.near(key, point, _, _))
  }

  def nearXY(x: Double, y: Double, maxDistance: Opt[Double] = Opt.empty, minDistance: Opt[Double] = Opt.empty): Bson = {
    useMinMax(minDistance, maxDistance)(Filters.near(key, x, y, _, _))
  }

  def nearSphereBson(geometryBson: Bson, maxDistance: Opt[Double] = Opt.empty, minDistance: Opt[Double] = Opt.empty): Bson = {
    useMinMax(minDistance, maxDistance)(Filters.nearSphere(key, geometryBson, _, _))
  }

  def nearSpherePoint(point: Point, maxDistance: Opt[Double] = Opt.empty, minDistance: Opt[Double] = Opt.empty): Bson = {
    useMinMax(minDistance, maxDistance)(Filters.nearSphere(key, point, _, _))
  }

  def nearSphereXY(x: Double, y: Double, maxDistance: Opt[Double] = Opt.empty, minDistance: Opt[Double] = Opt.empty): Bson = {
    useMinMax(minDistance, maxDistance)(Filters.nearSphere(key, x, y, _, _))
  }
}
Example 57
Source File: GremlinParser.scala From vm with GNU Affero General Public License v3.0 | 5 votes |
package org.mmadt.language.gremlin

import org.mmadt.VmException
import org.mmadt.language.LanguageException
import org.mmadt.language.obj._
import org.mmadt.language.obj.`type`.__
import org.mmadt.storage.StorageFactory._

import scala.util.matching.Regex
import scala.util.parsing.combinator.JavaTokenParsers

/** Parser-combinator grammar for dot-separated step expressions such as `name(arg, ...)`. */
class GremlinParser extends JavaTokenParsers {

  override val whiteSpace: Regex = """[\s\n]+""".r
  override def decimalNumber: Parser[String] = """-?\d+\.\d+""".r

  // all mm-ADT languages must be able to accept a string representation of an expression in the language and return an Obj
  private def parse[O <: Obj](input: String): O =
    this.parseAll(expr, input.trim) match {
      case Success(result, _) =>
        (result `,`).asInstanceOf[O]
      case NoSuccess(message, remaining) =>
        // Report the failure together with the source text and the line/column it occurred at.
        throw LanguageException.parseError(
          message,
          remaining.source.toString,
          remaining.pos.line.asInstanceOf[java.lang.Integer],
          remaining.pos.column.asInstanceOf[java.lang.Integer])
    }

  /** One or more steps, optionally separated by "."; the resolved instructions are executed
    * left to right, starting from a fresh `__`.
    */
  lazy val expr: Parser[Obj] = rep1sep(step, opt(".")) ^^ { steps =>
    steps.flatten.foldLeft[Obj](new __())((current, inst) => inst.exec(current))
  }

  // The trailing `astr` repeats the first alternative; it is kept so that the reported failure
  // stays the same when no alternative matches.
  lazy val aobj: Parser[Obj] = astr | abool | aint | astr

  lazy val abool: Parser[Bool] = ("true" | "false") ^^ (literal => bool(literal.equals("true")))

  lazy val aint: Parser[Int] = wholeNumber ^^ (digits => int(digits.toLong))

  /** Single-quoted string literal; the surrounding quotes are dropped. */
  lazy val astr: Parser[Str] =
    """'([^'\x00-\x1F\x7F\\]|\\[\\'"bfnrt]|\\u[a-fA-F0-9]{4})*'""".r ^^ { quoted =>
      str(quoted.substring(1, quoted.length - 1))
    }

  /** A step name followed by a parenthesised, comma-separated argument list. */
  lazy val step: Parser[List[Inst[Obj, Obj]]] =
    "[a-zA-Z]+".r ~ ("(" ~> repsep(aobj, ",") <~ ")") ^^ {
      case name ~ args => TraversalMonoid.resolve(name, args)
    }
}

object GremlinParser {

  /** Parses `script` with a fresh parser instance.
    *
    * A `VmException` is rethrown as is; any other `Exception` has its stack trace printed and is
    * rethrown as a `LanguageException` carrying the original message.
    */
  def parse[O <: Obj](script: String): O =
    try new GremlinParser().parse[O](script)
    catch {
      case vmError: VmException => throw vmError
      case other: Exception =>
        other.printStackTrace()
        throw new LanguageException(other.getMessage)
    }
}
Example 58
Source File: Facets1.scala From incubator-daffodil with Apache License 2.0 | 5 votes |
package org.apache.daffodil.dsom

import org.apache.daffodil.util.Enum

import scala.util.matching.Regex

/** The closed set of facet kinds, one case object per kind. */
object Facet extends Enum {
  // A trait is already abstract, so the former `abstract` modifier was redundant.
  sealed trait Type extends EnumValueType
  case object enumeration extends Type
  case object fractionDigits extends Type
  case object maxExclusive extends Type
  case object maxInclusive extends Type
  case object maxLength extends Type
  case object minExclusive extends Type
  case object minInclusive extends Type
  case object minLength extends Type
  case object pattern extends Type
  case object totalDigits extends Type
  case object whiteSpace extends Type
}

/** Aliases for facet values; the `R`-suffixed variants carry a `Regex` instead of a `String`. */
object FacetTypes {
  type Values = String
  type ValuesR = Regex
  type FacetValue = (Facet.Type, Values)
  type FacetValueR = (Facet.Type, ValuesR)
  type ElemFacets = Seq[FacetValue]
  type ElemFacetsR = Seq[FacetValueR]
}
Example 59
Source File: MetricDataGenerator.scala From haystack-trends with Apache License 2.0 | 5 votes |
package com.expedia.www.haystack.trends

import com.expedia.metrics.MetricData
import com.expedia.open.tracing.Span
import com.expedia.www.haystack.commons.entities.encoders.Encoder
import com.expedia.www.haystack.commons.metrics.MetricsSupport
import com.expedia.www.haystack.trends.transformer.MetricDataTransformer

import scala.util.matching.Regex

trait MetricDataGenerator extends MetricsSupport {

  private val SpanValidationErrors = metricRegistry.meter("span.validation.failure")
  private val BlackListedSpans = metricRegistry.meter("span.validation.black.listed")
  private val metricPointGenerationTimer = metricRegistry.timer("metricpoint.generation.time")

  /** Decides whether a span may be processed.
    *
    * A span with an empty service or operation name is rejected and counted on the
    * validation-failure meter. Otherwise the span is rejected, and counted on the black-listed
    * meter, when any of the given patterns is found in its service name.
    *
    * @param span                the span to check
    * @param blackListedServices patterns searched for (not fully matched) in the service name
    * @return `true` when the span passed both checks
    */
  def isValidSpan(span: Span, blackListedServices: List[Regex]): Boolean = {
    val namesMissing = span.getServiceName.isEmpty || span.getOperationName.isEmpty

    if (namesMissing) {
      SpanValidationErrors.mark()
      false
    } else {
      val blacklisted = blackListedServices.exists(_.pattern.matcher(span.getServiceName).find())
      if (blacklisted) BlackListedSpans.mark()
      !blacklisted
    }
  }
}
Example 60
Source File: AppConfiguration.scala From haystack-trends with Apache License 2.0 | 5 votes |
package com.expedia.www.haystack.trends.config

import java.util.Properties

import com.expedia.www.haystack.commons.config.ConfigurationLoader
import com.expedia.www.haystack.commons.entities.encoders.EncoderFactory
import com.expedia.www.haystack.trends.config.entities.{KafkaConfiguration, TransformerConfiguration}
import com.typesafe.config.Config
import org.apache.kafka.streams.StreamsConfig
import org.apache.kafka.streams.Topology.AutoOffsetReset
import org.apache.kafka.streams.processor.TimestampExtractor

import scala.collection.JavaConverters._
import scala.util.matching.Regex

/** Application settings, loaded once through `ConfigurationLoader.loadConfigFileWithEnvOverrides()`. */
class AppConfiguration {
  private val config = ConfigurationLoader.loadConfigFileWithEnvOverrides()

  val healthStatusFilePath: String = config.getString("health.status.path")

  /** Builds the Kafka configuration from the `kafka` section.
    *
    * Every entry under `kafka.streams` is copied into the streams `Properties`; the application
    * id and the bootstrap servers must be present and non-empty.
    *
    * @throws IllegalArgumentException when a required streams property is missing or empty
    */
  def kafkaConfig: KafkaConfiguration = {

    // verify if the applicationId and bootstrap server config are non empty
    def verifyRequiredProps(props: Properties): Unit = {
      // getProperty returns null for an absent key; wrap it so a missing key fails the
      // requirement instead of surfacing as a NullPointerException.
      def requireNonEmpty(key: String): Unit =
        require(
          Option(props.getProperty(key)).exists(_.nonEmpty),
          s"kafka streams property '$key' must be set and non-empty")

      requireNonEmpty(StreamsConfig.APPLICATION_ID_CONFIG)
      requireNonEmpty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG)
    }

    def addProps(config: Config, props: Properties, prefix: (String) => String = identity): Unit = {
      config.entrySet().asScala.foreach(kv => {
        val propKeyName = prefix(kv.getKey)
        props.setProperty(propKeyName, kv.getValue.unwrapped().toString)
      })
    }

    val kafka = config.getConfig("kafka")
    val producerConfig = kafka.getConfig("producer")
    val consumerConfig = kafka.getConfig("consumer")
    val streamsConfig = kafka.getConfig("streams")

    val props = new Properties

    // add stream specific properties
    addProps(streamsConfig, props)

    // validate props
    verifyRequiredProps(props)

    val timestampExtractor = Class.forName(
      props.getProperty("timestamp.extractor", "org.apache.kafka.streams.processor.WallclockTimestampExtractor"))

    KafkaConfiguration(
      new StreamsConfig(props),
      produceTopic = producerConfig.getString("topic"),
      consumeTopic = consumerConfig.getString("topic"),
      if (streamsConfig.hasPath("auto.offset.reset")) {
        // Upper-case with a fixed locale: the default-locale variant maps "i" to a dotted
        // capital under e.g. a Turkish locale, which makes valueOf fail for "earliest".
        AutoOffsetReset.valueOf(streamsConfig.getString("auto.offset.reset").toUpperCase(java.util.Locale.ROOT))
      } else {
        AutoOffsetReset.LATEST
      },
      // NOTE(review): Class.newInstance is deprecated since Java 9; left unchanged here because
      // getDeclaredConstructor().newInstance() wraps constructor failures differently.
      timestampExtractor.newInstance().asInstanceOf[TimestampExtractor],
      kafka.getLong("close.timeout.ms"))
  }
}
Example 61
Source File: LogStationColorizer.scala From logstation with Apache License 2.0 | 5 votes |
package com.jdrews.logstation.utils

import akka.actor.{Actor, ActorLogging, Terminated}
import com.jdrews.logstation.config.BridgeController
import com.jdrews.logstation.service.ServiceShutdown
import com.jdrews.logstation.webserver.LogMessage

import scala.util.control.Breaks
import scala.util.matching.Regex

/** Actor that wraps log messages in a coloured `<span>` and forwards them to the bridge actor.
  *
  * Messages handled:
  *  - a mutable `Map[String, Regex]`: replaces the current syntax list
  *  - `LogMessage`: coloured according to the first syntax whose regex is found in the message
  *  - `ServiceShutdown`: stops this actor
  */
class LogStationColorizer extends Actor with ActorLogging {
  // contains a map of syntaxName to regular expression.
  var syntaxList = scala.collection.mutable.Map[String, Regex]()
  private val bridge = BridgeController.getBridgeActor

  def receive = {
    // NOTE: the type arguments are erased at runtime, so any mutable Map is accepted here.
    case syntax: scala.collection.mutable.Map[String, Regex] =>
      log.debug(s"Got config $syntax")
      // load up the syntaxes
      syntaxList = syntax

    case lm: LogMessage =>
      // colorize it! The first syntax (in map iteration order) whose regex occurs in the message
      // decides the colour; without a match the message is forwarded unchanged.
      val msg = syntaxList
        .find { case (_, pattern) => pattern.findFirstIn(lm.logMessage).isDefined }
        .fold(lm.logMessage) { case (color, _) =>
          // wrap log message in new colors
          s"<span style='color:${color}'>${xml.Utility.escape(lm.logMessage)}</span>"
        }

      // send it to bridge actor
      bridge ! LogMessage(msg, lm.logFile)

    case ServiceShutdown =>
      context stop self

    case actTerminated: Terminated =>
      log.info(actTerminated.toString)

    case something =>
      log.warning(s"huh? $something")
  }
}
Example 62
Source File: LogStationServiceActor.scala From logstation with Apache License 2.0 | 5 votes |
package com.jdrews.logstation.service

import akka.actor._
import akka.pattern._
import com.jdrews.logstation.tailer.{LogTailerActor, LogThisFile}
import com.jdrews.logstation.utils.LogStationColorizer

import scala.concurrent.Await
import scala.concurrent.duration._
import scala.util.matching.Regex

/** Supervising actor: for every `LogThisFile` it spawns one colorizer and one tailer, keeps the
  * latest syntax map so new colorizers can be initialised with it, and stops all children on
  * `ServiceShutdown`.
  */
class LogStationServiceActor extends Actor with ActorLogging {
  private var logTailers = Set.empty[ActorRef]
  private var logStationColorizers = Set.empty[ActorRef]
  var syntaxList = scala.collection.mutable.Map[String, Regex]()

  /** Sends `ServiceShutdown` to `child` and blocks for up to 20 seconds until it has stopped;
    * a timeout is logged as an error and otherwise ignored.
    */
  private def stopAndWait(child: ActorRef): Unit =
    try {
      Await.result(gracefulStop(child, 20.seconds, ServiceShutdown), 20.seconds)
      ()
    } catch {
      case e: AskTimeoutException => log.error("The actor didn't stop in time!" + e.toString)
    }

  def receive = {
    case logThisFile: LogThisFile =>
      log.info(s"About to begin logging ${logThisFile.logFile}")
      // Every character outside [A-Za-z0-9] in the file path is replaced by ':' for the actor names.
      val nameSuffix = logThisFile.logFile.replaceAll("[^A-Za-z0-9]", ":")

      val colorizer = context.actorOf(Props[LogStationColorizer], name = s"LogStationColorizer-$nameSuffix")
      colorizer ! syntaxList
      context watch colorizer
      logStationColorizers += colorizer

      val tailer = context.actorOf(Props[LogTailerActor], name = s"LogTailerActor-$nameSuffix")
      tailer ! logThisFile
      tailer ! colorizer
      context watch tailer
      logTailers += tailer

    case syntax: scala.collection.mutable.Map[String, Regex] =>
      log.info(s"passing the syntax file! $syntax")
      // store list to send to any new colorizers
      syntaxList = syntax
      logStationColorizers.foreach(_ ! syntax)

    case ServiceShutdown =>
      // for each logTailers and logStationColorizers, send shutdown call and wait for it to shut down.
      log.info("got ServiceShutdown")
      logTailers.foreach(stopAndWait)
      logStationColorizers.foreach(stopAndWait)
      context stop self

    case actTerminated: Terminated =>
      log.info(actTerminated.toString)

    case something =>
      log.warning(s"huh? $something")
  }
}
Example 63
Source File: TlcConfigLexer.scala From apalache with Apache License 2.0 | 5 votes |
/*
 * Lexer for TLC configuration files, written with RegexParsers combinators.
 *
 * NOTE(review): the declaration that should enclose the definitions below (presumably an object
 * or class extending RegexParsers) is not present in this excerpt; only its closing brace is.
 * Confirm against the original project before compiling.
 *
 * - apply(reader): runs `program` over the whole input and returns the token list; on failure it
 *   throws TlcConfigParseError with the parser message and position.
 * - program: skippable input, then tokens each followed by skippable input, then end of input.
 * - token: tries the keyword tokens first and `identifier` last, and records positions through
 *   `positioned`.
 * - skip: consumes any run of whitespace, `\*` line comments, `(* ... *)` block comments and
 *   line feeds.
 * - The remaining private parsers each map one keyword or symbol to its token; CONSTANT,
 *   INVARIANT, CONSTRAINT and ACTION_CONSTRAINT also accept a trailing "S", and PROPERTY also
 *   accepts "PROPERTIES".
 */
package at.forsyte.apalache.io.tlc.config import java.io.Reader import scala.util.matching.Regex import scala.util.parsing.combinator.RegexParsers def apply(reader: Reader): List[TlcConfigToken] = parseAll(program, reader) match { case Success(result, _) => result case NoSuccess(msg, next) => throw new TlcConfigParseError(msg, next.pos) } def program: Parser[List[TlcConfigToken]] = skip ~> rep(token <~ skip) <~ eof def eof: Parser[String] = "\\z".r | failure("unexpected character") def token: Parser[TlcConfigToken] = positioned( constant | init | next | specification | invariant | property | constraint | actionConstraint | symmetry | leftArrow | eq | identifier ) /// // it is important that linefeed is not a whiteSpace, as otherwise singleComment consumes the whole input! def skip: Parser[Unit] = rep(whiteSpace | singleComment | multiComment | linefeed) ^^^ Unit def linefeed: Parser[Unit] = "\n" ^^^ Unit def singleComment: Parser[Unit] = "\\*" ~ rep(not("\n") ~ ".".r) ^^^ Unit def multiComment: Parser[Unit] = "(*" ~ rep(not("*)") ~ "(?s).".r) ~ "*)" ^^^ Unit private def identifier: Parser[IDENT] = { "[a-zA-Z_][a-zA-Z0-9_]*".r ^^ { name => IDENT(name) } } private def constant: Parser[CONST] = { "CONSTANT(S|)".r ^^ (_ => CONST()) } private def init: Parser[INIT] = { "INIT" ^^ (_ => INIT()) } private def next: Parser[NEXT] = { "NEXT" ^^ (_ => NEXT()) } private def specification: Parser[SPECIFICATION] = { "SPECIFICATION" ^^ (_ => SPECIFICATION()) } private def invariant: Parser[INVARIANT] = { "INVARIANT(S|)".r ^^ (_ => INVARIANT()) } private def property: Parser[PROPERTY] = { "PROPERT(Y|IES)".r ^^ (_ => PROPERTY()) } private def constraint: Parser[CONSTRAINT] = { "CONSTRAINT(S|)".r ^^ (_ => CONSTRAINT()) } private def actionConstraint: Parser[ACTION_CONSTRAINT] = { "ACTION_CONSTRAINT(S|)".r ^^ (_ => ACTION_CONSTRAINT()) } private def symmetry: Parser[SYMMETRY] = { "SYMMETRY".r ^^ (_ => SYMMETRY()) } private def leftArrow: Parser[LEFT_ARROW] = { "<-" ^^ (_ => 
LEFT_ARROW()) } private def eq: Parser[EQ] = { "=" ^^ (_ => EQ()) } }
Example 64
Source File: ISO8601DateTime.scala From toketi-iothubreact with MIT License | 5 votes |
// Copyright (c) Microsoft. All rights reserved.

package A_APIUSage

import java.time.format.DateTimeFormatter
import java.time.{ZoneId, ZonedDateTime}

import scala.util.matching.Regex

/** Parses a UTC date/time string and re-renders it as `yyyy-MM-dd'T'HH:mm:ss.SSS'Z'`.
  *
  * Accepted layouts (numeric fields other than the year may omit leading zeros):
  *  - `yyyy-MM-ddTHH:mm:ss.fZ` with one to three fractional digits
  *  - `yyyy-MM-ddTHH:mm:ssZ`
  *  - `yyyy-MM-dd` (interpreted as midnight)
  *
  * Parsing is lazy: it happens on the first call to `toString`, which is also where a malformed
  * value is reported.
  *
  * @param text the raw value; `null` is tolerated and renders as the empty string
  */
case class ISO8601DateTime(text: String) {

  // The dot before the fraction is escaped; unescaped it matched any character.
  private lazy val pattern1: Regex = """(\d{4})-(\d{1,2})-(\d{1,2})T(\d{1,2}):(\d{1,2}):(\d{1,2})\.(\d{1,3})Z""".r
  private lazy val pattern2: Regex = """(\d{4})-(\d{1,2})-(\d{1,2})T(\d{1,2}):(\d{1,2}):(\d{1,2})Z""".r
  private lazy val pattern3: Regex = """(\d{4})-(\d{1,2})-(\d{1,2})""".r
  private lazy val format: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'")
  private lazy val zone: ZoneId = ZoneId.of("UTC")

  // None when `text` is null. The null check happens before any regex extractor runs.
  private lazy val zonedDateTime: Option[ZonedDateTime] = Option(text).map {
    case pattern1(y, m, d, h, i, s, n) =>
      // Scale by the number of digits, not by the numeric value: ".5" is 500 ms but ".005" is
      // 5 ms. Right-padding the fraction to nine digits yields the nanoseconds directly.
      val nanos = n.padTo(9, '0').toInt
      ZonedDateTime.of(y.toInt, m.toInt, d.toInt, h.toInt, i.toInt, s.toInt, nanos, zone)

    case pattern2(y, m, d, h, i, s) =>
      ZonedDateTime.of(y.toInt, m.toInt, d.toInt, h.toInt, i.toInt, s.toInt, 0, zone)

    case pattern3(y, m, d) =>
      ZonedDateTime.of(y.toInt, m.toInt, d.toInt, 0, 0, 0, 0, zone)

    case _ =>
      throw new Exception(s"wrong date time format: $text")
  }

  /** The parsed instant in the fixed output format, or `""` when `text` is `null`.
    *
    * @throws Exception when `text` matches none of the accepted layouts
    */
  override def toString: String = zonedDateTime.fold("")(_.format(format))
}
Example 65
Source File: ISO8601DateTime.scala From toketi-iothubreact with MIT License | 5 votes |
// Copyright (c) Microsoft. All rights reserved.

package B_PrintTemperature

import java.time.format.DateTimeFormatter
import java.time.{ZoneId, ZonedDateTime}

import scala.util.matching.Regex

/** Parses a UTC date/time string and re-renders it as `yyyy-MM-dd'T'HH:mm:ss.SSS'Z'`.
  *
  * Accepted layouts (numeric fields other than the year may omit leading zeros):
  *  - `yyyy-MM-ddTHH:mm:ss.fZ` with one to three fractional digits
  *  - `yyyy-MM-ddTHH:mm:ssZ`
  *  - `yyyy-MM-dd` (interpreted as midnight)
  *
  * Parsing is lazy: it happens on the first call to `toString`, which is also where a malformed
  * value is reported.
  *
  * @param text the raw value; `null` is tolerated and renders as the empty string
  */
case class ISO8601DateTime(text: String) {

  // The dot before the fraction is escaped; unescaped it matched any character.
  private lazy val pattern1: Regex = """(\d{4})-(\d{1,2})-(\d{1,2})T(\d{1,2}):(\d{1,2}):(\d{1,2})\.(\d{1,3})Z""".r
  private lazy val pattern2: Regex = """(\d{4})-(\d{1,2})-(\d{1,2})T(\d{1,2}):(\d{1,2}):(\d{1,2})Z""".r
  private lazy val pattern3: Regex = """(\d{4})-(\d{1,2})-(\d{1,2})""".r
  private lazy val format: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'")
  private lazy val zone: ZoneId = ZoneId.of("UTC")

  // None when `text` is null. The null check happens before any regex extractor runs.
  private lazy val zonedDateTime: Option[ZonedDateTime] = Option(text).map {
    case pattern1(y, m, d, h, i, s, n) =>
      // Scale by the number of digits, not by the numeric value: ".5" is 500 ms but ".005" is
      // 5 ms. Right-padding the fraction to nine digits yields the nanoseconds directly.
      val nanos = n.padTo(9, '0').toInt
      ZonedDateTime.of(y.toInt, m.toInt, d.toInt, h.toInt, i.toInt, s.toInt, nanos, zone)

    case pattern2(y, m, d, h, i, s) =>
      ZonedDateTime.of(y.toInt, m.toInt, d.toInt, h.toInt, i.toInt, s.toInt, 0, zone)

    case pattern3(y, m, d) =>
      ZonedDateTime.of(y.toInt, m.toInt, d.toInt, 0, 0, 0, 0, zone)

    case _ =>
      throw new Exception(s"wrong date time format: $text")
  }

  /** The parsed instant in the fixed output format, or `""` when `text` is `null`.
    *
    * @throws Exception when `text` matches none of the accepted layouts
    */
  override def toString: String = zonedDateTime.fold("")(_.format(format))
}
Example 66
Source File: ISO8601DateTime.scala From toketi-iothubreact with MIT License | 5 votes |
// Copyright (c) Microsoft. All rights reserved.

package F_SendMessageToDevice

import java.time.format.DateTimeFormatter
import java.time.{ZoneId, ZonedDateTime}

import scala.util.matching.Regex

/** Parses a UTC date/time string and re-renders it as `yyyy-MM-dd'T'HH:mm:ss.SSS'Z'`.
  *
  * Accepted layouts (numeric fields other than the year may omit leading zeros):
  *  - `yyyy-MM-ddTHH:mm:ss.fZ` with one to three fractional digits
  *  - `yyyy-MM-ddTHH:mm:ssZ`
  *  - `yyyy-MM-dd` (interpreted as midnight)
  *
  * Parsing is lazy: it happens on the first call to `toString`, which is also where a malformed
  * value is reported.
  *
  * @param text the raw value; `null` is tolerated and renders as the empty string
  */
case class ISO8601DateTime(text: String) {

  // The dot before the fraction is escaped; unescaped it matched any character.
  private lazy val pattern1: Regex = """(\d{4})-(\d{1,2})-(\d{1,2})T(\d{1,2}):(\d{1,2}):(\d{1,2})\.(\d{1,3})Z""".r
  private lazy val pattern2: Regex = """(\d{4})-(\d{1,2})-(\d{1,2})T(\d{1,2}):(\d{1,2}):(\d{1,2})Z""".r
  private lazy val pattern3: Regex = """(\d{4})-(\d{1,2})-(\d{1,2})""".r
  private lazy val format: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'")
  private lazy val zone: ZoneId = ZoneId.of("UTC")

  // None when `text` is null. The null check happens before any regex extractor runs.
  private lazy val zonedDateTime: Option[ZonedDateTime] = Option(text).map {
    case pattern1(y, m, d, h, i, s, n) =>
      // Scale by the number of digits, not by the numeric value: ".5" is 500 ms but ".005" is
      // 5 ms. Right-padding the fraction to nine digits yields the nanoseconds directly.
      val nanos = n.padTo(9, '0').toInt
      ZonedDateTime.of(y.toInt, m.toInt, d.toInt, h.toInt, i.toInt, s.toInt, nanos, zone)

    case pattern2(y, m, d, h, i, s) =>
      ZonedDateTime.of(y.toInt, m.toInt, d.toInt, h.toInt, i.toInt, s.toInt, 0, zone)

    case pattern3(y, m, d) =>
      ZonedDateTime.of(y.toInt, m.toInt, d.toInt, 0, 0, 0, 0, zone)

    case _ =>
      throw new Exception(s"wrong date time format: $text")
  }

  /** The parsed instant in the fixed output format, or `""` when `text` is `null`.
    *
    * @throws Exception when `text` matches none of the accepted layouts
    */
  override def toString: String = zonedDateTime.fold("")(_.format(format))
}
Example 67
Source File: DictionaryBasedNormalizer.scala From scalastringcourseday7 with Apache License 2.0 | 5 votes |
package text.normalizer

import java.nio.charset.{CodingErrorAction, StandardCharsets}
import java.nio.file.Path

import text.{StringNone, StringOption}
import util.Config

import scala.collection.mutable
import scala.collection.mutable.ListBuffer
import scala.sys.process.Process
import scala.util.matching.Regex

/** Replaces notational variants in a text by their representation, as listed in a dictionary
  * resource. The dictionary is read once, at construction time.
  *
  * @param dictionaryNameOpt name of the dictionary resource; when empty, nothing is replaced
  */
class DictionaryBasedNormalizer(dictionaryNameOpt: StringOption) {

  /** Streams the lines of `inputPath` after running the JDK's `native2ascii -reverse` tool on it. */
  private def ascii2native(inputPath: Path): Iterator[String] = {
    import util.ProcessBuilderUtils._
    val command = Seq[String](
      s"${System.getProperty("java.home")}/../bin/native2ascii",
      "-reverse",
      "-encoding", "UTF-8",
      inputPath.toAbsolutePath.toString)
    Process(command).lineStream(
      StandardCharsets.UTF_8,
      CodingErrorAction.REPORT,
      CodingErrorAction.REPORT,
      StringNone)
  }

  // Dictionary line: `representation:[variant,variant,...]` with an optional trailing `#...`;
  // lines starting with '#' or ':' do not match. Must be initialised before `terms`.
  private val regex: Regex = """([^#:][^:]*):\[([^#]+)\](#.*)?""".r

  // (notational variant, representation) pairs, in replacement order.
  private val terms: Seq[(String, String)] = initialize()

  /** Reads the dictionary and flattens it into (variant, representation) pairs. */
  private def initialize(): Seq[(String, String)] =
    if (dictionaryNameOpt.isEmpty) {
      Nil
    } else {
      val dictionaryName: String = dictionaryNameOpt.get
      val variantsByRepresentation = mutable.Map.empty[String, List[String]]
      val filePath: Path = Config.resourceFile("normalizer", dictionaryName)

      ascii2native(filePath) foreach {
        case regex(representation, notationalVariants, _) =>
          // A representation written as "" stands for the empty string.
          val key: String = representation.trim match {
            case "\"\"" => ""
            case otherwise => otherwise
          }
          val sortedVariants: List[String] = sortNotationVariants(notationalVariants.split(',').toList)
          variantsByRepresentation(key) = variantsByRepresentation.get(key) match {
            case Some(known) => sortNotationVariants(known ++ sortedVariants)
            case None => sortedVariants
          }
        case _ => //Do nothing
      }

      for {
        representation <- sortRepresentations(variantsByRepresentation.keySet.toList)
        notationalVariant <- variantsByRepresentation(representation)
      } yield (notationalVariant, representation)
    }

  protected def sortNotationVariants(notationVariants: List[String]): List[String] = {
    notationVariants.sorted //alphabetical order
  }

  protected def sortRepresentations(representations: List[String]): List[String] = {
    representations.sorted //alphabetical order
  }

  /** Applies every dictionary replacement to `text`, in dictionary order. */
  def normalize(text: StringOption): StringOption = {
    text map { t: String =>
      terms.foldLeft(t) {
        case (current, (term, replacement)) => replaceAll(current, term, replacement)
      }
    }
  }

  protected def replaceAll(input: String, term: String, replacement: String): String = {
    import util.primitive._
    input.replaceAllLiteratim(term, replacement)
  }
}
Example 68
Source File: package.scala From pureconfig with Mozilla Public License 2.0 | 5 votes |
package pureconfig

import java.util.regex.Pattern

import scala.util.matching.Regex

import org.scalactic.Equality
import org.scalactic.TypeCheckedTripleEquals._

/** Scalactic `Equality` instances for regex types. */
package object equality {

  /** Two `Pattern`s are equal when their pattern strings are equal.
    * The implicit now carries an explicit type, so its resolution does not depend on inference.
    */
  implicit final val PatternEquality: Equality[Pattern] = new Equality[Pattern] {
    def areEqual(a: Pattern, b: Any): Boolean =
      b match {
        case bp: Pattern => a.pattern === bp.pattern
        case _ => false
      }
  }

  /** Two `Regex`es are equal when their underlying `Pattern`s are equal per [[PatternEquality]]. */
  implicit final val RegexEquality: Equality[Regex] = new Equality[Regex] {
    override def areEqual(a: Regex, b: Any): Boolean =
      b match {
        case r: Regex => PatternEquality.areEqual(a.pattern, r.pattern)
        case _ => false
      }
  }
}
Example 69
Source File: ModuleMatcher.scala From coursier with Apache License 2.0 | 5 votes |
package coursier.util

import java.util.regex.Pattern

import coursier.core.{Module, ModuleName, Organization}
import dataclass.data

import scala.annotation.tailrec
import scala.util.matching.Regex

/** Matches modules against a template module whose organization, name and attribute values may
  * contain `*` wildcards.
  */
@data class ModuleMatcher(matcher: Module) {

  import ModuleMatcher.blobToPattern

  lazy val orgPattern = blobToPattern(matcher.organization.value)
  lazy val namePattern = blobToPattern(matcher.name.value)
  lazy val attributesPattern = matcher.attributes.map {
    case (key, blob) => key -> blobToPattern(blob)
  }

  /** `true` when organization and name fully match their patterns, `module` has exactly the
    * attribute keys of the template, and every attribute value fully matches its pattern.
    */
  def matches(module: Module): Boolean = {
    def fullyMatches(re: Regex, value: String): Boolean = re.pattern.matcher(value).matches()

    fullyMatches(orgPattern, module.organization.value) &&
      fullyMatches(namePattern, module.name.value) &&
      module.attributes.keySet == attributesPattern.keySet &&
      attributesPattern.forall {
        case (key, re) => module.attributes.get(key).exists(fullyMatches(re, _))
      }
  }
}

object ModuleMatcher {

  def apply(org: Organization, name: ModuleName, attributes: Map[String, String] = Map.empty): ModuleMatcher =
    ModuleMatcher(Module(org, name, attributes))

  /** A matcher whose organization and name are both `*` and which has no attributes. */
  def all: ModuleMatcher =
    ModuleMatcher(Module(Organization("*"), ModuleName("*"), Map.empty))

  /** Turns a glob into a regex: each `*` becomes `.*`, and every other non-empty run of
    * characters is quoted literally. `b` accumulates the regex source across recursive calls.
    */
  @tailrec
  private def blobToPattern(s: String, b: StringBuilder = new StringBuilder): Regex =
    s.indexOf('*') match {
      case -1 =>
        // No wildcard left: quote what remains (nothing for an empty remainder) and finish.
        if (s.nonEmpty) b ++= Pattern.quote(s)
        b.result().r
      case star =>
        if (star > 0) b ++= Pattern.quote(s.substring(0, star))
        b ++= ".*"
        blobToPattern(s.substring(star + 1), b)
    }
}
Example 70
Source File: SQLParser.scala From CSYE7200_Old with MIT License | 5 votes |
/*
 * SQL parser fragment.
 *
 * NOTE(review): the class declaration that should enclose `orderByClause` and `reserved` is not
 * present in this excerpt (only its closing brace is); confirm against the original project.
 *
 * - orderByClause: expects the ORDER BY keyword and yields the identifier that follows it.
 * - reserved: extends the inherited reserved-word parser with the SQL keywords defined in the
 *   companion object.
 * - object SQLParser: case-insensitive keyword regexes; each literal carries the `(?i)` flag.
 */
package edu.neu.coe.csye7200.parse import com.phasmid.laScala.values.Scalar import scala.language.implicitConversions import scala.util._ import scala.util.matching.Regex def orderByClause: Parser[Scalar] = SQLParser.sOrderBy ~> identifier override def reserved: Parser[String] = super.reserved | SQLParser.sLimit | SQLParser.sOrderBy | SQLParser.sAs | SQLParser.sFrom | SQLParser.sWhere | SQLParser.sSelect } object SQLParser { val sSelect: Regex = """(?i)SELECT""".r val sAs: Regex = """(?i)AS""".r val sFrom: Regex = """(?i)FROM""".r val sWhere: Regex = """(?i)WHERE""".r val sLimit: Regex = """(?i)LIMIT""".r val sOrderBy: Regex = """(?i)ORDER BY""".r }
Example 71
Source File: EnronEmail.scala From ml-in-scala with The Unlicense | 5 votes |
/*
 * EnronEmail: walks a directory tree of message files and prints one JSON `Relation` per
 * (sender, other address found in the message) pair.
 *
 * - emails(s): every match of `emailRe` in `s`.
 * - hash(s): MurmurHash3 string hash shifted by Int.MaxValue, returned as a Long.
 * - getFileTree(f): lazy stream of `f` followed by everything below it.
 * - main: only files whose name ends with a dot are read (as us-ascii); when `messageRe`
 *   extracts message id, date, sender and subject, a Relation is written for each distinct
 *   address in the message other than the lower-cased sender. Exceptions raised while handling
 *   a file are printed to stderr and the file is skipped.
 *
 * NOTE(review): the `[email protected]` fragments inside `emailRe` and `messageRe` look like
 * residue of e-mail obfuscation applied to the page this code was copied from, so these
 * patterns are probably not what the author wrote; confirm against the original project.
 * NOTE(review): `messageRe` is built with `.stripMargin`, but the line breaks that normally
 * precede each `|` margin are absent in this one-line form; confirm.
 */
package org.akozlov.chapter07 import scala.io.Source import scala.util.hashing.{MurmurHash3 => Hash} import scala.util.matching.Regex import java.util.{Date => javaDateTime} import java.io.File import net.liftweb.json._ import Extraction._ import Serialization.{read, write} object EnronEmail { val emailRe = """[a-zA-Z0-9_.+\-][email protected]""".r.unanchored def emails(s: String) = { for (email <- emailRe findAllIn s) yield email } def hash(s: String) = { java.lang.Integer.MAX_VALUE.toLong + Hash.stringHash(s) } val messageRe = """(?:Message-ID:\s+)(<[A-Za-z0-9_.+\-@]+>)(?s)(?:.*?)(?m) |(?:Date:\s+)(.*?)$(?:.*?) |(?:From:\s+)([a-zA-Z0-9_.+\-][email protected])(?:.*?) |(?:Subject: )(.*?)$""".stripMargin.r.unanchored case class Relation(from: String, fromId: Long, to: String, toId: Long, source: String, messageId: String, date: javaDateTime, subject: String) implicit val formats = Serialization.formats(NoTypeHints) def getFileTree(f: File): Stream[File] = f #:: (if (f.isDirectory) f.listFiles().toStream.flatMap(getFileTree) else Stream.empty) def main(args: Array[String]) { getFileTree(new File(args(0))).par.map { file => { "\\.$".r findFirstIn file.getName match { case Some(x) => try { val src = Source.fromFile(file, "us-ascii") val message = try src.mkString finally src.close() message match { case messageRe(messageId, date, from , subject) => val fromLower = from.toLowerCase for (to <- emails(message).filter(_ != fromLower).toList.distinct) println(write(Relation(fromLower, hash(fromLower), to, hash(to), file.toString, messageId, new javaDateTime(date), subject))) case _ => } } catch { case e: Exception => System.err.println(e) } case _ => } } } } }
Example 72
Source File: ConfigReader.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
/*
 * Variable substitution for configuration values.
 *
 * NOTE(review): the class declaration that should enclose the two `substitute` methods, and the
 * `bindings` value they read, are not present in this excerpt (only a closing brace is);
 * confirm against the original project.
 *
 * - ConfigReader.REF_RE matches references of the form `${name}` or `${prefix:name}`.
 * - substitute(input): expands every reference in `input`; a null input is returned as is.
 * - substitute(input, usedRefs): for each reference, looks up `bindings` by prefix (null when
 *   the reference has none) and then by name, and recursively expands the value found. A
 *   reference that is already being expanded fails the `require` with a "Circular reference"
 *   message. A reference that cannot be resolved is left in place unchanged. The replacement is
 *   passed through `Regex.quoteReplacement`, so `$` and `\` in values are taken literally.
 */
package org.apache.spark.internal.config import java.util.{Map => JMap} import java.util.regex.Pattern import scala.collection.mutable.HashMap import scala.util.matching.Regex private object ConfigReader { private val REF_RE = "\\$\\{(?:(\\w+?):)?(\\S+?)\\}".r } def substitute(input: String): String = substitute(input, Set()) private def substitute(input: String, usedRefs: Set[String]): String = { if (input != null) { ConfigReader.REF_RE.replaceAllIn(input, { m => val prefix = m.group(1) val name = m.group(2) val ref = if (prefix == null) name else s"$prefix:$name" require(!usedRefs.contains(ref), s"Circular reference in $input: $ref") val replacement = bindings.get(prefix) .flatMap(_.get(name)) .map { v => substitute(v, usedRefs + ref) } .getOrElse(m.matched) Regex.quoteReplacement(replacement) }) } else { input } } }
Example 73
Source File: KsqlDriver.scala From ksql-jdbc-driver with Apache License 2.0 | 5 votes |
package com.github.mmolimar.ksql.jdbc

import java.sql.{Connection, Driver, DriverPropertyInfo}
import java.util.Properties
import java.util.logging.Logger

import com.github.mmolimar.ksql.jdbc.Exceptions._

import scala.util.matching.Regex

/** Driver constants and JDBC URL parsing. */
object KsqlDriver {

  val ksqlName = "ksqlDB"
  val ksqlPrefix = "jdbc:ksql://"

  val driverName = "ksqlDB JDBC driver"
  val driverMajorVersion = 1
  val driverMinorVersion = 2
  val driverVersion = s"$driverMajorVersion.$driverMinorVersion"

  val jdbcMajorVersion = 4
  val jdbcMinorVersion = 1

  val ksqlMajorVersion = 5
  val ksqlMinorVersion = 4
  val ksqlMicroVersion = 0
  val ksqlVersion = s"$ksqlMajorVersion.$ksqlMinorVersion.$ksqlMicroVersion"

  // Optional "user:password@" section (capture groups 1-3).
  private val ksqlUserPassRegex = "((.+):(.+)@){0,1}"
  // Mandatory "host:port" section (capture groups 4-5).
  private val ksqlServerRegex = "([A-Za-z0-9._%+-]+):([0-9]{1,5})"
  // Optional "?key=value&key=value" section (capture groups 6-8).
  private val ksqlPropsRegex = "(\\?([A-Za-z0-9._-]+=[A-Za-z0-9._-]+(&[A-Za-z0-9._-]+=[A-Za-z0-9._-]+)*)){0,1}"

  val urlRegex: Regex = s"$ksqlPrefix$ksqlUserPassRegex$ksqlServerRegex$ksqlPropsRegex\\z".r

  /**
   * Splits a JDBC URL into its connection values.
   *
   * @param url the JDBC URL to parse
   * @return the server, port, optional credentials and URL properties
   * @throws InvalidUrl when `url` does not match [[urlRegex]]
   */
  def parseUrl(url: String): KsqlConnectionValues = {
    // "a=1&b=2" -> Map(a -> 1, b -> 2)
    def toSettings(query: String) =
      query.split("&").map(_.split("=")).map(pair => pair(0) -> pair(1)).toMap

    url match {
      case urlRegex(_, username, password, ksqlServer, port, _, props, _) =>
        KsqlConnectionValues(
          ksqlServer,
          port.toInt,
          Option(username),
          Option(password),
          Option(props).map(toSettings).getOrElse(Map.empty)
        )
      case _ =>
        throw InvalidUrl(url)
    }
  }
}

/** `java.sql.Driver` implementation that opens [[KsqlConnection]]s. */
class KsqlDriver extends Driver {

  override def acceptsURL(url: String): Boolean =
    url != null && url.startsWith(KsqlDriver.ksqlPrefix)

  override def jdbcCompliant: Boolean = false

  override def getPropertyInfo(url: String, info: Properties): scala.Array[DriverPropertyInfo] =
    scala.Array.empty

  override def getMinorVersion: Int = KsqlDriver.driverMinorVersion

  override def getMajorVersion: Int = KsqlDriver.driverMajorVersion

  override def getParentLogger: Logger = throw NotSupported("getParentLogger")

  /** Builds and validates a connection, rejecting URLs without the ksql prefix. */
  override def connect(url: String, properties: Properties): Connection =
    if (acceptsURL(url)) {
      val connection = buildConnection(KsqlDriver.parseUrl(url), properties)
      connection.validate()
      connection
    } else {
      throw InvalidUrl(url)
    }

  private[jdbc] def buildConnection(values: KsqlConnectionValues, properties: Properties): KsqlConnection =
    new KsqlConnection(values, properties)
}
Example 74
Source File: PrometheusUtils.scala From kafka-lag-exporter with Apache License 2.0 | 5 votes |
package com.lightbend.kafkalagexporter.integration import akka.actor.ActorSystem import akka.http.scaladsl.Http import akka.http.scaladsl.model.{HttpRequest, HttpResponse, StatusCodes} import akka.http.scaladsl.unmarshalling.Unmarshal import akka.stream.Materializer import com.lightbend.kafkalagexporter.MetricsSink.GaugeDefinition import org.scalatest.Matchers import org.scalatest.concurrent.ScalaFutures import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.{ExecutionContext, Future} import scala.util.matching.Regex val regex = s"""$name\\{$labels.*\\}\\s+(-?.+)""".r log.debug(s"Created regex: {}", regex.pattern.toString) Rule(regex, assertion) } } case class Rule(regex: Regex, assertion: String => _) case class Result(rule: Rule, groupResults: List[String]) { def assertDne(): Unit = { log.debug(s"Rule: ${rule.regex.toString}") groupResults.length shouldBe 0 } def assert(): Unit = { log.debug(s"Rule: ${rule.regex.toString}") groupResults.length shouldBe 1 log.debug(s"Actual value is ${groupResults.head}") rule.assertion(groupResults.head) } } }
Example 75
Source File: MetricsConfig.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics

import java.io.{FileInputStream, InputStream}
import java.util.Properties

import scala.collection.mutable
import scala.util.matching.Regex

import org.apache.spark.Logging
import org.apache.spark.util.Utils

/**
 * Loads metrics properties (defaults, then an optional file) and groups them
 * by the instance prefix in front of the first dot of each key.
 *
 * @param configFile optional path of the properties file; when absent the
 *                   default file name is looked up on the class path
 */
private[spark] class MetricsConfig(val configFile: Option[String]) extends Logging {

  private val DEFAULT_PREFIX = "*"
  private val INSTANCE_REGEX = "^(\\*|[a-zA-Z]+)\\.(.+)".r
  private val DEFAULT_METRICS_CONF_FILENAME = "metrics.properties"

  private[metrics] val properties = new Properties()
  // Stays null until initialize() has run.
  private[metrics] var propertyCategories: mutable.HashMap[String, Properties] = null

  private def setDefaultProperties(prop: Properties): Unit = {
    prop.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet")
    prop.setProperty("*.sink.servlet.path", "/metrics/json")
    prop.setProperty("master.sink.servlet.path", "/metrics/master/json")
    prop.setProperty("applications.sink.servlet.path", "/metrics/applications/json")
  }

  /** Loads all properties and fills `propertyCategories`. */
  def initialize(): Unit = {
    // Add default properties in case there's no properties file
    setDefaultProperties(properties)

    // If spark.metrics.conf is not set, try to get file in class path
    val isOpt: Option[InputStream] = configFile.map(new FileInputStream(_)).orElse {
      try {
        Option(Utils.getSparkClassLoader.getResourceAsStream(DEFAULT_METRICS_CONF_FILENAME))
      } catch {
        case e: Exception =>
          logError("Error loading default configuration file", e)
          None
      }
    }

    isOpt.foreach { is =>
      try {
        properties.load(is)
      } finally {
        is.close()
      }
    }

    propertyCategories = subProperties(properties, INSTANCE_REGEX)
    if (propertyCategories.contains(DEFAULT_PREFIX)) {
      // Explicit converters instead of the deprecated implicit JavaConversions.
      import scala.collection.JavaConverters._

      // Copy every "*" default into each instance that does not set the key itself.
      val defaultProperty = propertyCategories(DEFAULT_PREFIX)
      for {
        (inst, prop) <- propertyCategories if inst != DEFAULT_PREFIX
        (k, v) <- defaultProperty.asScala if prop.getProperty(k) == null
      } {
        prop.setProperty(k, v)
      }
    }
  }

  /**
   * Groups the entries of `prop` by the first capture group of `regex`,
   * storing each value under the second capture group.
   *
   * @param prop  flat properties to split
   * @param regex pattern with two groups: (prefix, remaining key)
   * @return prefix -> properties keyed by the remaining key
   */
  def subProperties(prop: Properties, regex: Regex): mutable.HashMap[String, Properties] = {
    import scala.collection.JavaConverters._

    val subProperties = new mutable.HashMap[String, Properties]
    prop.asScala.foreach {
      // One full match per key; keys that do not match are skipped rather than
      // risking a MatchError from a separate prefix check plus extraction.
      case (regex(prefix, suffix), value) =>
        subProperties.getOrElseUpdate(prefix, new Properties).setProperty(suffix, value)
      case _ =>
    }
    subProperties
  }

  /** Returns the properties of `inst`, else the "*" defaults, else an empty set. */
  def getInstance(inst: String): Properties = {
    propertyCategories.get(inst) match {
      case Some(s) => s
      case None => propertyCategories.getOrElse(DEFAULT_PREFIX, new Properties)
    }
  }
}
Example 76
Source File: ValidatorsJsonProtocol.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.deeplang.params.validators

import scala.util.matching.Regex

import spray.json._

/** spray-json formats for the parameter validators. */
object ValidatorsJsonProtocol extends DefaultJsonProtocol {

  /** Serialises a [[scala.util.matching.Regex]] as a JSON string holding its pattern. */
  implicit object RegexJsonFormat extends JsonFormat[Regex] {

    override def write(regex: Regex): JsValue = regex.toString.toJson

    /**
     * Inverse of `write`: compiles the pattern held in a JSON string.
     *
     * @throws DeserializationException when `json` is not a string or the
     *                                  pattern does not compile
     */
    override def read(json: JsValue): Regex = json match {
      case JsString(pattern) =>
        try {
          pattern.r
        } catch {
          case e: java.util.regex.PatternSyntaxException =>
            deserializationError(s"Invalid regex pattern: $pattern", e)
        }
      case other =>
        deserializationError(s"Expected a JSON string holding a regex pattern, but got: $other")
    }
  }

  implicit val rangeValidatorFormat = jsonFormat(
    RangeValidator.apply, "begin", "end", "beginIncluded", "endIncluded", "step")

  implicit val regexValidatorFormat = jsonFormat(RegexValidator, "regex")

  implicit val arrayLengthValidator = jsonFormat(ArrayLengthValidator.apply, "min", "max")

  // Write-only for now: only the range validator's configuration is emitted.
  implicit val complexArrayValidator = new JsonFormat[ComplexArrayValidator] {
    def write(v: ComplexArrayValidator): JsValue = {
      v.rangeValidator.configurationToJson
    }
    def read(json: JsValue): ComplexArrayValidator = ???
  }

  // TODO DS-3225 Complex Array Validator serialization
  // implicit val complexArrayValidator =
  //   jsonFormat(ComplexArrayValidator.apply, "rangeValidator", "arrayLengthValidator")
}
Example 77
Source File: Loader.scala From tensorflow_scala with Apache License 2.0 | 5 votes |
package org.platanios.tensorflow.data

import com.typesafe.scalalogging.Logger

import java.io.IOException
import java.net.URL
import java.nio.file.{Files, Path}

import scala.collection.compat.immutable.LazyList
import scala.io.Source
import scala.util.matching.Regex

/** Helpers for fetching dataset files over HTTP(S). */
trait Loader {
  protected val logger: Logger

  // Captures the "confirm=..." query parameter of the "Download anyway" link.
  protected val googleDriveConfirmTokenRegex: Regex = {
    """<a id="uc-download-link".*href="/uc\?export=download&(confirm=.*)&id=.*">Download anyway</a>""".r
  }

  /**
   * Downloads `url` to `path` unless `path` already exists.
   *
   * @param path       destination file
   * @param url        address to download from
   * @param bufferSize size in bytes of the copy buffer
   * @return `false` when the file already existed, `true` after a download
   * @throws IOException when the download fails (logged, then rethrown)
   */
  def maybeDownload(path: Path, url: String, bufferSize: Int = 8192): Boolean = {
    if (Files.exists(path)) {
      false
    } else {
      try {
        logger.info(s"Downloading file '$url'.")
        // A bare file name has no parent, so there is nothing to create.
        Option(path.getParent).foreach(parent => Files.createDirectories(parent))
        download(path, url, bufferSize)

        // Small hack to deal with downloading large Google Drive files.
        if (Files.size(path) < 1024 * 1024 && url.contains("drive.google.com")) {
          val source = Source.fromFile(path.toFile)
          // Close the file before it is overwritten by the second download.
          val content = try source.getLines().mkString("\n") finally source.close()
          googleDriveConfirmTokenRegex.findFirstMatchIn(content) match {
            case Some(confirmToken) => download(path, s"$url&${confirmToken.group(1)}", bufferSize)
            case None => ()
          }
        }

        logger.info(s"Downloaded file '$url'.")
        true
      } catch {
        case e: IOException =>
          logger.error(s"Could not download file '$url'", e)
          throw e
      }
    }
  }

  /**
   * Copies the content at `url` into `path`, logging progress at most once
   * every ten seconds. Both streams are closed even when the copy fails.
   *
   * @param path       destination file (overwritten)
   * @param url        address to read from
   * @param bufferSize size in bytes of the copy buffer
   */
  protected def download(path: Path, url: String, bufferSize: Int = 8192): Unit = {
    val connection = new URL(url).openConnection()
    val contentLength = connection.getContentLengthLong
    val inputStream = connection.getInputStream
    try {
      val outputStream = Files.newOutputStream(path)
      try {
        val buffer = new Array[Byte](bufferSize)
        var progress = 0L
        var progressLogTime = System.currentTimeMillis
        LazyList.continually(inputStream.read(buffer)).takeWhile(_ != -1).foreach(numBytes => {
          outputStream.write(buffer, 0, numBytes)
          progress += numBytes
          val time = System.currentTimeMillis
          if (time - progressLogTime >= 1e4) {
            if (contentLength > 0) {
              val numBars = Math.floorDiv(10 * progress, contentLength).toInt
              logger.info(s"[${"=" * numBars}${" " * (10 - numBars)}] $progress / $contentLength bytes downloaded.")
              progressLogTime = time
            } else {
              logger.info(s"$progress bytes downloaded.")
              progressLogTime = time
            }
          }
        })
      } finally {
        outputStream.close()
      }
    } finally {
      inputStream.close()
    }
  }
}
Example 78
Source File: ComposedMetricConnection.scala From DataQuality with GNU Lesser General Public License v3.0 | 5 votes |
package models.metrics import models.AppDB import org.squeryl.PrimitiveTypeMode._ import org.squeryl.annotations.Column import scala.collection.immutable.Seq import scala.util.matching.Regex import scala.util.parsing.combinator.JavaTokenParsers ^]".r ~ factor) ^^ { case t ~ ts => ts.foldLeft(t) { case (t1, "*" ~ t2) => Mul(t1, t2) case (t1, "/" ~ t2) => Div(t1, t2) case (t1, "^" ~ t2) => Pow(t1, t2) } } lazy val factor = "(" ~> expr <~ ")" | num | met lazy val num = floatingPointNumber ^^ { t => Num(t.toDouble) } lazy val met = rgx ^^ { t => Met(t.toString)} }
Example 79
Source File: ComposedMetricConnection.scala From DataQuality with GNU Lesser General Public License v3.0 | 5 votes |
package dbmodel.metrics import dbmodel.AppDB import org.squeryl.PrimitiveTypeMode._ import org.squeryl.annotations.Column import scala.collection.immutable.Seq import scala.util.matching.Regex import scala.util.parsing.combinator.JavaTokenParsers ^]".r ~ factor) ^^ { case t ~ ts => ts.foldLeft(t) { case (t1, "*" ~ t2) => Mul(t1, t2) case (t1, "/" ~ t2) => Div(t1, t2) case (t1, "^" ~ t2) => Pow(t1, t2) } } lazy val factor = "(" ~> expr <~ ")" | num | met lazy val num = floatingPointNumber ^^ { t => Num(t.toDouble) } lazy val met = rgx ^^ { t => Met(t.toString)} }
Example 80
Source File: Path.scala From shield with MIT License | 5 votes |
package shield.routing

import scala.annotation.tailrec
import scala.util.matching.Regex

object Path {
  /** Builds a [[Path]] from the segments that `PathTemplateParser` yields for `path`. */
  def apply(path: String): Path =
    new Path(PathTemplateParser.pathToSegments(path))
}

/** A path made of segments, ordered by segment priority. */
case class Path(segments: List[Segment]) extends Ordered[Path] {

  /**
   * Compares segment by segment on `priority`. A path that runs out of
   * segments first sorts after the longer one, and complete ties are broken
   * by the string form of the segment lists.
   */
  override def compare(that: Path): Int = {
    @tailrec
    def byPriority(mine: List[Segment], theirs: List[Segment]): Int =
      (mine, theirs) match {
        case (Nil, Nil) => 0
        case (Nil, _) => 1
        case (_, Nil) => -1
        case (left :: leftRest, right :: rightRest) =>
          if (left.priority != right.priority) left.priority - right.priority
          else byPriority(leftRest, rightRest)
      }

    byPriority(segments, that.segments) match {
      case 0 => segments.toString().compare(that.segments.toString())
      case difference => difference
    }
  }

  override lazy val toString: String =
    segments.map(_.toString).mkString("/", "", "")

  // todo: safely handle invalid regexs due to bad swagger documentation
  lazy val regex: Regex =
    segments.map(_.regexPiece).mkString("^/", "", "/?$").r
}
Example 81
Source File: MIT.scala From banditsbook-scala with MIT License | 5 votes |
import de.heikoseeberger.sbtheader.HeaderPattern import de.heikoseeberger.sbtheader.license.License import scala.util.matching.Regex object MIT extends License { import HeaderPattern._ override def apply(yyyy: String, copyrightOwner: String, commentStyle: String = "*"): (Regex, String) = { commentStyle match { case "*" => ( cStyleBlockComment, s"""| | |""".stripMargin ) case "#" => ( hashLineComment, s"""|# |# Copyright (c) $yyyy $copyrightOwner |# |# Permission is hereby granted, free of charge, to any person obtaining a copy of |# this software and associated documentation files (the "Software"), to deal in |# the Software without restriction, including without limitation the rights to |# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of |# the Software, and to permit persons to whom the Software is furnished to do so, |# subject to the following conditions: |# |# The above copyright notice and this permission notice shall be included in all |# copies or substantial portions of the Software. |# |# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS |# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR |# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER |# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |""".stripMargin ) case _ => throw new IllegalArgumentException(s"Comment style '$commentStyle' not supported") } } }
Example 82
Source File: GenderDetectStrategy.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.feature

import enumeratum.{Enum, EnumEntry}
import org.json4s.CustomSerializer
import org.json4s.JsonAST.JString

import scala.util.Try
import scala.util.matching.Regex

sealed class GenderDetectStrategy extends EnumEntry

/** The available strategies and their string / JSON (de)serialisation. */
case object GenderDetectStrategy extends Enum[GenderDetectStrategy] {
  val values: Seq[GenderDetectStrategy] = findValues

  // Separates a strategy name from its argument in the string form.
  val delimiter = " WITH VALUE "
  val ByIndexString = "ByIndex"
  val ByLastString = "ByLast"
  val ByRegexString = "ByRegex"
  val FindHonorificString = "FindHonorific"

  case class ByIndex(index: Int) extends GenderDetectStrategy {
    override def toString: String = ByIndexString + delimiter + index.toString
  }
  case class ByLast() extends GenderDetectStrategy {
    override def toString: String = ByLastString
  }
  case class ByRegex(pattern: Regex) extends GenderDetectStrategy {
    override def toString: String = ByRegexString + delimiter + pattern.toString
  }
  case class FindHonorific() extends GenderDetectStrategy {
    override def toString: String = FindHonorificString
  }

  /**
   * Parses the string produced by a strategy's `toString`.
   *
   * @param s serialised strategy; may be null
   * @return the matching strategy
   * @throws RuntimeException (via `sys.error`) when `s` is null or matches
   *                          no strategy
   */
  def fromString(s: String): GenderDetectStrategy = {
    // Split at the FIRST delimiter only. Everything after it is the argument,
    // so a regex that contains the delimiter text, or is empty, round-trips.
    Option(s).map(_.split(delimiter, 2)) match {
      case None =>
        sys.error("Attempted to deserialize GenderDetectStrategy but found empty value")
      case Some(parts) =>
        // The int and the regex are each parsed once; a failed parse is "no match".
        val parsed: Option[GenderDetectStrategy] = parts match {
          case Array(ByIndexString, index) => Try(index.toInt).toOption.map(ByIndex(_))
          case Array(ByLastString) => Some(ByLast())
          case Array(ByRegexString, regex) => Try(regex.r).toOption.map(ByRegex(_))
          case Array(FindHonorificString) => Some(FindHonorific())
          case _ => None
        }
        parsed.getOrElse(
          sys.error(s"Attempted to deserialize GenderDetectStrategy but no matching entry found for value '$s'")
        )
    }
  }

  /** json4s serializer mapping a strategy to and from its string form. */
  def json4s: CustomSerializer[GenderDetectStrategy] =
    new CustomSerializer[GenderDetectStrategy](_ =>
      (
        { case JString(s) => GenderDetectStrategy.fromString(s) },
        { case x: GenderDetectStrategy => JString(x.toString) }
      )
    )
}
Example 83
Source File: SuppressingReporter.scala From silencer with Apache License 2.0 | 5 votes |
package com.github.ghik.silencer

import scala.collection.mutable.ArrayBuffer
import scala.reflect.internal.util.{Position, SourceFile}
import scala.reflect.io.AbstractFile
import scala.tools.nsc.reporters.{FilteringReporter, ForwardingReporter}
import scala.util.matching.Regex

/**
 * Reporter that filters warnings through global, path and line-content
 * filters and through per-file suppressions before delegating to `original`.
 */
class SuppressingReporter(
  original: FilteringReporter,
  globalFilters: List[Regex],
  protected val lineContentFilters: List[Regex],
  protected val pathFilters: List[Regex],
  protected val sourceRoots: List[AbstractFile]
) extends ForwardingReporter(original) with SuppressingReporterBase {

  // Suppressions are sorted by end offset of their suppression ranges so that nested suppressions come before
  // their containing suppressions. This is ensured by FindSuppressions traverser in SilencerPlugin.
  // This order is important for proper unused @silent annotation detection.
  /** Marks the first suppression covering `(pos, msg)` as used; `true` when one exists. */
  def isSuppressed(suppressions: List[Suppression], pos: Position, msg: String): Boolean = {
    val covering = suppressions.find(_.suppresses(pos, msg))
    covering.foreach(suppression => suppression.used = true)
    covering.isDefined
  }

  /** Registers the suppressions of `source` and replays the warnings deferred for it. */
  def setSuppressions(source: SourceFile, suppressions: List[Suppression]): Unit = {
    fileSuppressions(source) = suppressions
    deferredWarnings.remove(source).foreach { pending =>
      pending.foreach { case (pos, msg) => warning(pos, msg) } // will invoke `filter`
    }
  }

  override def reset(): Unit = {
    super.reset()
    deferredWarnings.clear()
    fileSuppressions.clear()
  }

  /** Returns 2 for a suppressed or deferred warning, otherwise whatever the parent filter returns. */
  override def filter(pos: Position, msg: String, severity: Severity): Int = {
    def suppressedGlobally: Boolean =
      matchesPathFilter(pos) || anyMatches(globalFilters, msg) || matchesLineContentFilter(pos)

    // With no suppressions known yet for the file, the warning is parked in
    // deferredWarnings and treated as suppressed for now.
    def suppressedLocally: Boolean =
      fileSuppressions
        .get(pos.source)
        .map(isSuppressed(_, pos, msg))
        .getOrElse {
          deferredWarnings.getOrElseUpdate(pos.source, new ArrayBuffer) += ((pos, msg))
          true
        }

    val suppressed = severity == WARNING && (suppressedGlobally || suppressedLocally)
    if (suppressed) 2 else super.filter(pos, msg, severity)
  }
}
Example 84
Source File: SuppressingReporterBase.scala From silencer with Apache License 2.0 | 5 votes |
package com.github.ghik.silencer

import java.io.File

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
import scala.reflect.internal.util.{Position, SourceFile}
import scala.reflect.io.AbstractFile
import scala.tools.nsc.reporters.Reporter
import scala.util.matching.Regex

// Code that's shared between the version-dependent sources for 2.12 and 2.13
trait SuppressingReporterBase { self: Reporter =>
  protected def pathFilters: List[Regex]
  protected def lineContentFilters: List[Regex]
  protected def sourceRoots: List[AbstractFile]

  protected val deferredWarnings = new mutable.HashMap[SourceFile, ArrayBuffer[(Position, String)]]
  protected val fileSuppressions = new mutable.HashMap[SourceFile, List[Suppression]]
  protected val normalizedPathCache = new mutable.HashMap[SourceFile, String]

  /** Asks every suppression registered for `source` to report itself if unused. */
  def checkUnused(source: SourceFile): Unit =
    for (suppression <- fileSuppressions(source)) suppression.reportUnused(this)

  /** Path of `child` relative to `dir`, or `None` when `child` is not located under `dir`. */
  protected def relativize(dir: AbstractFile, child: AbstractFile): Option[String] = {
    val childPath = child.canonicalPath
    val dirPrefix = dir.canonicalPath + File.separator
    Some(childPath).filter(_.startsWith(dirPrefix)).map(_.substring(dirPrefix.length))
  }

  /** True when the '/'-separated path of the position's source file matches a path filter. */
  protected def matchesPathFilter(pos: Position): Boolean = {
    // Relative to the first source root containing the file, else canonical.
    def normalizedPath: String = {
      val file = pos.source.file
      val path = sourceRoots.iterator
        .map(relativize(_, file))
        .collectFirst { case Some(relative) => relative }
        .getOrElse(file.canonicalPath)
      path.replace("\\", "/")
    }

    pathFilters.nonEmpty && pos.isDefined &&
      anyMatches(pathFilters, normalizedPathCache.getOrElseUpdate(pos.source, normalizedPath))
  }

  /** True when the source line at the position matches a line-content filter. */
  protected def matchesLineContentFilter(pos: Position): Boolean =
    lineContentFilters.nonEmpty && pos.isDefined &&
      anyMatches(lineContentFilters, pos.source.lineToString(pos.line - 1))

  /** True when at least one pattern is found somewhere in `value`. */
  protected def anyMatches(patterns: List[Regex], value: String): Boolean =
    patterns.exists(pattern => pattern.findFirstIn(value).nonEmpty)
}
Example 85
Source File: SuppressingReporter.scala From silencer with Apache License 2.0 | 5 votes |
package com.github.ghik.silencer

import scala.collection.mutable.ArrayBuffer
import scala.reflect.internal.util.{Position, SourceFile}
import scala.reflect.io.AbstractFile
import scala.tools.nsc.reporters.Reporter
import scala.util.matching.Regex

/**
 * Reporter that forwards to `original`, dropping warnings matched by the
 * global, path or line-content filters or by a per-file suppression.
 */
class SuppressingReporter(
  original: Reporter,
  globalFilters: List[Regex],
  protected val lineContentFilters: List[Regex],
  protected val pathFilters: List[Regex],
  protected val sourceRoots: List[AbstractFile]
) extends Reporter with SuppressingReporterBase {

  // Suppressions are sorted by end offset of their suppression ranges so that nested suppressions come before
  // their containing suppressions. This is ensured by FindSuppressions traverser in SilencerPlugin.
  // This order is important for proper unused @silent annotation detection.
  /** Marks the first covering suppression as used, or forwards the warning when none covers it. */
  def suppressOrForward(suppressions: List[Suppression], pos: Position, msg: String): Unit = {
    val covering = suppressions.find(_.suppresses(pos, msg))
    if (covering.isDefined) covering.foreach(suppression => suppression.used = true)
    else original.warning(pos, msg)
  }

  /** Registers the suppressions of `source` and replays the warnings deferred for it. */
  def setSuppressions(source: SourceFile, suppressions: List[Suppression]): Unit = {
    fileSuppressions(source) = suppressions
    deferredWarnings.remove(source).foreach { pending =>
      pending.foreach { case (pos, msg) => suppressOrForward(suppressions, pos, msg) }
    }
    updateCounts()
  }

  override def reset(): Unit = {
    super.reset()
    original.reset()
    deferredWarnings.clear()
    fileSuppressions.clear()
  }

  protected def info0(pos: Position, msg: String, severity: Severity, force: Boolean): Unit = {
    // Filtered warnings are dropped, position-less ones forwarded, ones for a
    // file without known suppressions deferred, the rest checked right away.
    def handleWarning(): Unit = {
      if (matchesPathFilter(pos) || anyMatches(globalFilters, msg) || matchesLineContentFilter(pos)) {
        ()
      } else if (!pos.isDefined) {
        original.warning(pos, msg)
      } else {
        fileSuppressions.get(pos.source) match {
          case Some(suppressions) => suppressOrForward(suppressions, pos, msg)
          case None => deferredWarnings.getOrElseUpdate(pos.source, new ArrayBuffer) += ((pos, msg))
        }
      }
    }

    severity match {
      case INFO => original.info(pos, msg, force)
      case WARNING => handleWarning()
      case ERROR => original.error(pos, msg)
    }
    updateCounts()
  }

  // Mirrors the counters of the wrapped reporter.
  private def updateCounts(): Unit = {
    INFO.count = original.INFO.count
    WARNING.count = original.WARNING.count
    ERROR.count = original.ERROR.count
  }

  private def originalSeverity(severity: Severity): original.Severity = severity match {
    case INFO => original.INFO
    case WARNING => original.WARNING
    case ERROR => original.ERROR
  }

  override def hasErrors: Boolean = original.hasErrors || cancelled

  override def hasWarnings: Boolean = original.hasWarnings

  override def resetCount(severity: Severity): Unit = {
    super.resetCount(severity)
    original.resetCount(originalSeverity(severity))
  }

  override def flush(): Unit = {
    super.flush()
    original.flush()
  }
}
Example 86
Source File: GlobalAcl.scala From vinyldns with Apache License 2.0 | 5 votes |
package vinyldns.api.domain.access

import vinyldns.api.domain.zone.ZoneRecordValidations
import vinyldns.core.domain.DomainHelpers
import vinyldns.core.domain.auth.AuthPrincipal
import vinyldns.core.domain.record.RecordType.RecordType
import vinyldns.core.domain.record.{PTRData, RecordData, RecordType}
import vinyldns.core.domain.zone.Zone

import scala.util.matching.Regex

final case class GlobalAcl(groupIds: List[String], fqdnRegexList: List[String])

/** The set of global ACLs, indexed by group id for authorisation checks. */
final case class GlobalAcls(acls: List[GlobalAcl]) {

  // Create a map of Group ID -> Regexes valid for that group id
  private val aclMap: Map[String, List[Regex]] = {
    val perGroup = acls.flatMap { acl =>
      acl.groupIds.map { groupId =>
        groupId -> ZoneRecordValidations.toCaseIgnoredRegexList(acl.fqdnRegexList)
      }
    }
    perGroup.groupBy(_._1).map {
      case (groupId, entries) => groupId -> entries.flatMap { case (_, regexes) => regexes }
    }
  }

  /**
   * Decides whether `authPrincipal` is covered by a global ACL for the record.
   *
   * For PTR records every `PTRData` target in `recordData` must be allowed,
   * and there must be at least one. For other types the record name is
   * checked, qualified with the zone name unless it already ends with a dot.
   */
  def isAuthorized(
    authPrincipal: AuthPrincipal,
    recordName: String,
    recordType: RecordType,
    zone: Zone,
    recordData: List[RecordData] = List.empty
  ): Boolean = {

    // True when one of the regexes of the principal's groups accepts the
    // lower-cased, dot-terminated fqdn.
    def fqdnAllowed(fqdn: String): Boolean = {
      val regexList =
        authPrincipal.memberGroupIds.flatMap(aclMap.getOrElse(_, List.empty)).toList
      val normalizedFqdn = DomainHelpers.ensureTrailingDot(fqdn.toLowerCase)
      ZoneRecordValidations.isStringInRegexList(regexList, normalizedFqdn)
    }

    if (recordType == RecordType.PTR) {
      val ptrs = recordData.collect { case p: PTRData => p.ptrdname }
      // forall returns true if the list is empty
      ptrs.nonEmpty && ptrs.forall(ptrdname => fqdnAllowed(ptrdname.fqdn))
    } else {
      val fqdn =
        if (recordName.endsWith(".")) recordName
        else s"$recordName.${zone.name}"
      fqdnAllowed(fqdn)
    }
  }
}
Example 87
Source File: ZoneRecordValidations.scala From vinyldns with Apache License 2.0 | 5 votes |
package vinyldns.api.domain.zone

import cats.implicits._
import cats.data._
import com.comcast.ip4s.IpAddress
import com.comcast.ip4s.interop.cats.implicits._
import vinyldns.core.domain.{
  DomainHelpers,
  DomainValidationError,
  HighValueDomainError,
  RecordRequiresManualReview
}
import vinyldns.core.domain.record.{NSData, RecordSet}

import scala.util.matching.Regex

/** Validations applied to zone records; each yields a `ValidatedNel`. */
object ZoneRecordValidations {

  /** Compiles every raw pattern with the case-insensitive flag prepended. */
  def toCaseIgnoredRegexList(rawList: List[String]): List[Regex] =
    for (raw <- rawList) yield s"(?i)$raw".r

  /** Valid when every NS record of the set passes `isApprovedNameServer`. */
  def containsApprovedNameServers(
    approvedServerList: List[Regex],
    nsRecordSet: RecordSet
  ): ValidatedNel[String, RecordSet] = {
    val validations: List[ValidatedNel[String, NSData]] =
      nsRecordSet.records.collect {
        case ns: NSData => isApprovedNameServer(approvedServerList, ns)
      }
    validations.sequence.map(_ => nsRecordSet)
  }

  /** Invalid with `HighValueDomainError` when `fqdn` matches a high-value regex. */
  def isNotHighValueFqdn(
    highValueRegexList: List[Regex],
    fqdn: String
  ): ValidatedNel[DomainValidationError, Unit] =
    if (isStringInRegexList(highValueRegexList, fqdn)) HighValueDomainError(fqdn).invalidNel
    else ().validNel

  /** Invalid with `HighValueDomainError` when `ip` is in the high-value list. */
  def isNotHighValueIp(
    highValueIpList: List[IpAddress],
    ip: String
  ): ValidatedNel[DomainValidationError, Unit] =
    if (isIpInIpList(highValueIpList, ip)) HighValueDomainError(ip).invalidNel
    else ().validNel

  /** Invalid with `RecordRequiresManualReview` when `fqdn` matches a review regex. */
  def domainDoesNotRequireManualReview(
    regexList: List[Regex],
    fqdn: String
  ): ValidatedNel[DomainValidationError, Unit] =
    if (isStringInRegexList(regexList, fqdn)) RecordRequiresManualReview(fqdn).invalidNel
    else ().validNel

  /** Invalid with `RecordRequiresManualReview` when `ip` is in the review list. */
  def ipDoesNotRequireManualReview(
    regexList: List[IpAddress],
    ip: String
  ): ValidatedNel[DomainValidationError, Unit] =
    if (isIpInIpList(regexList, ip)) RecordRequiresManualReview(ip).invalidNel
    else ().validNel

  /** Invalid with `RecordRequiresManualReview` when the lower-cased, dot-terminated zone name is listed. */
  def zoneDoesNotRequireManualReview(
    zonesRequiringReview: Set[String],
    zoneName: String,
    fqdn: String
  ): ValidatedNel[DomainValidationError, Unit] = {
    val normalizedZone = DomainHelpers.ensureTrailingDot(zoneName.toLowerCase)
    if (zonesRequiringReview.contains(normalizedZone)) RecordRequiresManualReview(fqdn).invalidNel
    else ().validNel
  }
}
Example 88
Source File: SqsMessageQueueProvider.scala From vinyldns with Apache License 2.0 | 5 votes |
package vinyldns.sqs.queue

import cats.effect.IO
import cats.implicits._
import com.amazonaws.auth.{AWSStaticCredentialsProvider, BasicAWSCredentials}
import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration
import com.amazonaws.services.sqs.model.QueueDoesNotExistException
import com.amazonaws.services.sqs.{AmazonSQSAsync, AmazonSQSAsyncClientBuilder}
import org.slf4j.LoggerFactory
import pureconfig._
import pureconfig.generic.auto._
import pureconfig.module.catseffect.syntax._
import cats.effect.Blocker
import vinyldns.core.queue.{MessageQueue, MessageQueueConfig, MessageQueueProvider}

import scala.util.matching.Regex
import cats.effect.ContextShift

/** Builds an SQS-backed `MessageQueue` from configuration. */
class SqsMessageQueueProvider extends MessageQueueProvider {
  import SqsMessageQueueProvider._

  private implicit val cs: ContextShift[IO] =
    IO.contextShift(scala.concurrent.ExecutionContext.global)

  // NOTE(review): informational messages in this class are logged at error
  // level; presumably deliberate -- confirm before changing.

  /**
   * Loads the settings, validates the queue name, creates the client and
   * resolves (or creates) the queue.
   */
  def load(config: MessageQueueConfig): IO[MessageQueue] =
    for {
      settingsConfig <- Blocker[IO].use(
        ConfigSource.fromConfig(config.settings).loadF[IO, SqsMessageQueueSettings](_)
      )
      _ <- IO.fromEither(validateQueueName(settingsConfig.queueName))
      client <- setupClient(settingsConfig)
      queueUrl <- setupQueue(client, settingsConfig.queueName)
      _ <- IO(logger.error(s"Queue URL: $queueUrl\n"))
    } yield new SqsMessageQueue(queueUrl, client)

  /**
   * Accepts names of 1-80 word or hyphen characters.
   *
   * @return the name on the right, or `InvalidQueueName` on the left
   */
  def validateQueueName(queueName: String): Either[InvalidQueueName, String] = {
    val validQueueNameRegex: Regex = """^([\w\-]{1,80})$""".r

    validQueueNameRegex
      .findFirstIn(queueName)
      .map(Right(_))
      .getOrElse(Left(InvalidQueueName(queueName)))
  }

  /** Creates the async SQS client for the configured endpoint, region and credentials. */
  def setupClient(sqsMessageQueueSettings: SqsMessageQueueSettings): IO[AmazonSQSAsync] =
    IO {
      logger.error(
        s"Setting up queue client with settings: " +
          s"service endpoint: ${sqsMessageQueueSettings.serviceEndpoint}; " +
          // Previously printed serviceEndpoint under the "signing region" label.
          s"signing region: ${sqsMessageQueueSettings.signingRegion}; " +
          s"queue name: ${sqsMessageQueueSettings.queueName}"
      )
      AmazonSQSAsyncClientBuilder
        .standard()
        .withEndpointConfiguration(
          new EndpointConfiguration(
            sqsMessageQueueSettings.serviceEndpoint,
            sqsMessageQueueSettings.signingRegion
          )
        )
        .withCredentials(
          new AWSStaticCredentialsProvider(
            new BasicAWSCredentials(
              sqsMessageQueueSettings.accessKey,
              sqsMessageQueueSettings.secretKey
            )
          )
        )
        .build()
    }

  /** Returns the URL of `queueName`, creating the queue when it does not exist. */
  def setupQueue(client: AmazonSQSAsync, queueName: String): IO[String] =
    // Create queue if it doesn't exist
    IO {
      logger.error(s"Setting up queue with name [$queueName]")
      client.getQueueUrl(queueName).getQueueUrl
    }.recoverWith {
      case _: QueueDoesNotExistException => IO(client.createQueue(queueName).getQueueUrl)
    }
}

object SqsMessageQueueProvider {
  final case class InvalidQueueName(queueName: String)
      extends Throwable(
        s"Invalid queue name: $queueName. Must be 1-80 alphanumeric, hyphen or underscore characters. FIFO queues " +
          "(queue names ending in \".fifo\") are not supported."
      )

  private val logger = LoggerFactory.getLogger(classOf[SqsMessageQueueProvider])
}
Example 89
Source File: MetricsConfig.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics

import java.io.{FileInputStream, InputStream}
import java.util.Properties

import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.util.matching.Regex

import org.apache.spark.util.Utils
import org.apache.spark.{Logging, SparkConf}

/**
 * Collects metrics properties from built-in defaults, an optional properties file and
 * `spark.metrics.conf.*` entries of the SparkConf, then groups them per instance prefix.
 */
private[spark] class MetricsConfig(conf: SparkConf) extends Logging {

  private val DEFAULT_PREFIX = "*"
  private val INSTANCE_REGEX = "^(\\*|[a-zA-Z]+)\\.(.+)".r
  private val DEFAULT_METRICS_CONF_FILENAME = "metrics.properties"

  private[metrics] val properties = new Properties()
  // Populated by initialize(); null until then.
  private[metrics] var propertyCategories: mutable.HashMap[String, Properties] = null

  private def setDefaultProperties(prop: Properties): Unit = {
    prop.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet")
    prop.setProperty("*.sink.servlet.path", "/metrics/json")
    prop.setProperty("master.sink.servlet.path", "/metrics/master/json")
    prop.setProperty("applications.sink.servlet.path", "/metrics/applications/json")
  }

  /** Loads all property sources and fills `propertyCategories`. */
  def initialize(): Unit = {
    // Add default properties in case there's no properties file
    setDefaultProperties(properties)

    loadPropertiesFromFile(conf.getOption("spark.metrics.conf"))

    // Also look for the properties in provided Spark configuration
    val prefix = "spark.metrics.conf."
    conf.getAll.foreach {
      case (k, v) if k.startsWith(prefix) =>
        properties.setProperty(k.substring(prefix.length()), v)
      case _ =>
    }

    propertyCategories = subProperties(properties, INSTANCE_REGEX)
    if (propertyCategories.contains(DEFAULT_PREFIX)) {
      // Copy every "*" default into each concrete instance that does not override it.
      val defaultProperty = propertyCategories(DEFAULT_PREFIX).asScala
      for {
        (inst, prop) <- propertyCategories if (inst != DEFAULT_PREFIX)
        (k, v) <- defaultProperty if (prop.getProperty(k) == null)
      } {
        prop.setProperty(k, v)
      }
    }
  }

  /**
   * Groups the entries of `prop` whose key matches `regex` by the regex's first group,
   * storing the second group as the key inside that group's Properties.
   */
  def subProperties(prop: Properties, regex: Regex): mutable.HashMap[String, Properties] = {
    val subProperties = new mutable.HashMap[String, Properties]
    prop.asScala.foreach { kv =>
      if (regex.findPrefixOf(kv._1).isDefined) {
        val regex(prefix, suffix) = kv._1
        subProperties.getOrElseUpdate(prefix, new Properties).setProperty(suffix, kv._2)
      }
    }
    subProperties
  }

  /** Returns the properties of `inst`, falling back to the "*" group and then to empty Properties. */
  def getInstance(inst: String): Properties = {
    propertyCategories.get(inst) match {
      case Some(s) => s
      case None => propertyCategories.getOrElse(DEFAULT_PREFIX, new Properties)
    }
  }

  /**
   * Loads `properties` from the given file, or from the `metrics.properties` class-path resource
   * when no path is given. Load failures are logged and otherwise ignored.
   */
  private[this] def loadPropertiesFromFile(path: Option[String]): Unit = {
    var is: InputStream = null
    try {
      is = path match {
        case Some(f) => new FileInputStream(f)
        case None => Utils.getSparkClassLoader.getResourceAsStream(DEFAULT_METRICS_CONF_FILENAME)
      }

      if (is != null) {
        properties.load(is)
      }
    } catch {
      case e: Exception =>
        val file = path.getOrElse(DEFAULT_METRICS_CONF_FILENAME)
        logError(s"Error loading configuration file $file", e)
    } finally {
      if (is != null) {
        is.close()
      }
    }
  }

}
Example 90
Source File: StringAssertion.scala From cornichon with Apache License 2.0 | 5 votes |
package com.github.agourlay.cornichon.steps.regular.assertStep

import cats.syntax.validated._
import com.github.agourlay.cornichon.core.CornichonError
import com.github.agourlay.cornichon.core.Done._

import scala.util.matching.Regex

/** Base type of the assertions over a string value defined in this file. */
abstract class StringAssertion extends Assertion

/** Valid when `input` contains `expectedPart` as a substring. */
case class StringContainsAssertion(input: String, expectedPart: String) extends StringAssertion {
  val validated =
    if (!input.contains(expectedPart)) StringContainsAssertionError(input, expectedPart).invalidNel
    else validDone
}

/** Error reported by [[StringContainsAssertion]]. */
case class StringContainsAssertionError(input: String, expectedPart: String) extends CornichonError {
  val baseErrorMessage =
    s"""expected string '$expectedPart' to be contained but it is not the case with value :
       |$input""".stripMargin
}

/** Valid when `expectedRegex` matches somewhere inside `input`. */
case class RegexAssertion(input: String, expectedRegex: Regex) extends StringAssertion {
  val validated =
    expectedRegex.findFirstIn(input) match {
      case Some(_) => validDone
      case None => RegexAssertionError(input, expectedRegex).invalidNel
    }
}

/** Error reported by [[RegexAssertion]]. */
case class RegexAssertionError(input: String, expectedRegex: Regex) extends CornichonError {
  val baseErrorMessage =
    s"""expected regular expression '$expectedRegex' to be matched but it is not the case with value :
       |$input""".stripMargin
}
Example 91
Source File: SQLParser.scala From CSYE7200 with MIT License | 5 votes |
package edu.neu.coe.csye7200.parse import com.phasmid.laScala.values.Scalar import scala.language.implicitConversions import scala.util._ import scala.util.matching.Regex def orderByClause: Parser[Scalar] = SQLParser.sOrderBy ~> identifier override def reserved: Parser[String] = super.reserved | SQLParser.sLimit | SQLParser.sOrderBy | SQLParser.sAs | SQLParser.sFrom | SQLParser.sWhere | SQLParser.sSelect } object SQLParser { val sSelect: Regex = """(?i)SELECT""".r val sAs: Regex = """(?i)AS""".r val sFrom: Regex = """(?i)FROM""".r val sWhere: Regex = """(?i)WHERE""".r val sLimit: Regex = """(?i)LIMIT""".r val sOrderBy: Regex = """(?i)ORDER BY""".r }
Example 92
Source File: StatefulDataType.scala From deequ with Apache License 2.0 | 5 votes |
package org.apache.spark.sql

import com.amazon.deequ.analyzers.DataTypeHistogram
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
import org.apache.spark.sql.types._

import scala.util.matching.Regex

/**
  * Aggregate function that counts, per string input value, whether it is null, looks fractional,
  * integral, boolean, or is any other string, and returns the five counters serialized
  * through `DataTypeHistogram.toBytes`.
  */
private[sql] class StatefulDataType extends UserDefinedAggregateFunction {

  val SIZE_IN_BYTES = 40

  // Slot indexes of the five counters inside the aggregation buffer.
  val NULL_POS = 0
  val FRACTIONAL_POS = 1
  val INTEGRAL_POS = 2
  val BOOLEAN_POS = 3
  val STRING_POS = 4

  val FRACTIONAL: Regex = """^(-|\+)? ?\d*\.\d*$""".r
  val INTEGRAL: Regex = """^(-|\+)? ?\d*$""".r
  val BOOLEAN: Regex = """^(true|false)$""".r

  override def inputSchema: StructType = StructType(StructField("value", StringType) :: Nil)

  override def bufferSchema: StructType = StructType(
    StructField("null", LongType) ::
      StructField("fractional", LongType) ::
      StructField("integral", LongType) ::
      StructField("boolean", LongType) ::
      StructField("string", LongType) :: Nil)

  override def dataType: types.DataType = BinaryType

  override def deterministic: Boolean = true

  /** Zeroes every counter slot. */
  override def initialize(buffer: MutableAggregationBuffer): Unit = {
    buffer(NULL_POS) = 0L
    buffer(FRACTIONAL_POS) = 0L
    buffer(INTEGRAL_POS) = 0L
    buffer(BOOLEAN_POS) = 0L
    buffer(STRING_POS) = 0L
  }

  /** Adds one to the counter stored at `slot`. */
  private def increment(buffer: MutableAggregationBuffer, slot: Int): Unit =
    buffer(slot) = buffer.getLong(slot) + 1L

  /** Classifies the single input column and bumps the matching counter (first matching pattern wins). */
  override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
    val slot =
      if (input.isNullAt(0)) NULL_POS
      else {
        input.getString(0) match {
          case FRACTIONAL(_) => FRACTIONAL_POS
          case INTEGRAL(_) => INTEGRAL_POS
          case BOOLEAN(_) => BOOLEAN_POS
          case _ => STRING_POS
        }
      }
    increment(buffer, slot)
  }

  /** Adds the counters of `buffer2` onto `buffer1`, slot by slot. */
  override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
    val slots = Seq(NULL_POS, FRACTIONAL_POS, INTEGRAL_POS, BOOLEAN_POS, STRING_POS)
    slots.foreach { slot =>
      buffer1(slot) = buffer1.getLong(slot) + buffer2.getLong(slot)
    }
  }

  /** Serializes the five counters with `DataTypeHistogram.toBytes`. */
  override def evaluate(buffer: Row): Any = {
    val nulls = buffer.getLong(NULL_POS)
    val fractionals = buffer.getLong(FRACTIONAL_POS)
    val integrals = buffer.getLong(INTEGRAL_POS)
    val booleans = buffer.getLong(BOOLEAN_POS)
    val strings = buffer.getLong(STRING_POS)
    DataTypeHistogram.toBytes(nulls, fractionals, integrals, booleans, strings)
  }
}
Example 93
Source File: PythonProcess.scala From mimir with Apache License 2.0 | 5 votes |
package mimir.util

import java.io._
import scala.annotation.tailrec
import scala.io._
import scala.sys.process._
import scala.util.matching.Regex
import com.typesafe.scalalogging.LazyLogging

/** Locates the bundled `__main__.py` script and launches it with `python2.7`. */
object PythonProcess
  extends LazyLogging
{

  val JAR_PREFIX = "^jar:(.*)!(.*)$".r
  val FILE_PREFIX = "f".r

  // Used as an extractor, so it must match the whole path: the scheme has to be at the start.
  private val SchemePrefix: Regex = "(jar|file):(.*)".r

  /**
   * Strips leading `jar:` / `file:` schemes from a resource URI until a plain file path remains.
   * For `jar:` URIs the in-archive part after the last `!` is dropped first (when present).
   *
   * @throws IOException when the path starts with neither `jar:` nor `file:`
   */
  @tailrec
  private def stripSchemes(path: String): String =
    path match {
      case SchemePrefix("file", rest) =>
        logger.debug(s"Prefix: 'file' Path Now: ${rest}")
        rest
      case SchemePrefix(scheme, rest) =>
        logger.debug(s"Prefix: '${scheme}' Path Now: ${rest}")
        val splitPoint = rest.lastIndexOf('!')
        // A jar URI without '!' previously crashed in substring(0, -1); keep the path unchanged instead.
        val archivePath = if (splitPoint >= 0) rest.substring(0, splitPoint) else rest
        logger.debug(s"Stripping Resource Path; Path Now: ${archivePath}")
        stripSchemes(archivePath)
      case _ =>
        throw new IOException(s"Python integration unsupported: Unknown access method for __main__.py")
    }

  /**
   * Returns the filesystem path behind the `__main__.py` class-path resource.
   *
   * @throws IOException when the resource is missing or its URI uses an unsupported scheme
   */
  def scriptPath: String =
  {
    val resource = getClass().getClassLoader().getResource("__main__.py")
    if(resource == null){
      throw new IOException("Python integration unsupported: __main__.py is unavailable")
    }

    val path = resource.toURI().toString()
    logger.debug(s"Base Path: ${path}")
    stripSchemes(path)
  }

  /**
   * Starts `python2.7 <scriptPath> <operation>`.
   *
   * @param operation argument string appended to the command line
   * @param io        optional process I/O handlers; `null` runs the process without them
   */
  def apply(operation: String, io: ProcessIO = null): Process =
  {
    val cmd = s"python2.7 ${scriptPath} ${operation}"
    logger.debug(s"Running: $cmd")

    if(io == null){
      cmd.run()
    } else {
      cmd.run(io)
    }
  }

}
Example 94
Source File: masking.scala From tofu with Apache License 2.0 | 5 votes |
package tofu.logging.derivation

import scala.annotation.tailrec
import scala.util.matching.Regex
import magnolia.Param

/** Strategy selecting which part of a shown value `masking.string` replaces. */
sealed trait MaskMode

object MaskMode {
  /** Mask every character of the shown value. */
  case object Full extends MaskMode
  /** Replace the whole shown value with "...". */
  case object Erase extends MaskMode
  /** Mask starting at `offset` for at most `maxLength` characters; `-1` means up to the end. */
  case class ForLength(offset: Int, maxLength: Int = -1) extends MaskMode {
    def this(length: Int) = this(0, length)
  }
  /** Mask the span of the pattern's single capture group in the first match. */
  case class Regexp(pattern: Regex) extends MaskMode
}

object masking {
  // Matches the payload between "Some(" / "Some{value=" and the closing ")" / "}" via lookarounds.
  private val SomeRe = "(?<=^Some(\\{value=|\\())(.+)(?=(\\}|\\))$)".r

  /**
    * Masks `shown` according to `mode`: digits become '#', letters become '*',
    * all other characters are kept.
    */
  private[derivation] def string(shown: String, mode: MaskMode) = {
    // Masks up to `left` characters of `arr` in place, starting at index `cur`.
    @tailrec
    def loop(arr: Array[Char], cur: Int, left: Int): String = {
      if (left == 0 || cur == arr.length) new String(arr)
      else {
        val char = arr(cur)
        if (char.isDigit) arr(cur) = '#'
        else if (char.isLetter) arr(cur) = '*'

        loop(arr, cur + 1, left - 1)
      }
    }

    mode match {
      case MaskMode.Erase =>
        "..."
      case MaskMode.Full =>
        loop(shown.toCharArray, 0, shown.length)
      case MaskMode.Regexp(pattern) =>
        // Only patterns with exactly one capture group are applied; otherwise `shown` is returned as is.
        pattern
          .findFirstMatchIn(shown)
          .collect {
            case m if m.groupCount == 1 =>
              val start = m.start(1)
              val end = m.end(1)

              loop(shown.toCharArray, start, end - start)
          }
          .getOrElse(shown)
      case MaskMode.ForLength(offset, maxLength) =>
        // The start index is clamped to [0, shown.length].
        loop(shown.toCharArray, shown.length min (offset max 0), if (maxLength == -1) shown.length else maxLength)
    }
  }

  /**
    * Masks the shown form of `field`: `None` is left untouched, for `Some` only the part
    * matched by `SomeRe` is masked, anything else is masked as a whole.
    */
  private[derivation] def field[T](field: T, shown: String, mode: MaskMode) = field match {
    case None    => shown
    case Some(_) => SomeRe.replaceSomeIn(shown, m => Some(string(m.toString, mode)))
    case _       => string(shown, mode)
  }

  /**
    * Renders each parameter not annotated with `hidden` as `label=value`, applying the mode
    * of a `masked` annotation when one is present.
    */
  private[derivation] def params[TypeClass[_], Type](
      tpe: Type,
      params: Seq[Param[TypeClass, Type]]
  )(fn: TypeClass[Any] => Any => String) =
    params.iterator
      .filterNot(_.annotations.contains(hidden()))
      .map { param =>
        import param._

        val value: PType = dereference(tpe)
        val shown        = fn(typeclass.asInstanceOf[TypeClass[Any]])(value)

        val repr = annotations.collectFirst { case masked(mode) => field(value, shown, mode) }
          .getOrElse(shown)

        s"$label=$repr"
      }

  /** String syntax: `value.mask` and `value.mask(mode)`. */
  implicit final class Ops(private val value: String) extends AnyVal {
    def mask: String                 = mask(MaskMode.Full)
    def mask(mode: MaskMode): String = string(value, mode)
  }
}
Example 95
Source File: ReaderConfig.scala From pulsar4s with Apache License 2.0 | 5 votes |
package com.sksamuel.pulsar4s

import org.apache.pulsar.client.api._

import scala.concurrent.duration.FiniteDuration
import scala.util.matching.Regex
import java.util.concurrent.TimeUnit

/** Where a reader starts: either at a concrete message id or rolled back by a duration. */
sealed trait StartMessage
/** Start position given as a message id. */
final case class Message(messageId: MessageId) extends StartMessage
/** Start position given as a rollback duration expressed in `timeunit`. */
final case class RollBack(rollbackDuration: Long, timeunit: TimeUnit) extends StartMessage

/**
  * Settings for a reader on `topic`. Every `Option` field defaults to `None`.
  * `seek` is deprecated in favour of `startMessage`, which has no default and must be supplied.
  */
case class ReaderConfig(topic: Topic,
                        @deprecated("use startMessage instead", "2.5.3")
                        seek: MessageId = MessageId.earliest,
                        startMessage: StartMessage,
                        startMessageIdInclusive: Boolean = true,
                        receiverQueueSize: Option[Int] = None,
                        reader: Option[String] = None,
                        readCompacted: Option[Boolean] = None,
                        additionalProperties: Map[String, AnyRef] = Map.empty)
Example 96
Source File: ProducerConfig.scala From pulsar4s with Apache License 2.0 | 5 votes |
package com.sksamuel.pulsar4s

import org.apache.pulsar.client.api._

import scala.concurrent.duration.FiniteDuration
import scala.util.matching.Regex

/**
  * Settings for a producer on `topic`. Only `topic` is required; every `Option` field
  * defaults to `None` and `additionalProperties` defaults to an empty map.
  */
case class ProducerConfig(topic: Topic,
                          encryptionKey: Option[String] = None,
                          batchingMaxBytes: Option[Int] = None,
                          batchingMaxMessages: Option[Int] = None,
                          batchingMaxPublishDelay: Option[FiniteDuration] = None,
                          blockIfQueueFull: Option[Boolean] = None,
                          cryptoFailureAction: Option[ProducerCryptoFailureAction] = None,
                          cryptoKeyReader: Option[CryptoKeyReader] = None,
                          enableBatching: Option[Boolean] = None,
                          hashingScheme: Option[HashingScheme] = None,
                          initialSequenceId: Option[Long] = None,
                          maxPendingMessages: Option[Int] = None,
                          maxPendingMessagesAcrossPartitions: Option[Int] = None,
                          messageRouter: Option[MessageRouter] = None,
                          messageRoutingMode: Option[MessageRoutingMode] = None,
                          roundRobinRouterBatchingPartitionSwitchFrequency: Option[Int] = None,
                          producerName: Option[String] = None,
                          sendTimeout: Option[FiniteDuration] = None,
                          compressionType: Option[CompressionType] = None,
                          additionalProperties: Map[String, AnyRef] = Map.empty)
Example 97
Source File: ConsumerConfig.scala From pulsar4s with Apache License 2.0 | 5 votes |
package com.sksamuel.pulsar4s

import org.apache.pulsar.client.api._

import scala.concurrent.duration.FiniteDuration
import scala.util.matching.Regex

/**
  * Settings for a consumer subscribing under `subscriptionName`. Topics can be given as an explicit
  * list (`topics`) and/or a regex (`topicPattern`); every `Option` field defaults to `None`.
  */
case class ConsumerConfig(subscriptionName: Subscription,
                          topics: Seq[Topic] = Nil,
                          topicPattern: Option[Regex] = None,
                          consumerEventListener: Option[ConsumerEventListener] = None,
                          cryptoFailureAction: Option[ConsumerCryptoFailureAction] = None,
                          consumerName: Option[String] = None,
                          cryptoKeyReader: Option[CryptoKeyReader] = None,
                          autoUpdatePartitions: Option[Boolean] = None,
                          maxTotalReceiverQueueSizeAcrossPartitions: Option[Int] = None,
                          negativeAckRedeliveryDelay: Option[FiniteDuration] = None,
                          patternAutoDiscoveryPeriod: Option[Int] = None,
                          priorityLevel: Option[Int] = None,
                          receiverQueueSize: Option[Int] = None,
                          subscriptionInitialPosition: Option[SubscriptionInitialPosition] = None,
                          subscriptionTopicsMode: Option[RegexSubscriptionMode] = None,
                          subscriptionType: Option[SubscriptionType] = None,
                          readCompacted: Option[Boolean] = None,
                          ackTimeout: Option[FiniteDuration] = None,
                          ackTimeoutTickTime: Option[FiniteDuration] = None,
                          acknowledgmentGroupTime: Option[FiniteDuration] = None,
                          additionalProperties: Map[String, AnyRef] = Map.empty,
                          deadLetterPolicy: Option[DeadLetterPolicy] = None)
Example 98
Source File: LabelMatcher.scala From odinson with Apache License 2.0 | 5 votes |
package ai.lum.odinson.digraph

import scala.util.matching.Regex
import ai.lum.common.StringUtils._

/** Predicate over label ids. */
sealed trait LabelMatcher {
  def matches(labelId: Int): Boolean
}

/** Matches a label id when the term the vocabulary returns for it fully matches `regex`. */
class RegexLabelMatcher(val regex: Regex, val vocabulary: Vocabulary) extends LabelMatcher {
  // Renders the pattern between slashes, escaping any slash inside it.
  override def toString: String = s"/${regex.pattern.pattern.replaceAllLiterally("/", "\\/")}/"
  // mimic lucene regex behavior (i.e., regex must match the whole string)
  def matches(labelId: Int): Boolean = vocabulary.getTerm(labelId) match {
    case Some(regex.anchored(_*)) => true
    // Covers both an unknown label id and a term that does not match.
    case _ => false
  }
}
Example 99
Source File: MetricsConfig.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics

import java.io.{FileInputStream, InputStream}
import java.util.Properties

import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.util.matching.Regex

import org.apache.spark.util.Utils
import org.apache.spark.{Logging, SparkConf}

/**
 * Gathers metrics properties (built-in defaults, optional properties file, `spark.metrics.conf.*`
 * entries of the SparkConf) and splits them into one Properties object per instance prefix.
 */
private[spark] class MetricsConfig(conf: SparkConf) extends Logging {

  private val DEFAULT_PREFIX = "*"
  private val INSTANCE_REGEX = "^(\\*|[a-zA-Z]+)\\.(.+)".r
  private val DEFAULT_METRICS_CONF_FILENAME = "metrics.properties"

  private[metrics] val properties = new Properties()
  // Assigned by initialize(); null before that.
  private[metrics] var propertyCategories: mutable.HashMap[String, Properties] = null

  private def setDefaultProperties(prop: Properties): Unit = {
    prop.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet")
    prop.setProperty("*.sink.servlet.path", "/metrics/json")
    prop.setProperty("master.sink.servlet.path", "/metrics/master/json")
    prop.setProperty("applications.sink.servlet.path", "/metrics/applications/json")
  }

  /** Reads every property source and builds `propertyCategories`. */
  def initialize(): Unit = {
    // Defaults first, so a missing properties file still yields a usable configuration.
    setDefaultProperties(properties)

    loadPropertiesFromFile(conf.getOption("spark.metrics.conf"))

    // Entries of the Spark configuration under this prefix are added with the prefix removed.
    val confPrefix = "spark.metrics.conf."
    for ((key, value) <- conf.getAll if key.startsWith(confPrefix)) {
      properties.setProperty(key.substring(confPrefix.length()), value)
    }

    propertyCategories = subProperties(properties, INSTANCE_REGEX)
    propertyCategories.get(DEFAULT_PREFIX).foreach { defaults =>
      val defaultEntries = defaults.asScala
      // Fill each concrete instance with the "*" entries it does not define itself.
      for {
        (instance, instanceProps) <- propertyCategories if (instance != DEFAULT_PREFIX)
        (key, value) <- defaultEntries if (instanceProps.get(key) == null)
      } {
        instanceProps.put(key, value)
      }
    }
  }

  /**
   * Splits `prop` by the first group of `regex`; the second group becomes the key inside the
   * Properties of that group. Keys with no matching prefix are skipped.
   */
  def subProperties(prop: Properties, regex: Regex): mutable.HashMap[String, Properties] = {
    val grouped = new mutable.HashMap[String, Properties]
    for (entry <- prop.asScala) {
      val key = entry._1.toString
      if (regex.findPrefixOf(key).isDefined) {
        val regex(instance, remainder) = key
        val target = grouped.getOrElseUpdate(instance, new Properties)
        target.setProperty(remainder, entry._2.toString)
      }
    }
    grouped
  }

  /** Properties of `inst`, else those of the "*" group, else fresh empty Properties. */
  def getInstance(inst: String): Properties =
    propertyCategories.getOrElse(
      inst,
      propertyCategories.getOrElse(DEFAULT_PREFIX, new Properties))

  /**
   * Loads `properties` from the file at `path`, or from the `metrics.properties` class-path
   * resource when `path` is empty. Failures while opening or loading are logged only.
   */
  private[this] def loadPropertiesFromFile(path: Option[String]): Unit = {
    var stream: InputStream = null
    try {
      stream = path
        .map(f => new FileInputStream(f): InputStream)
        .getOrElse(Utils.getSparkClassLoader.getResourceAsStream(DEFAULT_METRICS_CONF_FILENAME))

      if (stream != null) {
        properties.load(stream)
      }
    } catch {
      case e: Exception =>
        val file = path.getOrElse(DEFAULT_METRICS_CONF_FILENAME)
        logError(s"Error loading configuration file $file", e)
    } finally {
      if (stream != null) {
        stream.close()
      }
    }
  }

}
Example 100
Source File: DataClass.scala From gospeak with Apache License 2.0 | 5 votes |
package gospeak.libs.scala.domain

import java.util.UUID

import gospeak.libs.scala.Extensions._

import scala.util.Try
import scala.util.matching.Regex

/** Wrapper around a single string; equality, hash code and `toString` are based on `value`. */
abstract class DataClass(val value: String) {
  def canEqual(other: Any): Boolean = other.isInstanceOf[DataClass]

  override def equals(other: Any): Boolean = other match {
    case that: DataClass => (that canEqual this) && value == that.value
    case _ => false
  }

  override def hashCode(): Int = {
    val state = Seq(value)
    state.map(_.hashCode()).foldLeft(0)((a, b) => 31 * a + b)
  }

  override def toString: String = value
}

trait IId {
  val value: String
}

/**
 * Factory for UUID-backed ids.
 *
 * @param clazz name of the id type, used in error messages
 * @param build constructor from the raw string
 */
abstract class UuidIdBuilder[A <: IId](clazz: String, build: String => A) {
  /** Builds an id from a freshly generated random UUID. */
  def generate(): A = build(UUID.randomUUID().toString)

  /** Re-wraps the value of another id without validating it. */
  def from(id: IId): A = build(id.value)

  /** Validates `in` as a UUID; returns the id, or a `CustomException` listing the errors. */
  def from(in: String): Either[CustomException, A] = {
    val errs = errors(in)
    if (errs.isEmpty) Right(build(in))
    else Left(CustomException(s"'$in' is an invalid $clazz", errs))
  }

  /** The parse failure message of `UUID.fromString(in)`, if any. */
  private[domain] def errors(in: String): Seq[CustomError] = {
    val tryUuid = Try(UUID.fromString(in))
    Seq(
      tryUuid.failed.map(_.getMessage).toOption
    ).flatten.map(CustomError)
  }
}

trait ISlug {
  val value: String
}

/**
 * Factory for slug values constrained by `SlugBuilder.maxLength` and `SlugBuilder.pattern`.
 *
 * @param clazz name of the slug type, used in error messages
 * @param build constructor from the raw string
 */
abstract class SlugBuilder[A <: ISlug](clazz: String, build: String => A) {

  import SlugBuilder._

  /** Validates `in`; returns the slug, or a `CustomException` listing the errors. */
  def from(in: String): Either[CustomException, A] = {
    val errs = errors(in)
    if (errs.isEmpty) Right(build(in))
    else Left(CustomException(s"'$in' is an invalid $clazz", errs))
  }

  /** Length and pattern violations of `in`; the pattern has to match the whole string. */
  private[domain] def errors(in: String): Seq[CustomError] =
    Seq(
      if (in.length > maxLength) Some(s"$clazz should not exceed $maxLength chars") else None,
      in match {
        case pattern() => None
        // Fixed grammar of the message ("do not match" -> "does not match").
        case _ => Some(s"does not match pattern $pattern")
      }
    ).flatten.map(CustomError)
}

object SlugBuilder {
  val maxLength: Int = Values.maxLength.title
  val pattern: Regex = "[a-z0-9-_]+".r
}

trait StringEnum extends Product with Serializable {
  def value: String
}

/**
 * Lookup of enum members by their string value.
 *
 * @param clazz name of the enum type, used in error messages
 */
abstract class EnumBuilder[A <: StringEnum](clazz: String) {
  val all: Seq[A]

  /** Finds the member whose `value` equals `str`, or returns a `CustomException`. */
  def from(str: String): Either[CustomException, A] =
    // Fixed typo in the message ("in not a valid" -> "is not a valid").
    all.find(_.value == str).toEither(CustomException(s"'$str' is not a valid $clazz"))
}
Example 101
Source File: CommandComplete.scala From skunk with MIT License | 5 votes |
// Copyright (c) 2018-2020 by Rob Norris
// This software is licensed under the MIT License (MIT).
// For more information see LICENSE or https://opensource.org/licenses/MIT

package skunk.net.message

import scodec.Decoder
import scodec.codecs._
import skunk.data.Completion
import scala.util.matching.Regex

/** Backend message carrying the completion tag of a finished command. */
case class CommandComplete(completion: Completion) extends BackendMessage

object CommandComplete {

  final val Tag = 'C'

  // Command tags that carry a row count. Used as extractors, so each must match the whole tag.
  private object Patterns {
    val Select: Regex = """SELECT (\d+)""".r
    val Delete: Regex = """DELETE (\d+)""".r
    val Update: Regex = """UPDATE (\d+)""".r
    // The tag is "INSERT <oid> <rows>": capture only the row count so an oid of any width is
    // accepted (previously the count was recovered with `drop(2)`, assuming a one-digit oid).
    val Insert: Regex = """INSERT \d+ (\d+)""".r
  }

  /**
   * Decodes the C-string command tag into a [[CommandComplete]]; tags that are not recognized
   * are wrapped in `Completion.Unknown`.
   */
  //TODO: maybe make lazy val
  def decoder: Decoder[CommandComplete] = cstring.map {
    case "BEGIN"            => apply(Completion.Begin)
    case "COMMIT"           => apply(Completion.Commit)
    case "CREATE INDEX"     => apply(Completion.CreateIndex)
    case "DROP INDEX"       => apply(Completion.DropIndex)
    case "LISTEN"           => apply(Completion.Listen)
    case "LOCK TABLE"       => apply(Completion.LockTable)
    case "NOTIFY"           => apply(Completion.Notify)
    case "RESET"            => apply(Completion.Reset)
    case "SET"              => apply(Completion.Set)
    case "UNLISTEN"         => apply(Completion.Unlisten)
    case "ROLLBACK"         => apply(Completion.Rollback)
    case "SAVEPOINT"        => apply(Completion.Savepoint)
    case "CREATE TABLE"     => apply(Completion.CreateTable)
    case "DROP TABLE"       => apply(Completion.DropTable)
    case "ALTER TABLE"      => apply(Completion.AlterTable)
    case "CREATE SCHEMA"    => apply(Completion.CreateSchema)
    case "DROP SCHEMA"      => apply(Completion.DropSchema)
    case Patterns.Select(s) => apply(Completion.Select(s.toInt))
    case Patterns.Delete(s) => apply(Completion.Delete(s.toInt))
    // Fixed: UPDATE tags were reported as Completion.Delete.
    case Patterns.Update(s) => apply(Completion.Update(s.toInt))
    case Patterns.Insert(s) => apply(Completion.Insert(s.toInt))
    // more .. fill in as we hit them
    case s                  => apply(Completion.Unknown(s))
  }

}
Example 102
Source File: PluginParser.scala From sbt-dependency-updates with Apache License 2.0 | 5 votes |
package org.jmotor.sbt.parser

import sbt.ModuleID

import scala.util.matching.Regex

/** Extracts plugin coordinates from `addSbtPlugin(...)` lines of an sbt build file. */
object PluginParser {

  lazy val AddSbtPluginRegex: Regex = """addSbtPlugin\("([\w\.-]+)" *%{1,2} *"([\w\.-]+)"\ *% *"([\w\.-]+)"\)""".r

  /**
   * Trims every line and converts those that are a complete `addSbtPlugin("org" % "name" % "version")`
   * call into a `ModuleID`; all other lines are dropped.
   */
  def parse(lines: Seq[String]): Seq[ModuleID] = {
    val candidates = for {
      raw <- lines
      trimmed = raw.trim
      if trimmed.nonEmpty && trimmed.startsWith("addSbtPlugin")
    } yield trimmed

    candidates.collect {
      case AddSbtPluginRegex(organization, name, version) => ModuleID(organization, name, version)
    }
  }

}
Example 103
Source File: VersionParser.scala From sbt-dependency-updates with Apache License 2.0 | 5 votes |
package org.jmotor.sbt.parser

import scala.util.matching.Regex

/** Reads the lines of an `object Versions { ... }` block out of Scala source text. */
object VersionParser {

  lazy val VersionRegex: Regex = """val ?(\w+) ?= ?"(.*)"""".r

  lazy val VersionsObjectRegex: Regex = """[\t ]*object ?Versions ?\{([^{]*)[\t ]*\}""".r

  /**
   * Returns the non-empty lines inside the first `object Versions { ... }` block of `text`,
   * with tabs removed and surrounding whitespace trimmed. Empty when no such block exists.
   */
  def parseVersionLines(text: String): Array[String] =
    VersionsObjectRegex
      .findFirstMatchIn(text)
      .map(_.group(1))
      .fold(Array.empty[String]) { body =>
        val cleaned = body.split("\n").map(raw => raw.replace("\t", "").trim)
        cleaned.filter(_.nonEmpty)
      }

}
Example 104
Source File: QueryConstants.scala From gimel with Apache License 2.0 | 5 votes |
package com.paypal.gimel.parser.utilities

import scala.util.matching.Regex

/** String constants and keyword lists used when inspecting SQL text. */
object QueryConstants {
  // Individual statement keywords.
  val DDL_CREATE_STRING: String = "CREATE"
  val DDL_DROP_STRING: String = "DROP"
  val DDL_TRUNCATE_STRING: String = "TRUNCATE"
  val DDL_ALTER_STRING: String = "ALTER"
  val DDL_FROM_STRING: String = "FROM"
  val DDL_DELETE_STRING: String = "DELETE"
  val DML_INSERT_STRING: String = "INSERT"
  val DML_UPDATE_STRING: String = "UPDATE"
  // NOTE(review): "SEL" rather than "SELECT" — presumably matched as a prefix; confirm against callers.
  val SQL_SELECT_STRING: String = "SEL"
  val SQL_LIMIT_STRING: String = "LIMIT"
  // Keyword groups built from the constants above.
  val IS_EXECUTE_QUERY: Seq[String] = Seq(SQL_SELECT_STRING, "EXPLAIN")
  val IS_DDL_QUERY: Seq[String] = Seq(DDL_CREATE_STRING, DDL_DROP_STRING, DDL_TRUNCATE_STRING, DDL_ALTER_STRING)
  val IS_DDL_DML_QUERY: Seq[String] = IS_DDL_QUERY ++ Seq(
    DDL_DELETE_STRING,
    DML_INSERT_STRING,
    DML_UPDATE_STRING,
    "COLLECT"
  )
  val SAMPLE_ROWS_IDENTIFIER: String = "samples are specified as a number of rows"
  // Matches SAMPLE_ROWS_IDENTIFIER delimited by word boundaries.
  val SAMPLE_ROWS_PATTERN: Regex = s"\\b$SAMPLE_ROWS_IDENTIFIER\\b".r
  val NEW_LINE: String = "\n"
  val SPACE_CHAR: String = " "
  val SQL_COMMENT : String = "--"
  val SEMI_COLON: String = ";"
  // NOTE(review): "SUM" is listed twice in this sequence.
  val TERADATA_ANALYTICAL_FUNCTIONS: Seq[String] = Seq("GROUP", "COUNT", "HAVING", "OVER", "ORDER", "SUM", "MIN", "MAX", "SUM", "FIRST_VALUE", "LAST_VALUE", "ROW_NUMBER", "RANK", "DENSE_RANK")
  val GTS_GET_ALL_TABLES_DEFAULT_SEARCH_LIST: Set[String] = Set("FROM", "JOIN")
  val GTS_GET_ALL_TABLES_ALL_SQL_TYPES_SEARCH_LIST: Set[String] = Set("INTO", "VIEW", "TABLE", "FROM", "JOIN", "DESCRIBE")
}
Example 105
Source File: OutputTransformer.scala From play-swagger with Apache License 2.0 | 5 votes |
package com.iheart.playSwagger import java.util.regex.Pattern import com.iheart.playSwagger.OutputTransformer.SimpleOutputTransformer import play.api.libs.json.{ JsArray, JsString, JsValue, JsObject } import scala.util.matching.Regex import scala.util.{ Success, Failure, Try } def >=>(b: JsObject ⇒ Try[JsObject]): OutputTransformer = SimpleOutputTransformer { value: JsObject ⇒ this.apply(value).flatMap(b) } } object OutputTransformer { final case class SimpleOutputTransformer(run: (JsObject ⇒ Try[JsObject])) extends OutputTransformer { override def apply(value: JsObject): Try[JsObject] = run(value) } def traverseTransformer(vals: JsArray)(transformer: JsValue ⇒ Try[JsValue]): Try[JsArray] = { val tryElements = vals.value.map { case value: JsObject ⇒ traverseTransformer(value)(transformer) case value: JsArray ⇒ traverseTransformer(value)(transformer) case value: JsValue ⇒ transformer(value) } val failures: Seq[Failure[JsValue]] = tryElements.filter(_.isInstanceOf[Failure[_]]).asInstanceOf[Seq[Failure[JsValue]]] if (failures.nonEmpty) { Failure(failures.head.exception) } else { Success(JsArray(tryElements.asInstanceOf[Seq[Success[JsValue]]].map(_.value))) } } def traverseTransformer(obj: JsObject)(transformer: JsValue ⇒ Try[JsValue]): Try[JsObject] = { val tryFields = obj.fields.map { case (key, value: JsObject) ⇒ (key, traverseTransformer(value)(transformer)) case (key, values: JsArray) ⇒ (key, traverseTransformer(values)(transformer)) case (key, value: JsValue) ⇒ (key, transformer(value)) } val failures: Seq[(String, Failure[JsValue])] = tryFields .filter(_._2.isInstanceOf[Failure[_]]) .asInstanceOf[Seq[(String, Failure[JsValue])]] if (failures.nonEmpty) { Failure(failures.head._2.exception) } else { Success(JsObject(tryFields.asInstanceOf[Seq[(String, Success[JsValue])]].map { case (key, Success(result)) ⇒ (key, result) })) } } } class PlaceholderVariablesTransformer(map: String ⇒ Option[String], pattern: Regex = "^\\$\\{(.*)\\}$".r) extends OutputTransformer { 
def apply(value: JsObject) = OutputTransformer.traverseTransformer(value) { case JsString(pattern(key)) ⇒ map(key) match { case Some(result) ⇒ Success(JsString(result)) case None ⇒ Failure(new IllegalStateException(s"Unable to find variable $key")) } case e: JsValue ⇒ Success(e) } } final case class MapVariablesTransformer(map: Map[String, String]) extends PlaceholderVariablesTransformer(map.get) class EnvironmentVariablesTransformer extends PlaceholderVariablesTransformer((key: String) ⇒ Option(System.getenv(key)))
Example 106
Source File: NamingStrategy.scala From play-swagger with Apache License 2.0 | 5 votes |
package com.iheart.playSwagger

import scala.util.matching.Regex

/** A key-renaming function, selectable by name through `NamingStrategy.from`. */
sealed abstract class NamingStrategy(f: String => String) extends (String => String) {
  override def apply(keyName: String): String = f(keyName)
}

object NamingStrategy {
  // Every upper-case ASCII letter or digit is a rewrite point for the case conversions below.
  val regex: Regex = "[A-Z\\d]".r

  /** Lower-cases each character matched by `regex` and puts `separator` in front of it. */
  private def lowerWithSeparator(separator: String, key: String): String =
    regex.replaceAllIn(key, hit => separator + hit.matched.toLowerCase())

  /** Leaves keys untouched. */
  object None extends NamingStrategy(identity)

  /** `fooBar` becomes `foo_bar`. */
  object SnakeCase extends NamingStrategy(key => lowerWithSeparator("_", key))

  /** `fooBar` becomes `foo-bar`. */
  object KebabCase extends NamingStrategy(key => lowerWithSeparator("-", key))

  /** `fooBar` becomes `foobar`. */
  object LowerCase extends NamingStrategy(key => lowerWithSeparator("", key))

  /** Upper-cases the first character: `fooBar` becomes `FooBar`. */
  object UpperCamelCase extends NamingStrategy(key => key.take(1).toUpperCase() + key.drop(1))

  /** Looks a strategy up by its configuration name; unknown names select `None`. */
  def from(naming: String): NamingStrategy =
    if (naming == "snake_case") SnakeCase
    else if (naming == "kebab-case") KebabCase
    else if (naming == "lowercase") LowerCase
    else if (naming == "UpperCamelCase") UpperCamelCase
    else None
}
Example 107
Source File: StringFunctions.scala From albedo with MIT License | 5 votes |
package ws.vinta.albedo.closures

import scala.util.matching.Regex

/** Regex-based helpers for pulling words and e-mail domains out of text. */
object StringFunctions extends Serializable {
  // Character-class bodies: word characters, dot, hyphen, underscore (plus CJK blocks in the second).
  val wordPatternEngOnly = """\w\.\-_"""
  val wordPatternIncludeCJK = """\w\.\-_\p{InHiragana}\p{InKatakana}\p{InBopomofo}\p{InCJKCompatibilityIdeographs}\p{InCJKUnifiedIdeographs}"""

  val reExtractWords: Regex = s"([$wordPatternEngOnly]+)".r
  val reExtractWordsIncludeCJK: Regex = s"([$wordPatternIncludeCJK]+)".r
  val reExtractEmailDomain: Regex = s"@([$wordPatternEngOnly]+)".r

  /** All maximal runs of word characters, dots, hyphens and underscores in `text`. */
  def extractWords(text: String): List[String] = {
    reExtractWords.findAllIn(text).toList
  }

  /** Like [[extractWords]], but the listed CJK blocks also count as word characters. */
  def extractWordsIncludeCJK(text: String): List[String] = {
    reExtractWordsIncludeCJK.findAllIn(text).toList
  }

  /**
    * Returns the word run following the first `@` that has one, or `email` itself when there is
    * no such match.
    */
  def extractEmailDomain(email: String): String =
    // Option-based lookup instead of indexing an empty list and catching IndexOutOfBoundsException.
    reExtractEmailDomain.findFirstMatchIn(email).map(_.group(1)).getOrElse(email)
}
Example 108
Source File: ConfigReader.scala From aloha with Apache License 2.0 | 5 votes |
package me.jrwang.aloha.common.config import java.util.{Map => JMap} import scala.collection.mutable import scala.util.matching.Regex private object ConfigReader { private val REF_REGEX = "\\$\\{(?:(\\w+?):)?(\\S+?)\\}".r } private def getOrDefault(conf: ConfigProvider, key: String): Option[String] = { conf.get(key).orElse { ConfigEntry.findEntry(key) match { case e: ConfigEntryWithDefault[_] => Option(e.defaultValueString) case e: ConfigEntryWithDefaultString[_] => Option(e.defaultValueString) case e: ConfigEntryWithDefaultFunction[_] => Option(e.defaultValueString) case e: FallbackConfigEntry[_] => getOrDefault(conf, e.fallback.key) case _ => None } } } }
Example 109
Source File: macro_impl.scala From lms-clean with BSD 3-Clause "New" or "Revised" License | 5 votes |
package lms.experimental

//import language.experimental.macros
import scala.annotation.StaticAnnotation
import scala.reflect.macros.whitebox.Context

import scala.util.matching.Regex
import scala.collection.mutable

/** Macro implementation that rewrites an annotated `def` into `reflect`-based definitions. */
object ir_impl {
  /**
    * Expects exactly one annottee that is a method definition.
    *
    *  - An abstract `def` gets a body that calls `reflect` with the method name and `ref(arg)` per argument.
    *  - A concrete `def` is expanded into a `lower(...)` registration, a `reflect`-based method of the
    *    same name, and a `<name>_next` method holding the original body.
    *
    * Any other tree shape falls through the match (a `MatchError` at expansion time).
    */
  def impl(c: Context)(annottees: c.Expr[Any]*): c.Expr[Any] = {
    import c.universe._
    // Debug output printed while the macro expands.
    println("YOUPI")
    println(c.prefix)

    // Fails with a MatchError unless there is exactly one annottee.
    val List(a) = annottees
    println(a)

    a.tree match {
      case q"def $name[..$t](..$args): $tpe" =>
        val args1 = args map { case ValDef(_,x,_,_) => q"ref($x)" }
        return c.Expr(q"def $name[..$t](..$args): $tpe = reflect[$tpe](${name.toString},..$args1)")
      case q"def $name[..$t](..$args): $tpe = $body" =>
        // TODO: strip by-name type
        val args0 = args map { case ValDef(_,x,_,_) => q"$x" }
        val args1 = args map { case ValDef(_,x,_,_) => q"ref($x)" }
        val name_next = TermName(name.toString + "_next")
        // TODO: what if we have type parameters? just disallow?
        return c.Expr(q"""
          lower((..$args) => Rewrite[$tpe]($name(..$args0), $name_next(..$args0)))
          def $name[..$t](..$args): $tpe = reflect[$tpe](${name.toString},..$args1)
          def $name_next[..$t](..$args): $tpe = $body
        """)
      // TODO class def
      //case t@ClassDef(_,_,_,_) =>
    }
  }
}
Example 110
Source File: DataPreprocess.scala From xgbspark-text-classification with Apache License 2.0 | 5 votes |
package com.lenovo.ml

import org.apache.spark.sql.{SparkSession, DataFrame, Dataset}
import scala.collection.mutable
import scala.util.matching.Regex
import org.ansj.library.DicLibrary
import org.ansj.recognition.impl.StopRecognition
import org.ansj.splitWord.analysis.DicAnalysis

/** Text cleaning and word segmentation helpers built on Spark and the ansj segmenter. */
object DataPreprocess {

  /**
   * Turns every row of `rawText` into a cleaned, lower-cased string.
   *
   * Each row is rendered with `toString`, its first and last characters are
   * dropped, the text is lower-cased, and then every match of the four
   * patterns below is removed, in order.
   *
   * @param sparkSession session whose implicits provide the string encoder
   * @param rawText      input rows
   * @return one cleaned string per input row
   */
  def textCleaner(sparkSession: SparkSession, rawText: DataFrame): Dataset[String] = {
    // Filter out times, URLs and e-mail addresses from the text
    val regex1 = new Regex("""[-—0-9a-z]+[:]+[0-9a-z]+[:]?""")
    val regex2 = new Regex("""[0-9]+年|[0-9]+月|[0-9]+[日]|[0-9]+[天]|[0-9]+[号]|[0-9]+[次]""")
    val regex3 = new Regex("""http[s]?://[a-z0-9./?=_-]+""")
    val regex4 = new Regex("""[0-9_a-z]+([-+.][0-9_a-z]+)*@[0-9_a-z]+([-.][0-9_a-z]+)*\.[0-9_a-z]+([-.][0-9_a-z]+)*""")
    import sparkSession.implicits._
    rawText.map(x => x.toString).map(x => x.substring(1,x.length - 1).toLowerCase).map(x => regex1.replaceAllIn(x,""))
      .map(x => regex2.replaceAllIn(x,"")).map(x => regex3.replaceAllIn(x,"")).map(x => regex4.replaceAllIn(x,""))
  }

  /**
   * Cleans `rawText` with [[textCleaner]] and segments every string into words.
   *
   * The stop-word list, custom dictionary, synonym table and single-character
   * whitelist are read as text files, collected on the driver and broadcast.
   * Segmented words are trimmed; a word is kept when it is longer than one
   * character or is in the whitelist; words with a synonym entry are replaced
   * by it; the remaining words are joined with a single space.
   *
   * @param stopWordsPath    text file of stop words, one per line
   * @param dictionaryPath   text file of custom dictionary entries, one per line
   * @param synonymWordsPath text file of synonym pairs; each line is split on the
   *                         first space into word and replacement
   * @param singleWordsPath  text file of whitelisted single-character words
   * @return a DataFrame with one column named "words"
   */
  def segWords(sparkSession: SparkSession, stopWordsPath: String, dictionaryPath: String, synonymWordsPath: String,
               singleWordsPath: String, rawText: DataFrame): DataFrame = {
    val filter = new StopRecognition()
    // Set the part-of-speech tags that are treated as stop natures
    filter.insertStopNatures("w","ns","nr","t","r","u","e","y","o")

    // Load the stop-word list
    val stopWords = sparkSession.sparkContext.textFile(stopWordsPath).cache()
    stopWords.collect().foreach{line => filter.insertStopWords(line)}

    // Load the custom dictionary (the stop words are inserted into it as well)
    val dictionary = sparkSession.sparkContext.textFile(dictionaryPath).cache()
    dictionary.collect().foreach{line => DicLibrary.insert(DicLibrary.DEFAULT, line)}
    stopWords.collect().foreach{line => DicLibrary.insert(DicLibrary.DEFAULT, line)}

    // Build the synonym table
    val synonymWords = sparkSession.sparkContext.textFile(synonymWordsPath).cache()
    var synonymMap: Map[String, String] = Map()
    synonymWords.collect().foreach{line =>
      // NOTE(review): a line without a space yields a one-element array, so
      // data(1) would throw; confirm the input file format guarantees a pair.
      val data = line.split(" ",2)
      synonymMap = synonymMap + (data(0) -> data(1))
    }

    // Build the single-character whitelist
    val singleWords = sparkSession.sparkContext.textFile(singleWordsPath).cache()
    val singleWhiteList: mutable.Set[String] = mutable.Set()
    singleWords.collect().foreach{line => singleWhiteList.add(line)}

    // Broadcast the word lists to every node
    val stop = sparkSession.sparkContext.broadcast(filter)
    val dic = sparkSession.sparkContext.broadcast(DicLibrary.get(DicLibrary.DEFAULT))
    val synonym = sparkSession.sparkContext.broadcast(synonymMap)
    val single = sparkSession.sparkContext.broadcast(singleWhiteList)

    // Read the text data, clean it, then segment it into words
    import sparkSession.implicits._
    textCleaner(sparkSession, rawText).map { x =>
      val parse = DicAnalysis.parse(x, dic.value).recognition(stop.value)
      // Extract the segmentation result without the part-of-speech tags
      val words = for(i<-Range(0,parse.size())) yield parse.get(i).getName
      val filterWords = words.map(_.trim).filter(x => x.length > 1 || single.value.contains(x))
      filterWords.map(x => if(synonym.value.contains(x)) synonym.value(x) else x).mkString(" ")
    }.toDF("words")
  }
}
Example 111
Source File: FullId.scala From shapenet-viewer with MIT License | 5 votes |
package edu.stanford.graphics.shapenet.common

import java.io.File

import scala.util.matching.Regex

/**
 * Identifier made of a source name and an id within that source.
 *
 * @param source name of the source the id belongs to
 * @param id     id within the source
 */
case class FullId(source: String, id: String) {
  /** `source` and `id` joined by a dot. */
  lazy val fullid = source + "." + id
}

object FullId {

  /**
   * Shape of a well-formed full id: `source.id`, where both parts consist of
   * ASCII letters, digits, `_` and `-`.
   *
   * The letter ranges are `a-z` and `A-Z`. A range written `A-z` would also
   * admit the punctuation between `Z` and `a` in ASCII: `[`, `\`, `]`, `^`
   * and the backtick.
   */
  val fullIdRegex = new Regex("([a-zA-Z0-9_-]+)\\.([a-zA-Z0-9_-]+)")

  /**
   * Parses a full id string.
   *
   * - `http://` / `https://` URLs, paths starting with `/` and absolute file
   *   paths become source `"raw"` with the whole string as id.
   * - `file://` URLs become source `"raw"` with the scheme prefix removed.
   * - Otherwise, a string with a dot after its first character is split at the
   *   first dot into source and id.
   * - Otherwise the source is `defaultSource`, or `"wssScenes"` when the string
   *   contains `"scene"`, or `"3dw"`.
   *
   * @param fullid        string to parse
   * @param defaultSource source to use when `fullid` carries none
   */
  def apply(fullid: String, defaultSource: Option[String] = None): FullId = {
    val dotIndex = fullid.indexOf('.')
    val (source, id) =
      if (fullid.startsWith("http://") || fullid.startsWith("https://")) {
        ("raw", fullid)
      } else if (fullid.startsWith("file://")) {
        // Drop the 7-character "file://" prefix.
        ("raw", fullid.substring(7))
      } else if (fullid.startsWith("/")) {
        ("raw", fullid)
      } else if (new File(fullid).isAbsolute) {
        ("raw", fullid)
      } else if (dotIndex > 0) {
        (fullid.substring(0, dotIndex), fullid.substring(dotIndex + 1))
      } else {
        val s = defaultSource.getOrElse(if (fullid.contains("scene")) "wssScenes" else "3dw")
        (s, fullid)
      }
    new FullId(source, id)
  }

  /** True when both strings parse (with no default source) to the same [[FullId]]. */
  def matches(id1: String, id2: String): Boolean = FullId(id1) == FullId(id2)

  /** True when the whole of `s` has the `source.id` shape described by [[fullIdRegex]]. */
  def isFullId(s: String): Boolean = fullIdRegex.pattern.matcher(s).matches()
}
Example 112
Source File: InMemoryJavaSource.scala From aloha with MIT License | 5 votes |
package com.eharmony.aloha.models.h2o.compiler

import java.net.URI
import javax.tools.JavaFileObject.Kind
import javax.tools.SimpleJavaFileObject

import com.eharmony.aloha.reflect.{RefInfo, RefInfoOps}

import scala.util.matching.Regex

/**
 * Java source held in memory and exposed to the compiler API as a file object
 * of kind SOURCE.
 *
 * @param code      the Java source text
 * @param className class name used to build the file object's URI
 * @tparam B base class or interface the source is expected to extend/implement
 */
private[h2o] case class InMemoryJavaSource[B](code: String, className: String)(implicit baseClassInfo: RefInfo[B])
  extends SimpleJavaFileObject(InMemoryJavaSource.classNameToUri(className), Kind.SOURCE) {

  /** Returns the source text; the flag is not consulted. */
  override def getCharContent(ignoreEncodingErrors: Boolean): CharSequence = code
}

private[h2o] object InMemoryJavaSource {

  /** Matches a `package a.b.c;` declaration; group 1 is the dotted package name. */
  val pkgRe = """package\s+([a-z_][a-zA-Z0-9_]*(\.[a-z_][a-zA-Z0-9_]*)*)\s*;""".r

  /**
   * Builds an in-memory source when a public class extending/implementing `B`
   * can be found in `code`; the class name is prefixed with the package when
   * a package declaration is present.
   */
  def fromString[B](code: String)(implicit baseClass: RefInfo[B]): Option[InMemoryJavaSource[B]] =
    for {
      simpleName <- determineClassName(code, classNameRegex[B](code))
    } yield new InMemoryJavaSource[B](code, determineCanonicalClassName(code)(simpleName))

  /**
   * Regex for `public class <Name> (implements|extends) <B>`; group 1 is the
   * class name. The keyword is `implements` when `B` is a Java interface.
   */
  def classNameRegex[B](code: String)(implicit baseClass: RefInfo[B]): Regex = {
    val keyword = if (RefInfoOps.isJavaInterface[B]) "implements" else "extends"
    val basePattern = RefInfoOps.classRegex[B].toString()
    val pieces = Seq(
      """public\s+class\s+([A-Za-z_][0-9A-Za-z_]*)\s+""",
      keyword,
      """\s+""",
      basePattern,
      """[\s<\{]"""
    )
    new Regex(pieces.mkString)
  }

  /** First capture group of the first match of `classNameRegex` in `code`, if any. */
  def determineClassName(code: String, classNameRegex: Regex): Option[String] =
    classNameRegex.findFirstMatchIn(code).map(found => found.group(1))

  /** Prefixes `className` with `<package>.` when `code` declares a package. */
  def determineCanonicalClassName(code: String)(className: String) = {
    val packagePrefix = pkgRe.findFirstIn(code) match {
      case Some(pkgRe(pkg, _)) => s"$pkg."
      case None => ""
    }
    packagePrefix + className
  }

  /** URI of the form `string:///a/b/C.java` for class name `a.b.C`. */
  def classNameToUri(className: String): URI = {
    val path = className.replace(".", "/")
    URI.create("string:///" + path + Kind.SOURCE.extension)
  }
}
Example 113
Source File: JacksonInstances.scala From circe-jackson with Apache License 2.0 | 5 votes |
package io.circe.jackson

import cats.Eq
import cats.instances.list._
import cats.instances.map._
import io.circe.{ Json, JsonBigDecimal, JsonBiggerDecimal, JsonDecimal, JsonDouble, JsonFloat, JsonLong, JsonNumber }
import io.circe.Json.{ JArray, JNumber, JObject, JString }
import io.circe.numbers.BiggerDecimal
import io.circe.testing.ArbitraryInstances
import org.scalacheck.Arbitrary
import scala.util.matching.Regex
import scala.util.Try
import java.nio.ByteBuffer

/**
 * Test helpers that normalise generated JSON numbers.
 *
 * NOTE(review): `SigExpPattern` and `replacement` are used below but are not
 * defined in this excerpt; they presumably come from an elided part of the
 * file or from the self-type.
 */
trait JacksonInstances { this: ArbitraryInstances =>

  /**
   * Returns a cleaned version of `n`.
   *
   * When the string form of `n` matches `SigExpPattern` and the captured
   * exponent is not a `Long` that is at most `Short.MaxValue`, the result is
   * `replacement`. Otherwise the number is handled by representation:
   * decimals are converted to the bigger-decimal form and cleaned again;
   * bigger decimals are round-tripped through `BigDecimal` and kept only if
   * the round trip is exact and the magnitude does not exceed
   * `Double.MaxValue`; all other representations are returned unchanged.
   */
  def cleanNumber(n: JsonNumber): JsonNumber = n.toString match {
    case SigExpPattern(exp) if !Try(exp.toLong).toOption.exists(_ <= Short.MaxValue.toLong) => replacement
    case _ =>
      n match {
        case v @ JsonDecimal(_) => cleanNumber(JsonBiggerDecimal(v.toBiggerDecimal, v.toString))
        case v @ JsonBiggerDecimal(value, _) =>
          // No BigDecimal representation available: fall back to `replacement`.
          value.toBigDecimal.map(BigDecimal(_)).fold(replacement) { d =>
            val fromBigDecimal = BiggerDecimal.fromBigDecimal(d.bigDecimal)

            if (fromBigDecimal == value && d.abs <= BigDecimal(Double.MaxValue)) v
            else JsonBiggerDecimal(fromBigDecimal, fromBigDecimal.toString)
          }
        case v @ JsonBigDecimal(_) => v
        case v @ JsonDouble(_) => v
        case v @ JsonFloat(_) => v
        case v @ JsonLong(_) => v
      }
  }

  /** Applies [[cleanNumber]] to every number in `json`, recursing into arrays and objects. */
  def cleanNumbers(json: Json): Json =
    json.mapNumber(cleanNumber).mapArray(_.map(cleanNumbers)).mapObject(_.mapValues(cleanNumbers))

  /** Generator of arbitrary JSON values whose numbers have been cleaned. */
  val arbitraryCleanedJson: Arbitrary[Json] = Arbitrary(Arbitrary.arbitrary[Json].map(cleanNumbers))
}
Example 114
Source File: Template.scala From scala-clippy with Apache License 2.0 | 5 votes |
package com.softwaremill.clippy

import java.util.regex.Pattern

import scala.util.Try
import scala.util.matching.Regex

/** A template identified by its textual value `v`. */
sealed trait Template {
  def v: String
}

/** Template that stands for exactly the text `v`. */
case class ExactT(v: String) extends Template {
  override def toString = v
}

/** Template whose value `v` is a regular expression. */
case class RegexT(v: String) extends Template {

  /** Compiled form of `v`; an expression that cannot be compiled is replaced by `^$`. */
  lazy val regex = Try(new Regex(v)).toOption.getOrElse(new Regex("^$"))

  /** True when the whole text of `e` matches the regex. */
  def matches(e: ExactT): Boolean = {
    val matcher = regex.pattern.matcher(e.v)
    matcher.matches()
  }

  override def toString = v
}

object RegexT {

  /**
   * Builds a template from a wildcard pattern: each `*` becomes `.*` and the
   * text between the stars is quoted so that it matches literally.
   */
  def fromPattern(pattern: String): RegexT = {
    val literalParts = pattern.split("\\*", -1)
    val quotedParts = literalParts.map(part => if (part.isEmpty) part else Pattern.quote(part))
    fromRegex(quotedParts.mkString(".*"))
  }

  /** Wraps a regular expression string as-is. */
  def fromRegex(v: String): RegexT = new RegexT(v)

  /**
   * True when both sets have the same size and every regex template matches
   * at least one of the exact templates.
   */
  def setMatches(rr: Set[RegexT], ee: Set[ExactT]): Boolean =
    rr.size == ee.size && rr.toList.forall(template => ee.exists(exact => template.matches(exact)))
}
Example 115
Source File: package.scala From twitter4s with Apache License 2.0 | 5 votes |
package com.danielasfregola.twitter4s

import java.net.URLEncoder

import scala.util.matching.Regex

package object http {

  /** Adds URL encoding to strings. */
  implicit class RichString(val value: String) extends AnyVal {

    /**
     * Encodes the string with `URLEncoder` (UTF-8) and then rewrites three
     * tokens of the result: `+` becomes `%20`, `*` becomes `%2A` and `%7E`
     * becomes `~`.
     */
    def urlEncoded = {
      val formEncoded = URLEncoder.encode(value, "UTF-8")
      val adjustments = """\+|\*|%7E""".r
      adjustments.replaceAllIn(
        formEncoded,
        (hit: Regex.Match) =>
          hit.group(0) match {
            case "%7E" => "~"
            case "*" => "%2A"
            case "+" => "%20"
          }
      )
    }
  }
}
Example 116
Source File: CliUtils.scala From flamy with Apache License 2.0 | 5 votes |
package com.flaminem.flamy.utils

import com.flaminem.flamy.parsing.ParsingUtils

import scala.util.matching.Regex
import scala.util.matching.Regex.Groups

// NOTE(review): this listing is an excerpt. The enclosing definition of `split`
// is not shown (only its closing brace is), and `argRegex` and
// `removeBackSlashes` are referenced but not defined here.

  /**
   * Splits `string` into arguments: one result per match of `argRegex`.
   *
   * For each match the first capture group is taken as the argument; a null
   * group (the group did not participate in the match) yields the empty
   * string. Otherwise every match of `ParsingUtils.quotedStringRegex` inside
   * the argument is replaced by the result of `removeBackSlashes` applied to
   * the quoted content and its quote character.
   */
  def split(string: String): Seq[String] = {
    // Replacement for one quoted string. The result goes through
    // Regex.quoteReplacement so that `$` and `\` in it are taken literally.
    def replacer(m: Regex.Match): String = {
      m match {
        case Groups(quote, s) => Regex.quoteReplacement(removeBackSlashes(s, quote))
      }
    }
    for {
      Groups(arg, _, _) <- argRegex.findAllMatchIn(string).toSeq
    } yield {
      if(arg == null){
        ""
      }
      else {
        ParsingUtils.quotedStringRegex.replaceAllIn(arg, replacer _)
      }
    }
  }

}
Example 117
Source File: TableName.scala From flamy with Apache License 2.0 | 5 votes |
package com.flaminem.flamy.model.names

import com.flaminem.flamy.parsing.ParsingUtils
import org.rogach.scallop.ValueConverter

import scala.language.implicitConversions
import scala.reflect.runtime.universe._
import scala.util.matching.Regex

/**
 * Name of a table, stored as a single dotted string in `fullName`.
 * Instances are created through the companion object.
 */
class TableName private (val fullName: String) extends ItemName {

  /** Schema built from the first dot-separated component of `fullName`. */
  lazy val schemaName: SchemaName = SchemaName(fullName.split('.')(0))

  /** Second dot-separated component of `fullName`. */
  lazy val name: String = fullName.split('.')(1)

  /** True when `schema` is a schema name equal to this table's schema. */
  def isInSchema(schema: ItemName): Boolean =
    schema match {
      case candidate: SchemaName => candidate.equals(schemaName)
      case _ => false
    }

  /** True when `that` is this table's schema or a table name equal to this one. */
  override def isInOrEqual(that: ItemName): Boolean =
    that match {
      case schema: SchemaName => isInSchema(schema)
      case table: TableName => table==this
      case _ => false
    }

}

object TableName {

  // TODO: during parsing, table names with wrong names are allowed. We should probably add some safety about that.
  /** Wraps the lower-cased `fullName` without validating it. */
  def apply(fullName: String): TableName = new TableName(fullName.toLowerCase)

  def unapply(tableName: TableName): Option[String] = Some(tableName.fullName)

  /** Joins the lower-cased schema and table names with a dot. */
  def apply(schemaName: String, tableName: String): TableName = {
    val schemaPart = schemaName.toLowerCase
    val tablePart = tableName.toLowerCase
    new TableName(schemaPart + "." + tablePart)
  }

  /** Joins the schema's full name and the lower-cased table name with a dot. */
  def apply(schemaName: SchemaName, tableName: String): TableName = {
    val tablePart = tableName.toLowerCase
    new TableName(schemaName.fullName + "." + tablePart)
  }

  /** Orders table names by comparing their full names as strings. */
  implicit val order: Ordering[TableName] = new Ordering[TableName]{
    override def compare(x: TableName, y: TableName): Int = {
      val left = x.fullName
      val right = y.fullName
      left.compareTo(right)
    }
  }

  val t: String = ParsingUtils.t

  /** Two `t` tokens separated by a single dot, anchored at both ends of the input. */
  val tableRegex: Regex = s"\\A$t[.]$t\\z".r

  /** Some lower-cased table name when `s` matches [[tableRegex]], otherwise None. */
  def parse(s: String): Option[TableName] =
    s match {
      case tableRegex() => Some(new TableName(s.toLowerCase))
      case _ => None
    }

  // Single command-line argument: a parsed name, or an empty error message.
  private def fromArg(arg: String): Either[String, Option[TableName]] =
    parse(arg) match {
      case Some(table) => Right(Some(table))
      case None => Left("")
    }

  // Several command-line arguments: all of them must parse, otherwise an empty error message.
  private def fromArgs(args: Seq[String]): Either[String, Option[List[TableName]]] = {
    val attempts: Seq[Option[TableName]] = args.map(arg => parse(arg))
    if (attempts.exists(_.isEmpty)) {
      Left("")
    }
    else {
      Right(Some(attempts.flatten.toList))
    }
  }

  /** Scallop converter for a single table name; uses the first value supplied. */
  implicit val scallopConverter: ValueConverter[TableName] = {
    new ValueConverter[TableName] {
      override def parse(s: List[(String, List[String])]): Either[String, Option[TableName]] =
        if (s.isEmpty) {
          Right(None)
        }
        else {
          val values = s.flatMap{case (_, vs) => vs}
          fromArg(values.head)
        }
      override val tag: TypeTag[TableName] = typeTag[TableName]
      override val argType = org.rogach.scallop.ArgType.SINGLE
    }
  }

  /** Scallop converter for a list of table names; uses every value supplied. */
  implicit val scallopConverterList: ValueConverter[List[TableName]] = {
    new ValueConverter[List[TableName]] {
      override def parse(s: List[(String, List[String])]): Either[String, Option[List[TableName]]] =
        if (s.isEmpty) {
          Right(None)
        }
        else {
          fromArgs(s.flatMap{case (_, vs) => vs})
        }
      override val tag: TypeTag[List[TableName]] = typeTag[List[TableName]]
      override val argType = org.rogach.scallop.ArgType.LIST
    }
  }

  implicit def fromString(s: String): TableName = TableName(s)

}
Example 118
Source File: SchemaName.scala From flamy with Apache License 2.0 | 5 votes |
package com.flaminem.flamy.model.names

import com.flaminem.flamy.parsing.ParsingUtils

import scala.language.implicitConversions
import scala.util.matching.Regex

/** Name of a schema. Instances are created through the companion object. */
class SchemaName private (val fullName: String) extends ItemName {

  /** Same as `fullName`. */
  def name: String = fullName

  /** True only when `that` is a schema name equal to this one. */
  override def isInOrEqual(that: ItemName): Boolean =
    that match {
      case other: SchemaName => other==this
      case _ => false
    }

}

object SchemaName {

  /**
   * Builds a schema name from `fullName`.
   *
   * @throws IllegalArgumentException when `fullName` does not match [[schemaRegex]]
   */
  def apply(fullName: String): SchemaName =
    parse(fullName) match {
      case Some(schema) => schema
      case None =>
        throw new IllegalArgumentException(
          s"$fullName is not a valid name for schemas. Valid names only contain alphabet characters, numbers and _."
        )
    }

  val t: String = ParsingUtils.t

  /** A single `t` token, anchored at both ends of the input. */
  val schemaRegex: Regex = s"\\A$t\\z".r

  /** Some schema name when `s` matches [[schemaRegex]], otherwise None. */
  def parse(s: String): Option[SchemaName] =
    s match {
      case schemaRegex() => Some(new SchemaName(s))
      case _ => None
    }

  def unapply(schemaName: SchemaName): Option[String] = Some(schemaName.fullName)

  /** Orders schema names by comparing their full names as strings. */
  implicit val order: Ordering[SchemaName] = new Ordering[SchemaName]{
    override def compare(x: SchemaName, y: SchemaName): Int = {
      val left = x.fullName
      val right = y.fullName
      left.compareTo(right)
    }
  }

  implicit def toString(schema: SchemaName): String = schema.fullName

}
Example 119
Source File: FlamyParsingException.scala From flamy with Apache License 2.0 | 5 votes |
package com.flaminem.flamy.parsing.hive

import com.flaminem.flamy.exec.utils.io.FlamyOutput
import com.flaminem.flamy.model.exceptions.FlamyException
import com.flaminem.flamy.utils.TextUtils

import scala.util.matching.Regex

/**
 * Exception raised when a query cannot be parsed.
 *
 * @param query   the query that failed, when known
 * @param message message passed to the parent exception
 * @param cause   underlying error passed to the parent exception
 */
class FlamyParsingException(
  val query: Option[String],
  message: Option[String],
  cause: Option[Throwable]
) extends FlamyException(message, cause) {

  def this(message: String) = this(None, Option(message), None)

  def this(cause: Throwable) = this(None, Option(cause.getMessage), Option(cause))

  def this(message: String, cause: Throwable) = this(None, Option(message), Option(cause))

}

object FlamyParsingException {

  // Messages that start with "line <n>:<m> "; group 1 is the line, group 2 the column.
  private val lineNumPattern: Regex = "^line (\\d+):(\\d+) .*".r

  /**
   * Builds the exception for a query that failed to parse.
   *
   * When `verbose` is set, the query and the error message are written to the
   * error output first. If the message of `e` carries a line number, the
   * exception message is extended with the offending query line (marked with
   * `>>>`) and its two neighbours; otherwise the message is used unchanged. A
   * null message results in no message at all.
   */
  @throws(classOf[FlamyException])
  def apply(query: String, e: Throwable, verbose: Boolean): FlamyParsingException = {
    val message: String = e.getMessage
    val knownMessage: Option[String] = Option(message)

    if (verbose) {
      val messageLine = knownMessage.fold("")(text => text + "\n")
      FlamyOutput.err.error("Error parsing query:\n\n" + query + "\n\n" + messageLine)
    }

    val querySample: Option[String] = knownMessage.map { text =>
      lineNumPattern.findFirstMatchIn(text).fold(text) { located =>
        val lineNumber: Int = located.group(1).toInt
        // Parsed like the line number, although the value itself is not used.
        val charNumber: Int = located.group(2).toInt
        val before = TextUtils.getLine(query, lineNumber - 1)
        val culprit = TextUtils.getLine(query, lineNumber)
        val after = TextUtils.getLine(query, lineNumber + 1)
        text + "\n- " + before + "\n>>> " + culprit + "\n- " + after
      }
    }

    new FlamyParsingException(Some(query), querySample, Some(e))
  }

}
Example 120
Source File: TableGraphFullExporter.scala From flamy with Apache License 2.0 | 5 votes |
package com.flaminem.flamy.graph.export

import com.flaminem.flamy.graph.TableGraph
import com.flaminem.flamy.model.Column
import com.flaminem.flamy.model.names.TableName

import scala.util.matching.Regex

/**
 * Table graph exporter that renders each table vertex with its name, its
 * columns and its partition columns.
 */
class TableGraphFullExporter(preTableGraph: TableGraph) extends TableGraphExporter(preTableGraph){

  /**
   * A `name:type` couple inside an encoded STRUCT type. The excluded
   * characters are the separators of the type syntax plus `&` and `;`, which
   * delimit the entities produced by [[xmlEncode]].
   */
  val STRUCT_RE: Regex = "[^,<>&;]+:[^,<>&;]+".r

  /**
   * Escapes angle brackets as the entities `&lt;` and `&gt;` so that a type
   * such as `ARRAY<INT>` can be embedded in markup without being read as a tag.
   */
  def xmlEncode(s: String): String = s.replace("<","&lt;").replace(">","&gt;")

  /**
   * Upper-cased, entity-escaped column type. For types that contain STRUCT,
   * every `name:type` couple is moved onto its own line with `<BR/>`, with
   * the field name lower-cased and the field type upper-cased.
   */
  def formatColumnType(column: Column): String = {
    val s = xmlEncode(column.columnType.getOrElse("").toUpperCase)
    if (s.contains("STRUCT")) {
      STRUCT_RE.findAllIn(s).foldLeft(s) { (res, couple) =>
        val Array(col,typ) = couple.split(':')
        res.replace(couple,s"<BR/> ${col.toLowerCase}:${typ.toUpperCase}")
      }
    }
    else {
      s
    }
  }

  /**
   * Two cells for a column: its name, prefixed with a marker when the column
   * comment starts with "PK" (case-insensitive), and its formatted type.
   */
  def formatColumn(column: Column): Seq[AlignedString] = {
    val pk =
      Option(column.getComment) match {
        case Some(comment) if comment.toUpperCase.startsWith("PK") => "\u26B7 "
        case _ => " "
      }
    Seq(AlignedString(pk + column.columnName), AlignedString(" " + formatColumnType(column)))
  }

  /**
   * Three sections for a table vertex: a centred header with the table name,
   * the columns, and the partition columns. The last two are empty when the
   * model does not contain the table.
   */
  override def formatTableVertex(td: TableName): Seq[Seq[Seq[AlignedString]]] = {
    val nodeName = td.name
    val header: Seq[Seq[AlignedString]] = Seq(Seq(AlignedString(nodeName, Alignment.CENTER)))
    val columns: Seq[Seq[AlignedString]] = model.getTable(td.fullName).toSeq.flatMap{table => table.columns.map{formatColumn}}
    val partitions: Seq[Seq[AlignedString]] = model.getTable(td.fullName).toSeq.flatMap{table => table.partitions.map{formatColumn}}
    Seq(header,columns,partitions)
  }

}
Example 121
Source File: doc.scala From libisabelle with Apache License 2.0 | 5 votes |
package isabelle

import scala.util.matching.Regex

// NOTE(review): this listing is an excerpt. `contents`, `contents_lines`,
// `view` and the `Doc(name, _, path)` extractor are used below but are not
// defined here.
object Doc {

  /**
   * Command-line tool registered under the name "doc".
   *
   * Without arguments it prints the second component of every entry returned
   * by `contents_lines()`. With arguments it looks up each one among the
   * entries returned by `contents()` by name, calls `view` on the path of the
   * first hit, and reports an error for names without an entry.
   */
  val isabelle_tool = Isabelle_Tool("doc", "view Isabelle documentation", args =>
  {
    val getopts = Getopts(""" Usage: isabelle doc [DOC ...] View Isabelle documentation. """)
    val docs = getopts(args)

    val entries = contents()
    if (docs.isEmpty) Output.writeln(cat_lines(contents_lines().map(_._2)), stdout = true)
    else {
      docs.foreach(doc =>
        entries.collectFirst { case Doc(name, _, path) if doc == name => path } match {
          case Some(path) => view(path)
          case None => error("No Isabelle documentation entry: " + quote(doc))
        }
      )
    }
  })
}
Example 122
Source File: invoke_scala.scala From libisabelle with Apache License 2.0 | 5 votes |
package isabelle

import java.lang.reflect.{Method, Modifier, InvocationTargetException}

import scala.util.matching.Regex

// NOTE(review): this listing is an excerpt. `Invoke_Scala.Tag` and
// `Invoke_Scala.method` are used below but are not defined here, and the
// closing brace of the outer object is not part of the listing.
object Invoke_Scala {

  /**
   * Protocol handler that runs invocations in forked futures, keyed by
   * request id, and reports each result back through the session.
   */
  class Invoke_Scala extends Session.Protocol_Handler {
    private var session: Session = null
    private var futures = Map.empty[String, Future[Unit]]

    /** Remembers the session used to send results. */
    override def init(init_session: Session): Unit =
      synchronized { session = init_session }

    /** Cancels every pending invocation and clears the table. */
    override def exit(): Unit = synchronized {
      for ((id, future) <- futures) cancel(id, future)
      futures = Map.empty
    }

    // Sends the result of invocation `id` to the prover and forgets the
    // invocation; does nothing when the id is no longer registered.
    private def fulfill(id: String, tag: Invoke_Scala.Tag.Value, res: String): Unit =
      synchronized {
        if (futures.isDefinedAt(id)) {
          session.protocol_command("Invoke_Scala.fulfill", id, tag.id.toString, res)
          futures -= id
        }
      }

    // Cancels the future and reports the invocation as interrupted.
    private def cancel(id: String, future: Future[Unit]) {
      future.cancel
      fulfill(id, Invoke_Scala.Tag.INTERRUPT, "")
    }

    // Handles an invoke message: registers a forked future that runs the
    // named method on the message text and then fulfills the request.
    // Returns whether the message was an invoke message.
    private def invoke_scala(msg: Prover.Protocol_Output): Boolean = synchronized {
      msg.properties match {
        case Markup.Invoke_Scala(name, id) =>
          futures += (id ->
            Future.fork {
              val (tag, result) = Invoke_Scala.method(name, msg.text)
              fulfill(id, tag, result)
            })
          true
        case _ => false
      }
    }

    // Handles a cancel message: cancels the invocation when it is still
    // registered. Returns whether the message was a cancel message.
    private def cancel_scala(msg: Prover.Protocol_Output): Boolean = synchronized {
      msg.properties match {
        case Markup.Cancel_Scala(id) =>
          futures.get(id) match {
            case Some(future) => cancel(id, future)
            case None =>
          }
          true
        case _ => false
      }
    }

    /** Message kinds handled by this class, paired with their handlers. */
    val functions = List(
      Markup.INVOKE_SCALA -> invoke_scala _,
      Markup.CANCEL_SCALA -> cancel_scala _)
  }
Example 123
Source File: doc.scala From libisabelle with Apache License 2.0 | 5 votes |
package isabelle

import scala.util.matching.Regex

// NOTE(review): this listing is an excerpt. `contents`, `contents_lines`,
// `view` and the `Doc(name, _, path)` extractor are used below but are not
// defined here.
object Doc {

  /**
   * Command-line tool registered under the name "doc".
   *
   * Without arguments it prints the second component of every entry returned
   * by `contents_lines()`. With arguments it looks up each one among the
   * entries returned by `contents()` by name, calls `view` on the path of the
   * first hit, and reports an error for names without an entry.
   */
  val isabelle_tool = Isabelle_Tool("doc", "view Isabelle documentation", args =>
  {
    val getopts = Getopts(""" Usage: isabelle doc [DOC ...] View Isabelle documentation. """)
    val docs = getopts(args)

    val entries = contents()
    if (docs.isEmpty) Output.writeln(cat_lines(contents_lines().map(_._2)), stdout = true)
    else {
      docs.foreach(doc =>
        entries.collectFirst { case Doc(name, _, path) if doc == name => path } match {
          case Some(path) => view(path)
          case None => error("No Isabelle documentation entry: " + quote(doc))
        }
      )
    }
  })
}
Example 124
Source File: invoke_scala.scala From libisabelle with Apache License 2.0 | 5 votes |
package isabelle

import java.lang.reflect.{Method, Modifier, InvocationTargetException}

import scala.util.matching.Regex

// NOTE(review): this listing is an excerpt. `Invoke_Scala.Tag` and
// `Invoke_Scala.method` are used below but are not defined here, and the
// closing brace of the outer object is not part of the listing.
object Invoke_Scala {

  /**
   * Protocol handler that runs invocations in forked futures, keyed by
   * request id, and reports each result back through the session.
   */
  class Invoke_Scala extends Session.Protocol_Handler {
    private var session: Session = null
    private var futures = Map.empty[String, Future[Unit]]

    /** Remembers the session used to send results. */
    override def init(init_session: Session): Unit =
      synchronized { session = init_session }

    /** Cancels every pending invocation and clears the table. */
    override def exit(): Unit = synchronized {
      for ((id, future) <- futures) cancel(id, future)
      futures = Map.empty
    }

    // Sends the result of invocation `id` to the prover and forgets the
    // invocation; does nothing when the id is no longer registered.
    private def fulfill(id: String, tag: Invoke_Scala.Tag.Value, res: String): Unit =
      synchronized {
        if (futures.isDefinedAt(id)) {
          session.protocol_command("Invoke_Scala.fulfill", id, tag.id.toString, res)
          futures -= id
        }
      }

    // Cancels the future and reports the invocation as interrupted.
    private def cancel(id: String, future: Future[Unit]) {
      future.cancel
      fulfill(id, Invoke_Scala.Tag.INTERRUPT, "")
    }

    // Handles an invoke message: registers a forked future that runs the
    // named method on the message text and then fulfills the request.
    // Returns whether the message was an invoke message.
    private def invoke_scala(msg: Prover.Protocol_Output): Boolean = synchronized {
      msg.properties match {
        case Markup.Invoke_Scala(name, id) =>
          futures += (id ->
            Future.fork {
              val (tag, result) = Invoke_Scala.method(name, msg.text)
              fulfill(id, tag, result)
            })
          true
        case _ => false
      }
    }

    // Handles a cancel message: cancels the invocation when it is still
    // registered. Returns whether the message was a cancel message.
    private def cancel_scala(msg: Prover.Protocol_Output): Boolean = synchronized {
      msg.properties match {
        case Markup.Cancel_Scala(id) =>
          futures.get(id) match {
            case Some(future) => cancel(id, future)
            case None =>
          }
          true
        case _ => false
      }
    }

    /** Message kinds handled by this class, paired with their handlers. */
    val functions = List(
      Markup.INVOKE_SCALA -> invoke_scala _,
      Markup.CANCEL_SCALA -> cancel_scala _)
  }
Example 125
Source File: doc.scala From libisabelle with Apache License 2.0 | 5 votes |
package isabelle

import scala.util.matching.Regex

// NOTE(review): this listing is an excerpt. `contents`, `contents_lines`,
// `view` and the `Doc(name, _, path)` extractor are used below but are not
// defined here.
object Doc {

  /**
   * Command-line tool registered under the name "doc".
   *
   * Without arguments it prints, through `Console.println`, the second
   * component of every entry returned by `contents_lines()`. With arguments
   * it looks up each one among the entries returned by `contents()` by name,
   * calls `view` on the path of the first hit, and reports an error for
   * names without an entry.
   */
  val isabelle_tool = Isabelle_Tool("doc", "view Isabelle documentation", args =>
  {
    val getopts = Getopts(""" Usage: isabelle doc [DOC ...] View Isabelle documentation. """)
    val docs = getopts(args)

    val entries = contents()
    if (docs.isEmpty) Console.println(cat_lines(contents_lines().map(_._2)))
    else {
      docs.foreach(doc =>
        entries.collectFirst { case Doc(name, _, path) if doc == name => path } match {
          case Some(path) => view(path)
          case None => error("No Isabelle documentation entry: " + quote(doc))
        }
      )
    }
  })
}
Example 126
Source File: invoke_scala.scala From libisabelle with Apache License 2.0 | 5 votes |
package isabelle

import java.lang.reflect.{Method, Modifier, InvocationTargetException}

import scala.util.matching.Regex

// NOTE(review): this listing is an excerpt. `Invoke_Scala.Tag` and
// `Invoke_Scala.method` are used below but are not defined here, and the
// closing brace of the outer object is not part of the listing.
object Invoke_Scala {

  /**
   * Protocol handler that runs invocations in forked futures, keyed by
   * request id, and reports each result back through the session.
   */
  class Invoke_Scala extends Session.Protocol_Handler {
    private var session: Session = null
    private var futures = Map.empty[String, Future[Unit]]

    /** Remembers the session used to send results. */
    override def init(init_session: Session): Unit =
      synchronized { session = init_session }

    /** Cancels every pending invocation and clears the table. */
    override def exit(): Unit = synchronized {
      for ((id, future) <- futures) cancel(id, future)
      futures = Map.empty
    }

    // Sends the result of invocation `id` to the prover and forgets the
    // invocation; does nothing when the id is no longer registered.
    private def fulfill(id: String, tag: Invoke_Scala.Tag.Value, res: String): Unit =
      synchronized {
        if (futures.isDefinedAt(id)) {
          session.protocol_command("Invoke_Scala.fulfill", id, tag.id.toString, res)
          futures -= id
        }
      }

    // Cancels the future and reports the invocation as interrupted.
    private def cancel(id: String, future: Future[Unit]) {
      future.cancel
      fulfill(id, Invoke_Scala.Tag.INTERRUPT, "")
    }

    // Handles an invoke message: registers a forked future that runs the
    // named method on the message text and then fulfills the request.
    // Returns whether the message was an invoke message.
    private def invoke_scala(msg: Prover.Protocol_Output): Boolean = synchronized {
      msg.properties match {
        case Markup.Invoke_Scala(name, id) =>
          futures += (id ->
            Future.fork {
              val (tag, result) = Invoke_Scala.method(name, msg.text)
              fulfill(id, tag, result)
            })
          true
        case _ => false
      }
    }

    // Handles a cancel message: cancels the invocation when it is still
    // registered. Returns whether the message was a cancel message.
    private def cancel_scala(msg: Prover.Protocol_Output): Boolean = synchronized {
      msg.properties match {
        case Markup.Cancel_Scala(id) =>
          futures.get(id) match {
            case Some(future) => cancel(id, future)
            case None =>
          }
          true
        case _ => false
      }
    }

    /** Message kinds handled by this class, paired with their handlers. */
    val functions = List(
      Markup.INVOKE_SCALA -> invoke_scala _,
      Markup.CANCEL_SCALA -> cancel_scala _)
  }
Example 127
Source File: ParadoxKeys.scala From paradox with Apache License 2.0 | 5 votes |
package com.lightbend.paradox.sbt

import sbt._
import com.lightbend.paradox.ParadoxProcessor
import com.lightbend.paradox.markdown.{ Directive, Writer }
import com.lightbend.paradox.template.PageTemplate

import scala.util.matching.Regex

/**
 * sbt setting and task keys of the Paradox plugin. The string passed to each
 * key is the description sbt shows for it.
 */
trait ParadoxKeys {
  // Site generation and navigation
  val paradox = taskKey[File]("Build the paradox site.")
  val paradoxMarkdownToHtml = taskKey[Seq[(File, String)]]("Convert markdown files to HTML.")
  val paradoxNavigationDepth = settingKey[Int]("Determines depth of TOC for page navigation.")
  val paradoxNavigationExpandDepth = settingKey[Option[Int]]("Depth of auto-expanding navigation below the active page.")
  val paradoxNavigationIncludeHeaders = settingKey[Boolean]("Whether to include headers in the navigation.")
  @deprecated("Enumerate the roots in `paradoxRoots`", since = "0.6.1")
  val paradoxExpectedNumberOfRoots = settingKey[Int]("How many ToC roots to expect.")
  val paradoxRoots = settingKey[List[String]]("Which ToC roots (pages without parent) to expect.")
  val paradoxLeadingBreadcrumbs = settingKey[List[(String, String)]]("Any leading breadcrumbs (label -> url)")
  // Each description is a single-line literal: ordinary string literals cannot span lines.
  val paradoxIllegalLinkPath = settingKey[Regex]("Path pattern to fail site creation (eg. to protect against missing `@ref` for links).")
  val paradoxOrganization = settingKey[String]("Paradox dependency organization (for theme dependencies).")
  val paradoxDirectives = taskKey[Seq[Writer.Context => Directive]]("Enabled paradox directives.")
  val paradoxProcessor = taskKey[ParadoxProcessor]("ParadoxProcessor to use when generating the site.")
  val paradoxProperties = taskKey[Map[String, String]]("Property map passed to paradox.")
  val paradoxSourceSuffix = settingKey[String]("Source file suffix for markdown files [default = \".md\"].")
  val paradoxTargetSuffix = settingKey[String]("Target file suffix for HTML files [default = \".html\"].")

  // Theme and templates
  val paradoxTheme = settingKey[Option[ModuleID]]("Web module name of the paradox theme, otherwise local template.")
  val paradoxThemeDirectory = taskKey[File]("Sync combined theme and local template to a directory.")
  val paradoxOverlayDirectories = settingKey[Seq[File]]("Directory containing common source files for configuration.")
  val paradoxDefaultTemplateName = settingKey[String]("Name of default template for generating pages.")
  val paradoxTemplate = taskKey[PageTemplate]("PageTemplate to use when generating HTML pages.")
  val paradoxVersion = settingKey[String]("Paradox plugin version.")
  val paradoxGroups = settingKey[Map[String, Seq[String]]]("Paradox groups.")
  val paradoxBrowse = taskKey[Unit]("Open the docs in the default browser")

  // Link validation
  val paradoxValidateInternalLinks = taskKey[Unit]("Validate internal, non ref paradox links.")
  val paradoxValidateLinks = taskKey[Unit]("Validate all non ref paradox links.")
  val paradoxValidationIgnorePaths = settingKey[List[Regex]]("List of regular expressions to apply to paths to determine if they should be ignored.")
  val paradoxValidationSiteBasePath = settingKey[Option[String]]("The base path that the documentation is deployed to, allows validating links on the docs site that are outside of the documentation root tree")

  // Single-page site
  val paradoxSingle = taskKey[File]("Build the single page HTML Paradox site")
  val paradoxSingleMarkdownToHtml = taskKey[Seq[(File, String)]]("Convert markdown files to single page HTML")

  // PDF generation
  val paradoxPdf = taskKey[File]("Build the paradox PDF")
  val paradoxPdfSite = taskKey[File]("Build the single page HTML Paradox site")
  val paradoxPdfDockerImage = settingKey[String]("The wkhtmltopdf docker image to use")
  val paradoxPdfArgs = settingKey[Seq[String]]("Arguments for wkhtmltopdf generation")
  val paradoxPdfTocTemplate = settingKey[Option[String]]("XSL template to use for generating the table of contents, relative to the theme directory.")
  val paradoxPdfMarkdownToHtml = taskKey[Seq[(File, String)]]("Convert markdown files to single page HTML")
}
Example 128
Source File: TestIdSupport.scala From warp-core with MIT License | 5 votes |
package com.workday.warp.junit

import org.pmw.tinylog.Logger

import scala.util.matching.Regex

// NOTE(review): this listing is an excerpt. The definition that encloses
// `fromUniqueId` (closed by the brace after it, and presumably the
// `TestIdSupport` type extended at the bottom) is not shown.

  /**
   * Derives a test id of the form `<class>.<method>` from a unique id.
   *
   * @param uid unique id to parse
   * @return the test id when `uid` has the shape described by `uidPattern`;
   *         otherwise a debug message is logged and None is returned
   */
  def fromUniqueId(uid: String): Option[String] = uid match {
    case uidPattern(_, clazz, _, methodName, _) =>
      Option(s"$clazz.$methodName")
    case _ =>
      Logger.debug(s"unable to parse testId from $uid")
      None
  }
}

object TestIdSupport {
  // use triple quotes to avoid escaping backslash
  // Groups: 1 = engine, 2 = class, 3 = "method" or "test-template",
  // 4 = method name, 5 = text between the parentheses.
  val uidPattern: Regex = """\[engine:(.*)\]/\[class:(.*)\]/\[(method|test-template):(.*)\((.*)\)\].*""".r
}

// can be imported or mixed in
object TestId extends TestIdSupport
Example 129
Source File: ValidatorsJsonProtocol.scala From seahorse-workflow-executor with Apache License 2.0 | 5 votes |
package io.deepsense.deeplang.params.validators

import scala.util.matching.Regex

import spray.json._

/** JSON formats for parameter validators. Only writing is implemented for some of them. */
object ValidatorsJsonProtocol extends DefaultJsonProtocol {

  /** Writes a regex as the JSON form of its string representation. */
  implicit object RegexJsonFormat extends JsonFormat[Regex] {
    override def write(regex: Regex): JsValue = regex.toString.toJson

    /** Not implemented. */
    override def read(json: JsValue): Regex = ???
  }

  implicit val rangeValidatorFormat = jsonFormat(
    RangeValidator.apply, "begin", "end", "beginIncluded", "endIncluded", "step")

  implicit val regexValidatorFormat = jsonFormat(RegexValidator, "regex")

  implicit val arrayLengthValidator = jsonFormat(ArrayLengthValidator.apply, "min", "max")

  /** Writes only the configuration of the nested range validator. */
  implicit val complexArrayValidator = new JsonFormat[ComplexArrayValidator] {
    def write(v: ComplexArrayValidator): JsValue = {
      v.rangeValidator.configurationToJson
    }

    /** Not implemented. */
    def read(json: JsValue): ComplexArrayValidator = ???
  }
  // TODO DS-3225 Complex Array Validator serialization
  // implicit val complexArrayValidator =
  //  jsonFormat(ComplexArrayValidator.apply, "rangeValidator", "arrayLengthValidator")
}
Example 130
Source File: MatchTextUtil.scala From sumobot with Apache License 2.0 | 5 votes |
package com.sumologic.sumobot.test.annotated

import org.scalatest.WordSpecLike

import scala.util.matching.Regex

/** Test helpers that assert whether a regex is found in a text. */
trait MatchTextUtil extends WordSpecLike {

  /** Fails the test unless `regex` is found somewhere in `text`. */
  def shouldMatch(regex: Regex, text: String): Unit =
    if (doesMatch(regex, text)) {
      ()
    } else {
      fail(s"$regex did not match $text but should")
    }

  /** Fails the test when `regex` is found somewhere in `text`. */
  def shouldNotMatch(regex: Regex, text: String): Unit =
    if (!doesMatch(regex, text)) {
      ()
    } else {
      fail(s"$regex matched $text but should not")
    }

  // True when the pattern occurs anywhere in the text (not a whole-string match).
  private def doesMatch(regex: Regex, text: String): Boolean = {
    val matcher = regex.pattern.matcher(text)
    matcher.find()
  }
}
Example 131
Source File: MatchTextUtil.scala From sumobot with Apache License 2.0 | 5 votes |
package com.sumologic.sumobot.test

import scala.util.matching.Regex

/** Test helpers that assert whether a regex is found in a text. */
@deprecated("use com.sumologic.sumobot.test.annotated.MatchTextUtil", "1.0.2")
trait MatchTextUtil {
  this : SumoBotSpec =>

  /** Fails the test unless `regex` is found somewhere in `text`. */
  def shouldMatch(regex: Regex, text: String): Unit =
    if (doesMatch(regex, text)) {
      ()
    } else {
      fail(s"$regex did not match $text but should")
    }

  /** Fails the test when `regex` is found somewhere in `text`. */
  def shouldNotMatch(regex: Regex, text: String): Unit =
    if (!doesMatch(regex, text)) {
      ()
    } else {
      fail(s"$regex matched $text but should not")
    }

  // True when the pattern occurs anywhere in the text (not a whole-string match).
  private def doesMatch(regex: Regex, text: String): Boolean = {
    val matcher = regex.pattern.matcher(text)
    matcher.find()
  }
}
Example 132
Source File: CommonTypesParser.scala From rug with GNU General Public License v3.0 | 4 votes |
package com.atomist.util.scalaparsing

import com.atomist.rug.{BadRugException, BadRugSyntaxException, RugRuntimeException}
import com.atomist.source.FileArtifact
import com.atomist.tree.content.text._
import com.typesafe.scalalogging.LazyLogging

import scala.util.matching.Regex
import scala.util.parsing.combinator.JavaTokenParsers
import scala.util.parsing.input.{CharSequenceReader, OffsetPosition, Positional}

// NOTE(review): this listing is an excerpt. The class that encloses the
// methods below (closed by the final brace) is not shown; `IdentifierRef`,
// `ErrorInfo` and `logger` are used but not defined here.

  /**
   * Parser for an identifier that must not be a reserved word.
   *
   * Runs `underlying`; a successful result that is contained in
   * `reservedWords` is turned into a failure, any other successful result is
   * wrapped in an `IdentifierRef`, and failures are passed through.
   *
   * @param reservedWords words that are rejected
   * @param underlying    parser that recognises the raw identifier
   */
  protected def identifierRef(reservedWords: Set[String], underlying: Parser[String] = ident) = new Parser[IdentifierRef] {
    def apply(in: Input): ParseResult[IdentifierRef] = {
      val pr = underlying.apply(in)
      pr match {
        case succ: Success[String @unchecked] =>
          if (reservedWords.contains(succ.get))
            Failure(s"Cannot use reserved word '${succ.get}' as function name", succ.next)
          else
            Success[IdentifierRef](IdentifierRef(succ.get), succ.next)
        case f: Failure => f
        // Any other kind of result is not handled.
        case _ => ???
      }
    }
  }

  /** Same as [[identifierRef]], but yields the identifier's name. */
  protected def identifierRefString(reservedWords: Set[String], underlying: Parser[String] = ident): Parser[String] =
    identifierRef(reservedWords, underlying) ^^ (ir => ir.name)

  /**
   * Parses the content of `f` with `parser`.
   *
   * @return the parsed value
   * @throws BadRugSyntaxException when the parser reports a failure or an
   *         error; the exception carries the message, the input, the line and
   *         column of the remaining input, and the file path
   */
  protected def parseTo[T](f: FileArtifact, parser: Parser[T]): T = {
    logger.debug(s"Rug input is\n------\n${f.path}\n${f.content}\n------\n")
    // We need a source that gives us positions
    val source = new CharSequenceReader(f.content)
    val parsed = parse(parser, source) match {
      case Success(matched, _) => matched
      case Failure(msg, rest) =>
        throw new BadRugSyntaxException(ErrorInfo(s"Failure: $msg", badInput = f.content, line = rest.pos.line, col = rest.pos.column, filePath = f.path))
      case Error(msg, rest) =>
        throw new BadRugSyntaxException(ErrorInfo(s"Error: $msg", badInput = f.content, line = rest.pos.line, col = rest.pos.column, filePath = f.path))
    }
    logger.debug(s"Parse result=$parsed")
    parsed
  }
}