java.util.regex.Pattern Scala Example

Source File: Configuration.scala From circe-generic-extras with Apache License 2.0

5 votes

package io.circe.generic.extras

import java.util.regex.Pattern


/**
 * Immutable bundle of settings: two name-transformation functions, two flags and an
 * optional discriminator. Every `with*` method returns a modified copy and leaves the
 * receiver untouched.
 *
 * @param transformMemberNames      function stored for transforming member names
 * @param transformConstructorNames function stored for transforming constructor names
 * @param useDefaults               flag set to `true` by [[withDefaults]]
 * @param discriminator             optional discriminator, set by [[withDiscriminator]]
 * @param strictDecoding            flag set to `true` by [[withStrictDecoding]]
 */
final case class Configuration(
  transformMemberNames: String => String,
  transformConstructorNames: String => String,
  useDefaults: Boolean,
  discriminator: Option[String],
  strictDecoding: Boolean = false
) {
  /** Copy whose member names are transformed to `snake_case`. */
  def withSnakeCaseMemberNames: Configuration = copy(
    transformMemberNames = Configuration.snakeCaseTransformation
  )

  /** Copy whose member names are transformed to `SCREAMING_SNAKE_CASE`. */
  def withScreamingSnakeCaseMemberNames: Configuration = copy(
    transformMemberNames = Configuration.screamingSnakeCaseTransformation
  )

  /** Copy whose member names are transformed to `kebab-case`. */
  def withKebabCaseMemberNames: Configuration = copy(
    transformMemberNames = Configuration.kebabCaseTransformation
  )

  /** Copy whose constructor names are transformed to `snake_case`. */
  def withSnakeCaseConstructorNames: Configuration = copy(
    transformConstructorNames = Configuration.snakeCaseTransformation
  )

  /** Copy whose constructor names are transformed to `SCREAMING_SNAKE_CASE`. */
  def withScreamingSnakeCaseConstructorNames: Configuration = copy(
    transformConstructorNames = Configuration.screamingSnakeCaseTransformation
  )

  /** Copy whose constructor names are transformed to `kebab-case`. */
  def withKebabCaseConstructorNames: Configuration = copy(
    transformConstructorNames = Configuration.kebabCaseTransformation
  )

  /** Copy with `useDefaults` switched on. */
  def withDefaults: Configuration = copy(useDefaults = true)

  /** Copy with the given discriminator set. */
  def withDiscriminator(discriminator: String): Configuration = copy(discriminator = Some(discriminator))

  /** Copy with `strictDecoding` switched on. */
  def withStrictDecoding: Configuration = copy(strictDecoding = true)
}

object Configuration {

  /** Identity name transformations, no defaults, no discriminator, non-strict decoding. */
  val default: Configuration = Configuration(Predef.identity, Predef.identity, false, None)

  // Splits an upper-case run from a following capitalised word: "HTMLParser" -> "HTML" | "Parser".
  private val basePattern: Pattern = Pattern.compile("([A-Z]+)([A-Z][a-z])")
  // Splits a lower-case letter or digit from a following upper-case letter: "fooBar" -> "foo" | "Bar".
  private val swapPattern: Pattern = Pattern.compile("([a-z\\d])([A-Z])")

  /** Inserts `separator` at every word boundary found by the two patterns; case is left untouched. */
  private def separateWords(s: String, separator: String): String = {
    val replacement = "$1" + separator + "$2"
    val partial = basePattern.matcher(s).replaceAll(replacement)
    swapPattern.matcher(partial).replaceAll(replacement)
  }

  // Case conversion uses Locale.ROOT so the result does not depend on the JVM default
  // locale (e.g. under a Turkish locale "I".toLowerCase yields a dotless "ı").

  /** `fooBar` -> `foo_bar`. */
  val snakeCaseTransformation: String => String =
    s => separateWords(s, "_").toLowerCase(java.util.Locale.ROOT)

  /** `fooBar` -> `FOO_BAR`. */
  val screamingSnakeCaseTransformation: String => String =
    s => separateWords(s, "_").toUpperCase(java.util.Locale.ROOT)

  /** `fooBar` -> `foo-bar`. */
  val kebabCaseTransformation: String => String =
    s => separateWords(s, "-").toLowerCase(java.util.Locale.ROOT)
}

/** Import `defaults._` to bring `Configuration.default` into implicit scope. */
object defaults {
  // Implicit alias of Configuration.default.
  implicit val defaultGenericConfiguration: Configuration = Configuration.default
}

Source File: Tag.scala From cosmos with Apache License 2.0

5 votes

package com.mesosphere.universe.v3.model

import com.mesosphere.cosmos.circe.Decoders._
import com.twitter.util.Return
import com.twitter.util.Throw
import com.twitter.util.Try
import io.circe.syntax.EncoderOps
import io.circe.Decoder
import io.circe.DecodingFailure
import io.circe.Encoder
import io.circe.HCursor
import java.util.regex.Pattern

/**
 * Value class wrapping a tag string. The constructor is private, so instances are
 * created through the companion object.
 */
final class Tag private(val value: String) extends AnyVal {

  // Renders as the raw wrapped string.
  override def toString: String = value

}

/** Factory, validation and JSON codecs for [[Tag]]. */
object Tag {

  /** A tag is one or more non-whitespace characters. */
  val packageDetailsTagRegex: String = "^[^\\s]+$"
  val packageDetailsTagPattern: Pattern = Pattern.compile(packageDetailsTagRegex)

  /**
   * Builds a tag, throwing the validation failure if `s` is not a valid tag.
   * Prefer [[validate]] when the input is not known to be valid.
   */
  def apply(s: String): Tag = validate(s).get

  /**
   * Validates `s` against [[packageDetailsTagRegex]].
   *
   * @return `Return(tag)` on success, otherwise a `Throw` holding an
   *         `IllegalArgumentException`. A `null` input is reported the same way
   *         rather than escaping as a `NullPointerException`.
   */
  def validate(s: String): Try[Tag] = {
    if (s != null && packageDetailsTagPattern.matcher(s).matches()) {
      Return(new Tag(s))
    } else {
      Throw(new IllegalArgumentException(
        s"Value '$s' does not conform to expected format $packageDetailsTagRegex"
      ))
    }
  }

  /** Encodes a tag as its plain JSON string value. */
  implicit val encodePackageDefinitionTag: Encoder[Tag] = {
    Encoder.instance(_.value.asJson)
  }

  /** Decodes a JSON string and validates it, turning validation errors into `DecodingFailure`s. */
  implicit val decodePackageDefinitionTag: Decoder[Tag] =
    Decoder.instance[Tag] { (c: HCursor) =>
      c.as[String].map(validate(_)).flatMap {
        case Return(r) => Right(r)
        case Throw(ex) =>
          // getMessage may be null for some exceptions; fall back to toString.
          // String.replace is the literal (non-regex) replacement.
          val msg = Option(ex.getMessage).getOrElse(ex.toString).replace("assertion failed: ", "")
          Left(DecodingFailure(msg, c.history))
      }
    }

}

Source File: StringUtils.scala From BigDatalog with Apache License 2.0

5 votes

package org.apache.spark.sql.catalyst.util

import java.util.regex.Pattern

import org.apache.spark.unsafe.types.UTF8String

object StringUtils {

  /**
   * Translates a SQL LIKE pattern into a Java regular expression.
   *
   *  - `_` becomes `.` (exactly one character) and `%` becomes `.*` (any run of characters);
   *  - `\_` and `\%` become the literal characters `_` and `%`;
   *  - `\\` becomes a literal backslash (it used to be dropped, and its second
   *    backslash wrongly escaped the following character);
   *  - `\x` for any other `x` matches a literal backslash followed by `x`;
   *  - a lone trailing backslash is ignored;
   *  - every other character is quoted and matched literally.
   *
   * The result is prefixed with `(?s)` so `.` also matches line terminators. An empty
   * pattern is returned unchanged (without the prefix).
   */
  def escapeLikeRegex(v: String): String = {
    if (v.isEmpty) {
      v
    } else {
      val out = new StringBuilder("(?s)")
      var i = 0
      while (i < v.length) {
        val c = v.charAt(i)
        if (c == '\\') {
          if (i + 1 < v.length) {
            // Consume the escape and the escaped character together so the escaped
            // character can never be re-read as the start of another escape.
            v.charAt(i + 1) match {
              case '_' => out.append("_")
              case '%' => out.append("%")
              case '\\' => out.append(Pattern.quote("\\"))
              case other => out.append(Pattern.quote("\\" + other))
            }
            i += 2
          } else {
            // Lone trailing backslash contributes nothing.
            i += 1
          }
        } else {
          c match {
            case '_' => out.append(".")
            case '%' => out.append(".*")
            case _ => out.append(Pattern.quote(Character.toString(c)))
          }
          i += 1
        }
      }
      out.toString
    }
  }

  // Lower-case spellings accepted as boolean true / false.
  private[this] val trueStrings = Set("t", "true", "y", "yes", "1").map(UTF8String.fromString)
  private[this] val falseStrings = Set("f", "false", "n", "no", "0").map(UTF8String.fromString)

  /** True when the lower-cased `s` is one of "t", "true", "y", "yes", "1". */
  def isTrueString(s: UTF8String): Boolean = trueStrings.contains(s.toLowerCase)

  /** True when the lower-cased `s` is one of "f", "false", "n", "no", "0". */
  def isFalseString(s: UTF8String): Boolean = falseStrings.contains(s.toLowerCase)
}

Source File: ZioAsyncHandlerTest.scala From pulsar4s with Apache License 2.0

5 votes

package com.sksamuel.pulsar4s.zio

import java.util.UUID

import com.sksamuel.pulsar4s.{ConsumerConfig, ProducerConfig, PulsarClient, Subscription, Topic}
import org.apache.pulsar.client.api.Schema
import org.scalatest.BeforeAndAfterAll
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers
import java.util.regex.Pattern

/**
 * Exercises the ZIO async handler against a Pulsar broker at `pulsar://localhost:6650`.
 * The consumer tests read the message published by the producer test, so they rely on
 * the tests running in declaration order.
 */
class ZioAsyncHandlerTest extends AnyFunSuite with Matchers with BeforeAndAfterAll {

  import ZioAsyncHandler._

  implicit val schema: Schema[String] = Schema.STRING

  private val client = PulsarClient("pulsar://localhost:6650")
  // Random topic suffix keeps separate runs from seeing each other's messages.
  private val topic = Topic("persistent://sample/standalone/ns1/zio_" + UUID.randomUUID())

  override def afterAll(): Unit = {
    client.close()
  }

  test("async producer should use zio") {
    val producer = client.producer(ProducerConfig(topic))
    // try/finally so the producer is released even when the assertion fails.
    try {
      val t = producer.sendAsync("wibble")
      val r = zio.Runtime.default.unsafeRun(t.either)
      r.right.get should not be null
    } finally {
      producer.close()
    }
  }

  test("async consumer should use zio") {
    val consumer = client.consumer(ConsumerConfig(topics = Seq(topic), subscriptionName = Subscription("mysub_" + UUID.randomUUID())))
    try {
      consumer.seekEarliest()
      val t = consumer.receiveAsync
      val r = zio.Runtime.default.unsafeRun(t.either)
      r shouldBe Symbol("right")
      new String(r.right.get.data) shouldBe "wibble"
    } finally {
      consumer.close()
    }
  }

  test("async consumer getMessageById should use zio") {
    val consumer = client.consumer(ConsumerConfig(topics = Seq(topic), subscriptionName = Subscription("mysub_" + UUID.randomUUID())))
    try {
      consumer.seekEarliest()
      val receive = consumer.receiveAsync
      val value = zio.Runtime.default.unsafeRun(receive.either)
      val t = consumer.getLastMessageIdAsync
      val r = zio.Runtime.default.unsafeRun(t.either)
      // Compare the ':'-separated components of both message ids pairwise.
      val zipped = r.right.get.toString.split(":") zip value.right.get.messageId.toString.split(":")
      zipped.foreach { case (last, received) => last shouldBe received }
    } finally {
      consumer.close()
    }
  }
}

Source File: StringUtil.scala From ingraph with Eclipse Public License 1.0

5 votes

package ingraph.compiler.cypher2gplan.util

import java.io.UnsupportedEncodingException
import java.util.regex.{Matcher, Pattern}
import javax.xml.bind.DatatypeConverter

import org.apache.spark.sql.catalyst.{expressions => cExpr}
import org.slizaa.neo4j.opencypher.{openCypher => oc}

object StringUtil {
  // Whole string wrapped in a matching pair of single or double quotes.
  private val patternStringDelimiterCheck = Pattern.compile("^'.*'$|^\".*\"$")
  // A single quote character (either kind) at the very start or very end.
  private val patternStringDelimiterReplace = Pattern.compile("^[\"']|[\"']$")
  // A literal backslash has to be escaped twice: once for the regex syntax and once for
  // the string literal in the source, hence \\\\ below stands for one backslash.
  private val patterBackslashNotation = Pattern.compile(
  "(?<!\\\\)\\\\(\\\\|'|\"|b|f|n|r|t|_|%|u([0-9a-fA-F]{4})|U([0-9a-fA-F]{8}))"
  )

  /**
   * Parses `s` as an `Int`.
   *
   * @return `None` for `null` or the empty string, otherwise `Some` of the parsed value
   * @throws NumberFormatException if `s` is non-empty but not a valid integer
   */
  def toOptionInt(s: String): Option[Int] =
    Option(s) match {
      case Some(text) if !text.isEmpty => Some(text.toInt)
      case _ => None
    }
}

Source File: ShiftingConsumerImpl.scala From kafka4s with Apache License 2.0

5 votes

package com.banno.kafka.consumer

import cats.effect.{Async, ContextShift}
import java.util.regex.Pattern

import scala.concurrent.duration._
import org.apache.kafka.common._
import org.apache.kafka.clients.consumer._

import scala.concurrent.ExecutionContext

/**
 * `ConsumerApi` decorator that evaluates every operation of the wrapped consumer `c`
 * on `blockingContext` through `ContextShift.evalOn`. `wakeup` is the single exception
 * and is forwarded without shifting.
 */
case class ShiftingConsumerImpl[F[_]: Async, K, V](
    c: ConsumerApi[F, K, V],
    blockingContext: ExecutionContext
)(implicit CS: ContextShift[F])
    extends ConsumerApi[F, K, V] {

  /** Evaluates `fa` on `blockingContext`. */
  private[this] def onBlocking[A](fa: F[A]): F[A] = CS.evalOn(blockingContext)(fa)

  def assign(partitions: Iterable[TopicPartition]): F[Unit] =
    onBlocking(c.assign(partitions))
  def assignment: F[Set[TopicPartition]] =
    onBlocking(c.assignment)
  def beginningOffsets(partitions: Iterable[TopicPartition]): F[Map[TopicPartition, Long]] =
    onBlocking(c.beginningOffsets(partitions))
  def beginningOffsets(
      partitions: Iterable[TopicPartition],
      timeout: FiniteDuration
  ): F[Map[TopicPartition, Long]] =
    onBlocking(c.beginningOffsets(partitions, timeout))
  def close: F[Unit] =
    onBlocking(c.close)
  def close(timeout: FiniteDuration): F[Unit] =
    onBlocking(c.close(timeout))
  def commitAsync: F[Unit] =
    onBlocking(c.commitAsync)
  def commitAsync(
      offsets: Map[TopicPartition, OffsetAndMetadata],
      callback: OffsetCommitCallback
  ): F[Unit] =
    onBlocking(c.commitAsync(offsets, callback))
  def commitAsync(callback: OffsetCommitCallback): F[Unit] =
    onBlocking(c.commitAsync(callback))
  def commitSync: F[Unit] =
    onBlocking(c.commitSync)
  def commitSync(offsets: Map[TopicPartition, OffsetAndMetadata]): F[Unit] =
    onBlocking(c.commitSync(offsets))
  def committed(partition: Set[TopicPartition]): F[Map[TopicPartition, OffsetAndMetadata]] =
    onBlocking(c.committed(partition))
  def endOffsets(partitions: Iterable[TopicPartition]): F[Map[TopicPartition, Long]] =
    onBlocking(c.endOffsets(partitions))
  def endOffsets(
      partitions: Iterable[TopicPartition],
      timeout: FiniteDuration
  ): F[Map[TopicPartition, Long]] =
    onBlocking(c.endOffsets(partitions, timeout))
  def listTopics: F[Map[String, Seq[PartitionInfo]]] =
    onBlocking(c.listTopics)
  def listTopics(timeout: FiniteDuration): F[Map[String, Seq[PartitionInfo]]] =
    onBlocking(c.listTopics(timeout))
  def metrics: F[Map[MetricName, Metric]] =
    onBlocking(c.metrics)
  def offsetsForTimes(
      timestampsToSearch: Map[TopicPartition, Long]
  ): F[Map[TopicPartition, OffsetAndTimestamp]] =
    onBlocking(c.offsetsForTimes(timestampsToSearch))
  def offsetsForTimes(
      timestampsToSearch: Map[TopicPartition, Long],
      timeout: FiniteDuration
  ): F[Map[TopicPartition, OffsetAndTimestamp]] =
    onBlocking(c.offsetsForTimes(timestampsToSearch, timeout))
  def partitionsFor(topic: String): F[Seq[PartitionInfo]] =
    onBlocking(c.partitionsFor(topic))
  def partitionsFor(topic: String, timeout: FiniteDuration): F[Seq[PartitionInfo]] =
    onBlocking(c.partitionsFor(topic, timeout))
  def pause(partitions: Iterable[TopicPartition]): F[Unit] =
    onBlocking(c.pause(partitions))
  def paused: F[Set[TopicPartition]] =
    onBlocking(c.paused)
  def poll(timeout: FiniteDuration): F[ConsumerRecords[K, V]] =
    onBlocking(c.poll(timeout))
  def position(partition: TopicPartition): F[Long] =
    onBlocking(c.position(partition))
  def resume(partitions: Iterable[TopicPartition]): F[Unit] =
    onBlocking(c.resume(partitions))
  def seek(partition: TopicPartition, offset: Long): F[Unit] =
    onBlocking(c.seek(partition, offset))
  def seekToBeginning(partitions: Iterable[TopicPartition]): F[Unit] =
    onBlocking(c.seekToBeginning(partitions))
  def seekToEnd(partitions: Iterable[TopicPartition]): F[Unit] =
    onBlocking(c.seekToEnd(partitions))
  def subscribe(topics: Iterable[String]): F[Unit] =
    onBlocking(c.subscribe(topics))
  def subscribe(topics: Iterable[String], callback: ConsumerRebalanceListener): F[Unit] =
    onBlocking(c.subscribe(topics, callback))
  def subscribe(pattern: Pattern): F[Unit] =
    onBlocking(c.subscribe(pattern))
  def subscribe(pattern: Pattern, callback: ConsumerRebalanceListener): F[Unit] =
    onBlocking(c.subscribe(pattern, callback))
  def subscription: F[Set[String]] =
    onBlocking(c.subscription)
  def unsubscribe: F[Unit] =
    onBlocking(c.unsubscribe)

  // Deliberately not shifted.
  // TODO wakeup is the one method that is thread-safe, right?
  def wakeup: F[Unit] = c.wakeup
}

object ShiftingConsumerImpl {

  /**
   * Wraps `c` so that its operations are evaluated on `e`. The result is typed as the
   * plain `ConsumerApi`, which is the type expected when creating a Resource.
   */
  def create[F[_]: Async: ContextShift, K, V](
      c: ConsumerApi[F, K, V],
      e: ExecutionContext
  ): ConsumerApi[F, K, V] = {
    val shifting: ConsumerApi[F, K, V] = ShiftingConsumerImpl(c, e)
    shifting
  }
}

Source File: ConsumerApiWrapper.scala From kafka4s with Apache License 2.0

5 votes

package com.banno.kafka.consumer

import org.apache.kafka.common._
import org.apache.kafka.clients.consumer._

import scala.concurrent.duration._
import java.util.regex.Pattern

/**
 * Base trait for `ConsumerApi` decorators: every operation implemented here forwards
 * unchanged to [[api]], so a subclass only overrides the calls it wants to alter.
 */
trait ConsumerApiWrapper[F[_], K, V] extends ConsumerApi[F, K, V] {

  /** The wrapped consumer that all calls are forwarded to. */
  def api: ConsumerApi[F, K, V]

  // ---- subscription and assignment ----
  def assign(partitions: Iterable[TopicPartition]): F[Unit] =
    api.assign(partitions)
  def assignment: F[Set[TopicPartition]] =
    api.assignment
  def subscribe(topics: Iterable[String]): F[Unit] =
    api.subscribe(topics)
  def subscribe(topics: Iterable[String], callback: ConsumerRebalanceListener): F[Unit] =
    api.subscribe(topics, callback)
  def subscribe(pattern: Pattern): F[Unit] =
    api.subscribe(pattern)
  def subscribe(pattern: Pattern, callback: ConsumerRebalanceListener): F[Unit] =
    api.subscribe(pattern, callback)
  def subscription: F[Set[String]] =
    api.subscription
  def unsubscribe: F[Unit] =
    api.unsubscribe

  // ---- fetching and flow control ----
  def poll(timeout: FiniteDuration): F[ConsumerRecords[K, V]] =
    api.poll(timeout)
  def pause(partitions: Iterable[TopicPartition]): F[Unit] =
    api.pause(partitions)
  def paused: F[Set[TopicPartition]] =
    api.paused
  def resume(partitions: Iterable[TopicPartition]): F[Unit] =
    api.resume(partitions)
  def wakeup: F[Unit] =
    api.wakeup

  // ---- positions and seeking ----
  def position(partition: TopicPartition): F[Long] =
    api.position(partition)
  def seek(partition: TopicPartition, offset: Long): F[Unit] =
    api.seek(partition, offset)
  def seekToBeginning(partitions: Iterable[TopicPartition]): F[Unit] =
    api.seekToBeginning(partitions)
  def seekToEnd(partitions: Iterable[TopicPartition]): F[Unit] =
    api.seekToEnd(partitions)

  // ---- offset lookups ----
  def beginningOffsets(partitions: Iterable[TopicPartition]): F[Map[TopicPartition, Long]] =
    api.beginningOffsets(partitions)
  def endOffsets(partitions: Iterable[TopicPartition]): F[Map[TopicPartition, Long]] =
    api.endOffsets(partitions)
  def offsetsForTimes(
      timestampsToSearch: Map[TopicPartition, Long]
  ): F[Map[TopicPartition, OffsetAndTimestamp]] =
    api.offsetsForTimes(timestampsToSearch)
  def committed(partition: Set[TopicPartition]): F[Map[TopicPartition, OffsetAndMetadata]] =
    api.committed(partition)

  // ---- commits ----
  def commitAsync: F[Unit] =
    api.commitAsync
  def commitAsync(
      offsets: Map[TopicPartition, OffsetAndMetadata],
      callback: OffsetCommitCallback
  ): F[Unit] =
    api.commitAsync(offsets, callback)
  def commitAsync(callback: OffsetCommitCallback): F[Unit] =
    api.commitAsync(callback)
  def commitSync: F[Unit] =
    api.commitSync
  def commitSync(offsets: Map[TopicPartition, OffsetAndMetadata]): F[Unit] =
    api.commitSync(offsets)

  // ---- metadata ----
  def listTopics: F[Map[String, Seq[PartitionInfo]]] =
    api.listTopics
  def partitionsFor(topic: String): F[Seq[PartitionInfo]] =
    api.partitionsFor(topic)
  def metrics: F[Map[MetricName, Metric]] =
    api.metrics

  // ---- lifecycle ----
  def close: F[Unit] =
    api.close
  def close(timeout: FiniteDuration): F[Unit] =
    api.close(timeout)
}

Source File: Avro4sConsumerImpl.scala From kafka4s with Apache License 2.0

5 votes

package com.banno.kafka.consumer

import cats.implicits._
import java.util.regex.Pattern

import scala.concurrent.duration._
import org.apache.kafka.common._
import org.apache.kafka.clients.consumer._
import org.apache.avro.generic.GenericRecord
import com.sksamuel.avro4s.FromRecord
import cats.Functor
import com.banno.kafka._

//this is a Bifunctor[ConsumerApi]

/**
 * Presents a `GenericRecord`-based consumer `c` as a `ConsumerApi[F, K, V]`. Only
 * `poll` transforms its result (through `fromGenericRecords[K, V]`); every other
 * operation forwards to `c` unchanged.
 */
case class Avro4sConsumerImpl[F[_]: Functor, K: FromRecord, V: FromRecord](
    c: ConsumerApi[F, GenericRecord, GenericRecord]
) extends ConsumerApi[F, K, V] {

  // ---- fetching: the only place where records are converted ----
  def poll(timeout: FiniteDuration): F[ConsumerRecords[K, V]] =
    c.poll(timeout).map(_.fromGenericRecords[K, V])
  def pause(partitions: Iterable[TopicPartition]): F[Unit] =
    c.pause(partitions)
  def paused: F[Set[TopicPartition]] =
    c.paused
  def resume(partitions: Iterable[TopicPartition]): F[Unit] =
    c.resume(partitions)
  def wakeup: F[Unit] =
    c.wakeup

  // ---- subscription and assignment ----
  def assign(partitions: Iterable[TopicPartition]): F[Unit] =
    c.assign(partitions)
  def assignment: F[Set[TopicPartition]] =
    c.assignment
  def subscribe(topics: Iterable[String]): F[Unit] =
    c.subscribe(topics)
  def subscribe(topics: Iterable[String], callback: ConsumerRebalanceListener): F[Unit] =
    c.subscribe(topics, callback)
  def subscribe(pattern: Pattern): F[Unit] =
    c.subscribe(pattern)
  def subscribe(pattern: Pattern, callback: ConsumerRebalanceListener): F[Unit] =
    c.subscribe(pattern, callback)
  def subscription: F[Set[String]] =
    c.subscription
  def unsubscribe: F[Unit] =
    c.unsubscribe

  // ---- positions and seeking ----
  def position(partition: TopicPartition): F[Long] =
    c.position(partition)
  def seek(partition: TopicPartition, offset: Long): F[Unit] =
    c.seek(partition, offset)
  def seekToBeginning(partitions: Iterable[TopicPartition]): F[Unit] =
    c.seekToBeginning(partitions)
  def seekToEnd(partitions: Iterable[TopicPartition]): F[Unit] =
    c.seekToEnd(partitions)

  // ---- offset lookups ----
  def beginningOffsets(partitions: Iterable[TopicPartition]): F[Map[TopicPartition, Long]] =
    c.beginningOffsets(partitions)
  def beginningOffsets(
      partitions: Iterable[TopicPartition],
      timeout: FiniteDuration
  ): F[Map[TopicPartition, Long]] =
    c.beginningOffsets(partitions, timeout)
  def endOffsets(partitions: Iterable[TopicPartition]): F[Map[TopicPartition, Long]] =
    c.endOffsets(partitions)
  def endOffsets(
      partitions: Iterable[TopicPartition],
      timeout: FiniteDuration
  ): F[Map[TopicPartition, Long]] =
    c.endOffsets(partitions, timeout)
  def offsetsForTimes(
      timestampsToSearch: Map[TopicPartition, Long]
  ): F[Map[TopicPartition, OffsetAndTimestamp]] =
    c.offsetsForTimes(timestampsToSearch)
  def offsetsForTimes(
      timestampsToSearch: Map[TopicPartition, Long],
      timeout: FiniteDuration
  ): F[Map[TopicPartition, OffsetAndTimestamp]] =
    c.offsetsForTimes(timestampsToSearch, timeout)
  def committed(partition: Set[TopicPartition]): F[Map[TopicPartition, OffsetAndMetadata]] =
    c.committed(partition)

  // ---- commits ----
  def commitAsync: F[Unit] =
    c.commitAsync
  def commitAsync(
      offsets: Map[TopicPartition, OffsetAndMetadata],
      callback: OffsetCommitCallback
  ): F[Unit] =
    c.commitAsync(offsets, callback)
  def commitAsync(callback: OffsetCommitCallback): F[Unit] =
    c.commitAsync(callback)
  def commitSync: F[Unit] =
    c.commitSync
  def commitSync(offsets: Map[TopicPartition, OffsetAndMetadata]): F[Unit] =
    c.commitSync(offsets)

  // ---- metadata ----
  def listTopics: F[Map[String, Seq[PartitionInfo]]] =
    c.listTopics
  def listTopics(timeout: FiniteDuration): F[Map[String, Seq[PartitionInfo]]] =
    c.listTopics(timeout)
  def partitionsFor(topic: String): F[Seq[PartitionInfo]] =
    c.partitionsFor(topic)
  def partitionsFor(topic: String, timeout: FiniteDuration): F[Seq[PartitionInfo]] =
    c.partitionsFor(topic, timeout)
  def metrics: F[Map[MetricName, Metric]] =
    c.metrics

  // ---- lifecycle ----
  def close: F[Unit] =
    c.close
  def close(timeout: FiniteDuration): F[Unit] =
    c.close(timeout)
}

Source File: DataSourceV2Utils.scala From Spark-2.3.1 with Apache License 2.0

5 votes

package org.apache.spark.sql.execution.datasources.v2

import java.util.regex.Pattern

import org.apache.spark.internal.Logging
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.sources.v2.{DataSourceV2, SessionConfigSupport}

private[sql] object DataSourceV2Utils extends Logging {

  /**
   * Collects the session configs addressed to the given data source.
   *
   * When `ds` is a `SessionConfigSupport`, every config whose key has the form
   * `spark.datasource.<keyPrefix>.<rest>` is returned under the key `<rest>` with its
   * value unchanged. Any other data source yields an empty map.
   *
   * @param ds   the data source whose key prefix selects the configs
   * @param conf the session configuration to read from
   * @throws IllegalArgumentException if the data source reports a `null` key prefix
   */
  def extractSessionConfigs(ds: DataSourceV2, conf: SQLConf): Map[String, String] = ds match {
    case cs: SessionConfigSupport =>
      val keyPrefix = cs.keyPrefix()
      require(keyPrefix != null, "The data source config key prefix can't be null.")

      // Quote the prefix: it is data, not a regex, so characters such as '.' or '+'
      // in it must only match themselves.
      val pattern = Pattern.compile(s"^spark\\.datasource\\.${Pattern.quote(keyPrefix)}\\.(.+)")

      conf.getAllConfs.flatMap { case (key, value) =>
        val m = pattern.matcher(key)
        // The pattern always has exactly one group, so a full match is sufficient.
        if (m.matches()) {
          Seq((m.group(1), value))
        } else {
          Seq.empty
        }
      }

    case _ => Map.empty
  }
}

Source File: StringUtils.scala From Spark-2.3.1 with Apache License 2.0

5 votes

package org.apache.spark.sql.catalyst.util

import java.util.regex.{Pattern, PatternSyntaxException}

import org.apache.spark.sql.AnalysisException
import org.apache.spark.unsafe.types.UTF8String

object StringUtils {

  /**
   * Selects the names that match a simple wildcard pattern.
   *
   * `pattern` is trimmed and split on `|` into alternatives. In each alternative `*`
   * stands for any run of characters, everything else is interpreted as a
   * case-insensitive regular expression, and a name must match in full. Alternatives
   * that are not valid regular expressions are ignored.
   *
   * @return the matching names, sorted and without duplicates
   */
  def filterPattern(names: Seq[String], pattern: String): Seq[String] = {
    val hits = pattern.trim().split("\\|").toList.flatMap { subPattern =>
      try {
        val compiled = Pattern.compile("(?i)" + subPattern.replaceAll("\\*", ".*"))
        names.filter(name => compiled.matcher(name).matches())
      } catch {
        // Best effort: an alternative that does not compile contributes nothing.
        case _: PatternSyntaxException => Seq.empty[String]
      }
    }
    hits.distinct.sorted
  }
}

Source File: PlaceholderParser.scala From cornichon with Apache License 2.0

5 votes

package com.github.agourlay.cornichon.resolver

import java.util.regex.Pattern

import com.github.agourlay.cornichon.core.{ CornichonError, Session }
import com.github.agourlay.cornichon.resolver.PlaceholderParser._
import org.parboiled2._

import scala.util.{ Failure, Success }

/**
 * parboiled2 parser that extracts `<key>` / `<key[index]>` placeholders from `input`.
 * The entry rule is [[placeholdersRule]].
 */
class PlaceholderParser(val input: ParserInput) extends Parser {

  // Entry rule: placeholders separated by ignored text, with ignored text allowed
  // before and after, up to the end of input.
  def placeholdersRule = rule {
    Ignore ~ zeroOrMore(PlaceholderRule).separatedBy(Ignore) ~ Ignore ~ EOI
  }

  // '<' key, optional "[index]", '>' — the captured parts are passed to Placeholder.
  def PlaceholderRule = rule('<' ~ PlaceholderTXT ~ optIndex ~ '>' ~> Placeholder)

  // Optional numeric index in square brackets.
  def optIndex = rule(optional('[' ~ Number ~ ']'))

  // The key: one or more characters allowed by allowedCharsInPlaceholdersPredicate.
  def PlaceholderTXT = rule(capture(oneOrMore(allowedCharsInPlaceholdersPredicate)))

  // Consumes any characters that do not start a placeholder.
  def Ignore = rule { zeroOrMore(!PlaceholderRule ~ ANY) }

  // Captures a run of digits and converts it with toInt.
  def Number = rule { capture(Digits) ~> (_.toInt) }

  def Digits = rule { oneOrMore(CharPredicate.Digit) }
}

object PlaceholderParser {

  // Shared result for inputs that cannot contain a placeholder.
  private val noPlaceholders = Right(Nil)
  // Visible characters minus those that Session does not allow in keys.
  private val allowedCharsInPlaceholdersPredicate: CharPredicate = CharPredicate.Visible -- Session.notAllowedInKey

  /**
   * Extracts the distinct placeholders of `input`, in order of first occurrence.
   *
   * @return `Right` with the placeholders (empty when `input` has no `<`), or `Left`
   *         with a `PlaceholderParsingError` for a parse error and a
   *         `PlaceholderError` for any other failure
   */
  def parse(input: String): Either[CornichonError, List[Placeholder]] =
    if (input.contains("<")) {
      val parser = new PlaceholderParser(input)
      parser.placeholdersRule.run() match {
        case Success(found) =>
          Right(found.toList.distinct)
        case Failure(parseError: ParseError) =>
          val formatted = parser.formatError(parseError, new ErrorFormatter(showTraces = true))
          Left(PlaceholderParsingError(input, formatted))
        case Failure(other) =>
          Left(PlaceholderError(input, other))
      }
    } else {
      // Without a '<' there is nothing to find, so skip running the parser.
      noPlaceholders
    }
}

/**
 * A placeholder found in some text.
 *
 * @param key   the name between the angle brackets
 * @param index optional index, rendered in square brackets after the key
 */
case class Placeholder(key: String, index: Option[Int]) {
  /** Textual form: `<key>` without an index, `<key[index]>` with one. */
  val fullKey: String = index match {
    case Some(position) => s"<$key[$position]>"
    case None => s"<$key>"
  }

  /** Pattern matching [[fullKey]] literally; compiled on first use. */
  lazy val pattern: Pattern = {
    val literal = Pattern.quote(fullKey)
    Pattern.compile(literal)
  }
}

/** Failure other than a parse error raised while extracting placeholders from `input`. */
case class PlaceholderError(input: String, error: Throwable) extends CornichonError {
  // Built lazily from the throwable's message and the offending input.
  lazy val baseErrorMessage = s"error '${error.getMessage}' thrown during placeholder parsing for input $input"
}

/** Parse error for `input`; `error` holds the already formatted parser message. */
case class PlaceholderParsingError(input: String, error: String) extends CornichonError {
  // Built lazily from the formatted parser message and the offending input.
  lazy val baseErrorMessage = s"error '$error' during placeholder parsing for input $input"
}

Source File: StringUtils.scala From spark1.52 with Apache License 2.0

5 votes

package org.apache.spark.sql.catalyst.util

import java.util.regex.Pattern

object StringUtils {

  /**
   * Translates a SQL LIKE pattern into a Java regular expression.
   *
   *  - `_` becomes `.` (exactly one character) and `%` becomes `.*` (any run of characters);
   *  - `\_` and `\%` become the literal characters `_` and `%`;
   *  - `\\` becomes a literal backslash (it used to be dropped, and its second
   *    backslash wrongly escaped the following character);
   *  - `\x` for any other `x` matches a literal backslash followed by `x`;
   *  - a lone trailing backslash is ignored;
   *  - every other character is quoted and matched literally.
   *
   * The result is prefixed with `(?s)` so `.` also matches line terminators. An empty
   * pattern is returned unchanged (without the prefix).
   */
  def escapeLikeRegex(v: String): String = {
    if (v.isEmpty) {
      v
    } else {
      val out = new StringBuilder("(?s)")
      var i = 0
      while (i < v.length) {
        val c = v.charAt(i)
        if (c == '\\') {
          if (i + 1 < v.length) {
            // Consume the escape and the escaped character together so the escaped
            // character can never be re-read as the start of another escape.
            v.charAt(i + 1) match {
              case '_' => out.append("_")
              case '%' => out.append("%")
              case '\\' => out.append(Pattern.quote("\\"))
              case other => out.append(Pattern.quote("\\" + other))
            }
            i += 2
          } else {
            // Lone trailing backslash contributes nothing.
            i += 1
          }
        } else {
          c match {
            case '_' => out.append(".")
            case '%' => out.append(".*")
            case _ => out.append(Pattern.quote(Character.toString(c)))
          }
          i += 1
        }
      }
      out.toString
    }
  }
}

Source File: DcosReleaseVersionParser.scala From cosmos with Apache License 2.0

5 votes

package com.mesosphere.universe.v3.model

import com.twitter.util.{Return, Throw, Try}

import java.util.regex.Pattern

/**
 * Parses version strings of the form `N(.N)*` optionally followed by `-suffix`, where
 * each `N` is a number without leading zeros and the suffix is a dot-separated list of
 * identifiers (see the regex fragments below).
 */
object DcosReleaseVersionParser {

  private[this] val versionFragment = "(?:0|[1-9][0-9]*)"
  private[this] val subVersionFragment = "\\." + versionFragment
  private[this] val suffixFragment =
    "((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*)"

  private[v3] val versionRegex = s"^$versionFragment$$"
  private[v3] val suffixRegex = s"^$suffixFragment$$"
  private[v3] val fullRegex = s"^$versionFragment(?:$subVersionFragment)*(?<suffix>-$suffixFragment)?$$"

  private[v3] val versionPattern = Pattern.compile(versionRegex)
  private[v3] val suffixPattern = Pattern.compile(suffixRegex)
  private[v3] val fullPattern = Pattern.compile(fullRegex)

  /** Like [[parse]], but throws the failure instead of returning it. */
  def parseUnsafe(s: String): DcosReleaseVersion = parse(s).get

  /**
   * Validates `s` against `fullRegex` and parses it.
   *
   * @return `Return` with the parsed version, or a `Throw` holding the validation
   *         failure (an `AssertionError`, as raised by `assert`) or the number-parsing
   *         failure
   */
  def parse(s: String): Try[DcosReleaseVersion] = {
    val errMsg = s"Value '$s' does not conform to expected format $fullRegex"
    Try {
      assert(!s.trim.isEmpty, "Value must not be empty")
      assert(fullPattern.matcher(s).matches(), errMsg)
      s
    } map { validatedString =>
      // Cut at the FIRST '-': the numeric part never contains one, while the suffix
      // may contain (and end with) several. String.split would silently discard
      // trailing empty pieces and so truncate a suffix such as "rc-".
      validatedString.indexOf('-') match {
        case -1 =>
          validatedString -> Option.empty[String]
        case dash =>
          validatedString.substring(0, dash) -> Option(validatedString.substring(dash + 1))
      }
    } flatMap { case (version, suffix) =>
      parseVersionSuffix(version, suffix, errMsg)
    }
  }

  /** Parses a single numeric component; fails if it does not fit into an `Int`. */
  private[this] def parseVersion(s: String): Try[DcosReleaseVersion.Version] = Try {
    DcosReleaseVersion.Version(s.toInt)
  }

  /** Wraps an optional suffix string into an optional `Suffix`. */
  private[this] def parseSuffix(s: Option[String]): Try[Option[DcosReleaseVersion.Suffix]] =
    Return(s.map(suff => DcosReleaseVersion.Suffix(suff)))

  /**
   * Builds the version from the dot-separated numeric part and the optional suffix.
   * The first number is the head version, the remaining ones (possibly none) the
   * sub-versions.
   */
  private[this] def parseVersionSuffix(version: String, suffix: Option[String], errMsg: String): Try[DcosReleaseVersion] = {
    version.split('.').toList match {
      case head :: tail =>
        for {
          h <- parseVersion(head)
          // Try.collect of an empty list is an empty success, so one case covers
          // zero, one or many sub-versions.
          t <- Try.collect(tail.map(parseVersion))
          s <- parseSuffix(suffix)
        } yield {
          DcosReleaseVersion(h, t.toList, s)
        }
      case _ =>
        Throw(new AssertionError(errMsg))
    }
  }

}

Source File: Segment.scala From shield with MIT License

5 votes

package shield.routing

import java.util.regex.Pattern

/**
 * One piece of a route path. Each segment supplies the regex fragment it contributes
 * and a package-private priority number.
 */
sealed trait Segment {
  /** Regex fragment matching this segment. */
  def regexPiece : String
  private[routing] def priority: Int
}

/** The path separator. */
case object SlashSegment extends Segment {
  override private[routing] val priority: Int = 0
  override def regexPiece: String = "/"
  override def toString: String = "/"
}

/** Fixed text; quoted so it is matched literally. */
case class StaticSegment(segment: String) extends Segment {
  override private[routing] val priority: Int = 1
  override def regexPiece: String = Pattern.quote(segment)
  override def toString: String = segment
}

/** Optional `.extension`: a dot followed by any characters other than `/`. */
case object ExtensionSegment extends Segment {
  override private[routing] val priority: Int = 2
  override def regexPiece: String = "(\\.[^/]*)?"
  override def toString: String = "(.extension)?"
}

/** Any run of characters other than `/`. */
case object WildcardSegment extends Segment {
  override private[routing] val priority: Int = 3
  override def regexPiece: String = "[^/]*"
  override def toString: String = "{}"
}

/** Caller-supplied regular expression, used verbatim. */
case class RegexSegment(expr: String) extends Segment {
  override private[routing] val priority: Int = 4
  override def regexPiece: String = expr
  override def toString: String = s"{regex: $expr}"
}

/** Any run of characters, `/` included. */
case object PathSegment extends Segment {
  override private[routing] val priority: Int = 5
  override def regexPiece: String = ".*"
  override def toString: String = "(.*)"
}

Source File: FileUtils.scala From sctags with Apache License 2.0

5 votes

package sctags

import java.io.{File, FileFilter}
import java.util.regex.Pattern

import scala.collection.mutable.ArrayBuffer
import scala.language.implicitConversions

/** `FileFilter` combinators and a recursive directory listing. */
object FileUtils {

  /** Lets a plain `File => Boolean` predicate be used where a `FileFilter` is expected. */
  implicit def fun2fileFilter(fun: File => Boolean): FileFilter =
    new FileFilter { def accept(f: File): Boolean = fun(f) }

  /** Adds the `!`, `&&` and `||` combinators to any `FileFilter`. */
  implicit def fileFilter2richFilter(filter: FileFilter): RichFilter =
    new RichFilter(filter)

  final class RichFilter(val self: FileFilter) extends Proxy {
    /** Filter accepting exactly the files `self` rejects. */
    def unary_! : FileFilter = new FileFilter { def accept(f: File): Boolean = !self.accept(f) }

    /** Combines both filters' verdicts with `op`; both filters are always consulted. */
    def join(other: FileFilter, op: (Boolean, Boolean) => Boolean): FileFilter =
      new FileFilter { def accept(f: File): Boolean = op(self.accept(f), other.accept(f)) }

    def &&(other: FileFilter): FileFilter = join(other, _ && _)
    def ||(other: FileFilter): FileFilter = join(other, _ || _)
  }

  /** Accepts directories only. */
  object DirectoryFilter extends FileFilter {
    def accept(f: File): Boolean = f.isDirectory
  }

  /** Accepts files whose name (not path) matches `re` in full. */
  class NameMatchFilter(val re: Pattern) extends FileFilter {
    def this(re: String) = this(Pattern.compile(re))
    def accept(f: File): Boolean = re.matcher(f.getName).matches
  }

  /** Accepts everything. */
  object AcceptAllFilter extends FileFilter {
    def accept(f: File): Boolean = true
  }

  /**
   * Lists every entry below `base` that `filter` accepts.
   *
   * Sub-directories are descended into first, then the accepted entries of the
   * directory itself are appended. If `base` (or any directory on the way) cannot be
   * listed — it is not a directory, or an I/O error occurs — it contributes nothing
   * instead of failing with a `NullPointerException`.
   *
   * @return an immutable snapshot of the accepted entries
   */
  def listFilesRecursive(base: File, filter: FileFilter): Seq[File] = {
    val files = new ArrayBuffer[File]
    def processDir(dir: File): Unit = {
      // File.listFiles returns null rather than an empty array on failure.
      Option(dir.listFiles(DirectoryFilter)).foreach(_.foreach(processDir))
      Option(dir.listFiles(filter)).foreach(matched => files ++= matched)
    }
    processDir(base)
    files.toList
  }
}

Source File: AccessLogParser.scala From spark-scala with Creative Commons Zero v1.0 Universal

5 votes

package com.supergloo.utils

import java.util.regex.Pattern

import com.supergloo.models.HttpStatus


  /**
   * Applies the pattern `p` to `logLine` and, on the first match, builds an
   * `HttpStatus` from capture group 6.
   *
   * @return `None` when the pattern is not found in the line
   */
  def parseHttpStatusCode(logLine: String): Option[HttpStatus] = {
    val m = p.matcher(logLine)
    if (!m.find) None
    else Some(createHttpStatus(m.group(6)))
  }

}

Source File: RegexLiteral.scala From kantan.regex with Apache License 2.0

5 votes

package kantan.regex
package literals

import java.util.regex.Pattern
import scala.reflect.macros.blackbox.Context
import scala.util.{Failure, Success, Try => UTry}

/** Value class adding the `rx` string interpolator to a `StringContext`. */
final class RegexLiteral(val sc: StringContext) extends AnyVal {
  // Macro-backed: expansion is delegated to RegexLiteral.rxImpl.
  def rx(args: Any*): Pattern = macro RegexLiteral.rxImpl
}

// Relatively distasteful trick to get rid of spurious warnings.
trait RegexLiteralMacro {
  // Signature of the macro implementation backing the `rx` interpolator.
  def rxImpl(c: Context)(args: c.Expr[Any]*): c.Expr[Pattern]
}

/** Holds the macro implementation behind the `rx` interpolator. */
object RegexLiteral extends RegexLiteralMacro {

  /**
   * Macro body for `rx`.
   *
   * Accepts only a prefix tree that wraps exactly one string literal. The literal is
   * compiled during expansion purely to validate it: an invalid regex aborts compilation
   * with "Illegal regex", anything that is not a single literal aborts with
   * "rx can only be used on string literals". On success the generated code calls
   * `Pattern.compile` on the literal. `args` is not consulted.
   */
  override def rxImpl(c: Context)(args: c.Expr[Any]*): c.Expr[Pattern] = {
    import c.universe._

    c.prefix.tree match {
      // An application whose only argument is an application of a single string literal.
      case Apply(_, List(Apply(_, List(lit @ Literal(Constant(str: String)))))) =>
        // Validation only: the resulting Pattern is discarded, the emitted code compiles again.
        UTry(Pattern.compile(str)) match {
          case Failure(_) => c.abort(c.enclosingPosition, s"Illegal regex: '$str'")
          case Success(_) =>
            reify {
              Pattern.compile(c.Expr[String](lit).splice)
            }
        }
      case _ =>
        c.abort(c.enclosingPosition, "rx can only be used on string literals")
    }
  }
}

/** Mix-in providing the implicit conversion from `StringContext` to `RegexLiteral`, which enables `rx"..."`. */
trait ToRegexLiteral {
  implicit def toRegexLiteral(sc: StringContext): RegexLiteral = new RegexLiteral(sc)
}

Source File: ConfigReader.scala From multi-tenancy-spark with Apache License 2.0

5 votes

package org.apache.spark.internal.config

import java.util.{Map => JMap}
import java.util.regex.Pattern

import scala.collection.mutable.HashMap
import scala.util.matching.Regex

/** Constants shared by the configuration reader. */
private object ConfigReader {

  // Matches a `${...}` reference. Group 1 is an optional word-character prefix followed
  // by ':' (null when absent); group 2 is the non-whitespace name.
  private val REF_RE = "\\$\\{(?:(\\w+?):)?(\\S+?)\\}".r

}


  /** Expands the `${...}` references in `input`, starting with an empty set of already-visited references. */
  def substitute(input: String): String = substitute(input, Set())

  /**
   * Recursively expands every `${prefix:name}` / `${name}` reference found in `input`.
   *
   * Each reference is looked up through `bindings` (keyed by the prefix, which is null
   * when the reference has none). A resolved value is expanded in turn; a reference that
   * cannot be resolved is left in the text unchanged.
   *
   * @param input    text to expand; null is returned as-is
   * @param usedRefs references already being expanded on this path, used to detect cycles
   * @throws IllegalArgumentException (via `require`) when a reference refers back to itself
   */
  private def substitute(input: String, usedRefs: Set[String]): String = {
    if (input == null) {
      input
    } else {
      ConfigReader.REF_RE.replaceAllIn(input, { found =>
        val prefix = found.group(1)
        val name = found.group(2)
        val ref = if (prefix != null) s"$prefix:$name" else name
        require(!usedRefs.contains(ref), s"Circular reference in $input: $ref")

        val expanded = for {
          provider <- bindings.get(prefix)
          value <- provider.get(name)
        } yield substitute(value, usedRefs + ref)

        // Quote so that '$' and '\' in the result are not read as group references.
        Regex.quoteReplacement(expanded.getOrElse(found.matched))
      })
    }
  }

}

Source File: StringUtils.scala From multi-tenancy-spark with Apache License 2.0

5 votes

package org.apache.spark.sql.catalyst.util

import java.util.regex.{Pattern, PatternSyntaxException}

import org.apache.spark.unsafe.types.UTF8String

object StringUtils {

  /**
   * Translates a LIKE-style pattern into a Java regex string.
   *
   * An unescaped `_` becomes `.` (exactly one character) and an unescaped `%` becomes
   * `.*` (any run of characters). A backslash produces no output of its own; the
   * character following it is emitted literally when it is `_` or `%`, otherwise as the
   * quoted two-character sequence backslash + character. Every other character is
   * quoted. The result is prefixed with `(?s)`. An empty input is returned unchanged.
   */
  def escapeLikeRegex(v: String): String = {
    if (v.isEmpty) {
      v
    } else {
      // Pair each character with its predecessor; a blank stands in before the first one.
      val withPrevious = (' ' +: v.init).zip(v)
      val pieces = withPrevious.flatMap {
        case (_, '\\') => ""
        case ('\\', escaped) =>
          if (escaped == '_') "_"
          else if (escaped == '%') "%"
          else Pattern.quote("\\" + escaped)
        case (_, plain) =>
          if (plain == '_') "."
          else if (plain == '%') ".*"
          else Pattern.quote(Character.toString(plain))
      }
      "(?s)" + pieces.mkString
    }
  }

  private[this] val trueStrings = Set("t", "true", "y", "yes", "1").map(UTF8String.fromString)
  private[this] val falseStrings = Set("f", "false", "n", "no", "0").map(UTF8String.fromString)

  /** True when the lower-cased `s` is one of t / true / y / yes / 1. */
  def isTrueString(s: UTF8String): Boolean = trueStrings.contains(s.toLowerCase)

  /** True when the lower-cased `s` is one of f / false / n / no / 0. */
  def isFalseString(s: UTF8String): Boolean = falseStrings.contains(s.toLowerCase)

  /**
   * Selects the entries of `names` matching `pattern`.
   *
   * `pattern` is trimmed and split on `|`. In each alternative `*` stands for `.*`, and
   * matching is case-insensitive against the whole name. An alternative that is not a
   * valid regex is ignored. The result is de-duplicated and sorted.
   */
  def filterPattern(names: Seq[String], pattern: String): Seq[String] = {
    val matched = scala.collection.mutable.SortedSet.empty[String]
    for (subPattern <- pattern.trim().split("\\|")) {
      try {
        val compiled = ("(?i)" + subPattern.replaceAll("\\*", ".*")).r.pattern
        matched ++= names.filter(candidate => compiled.matcher(candidate).matches())
      } catch {
        // An alternative that does not compile simply contributes no names.
        case _: PatternSyntaxException =>
      }
    }
    matched.toSeq
  }
}

Source File: PidUtils.scala From sbt-dynamodb with MIT License

5 votes

package com.localytics.sbt.dynamodb

import java.io.File
import java.util.regex.Pattern

/** Helpers for locating and killing the process started from a given jar on a given port. */
object PidUtils {

  /**
   * Finds the pid of the process in `input` that was launched from `jar` with `-port port`.
   *
   * `input` is searched for `<digits> <absolute jar path> -port <port>`; the leading digits
   * of the first hit are returned.
   *
   * @param input text to search (format is assumed to be one process per line — the
   *              producer is outside this file)
   * @param port  port that must follow `-port` exactly
   * @param jar   jar whose absolute path must appear literally
   * @return the pid digits, or `None` when nothing matches
   */
  def extractPid(input: String, port: Int, jar: File): Option[String] = {
    // The negative lookahead stops port 8000 from also matching "-port 80001".
    val pidPortRegex = s"\\d+ ${Pattern.quote(jar.getAbsolutePath)} -port $port(?!\\d)".r
    pidPortRegex.findFirstIn(input).map(_.takeWhile(_.isDigit))
  }

  /**
   * The `os.name` system property, falling back to the `os` property when it is unset
   * or empty. May be null when neither property is defined.
   */
  def osName: String =
    Option(System.getProperty("os.name")).filter(_.nonEmpty).getOrElse(System.getProperty("os"))

  /** Shell command that kills `pid`: `Taskkill` when the OS name contains "windows", `kill` otherwise. */
  def killPidCommand(pid: String): String = {
    // Option guards against a null osName; Locale.ROOT keeps the comparison independent
    // of the default locale (e.g. Turkish lower-casing of 'I').
    val onWindows = Option(osName).exists(_.toLowerCase(java.util.Locale.ROOT).contains("windows"))
    if (onWindows) s"Taskkill /PID $pid /F" else s"kill $pid"
  }

}

Source File: TemplateParamsSpec.scala From comet-data-pipeline with Apache License 2.0

5 votes

package com.ebiznext.comet.database.extractor

import java.util.regex.Pattern

import better.files.File
import com.ebiznext.comet.schema.model._
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

/** Checks `TemplateParams.fromSchema` for an APPEND schema and for an OVERWRITE schema with a custom separator. */
class TemplateParamsSpec extends AnyFlatSpec with Matchers {
  val scriptOutputFolder: File = File("/tmp")

  /** Schema fixture shared by both cases; only the metadata differs between them. */
  private def table1Schema(metadata: Metadata): Schema =
    Schema(
      name = "table1",
      pattern = Pattern.compile("output_file.*.csv"),
      List(Attribute(name = "col1"), Attribute(name = "col2")),
      metadata = Option(metadata),
      merge = Some(MergeOptions(List("col1", "col2"), None, timestamp = Some("updateCol"))),
      comment = None,
      presql = None,
      postsql = None
    )

  "fromSchema" should "generate the correct TemplateParams for a given Schema" in {
    val appendSchema: Schema = table1Schema(Metadata(write = Some(WriteMode.APPEND)))

    val expected = TemplateParams(
      tableToExport = "table1",
      columnsToExport = List("col1", "col2"),
      fullExport = false,
      dsvDelimiter = ",",
      deltaColumn = Some("updateCol"),
      exportOutputFileBase = "output_file",
      scriptOutputFile = scriptOutputFolder / "EXTRACT_table1.sql"
    )

    TemplateParams.fromSchema(appendSchema, scriptOutputFolder) shouldBe expected
  }

  it should "generate the correct TemplateParams for an other Schema" in {
    val overwriteSchema: Schema =
      table1Schema(Metadata(write = Some(WriteMode.OVERWRITE), separator = Some("|")))

    val expected = TemplateParams(
      tableToExport = "table1",
      columnsToExport = List("col1", "col2"),
      fullExport = true,
      dsvDelimiter = "|",
      deltaColumn = None,
      exportOutputFileBase = "output_file",
      scriptOutputFile = scriptOutputFolder / "EXTRACT_table1.sql"
    )

    TemplateParams.fromSchema(overwriteSchema, scriptOutputFolder) shouldBe expected
  }
}

Source File: ESLoadConfig.scala From comet-data-pipeline with Apache License 2.0

5 votes

package com.ebiznext.comet.job.index.esload

import java.util.regex.Pattern

import buildinfo.BuildInfo
import com.ebiznext.comet.config.Settings
import com.ebiznext.comet.schema.model.RowLevelSecurity
import com.ebiznext.comet.utils.CliConfig
import org.apache.hadoop.fs.Path
import scopt.OParser

/**
 * Options of the Elasticsearch load job.
 *
 * `timestamp` may have the form `{column|format}`; see `getTimestampCol` and `getResource`
 * for how it is used.
 */
case class ESLoadConfig(
  timestamp: Option[String] = None,
  id: Option[String] = None,
  mapping: Option[Path] = None,
  domain: String = "",
  schema: String = "",
  format: String = "",
  dataset: Option[Path] = None,
  conf: Map[String, String] = Map(),
  rls: Option[List[RowLevelSecurity]] = None
) {

  /** The explicit `dataset` path when given, otherwise `<datasets>/<accepted area>/<domain>/<schema>` from `settings`. */
  def getDataset()(implicit settings: Settings): Path =
    dataset match {
      case Some(explicitPath) => explicitPath
      case None =>
        new Path(s"${settings.comet.datasets}/${settings.comet.area.accepted}/$domain/$schema")
    }

  /** Lower-cased `domain` and `schema` joined by an underscore. */
  def getIndexName(): String = domain.toLowerCase + "_" + schema.toLowerCase

  // Matches a whole string of the form `{left|right}`.
  private val pattern = Pattern.compile("\\{(.*)\\|(.*)\\}")

  /** The part before `|` when `timestamp` has the `{left|right}` form; `None` otherwise. */
  def getTimestampCol(): Option[String] =
    timestamp
      .map(ts => pattern.matcher(ts))
      .filter(_.matches())
      .map(_.group(1))

  /** `<index>-<timestamp>/_doc` when a timestamp is set, `<index>/_doc` otherwise. */
  def getResource(): String =
    timestamp match {
      case Some(ts) => s"${this.getIndexName()}-$ts/_doc"
      case None     => s"${this.getIndexName()}/_doc"
    }
}

/** Command-line parsing for [[ESLoadConfig]], built with scopt's `OParser`. */
object ESLoadConfig extends CliConfig[ESLoadConfig] {

  // One `opt` per ESLoadConfig field that can be set from the command line; each action
  // copies the parsed value into the config. `domain`, `schema` and `format` are required.
  val parser: OParser[Unit, ESLoadConfig] = {
    val builder = OParser.builder[ESLoadConfig]
    import builder._
    OParser.sequence(
      programName("comet"),
      head("comet", BuildInfo.version),
      opt[String]("timestamp")
        .action((x, c) => c.copy(timestamp = Some(x)))
        .optional()
        .text("Elasticsearch index timestamp suffix as in {@timestamp|yyyy.MM.dd}"),
      opt[String]("id")
        .action((x, c) => c.copy(id = Some(x)))
        .optional()
        .text("Elasticsearch Document Id"),
      opt[String]("mapping")
        .action((x, c) => c.copy(mapping = Some(new Path(x))))
        .optional()
        .text("Path to Elasticsearch Mapping File"),
      opt[String]("domain")
        .action((x, c) => c.copy(domain = x))
        .required()
        .text("Domain Name"),
      opt[String]("schema")
        .action((x, c) => c.copy(schema = x))
        .required()
        .text("Schema Name"),
      opt[String]("format")
        .action((x, c) => c.copy(format = x))
        .required()
        .text("Dataset input file : parquet, json or json-array"),
      opt[String]("dataset")
        .action((x, c) => c.copy(dataset = Some(new Path(x))))
        .optional()
        .text("Input dataset path"),
      opt[Map[String, String]]("conf")
        .action((x, c) => c.copy(conf = x))
        .optional()
        .valueName(
          "es.batch.size.entries=1000,es.batch.size.bytes=1mb... (see https://www.elastic.co/guide/en/elasticsearch/hadoop/current/configuration.html)"
        )
        .text("eshadoop configuration options")
    )
  }

  /** Parses `args` starting from a default `ESLoadConfig()`; returns whatever `OParser.parse` yields. */
  def parse(args: Seq[String]): Option[ESLoadConfig] = OParser.parse(parser, args, ESLoadConfig())
}

Source File: Device.scala From uap-scala with Do What The F*ck You Want To Public License

5 votes

package org.uaparser.scala

import java.util.regex.{ Matcher, Pattern }
import org.uaparser.scala.MatcherOps._

/** Device parsed from a user-agent string: a family plus optional brand and model. */
case class Device(family: String, brand: Option[String] = None, model: Option[String] = None)

object Device {
  /** Builds a `Device` from the `family` / `brand` / `model` keys; `None` when `family` is missing. */
  private[scala] def fromMap(m: Map[String, String]) =
    m.get("family").map(family => Device(family, m.get("brand"), m.get("model")))

  /**
   * One matching rule: a regex plus optional replacement templates for each `Device` field.
   */
  private[scala] case class DevicePattern(pattern: Pattern, familyReplacement: Option[String],
                                           brandReplacement: Option[String], modelReplacement: Option[String]) {

    /**
     * Applies the rule to `agent`. Returns `None` when the regex is not found. Otherwise
     * the family and the model fall back to capture group 1 when they have no replacement
     * template, and empty brand / model values are dropped.
     */
    def process(agent: String): Option[Device] = {
      val matcher = pattern.matcher(agent)
      if (matcher.find()) {
        def substituted(template: Option[String]) = template.map(t => replace(t, matcher))

        val family = substituted(familyReplacement).orElse(matcher.groupAt(1))
        val brand = substituted(brandReplacement).filter(_.nonEmpty)
        val model = substituted(modelReplacement).orElse(matcher.groupAt(1)).filter(_.nonEmpty)
        family.map(name => Device(name, brand, model))
      } else {
        None
      }
    }

    /**
     * Expands `$1`, `$2`, … in `replacement` with the corresponding capture groups of
     * `matcher` (a group that did not match counts as empty) and trims the result.
     */
    def replace(replacement: String, matcher: Matcher): String = {
      val groups = matcher.groupCount()
      val expanded =
        if (replacement.contains("$") && groups >= 1) {
          (1 to groups).foldLeft(replacement) { (acc, i) =>
            val captured = Option(matcher.group(i)).getOrElse("")
            acc.replaceFirst("\\$" + i, Matcher.quoteReplacement(captured))
          }
        } else {
          replacement
        }
      expanded.trim
    }
  }

  private object DevicePattern {
    /**
     * Builds a rule from a config entry; `None` when it has no `regex` key. The regex is
     * compiled case-insensitively whenever a `regex_flag` key is present (its value is
     * not inspected).
     */
    def fromMap(m: Map[String, String]): Option[DevicePattern] = m.get("regex").map { r =>
      val pattern =
        if (m.contains("regex_flag")) Pattern.compile(r, Pattern.CASE_INSENSITIVE)
        else Pattern.compile(r)
      DevicePattern(pattern, m.get("device_replacement"), m.get("brand_replacement"), m.get("model_replacement"))
    }
  }

  /** Tries each rule in order and returns the first hit, or `Device("Other")` when none applies. */
  case class DeviceParser(patterns: List[DevicePattern]) {
    def parse(agent: String): Device =
      patterns.iterator
        .map(_.process(agent))
        .collectFirst { case Some(device) => device }
        .getOrElse(Device("Other"))
  }

  object DeviceParser {
    /** Builds a parser from config entries, skipping those without a `regex`. */
    def fromList(config: List[Map[String, String]]): DeviceParser =
      DeviceParser(config.flatMap(entry => DevicePattern.fromMap(entry)))
  }
}

Source File: UserAgent.scala From uap-scala with Do What The F*ck You Want To Public License

5 votes

package org.uaparser.scala

import MatcherOps._
import java.util.regex.{Matcher, Pattern}

/** Browser / client parsed from a user-agent string: a family plus optional version parts. */
case class UserAgent(family: String, major: Option[String] = None, minor: Option[String] = None,
                     patch: Option[String] = None)

object UserAgent {
  /** Builds a `UserAgent` from the `family` / `major` / `minor` / `patch` keys; `None` when `family` is missing. */
  private[scala] def fromMap(m: Map[String, String]) = m.get("family").map { family =>
    UserAgent(family, m.get("major"), m.get("minor"), m.get("patch"))
  }

  /** One matching rule: a regex plus optional replacements for the family and the three version parts. */
  private[scala] case class UserAgentPattern(pattern: Pattern, familyReplacement: Option[String],
                                      v1Replacement: Option[String], v2Replacement: Option[String],
                                      v3Replacement: Option[String]) {

    /**
     * Applies the rule to `agent`. Returns `None` when the regex is not found.
     *
     * The family is the replacement template with `$1` expanded to capture group 1, or
     * group 1 itself when there is no template. Major / minor / patch come from the
     * corresponding replacement or from groups 2 / 3 / 4; empty values are dropped.
     */
    def process(agent: String): Option[UserAgent] = {
      val matcher = pattern.matcher(agent)
      if (!matcher.find()) {
        None
      } else {
        familyReplacement.map { replacement =>
          if (replacement.contains("$1") && matcher.groupCount() >= 1) {
            // group(1) is null when the group exists but took no part in the match;
            // quoteReplacement would throw on null, so treat it as empty (as Device does).
            val captured = Option(matcher.group(1)).getOrElse("")
            replacement.replaceFirst("\\$1", Matcher.quoteReplacement(captured))
          } else replacement
        }.orElse(matcher.groupAt(1)).map { family =>
          val major = v1Replacement.orElse(matcher.groupAt(2)).filter(_.nonEmpty)
          val minor = v2Replacement.orElse(matcher.groupAt(3)).filter(_.nonEmpty)
          val patch = v3Replacement.orElse(matcher.groupAt(4)).filter(_.nonEmpty)
          UserAgent(family, major, minor, patch)
        }
      }
    }
  }

  private object UserAgentPattern {
    /** Builds a rule from a config entry; `None` when it has no `regex` key. */
    def fromMap(config: Map[String, String]): Option[UserAgentPattern] = config.get("regex").map { r =>
      UserAgentPattern(Pattern.compile(r), config.get("family_replacement"), config.get("v1_replacement"),
        config.get("v2_replacement"), config.get("v3_replacement"))
    }
  }

  /** Tries each rule in order and returns the first hit, or `UserAgent("Other")` when none applies. */
  case class UserAgentParser(patterns: List[UserAgentPattern]) {
    def parse(agent: String): UserAgent = patterns.foldLeft[Option[UserAgent]](None) {
      case (None, pattern) => pattern.process(agent)
      case (result, _) => result
    }.getOrElse(UserAgent("Other"))
  }

  object UserAgentParser {
    /** Builds a parser from config entries, skipping those without a `regex`. */
    def fromList(config: List[Map[String, String]]): UserAgentParser =
      UserAgentParser(config.flatMap(entry => UserAgentPattern.fromMap(entry)))
  }
}

Source File: RegexConstraint.scala From drunken-data-quality with Apache License 2.0

5 votes

package de.frosner.ddq.constraints

import java.util.regex.Pattern

import org.apache.spark.sql.functions._
import org.apache.spark.sql.{Column, DataFrame}

import scala.util.Try

/**
 * Constraint requiring the non-null values of `columnName` to contain a match of `regex`.
 *
 * @param columnName column to check
 * @param regex      pattern searched for (with `find`) in each non-null value
 */
case class RegexConstraint(columnName: String, regex: String) extends Constraint {

  // Counts the rows whose value is non-null and contains no match. The filtering and
  // counting run inside `Try`, so a failure there becomes an error status, not a throw.
  val fun = (df: DataFrame) => {
    val compiled = Pattern.compile(regex)
    val doesNotMatch = udf { (value: String) =>
      if (value == null) false else !compiled.matcher(value).find()
    }
    val maybeDoesNotMatchCount = Try {
      df.filter(doesNotMatch(new Column(columnName))).count
    }
    RegexConstraintResult(
      constraint = this,
      data = maybeDoesNotMatchCount.toOption.map(RegexConstraintResultData),
      status = ConstraintUtil.tryToStatus[Long](maybeDoesNotMatchCount, _ == 0)
    )
  }

}

/**
 * Outcome of evaluating a [[RegexConstraint]].
 *
 * `message` is built eagerly; a combination of `status` and `data` other than
 * success-with-zero, failure-with-count or error-without-data throws
 * `IllegalConstraintResultException`.
 */
case class RegexConstraintResult(constraint: RegexConstraint,
                                 data: Option[RegexConstraintResultData],
                                 status: ConstraintStatus) extends ConstraintResult[RegexConstraint] {

  val message: String = {
    val columnName = constraint.columnName
    val regex = constraint.regex
    (status, data.map(_.failedRows)) match {
      case (ConstraintSuccess, Some(0)) =>
        s"Column $columnName matches $regex"
      case (ConstraintFailure, Some(failedRows)) =>
        // Singular/plural wording depends on whether exactly one row failed.
        val (pluralS, verb) = if (failedRows == 1) ("", "does") else ("s", "do")
        s"Column $columnName contains $failedRows row$pluralS that $verb not match $regex"
      case (ConstraintError(throwable), None) =>
        s"Checking whether column $columnName matches $regex failed: $throwable"
      case _ => throw IllegalConstraintResultException(this)
    }
  }

}

/** Number of rows that failed a regex constraint check. */
case class RegexConstraintResultData(failedRows: Long)

Source File: SearchPluginId.scala From sbt-idea-plugin with Apache License 2.0

5 votes

package org.jetbrains.sbtidea.tasks

import java.net.URLEncoder
import java.nio.file.Path
import java.util.regex.Pattern

import org.jetbrains.sbtidea.PluginLogger
import org.jetbrains.sbtidea.download.BuildInfo
import org.jetbrains.sbtidea.download.plugin.LocalPluginRegistry
import com.eclipsesource.json._
import scalaj.http.Http

import scala.collection.JavaConverters._

/**
 * Looks up plugin ids by name or id, in the local IDE installation and/or the remote
 * plugin repository.
 *
 * @param ideaRoot   IDE installation root handed to `LocalPluginRegistry`
 * @param buildInfo  supplies the edition and build used in the remote query
 * @param useBundled whether to search the local registry
 * @param useRemote  whether to query the remote repository
 */
class SearchPluginId(ideaRoot: Path, buildInfo: BuildInfo, useBundled: Boolean = true, useRemote: Boolean = true) {

  private val REPO_QUERY = "https://plugins.jetbrains.com/api/search/plugins?search=%s&build=%s"

  /**
   * Maps plugin id to (name, foundRemotely). The flag is true if the plugin was found in
   * the remote repo; for an id present in both sources the local entry wins.
   */
  def apply(query: String): Map[String, (String, Boolean)] = {
    val local  = if (useBundled) searchPluginIdLocal(query) else Map.empty
    val remote = if (useRemote) searchPluginIdRemote(query) else Map.empty
    remote ++ local
  }

  /** Treats `query` as a regex and keeps the local descriptors whose name or id contains a match. */
  private def searchPluginIdLocal(query: String): Map[String, (String, Boolean)] = {
    val pattern = Pattern.compile(query)
    val registry = new LocalPluginRegistry(ideaRoot)
    val allDescriptors = registry.getAllDescriptors
    allDescriptors
        .filter(descriptor => pattern.matcher(descriptor.name).find() || pattern.matcher(descriptor.id).find())
        .map(descriptor => descriptor.id -> (descriptor.name, false))
        .toMap
  }

  // Apparently we can't use json4s when cross-compiling for sbt because there are BOTH no shared versions AND binary compatibility
  /**
   * Queries the remote repository and maps each result's `xmlId` to its `name`.
   * Best-effort: any non-fatal failure is logged as a warning and yields an empty map.
   */
  private def searchPluginIdRemote(query: String): Map[String, (String, Boolean)] = {
    try {
      val param = URLEncoder.encode(query, "UTF-8")
      val url = REPO_QUERY.format(param, s"${buildInfo.edition.edition}-${buildInfo.getActualIdeaBuild(ideaRoot)}")
      val data = Http(url).asString.body
      val json = Json.parse(data)
      val values = json.asArray().values().asScala.map(_.asObject())
      val names = values.map(_.getString("name", "") -> true)
      val ids = values.map(_.getString("xmlId", ""))
      ids.zip(names).toMap
    } catch {
      // NonFatal lets VM errors and thread interruption propagate instead of being
      // reported as a failed repository query.
      case scala.util.control.NonFatal(ex) =>
        PluginLogger.warn(s"Failed to query IJ plugin repo: $ex")
        Map.empty
    }
  }
}

Source File: Constraint.scala From exhibitor-mesos-framework with Apache License 2.0

5 votes

package ly.stealth.mesos.exhibitor

import java.util.regex.{Pattern, PatternSyntaxException}

import scala.util.Try

/**
 * Predicate over a candidate `value` given a list of other `values`. What the two hold is
 * decided by callers outside this file.
 */
trait Constraint {
  def matches(value: String, values: List[String] = Nil): Boolean
}

object Constraint {

  /**
   * Parses one condition: `like:<regex>`, `unlike:<regex>`, `unique`, `cluster[:<value>]`
   * or `groupBy[:<n>]`.
   *
   * @throws IllegalArgumentException for an unknown condition or a non-numeric group count
   */
  def apply(value: String): Constraint = value match {
    case like if like.startsWith("like:") =>
      Like(like.substring("like:".length))
    case unlike if unlike.startsWith("unlike:") =>
      Like(unlike.substring("unlike:".length), negated = true)
    case "unique" =>
      Unique()
    case cluster if cluster.startsWith("cluster") =>
      val tail = cluster.substring("cluster".length)
      Cluster(if (tail.startsWith(":")) Some(tail.substring(1)) else None)
    case grouped if grouped.startsWith("groupBy") =>
      val tail = grouped.substring("groupBy".length)
      val groups =
        if (!tail.startsWith(":")) 1
        else Try(tail.substring(1).toInt)
          .getOrElse(throw new IllegalArgumentException(s"invalid condition $value"))
      GroupBy(groups)
    case _ =>
      throw new IllegalArgumentException(s"Unsupported condition: $value")
  }

  /**
   * Parses a list of `name`/`condition` pairs (split by `Util.parseList`) into the
   * constraints per name. A later condition for the same name is prepended to its list.
   */
  def parse(constraints: String): Map[String, List[Constraint]] = {
    Util.parseList(constraints).foldLeft(Map.empty[String, List[Constraint]]) {
      case (all, (name, value)) =>
        val existing = all.getOrElse(name, Nil)
        all.updated(name, Constraint(value) :: existing)
    }
  }

  /**
   * Accepts a value that matches `regex` in full, or one that does not when `negated`.
   *
   * @throws IllegalArgumentException at construction when `regex` does not compile
   */
  case class Like(regex: String, negated: Boolean = false) extends Constraint {
    val pattern = try {
      Pattern.compile(s"^$regex$$")
    } catch {
      case e: PatternSyntaxException => throw new IllegalArgumentException(s"Invalid $name: ${e.getMessage}")
    }

    private def name: String = if (negated) "unlike" else "like"

    def matches(value: String, values: List[String]): Boolean = {
      val found = pattern.matcher(value).find()
      found != negated
    }

    override def toString: String = s"$name:$regex"
  }

  /** Accepts a value that does not already occur in `values`. */
  case class Unique() extends Constraint {
    def matches(value: String, values: List[String]): Boolean = !values.contains(value)

    override def toString: String = "unique"
  }

  /**
   * With an explicit value: accepts exactly that value. Without one: accepts anything
   * when `values` is empty, otherwise only the first element of `values`.
   */
  case class Cluster(value: Option[String]) extends Constraint {
    def matches(value: String, values: List[String]): Boolean = this.value match {
      case None           => values.headOption.forall(_ == value)
      case Some(expected) => expected == value
    }

    override def toString: String = value.fold("cluster")(v => s"cluster:$v")
  }

  /**
   * While fewer than `groups` distinct values occur in `values`, accepts only a new value;
   * afterwards accepts a value whose occurrence count equals the smallest count present.
   */
  case class GroupBy(groups: Int = 1) extends Constraint {
    def matches(value: String, values: List[String]): Boolean = {
      val counts = values.groupBy(identity).map { case (v, occurrences) => v -> occurrences.size }
      if (counts.size >= groups) {
        val minCount = if (counts.isEmpty) 0 else counts.values.min
        counts.getOrElse(value, 0) == minCount
      } else {
        !counts.contains(value)
      }
    }

    override def toString: String = if (groups > 1) s"groupBy:$groups" else "groupBy"
  }

}

Source File: TypeScopeNameObjectNameFactory.scala From scala-server-toolkit with MIT License

5 votes

package com.avast.sst.micrometer.jmx

import java.util
import java.util.regex.Pattern

import cats.syntax.either._
import com.codahale.metrics.jmx.{DefaultObjectNameFactory, ObjectNameFactory}
import javax.management.ObjectName


      val map = new java.util.LinkedHashMap[String, String](parts.length)
      val properties = new java.util.Hashtable[String, String](parts.length) {
        override def entrySet(): util.Set[util.Map.Entry[String, String]] = map.entrySet()
      }

      parts.zip(partNames).foreach {
        case (part, partName) =>
          val quoted = quote(part)
          properties.put(partName, quoted)
          map.put(partName, quoted)
      }

      new ObjectName(domain, properties)
    }

  /** Replaces every `.`, `?`, `*` and `"` in `objectName` with an underscore. */
  private def quote(objectName: String): String =
    objectName.replaceAll("[\\Q.?*\"\\E]", "_")

}

Source File: Configuration.scala From tapir with Apache License 2.0

5 votes

package sttp.tapir.generic

import java.util.regex.Pattern

/**
 * Naming configuration.
 *
 * @param toLowLevelName transformation applied to a name to obtain its low-level form
 */
final case class Configuration(toLowLevelName: String => String) {
  /** Copy of this configuration that renders names in snake_case. */
  def withSnakeCaseMemberNames: Configuration =
    copy(
      toLowLevelName = Configuration.snakeCaseTransformation
    )

  /** Copy of this configuration that renders names in kebab-case. */
  def withKebabCaseMemberNames: Configuration =
    copy(
      toLowLevelName = Configuration.kebabCaseTransformation
    )
}

object Configuration {
  /** Default configuration: names are left unchanged. */
  implicit val default: Configuration = Configuration(Predef.identity)

  // Boundary between an upper-case run and a following capitalised word ("HTTPServer").
  private val basePattern: Pattern = Pattern.compile("([A-Z]+)([A-Z][a-z])")
  // Boundary between a lower-case letter or digit and an upper-case letter ("fooBar").
  private val swapPattern: Pattern = Pattern.compile("([a-z\\d])([A-Z])")

  /**
   * Builds a transformation that inserts a separator at both kinds of word boundary
   * (`replacement` is the regex replacement, e.g. `$1_$2`) and lower-cases the result.
   * Lower-casing uses `Locale.ROOT` so the outcome does not depend on the default
   * locale (a Turkish locale would otherwise turn `I` into a dotless `ı`).
   */
  private def separateWords(replacement: String): String => String = s => {
    val partial = basePattern.matcher(s).replaceAll(replacement)
    swapPattern.matcher(partial).replaceAll(replacement).toLowerCase(java.util.Locale.ROOT)
  }

  private val snakeCaseTransformation: String => String = separateWords("$1_$2")

  private val kebabCaseTransformation: String => String = separateWords("$1-$2")
}

Source File: ColumnMetadataTest.scala From spark-vector with Apache License 2.0

5 votes

package com.actian.spark_vector.vector

import java.util.regex.Pattern

import org.apache.spark.sql.types.DecimalType

import org.scalacheck.Arbitrary.arbitrary
import org.scalacheck.Gen
import org.scalacheck.Gen.{choose, identifier}
import org.scalacheck.Prop.{forAll, propBoolean}
import org.scalatest.{FunSuite, Matchers}

import com.actian.spark_vector.test.tags.RandomizedTest

/** Property test over randomly generated `ColumnMetadata` values. */
class ColumnMetadataTest extends FunSuite with Matchers {
  // Generate random column metadata and ensure the resultant StructField's are valid
  test("generated", RandomizedTest) {
    forAll(columnMetadataGen)(assertColumnMetadata(_)).check
  }

  val milliSecsPattern = Pattern.compile(".*\\.(S*)")

  /**
   * Checks the struct field derived from `columnMD`. Only decimal fields are verified:
   * their precision and scale must equal the metadata's. Always returns true; a mismatch
   * surfaces through the matcher.
   */
  def assertColumnMetadata(columnMD: ColumnMetadata): Boolean = {
    columnMD.structField.dataType match {
      // For decimal type, ensure the scale and precision match
      case decimal: DecimalType =>
        decimal.precision should be(columnMD.precision)
        decimal.scale should be(columnMD.scale)
      case _ => // nothing to verify for other types
    }
    true
  }

  /** Random metadata: precision in 0..20 and scale in 0..min(20, precision). */
  val columnMetadataGen: Gen[ColumnMetadata] =
    for {
      columnName <- identifier
      jdbcType <- VectorTypeGen.vectorJdbcTypeGen
      isNullable <- arbitrary[Boolean]
      precision <- choose(0, 20)
      scale <- choose(0, Math.min(20, precision))
    } yield ColumnMetadata(columnName, jdbcType, isNullable, precision, scale)
}

Source File: BigQueryPartitionUtil.scala From scio with Apache License 2.0

5 votes

package com.spotify.scio.bigquery

import java.util.regex.Pattern

import com.google.api.services.bigquery.model.TableReference
import com.spotify.scio.bigquery.client.BigQuery
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers

import scala.util.Try

/** Resolves the `$LATEST` partition placeholder in BigQuery table specs and queries. */
private[bigquery] object BigQueryPartitionUtil {
  // Ported from com.google.cloud.dataflow.sdk.io.BigQueryHelpers

  private[this] val PROJECT_ID_REGEXP = "[a-z][-a-z0-9:.]{4,61}[a-z0-9]"
  private[this] val DATASET_REGEXP = "[-\\w.]{1,1024}"
  private[this] val TABLE_REGEXP = "[-\\w$@]{1,1024}($LATEST)?"
  private[this] val DATASET_TABLE_REGEXP_LEGACY =
    s"((?<PROJECT>$PROJECT_ID_REGEXP):)?(?<DATASET>$DATASET_REGEXP)\\.(?<TABLE>$TABLE_REGEXP)"
  private[this] val DATASET_TABLE_REGEXP_STANDARD =
    s"((?<PROJECT>$PROJECT_ID_REGEXP).)?(?<DATASET>$DATASET_REGEXP)\\.(?<TABLE>$TABLE_REGEXP)"
  // Table spec enclosed in [ ] (legacy syntax).
  private[this] val QUERY_TABLE_SPEC_LEGACY =
    Pattern.compile(s"(?<=\\[)$DATASET_TABLE_REGEXP_LEGACY(?=\\])")
  // Table spec enclosed in backticks (standard syntax).
  private[this] val QUERY_TABLE_SPEC_STANDARD =
    Pattern.compile(s"(?<=\\`)$DATASET_TABLE_REGEXP_STANDARD(?=\\`)")

  /**
   * Finds every bracketed or backtick-quoted table spec in `sqlQuery` and maps the spec,
   * including its delimiters, to the parsed reference. For backtick specs the first `.`
   * is turned into `:` before parsing.
   */
  private def extractTables(sqlQuery: String): Map[String, TableReference] = {
    def occurrences(spec: Pattern): Iterator[String] = {
      val m = spec.matcher(sqlQuery)
      Iterator.continually(m).takeWhile(_.find()).map(_.group(0))
    }

    val legacy = occurrences(QUERY_TABLE_SPEC_LEGACY).map { t =>
      s"[$t]" -> BigQueryHelpers.parseTableSpec(t)
    }
    val standard = occurrences(QUERY_TABLE_SPEC_STANDARD).map { t =>
      s"`$t`" -> BigQueryHelpers.parseTableSpec(t.replaceFirst("\\.", ":"))
    }
    (legacy ++ standard).toMap
  }

  /**
   * Suffixes of the tables in the same dataset that share `tableRef`'s name up to the
   * first `$`, restricted to suffixes that parse as a number.
   */
  private def getPartitions(bq: BigQuery, tableRef: TableReference): Set[String] = {
    val prefix = tableRef.getTableId.split('$')(0)
    val suffixes = bq.tables
      .tableReferences(tableRef.getProjectId, tableRef.getDatasetId)
      .filter(_.getTableId.startsWith(prefix))
      .map(_.getTableId.substring(prefix.length))
      .toSet
    // get all table with prefix and filter only the day/date partitioned tables. Current
    // format for date partition is YYYYMMDD, thus all numeric.
    suffixes.filter(suffix => Try(suffix.toLong).isSuccess)
  }

  /**
   * Replaces `$LATEST` in every table spec of `sqlQuery` with the greatest partition
   * common to all such tables. A query without `$LATEST` tables is returned unchanged.
   *
   * @throws IllegalArgumentException (via `require`) when the tables share no partition
   */
  def latestQuery(bq: BigQuery, sqlQuery: String): String = {
    val latestSpecs =
      extractTables(sqlQuery).filter { case (_, ref) => ref.getTableId.endsWith("$LATEST") }
    if (latestSpecs.nonEmpty) {
      val overlaps = latestSpecs
        .map { case (_, ref) => getPartitions(bq, ref) }
        .reduce(_ intersect _)
      require(
        overlaps.nonEmpty,
        "Cannot find latest common partition for " + latestSpecs.keys.mkString(", ")
      )
      val latest = overlaps.max
      latestSpecs.keys.foldLeft(sqlQuery) { (query, spec) =>
        query.replace(spec, spec.replace("$LATEST", latest))
      }
    } else {
      sqlQuery
    }
  }

  /**
   * Replaces `$LATEST` in `tableSpec` with the table's greatest partition. A spec whose
   * table id does not end in `$LATEST` is returned unchanged.
   *
   * @throws IllegalArgumentException (via `require`) when the table has no partition
   */
  def latestTable(bq: BigQuery, tableSpec: String): String = {
    val ref = BigQueryHelpers.parseTableSpec(tableSpec)
    if (!ref.getTableId.endsWith("$LATEST")) {
      tableSpec
    } else {
      val partitions = getPartitions(bq, ref)
      require(partitions.nonEmpty, s"Cannot find latest partition for $tableSpec")
      tableSpec.replace("$LATEST", partitions.max)
    }
  }
}

Source File: CompilationErrorParser.scala From scala-clippy with Apache License 2.0

5 votes

package com.softwaremill.clippy

import java.util.regex.Pattern

/** Turns the text of a compiler error into a structured `CompilationError`. */
object CompilationErrorParser {
  private val FoundRegexp            = """found\s*:\s*([^\n]+)""".r
  private val RequiredPrefixRegexp   = """required\s*:""".r
  private val AfterRequiredRegexp    = """required\s*:\s*([^\n]+)""".r
  private val WhichExpandsToRegexp   = """\s*\(which expands to\)\s*([^\n]+)""".r
  private val NotFoundRegexp         = """not found\s*:\s*([^\n]+)""".r
  private val NotAMemberRegexp       = """:?\s*([^\n:]+) is not a member of""".r
  private val NotAMemberOfRegexp     = """is not a member of\s*([^\n]+)""".r
  private val ImplicitNotFoundRegexp = """could not find implicit value for parameter\s*([^:]+):\s*([^\n]+)""".r
  private val DivergingImplicitExpansionRegexp =
    """diverging implicit expansion for type\s*([^\s]+)\s*.*\s*starting with method\s*([^\s]+)\s*in\s*([^\n]+)""".r
  private val TypeArgumentsDoNotConformToOverloadedBoundsRegexp =
    """type arguments \[([^\]]+)\] conform to the bounds of none of the overloaded alternatives of\s*([^:\n]+)[^:]*: ([^\n]+)""".r
  private val TypeclassNotFoundRegexp = """No implicit (.*) defined for ([^\n]+)""".r

  /**
   * Parses `e` after removing every `[error]` marker.
   *
   * The error kind is chosen by the first marker phrase the text contains, checked in
   * this order: type mismatch, not found, not a member, implicit not found, diverging
   * implicit expansion, type arguments not conforming, typeclass not found. `None` is
   * returned when no phrase is present or the detail regex of the chosen kind fails.
   */
  def parse(e: String): Option[CompilationError[ExactT]] = {
    val error = e.replaceAll(Pattern.quote("[error]"), "")
    if (error.contains("type mismatch")) parseTypeMismatch(error)
    else if (error.contains("not found")) parseNotFound(error)
    else if (error.contains("is not a member of")) parseNotAMember(error)
    else if (error.contains("could not find implicit value for parameter")) parseImplicitNotFound(error)
    else if (error.contains("diverging implicit expansion for type")) parseDivergingImplicit(error)
    else if (error.contains("conform to the bounds of none of the overloaded alternatives"))
      parseTypeArgumentsDoNotConform(error)
    else if (error.contains("No implicit")) parseTypeclassNotFound(error)
    else None
  }

  // Found / required types, their optional "which expands to" forms, and trailing notes.
  private def parseTypeMismatch(error: String): Option[CompilationError[ExactT]] =
    RequiredPrefixRegexp.split(error).toList match {
      case List(beforeReq, afterReq) =>
        for {
          found <- FoundRegexp.findFirstMatchIn(beforeReq)
          foundExpandsTo = WhichExpandsToRegexp.findFirstMatchIn(beforeReq)
          required <- AfterRequiredRegexp.findFirstMatchIn(error)
          requiredExpandsTo = WhichExpandsToRegexp.findFirstMatchIn(afterReq)
        } yield {
          // Notes start after the expansion when there is one, else after the required type.
          val notes = requiredExpandsTo match {
            case Some(et) => getNotesFromIndex(afterReq, et.end)
            case None     => getNotesFromIndex(error, required.end)
          }

          TypeMismatchError[ExactT](
            ExactT(found.group(1)),
            foundExpandsTo.map(m => ExactT(m.group(1))),
            ExactT(required.group(1)),
            requiredExpandsTo.map(m => ExactT(m.group(1))),
            notes
          )
        }

      case _ =>
        None
    }

  private def parseNotFound(error: String): Option[CompilationError[ExactT]] =
    for {
      what <- NotFoundRegexp.findFirstMatchIn(error)
    } yield NotFoundError[ExactT](ExactT(what.group(1)))

  private def parseNotAMember(error: String): Option[CompilationError[ExactT]] =
    for {
      what         <- NotAMemberRegexp.findFirstMatchIn(error)
      notAMemberOf <- NotAMemberOfRegexp.findFirstMatchIn(error)
    } yield NotAMemberError[ExactT](ExactT(what.group(1)), ExactT(notAMemberOf.group(1)))

  private def parseImplicitNotFound(error: String): Option[CompilationError[ExactT]] =
    for {
      inf <- ImplicitNotFoundRegexp.findFirstMatchIn(error)
    } yield ImplicitNotFoundError[ExactT](ExactT(inf.group(1)), ExactT(inf.group(2)))

  private def parseDivergingImplicit(error: String): Option[CompilationError[ExactT]] =
    for {
      inf <- DivergingImplicitExpansionRegexp.findFirstMatchIn(error)
    } yield DivergingImplicitExpansionError[ExactT](ExactT(inf.group(1)), ExactT(inf.group(2)), ExactT(inf.group(3)))

  private def parseTypeArgumentsDoNotConform(error: String): Option[CompilationError[ExactT]] =
    for {
      inf <- TypeArgumentsDoNotConformToOverloadedBoundsRegexp.findFirstMatchIn(error)
    } yield
      TypeArgumentsDoNotConformToOverloadedBoundsError[ExactT](
        ExactT(inf.group(1)),
        ExactT(inf.group(2)),
        inf.group(3).split(Pattern.quote(" <and> ")).toSet.map(ExactT.apply)
      )

  // A trailing '.' on the second capture is dropped.
  private def parseTypeclassNotFound(error: String): Option[CompilationError[ExactT]] =
    for {
      inf <- TypeclassNotFoundRegexp.findFirstMatchIn(error)
      group2 = inf.group(2)
    } yield
      TypeclassNotFoundError(
        ExactT(inf.group(1)),
        ExactT(if (group2.endsWith(".")) group2.substring(0, group2.length - 1) else group2)
      )

  /** Trimmed text of `msg` starting one character past `afterIdx`; `None` when nothing (or only blanks) remains. */
  private def getNotesFromIndex(msg: String, afterIdx: Int): Option[String] = {
    val fromIdx = afterIdx + 1
    if (msg.length > fromIdx) Some(msg.substring(fromIdx).trim).filter(_.nonEmpty)
    else None
  }
}

Source File: Template.scala From scala-clippy with Apache License 2.0

5 votes

package com.softwaremill.clippy

import java.util.regex.Pattern

import scala.util.Try
import scala.util.matching.Regex

/** A textual template; `v` is its raw string form. */
sealed trait Template {
  def v: String
}

/** A template that carries a literal value. */
case class ExactT(v: String) extends Template {
  override def toString: String = v
}

/**
 * A template whose value is a regular expression.
 *
 * If `v` cannot be compiled, the pattern `^$` is used instead.
 */
case class RegexT(v: String) extends Template {
  lazy val regex: Regex = Try(new Regex(v)).recover { case _ => new Regex("^$") }.get

  /** True when the whole of `e.v` is matched by [[regex]]. */
  def matches(e: ExactT): Boolean = {
    val matcher = regex.pattern.matcher(e.v)
    matcher.matches()
  }

  override def toString: String = v
}

object RegexT {

  /**
   * Builds a [[RegexT]] from a wildcard pattern: every `*` becomes `.*`, and all other
   * text is quoted so that it is matched literally.
   */
  def fromPattern(pattern: String): RegexT = {
    // A limit of -1 keeps trailing empty segments, so a trailing `*` is not lost.
    val literalParts = pattern
      .split("\\*", -1)
      .map(part => if (part.isEmpty) part else Pattern.quote(part))

    RegexT.fromRegex(literalParts.mkString(".*"))
  }

  /** Wraps an already-written regular expression. */
  def fromRegex(v: String): RegexT = RegexT(v)

  /**
   * True when both sets have the same size and every regex in `rr` matches at least
   * one value in `ee`.
   */
  def setMatches(rr: Set[RegexT], ee: Set[ExactT]): Boolean =
    rr.size == ee.size && rr.forall(regex => ee.exists(exact => regex.matches(exact)))
}

Source File: VwFeatureNormalizer.scala From aloha with MIT License

5 votes

package com.eharmony.aloha.dataset.vw

import java.text.DecimalFormat
import java.util.regex.Pattern

/**
 * Rewrites a VW line so that each `|x features...` section becomes
 * `|x:<weight> features...`, where `<weight>` is `1 / sqrt(sum of squared feature weights)`
 * formatted with five decimals.
 *
 * A feature written as `name:w` contributes `w * w` to the sum; any other token contributes 1.
 */
class VwFeatureNormalizer extends (CharSequence => CharSequence) with java.io.Serializable {
    // Matches "|<single word char><whitespace><everything up to the next '|'>".
    private[this] val lineRegex = Pattern.compile("\\|(\\w)\\s+([^\\|]+)")
    // Captures the text after the last ':' of a token.
    private[this] val namespaceRegex = ".+:(.+)".r
    private[this] val format = new DecimalFormat("0.00000")

    /**
     * @param vwLine the line to rewrite
     * @return the line with a weight inserted after each matched namespace marker;
     *         text not matched by the section pattern is copied through unchanged
     */
    def apply(vwLine: CharSequence): CharSequence = {
        val matcher = lineRegex.matcher(vwLine)
        val sb = new StringBuffer
        while(matcher.find) {
            val features = matcher.group(2)
            val rewritten = "|" + matcher.group(1) + ":" + format.format(normalizeNamespace(features)) + " " + features
            // appendReplacement interprets '$' and '\' in its argument as group references and
            // escapes. The rewritten text contains raw input, so it must be quoted to be taken literally.
            matcher.appendReplacement(sb, java.util.regex.Matcher.quoteReplacement(rewritten))
        }
        matcher.appendTail(sb)
        sb
    }

    /**
     * Computes the normalizing weight for the whitespace-separated features of one section.
     *
     * @throws NumberFormatException if a token has the form `name:w` and `w` is not a number
     */
    private[this] def normalizeNamespace(namespace: String): Double = {
        var sum = 0d
        namespace.split("\\s+").foreach {
            case namespaceRegex(w) =>
                val currentWeight = w.toDouble
                sum += currentWeight * currentWeight
            case _ => sum += 1
        }
        if (sum == 0) 0
        else 1.0 / math.sqrt(sum)
    }
}

object VwFeatureNormalizer {
    /** A shared, ready-to-use normalizer. */
    val instance = new VwFeatureNormalizer
}

Source File: Constraint.scala From zipkin-mesos-framework with Apache License 2.0

5 votes

package net.elodina.mesos.zipkin.components

import java.util.regex.{Pattern, PatternSyntaxException}

import net.elodina.mesos.zipkin.utils.Util

import scala.util.Try

/** A rule that decides whether a candidate value is acceptable. */
trait Constraint {
  /**
   * @param value  the candidate value under test
   * @param values the values the candidate is compared against (used by some constraints only)
   * @return true when the candidate satisfies this constraint
   */
  def matches(value: String, values: List[String] = Nil): Boolean
}

object Constraint {
  /**
   * Parses a single constraint expression.
   *
   * Recognised forms: `like:<regex>`, `unlike:<regex>`, `unique`, `cluster[:<value>]`
   * and `groupBy[:<n>]`.
   *
   * @throws IllegalArgumentException if the expression is not recognised, the regex is
   *                                  invalid, or the `groupBy` argument is not an integer
   */
  def apply(value: String): Constraint = {
    if (value.startsWith("like:")) Constraint.Like(value.substring("like:".length))
    else if (value.startsWith("unlike:")) Constraint.Like(value.substring("unlike:".length), negated = true)
    else if (value == "unique") Constraint.Unique()
    else if (value.startsWith("cluster")) {
      val tail = value.substring("cluster".length)
      val cluster = if (tail.startsWith(":")) Some(tail.substring(1)) else None
      Cluster(cluster)
    } else if (value.startsWith("groupBy")) {
      val tail = value.substring("groupBy".length)
      val groups = if (tail.startsWith(":")) Try(tail.substring(1).toInt).toOption.getOrElse(throw new IllegalArgumentException(s"invalid condition $value"))
      else 1

      GroupBy(groups)
    }
    else throw new IllegalArgumentException(s"Unsupported condition: $value")
  }

  /**
   * Parses a list of `name=constraint` pairs (split by `Util.parseList`) into constraints
   * grouped by name. Constraints sharing a name are accumulated in reverse order of appearance.
   */
  def parse(constraints: String): Map[String, List[Constraint]] = {
    Util.parseList(constraints).foldLeft[Map[String, List[Constraint]]](Map()) { case (all, (name, value)) =>
      all.get(name) match {
        case Some(values) => all.updated(name, Constraint(value) :: values)
        case None => all.updated(name, List(Constraint(value)))
      }
    }
  }

  /**
   * Accepts values that match `regex` in full (or, when `negated`, values that do not).
   *
   * @throws IllegalArgumentException on construction if `regex` is not a valid pattern
   */
  case class Like(regex: String, negated: Boolean = false) extends Constraint {
    val pattern = try {
      // The user regex is wrapped in a non-capturing group so that the anchors apply to the
      // whole expression. Without it an alternation such as "a|b" would be read as "^a" OR "b$".
      Pattern.compile(s"^(?:$regex)$$")
    } catch {
      case e: PatternSyntaxException => throw new IllegalArgumentException(s"Invalid $name: ${e.getMessage}")
    }

    private def name: String = if (negated) "unlike" else "like"

    def matches(value: String, values: List[String]): Boolean = negated ^ pattern.matcher(value).find()

    override def toString: String = s"$name:$regex"
  }

  /** Accepts a value only if it is not already among `values`. */
  case class Unique() extends Constraint {
    def matches(value: String, values: List[String]): Boolean = !values.contains(value)

    override def toString: String = "unique"
  }

  /**
   * With a fixed value: accepts only that value. Without one: accepts anything when
   * `values` is empty, otherwise only the first element of `values`.
   */
  case class Cluster(value: Option[String]) extends Constraint {
    def matches(value: String, values: List[String]): Boolean = this.value match {
      case Some(v) => v == value
      case None => values.isEmpty || values.head == value
    }

    override def toString: String = "cluster" + value.map(":" + _).getOrElse("")
  }

  /**
   * While fewer than `groups` distinct values exist, accepts only unseen values; afterwards
   * accepts a value only if its occurrence count equals the current minimum count.
   */
  case class GroupBy(groups: Int = 1) extends Constraint {
    def matches(value: String, values: List[String]): Boolean = {
      val counts = values.groupBy(identity).mapValues(_.size)
      if (counts.size < groups) !counts.contains(value)
      else {
        val minCount = counts.values.reduceOption(_ min _).getOrElse(0)
        counts.getOrElse(value, 0) == minCount
      }
    }

    override def toString: String = "groupBy" + (if (groups > 1) s":$groups" else "")
  }

}

Source File: I18NProcessor.scala From ez-framework with Apache License 2.0

5 votes

package com.ecfront.ez.framework.core.i18n

import java.io.File
import java.util.regex.Pattern

import com.ecfront.common.Resp
import com.ecfront.ez.framework.core.EZ
import com.ecfront.ez.framework.core.logger.Logging

import scala.io.Source


  /** Stores `_language` in `EZ.Info.language`. */
  def setLanguage(_language: String): Unit =
    EZ.Info.language = _language

  // Regex used to find tab characters in a response message.
  private val tabR = "\t"

  /**
   * Replaces the message of `resp` with its translated form; tabs are turned into spaces
   * first. A null or empty message is left untouched.
   */
  def process(resp: Resp[_]): Unit =
    Option(resp.message).filter(_.nonEmpty).foreach { original =>
      resp.message = i18n(original.replaceAll(tabR, " "))
    }

  /**
   * Translates `str` using the first `i18nInfo` entry whose pattern matches the whole string;
   * the replacement is that entry's value for the current `EZ.Info.language`.
   *
   * @return the translated text, or `str` itself when no entry matches
   */
  def i18n(str: String): String =
    i18nInfo
      .find(entry => entry._1.matcher(str).matches())
      .fold(str) { entry =>
        entry._1.matcher(str).replaceAll(entry._2(EZ.Info.language))
      }

  /** Adds `.x` to strings as a shorthand for `i18n(str)`. */
  implicit class Impl(val str: String) {
    def x: String = {
      i18n(str)
    }
  }

}

Source File: OS.scala From uap-scala with Do What The F*ck You Want To Public License

5 votes

package org.uaparser.scala

import MatcherOps._
import java.util.regex.{ Matcher, Pattern }
import scala.util.control.Exception.allCatch

/** Operating-system description: a family name plus optional version components. */
case class OS(family: String, major: Option[String] = None, minor: Option[String] = None, patch: Option[String] = None,
              patchMinor: Option[String] = None)

object OS {
  /** Builds an [[OS]] from the keys `family`, `major`, `minor`, `patch`, `patch_minor`; `None` without `family`. */
  private[scala] def fromMap(m: Map[String, String]) = m.get("family").map { family =>
    OS(family, m.get("major"), m.get("minor"), m.get("patch"), m.get("patch_minor"))
  }

  // Matches the literal text "$1" inside a replacement template.
  private[this] val quotedBack1: Pattern = Pattern.compile(s"(${Pattern.quote("$1")})")

  /**
   * Substitutes every literal `$1` in `replacement` with the text captured by group 1.
   *
   * The template is returned unchanged when the pattern has no groups or group 1 did not
   * take part in the match (`group(1)` is null). The captured text is quoted because
   * `Matcher.replaceAll` would otherwise interpret `$` and `\` inside it.
   */
  private[this] def replacementBack1(matcher: Matcher)(replacement: String): String =
    if (matcher.groupCount() >= 1) {
      Option(matcher.group(1)).fold(replacement) { captured =>
        quotedBack1.matcher(replacement).replaceAll(Matcher.quoteReplacement(captured))
      }
    } else replacement

  /**
   * Resolves a version replacement: a value of the form `$<n>` yields capture group `n`,
   * anything else is returned as-is.
   */
  private[this] def replaceBackreference(matcher: Matcher)(replacement: String): Option[String] =
    getBackreferenceGroup(replacement) match {
      case Some(group) => matcher.groupAt(group)
      case None        => Some(replacement)
    }

  /** Parses the group number out of `$<n>`; `None` when the text after the first character is not an integer. */
  private[this] def getBackreferenceGroup(replacement: String): Option[Int] =
    for {
      ref <- Option(replacement).filter(_.contains("$"))
      groupOpt = allCatch opt ref.substring(1).toInt
      group <- groupOpt
    } yield group


  /** One user-agent rule: a regex plus optional overrides for the family and version parts. */
  private[scala] case class OSPattern(
    pattern: Pattern,
    osReplacement: Option[String],
    v1Replacement: Option[String],
    v2Replacement: Option[String],
    v3Replacement: Option[String],
    v4Replacement: Option[String]
  ) {
    /**
     * Applies this rule to `agent`.
     *
     * The family comes from `osReplacement` (with `$1` substituted) or else from group 1;
     * each version part comes from its replacement or else from groups 2 to 5.
     *
     * @return `None` when the regex is not found in `agent` or no family can be determined
     */
    def process(agent: String): Option[OS] = {
      val matcher = pattern.matcher(agent)
      if (!matcher.find()) None else {
        osReplacement
          .map(replacementBack1(matcher))
          .orElse(matcher.groupAt(1)).map { family =>
            val major = v1Replacement.flatMap(replaceBackreference(matcher)).orElse(matcher.groupAt(2))
            val minor = v2Replacement.flatMap(replaceBackreference(matcher)).orElse(matcher.groupAt(3))
            val patch = v3Replacement.flatMap(replaceBackreference(matcher)).orElse(matcher.groupAt(4))
            val patchMinor = v4Replacement.flatMap(replaceBackreference(matcher)).orElse(matcher.groupAt(5))
            OS(family, major, minor, patch, patchMinor)
        }
      }
    }
  }

  private object OSPattern {
    /** Builds a rule from a configuration map; `None` when the `regex` key is absent. */
    def fromMap(m: Map[String, String]): Option[OSPattern] = m.get("regex").map { r =>
      OSPattern(Pattern.compile(r), m.get("os_replacement"), m.get("os_v1_replacement"), m.get("os_v2_replacement"),
        m.get("os_v3_replacement"), m.get("os_v4_replacement"))
    }
  }

  /** Tries each rule in order and returns the first result, or `OS("Other")` when none applies. */
  case class OSParser(patterns: List[OSPattern]) {
    def parse(agent: String): OS = patterns.foldLeft[Option[OS]](None) {
      case (None, pattern) => pattern.process(agent)
      case (result, _) => result
    }.getOrElse(OS("Other"))
  }

  object OSParser {
    /** Builds a parser from configuration maps, dropping entries that have no `regex`. */
    def fromList(config: List[Map[String, String]]): OSParser = OSParser(config.map(OSPattern.fromMap).flatten)
  }
}

Source File: ByteUnit.scala From aloha with Apache License 2.0

5 votes

package me.jrwang.aloha.common.util

import java.util.regex.Pattern

/**
 * A unit of byte size.
 *
 * @param multiplier number of bytes in one of this unit
 */
sealed abstract class ByteUnit(val multiplier: Long) {

  // Reads the number d, expressed in unit u, as a quantity of this unit.
  // E.g. KiB.convertFrom(1, MiB) gives 1024.
  def convertFrom(d: Long, u: ByteUnit): Long = u.convertTo(d, this)

  // Converts the number d, expressed in this unit, into unit u.
  def convertTo(d: Long, u: ByteUnit): Long =
    if (multiplier <= u.multiplier) {
      // Dividing by the ratio (rather than multiplying first) avoids overflowing d * multiplier.
      d / (u.multiplier / multiplier)
    } else {
      val scale = multiplier / u.multiplier
      if (d > Long.MaxValue / scale) {
        throw new IllegalArgumentException(s"Conversion of $d exceeds Long.MAX_VALUE in ${name()}. Try a larger unit (e.g. MiB instead of KiB)")
      }
      d * scale
    }

  // Display name of the unit.
  def name(): String
}

object ByteUnit {
  case object BYTE extends ByteUnit(1L) {
    override def name(): String = "BYTE"
  }

  case object KiB extends ByteUnit(1L << 10) {
    override def name(): String = "KB"
  }

  case object MiB extends ByteUnit(1L << 20) {
    override def name(): String = "MB"
  }

  case object GiB extends ByteUnit(1L << 30) {
    override def name(): String = "GB"
  }

  case object TiB extends ByteUnit(1L << 40) {
    override def name(): String = "TB"
  }

  case object PiB extends ByteUnit(1L << 50) {
    override def name(): String = "PB"
  }
}


object ByteUtils {
  // Lower-case size suffixes and the unit each one denotes.
  private val byteSuffixes =
    Map(
      "b" -> ByteUnit.BYTE,
      "k" -> ByteUnit.KiB, "kb" -> ByteUnit.KiB,
      "m" -> ByteUnit.MiB, "mb" -> ByteUnit.MiB,
      "g" -> ByteUnit.GiB, "gb" -> ByteUnit.GiB,
      "t" -> ByteUnit.TiB, "tb" -> ByteUnit.TiB,
      "p" -> ByteUnit.PiB, "pb" -> ByteUnit.PiB
    )

  /** Converts a size string to gibibytes by delegating to `byteStringAs` with `ByteUnit.GiB`. */
  def byteStringAsGb(str: String): Long =
    byteStringAs(str, ByteUnit.GiB)
}

Source File: TimeUtils.scala From aloha with Apache License 2.0

5 votes

package me.jrwang.aloha.common.util

import java.util.concurrent.TimeUnit
import java.util.regex.Pattern

object TimeUtils {
  // Duration suffixes and the time unit each one denotes.
  private val timeSuffixes =
    Map(
      "us" -> TimeUnit.MICROSECONDS,
      "ms" -> TimeUnit.MILLISECONDS,
      "s" -> TimeUnit.SECONDS,
      "m" -> TimeUnit.MINUTES, "min" -> TimeUnit.MINUTES,
      "h" -> TimeUnit.HOURS,
      "d" -> TimeUnit.DAYS
    )

  /** Converts a duration string to seconds by delegating to `timeStringAs` with `TimeUnit.SECONDS`. */
  def timeStringAsSeconds(str: String): Long = {
    timeStringAs(str, TimeUnit.SECONDS)
  }
}

Source File: OutputTransformer.scala From play-swagger with Apache License 2.0

5 votes

package com.iheart.playSwagger

import java.util.regex.Pattern

import com.iheart.playSwagger.OutputTransformer.SimpleOutputTransformer
import play.api.libs.json.{ JsArray, JsString, JsValue, JsObject }

import scala.util.matching.Regex
import scala.util.{ Success, Failure, Try }


  /** Composes this transformer with `b`: `b` runs on the result of this one, if it succeeded. */
  def >=>(b: JsObject ⇒ Try[JsObject]): OutputTransformer =
    SimpleOutputTransformer((input: JsObject) ⇒ this.apply(input).flatMap(b))
}

object OutputTransformer {
  /** An [[OutputTransformer]] backed by a plain function. */
  final case class SimpleOutputTransformer(run: (JsObject ⇒ Try[JsObject])) extends OutputTransformer {
    override def apply(value: JsObject): Try[JsObject] = run(value)
  }

  /**
   * Applies `transformer` to every leaf value of `vals`, recursing into nested objects and arrays.
   *
   * @return the rebuilt array, or the first failure encountered (in element order)
   */
  def traverseTransformer(vals: JsArray)(transformer: JsValue ⇒ Try[JsValue]): Try[JsArray] = {
    val tryElements = vals.value.map {
      case value: JsObject ⇒ traverseTransformer(value)(transformer)
      case value: JsArray  ⇒ traverseTransformer(value)(transformer)
      case value: JsValue  ⇒ transformer(value)
    }

    // Pattern matching replaces the former isInstanceOf/asInstanceOf casts, which were
    // unchecked because of type erasure.
    tryElements.collectFirst { case Failure(error) ⇒ error } match {
      case Some(error) ⇒ Failure(error)
      case None        ⇒ Success(JsArray(tryElements.collect { case Success(element) ⇒ element }))
    }
  }

  /**
   * Applies `transformer` to every leaf value of `obj`, recursing into nested objects and arrays.
   *
   * @return the rebuilt object, or the first failure encountered (in field order)
   */
  def traverseTransformer(obj: JsObject)(transformer: JsValue ⇒ Try[JsValue]): Try[JsObject] = {
    val tryFields = obj.fields.map {
      case (key, value: JsObject) ⇒ (key, traverseTransformer(value)(transformer))
      case (key, values: JsArray) ⇒ (key, traverseTransformer(values)(transformer))
      case (key, value: JsValue)  ⇒ (key, transformer(value))
    }

    tryFields.collectFirst { case (_, Failure(error)) ⇒ error } match {
      case Some(error) ⇒ Failure(error)
      case None        ⇒ Success(JsObject(tryFields.collect { case (key, Success(result)) ⇒ (key, result) }))
    }
  }
}

/**
 * Replaces every string value that matches `pattern` in full with the result of
 * `map(<captured key>)`; fails with `IllegalStateException` when the key is unknown.
 * All other values are passed through unchanged.
 */
class PlaceholderVariablesTransformer(map: String ⇒ Option[String], pattern: Regex = "^\\$\\{(.*)\\}$".r) extends OutputTransformer {
  def apply(value: JsObject) = OutputTransformer.traverseTransformer(value) {
    case JsString(pattern(key)) ⇒
      map(key).fold[Try[JsValue]](Failure(new IllegalStateException(s"Unable to find variable $key"))) { resolved ⇒
        Success(JsString(resolved))
      }
    case other: JsValue ⇒ Success(other)
  }
}

/** Resolves placeholders by looking the key up in `map`. */
final case class MapVariablesTransformer(map: Map[String, String])
  extends PlaceholderVariablesTransformer((key: String) ⇒ map.get(key))

/** Resolves placeholders from the process environment via `System.getenv`. */
class EnvironmentVariablesTransformer
  extends PlaceholderVariablesTransformer((name: String) ⇒ Option(System.getenv(name)))

Source File: QuicklensRelease.scala From quicklens with Apache License 2.0

5 votes

import java.util.regex.Pattern

import sbt._
import sbtrelease.ReleasePlugin.autoImport._
import sbtrelease.ReleasePlugin.autoImport.ReleaseKeys._
import sbtrelease.ReleasePlugin.autoImport.ReleaseStep
import sbtrelease.ReleaseStateTransformations._

object QuicklensRelease {
  /** The release process: the usual sbt-release steps plus a README version bump. */
  def steps: Seq[ReleaseStep] = Seq(
    checkSnapshotDependencies,
    inquireVersions,
    // publishing locally so that the pgp password prompt is displayed early
    // in the process
    releaseStepCommand("publishLocalSigned"),
    runClean,
    runTest,
    setReleaseVersion,
    updateVersionInReadme,
    commitReleaseVersion,
    tagRelease,
    publishArtifacts,
    setNextVersion,
    commitNextVersion,
    releaseStepCommand("sonatypeReleaseAll"),
    pushChanges
  )

  // based on https://github.com/EECOLOR/sbt-release-custom-steps/blob/master/src/main/scala/org/qirx/sbtrelease/UpdateVersionInFiles.scala
  /**
   * Replaces every occurrence of the version currently quoted in README.md with the release
   * version, writes the file back and adds it to version control.
   *
   * Fails the build with a descriptive error when the dependency line is not found in the
   * README or when no release version has been stored in the state.
   */
  private def updateVersionInReadme: ReleaseStep = { s: State =>
    val readmeFile = file("README.md")
    val readme = IO.read(readmeFile)

    val currentVersionPattern = """"com.softwaremill.quicklens" %% "quicklens" % "([\w\.-]+)"""".r
    val currentVersionInReadme = currentVersionPattern
      .findFirstMatchIn(readme)
      .map(_.group(1))
      .getOrElse(sys.error(s"Could not find the quicklens dependency line in ${readmeFile.name}"))

    val releaseVersion = s
      .get(versions)
      .map(_._1)
      .getOrElse(sys.error("No release version is set; this step must run after inquireVersions"))

    s.log.info(s"Replacing $currentVersionInReadme with $releaseVersion in ${readmeFile.name}")

    // Literal replacement: String.replaceAll would interpret '$' and '\' in the new version
    // as regex replacement syntax.
    val newReadme = readme.replace(currentVersionInReadme, releaseVersion)
    IO.write(readmeFile, newReadme)

    val settings = Project.extract(s)
    settings.get(releaseVcs).get.add(readmeFile.getAbsolutePath) !! s.log

    s
  }
}

Source File: StaxEncoder.scala From scalaz-deriving with GNU Lesser General Public License v3.0

5 votes

// Copyright: 2017 - 2020 Sam Halliday
// License: http://www.gnu.org/licenses/lgpl-3.0.en.html

package xmlformat
package stax

import java.io.StringWriter
import java.util.regex.Pattern

import javax.xml.stream.{ XMLOutputFactory, XMLStreamWriter }
import scalaz._, Scalaz._

import com.ctc.wstx.stax.WstxOutputFactory

object StaxEncoder {
  // must not escape the code in this module
  private[this] val factory = new ThreadLocal[XMLOutputFactory] {
    override def initialValue: WstxOutputFactory = {
      val wstx = new WstxOutputFactory
      wstx.configureForSpeed()
      wstx.getConfig.doSupportNamespaces(false)
      wstx
    }
  }

  /** Serialises `t` as an XML document string, one element per line, indented by two spaces per level. */
  def encode(t: XTag): String = {
    val buffer = new StringWriter
    val writer = factory.get.createXMLStreamWriter(buffer)

    writer.writeStartDocument()
    writeTag(writer, t, 0)
    writer.writeEndDocument()

    buffer.toString()
  }

  /**
   * Writes `t` at nesting depth `level`: attributes first, then child elements, then the body.
   * A body containing XML special characters is written as CDATA.
   */
  private[this] def writeTag(x: XMLStreamWriter, t: XTag, level: Int): Unit = {
    // Starts a new line indented for the given depth.
    def lineBreak(depth: Int): Unit = {
      x.writeCharacters("\n")
      x.writeCharacters(" " * 2 * depth)
    }

    lineBreak(level)
    x.writeStartElement(t.name)

    for (attr <- t.attrs.toList)
      x.writeAttribute(attr.name, attr.value.text)

    for (child <- t.children.toList)
      writeTag(x, child, level + 1)

    for (body <- t.body.toList) {
      if (t.children.nonEmpty) lineBreak(level + 1)

      if (containsXmlEntities(body.text)) {
        // "]]>" would end the CDATA section early, so it is split across two sections.
        x.writeCData(body.text.replace("]]>", "]]]]><![CDATA[>"))
      } else {
        x.writeCharacters(body.text)
      }
    }

    if (t.children.nonEmpty) lineBreak(level)

    x.writeEndElement()
  }

  private[this] val entities = Pattern.compile("""("|&|'|<|>)""")

  /** True when `input` contains any of the characters `"`, `&`, `'`, `<`, `>`. */
  def containsXmlEntities(input: String): Boolean = {
    val matcher = entities.matcher(input)
    matcher.find()
  }

}

Source File: StringReplacement.scala From Mastering-Spark-for-Data-Science with MIT License

5 votes

package com.gravity.goose.text



import java.util.regex.Pattern

object StringReplacement {
  /**
   * Compiles `pattern` and pairs it with the text that replaces each match.
   *
   * @throws IllegalArgumentException if `pattern` is null or empty
   */
  def compile(pattern: String, replaceWith: String): StringReplacement = {
    if (string.isNullOrEmpty(pattern)) throw new IllegalArgumentException("Patterns must not be null or empty!")
    new StringReplacement(Pattern.compile(pattern), replaceWith)
  }
}

/** A compiled pattern together with its replacement text; create instances with `StringReplacement.compile`. */
class StringReplacement {
  private var pattern: Pattern = null
  private var replaceWith: String = null

  private def this(pattern: Pattern, replaceWith: String) = {
    this()
    this.pattern = pattern
    this.replaceWith = replaceWith
  }

  /** Replaces every match in `input`; a null or empty input yields `string.empty`. */
  def replaceAll(input: String): String =
    if (string.isNullOrEmpty(input)) string.empty
    else pattern.matcher(input).replaceAll(replaceWith)
}

Source File: ESSearchSanitizer.scala From recogito2 with Apache License 2.0

5 votes

package storage.es


  /**
   * Sanitises a search term in four passes: escape special characters, escape the set
   * operators, collapse whitespace, then escape an unbalanced quote.
   */
  def sanitize(term: String): String = {
    val withoutSpecials  = escapeSpecialCharacters(term)
    val withoutOperators = escapeSetOperators(withoutSpecials)
    val collapsed        = collapseWhiteSpaces(withoutOperators)
    escapeOddQuote(collapsed)
  }

  /** Prefixes each of the characters `\ / + - & | ! ( ) { } [ ] ^ ~ * ? :` with a backslash. */
  private def escapeSpecialCharacters(term: String): String = {
    val quotedSpecials = Pattern.quote("""\/+-&|!(){}[]^~*?:""")
    val anySpecial = Pattern.compile("([" + quotedSpecials + "])")
    anySpecial.matcher(term).replaceAll("\\\\$1")
  }

  /** Escapes the whole words `AND`, `OR` and `NOT` by putting a backslash before each of their letters. */
  private def escapeSetOperators(term: String): String =
    Set("AND", "OR", "NOT").foldLeft(term) { (current, op) =>
      val wholeWord = s"""\\b($op)\\b"""
      current.replaceAll(wholeWord, escapeEachCharacter(op))
    }

  /**
   * Builds a regex replacement string that yields `op` with a backslash before every
   * character (each character is preceded by two backslashes in the returned text).
   */
  private def escapeEachCharacter(op: String): String = {
    val out = new StringBuilder
    op.foreach(ch => out.append("\\\\").append(ch))
    out.toString
  }

  /** Replaces every run of whitespace with a single space. */
  private def collapseWhiteSpaces(term: String): String =
    """\s+""".r.replaceAllIn(term, " ")

  /**
   * When `term` contains an odd number of double quotes, escapes the last one with a
   * backslash; otherwise returns `term` unchanged.
   */
  private def escapeOddQuote(term: String): String = {
    val quoteCount = term.count(_ == '"')
    if (quoteCount % 2 != 1) term
    else term.replaceAll("""(.*)"(.*)""", """$1\\"$2""")
  }

}

Source File: LimitedSharingClassLoader.scala From polynote with Apache License 2.0

5 votes

package polynote.kernel.util

import java.net.URL
import java.util.regex.Pattern


// TODO: should resource loading be similarly altered?
/**
 * A URL class loader that consults its parent first only for class names matched by
 * `shareRegex`; every other class is looked up in `urls` first, falling back to the parent.
 *
 * @param shareRegex regex (searched for anywhere in the class name) selecting the delegated classes
 * @param urls       locations this loader searches itself
 * @param parent     loader used for delegated classes and as the fallback
 */
class LimitedSharingClassLoader(
  shareRegex: String,
  urls: Seq[URL],
  parent: ClassLoader
) extends scala.reflect.internal.util.ScalaClassLoader.URLClassLoader(urls, parent) {

  private val share = Pattern.compile(shareRegex).asPredicate()

  override def loadClass(name: String, resolve: Boolean): Class[_] =
    getClassLoadingLock(name).synchronized {
      val loaded: Class[_] =
        if (!share.test(name)) {
          // Not shared: prefer our own URLs, fall back to the normal delegation.
          try findClass(name)
          catch {
            case _: ClassNotFoundException | _: LinkageError =>
              super.loadClass(name, resolve)
          }
        } else {
          // Shared: normal delegation first, our own URLs only if that fails.
          try super.loadClass(name, false)
          catch {
            case _: ClassNotFoundException => findClass(name)
          }
        }

      if (resolve) resolveClass(loaded)

      loaded
    }


}

Source File: OrcAcidUtil.scala From spark-acid with Apache License 2.0

5 votes

package com.qubole.shaded.hadoop.hive.ql.io.orc

import java.util.regex.Pattern

import com.qubole.shaded.hadoop.hive.ql.io.AcidUtils
import org.apache.hadoop.fs.Path

object OrcAcidUtil {
  // File name of a bucket file: "bucket_" followed by exactly five digits.
  val BUCKET_PATTERN = Pattern.compile("bucket_[0-9]{5}$")

  /**
   * Returns, for each delete-delta directory of `orcSplit`, the path of the bucket file
   * with the same bucket id as the split.
   *
   * Asserts that the split's file name is a bucket file name and that its bucket id can be parsed.
   */
  def getDeleteDeltaPaths(orcSplit: OrcSplit): Array[Path] = {
    val splitPath = orcSplit.getPath
    assert(BUCKET_PATTERN.matcher(splitPath.getName).matches())

    val bucketId = AcidUtils.parseBucketId(splitPath)
    assert(bucketId != -1)

    for (deleteDir <- VectorizedOrcAcidRowBatchReader.getDeleteDeltaDirsFromSplit(orcSplit))
      yield AcidUtils.createBucketFile(deleteDir, bucketId)
  }
}

Source File: TestFilter.scala From stryker4s with Apache License 2.0

5 votes

package stryker4s.config

import java.util.regex.Pattern
import scala.util.Try
import stryker4s.config.TestFilter.wildcardToRegex

/**
 * Decides which tests run, based on the wildcard patterns in `config.testFilter`.
 * A pattern that starts with `!` excludes the tests it matches.
 */
class TestFilter(implicit config: Config) {

  val exclamationMark = "!"

  /** The configured patterns, split into exclusions (`!` prefix removed) and inclusions. */
  lazy val partition: Partition = {
    val (excluded, included) = config.testFilter.partition(_.startsWith(exclamationMark))
    Partition(
      excluded.map(pattern => Regex(wildcardToRegex(pattern.substring(1)))),
      included.map(pattern => Regex(wildcardToRegex(pattern)))
    )
  }

  /**
   * @return false when any exclusion matches `testName`; otherwise true when there are no
   *         inclusions or at least one inclusion matches
   */
  def filter(testName: String): Boolean = {
    def anyMatch(candidates: Seq[Regex]): Boolean = candidates.exists(_.matches(testName))

    !anyMatch(partition.negative) && (partition.positive.isEmpty || anyMatch(partition.positive))
  }
}

/** Exclusion (`negative`) and inclusion (`positive`) patterns. */
case class Partition(negative: Seq[Regex], positive: Seq[Regex])

/** A regular expression held as text; an invalid expression matches nothing. */
case class Regex(regex: String) {
  def matches(testName: String): Boolean =
    Try(Pattern.matches(regex, testName)).getOrElse(false)
}

object TestFilter {

  // Characters that are escaped so they match literally. '*' and '?' are absent on purpose:
  // they are the wildcard characters and are translated by convertChar. '+' is included so
  // that a literal plus sign in a test name is not read as a quantifier.
  private val escapedChars: Set[Char] =
    Set('(', ')', '[', ']', '$', '^', '.', '{', '}', '|', '\\', '+')

  /** Converts a wildcard expression (`*` = any text, `?` = any single character) into an anchored regex. */
  def wildcardToRegex(wildcard: String): String = s"^${wildcard.toList.map(convertChar).mkString}$$"

  /** Translates one wildcard character into its regex equivalent. */
  def convertChar(c: Char): String =
    c match {
      case '*'                 => ".*"
      case '?'                 => "."
      case _ if isRegexChar(c) => s"\\${c.toString}"
      case c                   => c.toString
    }

  /** True for characters that have to be backslash-escaped to be matched literally. */
  def isRegexChar(c: Char): Boolean = escapedChars.contains(c)
}

Source File: ImapResponses.scala From gatling-imap with GNU Affero General Public License v3.0

5 votes

package com.linagora.gatling.imap.protocol

import java.util.regex.Pattern

import com.sun.mail.imap.protocol.IMAPResponse

import scala.collection.immutable.Seq

/** The responses received for one IMAP command, with helpers to inspect them. */
case class ImapResponses(responses: Seq[IMAPResponse]) {

  import ImapResponses._

  // Textual form of every response, in order.
  private def lines: Seq[String] = responses.map(_.toString)

  def mkString(separator: String = ",") = responses.mkString(separator)

  /** Status checks look at the last response only; all are false when there are no responses. */
  def isBad = responses.lastOption.exists(r => r.isBAD)

  def isOk = responses.lastOption.exists(r => r.isOK)

  def isNo = responses.lastOption.exists(r => r.isNO)

  /** The count from the first `RECENT` response, if there is one. */
  def countRecent: Option[Int] =
    lines.collectFirst {
      case Recent(actual) => actual.toInt
    }

  /** The mailbox name of every `LIST` response. */
  def folderList: Seq[String] =
    lines.collect {
      case List(name, null) => name
      case List(null, quotedName) => quotedName
    }

  /** The uid of every response that carries one. */
  def uidList: Seq[Uid] =
    lines.collect {
      case UidRegex(uid) => Uid(uid.toInt)
    }

  /** True when any response's text contains `content`. */
  def contains(content: String): Boolean =
    lines.exists(line => line.contains(content))
}

object ImapResponses {
  val empty = ImapResponses(Seq.empty)

  private[this] val dotAllFlag = """(?s)"""
  private[this] val startWithStar = """^(?:(?:, )?\*)"""
  // Group 1: name written in double quotes; group 2: name written without quotes.
  private[this] val mailboxName = """(?:"([^"]*)"|([^"\s]*))"""

  private val Recent = (dotAllFlag + startWithStar + """ (\d+) RECENT\s*$""").r
  private val List = ("""^\* LIST .*? """ + mailboxName + """\s*$""").r
  private val UidRegex = (dotAllFlag + startWithStar + """ .*UID (\d+).*$""").r
}

Source File: ShouldNotTypecheck.scala From lagom with Apache License 2.0

5 votes

package com.lightbend.lagom.macrotestkit

import scala.language.experimental.macros
import java.util.regex.Pattern

import scala.reflect.macros.TypecheckException
import scala.reflect.macros.blackbox


/**
 * Compile-time check that a code snippet does NOT typecheck.
 *
 * The two-argument form accepts any type error; the three-argument form also requires the
 * error message to match `expected` (a case-insensitive regex matched against the whole message).
 */
object ShouldNotTypecheck {
  def apply(name: String, code: String): Unit = macro ShouldNotTypecheck.applyImplNoExp
  def apply(name: String, code: String, expected: String): Unit = macro ShouldNotTypecheck.applyImpl
}

final class ShouldNotTypecheck(val c: blackbox.Context) {
  import c.universe._

  /** Variant without an expected message: delegates with an empty tree as the marker. */
  def applyImplNoExp(name: Expr[String], code: Expr[String]): Expr[Unit] = applyImpl(name, code, c.Expr(EmptyTree))

  /**
   * Typechecks `code`. Aborts compilation when the code compiles, or when it fails with a
   * message that does not match the expectation; otherwise expands to `()`.
   *
   * `name`, `code` and `expected` must be string literals.
   */
  def applyImpl(name: Expr[String], code: Expr[String], expected: Expr[String]): Expr[Unit] = {
    val Expr(Literal(Constant(codeStr: String))) = code
    val Expr(Literal(Constant(nameStr: String))) = name
    val (expPat, expMsg) = expected.tree match {
      // DOTALL is needed here: the pattern is applied with matches(), and compiler errors are
      // often multi-line. Without it ".*" stops at the first line break and rejects them.
      case EmptyTree => (Pattern.compile(".*", Pattern.DOTALL), "Expected some error.")
      case Literal(Constant(s: String)) =>
        (Pattern.compile(s, Pattern.CASE_INSENSITIVE), "Expected error matching: " + s)
    }

    try c.typecheck(c.parse("{ " + codeStr + " }"))
    catch {
      case e: TypecheckException =>
        val msg = e.getMessage
        if (!expPat.matcher(msg).matches) {
          c.abort(c.enclosingPosition, s"$nameStr failed in an unexpected way.\n$expMsg\nActual error: $msg")
        } else {
          println(s"$nameStr passed.")
          return reify(())
        }
    }

    c.abort(c.enclosingPosition, s"$nameStr succeeded unexpectedly.\n$expMsg")
  }
}

Source File: MetadataActor.scala From sparta with Apache License 2.0

5 votes

package com.stratio.sparta.serving.api.actor

import java.util.regex.Pattern

import akka.actor.Actor
import com.github.nscala_time.time.Imports.{DateTime, DateTimeFormat}
import com.stratio.sparta.serving.api.actor.MetadataActor.ExecuteBackup
import com.stratio.sparta.serving.api.actor.MetadataActor._
import com.stratio.sparta.serving.api.constants.HttpConstant
import com.stratio.sparta.serving.api.utils.{BackupRestoreUtils, FileActorUtils}
import com.stratio.sparta.serving.core.config.SpartaConfig
import com.stratio.sparta.serving.core.constants.AppConstant._
import com.stratio.sparta.serving.core.exception.ServingCoreException
import com.stratio.sparta.serving.core.helpers.InfoHelper
import com.stratio.sparta.serving.core.models.SpartaSerializer
import com.stratio.sparta.serving.core.models.files.{BackupRequest, SpartaFilesResponse}
import spray.http.BodyPart
import spray.httpx.Json4sJacksonSupport

import scala.util.{Failure, Success, Try}

/**
 * Actor that handles backup and restore requests; see [[MetadataActor]]'s companion for the
 * accepted messages. Every request is answered to the sender with a `BackupResponse` or a
 * `SpartaFilesResponse`.
 */
class MetadataActor extends Actor with Json4sJacksonSupport with BackupRestoreUtils with SpartaSerializer
  with FileActorUtils {

  //The dir where the backups will be saved
  val targetDir = Try(SpartaConfig.getDetailConfig.get.getString(BackupsLocation)).getOrElse(DefaultBackupsLocation)
  override val apiPath = HttpConstant.MetadataPath
  override val patternFileName = Option(Pattern.compile(""".*\.json""").asPredicate())

  //Zookeeper connection settings; the configuration section itself is mandatory
  val zkConfig = Try(SpartaConfig.getZookeeperConfig.get)
    .getOrElse(throw new ServingCoreException("Zookeeper configuration is mandatory"))
  override val uri = Try(zkConfig.getString("connectionString")).getOrElse(DefaultZKConnection)
  override val connectionTimeout = Try(zkConfig.getInt("connectionTimeout")).getOrElse(DefaultZKConnectionTimeout)
  override val sessionTimeout = Try(zkConfig.getInt("sessionTimeout")).getOrElse(DefaultZKSessionTimeout)

  override def receive: Receive = {
    case UploadBackups(files) => if (files.isEmpty) errorResponse() else uploadBackups(files)
    case ListBackups => browseBackups()
    case BuildBackup => buildBackup()
    case DeleteBackups => deleteBackups()
    case CleanMetadata => cleanMetadata()
    case DeleteBackup(fileName) => deleteBackup(fileName)
    case ExecuteBackup(backupRequest) => executeBackup(backupRequest)
    case _ => log.info("Unrecognized message in Backup/Restore Actor")
  }

  /** Imports the named backup file from `targetDir` under the path "/". */
  def executeBackup(backupRequest: BackupRequest): Unit =
    sender ! BackupResponse(Try{
      importer("/", s"$targetDir/${backupRequest.fileName}", backupRequest.deleteAllBefore)
    })

  /** Replies with a failure because an upload request contained no files. */
  def errorResponse(): Unit =
    sender ! SpartaFilesResponse(Failure(new IllegalArgumentException(s"At least one file is expected")))

  def deleteBackups(): Unit = sender ! BackupResponse(deleteFiles())

  def cleanMetadata(): Unit = sender ! BackupResponse(Try(cleanZk(BaseZKPath)))

  /**
   * Dumps `BaseZKPath` into a new file in `targetDir` named
   * `backup-<timestamp>-<pom version>.json` and replies with the directory listing,
   * or with the failure when the dump fails.
   */
  def buildBackup(): Unit = {
    // HH (hour of day, 0-23) rather than hh (1-12 without am/pm), so that backups taken
    // twelve hours apart get different file names.
    val format = DateTimeFormat.forPattern("yyyy-MM-dd-HH:mm:ss")
    val appInfo = InfoHelper.getAppInfo
    Try {
      dump(BaseZKPath, s"$targetDir/backup-${format.print(DateTime.now)}-${appInfo.pomVersion}.json")
    } match {
      case Success(_) =>
        sender ! SpartaFilesResponse(browseDirectory())
      case Failure(e) =>
        sender ! BackupResponse(Failure(e))
    }
  }

  def deleteBackup(fileName: String): Unit = sender ! BackupResponse(deleteFile(fileName))

  def browseBackups(): Unit = sender ! SpartaFilesResponse(browseDirectory())

  def uploadBackups(files: Seq[BodyPart]): Unit = sender ! SpartaFilesResponse(uploadFiles(files))
}

/** Messages understood by [[MetadataActor]] and the response it sends for backup operations. */
object MetadataActor {

  case class UploadBackups(files: Seq[BodyPart])

  case class BackupResponse(status: Try[_])

  case class ExecuteBackup(backupRequest: BackupRequest)

  case object ListBackups

  case object BuildBackup

  case object DeleteBackups

  case object CleanMetadata

  case class DeleteBackup(fileName: String)

}

Source File: KStreamBuilderS.scala From kafka-streams-scala with Apache License 2.0

5 votes

package com.github.aseigneurin.kafka.streams.scala

import java.util.regex.Pattern

import com.github.aseigneurin.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.common.serialization.Serde
import org.apache.kafka.streams.kstream.{GlobalKTable, KStreamBuilder}
import org.apache.kafka.streams.processor.TopologyBuilder

/**
 * Thin Scala facade over one shared `KStreamBuilder` instance.
 *
 * Every method forwards to `inner`, taking the serdes as implicit parameters.
 * The Scala wrapper return types (`KStreamS`, `KTableS`) presumably come from the
 * implicit conversions imported from `ImplicitConversions`.
 */
object KStreamBuilderS {

  /** The wrapped Java builder that all methods delegate to. */
  val inner = new KStreamBuilder

  /** Creates a stream from the named topics. */
  def stream[K, V](topics: String*)(
      implicit keySerde: Serde[K], valSerde: Serde[V]): KStreamS[K, V] =
    inner.stream[K, V](keySerde, valSerde, topics: _*)

  /** Creates a stream from the named topics with an explicit offset-reset policy. */
  def stream[K, V](offsetReset: TopologyBuilder.AutoOffsetReset, topics: String*)(
      implicit keySerde: Serde[K], valSerde: Serde[V]): KStreamS[K, V] =
    inner.stream[K, V](offsetReset, keySerde, valSerde, topics: _*)

  /** Creates a stream from the topics selected by `topicPattern`. */
  def stream[K, V](topicPattern: Pattern)(
      implicit keySerde: Serde[K], valSerde: Serde[V]): KStreamS[K, V] =
    inner.stream[K, V](keySerde, valSerde, topicPattern)

  /** Creates a stream from the topics selected by `topicPattern` with an explicit offset-reset policy. */
  def stream[K, V](offsetReset: TopologyBuilder.AutoOffsetReset, topicPattern: Pattern)(
      implicit keySerde: Serde[K], valSerde: Serde[V]): KStreamS[K, V] =
    inner.stream[K, V](offsetReset, keySerde, valSerde, topicPattern)

  /** Creates a table for `topic` using the given store name. */
  def table[K, V](topic: String, storeName: String)(
      implicit keySerde: Serde[K], valSerde: Serde[V]): KTableS[K, V] =
    inner.table[K, V](keySerde, valSerde, topic, storeName)

  /** Creates a table for `topic` using the given store name and offset-reset policy. */
  def table[K, V](offsetReset: TopologyBuilder.AutoOffsetReset, topic: String, storeName: String)(
      implicit keySerde: Serde[K], valSerde: Serde[V]): KTableS[K, V] =
    inner.table[K, V](offsetReset, keySerde, valSerde, topic, storeName)

  /** Creates a global table for `topic` using the given store name. */
  def globalTable[K, V](topic: String, storeName: String)(
      implicit keySerde: Serde[K], valSerde: Serde[V]): GlobalKTable[K, V] =
    inner.globalTable(keySerde, valSerde, topic, storeName)

  /** Merges the given streams into one by unwrapping each to its Java stream. */
  def merge[K, V](streams: KStreamS[K, V]*): KStreamS[K, V] = {
    val unwrapped = streams.map(_.inner)
    inner.merge(unwrapped: _*)
  }

}

Source File: typechecking.scala From perf_tester with Apache License 2.0

5 votes

package shapeless.test

import scala.language.experimental.macros

import java.util.regex.Pattern

import scala.reflect.macros.{ whitebox, ParseException, TypecheckException }


// Compile-time assertion that a code snippet does NOT type-check.
// Both overloads are macros implemented in IllTypedMacros.
object illTyped {
  // Accepts any type error raised by `code`.
  def apply(code: String): Unit = macro IllTypedMacros.applyImplNoExp
  // Additionally requires the type error message to match the regular expression `expected`.
  def apply(code: String, expected: String): Unit = macro IllTypedMacros.applyImpl
}

@macrocompat.bundle
class IllTypedMacros(val c: whitebox.Context) {
  import c.universe._

  /** Entry point for the one-argument form: no expectation on the error message. */
  def applyImplNoExp(code: Tree): Tree = applyImpl(code, null)

  /**
   * Parses and type-checks the string literal `code` and reports a compile error
   * when it type-checks, when it fails with a message that does not match
   * `expected`, or when it cannot be parsed at all.
   *
   * @param code     tree of the string literal holding the snippet
   * @param expected tree of the string literal holding the expected-message regex,
   *                 or `null` when any type error is acceptable
   * @return the unit literal
   */
  def applyImpl(code: Tree, expected: Tree): Tree = {
    val Literal(Constant(codeStr: String)) = code

    // Empty when no expectation tree was passed; otherwise the compiled
    // (case-insensitive, dot-matches-newline) pattern and its description.
    val expectation = Option(expected).map {
      case Literal(Constant(s: String)) =>
        (Pattern.compile(s, Pattern.CASE_INSENSITIVE | Pattern.DOTALL), "Expected error matching: "+s)
    }
    val expMsg = expectation.map(_._2).getOrElse("Expected some error.")

    try {
      val dummy0 = TermName(c.freshName)
      val dummy1 = TermName(c.freshName)
      c.typecheck(c.parse(s"object $dummy0 { val $dummy1 = { $codeStr } }"))
      // Reaching this line means the snippet type-checked, which is the failure case.
      c.error(c.enclosingPosition, "Type-checking succeeded unexpectedly.\n"+expMsg)
    } catch {
      case e: TypecheckException =>
        val msg = e.getMessage
        val wrongError = expectation.exists { case (expPat, _) => !expPat.matcher(msg).matches }
        if (wrongError)
          c.error(c.enclosingPosition, "Type-checking failed in an unexpected way.\n"+expMsg+"\nActual error: "+msg)
      case e: ParseException =>
        c.error(c.enclosingPosition, s"Parsing failed.\n${e.getMessage}")
    }

    q"()"
  }
}

Source File: Configuration.scala From circe-magnolia with Apache License 2.0

5 votes

package io.circe.magnolia.configured

import java.util.regex.Pattern


/**
 * Derivation settings.
 *
 * @param transformMemberNames      function applied to member names
 * @param transformConstructorNames function applied to constructor names
 * @param useDefaults               flag enabled by `withDefaults`
 * @param discriminator             optional discriminator name set by `withDiscriminator`
 */
final case class Configuration(
  transformMemberNames: String => String,
  transformConstructorNames: String => String,
  useDefaults: Boolean,
  discriminator: Option[String]
) {
  /** Copy of this configuration that renames members to snake_case. */
  def withSnakeCaseMemberNames: Configuration = copy(
    transformMemberNames = Configuration.snakeCaseTransformation
  )

  /** Copy of this configuration that renames members to kebab-case. */
  def withKebabCaseMemberNames: Configuration = copy(
    transformMemberNames = Configuration.kebabCaseTransformation
  )

  /** Copy of this configuration that renames constructors to snake_case. */
  def withSnakeCaseConstructorNames: Configuration = copy(
    transformConstructorNames = Configuration.snakeCaseTransformation
  )

  /** Copy of this configuration that renames constructors to kebab-case. */
  def withKebabCaseConstructorNames: Configuration = copy(
    transformConstructorNames = Configuration.kebabCaseTransformation
  )

  /** Copy of this configuration with `useDefaults` switched on. */
  def withDefaults: Configuration = copy(useDefaults = true)

  /** Copy of this configuration using the given discriminator name. */
  def withDiscriminator(discriminator: String): Configuration = copy(discriminator = Some(discriminator))
}

object Configuration {

  /** Identity name transformations, no defaults, no discriminator. */
  val default: Configuration = Configuration(Predef.identity, Predef.identity, false, None)

  // Splits an upper-case run from a following capitalised word: "HTMLParser" -> "HTML" | "Parser".
  private val basePattern: Pattern = Pattern.compile("([A-Z]+)([A-Z][a-z])")
  // Splits a lower-case letter or digit from a following upper-case letter: "fooBar" -> "foo" | "Bar".
  private val swapPattern: Pattern = Pattern.compile("([a-z\\d])([A-Z])")

  // Inserts `separator` at both kinds of word boundary and lower-cases the result.
  // Locale.ROOT keeps the output independent of the JVM default locale; with a
  // Turkish default locale "I" would otherwise be lower-cased to a dotless "ı".
  private def separateWords(s: String, separator: String): String = {
    val replacement = "$1" + separator + "$2"
    val partial = basePattern.matcher(s).replaceAll(replacement)
    swapPattern.matcher(partial).replaceAll(replacement).toLowerCase(java.util.Locale.ROOT)
  }

  /** Converts camelCase / PascalCase names to snake_case. */
  val snakeCaseTransformation: String => String = s => separateWords(s, "_")

  /** Converts camelCase / PascalCase names to kebab-case. */
  val kebabCaseTransformation: String => String = s => separateWords(s, "-")
}

/** Import `defaults._` to bring the default [[Configuration]] into implicit scope. */
// `final` was dropped: objects cannot be extended, so the modifier was redundant.
object defaults {
  implicit val defaultGenericConfiguration: Configuration = Configuration.default
}

Source File: package.scala From tscfg with Apache License 2.0

5 votes

package tscfg

import java.util.regex.Pattern

import scala.util.control.NonFatal

/**
 * Code templates loaded from the classpath resources `codeDefs/JavaDefs.java` and
 * `codeDefs/ScalaDefs.scala`.
 *
 * A template starts after a line containing `//<key>` and ends at the line
 * containing `//</key>`; the lines in between (each followed by a newline) form
 * the template text stored under `key`.
 */
package object codeDefs {
  private val beginTemplatePattern = Pattern.compile("\\s*//<([^>]+)>.*$")

  private val javaMap = getMap("codeDefs/JavaDefs.java")
  private val scalaMap = getMap("codeDefs/ScalaDefs.scala")

  /** Returns the Java template registered under `key`; throws RuntimeException if undefined. */
  def javaDef(key: String): String = getDef("java", javaMap, key)

  /** Returns the Scala template registered under `key`; throws RuntimeException if undefined. */
  def scalaDef(key: String): String = getDef("scala", scalaMap, key)

  // Looks up `key`, turning a missing key into a RuntimeException that lists the defined keys.
  private def getDef(lang: String, map: Map[String, String], key: String): String = {
    try map(key)
    catch {
      // $COVERAGE-OFF$
      case NonFatal(e) =>
        val keys = map.keySet.toList.sorted
        val msg = s"Unexpected: undefined key '$key' for $lang. Defined keys: $keys. Please report this bug"
        throw new RuntimeException(msg, e)
      // $COVERAGE-ON$
    }
  }

  // Parses the resource into a key -> template map. Any non-fatal problem (including
  // a missing resource) is rethrown as a RuntimeException naming the resource.
  private def getMap(resourceName: String): Map[String, String] = try {
    //println(s"codeDefs.getMap $resourceName")
    val map = collection.mutable.HashMap[String, String]()
    val is = getClass.getClassLoader.getResourceAsStream(resourceName)
    assert(is != null)
    // Close the stream even when reading or parsing fails; previously it was
    // only closed on the success path.
    try {
      val source = io.Source.fromInputStream(is, "utf-8")
      var key: String = null
      val template = new StringBuilder
      for (line <- source.getLines()) {
        if (key == null) {
          // Outside a template: look for an opening marker.
          val m = beginTemplatePattern.matcher(line)
          if (m.find) {
            key = m.group(1)
          }
        }
        else if (line.contains("//</" + key + ">")) {
          // Closing marker: store the collected text and reset.
          map.update(key, template.toString)
          key = null
          template.setLength(0)
        }
        else template.append(line).append("\n")
      }
      map.toMap
    }
    finally is.close()
  }
  catch {
    case NonFatal(ex) =>
      throw new RuntimeException(
        s"Unexpected exception in getMap(resourceName=$resourceName)." +
        " Please report this bug.", ex)
  }
}

Source File: DebuggingWordCount.scala From beam-scala-examples with Apache License 2.0

5 votes

package org.apache.beam.examples

import java.util.regex.Pattern

import org.apache.beam.sdk.Pipeline
import org.apache.beam.sdk.io.TextIO
import org.apache.beam.sdk.metrics.Metrics
import org.apache.beam.sdk.options.{Default, Description, PipelineOptionsFactory}
import org.apache.beam.sdk.transforms.DoFn.ProcessElement
import org.apache.beam.sdk.transforms.{Count, DoFn, MapElements, ParDo}
import org.apache.beam.sdk.values.KV
import org.slf4j.LoggerFactory

// Word-count pipeline that keeps only the words matching the configured filter pattern.
object DebuggingWordCount {

  // Builds the pipeline from command-line options, runs it and blocks until it finishes.
  def main(args: Array[String]): Unit = {
    val options = PipelineOptionsFactory
      .fromArgs(args: _*)
      .withValidation()
      .as(classOf[DebuggingWordCountOptions])

    val pipeline = Pipeline.create(options)

    // read lines -> extract words -> count per word -> filter by pattern -> format -> write
    pipeline.apply("ReadFiles", TextIO.read().from(options.getInputFile))
      .apply(ParDo.of(new ExtractWords))
      .apply(Count.perElement())
      .apply(ParDo.of(new FilterTextFn(options.getFilterPattern)))
      .apply(MapElements.via(new FormatResult))
      .apply("WriteWords", TextIO.write().to(options.getOutput))

    pipeline.run().waitUntilFinish()
  }
}

// ======================================== UDFs ===============================================

/**
 * Emits only the (word, count) pairs whose word fully matches `pattern`, and
 * counts matched / unmatched words in the `matchedWords` / `unmatchedWords` metrics.
 *
 * @param pattern regular expression a word must match entirely to be kept
 */
class FilterTextFn(pattern: String) extends DoFn[KV[String, java.lang.Long], KV[String, java.lang.Long]] {
  private val logger = LoggerFactory.getLogger(getClass)
  lazy val filter = Pattern.compile(pattern)
  lazy val matchedWords = Metrics.counter(classOf[FilterTextFn], "matchedWords")
  lazy val unmatchedWords = Metrics.counter(classOf[FilterTextFn], "unmatchedWords")

  /** Forwards the current element when its key matches the filter; otherwise drops it. */
  @ProcessElement
  def processElement(c: ProcessContext): Unit = {
    val word = c.element.getKey
    if (filter.matcher(word).matches()) {
      logger.debug("Matched: " + word)
      matchedWords.inc()
      c.output(c.element)
    } else {
      logger.trace("Did not match: " + word)
      unmatchedWords.inc()
    }
  }
}

// ======================================= Options ==============================================

// Pipeline options for DebuggingWordCount: everything from WordCountOptions plus the
// filter pattern, whose default is the regex "Flourish|stomach".
trait DebuggingWordCountOptions extends WordCountOptions {
  @Description("Regex filter pattern to use in DebuggingWordCount. Only words matching this pattern will be counted.")
  @Default.String("Flourish|stomach")
  def getFilterPattern: String
  def setFilterPattern(value: String): Unit
}

Source File: YamlProjectOperationInfoParser.scala From rug with GNU General Public License v3.0

5 votes

package com.atomist.project.common.yaml

import java.util.regex.{Pattern, PatternSyntaxException}

import com.atomist.param._
import com.atomist.project.common.template.{InvalidTemplateException, TemplateBasedProjectOperationInfo}
import com.fasterxml.jackson.annotation.JsonProperty
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper
import org.apache.commons.lang3.builder.ReflectionToStringBuilder

import scala.util.{Failure, Success, Try}


/** Parses YAML template metadata into a [[TemplateBasedProjectOperationInfo]]. */
object YamlProjectOperationInfoParser {

  private val mapper = new ObjectMapper(new YAMLFactory()) with ScalaObjectMapper
  mapper.registerModule(DefaultScalaModule)

  /**
   * Parses the given YAML document.
   *
   * @param yaml YAML content; must be neither null nor empty
   * @return the bound operation info
   * @throws InvalidYamlDescriptorException if the content is missing, cannot be bound,
   *                                        or declares a parameter with an invalid regex
   */
  @throws[InvalidYamlDescriptorException]
  def parse(yaml: String): TemplateBasedProjectOperationInfo = {
    if (yaml == null || yaml.isEmpty)
      throw new InvalidYamlDescriptorException("YAML content required in template metadata file")

    Try(mapper.readValue(yaml, classOf[BoundProjectOperationInfo])) match {
      case Failure(cause) =>
        throw new InvalidYamlDescriptorException(s"Failed to parse YAML [$yaml]: ${cause.getMessage}", cause)
      case Success(info) =>
        // Reject the descriptor as a whole if any parameter pattern does not compile.
        val badPatterns = info.parameters.flatMap(param => patternError(param))
        if (badPatterns.nonEmpty)
          throw new InvalidYamlDescriptorException(s"Bad regexp patterns: ${badPatterns.mkString(",")}")
        info
    }
  }

  // Some(error description) when the parameter's pattern is not a valid regex, None otherwise.
  private def patternError(p: Parameter): Option[String] =
    try {
      Pattern.compile(p.getPattern)
      None
    } catch {
      case pse: PatternSyntaxException =>
        Some(s"${p.getName}: Bad regular expression pattern: ${pse.getMessage}")
    }
}

/** Mutable binding target populated from the YAML document through the annotated properties. */
private class BoundProjectOperationInfo extends TemplateBasedProjectOperationInfo {

  @JsonProperty("name")
  var name: String = _

  @JsonProperty("description")
  var description: String = _

  @JsonProperty("template_name")
  var templateName: String = _

  @JsonProperty("type")
  var _templateType: String = _

  /** The bound "type" value, or None when it is absent or empty. */
  override def templateType: Option[String] =
    Option(_templateType).filter(_.nonEmpty)

  @JsonProperty("parameters")
  private var _params: Seq[Parameter] = Nil

  @JsonProperty("tags")
  private var _tags: Seq[TagHolder] = Nil

  override def parameters: Seq[Parameter] = _params

  /** The bound tag holders converted to tags. */
  override def tags: Seq[Tag] = _tags.map(_.toTag)

  override def toString = ReflectionToStringBuilder.toString(this)
}

// Mutable binding target for one entry of the "tags" list; converted to a Tag via toTag.
private class TagHolder {

  @JsonProperty
  var name: String = _

  @JsonProperty
  var description: String = _

  // Builds the tag from the bound name and description.
  def toTag = Tag(name, description)
}

// Thrown by YamlProjectOperationInfoParser.parse for missing, unparseable or invalid YAML; `ex` is the optional cause.
class InvalidYamlDescriptorException(msg: String, ex: Throwable = null) extends InvalidTemplateException(msg, ex)

Source File: DynamicConfigurationUtils.scala From maha with Apache License 2.0

5 votes

// Copyright 2017, Yahoo Holdings Inc.
// Licensed under the terms of the Apache License 2.0. Please see LICENSE file in project root for terms.
package com.yahoo.maha.service.config.dynamic

import java.util.regex.Pattern

import grizzled.slf4j.Logging
import org.json4s.JsonAST.JString
import org.json4s.{JField, JValue}

import scala.collection.mutable

/**
 * Helpers for locating dynamic properties embedded in JSON string fields using the
 * form `<%(propertyKey, defaultValue)%>`.
 */
object DynamicConfigurationUtils extends Logging {
  private val START = Pattern.quote("<%(")
  private val END = Pattern.quote(")%>")
  // Group 1 is the property key, group 2 the default value. Both groups are greedy,
  // so group 1 extends up to the last comma before the closing marker.
  val DYNAMIC_CONFIG_PATTERN = Pattern.compile(s"$START(.*),(.*)$END")

  /**
   * Maps each dynamic property key found in `json` to the pair
   * (name of the JSON field holding it, trimmed default value).
   * When several fields use the same property key, the last one wins.
   */
  def extractDynamicFields(json: JValue): Map[String, (String, String)] = {
    val fieldsByProperty = new mutable.HashMap[String, (String, String)]()
    for (field <- getDynamicFields(json)) {
      val (fieldName, fieldValue) = field
      require(fieldValue.isInstanceOf[JString], s"Cannot extract dynamic property from non-string field: $field")
      implicit val formats = org.json4s.DefaultFormats
      val matcher = DYNAMIC_CONFIG_PATTERN.matcher(fieldValue.extract[String])
      require(matcher.find(), s"Field does not contain dynamic property $field. Pattern - $DYNAMIC_CONFIG_PATTERN")
      require(matcher.groupCount() == 2, s"Expected name and default value in dynamic property field: $field")
      val propertyKey = matcher.group(1).trim
      val defaultValue = matcher.group(2).trim
      fieldsByProperty.put(propertyKey, (fieldName, defaultValue))
    }
    fieldsByProperty.toMap
  }

  /** Returns every field of `json` whose string value contains a dynamic property. */
  def getDynamicFields(json: JValue): List[JField] = {
    implicit val formats = org.json4s.DefaultFormats
    json.filterField {
      case (_, JString(s)) => DYNAMIC_CONFIG_PATTERN.matcher(s).find()
      case _ => false
    }
  }
}

Source File: DataSourceV2Utils.scala From XSQL with Apache License 2.0

5 votes

package org.apache.spark.sql.execution.datasources.v2

import java.util.regex.Pattern

import org.apache.spark.internal.Logging
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.sources.v2.{DataSourceV2, SessionConfigSupport}

private[sql] object DataSourceV2Utils extends Logging {

  /**
   * Extracts the session configs addressed to the given data source.
   *
   * For a source implementing `SessionConfigSupport`, every config whose key has the
   * form `spark.datasource.<keyPrefix>.<name>` is returned under the key `<name>`.
   * Any other source yields an empty map.
   *
   * @param ds   the data source to collect configs for
   * @param conf the session configuration to read from
   * @return the matching configs with the prefix stripped from their keys
   */
  def extractSessionConfigs(ds: DataSourceV2, conf: SQLConf): Map[String, String] = ds match {
    case cs: SessionConfigSupport =>
      val keyPrefix = cs.keyPrefix()
      require(keyPrefix != null, "The data source config key prefix can't be null.")

      // The prefix is quoted so that regex metacharacters in it (for example the dot in
      // "my.source") are matched literally instead of being interpreted as regex syntax.
      val pattern = Pattern.compile(s"^spark\\.datasource\\.${Pattern.quote(keyPrefix)}\\.(.+)")

      conf.getAllConfs.flatMap { case (key, value) =>
        val m = pattern.matcher(key)
        if (m.matches() && m.groupCount() > 0) {
          Seq((m.group(1), value))
        } else {
          Seq.empty
        }
      }

    case _ => Map.empty
  }
}

Source File: StringUtils.scala From XSQL with Apache License 2.0

5 votes

package org.apache.spark.sql.catalyst.util

import java.util.regex.{Pattern, PatternSyntaxException}

import org.apache.spark.sql.AnalysisException
import org.apache.spark.unsafe.types.UTF8String

object StringUtils {

  /**
   * Filters `names` with a simple wildcard pattern.
   *
   * The pattern is trimmed and split on `|`; in each alternative `*` stands for any
   * sequence of characters and matching ignores case. A name is kept when it fully
   * matches at least one alternative. Alternatives that are not valid regular
   * expressions are ignored.
   *
   * @param names   candidate names
   * @param pattern `|`-separated wildcard alternatives
   * @return the distinct matching names in sorted order
   */
  def filterPattern(names: Seq[String], pattern: String): Seq[String] = {
    val matched = scala.collection.mutable.SortedSet.empty[String]
    for (alternative <- pattern.trim().split("\\|")) {
      try {
        val compiled = Pattern.compile("(?i)" + alternative.replaceAll("\\*", ".*"))
        matched ++= names.filter(name => compiled.matcher(name).matches())
      } catch {
        // Deliberately best-effort: an invalid alternative simply matches nothing.
        case _: PatternSyntaxException =>
      }
    }
    matched.toSeq
  }
}

Source File: Block.scala From sort-imports with MIT License

5 votes

package fix

import java.util.regex.Pattern

/** A group that an import string can be assigned to. */
sealed trait Block {
  /** The text this block was created from. */
  def string: String

  /** Whether the import string `s` belongs to this block. */
  def matches(s: String): Boolean
}

object Block {

  /** Matches strings that start with the literal `string`. */
  final class StaticPrefix(val string: String) extends Block {
    override def matches(s: String): Boolean =
      s.startsWith(string)
  }

  object RegexPrefix {
    val Prefix: String = "re:"
  }

  /** Matches strings whose beginning matches the regular expression `string`. */
  final class RegexPrefix(val string: String) extends Block {
    private val pattern = Pattern.compile(string)

    // lookingAt anchors the match at the start of the input but not at its end.
    override def matches(s: String): Boolean =
      pattern.matcher(s).lookingAt
  }

  /** Catch-all block: matches every string. */
  object Default extends Block {
    val string = "*"

    override def matches(s: String): Boolean =
      true
  }
}

Source File: DefaultInstanceAliasConverter.scala From Linkis with Apache License 2.0

5 votes

package com.webank.wedatasphere.linkis.rpc.instancealias.impl

import java.util.Base64
import java.util.regex.Pattern

import com.webank.wedatasphere.linkis.rpc.instancealias.InstanceAliasConverter
import org.apache.commons.lang.StringUtils
import org.springframework.stereotype.Component


/**
 * Converts between an instance string and its alias by Base64-encoding the
 * UTF-8 bytes of the instance.
 */
@Component
class DefaultInstanceAliasConverter extends InstanceAliasConverter  {

  // Characters of the standard Base64 alphabet, including '=' padding.
  val pattern = Pattern.compile("[a-zA-Z\\d=\\+/]+")

  // todo use base64 for the moment
  /** Encodes the instance as Base64. UTF-8 is used so the alias does not depend on the JVM default charset. */
  override def instanceToAlias(instance: String): String =
    Base64.getEncoder.encodeToString(instance.getBytes(java.nio.charset.StandardCharsets.UTF_8))

  /** Decodes a Base64 alias back to the instance string. */
  override def aliasToInstance(alias: String): String =
    new String(Base64.getDecoder.decode(alias), java.nio.charset.StandardCharsets.UTF_8)

  /**
   * Returns true only when the alias is non-blank and consists entirely of Base64
   * characters. The whole string must match: the previous `find()` accepted any
   * input that merely contained one such character.
   */
  override def checkAliasFormatValid(alias: String): Boolean =
    !StringUtils.isBlank(alias) && pattern.matcher(alias).matches()
}

Source File: CSResourceParser.scala From Linkis with Apache License 2.0

5 votes

package com.webank.wedatasphere.linkis.engine.cs


import java.util
import java.util.regex.Pattern

import com.webank.wedatasphere.linkis.cs.client.service.CSResourceService
import com.webank.wedatasphere.linkis.engine.PropertiesExecuteRequest
import org.apache.commons.lang.StringUtils

import scala.collection.JavaConversions._
import scala.collection.mutable.ArrayBuffer


/**
 * Finds `cs://` resource references in code, resolves them against the upstream
 * resources of a node and rewrites the code to use the plain resource names.
 */
class CSResourceParser {

  private val pb = Pattern.compile("cs://[^\\s\"]+[$\\s]{0,1}", Pattern.CASE_INSENSITIVE)

  private val PREFIX = "cs://"

  // Every (trimmed) cs:// reference found in the code, in order of appearance.
  private def getPreFixResourceNames(code: String): Array[String] = {
    val matcher = pb.matcher(code)
    val found = new ArrayBuffer[String]()
    while (matcher.find) {
      found.append(matcher.group.trim)
    }
    found.toArray
  }

  /**
   * Registers the resolved resources under the "resources" key of the request
   * properties and returns the code with each resolved reference replaced by its
   * resource name. References with no matching upstream resource are left untouched.
   */
  def parse(executeRequest: PropertiesExecuteRequest, code: String, contextIDValueStr: String, nodeNameStr: String): String = {

    //TODO getBMLResource peaceWong
    val upstreamResources = CSResourceService.getInstance().getUpstreamBMLResource(contextIDValueStr, nodeNameStr)

    val parsedResources = new util.ArrayList[util.Map[String, Object]]()
    val references = getPreFixResourceNames(code)

    // Parallel buffers: resolvedReferences(i) is replaced by resolvedNames(i).
    val resolvedReferences = new ArrayBuffer[String]()
    val resolvedNames = new ArrayBuffer[String]()
    for (reference <- references) {
      val resourceName = reference.replace(PREFIX, "").trim
      upstreamResources.find(_.getDownloadedFileName.equals(resourceName)).foreach { bmlResource =>
        val entry = new util.HashMap[String, Object]()
        entry.put("resourceId", bmlResource.getResourceId)
        entry.put("version", bmlResource.getVersion)
        entry.put("fileName", resourceName)
        parsedResources.add(entry)
        resolvedReferences.append(reference)
        resolvedNames.append(resourceName)
      }
    }
    executeRequest.properties.put("resources", parsedResources)
    StringUtils.replaceEach(code, resolvedReferences.toArray, resolvedNames.toArray)
  }

}

Source File: CommentInterceptor.scala From Linkis with Apache License 2.0

5 votes

package com.webank.wedatasphere.linkis.entrance.interceptor.impl

import java.lang
import java.util.regex.Pattern

import com.webank.wedatasphere.linkis.entrance.interceptor.EntranceInterceptor
import com.webank.wedatasphere.linkis.protocol.query.RequestPersistTask
import com.webank.wedatasphere.linkis.protocol.task.Task
import org.slf4j.{Logger, LoggerFactory}

import scala.util.matching.Regex

"
  /** Replaces every match of `scalaCommentPattern` in the code with the match's first capture group. */
  override def dealComment(code: String): String =
    Pattern.compile(scalaCommentPattern).matcher(code).replaceAll("$1")
}


// Manual smoke test: prints the result of SQLCommentHelper.dealComment for a sample statement.
object CommentMain{
  def main(args: Array[String]): Unit = {
    val sqlCode = "select * from default.user;--你好;show tables"
    // NOTE(review): sqlCode1 is never used; only sqlCode is passed to dealComment below.
    val sqlCode1 = "select * from default.user--你好;show tables"
    println(SQLCommentHelper.dealComment(sqlCode))
  }
}

Source File: CSTableParser.scala From Linkis with Apache License 2.0

5 votes

package com.webank.wedatasphere.linkis.engine.cs

import java.util.regex.Pattern

import com.webank.wedatasphere.linkis.common.utils.Logging
import com.webank.wedatasphere.linkis.cs.client.service.CSTableService
import com.webank.wedatasphere.linkis.cs.common.entity.metadata.CSTable
import com.webank.wedatasphere.linkis.cs.common.utils.CSCommonUtils
import com.webank.wedatasphere.linkis.engine.exception.ExecuteError
import com.webank.wedatasphere.linkis.engine.execute.EngineExecutorContext
import org.apache.commons.lang.StringUtils
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.datasources.csv.DolphinToSpark

import scala.collection.mutable.ArrayBuffer


  /** Asks the CSTableService for the upstream table matching `csTempTable` for the given context and node. */
  def getCSTable(csTempTable: String, contextIDValueStr: String, nodeNameStr: String): CSTable = {
    val tableService = CSTableService.getInstance()
    tableService.getUpstreamSuitableTable(contextIDValueStr, nodeNameStr, csTempTable)
  }

  /** Creates a temp view named after the table, backed by the table's location, logging before and after. */
  def registerTempTable(csTable: CSTable): Unit = {
    val session = SparkSession.builder().enableHiveSupport().getOrCreate()
    val viewName = csTable.getName
    val location = csTable.getLocation
    info(s"Start to create  tempView to sparkSession viewName($viewName) location($location)")
    DolphinToSpark.createTempView(session, viewName, location, true)
    info(s"Finished to create  tempView to sparkSession viewName($viewName) location($location)")
  }
}

Source File: ConfManager.scala From HadoopLearning with MIT License

5 votes

package com.utils

import java.util.regex.Pattern

import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer

import scala.collection.mutable.HashMap

/**
  * Spark Streaming / Kafka configuration helpers.
  *
  * @author liumm
  * @since 2018-07-27 20:27
  */
object ConfManager {

  /**
    * Maximum number of records per store write.
    */
  val maxRecords = 1000

  /**
    * Builds the Kafka settings for a stream.
    *
    * @param streamConf stream configuration providing brokers, group id and topics
    * @return the consumer configuration and the topic pattern compiled from `streamConf.topics`
    */
  def kafkaParam(streamConf: StreamConf): (Map[String, Object], Pattern) = {
    (getConsumerConfig(streamConf.brokers, streamConf.groupId), Pattern.compile(streamConf.topics))
  }

  /**
    * Builds the string-valued Kafka parameters used for metadata access.
    *
    * @param streamConf stream configuration providing brokers and group id
    * @return the parameter map
    */
  def kafkaParamForMetadata(streamConf: StreamConf): Map[String, String] = {
    val kafkaParams = new HashMap[String, String]()
    kafkaParams += (ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> streamConf.brokers)
    kafkaParams += ("metadata.broker.list" -> streamConf.brokers)
    kafkaParams += (ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "smallest")
    kafkaParams += (ConsumerConfig.GROUP_ID_CONFIG -> streamConf.groupId)
    kafkaParams.toMap
  }

  /**
    * Generates the Kafka consumer configuration.
    *
    * @return the Kafka consumer configuration
    */
  private def getConsumerConfig(brokers: String, groupId: String): Map[String, Object] = {
    val kafkaParams = new HashMap[String, Object]()

    kafkaParams += (ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokers)
    kafkaParams += (ConsumerConfig.GROUP_ID_CONFIG -> groupId)
    kafkaParams += (ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer])
    kafkaParams += (ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer])

    // Int.box replaces the deprecated `new Integer(...)` constructor.
    kafkaParams += (ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG -> Int.box(3 * 1024 * 1024))
    kafkaParams += (ConsumerConfig.MAX_POLL_RECORDS_CONFIG -> Int.box(100))

    kafkaParams += (ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "latest")
    // Disable Kafka's automatic offset commits.
    kafkaParams += (ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> (false: java.lang.Boolean))

    kafkaParams.toMap
  }

  /** Creates a StreamConf pre-filled with the hard-coded sample settings below. */
  def newStreamConf(): StreamConf = {
    val conf = new StreamConf()
    conf.zkUrl = "hdp01:2181"
    conf.brokers = "hdp01:9092"
    conf.groupId = "liumm_group"
    conf.topics = "i57_.*"
    conf
  }

}

Source File: PlainEnum.scala From docless with MIT License

5 votes

package com.timeout.docless.schema

import java.util.regex.Pattern

import enumeratum.EnumEntry
import shapeless._
import shapeless.labelled._
import shapeless.ops.hlist

// Type class listing the string identifiers of the alternatives of an enumeration-like type A.
trait PlainEnum[A] {
  // One identifier per alternative.
  def ids: List[String]
}

object PlainEnum {
  /** Strategy for turning a constructor name into an identifier. */
  sealed trait IdFormat {
    def apply(s: String): String
  }

  // All case conversions below use Locale.ROOT so that identifiers do not depend on the
  // JVM default locale (with a Turkish default, "I" lower-cases to a dotless "ı").
  object IdFormat {
    /** camelCase / PascalCase to snake_case. */
    case object SnakeCase extends IdFormat {
      override def apply(s: String) = snakify(s)
    }

    /** camelCase / PascalCase to UPPER_SNAKE_CASE. */
    case object UpperSnakeCase extends IdFormat {
      override def apply(s: String) = snakify(s).toUpperCase(java.util.Locale.ROOT)
    }

    /** Upper-cases the name. */
    case object UpperCase extends IdFormat {
      override def apply(s: String) = s.toUpperCase(java.util.Locale.ROOT)
    }

    /** Lower-cases the name. */
    case object LowerCase extends IdFormat {
      override def apply(s: String) = s.toLowerCase(java.util.Locale.ROOT)
    }

    /** Leaves the name unchanged. */
    case object Default extends IdFormat {
      override def apply(s: String) = s
    }

    // Boundary between an upper-case run and a following capitalised word ("HTMLParser").
    private val snakifyRegexp1 = Pattern.compile("([A-Z]+)([A-Z][a-z])")
    // Boundary between a lower-case letter or digit and a following upper-case letter ("fooBar").
    private val snakifyRegexp2 = Pattern.compile("([a-z\\d])([A-Z])")
    private val snakifyReplacement = "$1_$2"

    // Inserts underscores at both kinds of boundary, then lower-cases the result.
    private def snakify(s: String): String = {
      val first = snakifyRegexp1.matcher(s).replaceAll(snakifyReplacement)
      snakifyRegexp2.matcher(first).replaceAll(snakifyReplacement).toLowerCase(java.util.Locale.ROOT)
    }

    /** Format used when no other IdFormat is in implicit scope. */
    implicit val default: IdFormat = Default
  }

  /** Builds an instance from a fixed list of identifiers. */
  def instance[A](_ids: List[String]): PlainEnum[A] = new PlainEnum[A] {
    override def ids = _ids
  }

  /** Base case of the derivation: the empty coproduct has no identifiers. */
  implicit val cnilEnum: PlainEnum[CNil] = instance(Nil)

  /**
   * Inductive case: prepends the formatted label of the head alternative to the
   * identifiers of the tail. Only applies when the head has a generic
   * representation of length zero (`zeroLen`).
   */
  implicit def coprodEnum[K <: Symbol, H, T <: Coproduct, HL <: HList, N <: Nat](
    implicit
    witness: Witness.Aux[K],
    gen: Generic.Aux[H, HL],
    hLen: hlist.Length.Aux[HL, N],
    lazyEnum: Lazy[PlainEnum[T]],
    zeroLen: N =:= Nat._0,
    format: IdFormat
  ): PlainEnum[FieldType[K, H] :+: T] =
    instance(format(witness.value.name) :: lazyEnum.value.ids)

  /**
   * Derives an instance for `A` from its labelled coproduct representation,
   * excluding types that extend `EnumEntry`.
   */
  implicit def genericPlainEnum[A, R <: Coproduct](
    implicit
    gen: LabelledGeneric.Aux[A, R],
    enum: PlainEnum[R],
    format: IdFormat,
    ev: A <:!< EnumEntry
  ): PlainEnum[A] = instance(enum.ids)

  /** Summons the instance for `A`. */
  def apply[A](implicit ev: PlainEnum[A]): PlainEnum[A] = ev
}

Source File: ConfigReader.scala From drizzle-spark with Apache License 2.0

5 votes

package org.apache.spark.internal.config

import java.util.{Map => JMap}
import java.util.regex.Pattern

import scala.collection.mutable.HashMap
import scala.util.matching.Regex

private object ConfigReader {

  // Matches references of the form ${name} or ${prefix:name}: group 1 is the optional
  // prefix (word characters), group 2 the name (non-whitespace characters).
  private val REF_RE = "\\$\\{(?:(\\w+?):)?(\\S+?)\\}".r

}


  /** Expands the references in `input`, starting with no references marked as in use. */
  def substitute(input: String): String = substitute(input, Set.empty[String])

  // Replaces every reference in `input` with its bound value, expanding nested references
  // recursively. `usedRefs` holds the references already being expanded, so a cycle fails
  // the `require` below. References with no bound value are left as written; null stays null.
  private def substitute(input: String, usedRefs: Set[String]): String = {
    if (input == null) {
      input
    } else {
      ConfigReader.REF_RE.replaceAllIn(input, { m =>
        val prefix = m.group(1)
        val name = m.group(2)
        val ref = if (prefix == null) name else s"$prefix:$name"
        require(!usedRefs.contains(ref), s"Circular reference in $input: $ref")

        val resolved = for {
          provider <- bindings.get(prefix)
          value <- provider.get(name)
        } yield substitute(value, usedRefs + ref)

        // Quote the replacement so '$' and '\' in it are not treated as group references.
        Regex.quoteReplacement(resolved.getOrElse(m.matched))
      })
    }
  }

}

Source File: StringUtils.scala From drizzle-spark with Apache License 2.0

5 votes

package org.apache.spark.sql.catalyst.util

import java.util.regex.{Pattern, PatternSyntaxException}

import org.apache.spark.unsafe.types.UTF8String

object StringUtils {

  // Translates a SQL LIKE pattern into a Java regular expression string.
  // Each character is examined together with its predecessor (a space stands in for the
  // predecessor of the first character):
  //  - a backslash emits nothing by itself;
  //  - a character preceded by a backslash: `_` and `%` are emitted literally, any other
  //    character is emitted as the quoted two-character text backslash + character;
  //  - otherwise `_` becomes `.`, `%` becomes `.*`, and anything else is quoted.
  // The result is prefixed with (?s) so that `.` also matches line terminators.
  // An empty input is returned unchanged.
  // replace the _ with .{1} exactly match 1 time of any character
  // replace the % with .*, match 0 or more times with any character
  def escapeLikeRegex(v: String): String = {
    if (!v.isEmpty) {
      "(?s)" + (' ' +: v.init).zip(v).flatMap {
        case (prev, '\\') => ""
        case ('\\', c) =>
          c match {
            case '_' => "_"
            case '%' => "%"
            case _ => Pattern.quote("\\" + c)
          }
        case (prev, c) =>
          c match {
            case '_' => "."
            case '%' => ".*"
            case _ => Pattern.quote(Character.toString(c))
          }
      }.mkString
    } else {
      v
    }
  }

  private[this] val trueStrings = Set("t", "true", "y", "yes", "1").map(UTF8String.fromString)
  private[this] val falseStrings = Set("f", "false", "n", "no", "0").map(UTF8String.fromString)

  // Case-insensitive membership tests against the two sets above.
  def isTrueString(s: UTF8String): Boolean = trueStrings.contains(s.toLowerCase)
  def isFalseString(s: UTF8String): Boolean = falseStrings.contains(s.toLowerCase)

  // Filters `names` with a `|`-separated list of wildcard patterns in which `*` matches any
  // sequence of characters; matching ignores case and must cover the whole name.
  // Sub-patterns that are not valid regular expressions are silently skipped.
  // Returns the distinct matches in sorted order.
  def filterPattern(names: Seq[String], pattern: String): Seq[String] = {
    val funcNames = scala.collection.mutable.SortedSet.empty[String]
    pattern.trim().split("\\|").foreach { subPattern =>
      try {
        val regex = ("(?i)" + subPattern.replaceAll("\\*", ".*")).r
        funcNames ++= names.filter{ name => regex.pattern.matcher(name).matches() }
      } catch {
        case _: PatternSyntaxException =>
      }
    }
    funcNames.toSeq
  }
}

Source File: EntityFilter.scala From prometheus-akka with Apache License 2.0

5 votes

package com.workday.prometheus.akka.impl

import java.util.regex.Pattern

/** Accepts a name when at least one include filter accepts it and no exclude filter does. */
private[akka] case class EntityFilter(includes: List[PathFilter], excludes: List[PathFilter]) {
  def accept(name: String): Boolean = {
    val included = includes.exists(filter => filter.accept(name))
    included && excludes.forall(filter => !filter.accept(name))
  }
}

// Predicate over path strings; implemented in this file by RegexPathFilter and GlobPathFilter.
private[akka] trait PathFilter {
  // Returns true when the filter accepts the given path.
  def accept(path: String): Boolean
}

/** Accepts paths that match the regular expression `path` in their entirety. */
private[akka] case class RegexPathFilter(path: String) extends PathFilter {
  private val pathRegex = path.r

  // The parameter shadows the constructor field of the same name; the field is only
  // used to build `pathRegex`.
  override def accept(path: String): Boolean =
    pathRegex.unapplySeq(path).isDefined
}


/**
 * Accepts paths matching a glob expression in which `**` matches anything, `*` matches
 * any run of non-slash characters, `?` matches one non-slash character, a backslash
 * escapes the next character, and a run of slashes matches one or more slashes.
 * A glob ending in a slash also matches everything below it.
 */
private[akka] case class GlobPathFilter(glob: String) extends PathFilter {
  // Tokenizer: group 1 = * or **, 2 = ?, 3 = escaped character, 4 = slashes, 5 = other text.
  private val GLOB_PATTERN = Pattern.compile("(\\*\\*?)|(\\?)|(\\\\.)|(/+)|([^*?]+)")
  private val pattern = getGlobPattern(glob)

  def accept(path: String): Boolean = pattern.matcher(path).matches

  // Translates the glob token by token into an equivalent regular expression.
  private def getGlobPattern(glob: String) = {
    val regex = new StringBuilder
    val tokens = GLOB_PATTERN.matcher(glob)
    var endsWithSlash = false
    while (tokens.find) {
      val wildcard = tokens.group(1)
      // Exactly one alternative of the tokenizer matches per token, so this is true
      // only when the current token is a run of slashes.
      endsWithSlash = tokens.group(4) != null
      val fragment =
        if (wildcard != null) {
          // "**" crosses slashes, a single "*" does not
          if (wildcard.length == 2) ".*" else "[^/]*"
        }
        else if (tokens.group(2) != null) "[^/]" // '?': any single non-slash character
        else if (tokens.group(3) != null) Pattern.quote(tokens.group.substring(1)) // escaped character, taken literally
        else if (endsWithSlash) "/+" // any number of slashes
        else Pattern.quote(tokens.group) // plain text, taken literally
      regex.append(fragment)
    }
    if (endsWithSlash) {
      // glob ends in '/': behave as if "**" followed
      regex.append(".*")
    }
    Pattern.compile(regex.toString)
  }
}

Source File: Assertions.scala From daml with Apache License 2.0

5 votes

// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.api.testtool.infrastructure

import ai.x.diff.DiffShow
import com.daml.grpc.{GrpcException, GrpcStatus}
import java.util.regex.Pattern
import io.grpc.Status

import scala.language.higherKinds
import scala.util.control.NonFatal

/** Assertion helpers for the ledger API test tool infrastructure. */
object Assertions extends DiffExtensions {
  /** Fails unconditionally by throwing an `AssertionError` carrying `message`. */
  def fail(message: String): Nothing =
    throw new AssertionError(message)

  /** Fails unconditionally by throwing an `AssertionError` carrying `message` and `cause`. */
  def fail(message: String, cause: Throwable): Nothing =
    throw new AssertionError(message, cause)

  /**
   * Asserts that `as` contains exactly `length` elements.
   *
   * @param context prefix used in the failure message
   * @param length  expected number of elements
   * @param as      the sequence to check
   * @return `as` itself, so the call can be chained
   */
  def assertLength[A, F[_] <: Seq[_]](context: String, length: Int, as: F[A]): F[A] = {
    assert(as.length == length, s"$context: expected $length item(s), got ${as.length}")
    as
  }

  /** Asserts that `as` contains exactly one element and returns that element. */
  def assertSingleton[A](context: String, as: Seq[A]): A =
    assertLength(context, 1, as).head

  /**
   * Diffs `actual` against `expected` using the `DiffShow` instance for `T` and throws
   * `AssertionErrorWithPreformattedMessage` when the diff is not identical.
   *
   * @param context prefix used in the failure message
   */
  def assertEquals[T: DiffShow](context: String, actual: T, expected: T): Unit = {
    val diff = DiffShow.diff(actual, expected)
    if (!diff.isIdentical)
      throw AssertionErrorWithPreformattedMessage(
        diff.string,
        s"$context: two objects are supposed to be equal but they are not",
      )
  }

  /**
   * Convenience overload taking the expected message fragment as a plain string.
   *
   * An empty `pattern` is forwarded as `None`; any other value is quoted so that it is
   * treated as literal text rather than as a regular expression.
   * NOTE(review): the overload this forwards to (taking an optional `Pattern`) is not
   * visible in this excerpt -- confirm it exists in the full source.
   */
  def assertGrpcError(t: Throwable, expectedCode: Status.Code, pattern: String): Unit = {
    assertGrpcError(
      t,
      expectedCode,
      if (pattern.isEmpty) None else Some(Pattern.compile(Pattern.quote(pattern))))
  }
}

Source File: FilteringTest.scala From scala-commons with MIT License

5 votes

package com.avsystem.commons
package mongo.core.ops

import java.util.regex.Pattern

import com.avsystem.commons.mongo.BsonRef
import com.avsystem.commons.serialization.GenCodec
import com.mongodb.client.model.Filters
import org.bson.BsonType
import org.bson.conversions.Bson
import org.scalatest.funsuite.AnyFunSuite

/**
 * Checks that each filtering operation produces the same `Bson` as the corresponding
 * MongoDB driver `Filters` factory called directly.
 */
class FilteringTest extends AnyFunSuite {

  import Filtering._
  import FilteringTest._

  /**
   * Registers a test comparing `filter` applied to the string reference `sRef`
   * with `verify` applied to the raw field name "s".
   */
  private def testCase(name: String)(filter: (Ref[String]) => Bson)(verify: (String) => Bson): Unit = {
    import BsonEquality.bsonEquality

    test(name) {
      assert(filter(sRef) === verify("s"))
    }
  }

  /** Same as `testCase`, for operations that additionally take a value; a fixed sample value is used. */
  private def testValue(name: String)(filter: (Ref[String], String) => Bson)(verify: (String, String) => Bson): Unit = {
    val someValue = "someValue"
    testCase(name)(filter(_, someValue))(verify(_, someValue))
  }

  // equality
  testValue("equal")(_ equal _)(Filters.eq)
  testValue("notEqual")(_ notEqual _)(Filters.ne)

  // ordering comparisons
  testValue("gt")(_ gt _)(Filters.gt)
  testValue("lt")(_ lt _)(Filters.lt)
  testValue("gte")(_ gte _)(Filters.gte)
  testValue("lte")(_ lte _)(Filters.lte)

  // membership
  testValue("in")(_ in _)(Filters.in(_, _))
  testValue("nin")(_ nin _)(Filters.nin(_, _))

  // field existence
  testCase("exists")(_.exists())(Filters.exists)
  testCase("notExists")(_.exists(false))(Filters.exists(_, false))

  // type checks, by name and by enum
  testCase("ofType")(_.ofType("someTypeName"))(Filters.`type`(_, "someTypeName"))
  testCase("ofTypeEnum")(_.ofType(BsonType.STRING))(Filters.`type`(_, BsonType.STRING))

  testCase("mod")(_.mod(313, 131))(Filters.mod(_, 313, 131))

  // the same expression as a raw string, a Scala Regex and a Java Pattern
  private val regexString = "\\d"
  private val regexScala = regexString.r
  private val regexJava = Pattern.compile(regexString)
  testCase("regexScala")(_ regex regexScala)(Filters.regex(_, regexString))
  testCase("regexJava")(_ regex regexJava)(Filters.regex(_, regexJava))
  testCase("regexString")(_ regex regexString)(Filters.regex(_, regexString))
  testCase("regexOptions")(_.regex(regexString, "ops"))(Filters.regex(_, regexString, "ops"))

  // The tests below call `===` on Bson values directly, so the equality is imported at class level.
  import BsonEquality.bsonEquality

  // operations on the list-valued reference `aRef` (field "a")
  test("contains") {
    assert(aRef.contains("elem") === Filters.eq("a", "elem"))
  }

  private val simpleFilter = Filters.eq("key", "value")
  test("elemMatch") {
    assert(aRef.elemMatch(simpleFilter) === Filters.elemMatch("a", simpleFilter))
  }

  test("size") {
    assert(aRef.size(131) === Filters.size("a", 131))
  }

  test("all") {
    assert(aRef.all("e1", "e2") === Filters.all("a", "e1", "e2"))
  }

  // logical combinators
  private val otherFilter = Filters.eq("key2", "value2")
  test("and") {
    assert(and(simpleFilter, otherFilter) === Filters.and(simpleFilter, otherFilter))
  }

  test("or") {
    assert(or(simpleFilter, otherFilter) === Filters.or(simpleFilter, otherFilter))
  }

  test("nor") {
    assert(nor(simpleFilter, otherFilter) === Filters.nor(simpleFilter, otherFilter))
  }

  test("not") {
    assert(not(simpleFilter) === Filters.not(simpleFilter))
  }
}

/** Fixtures shared by the tests: a codec for `SomeEntity` and references to its `s` and `a` fields. */
object FilteringTest extends BsonRef.Creator[SomeEntity] {
  implicit val codec: GenCodec[SomeEntity] = GenCodec.materialize
  val sRef: Ref[String] = ref(_.s)
  val aRef: Ref[List[String]] = ref(_.a)
}

Source File: URLInString.scala From Argus-SAF with Apache License 2.0

5 votes

package org.argus.jawa.core.util

import java.util.regex.Pattern

 
/** Extracts URL-looking substrings from free text. */
object URLInString {

  // Compiled once instead of on every call.
  //
  // Two corrections relative to the previous expression:
  //  - "www." had an unescaped dot, so any character after "www" was accepted;
  //  - the query-key escapes were written "%[a-f\\d{2}]" (quantifier inside the
  //    character class) instead of "%[a-f\\d]{2}", unlike every other
  //    percent-escape in the expression.
  private val urlPattern: Pattern = Pattern.compile(
    "\\b(((ht|f)tp(s?)\\:\\/\\/|~\\/|\\/)|www\\.)" +
      "(\\w+:\\w+@)?(([-\\w]+\\.)+(com|org|net|gov" +
      "|mil|biz|info|mobi|name|aero|jobs|museum" +
      "|travel|[a-z]{2}))(:[\\d]{1,5})?" +
      "(((\\/([-\\w~!$+|.,=]|%[a-f\\d]{2})+)+|\\/)+|\\?|#)?" +
      "((\\?([-\\w~!$+|.,*:]|%[a-f\\d]{2})+=?" +
      "([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)" +
      "(&(?:[-\\w~!$+|.,*:]|%[a-f\\d]{2})+=?" +
      "([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)*)*" +
      "(#([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)?\\b"
  )

  /**
   * Returns every distinct URL found in `str`.
   *
   * A match that is wrapped in parentheses has the surrounding pair removed.
   *
   * @param str text to scan
   * @return the set of matched URLs; empty when nothing matches
   */
  def extract(str: String): Set[String] = {
    val m = urlPattern.matcher(str)
    Iterator
      .continually(m)
      .takeWhile(_.find())
      .map { found =>
        val urlStr = found.group()
        if (urlStr.startsWith("(") && urlStr.endsWith(")"))
          urlStr.substring(1, urlStr.length() - 1)
        else
          urlStr
      }
      .toSet
  }
}

Source File: PScoutTranslator.scala From Argus-SAF with Apache License 2.0

5 votes

package org.argus.amandroid.core.util

import org.argus.jawa.core.util._
import java.util.regex.Pattern

import org.argus.jawa.core.elements.{JavaKnowledge, Signature}


/**
 * Reads a text file in which lines starting with `Permission:` introduce a permission
 * and subsequent lines starting with `<` list method signatures belonging to it, and
 * turns it into a map from permission name to signatures.
 */
object PScoutTranslator {
  /** Entry point; `args(0)` is the path of the file to translate. */
  def main(args: Array[String]): Unit = {
    val filepath = args(0)
    val fileuri = FileUtil.toUri(filepath)
    translate(fileuri)
  }
  
  /**
   * Parses the file at `uri`.
   *
   * Signature lines that cannot be parsed are reported on stderr and skipped. Signature
   * lines appearing before the first `Permission:` line are stored under the `null` key.
   *
   * @param uri location of the mapping file
   * @return permission name mapped to the signatures listed under it
   */
  def translate(uri: FileResourceUri): IMap[String, ISet[Signature]] = {
    val permissionMap: MMap[String, MSet[Signature]] = mmapEmpty
    var currentPermission: String = null
    val source = scala.io.Source.fromFile(FileUtil.toFile(uri))
    // Close the file handle even when parsing fails part-way through.
    try {
      source.getLines().foreach {
        case permission if permission.startsWith("Permission:") =>
          currentPermission = permission.replace("Permission:", "")
        case sigstr if sigstr.startsWith("<") =>
          val sig = formatSignature(sigstr)
          permissionMap.getOrElseUpdate(currentPermission, msetEmpty) ++= sig
        case _ =>
      }
    } finally {
      source.close()
    }
    permissionMap.map{case (k, v) => (k, v.toSet)}.toMap
  }
  // Capture groups: 1 = declaring class, 2 = return type, 3 = method name, 4 = parameter list
  private val regex = "<([[^\\s]&&[^:]]+):\\s([^\\s]+)\\s([[^\\s]&&[^\\(]]+)\\(([[^\\s]&&[^\\)]]*)\\)>\\s+\\(.*\\)"
  // Compiled once rather than for every signature line.
  private val signaturePattern: Pattern = Pattern.compile(regex)

  /**
   * Converts one `<class: returnType method(params)> (...)` line into a [[Signature]].
   *
   * @return `None` (after logging to stderr) when the line does not have the expected shape
   */
  private def formatSignature(sigstr: String): Option[Signature] = {
    val m = signaturePattern.matcher(sigstr)
    if(m.find()){
      val classTypStr = m.group(1)
      val retTypStr = m.group(2)
      val methodName = m.group(3)
      val paramTypStrList = m.group(4).split(",")
      val classTyp = JavaKnowledge.getTypeFromJawaName(classTypStr)
      val protosb = new StringBuilder
      protosb.append("(")
      paramTypStrList.foreach{
        paramTypStr =>
          // an empty parameter list splits into a single empty string
          if(!paramTypStr.isEmpty)
            protosb.append(JavaKnowledge.formatTypeToSignature(JavaKnowledge.getTypeFromJawaName(paramTypStr)))
      }
      protosb.append(")")
      protosb.append(JavaKnowledge.formatTypeToSignature(JavaKnowledge.getTypeFromJawaName(retTypStr)))
      Some(new Signature(classTyp, methodName, protosb.toString()))
    } else {
      System.err.println("PScoutTranslator, does not match: " + sigstr)
      None
    }
  }
}

Source File: ARSCFileParser_apktool.scala From Argus-SAF with Apache License 2.0

5 votes

package org.argus.amandroid.core.parser

import org.argus.jawa.core.util.FileResourceUri
import brut.androlib.res.decoder.ARSCDecoder
import brut.androlib.res.decoder.ARSCDecoder.ARSCData
import brut.androlib.res.data.ResID
import brut.androlib.res.data.ResResSpec
import brut.androlib.res.data.ResPackage
import java.util.regex.Pattern
import org.argus.jawa.core.util._
import java.util.zip.ZipFile

/**
 * Reads the `resources.arsc` entry of an APK through apktool's `ARSCDecoder` and
 * exposes lookups over the decoded data.
 *
 * All query methods return empty/`null` results until [[parse]] has successfully
 * decoded a resource table.
 */
class ARSCFileParser_apktool {
//  final private val TITLE = "ARSCFileParser_apktool"
  private var data: ARSCData = _
  
  /**
   * Decodes `resources.arsc` from the APK at `apkUri`.
   * When the archive has no such entry the parser state is left untouched.
   */
  def parse(apkUri: FileResourceUri): Unit = {
    val apkFile = FileUtil.toFilePath(apkUri)
    val zf = new ZipFile(apkFile)
    try{
      val ze = zf.getEntry("resources.arsc")
      if(ze != null){
        val in = zf.getInputStream(ze)
        this.data = ARSCDecoder.decode(in, false, false)
      } else {}//err_msg_normal(TITLE, "Cannot find resources.arsc file.")
    } finally {
      zf.close()
    }
  }
  
  /**
   * Looks up the resource spec for `resourceId`.
   *
   * @return the spec from the last package that declares the id, or `null` when no
   *         package declares it or nothing has been parsed yet
   */
  def findResource(resourceId: Int): ResResSpec = {
    var result: ResResSpec = null
    val id = new ResID(resourceId)
    if(this.data != null){
      this.data.getPackages.foreach{ pkg =>
        if(pkg.hasResSpec(id)){
          result = pkg.getResSpec(id)
        }
      }
    }
    result
  }
  
  /** Returns the decoded packages, or an empty set when nothing has been parsed. */
  def getPackages: Set[ResPackage] = {
    if(this.data != null){
      data.getPackages.toSet
    } else Set()
  }
  
  /** Returns the raw decoded data; `null` until a successful [[parse]]. */
  def getData: ARSCData = this.data
  
  /**
   * Collects `id -> name` pairs for string resources by scanning the textual form of
   * each package's resource table for entries shaped like `0x<hex id> string/<name>`.
   *
   * Previously an inner `var matches` shadowed the accumulator, so every match was
   * written to a throw-away map and this method always returned an empty result.
   */
  def getGlobalStringPool: Map[Int, String] = {
    val matches: MMap[Int, String] = mmapEmpty
    // Compiled once for all packages.
    val p = Pattern.compile("(.+)\\sstring\\/(.+)")
    getPackages.foreach{ pkg =>
      val str = pkg.getResTable.toString
      // drop the surrounding brackets of the collection's toString, then split its entries
      val strs = str.substring(1, str.length() - 1).split(", ")
      strs foreach { str =>
        val m = p.matcher(str)
        if(m.find()){
          // group 1 is the id with a two-character prefix (presumably "0x") that is stripped before parsing
          matches += (Integer.parseInt(m.group(1).substring(2), 16) -> m.group(2))
        }
      }
    }
    matches.toMap
  }
}

Source File: TokenParser.scala From spark-nlp with Apache License 2.0

5 votes

package com.johnsnowlabs.nlp.annotators.common

import java.util.regex.Pattern

/** A token pre-processing step that rewrites a token, typically by inserting separating spaces. */
trait PreprocessingParser {
  /** Returns `token` with this parser's separation applied; unaffected tokens are returned as-is. */
  def separate(token:String): String
}


/**
 * Splits known suffixes off the end of a token, separating them with a space.
 *
 * @param suffixes candidate suffixes, tried in array order; the first match wins
 */
class SuffixedToken(suffixes:Array[String]) extends PreprocessingParser {

  /**
   * Returns the first non-empty suffix that `token` ends with, if any.
   *
   * Empty suffixes are ignored: every string ends with "", so accepting one would make
   * [[separate]] recurse forever on the same token.
   */
  def belongs(token: String): Option[String] =
    suffixes.find(suffix => suffix.nonEmpty && token.endsWith(suffix))

  /**
   * Repeatedly strips matching suffixes from the end of `token`, joining the remaining
   * stem and each stripped suffix with a single space.
   */
  override def separate(token:String): String =
    belongs(token).fold(token) { suffix =>
      s"""${separate(token.dropRight(suffix.length))} $suffix"""
    }

}

object SuffixedToken {
  /** Creates a [[SuffixedToken]] for the given suffixes. */
  def apply(suffixes:Array[String]): SuffixedToken = new SuffixedToken(suffixes)
}


/**
 * Splits known single-character prefixes off the front of a token, separating them
 * with a space.
 *
 * @param prefixes candidate prefixes; only the first character of a token is compared
 *                 against them, so entries longer than one character never match
 */
class PrefixedToken(prefixes:Array[String]) extends PreprocessingParser {

  /**
   * Returns `true` when `token` has more than one character and its first character
   * equals one of the prefixes.
   *
   * Uses `contains` rather than `map(...).reduce(_ || _)`, which threw
   * `UnsupportedOperationException` for an empty prefix array.
   */
  def belongs(token: String): Boolean =
    token.length > 1 && prefixes.contains(token.head.toString)

  /** Repeatedly detaches leading prefix characters, each followed by a single space. */
  override def separate(token:String): String = {
    if (belongs(token))
      s"""${token.head} ${separate(token.tail)}"""
    else
      token
  }
}

object PrefixedToken {
  /** Creates a [[PrefixedToken]] for the given prefixes. */
  def apply(prefixes:Array[String]): PrefixedToken = new PrefixedToken(prefixes)
}


/**
 * Surrounds known infix strings inside a token with spaces.
 *
 * @param tokens the infix strings to look for
 */
class InfixToken(tokens:Array[String]) extends PreprocessingParser {

  /**
   * Returns `true` when `token` is longer than two characters and at least one infix
   * occurs strictly inside it, i.e. ignoring its first and last character.
   */
  def belongs(token: String): Boolean = {
    if(token.length > 2) {
      val insideChunk = token.tail.dropRight(1)
      tokens.exists(insideChunk.contains)
    }else{
      false
    }
  }

  /**
   * When [[belongs]] holds, replaces every occurrence of every infix in `token` by the
   * same infix padded with one space on each side; otherwise returns `token` unchanged.
   *
   * The replacement is literal (`String.replace`). The former `replaceAll` treated `$`
   * and `\` in the replacement text as group references/escapes, so an infix such as
   * "$" raised an exception and a backslash was silently dropped.
   */
  override def separate(token:String): String =
    if (belongs(token))
      tokens.foldLeft(token) { (result, infix) => result.replace(infix, s" $infix ") }
    else
      token
}

object InfixToken {
  /** Creates an [[InfixToken]] for the given infixes. */
  def apply(infixes:Array[String]): InfixToken = new InfixToken(infixes)
}

Source File: ModuleMatcher.scala From coursier with Apache License 2.0

5 votes

package coursier.util

import java.util.regex.Pattern

import coursier.core.{Module, ModuleName, Organization}
import dataclass.data

import scala.annotation.tailrec
import scala.util.matching.Regex

/**
 * Matches modules against a template module whose organization, name and attribute
 * values may contain `*` wildcards.
 *
 * @param matcher the template; each of its string components is turned into a pattern
 */
@data class ModuleMatcher(matcher: Module) {

  import ModuleMatcher.blobToPattern

  lazy val orgPattern = blobToPattern(matcher.organization.value)
  lazy val namePattern = blobToPattern(matcher.name.value)
  lazy val attributesPattern = matcher
    .attributes
    .map { case (key, blob) => key -> blobToPattern(blob) }

  /**
   * Returns `true` when `module`'s organization and name fully match their patterns,
   * it has exactly the same attribute keys as the template, and every attribute value
   * fully matches the pattern for its key.
   */
  def matches(module: Module): Boolean = {
    def accepts(regex: Regex, value: String): Boolean =
      regex.pattern.matcher(value).matches()

    accepts(orgPattern, module.organization.value) &&
      accepts(namePattern, module.name.value) &&
      module.attributes.keySet == attributesPattern.keySet &&
      attributesPattern.forall {
        case (key, regex) =>
          module.attributes.get(key).exists(accepts(regex, _))
      }
  }

}

object ModuleMatcher {

  /** Builds a matcher from the individual components of a template module. */
  def apply(org: Organization, name: ModuleName, attributes: Map[String, String] = Map.empty): ModuleMatcher =
    ModuleMatcher(Module(org, name, attributes))

  /** A matcher whose organization and name are both the `*` wildcard, with no attributes. */
  def all: ModuleMatcher =
    ModuleMatcher(Module(Organization("*"), ModuleName("*"), Map.empty))

  /**
   * Converts a blob in which `*` stands for "any sequence of characters" into a regex.
   * Text between wildcards is quoted so it is matched literally.
   *
   * @param s the part of the blob still to convert
   * @param b accumulator holding the regex text produced so far
   */
  @tailrec
  private def blobToPattern(s: String, b: StringBuilder = new StringBuilder): Regex =
    if (s.isEmpty)
      b.result().r
    else
      s.indexOf('*') match {
        case -1 =>
          // no wildcard left: the remainder is literal text
          b ++= Pattern.quote(s)
          b.result().r
        case starAt =>
          if (starAt > 0)
            b ++= Pattern.quote(s.substring(0, starAt))
          b ++= ".*"
          blobToPattern(s.substring(starAt + 1), b)
      }

}

Source File: CacheChecksum.scala From coursier with Apache License 2.0

5 votes

package coursier.cache

import java.math.BigInteger
import java.nio.charset.StandardCharsets
import java.util.regex.Pattern

/** Helpers for extracting a checksum value from the content of a checksum file. */
object CacheChecksum {

  // Number of hex characters of each supported digest.
  private val checksumLength = Set(
    32, // md5
    40, // sha-1
    64, // sha-256
    128 // sha-512
  )

  /**
   * Whether `s` consists only of digits and the lower-case hex letters `a`-`f`.
   *
   * The upper bound used to be `'z'`, which let arbitrary lower-case words through and
   * made `new BigInteger(_, 16)` throw `NumberFormatException` whenever such a word
   * happened to have a checksum length.
   */
  private def ifHexString(s: String) =
    s.forall(c => c.isDigit || c >= 'a' && c <= 'f')

  /** Parses the first element that is a hex string of a supported checksum length. */
  private def findChecksum(elems: Seq[String]): Option[BigInteger] =
    elems.collectFirst {
      case rawSum if ifHexString(rawSum) && checksumLength.contains(rawSum.length) =>
        new BigInteger(rawSum, 16)
    }

  /** Treats each whole line, lower-cased and with all whitespace removed, as a candidate. */
  private def parseChecksumLine(lines: Seq[String]): Option[BigInteger] =
    findChecksum(lines.map(_.toLowerCase.replaceAll("\\s", "")))

  /**
   * Fallback strategies: first every whitespace-separated word is a candidate; failing
   * that, the hex-looking words of each line are concatenated into one candidate.
   */
  private def parseChecksumAlternative(lines: Seq[String]): Option[BigInteger] =
    findChecksum(lines.flatMap(_.toLowerCase.split("\\s+"))).orElse {
      findChecksum(
        lines.map { line =>
          line
            .toLowerCase
            .split("\\s+")
            .filter(ifHexString)
            .mkString
        }
      )
    }

  /**
   * Extracts a checksum from textual `content`.
   *
   * @return the checksum as a non-negative number, or `None` when no line or word looks
   *         like a checksum of a supported length
   */
  def parseChecksum(content: String): Option[BigInteger] = {
    // `augmentString` is spelled out so that the Scala `lines` is used even on JDKs
    // where `String` has its own `lines` method.
    val lines = Predef.augmentString(content)
      .lines
      .toVector

    parseChecksumLine(lines).orElse(parseChecksumAlternative(lines))
  }

  /**
   * Extracts a checksum from raw bytes.
   *
   * Content of exactly 16 or 20 bytes is taken to be the binary digest itself and is
   * converted with `new BigInteger(content)`; anything else is decoded as UTF-8 text
   * and parsed like [[parseChecksum]].
   */
  def parseRawChecksum(content: Array[Byte]): Option[BigInteger] =
    if (content.length == 16 || content.length == 20)
      Some(new BigInteger(content))
    else {
      val s = new String(content, StandardCharsets.UTF_8)
      val lines = Predef.augmentString(s)
        .lines
        .toVector

      parseChecksumLine(lines) orElse parseChecksumAlternative(lines)
    }

}

Source File: MergeRule.scala From coursier with Apache License 2.0

5 votes

package coursier.launcher

import java.util.jar.JarFile
import java.util.regex.Pattern
import dataclass.data

/** A rule applied to archive entries, selected either by exact path or by path pattern. */
sealed abstract class MergeRule extends Product with Serializable

object MergeRule {
  /** A rule that targets a single, exact entry path. */
  sealed abstract class PathRule extends MergeRule {
    def path: String
  }

  /** Exclusion rule for the entry at exactly `path`. */
  @data class Exclude(path: String) extends PathRule
  /** Exclusion rule for entries whose path is selected by the `path` pattern. */
  @data class ExcludePattern(path: Pattern) extends MergeRule

  object ExcludePattern {
    /** Convenience constructor compiling `s` as a regular expression. */
    def apply(s: String): ExcludePattern =
      ExcludePattern(Pattern.compile(s))
  }

  // TODO Accept a separator: Array[Byte] argument in these
  // (to separate content with a line return in particular)
  /** Append rule for the entry at exactly `path`. */
  @data class Append(path: String) extends PathRule
  /** Append rule for entries whose path is selected by the `path` pattern. */
  @data class AppendPattern(path: Pattern) extends MergeRule

  object AppendPattern {
    /** Convenience constructor compiling `s` as a regular expression. */
    def apply(s: String): AppendPattern =
      AppendPattern(Pattern.compile(s))
  }

  /**
   * Default rule set: append `reference.conf` and `META-INF/services` entries; exclude
   * `log4j.properties`, the jar manifest, and `.SF` / `.DSA` / `.RSA` files under
   * `META-INF` (extensions matched case-insensitively).
   */
  val default = Seq(
    MergeRule.Append("reference.conf"),
    MergeRule.AppendPattern("META-INF/services/.*"),
    MergeRule.Exclude("log4j.properties"),
    MergeRule.Exclude(JarFile.MANIFEST_NAME),
    MergeRule.ExcludePattern("META-INF/.*\\.[sS][fF]"),
    MergeRule.ExcludePattern("META-INF/.*\\.[dD][sS][aA]"),
    MergeRule.ExcludePattern("META-INF/.*\\.[rR][sS][aA]")
  )
}

Source File: package.scala From pureconfig with Mozilla Public License 2.0

5 votes

package pureconfig

import java.util.regex.Pattern

import scala.util.matching.Regex

import org.scalactic.Equality
import org.scalactic.TypeCheckedTripleEquals._

/** Equality instances for regex types, comparing them by their pattern text. */
package object equality {

  /** Two `Pattern`s are equal when their pattern strings are equal; any non-`Pattern` is unequal. */
  implicit final val PatternEquality: Equality[Pattern] = new Equality[Pattern] {
    def areEqual(a: Pattern, b: Any): Boolean =
      PartialFunction.cond(b) {
        case other: Pattern => a.pattern === other.pattern
      }
  }

  /** Two `Regex`es are equal when their underlying `Pattern`s are equal per [[PatternEquality]]. */
  implicit final val RegexEquality: Equality[Regex] = new Equality[Regex] {
    override def areEqual(a: Regex, b: Any): Boolean =
      PartialFunction.cond(b) {
        case other: Regex => PatternEquality.areEqual(a.pattern, other.pattern)
      }
  }

}

Source File: KanjiCharacter.scala From scalastringcourseday7 with Apache License 2.0

5 votes

package text.kanji

import java.io.File
import java.util.regex.Pattern

import util.Config

import scala.collection.mutable.ListBuffer
import scala.io.Source


/**
 * A set of kanji, with membership tests for single characters and code points.
 *
 * Implementations supply the characters through `kanji`; membership is decided by a
 * regex character class built from them.
 */
trait KanjiCharacter {
  /** The characters belonging to this set, one string per entry. */
  val kanji: Seq[String]

  /**
   * Character class listing every entry of `kanji` (`[...]`), or the empty string when
   * `kanji` is empty. Entries are inserted verbatim, without regex escaping.
   */
  lazy val regex: String =
    if (kanji.isEmpty) "" else kanji.mkString("[", "", "]")

  lazy val pattern: Pattern = Pattern.compile(regex)

  /** Whether `codePoint` is a valid Unicode code point that belongs to this set. */
  def isDefined(codePoint: Int): Boolean = {
    if (Character.isValidCodePoint(codePoint)) {
      pattern.matcher(new String(Array(codePoint), 0, 1)).matches
    } else {
      false
    }
  }

  /** Negation of `isDefined(codePoint)`. */
  def notDefined(codePoint: Int): Boolean = {
    !isDefined(codePoint)
  }

  /** Whether `char` belongs to this set. */
  def isDefined(char: Char): Boolean = {
    pattern.matcher(char.toString).matches
  }

  /** Negation of `isDefined(char)`. */
  def notDefined(char: Char): Boolean = {
    !isDefined(char)
  }

  /**
   * Reads the second column of the two-column CSV resource `kanji/<fileName>.csv`.
   *
   * Lines that do not split into exactly two comma-separated fields are skipped. An
   * unreadable or missing file yields an empty result.
   *
   * @param fileName resource name without the `.csv` extension
   * @return the second field of every well-formed line, in file order
   */
  protected def readKanjiCSV(fileName: String): Seq[String] = {
    val buffer = ListBuffer.empty[String]
    val file: File = Config.resourceFile("kanji", fileName.concat(".csv")).toFile
    if (file.canRead && file.isFile) {
      val source = Source.fromFile(file)
      // Release the file handle even if reading fails; it was previously never closed.
      try {
        source.getLines().foreach { line =>
          val elements: Array[String] = line.split(",")
          if (elements.length == 2) {
            buffer += elements(1)
          }
        }
      } finally {
        source.close()
      }
    }
    buffer.result
  }
}

Source File: PatternChecker.scala From incubator-daffodil with Apache License 2.0

5 votes

package org.apache.daffodil.grammar.primitives

import org.apache.daffodil.exceptions.SavesErrorsAndWarnings
import java.util.regex.Pattern
import org.apache.daffodil.util.Misc
import java.util.regex.PatternSyntaxException
import org.apache.daffodil.api.WarnID


/** Sanity checks for user-supplied regular expressions. */
object PatternChecker {
  /**
   * Validates `pattern` and reports problems through `context`.
   *
   * A pattern that does not compile is reported via `SDE`. A pattern that compiles but
   * can match with zero length is reported via `SDW`, with an extra hint when it looks
   * like a single-line free-form (`(?x)`) expression containing a `#` comment.
   *
   * @param pattern the regular expression source text
   * @param context sink for the resulting error or warning
   */
  def checkPattern(pattern: String,
    context: SavesErrorsAndWarnings): Unit = {
    try {
      val compiled = Pattern.compile(pattern)
      val onEmptyInput = compiled.matcher("")
      val onProbeInput = compiled.matcher("\uFFFE") // a character unlikely to be matched deliberately
      // Zero-length means: matches the empty string, and on non-empty input matches
      // at the start without consuming anything.
      val canMatchNothing =
        onEmptyInput.matches() && onProbeInput.lookingAt() && onProbeInput.group().length() == 0
      if (canMatchNothing) {
        // Free-form notation on one line with a '#': the comment swallows the rest of
        // the expression, which almost certainly means the CDATA wrapper is missing.
        val looksLikeUnwrappedFreeForm =
          pattern.startsWith("(?x)") && !pattern.contains("\n") && pattern.contains("#")
        val needCDATA =
          if (looksLikeUnwrappedFreeForm)
            "\nMissing <![CDATA[...]]> around the regular expression." +
              "\nThis is required for free-form regular expression syntax with comments."
          else ""
        context.SDW(WarnID.RegexPatternZeroLength, "Regular expression pattern '%s'.\n" +
          "This pattern will match with zero length, so it can always match.%s", pattern, needCDATA)
      }
    } catch {
      case e: PatternSyntaxException =>
        context.SDE("Invalid regular expression pattern '%s'.\nReason: %s.", pattern, Misc.getSomeMessage(e).get)
    }
  }
}

Source File: ShouldNotTypecheck.scala From scala-parallel-collections with Apache License 2.0

5 votes

package testutil

import scala.language.experimental.macros
import scala.reflect.macros.blackbox.Context
import scala.reflect.macros.TypecheckException
import java.util.regex.Pattern


/**
 * Compile-time assertion that a snippet of code does NOT typecheck.
 *
 * Both entry points are macros: the check runs while the calling code is compiled, and
 * compilation is aborted when the snippet typechecks or fails with an unexpected message.
 */
object ShouldNotTypecheck {
  /** Asserts that `code` fails to typecheck, with any error message. */
  def apply(code: String): Unit = macro applyImplNoExp
  /**
   * Asserts that `code` fails to typecheck with an error message that fully matches the
   * regular expression `expected` (case-insensitive, `.` also matches line breaks).
   */
  def apply(code: String, expected: String): Unit = macro applyImpl

  /** Macro implementation for the one-argument form; delegates with no expected message. */
  def applyImplNoExp(ctx: Context)(code: ctx.Expr[String]) = applyImpl(ctx)(code, null)

  /**
   * Macro implementation shared by both entry points.
   *
   * @param code     the snippet; the pattern match below requires a string literal
   * @param expected a string-literal regex for the error message, or `null` to accept any error
   */
  def applyImpl(ctx: Context)(code: ctx.Expr[String], expected: ctx.Expr[String]): ctx.Expr[Unit] = {
    import ctx.universe._

    // Both arguments are destructured as compile-time string constants.
    val Expr(Literal(Constant(codeStr: String))) = code
    val (expPat, expMsg) = expected match {
      case null => (null, "Expected some error.")
      case Expr(Literal(Constant(s: String))) =>
        (Pattern.compile(s, Pattern.CASE_INSENSITIVE | Pattern.DOTALL), "Expected error matching: "+s)
    }

    // Typecheck the snippet wrapped in a block. A TypecheckException is the expected
    // outcome: either accept it (returning a unit expression) or abort when its message
    // does not match the expected pattern.
    try ctx.typecheck(ctx.parse("{ "+codeStr+" }")) catch { case e: TypecheckException =>
      val msg = e.getMessage
      if((expected ne null) && !(expPat.matcher(msg)).matches)
        ctx.abort(ctx.enclosingPosition, "Type-checking failed in an unexpected way.\n"+
          expMsg+"\nActual error: "+msg)
      else return reify(())
    }

    // Reached only when typechecking raised no TypecheckException.
    ctx.abort(ctx.enclosingPosition, "Type-checking succeeded unexpectedly.\n"+expMsg)
  }
}

Source File: ClassUtils.scala From ohara with Apache License 2.0

5 votes

package oharastream.ohara.it.code

import java.io.FileInputStream
import java.lang.reflect.Modifier
import java.util.jar.JarInputStream
import java.util.regex.Pattern

import org.junit.Test

import scala.jdk.CollectionConverters._

/** Classpath scanning helpers that load every class found in the jars providing `oharastream/ohara`. */
private[code] object ClassUtils {

  /** All classes from jars whose path does not contain "tests.jar". */
  def classesInProductionScope(): Seq[Class[_]] = allClasses(jarPath => !jarPath.contains("tests.jar"))

  /**
   * Loads every class contained in the jars that provide the `oharastream/ohara`
   * resource on the system class loader.
   *
   * @param fileNameFilter predicate on the jar file path; only accepted jars are scanned
   * @return the loaded classes, jar by jar in the order the class loader reports them
   */
  def allClasses(fileNameFilter: String => Boolean): Seq[Class[_]] = {
    val path        = "oharastream/ohara"
    // Resource locations of interest look like "file:<jar path>!/oharastream/ohara".
    val jarLocation = Pattern.compile("^file:(.+\\.jar)!/" + path + "$")
    val classSuffix = ".class"

    // Lists the entries of one jar and loads those that are class files.
    def classesIn(jarPath: String): Array[Class[_]] = {
      val jarInput = new JarInputStream(new FileInputStream(jarPath))
      try {
        // Entry names are materialised before the stream is closed.
        val entryNames = Iterator
          .continually(jarInput.getNextJarEntry)
          .takeWhile(_ != null)
          .map(_.getName)
          .toArray
        entryNames
          .filter(_.endsWith(classSuffix))
          .map(entry => Class.forName(entry.replace('/', '.').stripSuffix(classSuffix)): Class[_])
      } finally jarInput.close()
    }

    ClassLoader.getSystemClassLoader
      .getResources(path)
      .asScala
      .map(url => jarLocation.matcher(url.getFile))
      .filter(_.find())
      .map(_.group(1))
      .filter(fileNameFilter)
      .flatMap(jarPath => classesIn(jarPath))
      .toSeq
  }
}

Source File: BaseFiltering.scala From scala-commons with MIT License

5 votes

package com.avsystem.commons
package mongo.core.ops

import java.util.regex.Pattern

import com.avsystem.commons.mongo.text.TextSearchLanguage
import com.google.common.collect.ImmutableList
import com.mongodb.client.model.geojson.{Geometry, Point}
import com.mongodb.client.model.{Filters, TextSearchOptions}
import org.bson.BsonType
import org.bson.conversions.Bson

import scala.util.matching.Regex

/**
 * Filter-building operations for a single document field.
 *
 * Every method delegates to the MongoDB driver's `Filters` factory of the same purpose,
 * using the `key` (field name) and value encoding supplied by `KeyValueHandling`.
 */
trait BaseFiltering[T] extends Any with KeyValueHandling[T] {
  // --- comparison ---
  def equal(t: T): Bson = use(t)(Filters.eq)
  def notEqual(t: T): Bson = use(t)(Filters.ne)

  def gt(t: T): Bson = use(t)(Filters.gt)
  def lt(t: T): Bson = use(t)(Filters.lt)
  def gte(t: T): Bson = use(t)(Filters.gte)
  def lte(t: T): Bson = use(t)(Filters.lte)

  // --- membership ---
  def in(ts: T*): Bson = Filters.in(key, ts.map(encode).asJava)
  def nin(ts: T*): Bson = Filters.nin(key, ts.map(encode).asJava)

  // --- element ---
  def exists(exists: Boolean = true): Bson = Filters.exists(key, exists)

  def ofType(bsonType: BsonType): Bson = Filters.`type`(key, bsonType)
  def ofType(typeName: String): Bson = Filters.`type`(key, typeName)

  // --- evaluation ---
  def mod(divisor: Long, remainder: Long): Bson = Filters.mod(key, divisor, remainder)

  def regex(re: Regex): Bson = regex(re.pattern)
  def regex(pattern: Pattern): Bson = Filters.regex(key, pattern)
  def regex(patternStr: String): Bson = Filters.regex(key, patternStr)
  def regex(patternStr: String, options: String): Bson = Filters.regex(key, patternStr, options)

  /**
   * Text-search filter. Each option is applied to the driver's `TextSearchOptions` only
   * when it is provided. Unlike the other operations this one does not use `key`.
   */
  def text(str: String, caseSensitive: OptArg[Boolean] = OptArg.Empty,
    language: OptArg[TextSearchLanguage] = OptArg.Empty, diacriticSensitive: OptArg[Boolean] = OptArg.Empty): Bson = {
    val searchOptions = new TextSearchOptions().setup { options =>
      caseSensitive.foreach(b => options.caseSensitive(b))
      language.foreach(l => options.language(l.code))
      diacriticSensitive.foreach(b => options.diacriticSensitive(b))
    }
    Filters.text(str, searchOptions)
  }

  // --- bitwise ---
  def bitsAllClear(bitMask: Long): Bson = Filters.bitsAllClear(key, bitMask)
  def bitsAllSet(bitMask: Long): Bson = Filters.bitsAllSet(key, bitMask)
  def bitsAnyClear(bitMask: Long): Bson = Filters.bitsAnyClear(key, bitMask)
  def bitsAnySet(bitMask: Long): Bson = Filters.bitsAnySet(key, bitMask)

  // --- geospatial: within / intersects ---
  def geoWithinBson(geometryBson: Bson): Bson = Filters.geoWithin(key, geometryBson)
  def geoWithin(geometry: Geometry): Bson = Filters.geoWithin(key, geometry)
  def geoWithinBox(lowerLeftX: Double, lowerLeftY: Double, upperRightX: Double, upperRightY: Double): Bson = {
    Filters.geoWithinBox(key, lowerLeftX, lowerLeftY, upperRightX, upperRightY)
  }
  def geoWithinPolygon(points: (Double, Double)*): Bson = {
    // the driver expects each point as a two-element list of boxed doubles
    val javaPoints = points.map {
      case (x, y) => ImmutableList.of(x: JDouble, y: JDouble): JList[JDouble]
    }.asJava
    Filters.geoWithinPolygon(key, javaPoints)
  }
  def geoWithinCenter(x: Double, y: Double, radius: Double): Bson = Filters.geoWithinCenter(key, x, y, radius)
  def geoWithinCenterSphere(x: Double, y: Double, radius: Double): Bson = Filters.geoWithinCenterSphere(key, x, y, radius)

  def geoIntersectsBson(geometryBson: Bson): Bson = Filters.geoIntersects(key, geometryBson)
  def geoIntersects(geometry: Geometry): Bson = Filters.geoIntersects(key, geometry)

  // --- geospatial: near ---

  /** Converts an optional distance to a boxed double, using `null` for "not specified". */
  private def jDouble(doubleOpt: Opt[Double]): JDouble = doubleOpt.map(d => d: JDouble).orNull

  /**
   * Applies `f` to the boxed distances in the order the driver's `near` / `nearSphere`
   * factories declare them: maximum distance first, minimum distance second.
   *
   * The previous helper passed (min, max), so a caller's `maxDistance` reached the
   * driver as the minimum and vice versa.
   */
  private def useMaxMin(max: Opt[Double], min: Opt[Double])(f: (JDouble, JDouble) => Bson): Bson = {
    f(jDouble(max), jDouble(min))
  }

  def nearBson(geometryBson: Bson, maxDistance: Opt[Double] = Opt.empty, minDistance: Opt[Double] = Opt.empty): Bson = {
    useMaxMin(maxDistance, minDistance)(Filters.near(key, geometryBson, _, _))
  }
  def nearPoint(point: Point, maxDistance: Opt[Double] = Opt.empty, minDistance: Opt[Double] = Opt.empty): Bson = {
    useMaxMin(maxDistance, minDistance)(Filters.near(key, point, _, _))
  }
  def nearXY(x: Double, y: Double, maxDistance: Opt[Double] = Opt.empty, minDistance: Opt[Double] = Opt.empty): Bson = {
    useMaxMin(maxDistance, minDistance)(Filters.near(key, x, y, _, _))
  }

  def nearSphereBson(geometryBson: Bson, maxDistance: Opt[Double] = Opt.empty, minDistance: Opt[Double] = Opt.empty): Bson = {
    useMaxMin(maxDistance, minDistance)(Filters.nearSphere(key, geometryBson, _, _))
  }
  def nearSpherePoint(point: Point, maxDistance: Opt[Double] = Opt.empty, minDistance: Opt[Double] = Opt.empty): Bson = {
    useMaxMin(maxDistance, minDistance)(Filters.nearSphere(key, point, _, _))
  }
  def nearSphereXY(x: Double, y: Double, maxDistance: Opt[Double] = Opt.empty, minDistance: Opt[Double] = Opt.empty): Bson = {
    useMaxMin(maxDistance, minDistance)(Filters.nearSphere(key, x, y, _, _))
  }
}

Source File: FieldStyle.scala From tethys with Apache License 2.0

5 votes

package tethys.derivation.builder

import java.util.regex.Pattern

import scala.annotation.StaticAnnotation

/** A renaming strategy applied to field names; styles can be chained with `andThen` / `>>`. */
trait FieldStyle  { self =>
  /** Returns `field` rewritten according to this style. */
  def applyStyle(field: String): String

  /** A style that applies this style first and `that` to its result. */
  def andThen(that: FieldStyle): FieldStyle =
    FieldStyle(field => that.applyStyle(self.applyStyle(field)))

  /** A style that applies this style first and the function `that` to its result. */
  def andThen(that: String => String): FieldStyle =
    FieldStyle(field => that(self.applyStyle(field)))

  /** Operator alias for `andThen`. */
  def >>(that: FieldStyle): FieldStyle = andThen(that)
  /** Operator alias for `andThen`. */
  def >>(that: String => String): FieldStyle = andThen(that)
}


object FieldStyle {

  /** Wraps a plain function as a [[FieldStyle]]. */
  def apply(fun: String => String): FieldStyle = new FieldStyle {
    override def applyStyle(field: String): String = fun(field)
  }

  class Ref(fieldStyle: FieldStyle) extends StaticAnnotation
  /** A style whose `applyStyle` always throws; it must never be invoked at runtime. */
  trait StyleReference extends FieldStyle {
    final override def applyStyle(field: String): String = throw new RuntimeException("StyleReference should not be used at runtime")
  }

  // Names transformations adopted from scala enumeratum
  // An upper-case run followed by a capitalised word, e.g. "HTTPServer" -> "HTTP" | "Server".
  private val acronymBoundary: Pattern = Pattern.compile("([A-Z]+)([A-Z][a-z])")
  // A lower-case letter or digit followed by an upper-case letter, e.g. "fooBar" -> "foo" | "Bar".
  private val wordBoundary: Pattern = Pattern.compile("([a-z\\d])([A-Z])")
  private val underscoreBetween: String = "$1_$2"

  /** Splits `name` into words at the two kinds of boundary above and at existing underscores. */
  private def splitName(name: String): List[String] = {
    val afterAcronyms = acronymBoundary.matcher(name).replaceAll(underscoreBetween)
    val fullySeparated = wordBoundary.matcher(afterAcronyms).replaceAll(underscoreBetween)
    fullySeparated.split("_").toList
  }

  /** Joins the words of a name with underscores, keeping each word's original case. */
  val snakecase: FieldStyle = FieldStyle(field => splitName(field).mkString("_"))

  /** Lower-cases the whole name. */
  val lowercase: FieldStyle = FieldStyle(field => field.toLowerCase())

  /** Upper-cases the whole name. */
  val uppercase: FieldStyle = FieldStyle(field => field.toUpperCase())

  // Defined after the styles they combine: vals initialise in declaration order.
  val lowerSnakecase: FieldStyle = snakecase >> lowercase
  val upperSnakecase: FieldStyle = snakecase >> uppercase
}

Source File: ScalastyleSettings.scala From intellij-lsp with Apache License 2.0

5 votes

package org.jetbrains.plugins.scala.editor.importOptimizer

import java.util.regex.Pattern

import org.scalastyle.ConfigurationChecker

import scala.util.Try


  /**
   * Compares two imported names.
   *
   * The wildcard `_` sorts first when `isImport` is set and last otherwise. Two
   * non-wildcard names whose first characters have the same case are compared ignoring
   * case; otherwise the upper-case-initial name sorts last unless `isImport` is set, in
   * which case `name1` always sorts first.
   *
   * @return a negative number when `name1` sorts first, a positive number when it sorts
   *         last, `0` only for names equal ignoring case
   */
  def compareNames(name1: String, name2: String, isImport: Boolean): Int =
    (name1, name2) match {
      case ("_", _) =>
        if (isImport) -1 else 1
      case (_, "_") =>
        // mirror image of the case above
        if (isImport) 1 else -1
      case _ =>
        val firstIsUpper = Character.isUpperCase(name1.codePointAt(0))
        val secondIsUpper = Character.isUpperCase(name2.codePointAt(0))

        if (firstIsUpper == secondIsUpper) name1.compareToIgnoreCase(name2)
        else if (firstIsUpper && !isImport) 1
        else -1
    }

  /**
   * Reads the import group patterns from `checker`'s parameters: "groups" holds a
   * comma-separated list of group names and "group.<name>" holds the regex of each.
   *
   * @return the compiled patterns in declared order, or `None` when any parameter is
   *         missing or any regex fails to compile
   */
  def groups(checker: ConfigurationChecker): Option[Seq[Pattern]] =
    Try {
      val groupNames = checker.parameters("groups").split(",").toSeq
      for (groupName <- groupNames)
        yield Pattern.compile(checker.parameters(s"group.$groupName"))
    }.toOption

  /** Ordering of names based on `compareNames` with `isImport = false`. */
  val nameOrdering: Ordering[String] =
    Ordering.fromLessThan((left, right) => compareNames(left, right, isImport = false) < 0)
}

/**
 * Import-ordering settings taken from a scalastyle configuration.
 *
 * @param scalastyleOrder whether scalastyle ordering is enabled
 * @param groups          the import group patterns, when configured
 */
case class ScalastyleSettings(scalastyleOrder: Boolean, groups: Option[Seq[Pattern]])

Source File: RegexBenchmark.scala From chronicler with Apache License 2.0

5 votes

package com.github.fsanaulla.chronicler.benchmark

import java.util.concurrent.TimeUnit
import java.util.regex.Pattern

import com.github.fsanaulla.chronicler.benchmark.RegexBenchmark.CompiledPattern
import com.github.fsanaulla.chronicler.core.regex
import org.openjdk.jmh.annotations._

/**
 * JMH benchmark comparing `replaceAll` through a pre-compiled [[java.util.regex.Pattern]]
 * with `String.replaceAll`, which receives the regex as a string on every call.
 * Results are reported as average time per operation, in milliseconds.
 *
 * In both benchmarks the replacement `"\\\\$1"` inserts a literal backslash in front of the
 * text captured by group 1.
 *
 * NOTE(review): both methods discard the resulting string; JMH may eliminate unused
 * computations, so consider returning the value or consuming it with a Blackhole -- confirm.
 */
@BenchmarkMode(Array(Mode.AverageTime))
@OutputTimeUnit(TimeUnit.MILLISECONDS)
class RegexBenchmark {

  // Original author's measurement: roughly 10x faster than `uncompiledPattern` below.
  // NOTE(review): `regex.tagPattern` is defined outside this file; presumably it is equivalent
  // to the "([ ,=])" expression used below so that both benchmarks do the same work -- confirm.
  @Benchmark
  def compiledPattern(state: CompiledPattern): Unit =
    state.pattern.matcher("My=, Name").replaceAll("\\\\$1")

  // Baseline: the regex string is handed to String.replaceAll on every invocation.
  @Benchmark
  def uncompiledPattern(): Unit =
    "My=, Name".replaceAll("([ ,=])", "\\\\$1")
}

/** Companion holding the JMH state shared by the benchmark methods. */
object RegexBenchmark {
  /**
   * Benchmark-scoped state carrying the pre-compiled pattern, so that obtaining the pattern
   * is not part of the measured method body.
   */
  @State(Scope.Benchmark)
  class CompiledPattern {
    // Assigned in `up()`; starts out as null until the @Setup method has run.
    var pattern: Pattern = _
    @Setup
    def up(): Unit = pattern = regex.tagPattern
    // Nothing to release; present only as the @TearDown counterpart of `up()`.
    @TearDown
    def close(): Unit = {}
  }
}

Source File: CarbonGlobalDictionaryRDD.scala From carbondata with Apache License 2.0

5 votes

package org.apache.carbondata.spark.rdd

import java.util.regex.Pattern

import org.apache.spark.sql.Row

/**
 * Describes how a (possibly nested) value is split into parts.
 *
 * @param delimiters     the delimiter strings, one per nesting level
 * @param delimiterIndex index of the level currently in use; selects the entry of `patterns`
 * @param patterns       compiled patterns used for splitting, indexed by `delimiterIndex`
 */
case class DataFormat(delimiters: Array[String],
    var delimiterIndex: Int,
    patterns: Array[Pattern]) extends Serializable {

  /**
   * Splits `input` with the pattern of the current level.
   *
   * A limit of -1 is used so that trailing empty strings are kept: when the last column is
   * empty after splitting, a surrogate key has to be generated for the empty value too.
   */
  def getSplits(input: String): Array[String] = {
    val activePattern = patterns(delimiterIndex)
    activePattern.split(input, -1)
  }

  /**
   * Returns a copy that points at the next level, capped at the last index of `delimiters`.
   * The `delimiters` and `patterns` arrays are shared with this instance, not duplicated.
   */
  def cloneAndIncreaseIndex: DataFormat = {
    val lastIndex = delimiters.length - 1
    val candidate = delimiterIndex + 1
    val nextIndex = if (candidate < lastIndex) candidate else lastIndex
    copy(delimiterIndex = nextIndex)
  }
}

/**
 * A `Row` backed directly by a mutable array of strings, intended to be refilled through
 * `setValues` instead of being re-allocated.
 *
 * @param values the backing array; element `i` is the value of column `i`
 */
class StringArrayRow(var values: Array[String]) extends Row {

  /** Number of columns, i.e. the size of the backing array. */
  override def length: Int = values.length

  /** Value of column `i`, untyped. */
  override def get(i: Int): Any = values(i)

  /** Value of column `i` as a string. */
  override def getString(i: Int): String = values(i)

  /** Sets every slot of the backing array to null. */
  private def reset(): Unit = {
    var idx = values.length - 1
    while (idx >= 0) {
      values(idx) = null
      idx -= 1
    }
  }

  /** Returns a new row backed by a copy of the current array. */
  override def copy(): Row = new StringArrayRow(values.clone())

  /**
   * Clears this row and then copies `values` into the backing array.
   *
   * Only as many elements as fit are copied; remaining slots stay null. A null argument
   * leaves the row cleared.
   *
   * @return this row, to allow chaining
   */
  def setValues(values: Array[String]): StringArrayRow = {
    val target = this.values
    reset()
    if (values != null) {
      System.arraycopy(values, 0, target, 0, math.min(target.length, values.length))
    }
    this
  }
}

Source File: CodeDumperTests.scala From codepropertygraph with Apache License 2.0

5 votes

package io.shiftleft.semanticcpg.codedumper

import java.util.regex.Pattern

import org.scalatest.{Matchers, WordSpec}
import io.shiftleft.semanticcpg.language._
import io.shiftleft.semanticcpg.testfixtures.CodeToCpgFixture

/**
 * Specification for `CodeDumper`, run against a code property graph that `CodeToCpgFixture`
 * builds from the small C snippet in `code`.
 *
 * NOTE(review): the meaning of the boolean passed as second argument to `CodeDumper.dump`
 * is not visible in this file -- confirm before changing it.
 */
class CodeDumperTests extends WordSpec with Matchers {

  // Source under test: a comment line followed by `my_func`, whose body calls `foo(param1)`.
  val code = """
                | // A comment
                |int my_func(int param1)
                |{
                |   int x = foo(param1);
                |}""".stripMargin

  CodeToCpgFixture(code) { cpg =>
    // A traversal that matches no method must dump to an empty string.
    "should return empty string for empty traversal" in {
      CodeDumper
        .dump(cpg.method.name("notinthere"), false)
        .mkString("\n") shouldBe ""
    }

    // Dumping the method node yields the full function, from its signature to the closing brace.
    "should be able to dump complete function" in {
      val query = cpg.method.name("my_func")
      val code = CodeDumper.dump(query, false).mkString("\n")
      code should startWith("int my_func")
      code should include("foo(param1)")
      code should endWith("}")
    }

    // Dumping a call node yields the enclosing method, with `CodeDumper.arrow` on the call's line.
    "should dump method with arrow for expression (a call)" in {
      val query = cpg.call.name("foo")
      val code = CodeDumper.dump(query, false).mkString("\n")
      code should startWith("int")
      // The arrow text is quoted so that it is matched literally inside the regex.
      code should include regex (".*" + "int x = foo" + ".*" + Pattern.quote(CodeDumper.arrow.toString) + ".*")
      code should endWith("}")
    }

    // A file that does not exist produces an empty result.
    "methodCode should return nothing on invalid filename" in {
      CodeDumper.code("fooNonexisting", 1, 2) shouldBe ""
    }

    // Same dump, reached through the `dumpRaw` step on the traversal.
    "should allow dumping via .dump" in {
      val code = cpg.method.name("my_func").dumpRaw.mkString("\n")
      code should startWith("int my_func")
    }

    // `callIn` needs an implicit call resolver; `NoResolve` is supplied for it here.
    "should allow dumping callIn" in {
      implicit val resolver: ICallResolver = NoResolve
      val code = cpg.method.name("foo").callIn.dumpRaw.mkString("\n")
      code should startWith("int")
    }

  }

}

Source File: FilterJsonLigatures.scala From eidos with Apache License 2.0

5 votes

package org.clulab.wm.eidos.apps

import java.io.File
import java.io.PrintWriter
import java.util.regex.Pattern

import org.clulab.wm.eidos.utils.Closer.AutoCloser
import org.clulab.wm.eidos.utils.FileUtils
import org.clulab.wm.eidos.utils.Sinker
import org.clulab.wm.eidos.utils.TsvWriter
import org.json4s.DefaultFormats
import org.json4s.JString
import org.json4s.JValue
import org.json4s.jackson.JsonMethods

/**
 * Command-line app that scans JSON files for text matching `pattern` and writes every match
 * to a TSV file with the columns "file", "left" and "right".
 *
 * Arguments: args(0) is the input directory, args(1) the extension handed to
 * `FileUtils.findFiles`, args(2) the output file.
 */
object FilterJsonLigatures extends App {
  // Two words separated by a single space, where the first word ends in "f" followed by one of
  // b, h, k, l, f, t, i, j, or ends in "ij".
  // Group 1 is the whole first word, groups 2 and 3 are its ending, group 4 is the second word.
  // NOTE(review): presumably this finds words split at a ligature by text extraction -- confirm.
  val pattern: Pattern = Pattern.compile("([A-Za-z]+(f([bhkl]|[ft]|[ij])|ij)) ([A-Za-z]+)")

  /** Writes the TSV header on construction and one row per pattern match in `filter`. */
  class Filter(tsvWriter: TsvWriter) {
    // Required implicitly by `extract` below.
    implicit val formats: DefaultFormats.type = org.json4s.DefaultFormats

    tsvWriter.println("file", "left", "right")

    /**
     * Looks up `_source.extracted_text` in `jValue` and writes one row per match:
     * the file name, the first word (group 1) and the second word (group 4).
     *
     * Throws a RuntimeException when the field is not a JSON string.
     */
    def filter(jValue: JValue, inputFile: File): Unit = {
      val extractions: JValue = jValue \ "_source" \ "extracted_text"

      extractions match {
        case text: JString =>
          val matcher = pattern.matcher(text.extract[String])

          while (matcher.find)
            tsvWriter.println(inputFile.getName, matcher.group(1), matcher.group(4))
        case _ => throw new RuntimeException(s"Unexpected extractions value: $extractions")
      }
    }
  }

  val inputDir = args(0)
  val extension = args(1)
  val outputFile = args(2)

  // `autoClose` runs the block with the writer.
  // NOTE(review): presumably it also closes the writer afterwards -- confirm in Closer.
  new TsvWriter(Sinker.printWriterFromFile(outputFile)).autoClose { tsvWriter =>
    val filter = new Filter(tsvWriter)
    val inputFiles = FileUtils.findFiles(inputDir, extension)

    // Files are processed in file-name order so that the output order is deterministic.
    inputFiles.sortBy(_.getName).foreach { inputFile =>
      val text = FileUtils.getTextFromFile(inputFile)
      val json = JsonMethods.parse(text)

      filter.filter(json, inputFile)
    }
  }
}

Source File: TelegramBot4sRelease.scala From telegram with Apache License 2.0

5 votes

import java.util.regex.Pattern

import sbt._
import sbtrelease.ReleasePlugin.autoImport.ReleaseKeys._
import sbtrelease.ReleasePlugin.autoImport.{ReleaseStep, _}
import sbtrelease.ReleaseStateTransformations._

/** Release process definition for sbt-release, including a step that updates the README. */
object TelegramBot4sRelease {

  /** The ordered release steps; the README update runs right after the release version is set. */
  def steps: Seq[ReleaseStep] = Seq(
    checkSnapshotDependencies,
    inquireVersions,
    // publishing locally so that the pgp password prompt is displayed early
    // in the process
    releaseStepCommand("publishLocalSigned"),
    runClean,
    runTest,
    setReleaseVersion,
    updateVersionInReadme,
    commitReleaseVersion,
    tagRelease,
    publishArtifacts,
    setNextVersion,
    commitNextVersion,
    releaseStepCommand("sonatypeReleaseAll"),
    pushChanges
  )

  /**
   * Release step that replaces the version found in the README's dependency line with the
   * release version and stages the README in version control.
   *
   * Fails the release with a descriptive error when the dependency line cannot be found,
   * when no release versions are present in the state, or when no VCS is configured.
   */
  private def updateVersionInReadme: ReleaseStep = { s: State =>
    val readmeFile = file("README.md")
    val readme = IO.read(readmeFile)

    val currentVersionPattern = """"info.mukel" %% "telegrambot4s" % "([\w\.-]+)"""".r
    val currentVersionInReadme = currentVersionPattern
      .findFirstMatchIn(readme)
      .map(_.group(1))
      .getOrElse(sys.error(s"Could not find the telegrambot4s dependency line in ${readmeFile.name}"))

    val releaseVersion = s
      .get(versions)
      .map(_._1)
      .getOrElse(sys.error("No versions are set! Was this release step run before inquireVersions?"))

    s.log.info(s"Replacing $currentVersionInReadme with $releaseVersion in ${readmeFile.name}")

    // Literal replacement of every occurrence: neither the old nor the new version is
    // interpreted as a regex or as a replacement template, so '$' and '\' are safe.
    val newReadme = readme.replace(currentVersionInReadme, releaseVersion)
    IO.write(readmeFile, newReadme)

    val vcs = Project
      .extract(s)
      .get(releaseVcs)
      .getOrElse(sys.error("No version control system is configured; cannot stage the README"))
    vcs.add(readmeFile.getAbsolutePath) !! s.log

    s
  }
}

Source File: ConfigReader.scala From sparkoscope with Apache License 2.0

5 votes

package org.apache.spark.internal.config

import java.util.{Map => JMap}
import java.util.regex.Pattern

import scala.collection.mutable.HashMap
import scala.util.matching.Regex

private object ConfigReader {

  // Matches references of the form `${name}` or `${prefix:name}`.
  // Group 1 is the optional prefix (word characters, without the ':'; null when absent).
  // Group 2 is the name: one or more non-whitespace characters, matched reluctantly up to the
  // first closing brace that completes the match.
  private val REF_RE = "\\$\\{(?:(\\w+?):)?(\\S+?)\\}".r

}


  /**
   * Expands every `${name}` / `${prefix:name}` reference in `input`.
   * A null input is returned unchanged.
   */
  def substitute(input: String): String = substitute(input, Set.empty[String])

  /**
   * Recursive worker behind `substitute(input)`.
   *
   * Each reference is looked up in `bindings` by prefix and then by name. A resolved value is
   * itself expanded recursively; an unresolved reference is left in the text as it was written.
   *
   * @param usedRefs references already being expanded on the current path; meeting one of
   *                 them again fails the `require` with a "Circular reference" message
   */
  private def substitute(input: String, usedRefs: Set[String]): String = {
    if (input == null) {
      input
    } else {
      ConfigReader.REF_RE.replaceAllIn(input, { found =>
        val prefix = found.group(1)
        val name = found.group(2)
        val ref = if (prefix == null) name else s"$prefix:$name"
        require(!usedRefs.contains(ref), s"Circular reference in $input: $ref")

        val resolved = bindings.get(prefix).flatMap(_.get(name))
        val replacement = resolved match {
          case Some(value) => substitute(value, usedRefs + ref)
          case None => found.matched
        }
        // The result is inserted verbatim, so '$' and '\' must not be read as group references.
        Regex.quoteReplacement(replacement)
      })
    }
  }

}

Source File: StringUtils.scala From sparkoscope with Apache License 2.0

5 votes

package org.apache.spark.sql.catalyst.util

import java.util.regex.{Pattern, PatternSyntaxException}

import org.apache.spark.unsafe.types.UTF8String

/** String helpers: LIKE-pattern translation, boolean literals and name filtering. */
object StringUtils {

  /**
   * Translates a SQL LIKE pattern into a Java regular expression string.
   *
   * The pattern is scanned left to right:
   *  - `_` becomes `.` (exactly one character) and `%` becomes `.*` (any run of characters);
   *  - a backslash escapes the character after it: `\_`, `\%` and `\\` match a literal
   *    `_`, `%` and `\` respectively;
   *  - a backslash before any other character matches the backslash and that character
   *    literally; a backslash at the very end matches a literal backslash;
   *  - every other character is quoted and matches itself.
   *
   * Escape pairs are consumed as a unit, so the second backslash of `\\` never escapes the
   * character that follows it.
   *
   * @param v the LIKE pattern
   * @return the regex prefixed with `(?s)` so that `.` also matches line terminators, or the
   *         empty string when `v` is empty
   */
  def escapeLikeRegex(v: String): String = {
    if (v.isEmpty) {
      v
    } else {
      val chars = v.iterator
      val regex = new StringBuilder("(?s)")
      while (chars.hasNext) {
        chars.next() match {
          case '\\' if chars.hasNext =>
            chars.next() match {
              case '_' => regex ++= "_"
              case '%' => regex ++= "%"
              case '\\' => regex ++= Pattern.quote("\\")
              case c => regex ++= Pattern.quote("\\" + c)
            }
          case '_' => regex ++= "."
          case '%' => regex ++= ".*"
          // Also reached by a trailing lone backslash, which is then matched literally.
          case c => regex ++= Pattern.quote(Character.toString(c))
        }
      }
      regex.toString
    }
  }

  private[this] val trueStrings = Set("t", "true", "y", "yes", "1").map(UTF8String.fromString)
  private[this] val falseStrings = Set("f", "false", "n", "no", "0").map(UTF8String.fromString)

  /** True when the lower-cased `s` is one of "t", "true", "y", "yes", "1". */
  def isTrueString(s: UTF8String): Boolean = trueStrings.contains(s.toLowerCase)

  /** True when the lower-cased `s` is one of "f", "false", "n", "no", "0". */
  def isFalseString(s: UTF8String): Boolean = falseStrings.contains(s.toLowerCase)

  /**
   * Returns the names matching `pattern`, sorted and without duplicates.
   *
   * The pattern is trimmed and split on `|` into alternatives. In each alternative `*` stands
   * for any run of characters; the rest is interpreted as a case-insensitive regular
   * expression that has to match the whole name.
   *
   * @param names   candidate names
   * @param pattern alternatives separated by `|`
   * @return matching names in ascending order
   */
  def filterPattern(names: Seq[String], pattern: String): Seq[String] = {
    val funcNames = scala.collection.mutable.SortedSet.empty[String]
    pattern.trim().split("\\|").foreach { subPattern =>
      try {
        val regex = ("(?i)" + subPattern.replaceAll("\\*", ".*")).r
        funcNames ++= names.filter{ name => regex.pattern.matcher(name).matches() }
      } catch {
        // Deliberate best effort: an alternative that is not a valid regex matches nothing.
        case _: PatternSyntaxException =>
      }
    }
    funcNames.toSeq
  }
}

java.util.regex.Pattern Scala Examples