java.util.regex.Pattern Scala Examples

The following examples show how to use java.util.regex.Pattern. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.
Example 1
Source File: Configuration.scala    From circe-generic-extras   with Apache License 2.0 5 votes vote down vote up
package io.circe.generic.extras

import java.util.regex.Pattern


/**
 * Immutable bundle of settings: two name-transforming functions plus three flags/options.
 *
 * Every `with*` method returns a modified copy and leaves the receiver untouched.
 *
 * @param transformMemberNames      function stored for transforming member names
 * @param transformConstructorNames function stored for transforming constructor names
 * @param useDefaults               flag set to `true` by [[withDefaults]]
 * @param discriminator             optional discriminator name, set by [[withDiscriminator]]
 * @param strictDecoding            flag set to `true` by [[withStrictDecoding]]
 */
final case class Configuration(
  transformMemberNames: String => String,
  transformConstructorNames: String => String,
  useDefaults: Boolean,
  discriminator: Option[String],
  strictDecoding: Boolean = false
) {
  /** Copy whose member names are rewritten to `snake_case`. */
  def withSnakeCaseMemberNames: Configuration = copy(
    transformMemberNames = Configuration.snakeCaseTransformation
  )

  /** Copy whose member names are rewritten to `SCREAMING_SNAKE_CASE`. */
  def withScreamingSnakeCaseMemberNames: Configuration = copy(
    transformMemberNames = Configuration.screamingSnakeCaseTransformation
  )

  /** Copy whose member names are rewritten to `kebab-case`. */
  def withKebabCaseMemberNames: Configuration = copy(
    transformMemberNames = Configuration.kebabCaseTransformation
  )

  /** Copy whose constructor names are rewritten to `snake_case`. */
  def withSnakeCaseConstructorNames: Configuration = copy(
    transformConstructorNames = Configuration.snakeCaseTransformation
  )

  /** Copy whose constructor names are rewritten to `SCREAMING_SNAKE_CASE`. */
  def withScreamingSnakeCaseConstructorNames: Configuration = copy(
    transformConstructorNames = Configuration.screamingSnakeCaseTransformation
  )

  /** Copy whose constructor names are rewritten to `kebab-case`. */
  def withKebabCaseConstructorNames: Configuration = copy(
    transformConstructorNames = Configuration.kebabCaseTransformation
  )

  /** Copy with `useDefaults` switched on. */
  def withDefaults: Configuration = copy(useDefaults = true)

  /** Copy using the given discriminator name. */
  def withDiscriminator(discriminator: String): Configuration = copy(discriminator = Some(discriminator))

  /** Copy with `strictDecoding` switched on. */
  def withStrictDecoding: Configuration = copy(strictDecoding = true)
}

object Configuration {

  /** Identity name transformations, no defaults, no discriminator, non-strict decoding. */
  val default: Configuration = Configuration(Predef.identity, Predef.identity, false, None)

  // Splits an upper-case run from a following capitalised word: "HTMLParser" -> "HTML" | "Parser".
  private val basePattern: Pattern = Pattern.compile("([A-Z]+)([A-Z][a-z])")
  // Splits a lower-case letter or digit from a following capital: "fooBar" -> "foo" | "Bar".
  private val swapPattern: Pattern = Pattern.compile("([a-z\\d])([A-Z])")

  /**
   * Inserts `separator` at every word boundary found by the two patterns above.
   * The separator is spliced into a regex replacement string, so it must not contain
   * `$` or `\`; the only callers pass "_" and "-".
   */
  private def separateWords(s: String, separator: String): String = {
    val replacement = "$1" + separator + "$2"
    val partial = basePattern.matcher(s).replaceAll(replacement)
    swapPattern.matcher(partial).replaceAll(replacement)
  }

  // Case conversion uses Locale.ROOT so the produced names do not depend on the JVM's
  // default locale (under a Turkish locale "ID".toLowerCase is "ıd", not "id").

  /** `fooBar` => `foo_bar` */
  val snakeCaseTransformation: String => String =
    s => separateWords(s, "_").toLowerCase(java.util.Locale.ROOT)

  /** `fooBar` => `FOO_BAR` */
  val screamingSnakeCaseTransformation: String => String =
    s => separateWords(s, "_").toUpperCase(java.util.Locale.ROOT)

  /** `fooBar` => `foo-bar` */
  val kebabCaseTransformation: String => String =
    s => separateWords(s, "-").toLowerCase(java.util.Locale.ROOT)
}

/** Holder for an implicit [[Configuration]]; bringing its member into scope supplies `Configuration.default`. */
object defaults {
  // Implicit instance that is simply Configuration.default.
  implicit val defaultGenericConfiguration: Configuration = Configuration.default
} 
Example 2
Source File: Tag.scala    From cosmos   with Apache License 2.0 5 votes vote down vote up
package com.mesosphere.universe.v3.model

import com.mesosphere.cosmos.circe.Decoders._
import com.twitter.util.Return
import com.twitter.util.Throw
import com.twitter.util.Try
import io.circe.syntax.EncoderOps
import io.circe.Decoder
import io.circe.DecodingFailure
import io.circe.Encoder
import io.circe.HCursor
import java.util.regex.Pattern

/**
 * A tag value: a non-empty string that contains no whitespace characters.
 *
 * The constructor is private; instances are created through [[Tag.apply]] or
 * [[Tag.validate]], both of which enforce the format.
 */
final class Tag private(val value: String) extends AnyVal {

  override def toString: String = value

}

object Tag {

  /** Regular expression every tag must fully match: one or more non-whitespace characters. */
  val packageDetailsTagRegex: String = "^[^\\s]+$"
  val packageDetailsTagPattern: Pattern = Pattern.compile(packageDetailsTagRegex)

  /** Builds a tag, throwing the validation failure if `s` is not well formed. */
  def apply(s: String): Tag = validate(s).get

  /**
   * Checks `s` against [[packageDetailsTagRegex]].
   *
   * @return `Return(tag)` when `s` matches, otherwise a `Throw` holding an
   *         `IllegalArgumentException` that names the offending value and the expected format
   */
  def validate(s: String): Try[Tag] = {
    if (packageDetailsTagPattern.matcher(s).matches()) {
      Return(new Tag(s))
    } else {
      Throw(new IllegalArgumentException(
        s"Value '$s' does not conform to expected format $packageDetailsTagRegex"
      ))
    }
  }

  /** Encodes a tag as its underlying JSON string. */
  implicit val encodePackageDefinitionTag: Encoder[Tag] = {
    Encoder.instance(_.value.asJson)
  }

  /** Decodes a JSON string and validates it; validation errors become a `DecodingFailure`. */
  implicit val decodePackageDefinitionTag: Decoder[Tag] =
    Decoder.instance[Tag] { (c: HCursor) =>
      c.as[String].map(validate(_)).flatMap {
        case Return(r) => Right(r)
        case Throw(ex) =>
          // String.replace performs the same literal (non-regex) replacement as the
          // deprecated StringOps.replaceAllLiterally.
          val msg = ex.getMessage.replace("assertion failed: ", "")
          Left(DecodingFailure(msg, c.history))
      }
    }

}
Example 3
Source File: StringUtils.scala    From BigDatalog   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.util

import java.util.regex.Pattern

import org.apache.spark.unsafe.types.UTF8String

object StringUtils {

  /**
   * Translates a SQL LIKE pattern into a Java regular expression.
   *
   *  - `_` becomes `.` (exactly one character)
   *  - `%` becomes `.*` (zero or more characters)
   *  - `\_` and `\%` become the literal characters `_` and `%`
   *  - `\\` becomes one literal backslash
   *  - a backslash before any other character is kept literally together with that character
   *  - a lone trailing backslash is dropped
   *  - every other character is quoted so it matches itself
   *
   * The result is prefixed with `(?s)` so that `.` also matches line terminators.
   *
   * @param v the LIKE pattern
   * @return the equivalent regex, or `v` itself when `v` is empty
   */
  def escapeLikeRegex(v: String): String = {
    if (v.isEmpty) {
      v
    } else {
      val out = new StringBuilder("(?s)")
      val chars = v.iterator
      // Escape sequences are consumed as a pair. Pairing each character with its
      // predecessor instead made an escaped backslash vanish and turned the character
      // after it into an escaped one.
      while (chars.hasNext) {
        chars.next() match {
          case '\\' if chars.hasNext =>
            chars.next() match {
              case '_' => out.append("_")
              case '%' => out.append("%")
              case '\\' => out.append(Pattern.quote("\\"))
              case c => out.append(Pattern.quote("\\" + c))
            }
          case '\\' => () // trailing backslash with nothing to escape
          case '_' => out.append(".")
          case '%' => out.append(".*")
          case c => out.append(Pattern.quote(Character.toString(c)))
        }
      }
      out.toString
    }
  }

  // Lower-case spellings recognised as boolean true / false.
  private[this] val trueStrings = Set("t", "true", "y", "yes", "1").map(UTF8String.fromString)
  private[this] val falseStrings = Set("f", "false", "n", "no", "0").map(UTF8String.fromString)

  /** True when the lower-cased `s` is one of "t", "true", "y", "yes", "1". */
  def isTrueString(s: UTF8String): Boolean = trueStrings.contains(s.toLowerCase)

  /** True when the lower-cased `s` is one of "f", "false", "n", "no", "0". */
  def isFalseString(s: UTF8String): Boolean = falseStrings.contains(s.toLowerCase)
}
Example 4
Source File: ZioAsyncHandlerTest.scala    From pulsar4s   with Apache License 2.0 5 votes vote down vote up
package com.sksamuel.pulsar4s.zio

import java.util.UUID

import com.sksamuel.pulsar4s.{ConsumerConfig, ProducerConfig, PulsarClient, Subscription, Topic}
import org.apache.pulsar.client.api.Schema
import org.scalatest.BeforeAndAfterAll
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers
import java.util.regex.Pattern

/**
 * Exercises the async producer/consumer operations through `ZioAsyncHandler`.
 *
 * The tests talk to a broker at `pulsar://localhost:6650` and share one randomly named
 * topic: the first test publishes "wibble" and the later tests read from that topic, so
 * they rely on running in declaration order.
 */
class ZioAsyncHandlerTest extends AnyFunSuite with Matchers with BeforeAndAfterAll {

  import ZioAsyncHandler._

  implicit val schema: Schema[String] = Schema.STRING

  private val client = PulsarClient("pulsar://localhost:6650")
  // Unique topic per run so repeated runs do not see each other's messages.
  private val topic = Topic("persistent://sample/standalone/ns1/zio_" + UUID.randomUUID())

  // Release the shared client once every test has run.
  override def afterAll(): Unit = {
    client.close()
  }

  test("async producer should use zio") {
    val producer = client.producer(ProducerConfig(topic))
    val t = producer.sendAsync("wibble")
    // `.either` surfaces failure as a Left instead of failing the effect.
    val r = zio.Runtime.default.unsafeRun(t.either)
    r.right.get should not be null
    producer.close()
  }

  test("async consumer should use zio") {
    val consumer = client.consumer(ConsumerConfig(topics = Seq(topic), subscriptionName = Subscription("mysub_" + UUID.randomUUID())))
    // Rewind so the message published by the producer test is delivered to this new subscription.
    consumer.seekEarliest()
    val t = consumer.receiveAsync
    val r = zio.Runtime.default.unsafeRun(t.either)
    r shouldBe Symbol("right")
    new String(r.right.get.data) shouldBe "wibble"
    consumer.close()
  }

  // NOTE(review): the test name says getMessageById but the body calls getLastMessageIdAsync — confirm intent.
  test("async consumer getMessageById should use zio") {
    val consumer = client.consumer(ConsumerConfig(topics = Seq(topic), subscriptionName = Subscription("mysub_" + UUID.randomUUID())))
    consumer.seekEarliest()
    val receive = consumer.receiveAsync
    val value = zio.Runtime.default.unsafeRun(receive.either)
    val t = consumer.getLastMessageIdAsync
    val r = zio.Runtime.default.unsafeRun(t.either)
    // Compare the two message ids component by component after splitting their string forms on ':'.
    val zipped = r.right.get.toString.split(":") zip value.right.get.messageId.toString.split(":")
    zipped.foreach(t => t._1 shouldBe t._2)
    consumer.close()
  }
} 
Example 5
Source File: StringUtil.scala    From ingraph   with Eclipse Public License 1.0 5 votes vote down vote up
package ingraph.compiler.cypher2gplan.util

import java.io.UnsupportedEncodingException
import java.util.regex.{Matcher, Pattern}
import javax.xml.bind.DatatypeConverter

import org.apache.spark.sql.catalyst.{expressions => cExpr}
import org.slizaa.neo4j.opencypher.{openCypher => oc}

object StringUtil {
  // Matches a whole string wrapped in single quotes or in double quotes.
  private val patternStringDelimiterCheck = Pattern.compile("^'.*'$|^\".*\"$")
  // Matches one quote character at the very start or the very end of a string.
  private val patternStringDelimiterReplace = Pattern.compile("^[\"']|[\"']$")
  // A literal backslash has to be escaped twice: once for the regular expression syntax and
  // once more for the string literal in the source, hence \\\\ below stands for one backslash.
  private val patterBackslashNotation = Pattern.compile(
  "(?<!\\\\)\\\\(\\\\|'|\"|b|f|n|r|t|_|%|u([0-9a-fA-F]{4})|U([0-9a-fA-F]{8}))"
  )

  /**
   * Parses `s` as an `Int` when it is present.
   *
   * @return `None` for `null` or the empty string, otherwise `Some` of the parsed value
   * @throws NumberFormatException if `s` is non-empty but not a valid integer
   */
  def toOptionInt(s: String): Option[Int] =
    Option(s)
      .filter(text => text.nonEmpty)
      .map(text => text.toInt)
}
Example 6
Source File: ShiftingConsumerImpl.scala    From kafka4s   with Apache License 2.0 5 votes vote down vote up
package com.banno.kafka.consumer

import cats.effect.{Async, ContextShift}
import java.util.regex.Pattern

import scala.concurrent.duration._
import org.apache.kafka.common._
import org.apache.kafka.clients.consumer._

import scala.concurrent.ExecutionContext

/**
 * A `ConsumerApi` decorator that forwards every call to `c`, evaluating the resulting
 * effect on `blockingContext` through the implicit `ContextShift` (`wakeup` is the one
 * exception and is forwarded as-is).
 *
 * @param c               the consumer being wrapped
 * @param blockingContext the execution context the wrapped effects are evaluated on
 */
case class ShiftingConsumerImpl[F[_]: Async, K, V](
    c: ConsumerApi[F, K, V],
    blockingContext: ExecutionContext
)(implicit CS: ContextShift[F])
    extends ConsumerApi[F, K, V] {

  /** Evaluates `fa` on `blockingContext`. */
  private def shifted[A](fa: F[A]): F[A] = CS.evalOn(blockingContext)(fa)

  def assign(partitions: Iterable[TopicPartition]): F[Unit] =
    shifted(c.assign(partitions))

  def assignment: F[Set[TopicPartition]] =
    shifted(c.assignment)

  def beginningOffsets(partitions: Iterable[TopicPartition]): F[Map[TopicPartition, Long]] =
    shifted(c.beginningOffsets(partitions))

  def beginningOffsets(
      partitions: Iterable[TopicPartition],
      timeout: FiniteDuration
  ): F[Map[TopicPartition, Long]] =
    shifted(c.beginningOffsets(partitions, timeout))

  def close: F[Unit] =
    shifted(c.close)

  def close(timeout: FiniteDuration): F[Unit] =
    shifted(c.close(timeout))

  def commitAsync: F[Unit] =
    shifted(c.commitAsync)

  def commitAsync(
      offsets: Map[TopicPartition, OffsetAndMetadata],
      callback: OffsetCommitCallback
  ): F[Unit] =
    shifted(c.commitAsync(offsets, callback))

  def commitAsync(callback: OffsetCommitCallback): F[Unit] =
    shifted(c.commitAsync(callback))

  def commitSync: F[Unit] =
    shifted(c.commitSync)

  def commitSync(offsets: Map[TopicPartition, OffsetAndMetadata]): F[Unit] =
    shifted(c.commitSync(offsets))

  def committed(partition: Set[TopicPartition]): F[Map[TopicPartition, OffsetAndMetadata]] =
    shifted(c.committed(partition))

  def endOffsets(partitions: Iterable[TopicPartition]): F[Map[TopicPartition, Long]] =
    shifted(c.endOffsets(partitions))

  def endOffsets(
      partitions: Iterable[TopicPartition],
      timeout: FiniteDuration
  ): F[Map[TopicPartition, Long]] =
    shifted(c.endOffsets(partitions, timeout))

  def listTopics: F[Map[String, Seq[PartitionInfo]]] =
    shifted(c.listTopics)

  def listTopics(timeout: FiniteDuration): F[Map[String, Seq[PartitionInfo]]] =
    shifted(c.listTopics(timeout))

  def metrics: F[Map[MetricName, Metric]] =
    shifted(c.metrics)

  def offsetsForTimes(
      timestampsToSearch: Map[TopicPartition, Long]
  ): F[Map[TopicPartition, OffsetAndTimestamp]] =
    shifted(c.offsetsForTimes(timestampsToSearch))

  def offsetsForTimes(
      timestampsToSearch: Map[TopicPartition, Long],
      timeout: FiniteDuration
  ): F[Map[TopicPartition, OffsetAndTimestamp]] =
    shifted(c.offsetsForTimes(timestampsToSearch, timeout))

  def partitionsFor(topic: String): F[Seq[PartitionInfo]] =
    shifted(c.partitionsFor(topic))

  def partitionsFor(topic: String, timeout: FiniteDuration): F[Seq[PartitionInfo]] =
    shifted(c.partitionsFor(topic, timeout))

  def pause(partitions: Iterable[TopicPartition]): F[Unit] =
    shifted(c.pause(partitions))

  def paused: F[Set[TopicPartition]] =
    shifted(c.paused)

  def poll(timeout: FiniteDuration): F[ConsumerRecords[K, V]] =
    shifted(c.poll(timeout))

  def position(partition: TopicPartition): F[Long] =
    shifted(c.position(partition))

  def resume(partitions: Iterable[TopicPartition]): F[Unit] =
    shifted(c.resume(partitions))

  def seek(partition: TopicPartition, offset: Long): F[Unit] =
    shifted(c.seek(partition, offset))

  def seekToBeginning(partitions: Iterable[TopicPartition]): F[Unit] =
    shifted(c.seekToBeginning(partitions))

  def seekToEnd(partitions: Iterable[TopicPartition]): F[Unit] =
    shifted(c.seekToEnd(partitions))

  def subscribe(topics: Iterable[String]): F[Unit] =
    shifted(c.subscribe(topics))

  def subscribe(topics: Iterable[String], callback: ConsumerRebalanceListener): F[Unit] =
    shifted(c.subscribe(topics, callback))

  def subscribe(pattern: Pattern): F[Unit] =
    shifted(c.subscribe(pattern))

  def subscribe(pattern: Pattern, callback: ConsumerRebalanceListener): F[Unit] =
    shifted(c.subscribe(pattern, callback))

  def subscription: F[Set[String]] =
    shifted(c.subscription)

  def unsubscribe: F[Unit] =
    shifted(c.unsubscribe)

  // Deliberately not shifted.
  def wakeup: F[Unit] = c.wakeup //TODO wakeup is the one method that is thread-safe, right?
}

object ShiftingConsumerImpl {

  /**
   * Wraps `c` in a [[ShiftingConsumerImpl]] that evaluates on `e`.
   *
   * The result is typed as the plain `ConsumerApi` interface, which is the type
   * expected when creating a Resource.
   */
  def create[F[_]: Async: ContextShift, K, V](
      c: ConsumerApi[F, K, V],
      e: ExecutionContext
  ): ConsumerApi[F, K, V] = {
    val shifting = ShiftingConsumerImpl[F, K, V](c, e)
    shifting
  }
}
Example 7
Source File: ConsumerApiWrapper.scala    From kafka4s   with Apache License 2.0 5 votes vote down vote up
package com.banno.kafka.consumer

import org.apache.kafka.common._
import org.apache.kafka.clients.consumer._

import scala.concurrent.duration._
import java.util.regex.Pattern

/**
 * Base trait for `ConsumerApi` decorators: every operation defined here forwards
 * unchanged to the wrapped [[api]]. Implementors supply `api` and override only the
 * operations whose behavior they want to change.
 */
trait ConsumerApiWrapper[F[_], K, V] extends ConsumerApi[F, K, V] {
  /** The underlying consumer every call is delegated to. */
  def api: ConsumerApi[F, K, V]
  def assign(partitions: Iterable[TopicPartition]): F[Unit] = api.assign(partitions)
  def assignment: F[Set[TopicPartition]] = api.assignment
  def beginningOffsets(partitions: Iterable[TopicPartition]): F[Map[TopicPartition, Long]] =
    api.beginningOffsets(partitions)
  def close: F[Unit] = api.close
  def close(timeout: FiniteDuration): F[Unit] = api.close(timeout)
  def commitAsync: F[Unit] = api.commitAsync
  def commitAsync(
      offsets: Map[TopicPartition, OffsetAndMetadata],
      callback: OffsetCommitCallback
  ): F[Unit] = api.commitAsync(offsets, callback)
  def commitAsync(callback: OffsetCommitCallback): F[Unit] = api.commitAsync(callback)
  def commitSync: F[Unit] = api.commitSync
  def commitSync(offsets: Map[TopicPartition, OffsetAndMetadata]): F[Unit] = api.commitSync(offsets)
  def committed(partition: Set[TopicPartition]): F[Map[TopicPartition, OffsetAndMetadata]] =
    api.committed(partition)
  def endOffsets(partitions: Iterable[TopicPartition]): F[Map[TopicPartition, Long]] =
    api.endOffsets(partitions)
  def listTopics: F[Map[String, Seq[PartitionInfo]]] = api.listTopics
  def metrics: F[Map[MetricName, Metric]] = api.metrics
  def offsetsForTimes(
      timestampsToSearch: Map[TopicPartition, Long]
  ): F[Map[TopicPartition, OffsetAndTimestamp]] =
    api.offsetsForTimes(timestampsToSearch)
  def partitionsFor(topic: String): F[Seq[PartitionInfo]] = api.partitionsFor(topic)
  def pause(partitions: Iterable[TopicPartition]): F[Unit] = api.pause(partitions)
  def paused: F[Set[TopicPartition]] = api.paused
  def poll(timeout: FiniteDuration): F[ConsumerRecords[K, V]] = api.poll(timeout)
  def position(partition: TopicPartition): F[Long] = api.position(partition)
  def resume(partitions: Iterable[TopicPartition]): F[Unit] = api.resume(partitions)
  def seek(partition: TopicPartition, offset: Long): F[Unit] = api.seek(partition, offset)
  def seekToBeginning(partitions: Iterable[TopicPartition]): F[Unit] =
    api.seekToBeginning(partitions)
  def seekToEnd(partitions: Iterable[TopicPartition]): F[Unit] = api.seekToEnd(partitions)
  def subscribe(topics: Iterable[String]): F[Unit] = api.subscribe(topics)
  def subscribe(topics: Iterable[String], callback: ConsumerRebalanceListener): F[Unit] =
    api.subscribe(topics, callback)
  def subscribe(pattern: Pattern): F[Unit] = api.subscribe(pattern)
  def subscribe(pattern: Pattern, callback: ConsumerRebalanceListener): F[Unit] =
    api.subscribe(pattern, callback)
  def subscription: F[Set[String]] = api.subscription
  def unsubscribe: F[Unit] = api.unsubscribe
  def wakeup: F[Unit] = api.wakeup
} 
Example 8
Source File: Avro4sConsumerImpl.scala    From kafka4s   with Apache License 2.0 5 votes vote down vote up
package com.banno.kafka.consumer

import cats.implicits._
import java.util.regex.Pattern

import scala.concurrent.duration._
import org.apache.kafka.common._
import org.apache.kafka.clients.consumer._
import org.apache.avro.generic.GenericRecord
import com.sksamuel.avro4s.FromRecord
import cats.Functor
import com.banno.kafka._

//this is a Bifunctor[ConsumerApi]

/**
 * Presents a `ConsumerApi[F, GenericRecord, GenericRecord]` as a `ConsumerApi[F, K, V]`.
 *
 * Every operation forwards unchanged to `c` except [[poll]], which additionally converts
 * the polled records with `fromGenericRecords[K, V]`.
 *
 * @param c the underlying consumer of generic Avro records
 */
case class Avro4sConsumerImpl[F[_]: Functor, K: FromRecord, V: FromRecord](
    c: ConsumerApi[F, GenericRecord, GenericRecord]
) extends ConsumerApi[F, K, V] {
  def assign(partitions: Iterable[TopicPartition]): F[Unit] = c.assign(partitions)
  def assignment: F[Set[TopicPartition]] = c.assignment
  def beginningOffsets(partitions: Iterable[TopicPartition]): F[Map[TopicPartition, Long]] =
    c.beginningOffsets(partitions)
  def beginningOffsets(
      partitions: Iterable[TopicPartition],
      timeout: FiniteDuration
  ): F[Map[TopicPartition, Long]] =
    c.beginningOffsets(partitions, timeout)
  def close: F[Unit] = c.close
  def close(timeout: FiniteDuration): F[Unit] = c.close(timeout)
  def commitAsync: F[Unit] = c.commitAsync
  def commitAsync(
      offsets: Map[TopicPartition, OffsetAndMetadata],
      callback: OffsetCommitCallback
  ): F[Unit] = c.commitAsync(offsets, callback)
  def commitAsync(callback: OffsetCommitCallback): F[Unit] = c.commitAsync(callback)
  def commitSync: F[Unit] = c.commitSync
  def commitSync(offsets: Map[TopicPartition, OffsetAndMetadata]): F[Unit] = c.commitSync(offsets)
  def committed(partition: Set[TopicPartition]): F[Map[TopicPartition, OffsetAndMetadata]] =
    c.committed(partition)
  def endOffsets(partitions: Iterable[TopicPartition]): F[Map[TopicPartition, Long]] =
    c.endOffsets(partitions)
  def endOffsets(
      partitions: Iterable[TopicPartition],
      timeout: FiniteDuration
  ): F[Map[TopicPartition, Long]] = c.endOffsets(partitions, timeout)
  def listTopics: F[Map[String, Seq[PartitionInfo]]] = c.listTopics
  def listTopics(timeout: FiniteDuration): F[Map[String, Seq[PartitionInfo]]] =
    c.listTopics(timeout)
  def metrics: F[Map[MetricName, Metric]] = c.metrics
  def offsetsForTimes(
      timestampsToSearch: Map[TopicPartition, Long]
  ): F[Map[TopicPartition, OffsetAndTimestamp]] =
    c.offsetsForTimes(timestampsToSearch)
  def offsetsForTimes(
      timestampsToSearch: Map[TopicPartition, Long],
      timeout: FiniteDuration
  ): F[Map[TopicPartition, OffsetAndTimestamp]] =
    c.offsetsForTimes(timestampsToSearch, timeout)
  def partitionsFor(topic: String): F[Seq[PartitionInfo]] = c.partitionsFor(topic)
  def partitionsFor(topic: String, timeout: FiniteDuration): F[Seq[PartitionInfo]] =
    c.partitionsFor(topic, timeout)
  def pause(partitions: Iterable[TopicPartition]): F[Unit] = c.pause(partitions)
  def paused: F[Set[TopicPartition]] = c.paused
  // The only non-trivial member: maps the polled generic records to the K/V types.
  def poll(timeout: FiniteDuration): F[ConsumerRecords[K, V]] =
    c.poll(timeout).map(_.fromGenericRecords[K, V])
  def position(partition: TopicPartition): F[Long] = c.position(partition)
  def resume(partitions: Iterable[TopicPartition]): F[Unit] = c.resume(partitions)
  def seek(partition: TopicPartition, offset: Long): F[Unit] = c.seek(partition, offset)
  def seekToBeginning(partitions: Iterable[TopicPartition]): F[Unit] = c.seekToBeginning(partitions)
  def seekToEnd(partitions: Iterable[TopicPartition]): F[Unit] = c.seekToEnd(partitions)
  def subscribe(topics: Iterable[String]): F[Unit] = c.subscribe(topics)
  def subscribe(topics: Iterable[String], callback: ConsumerRebalanceListener): F[Unit] =
    c.subscribe(topics, callback)
  def subscribe(pattern: Pattern): F[Unit] = c.subscribe(pattern)
  def subscribe(pattern: Pattern, callback: ConsumerRebalanceListener): F[Unit] =
    c.subscribe(pattern, callback)
  def subscription: F[Set[String]] = c.subscription
  def unsubscribe: F[Unit] = c.unsubscribe
  def wakeup: F[Unit] = c.wakeup
} 
Example 9
Source File: DataSourceV2Utils.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.datasources.v2

import java.util.regex.Pattern

import org.apache.spark.internal.Logging
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.sources.v2.{DataSourceV2, SessionConfigSupport}

private[sql] object DataSourceV2Utils extends Logging {

  /**
   * Collects the session configs addressed to a data source.
   *
   * When `ds` is a `SessionConfigSupport`, every entry of `conf.getAllConfs` whose key has
   * the form `spark.datasource.<keyPrefix>.<name>` is returned under the key `<name>`.
   * Any other data source yields an empty map.
   *
   * @param ds   the data source; only `SessionConfigSupport` instances contribute configs
   * @param conf the session configuration to read from
   * @return the matching configs with the `spark.datasource.<keyPrefix>.` prefix stripped
   * @throws IllegalArgumentException if the data source reports a `null` key prefix
   */
  def extractSessionConfigs(ds: DataSourceV2, conf: SQLConf): Map[String, String] = ds match {
    case cs: SessionConfigSupport =>
      val keyPrefix = cs.keyPrefix()
      require(keyPrefix != null, "The data source config key prefix can't be null.")

      // Quote the prefix so regex metacharacters in it (for instance a '.') are matched
      // literally instead of widening the set of accepted keys.
      val quotedPrefix = Pattern.quote(keyPrefix)
      val pattern = Pattern.compile(s"^spark\\.datasource\\.$quotedPrefix\\.(.+)")

      conf.getAllConfs.flatMap { case (key, value) =>
        val m = pattern.matcher(key)
        // The pattern has exactly one capturing group, so a full match always provides group(1).
        if (m.matches()) {
          Seq((m.group(1), value))
        } else {
          Seq.empty
        }
      }

    case _ => Map.empty
  }
}
Example 10
Source File: StringUtils.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.util

import java.util.regex.{Pattern, PatternSyntaxException}

import org.apache.spark.sql.AnalysisException
import org.apache.spark.unsafe.types.UTF8String

object StringUtils {

  /**
   * Selects the names that match a simple wildcard pattern.
   *
   * `pattern` is trimmed and split on `|` into alternatives; within an alternative `*`
   * stands for any sequence of characters and matching ignores case. Alternatives that
   * do not form a valid regular expression are skipped.
   *
   * @param names   candidate names
   * @param pattern `|`-separated wildcard alternatives
   * @return the matching names, without duplicates, in ascending order
   */
  def filterPattern(names: Seq[String], pattern: String): Seq[String] = {
    val alternatives = pattern.trim().split("\\|").toList
    val matched = alternatives.flatMap { alternative =>
      try {
        val compiled = ("(?i)" + alternative.replaceAll("\\*", ".*")).r
        names.filter(candidate => compiled.pattern.matcher(candidate).matches())
      } catch {
        case _: PatternSyntaxException => Seq.empty[String]
      }
    }
    matched.distinct.sorted
  }
}
Example 11
Source File: PlaceholderParser.scala    From cornichon   with Apache License 2.0 5 votes vote down vote up
package com.github.agourlay.cornichon.resolver

import java.util.regex.Pattern

import com.github.agourlay.cornichon.core.{ CornichonError, Session }
import com.github.agourlay.cornichon.resolver.PlaceholderParser._
import org.parboiled2._

import scala.util.{ Failure, Success }

/**
 * parboiled2 grammar that extracts placeholders of the form `<key>` or `<key[index]>`
 * from `input`, skipping over everything that is not a placeholder.
 */
class PlaceholderParser(val input: ParserInput) extends Parser {

  // Entry rule: ignored text, then zero or more placeholders separated by ignored text,
  // up to the end of the input.
  def placeholdersRule = rule {
    Ignore ~ zeroOrMore(PlaceholderRule).separatedBy(Ignore) ~ Ignore ~ EOI
  }

  // '<' key, optional '[index]', '>' — the captured values are passed to Placeholder.
  def PlaceholderRule = rule('<' ~ PlaceholderTXT ~ optIndex ~ '>' ~> Placeholder)

  // Optional numeric index in square brackets.
  def optIndex = rule(optional('[' ~ Number ~ ']'))

  // Captures one or more characters accepted by allowedCharsInPlaceholdersPredicate.
  def PlaceholderTXT = rule(capture(oneOrMore(allowedCharsInPlaceholdersPredicate)))

  // Consumes any characters for as long as no placeholder starts at the current position.
  def Ignore = rule { zeroOrMore(!PlaceholderRule ~ ANY) }

  // Captures a run of digits and converts it to an Int.
  def Number = rule { capture(Digits) ~> (_.toInt) }

  // One or more decimal digits.
  def Digits = rule { oneOrMore(CharPredicate.Digit) }
}

object PlaceholderParser {

  // Shared result for inputs that cannot contain a placeholder.
  private val noPlaceholders = Right(Nil)
  // Visible characters minus those Session does not allow in a key.
  private val allowedCharsInPlaceholdersPredicate: CharPredicate = CharPredicate.Visible -- Session.notAllowedInKey

  /**
   * Extracts the distinct placeholders found in `input`.
   *
   * @return the placeholders in order of first appearance, or an error describing why
   *         the input could not be parsed
   */
  def parse(input: String): Either[CornichonError, List[Placeholder]] = {
    if (input.contains("<")) {
      val parser = new PlaceholderParser(input)
      parser.placeholdersRule.run() match {
        case Success(found) =>
          Right(found.toList.distinct)
        case Failure(parseError: ParseError) =>
          val details = parser.formatError(parseError, new ErrorFormatter(showTraces = true))
          Left(PlaceholderParsingError(input, details))
        case Failure(other: Throwable) =>
          Left(PlaceholderError(input, other))
      }
    } else {
      // Without a '<' there is nothing to find, so the parser is not run at all.
      noPlaceholders
    }
  }
}

/**
 * A placeholder occurrence.
 *
 * @param key   the text between the angle brackets, without any index
 * @param index the optional numeric index written as `[n]` after the key
 */
case class Placeholder(key: String, index: Option[Int]) {
  /** The placeholder as it is written in text: `<key>` or `<key[index]>`. */
  val fullKey = index match {
    case Some(index) => s"<$key[$index]>"
    case None        => s"<$key>"
  }

  /** Regex matching [[fullKey]] literally; compiled on first use. */
  lazy val pattern = Pattern.compile(Pattern.quote(fullKey))
}

/** Error carrying an unexpected `Throwable` raised while parsing placeholders in `input`. */
case class PlaceholderError(input: String, error: Throwable) extends CornichonError {
  // Built lazily from the throwable's message and the offending input.
  lazy val baseErrorMessage = s"error '${error.getMessage}' thrown during placeholder parsing for input $input"
}

/** Error carrying an already formatted parse error for placeholders in `input`. */
case class PlaceholderParsingError(input: String, error: String) extends CornichonError {
  // Built lazily from the formatted parser error and the offending input.
  lazy val baseErrorMessage = s"error '$error' during placeholder parsing for input $input"
} 
Example 12
Source File: StringUtils.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.util

import java.util.regex.Pattern

object StringUtils {

  /**
   * Translates a SQL LIKE pattern into a Java regular expression.
   *
   *  - `_` becomes `.` (exactly one character)
   *  - `%` becomes `.*` (zero or more characters)
   *  - `\_` and `\%` become the literal characters `_` and `%`
   *  - `\\` becomes one literal backslash
   *  - a backslash before any other character is kept literally together with that character
   *  - a lone trailing backslash is dropped
   *  - every other character is quoted so it matches itself
   *
   * The result is prefixed with `(?s)` so that `.` also matches line terminators.
   *
   * @param v the LIKE pattern
   * @return the equivalent regex, or `v` itself when `v` is empty
   */
  def escapeLikeRegex(v: String): String = {
    if (v.isEmpty) {
      v
    } else {
      val out = new StringBuilder("(?s)")
      val chars = v.iterator
      // Escape sequences are consumed as a pair. Pairing each character with its
      // predecessor instead made an escaped backslash vanish and turned the character
      // after it into an escaped one.
      while (chars.hasNext) {
        chars.next() match {
          case '\\' if chars.hasNext =>
            chars.next() match {
              case '_' => out.append("_")
              case '%' => out.append("%")
              case '\\' => out.append(Pattern.quote("\\"))
              case c => out.append(Pattern.quote("\\" + c))
            }
          case '\\' => () // trailing backslash with nothing to escape
          case '_' => out.append(".")
          case '%' => out.append(".*")
          case c => out.append(Pattern.quote(Character.toString(c)))
        }
      }
      out.toString
    }
  }
}
Example 13
Source File: DcosReleaseVersionParser.scala    From cosmos   with Apache License 2.0 5 votes vote down vote up
package com.mesosphere.universe.v3.model

import com.twitter.util.{Return, Throw, Try}

import java.util.regex.Pattern

/**
 * Parses release version strings of the form `N(.N)*(-suffix)?` into a
 * `DcosReleaseVersion`, where each `N` is a number without leading zeros and the
 * optional suffix is a dot-separated list of identifiers.
 */
object DcosReleaseVersionParser {

  // A single numeric component: "0" or a number that does not start with 0.
  private[this] val versionFragment = "(?:0|[1-9][0-9]*)"
  // A further numeric component introduced by a literal dot.
  private[this] val subVersionFragment = "\\." + versionFragment
  // One or more dot-separated identifiers; each is either a number without leading zeros
  // or a run of [0-9a-zA-Z-] containing at least one letter or hyphen.
  private[this] val suffixFragment =
    "((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*)"

  // In the s-interpolated strings below "$$" produces a literal '$' (end-of-input anchor).
  private[v3] val versionRegex = s"^$versionFragment$$"
  private[v3] val suffixRegex = s"^$suffixFragment$$"
  private[v3] val fullRegex = s"^$versionFragment(?:$subVersionFragment)*(?<suffix>-$suffixFragment)?$$"

  private[v3] val versionPattern = Pattern.compile(versionRegex)
  private[v3] val suffixPattern = Pattern.compile(suffixRegex)
  private[v3] val fullPattern = Pattern.compile(fullRegex)

  /** Like [[parse]] but throws the failure instead of returning it. */
  def parseUnsafe(s: String): DcosReleaseVersion = parse(s).get

  /**
   * Validates `s` against `fullRegex` and splits it into its numeric components and
   * optional suffix.
   *
   * @return the parsed version, or a `Throw` when `s` is blank, does not match the
   *         expected format, or has a numeric component that cannot be parsed as an Int
   */
  def parse(s: String): Try[DcosReleaseVersion] = {
    val errMsg = s"Value '$s' does not conform to expected format $fullRegex"
    Try {
      // NOTE(review): validation relies on `assert`, so failures surface as AssertionError
      // with an "assertion failed: " message prefix.
      assert(!s.trim.isEmpty, "Value must not be empty")
      assert(fullPattern.matcher(s).matches(), errMsg)
      s
    } flatMap { validatedString =>
      // Everything before the first '-' is the dotted version; the remaining pieces are
      // re-joined with '-' to form the suffix, which may itself contain hyphens.
      validatedString.split('-').toList match {
        case Seq(version) =>
          Return(version -> None)
        case Seq(version, tail @ _*) =>
          Return(version -> Some(tail.mkString("-")))
        case _ =>
          Throw(new AssertionError(errMsg))
      }
    } flatMap { case (version, subVersion) =>
      parseVersionSuffix(version, subVersion, errMsg)
    }
  }

  // Converts one numeric component; a non-Int value (e.g. out of range) becomes a Throw.
  private[this] def parseVersion(s: String): Try[DcosReleaseVersion.Version] = Try {
    DcosReleaseVersion.Version(s.toInt)
  }

  // Wraps the optional suffix text; always succeeds.
  private[this] def parseSuffix(s: Option[String]): Try[Option[DcosReleaseVersion.Suffix]] = s match {
    case None => Return(None)
    case Some(suff) => Return(Some(DcosReleaseVersion.Suffix(suff)))
  }

  // Splits the dotted version into a head component and the remaining components and
  // combines them with the suffix. The first two cases handle exactly two components and
  // exactly one component; the third handles any other non-empty list.
  private[this] def parseVersionSuffix(version: String, suffix: Option[String], errMsg: String): Try[DcosReleaseVersion] = {
    version.split('.').toList match {
      case head :: tail :: Nil =>
        for {
          h <- parseVersion(head)
          t <- parseVersion(tail)
          s <- parseSuffix(suffix)
        } yield {
          DcosReleaseVersion(h, List(t), s)
        }
      case head :: Nil =>
        for {
          h <- parseVersion(head)
          s <- parseSuffix(suffix)
        } yield {
          DcosReleaseVersion(h, List.empty, s)
        }
      case head :: tail =>
        for {
          h <- parseVersion(head)
          t <- Try.collect(tail.map(parseVersion))
          s <- parseSuffix(suffix)
        } yield {
          DcosReleaseVersion(h, t.toList, s)
        }
      case _ =>
        Throw(new AssertionError(errMsg))
    }
  }

} 
Example 14
Source File: Segment.scala    From shield   with MIT License 5 votes vote down vote up
package shield.routing

import java.util.regex.Pattern

/** One piece of a route path; each piece contributes a regex fragment and has a priority. */
sealed trait Segment {
  /** Regular-expression fragment that matches this segment. */
  def regexPiece : String
  /** Numeric rank of the segment kind (0 for a slash up to 5 for a path segment). */
  private[routing] def priority: Int
}

/** A literal `/`. */
case object SlashSegment extends Segment {
  override def regexPiece: String = "/"
  override private[routing] val priority: Int = 0
  override def toString = "/"
}

/** Fixed text, matched literally. */
case class StaticSegment(segment: String) extends Segment {
  // Quoted so regex metacharacters in the text have no special meaning.
  override def regexPiece: String = Pattern.quote(segment)
  override private[routing] val priority: Int = 1
  override def toString = segment
}

/** An optional extension: a dot followed by any characters other than `/`. */
case object ExtensionSegment extends Segment {
  override def regexPiece: String = "(\\.[^/]*)?"
  override private[routing] val priority: Int = 2
  override def toString = "(.extension)?"
}

/** Any run of characters that does not contain `/`. */
case object WildcardSegment extends Segment {
  override def regexPiece: String = "[^/]*"
  override private[routing] val priority: Int = 3
  override def toString = "{}"
}

/** A caller-supplied regular expression, used verbatim. */
case class RegexSegment(expr: String) extends Segment {
  override def regexPiece: String = expr
  override private[routing] val priority: Int = 4
  override def toString = s"{regex: $expr}"
}

/** Any run of characters, including `/`. */
case object PathSegment extends Segment {
  override def regexPiece: String = ".*"
  override private[routing] val priority: Int = 5
  override def toString = "(.*)"
} 
Example 15
Source File: FileUtils.scala    From sctags   with Apache License 2.0 5 votes vote down vote up
package sctags

import java.io.{File, FileFilter}
import java.util.regex.Pattern

import scala.collection.mutable.ArrayBuffer
import scala.language.implicitConversions

/** Helpers for composing `java.io.FileFilter`s and for walking directory trees. */
object FileUtils {

  /** Lets a plain predicate be used wherever a `FileFilter` is expected. */
  implicit def fun2fileFilter(fun: File => Boolean): FileFilter =
    new FileFilter { def accept(f: File): Boolean = fun(f) }

  /** Adds the combinators of [[RichFilter]] to any `FileFilter`. */
  implicit def fileFilter2richFilter(filter: FileFilter): RichFilter =
    new RichFilter(filter)

  /** Boolean combinators (`!`, `&&`, `||`) over file filters. */
  final class RichFilter(val self: FileFilter) extends Proxy {
    /** Filter accepting exactly the files `self` rejects. */
    def unary_! : FileFilter = new FileFilter { def accept(f: File): Boolean = !self.accept(f) }

    /** Combines the verdicts of `self` and `other` with `op`; both filters are always consulted. */
    def join(other: FileFilter, op: (Boolean, Boolean) => Boolean): FileFilter =
      new FileFilter { def accept(f: File): Boolean = op(self.accept(f), other.accept(f)) }

    def &&(other: FileFilter): FileFilter = join(other, _ && _)
    def ||(other: FileFilter): FileFilter = join(other, _ || _)
  }

  /** Accepts directories only. */
  object DirectoryFilter extends FileFilter {
    def accept(f: File): Boolean = f.isDirectory
  }

  /** Accepts files whose name (not the full path) matches `re` entirely. */
  class NameMatchFilter(val re: Pattern) extends FileFilter {
    def this(re: String) = this(Pattern.compile(re))
    def accept(f: File): Boolean = re.matcher(f.getName).matches
  }

  /** Accepts everything. */
  object AcceptAllFilter extends FileFilter {
    def accept(f: File): Boolean = true
  }

  /**
   * Lists the entries below `base`, at any depth, that `filter` accepts.
   *
   * For each directory the contents of its sub-directories come first, followed by the
   * directory's own accepted entries. A `base` that is not a directory, or a directory
   * that cannot be read, contributes nothing.
   *
   * @param base   directory to start from
   * @param filter decides which entries are returned; it does not restrict which
   *               sub-directories are descended into
   * @return the accepted entries
   */
  def listFilesRecursive(base: File, filter: FileFilter): Seq[File] = {
    // File.listFiles returns null when the path is not a directory or an I/O error
    // occurs; treat that as "no entries" instead of failing with a NullPointerException.
    def entries(dir: File, accepting: FileFilter): Seq[File] =
      Option(dir.listFiles(accepting)).fold(Seq.empty[File])(_.toSeq)

    def walk(dir: File): Seq[File] =
      entries(dir, DirectoryFilter).flatMap(walk) ++ entries(dir, filter)

    walk(base)
  }
}
Example 16
Source File: AccessLogParser.scala    From spark-scala   with Creative Commons Zero v1.0 Universal 5 votes vote down vote up
package com.supergloo.utils

import java.util.regex.Pattern

import com.supergloo.models.HttpStatus


  /**
   * Looks for the pattern `p` in `logLine` and, when it is found, builds an
   * `HttpStatus` from capture group 6 of the match.
   *
   * @return `None` when the pattern does not occur in the line
   */
  def parseHttpStatusCode(logLine: String): Option[HttpStatus] =
    Some(p.matcher(logLine))
      .filter(lineMatcher => lineMatcher.find())
      .map(lineMatcher => createHttpStatus(lineMatcher.group(6)))

} 
Example 17
Source File: RegexLiteral.scala    From kantan.regex   with Apache License 2.0 5 votes vote down vote up
package kantan.regex
package literals

import java.util.regex.Pattern
import scala.reflect.macros.blackbox.Context
import scala.util.{Failure, Success, Try => UTry}

/** Value-class wrapper around a `StringContext` that provides the `rx` interpolator.
  *
  * `rx` is a macro: its expansion is produced by `RegexLiteral.rxImpl` and has type `Pattern`.
  */
final class RegexLiteral(val sc: StringContext) extends AnyVal {
  def rx(args: Any*): Pattern = macro RegexLiteral.rxImpl
}

// Relatively distasteful trick to get rid of spurious warnings: the macro
// implementation's signature is declared here and overridden in `object RegexLiteral`.
trait RegexLiteralMacro {
  def rxImpl(c: Context)(args: c.Expr[Any]*): c.Expr[Pattern]
}

/** Macro implementation behind the `rx` interpolator. */
object RegexLiteral extends RegexLiteralMacro {

  /** Expands `rx"..."` into a `Pattern.compile` call on the literal.
    *
    * The literal is compiled once during macro expansion purely as a validity
    * check; an invalid regex aborts compilation. `args` is not inspected.
    */
  override def rxImpl(c: Context)(args: c.Expr[Any]*): c.Expr[Pattern] = {
    import c.universe._

    c.prefix.tree match {
      // Only a prefix wrapping exactly one constant string literal is accepted.
      case Apply(_, List(Apply(_, List(lit @ Literal(Constant(str: String)))))) =>
        UTry(Pattern.compile(str)) match {
          case Failure(_) => c.abort(c.enclosingPosition, s"Illegal regex: '$str'")
          case Success(_) =>
            // The generated code compiles the same literal again at runtime.
            reify {
              Pattern.compile(c.Expr[String](lit).splice)
            }
        }
      case _ =>
        c.abort(c.enclosingPosition, "rx can only be used on string literals")
    }
  }
}

/** Mix-in providing the implicit conversion from `StringContext` to [[RegexLiteral]]. */
trait ToRegexLiteral {
  implicit def toRegexLiteral(sc: StringContext): RegexLiteral = new RegexLiteral(sc)
}
Example 18
Source File: ConfigReader.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.internal.config

import java.util.{Map => JMap}
import java.util.regex.Pattern

import scala.collection.mutable.HashMap
import scala.util.matching.Regex

private object ConfigReader {

  // Matches references of the form `${name}` or `${prefix:name}`:
  //   group 1 - optional prefix made of word characters (the part before `:`)
  //   group 2 - the name, a non-empty run of non-whitespace characters
  private val REF_RE = "\\$\\{(?:(\\w+?):)?(\\S+?)\\}".r

}


  /** Expands the references found in `input`, starting with no reference marked as in use. */
  def substitute(input: String): String = substitute(input, Set.empty[String])

  /**
   * Replaces every reference matched by `ConfigReader.REF_RE` in `input` with the
   * value found through `bindings`, expanding that value recursively.
   *
   * A reference that cannot be resolved is left in place verbatim. A `null`
   * input is returned unchanged.
   *
   * @param usedRefs references already being expanded further up the call chain;
   *                 meeting one of them again fails with a "Circular reference" error
   */
  private def substitute(input: String, usedRefs: Set[String]): String =
    if (input == null) {
      input
    } else {
      ConfigReader.REF_RE.replaceAllIn(input, { found =>
        val prefix = found.group(1)
        val name = found.group(2)
        // The prefix group is null when the reference has no `prefix:` part.
        val ref = if (prefix == null) name else s"$prefix:$name"
        require(!usedRefs.contains(ref), s"Circular reference in $input: $ref")

        val resolved = bindings.get(prefix)
          .flatMap(_.get(name))
          .map { value => substitute(value, usedRefs + ref) }
        val replacement = resolved.getOrElse(found.matched)
        // Quote so that `$` and `\` in the value are not read as group references.
        Regex.quoteReplacement(replacement)
      })
    }

} 
Example 19
Source File: StringUtils.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.util

import java.util.regex.{Pattern, PatternSyntaxException}

import org.apache.spark.unsafe.types.UTF8String

object StringUtils {

  /**
   * Translates a LIKE pattern into a regular expression string.
   *
   *  - `_` becomes `.` (exactly one arbitrary character)
   *  - `%` becomes `.*` (any run of characters, possibly empty)
   *  - `_` or `%` directly preceded by a backslash is emitted as the plain character
   *  - a backslash itself emits nothing at its own position
   *  - everything else is quoted with `Pattern.quote`
   *
   * A non-empty pattern gets the `(?s)` prefix; an empty pattern is returned as is.
   */
  def escapeLikeRegex(v: String): String = {
    // Translation of the character `cur`, given the character `prev` right before it.
    def translate(prev: Char, cur: Char): String =
      if (cur == '\\') {
        ""
      } else if (prev == '\\') {
        cur match {
          case '_' => "_"
          case '%' => "%"
          case _ => Pattern.quote("\\" + cur)
        }
      } else {
        cur match {
          case '_' => "."
          case '%' => ".*"
          case _ => Pattern.quote(Character.toString(cur))
        }
      }

    if (v.isEmpty) {
      v
    } else {
      val pieces = v.indices.map { i =>
        // The first character is treated as if it were preceded by a space.
        val prev = if (i == 0) ' ' else v.charAt(i - 1)
        translate(prev, v.charAt(i))
      }
      "(?s)" + pieces.mkString
    }
  }

  private[this] val trueStrings = Set("t", "true", "y", "yes", "1").map(UTF8String.fromString)
  private[this] val falseStrings = Set("f", "false", "n", "no", "0").map(UTF8String.fromString)

  /** True when the lower-cased `s` is one of t, true, y, yes, 1. */
  def isTrueString(s: UTF8String): Boolean = trueStrings.contains(s.toLowerCase)

  /** True when the lower-cased `s` is one of f, false, n, no, 0. */
  def isFalseString(s: UTF8String): Boolean = falseStrings.contains(s.toLowerCase)

  /**
   * Returns the sorted, de-duplicated subset of `names` matching `pattern`.
   *
   * `pattern` is trimmed and split on `|`; in each alternative `*` stands for
   * `.*`, and matching is case-insensitive and must cover the whole name.
   * Alternatives that are not valid regular expressions are ignored.
   */
  def filterPattern(names: Seq[String], pattern: String): Seq[String] = {
    val matched = scala.collection.mutable.SortedSet.empty[String]
    for (subPattern <- pattern.trim().split("\\|")) {
      try {
        val compiled = Pattern.compile("(?i)" + subPattern.replaceAll("\\*", ".*"))
        matched ++= names.filter(name => compiled.matcher(name).matches())
      } catch {
        // Deliberately skipped: an invalid alternative matches nothing.
        case _: PatternSyntaxException =>
      }
    }
    matched.toSeq
  }
}
Example 20
Source File: PidUtils.scala    From sbt-dynamodb   with MIT License 5 votes vote down vote up
package com.localytics.sbt.dynamodb

import java.io.File
import java.util.regex.Pattern

/** Helpers for locating and terminating the process started from a given jar. */
object PidUtils {

  /**
   * Extracts the process id from a process listing.
   *
   * Looks in `input` for the first occurrence of `<digits> <absolute jar path> -port <port>`
   * and returns the leading digits.
   *
   * @param input process listing text to search
   * @param port  port the process was started with; must match the whole number
   * @param jar   jar file whose absolute path appears in the listing
   * @return the pid as a string, or `None` when no entry matches
   */
  def extractPid(input: String, port: Int, jar: File): Option[String] = {
    // The negative lookahead stops e.g. port 80 from matching "-port 8000".
    val pidPortRegex = s"\\d+ ${Pattern.quote(jar.getAbsolutePath)} -port $port(?!\\d)".r
    pidPortRegex.findFirstIn(input).map(_.split(" ")(0))
  }

  /** The `os.name` system property, or the `os` property when `os.name` is unset or empty. */
  def osName: String = System.getProperty("os.name") match {
    case n: String if !n.isEmpty => n
    case _ => System.getProperty("os")
  }

  /** Shell command that terminates process `pid` on the current operating system. */
  def killPidCommand(pid: String): String = {
    // osName may be null when neither property is set; treat that as "not Windows".
    val onWindows = Option(osName).exists(_.toLowerCase.contains("windows"))
    if (onWindows) s"Taskkill /PID $pid /F" else s"kill $pid"
  }

}
Example 21
Source File: TemplateParamsSpec.scala    From comet-data-pipeline   with Apache License 2.0 5 votes vote down vote up
package com.ebiznext.comet.database.extractor

import java.util.regex.Pattern

import better.files.File
import com.ebiznext.comet.schema.model._
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class TemplateParamsSpec extends AnyFlatSpec with Matchers {
  val scriptOutputFolder: File = File("/tmp")

  // Builds the "table1" schema used by both scenarios; only the metadata differs.
  private def table1Schema(metadata: Metadata): Schema =
    Schema(
      name = "table1",
      pattern = Pattern.compile("output_file.*.csv"),
      List(Attribute(name = "col1"), Attribute(name = "col2")),
      metadata = Option(metadata),
      merge = Some(MergeOptions(List("col1", "col2"), None, timestamp = Some("updateCol"))),
      comment = None,
      presql = None,
      postsql = None
    )

  "fromSchema" should "generate the correct TemplateParams for a given Schema" in {
    // APPEND write mode, default separator
    val appendSchema = table1Schema(Metadata(write = Some(WriteMode.APPEND)))

    val expected = TemplateParams(
      tableToExport = "table1",
      columnsToExport = List("col1", "col2"),
      fullExport = false,
      dsvDelimiter = ",",
      deltaColumn = Some("updateCol"),
      exportOutputFileBase = "output_file",
      scriptOutputFile = scriptOutputFolder / "EXTRACT_table1.sql"
    )

    TemplateParams.fromSchema(appendSchema, scriptOutputFolder) shouldBe expected
  }

  it should "generate the correct TemplateParams for an other Schema" in {
    // OVERWRITE write mode with an explicit "|" separator
    val overwriteSchema =
      table1Schema(Metadata(write = Some(WriteMode.OVERWRITE), separator = Some("|")))

    val expected = TemplateParams(
      tableToExport = "table1",
      columnsToExport = List("col1", "col2"),
      fullExport = true,
      dsvDelimiter = "|",
      deltaColumn = None,
      exportOutputFileBase = "output_file",
      scriptOutputFile = scriptOutputFolder / "EXTRACT_table1.sql"
    )

    TemplateParams.fromSchema(overwriteSchema, scriptOutputFolder) shouldBe expected
  }
}
Example 22
Source File: ESLoadConfig.scala    From comet-data-pipeline   with Apache License 2.0 5 votes vote down vote up
package com.ebiznext.comet.job.index.esload

import java.util.regex.Pattern

import buildinfo.BuildInfo
import com.ebiznext.comet.config.Settings
import com.ebiznext.comet.schema.model.RowLevelSecurity
import com.ebiznext.comet.utils.CliConfig
import org.apache.hadoop.fs.Path
import scopt.OParser

/** Settings for loading a dataset into an Elasticsearch index. */
case class ESLoadConfig(
  timestamp: Option[String] = None,
  id: Option[String] = None,
  mapping: Option[Path] = None,
  domain: String = "",
  schema: String = "",
  format: String = "",
  dataset: Option[Path] = None,
  conf: Map[String, String] = Map(),
  rls: Option[List[RowLevelSecurity]] = None
) {

  /** The explicit `dataset` path, or `<datasets>/<accepted area>/<domain>/<schema>` from settings. */
  def getDataset()(implicit settings: Settings): Path =
    dataset match {
      case Some(explicitPath) => explicitPath
      case None =>
        new Path(s"${settings.comet.datasets}/${settings.comet.area.accepted}/$domain/$schema")
    }

  /** Index name: lower-cased domain and schema joined with an underscore. */
  def getIndexName(): String = s"${domain.toLowerCase}_${schema.toLowerCase}"

  // Shape of a timestamp spec: `{<column>|<format>}`.
  private val pattern = Pattern.compile("\\{(.*)\\|(.*)\\}")

  /** The part before `|` of a `{column|format}` timestamp, if the timestamp has that shape. */
  def getTimestampCol(): Option[String] =
    timestamp
      .map(ts => pattern.matcher(ts))
      .filter(_.matches())
      .map(_.group(1))

  /** Resource path `<index>[-<timestamp>]/_doc`. */
  def getResource(): String =
    timestamp match {
      case Some(ts) => s"${this.getIndexName()}-$ts/_doc"
      case None => s"${this.getIndexName()}/_doc"
    }
}

/** Command-line definition for [[ESLoadConfig]]. */
object ESLoadConfig extends CliConfig[ESLoadConfig] {

  // Option parser: each option copies its parsed value into the config.
  // `domain`, `schema` and `format` are required; every other option is optional.
  val parser: OParser[Unit, ESLoadConfig] = {
    val builder = OParser.builder[ESLoadConfig]
    import builder._
    OParser.sequence(
      programName("comet"),
      head("comet", BuildInfo.version),
      opt[String]("timestamp")
        .action((x, c) => c.copy(timestamp = Some(x)))
        .optional()
        .text("Elasticsearch index timestamp suffix as in {@timestamp|yyyy.MM.dd}"),
      opt[String]("id")
        .action((x, c) => c.copy(id = Some(x)))
        .optional()
        .text("Elasticsearch Document Id"),
      opt[String]("mapping")
        .action((x, c) => c.copy(mapping = Some(new Path(x))))
        .optional()
        .text("Path to Elasticsearch Mapping File"),
      opt[String]("domain")
        .action((x, c) => c.copy(domain = x))
        .required()
        .text("Domain Name"),
      opt[String]("schema")
        .action((x, c) => c.copy(schema = x))
        .required()
        .text("Schema Name"),
      opt[String]("format")
        .action((x, c) => c.copy(format = x))
        .required()
        .text("Dataset input file : parquet, json or json-array"),
      opt[String]("dataset")
        .action((x, c) => c.copy(dataset = Some(new Path(x))))
        .optional()
        .text("Input dataset path"),
      opt[Map[String, String]]("conf")
        .action((x, c) => c.copy(conf = x))
        .optional()
        .valueName(
          "es.batch.size.entries=1000,es.batch.size.bytes=1mb... (see https://www.elastic.co/guide/en/elasticsearch/hadoop/current/configuration.html)"
        )
        .text("eshadoop configuration options")
    )
  }

  /** Parses `args` on top of a default `ESLoadConfig()`, returning the `Option` produced by `OParser.parse`. */
  def parse(args: Seq[String]): Option[ESLoadConfig] = OParser.parse(parser, args, ESLoadConfig())
}
Example 23
Source File: Device.scala    From uap-scala   with Do What The F*ck You Want To Public License 5 votes vote down vote up
package org.uaparser.scala

import java.util.regex.{ Matcher, Pattern }
import org.uaparser.scala.MatcherOps._

/** A parsed device: its family name plus optional brand and model. */
case class Device(family: String, brand: Option[String] = None, model: Option[String] = None)

object Device {
  /** Builds a `Device` from the keys "family", "brand" and "model"; `None` without a family. */
  private[scala] def fromMap(m: Map[String, String]) =
    m.get("family").map(family => Device(family, m.get("brand"), m.get("model")))

  /** One device rule: a regex plus optional replacement templates for family, brand and model. */
  private[scala] case class DevicePattern(pattern: Pattern, familyReplacement: Option[String],
                                           brandReplacement: Option[String], modelReplacement: Option[String]) {
    /** Applies this rule to `agent`; `None` when the regex is not found or no family results. */
    def process(agent: String): Option[Device] = {
      val matcher = pattern.matcher(agent)
      if (matcher.find()) {
        // Expands a replacement template, if one is configured, against the current match.
        def expand(template: Option[String]): Option[String] = template.map(t => replace(t, matcher))

        // Family and model fall back to capture group 1; empty brand/model are dropped.
        val family = expand(familyReplacement).orElse(matcher.groupAt(1))
        val brand = expand(brandReplacement).filter(_.nonEmpty)
        val model = expand(modelReplacement).orElse(matcher.groupAt(1)).filter(_.nonEmpty)
        family.map(name => Device(name, brand, model))
      } else {
        None
      }
    }

    /** Substitutes `$1`, `$2`, ... in `replacement` with the matcher's groups and trims the result. */
    def replace(replacement: String, matcher: Matcher): String = {
      val groupCount = matcher.groupCount()
      val expanded =
        if (!replacement.contains("$") || groupCount < 1) {
          replacement
        } else {
          (1 to groupCount).foldLeft(replacement) { (acc, idx) =>
            // Groups that did not take part in the match are inserted as "".
            val captured = matcher.group(idx)
            val toInsert = if (captured eq null) "" else captured
            acc.replaceFirst("\\$" + idx, Matcher.quoteReplacement(toInsert))
          }
        }
      expanded.trim
    }
  }

  private object DevicePattern {
    /** Builds a rule from a config entry; the presence of "regex_flag" makes it case-insensitive. */
    def fromMap(m: Map[String, String]): Option[DevicePattern] = m.get("regex").map { r =>
      val pattern = m.get("regex_flag") match {
        case Some(_) => Pattern.compile(r, Pattern.CASE_INSENSITIVE)
        case None => Pattern.compile(r)
      }
      DevicePattern(pattern, m.get("device_replacement"), m.get("brand_replacement"), m.get("model_replacement"))
    }
  }

  /** Tries the rules in order and returns the first device produced, or `Device("Other")`. */
  case class DeviceParser(patterns: List[DevicePattern]) {
    def parse(agent: String): Device =
      patterns.iterator
        .map(rule => rule.process(agent))
        .collectFirst { case Some(device) => device }
        .getOrElse(Device("Other"))
  }

  object DeviceParser {
    /** Builds a parser from config entries, skipping entries without a "regex" key. */
    def fromList(config: List[Map[String, String]]): DeviceParser =
      DeviceParser(config.flatMap(entry => DevicePattern.fromMap(entry)))
  }
}
Example 24
Source File: UserAgent.scala    From uap-scala   with Do What The F*ck You Want To Public License 5 votes vote down vote up
package org.uaparser.scala

import MatcherOps._
import java.util.regex.{Matcher, Pattern}

/** A parsed user agent: its family name plus optional major, minor and patch version parts. */
case class UserAgent(family: String, major: Option[String] = None, minor: Option[String] = None,
                     patch: Option[String] = None)

object UserAgent {
  /** Builds a `UserAgent` from the keys "family", "major", "minor" and "patch"; `None` without a family. */
  private[scala] def fromMap(m: Map[String, String]) = m.get("family").map { family =>
    UserAgent(family, m.get("major"), m.get("minor"), m.get("patch"))
  }

  /** One user-agent rule: a regex plus optional replacements for the family and the three version parts. */
  private[scala] case class UserAgentPattern(pattern: Pattern, familyReplacement: Option[String],
                                      v1Replacement: Option[String], v2Replacement: Option[String],
                                      v3Replacement: Option[String]) {
    /**
     * Applies this rule to `agent`.
     *
     * The family is the family replacement (with `$1` substituted by capture
     * group 1) or, without a replacement, capture group 1 itself. Version parts
     * come from their replacements or from groups 2 to 4; empty parts are dropped.
     *
     * @return `None` when the regex is not found in `agent` or no family results
     */
    def process(agent: String): Option[UserAgent] = {
      val matcher = pattern.matcher(agent)
      if (!matcher.find()) {
        None
      } else {
        val family = familyReplacement.map { replacement =>
          if (replacement.contains("$1") && matcher.groupCount() >= 1) {
            // group(1) is null when the group did not take part in the match;
            // Matcher.quoteReplacement(null) would throw, so substitute "" instead.
            val captured = Option(matcher.group(1)).getOrElse("")
            replacement.replaceFirst("\\$1", Matcher.quoteReplacement(captured))
          } else replacement
        }.orElse(matcher.groupAt(1))

        family.map { name =>
          val major = v1Replacement.orElse(matcher.groupAt(2)).filter(_.nonEmpty)
          val minor = v2Replacement.orElse(matcher.groupAt(3)).filter(_.nonEmpty)
          val patch = v3Replacement.orElse(matcher.groupAt(4)).filter(_.nonEmpty)
          UserAgent(name, major, minor, patch)
        }
      }
    }
  }

  private object UserAgentPattern {
    /** Builds a rule from a config entry; `None` when the entry has no "regex" key. */
    def fromMap(config: Map[String, String]): Option[UserAgentPattern] = config.get("regex").map { r =>
      UserAgentPattern(Pattern.compile(r), config.get("family_replacement"), config.get("v1_replacement"),
        config.get("v2_replacement"), config.get("v3_replacement"))
    }
  }

  /** Tries the rules in order and returns the first result, or `UserAgent("Other")`. */
  case class UserAgentParser(patterns: List[UserAgentPattern]) {
    def parse(agent: String): UserAgent = patterns.foldLeft[Option[UserAgent]](None) {
      case (None, pattern) => pattern.process(agent)
      case (result, _) => result
    }.getOrElse(UserAgent("Other"))
  }

  object UserAgentParser {
    /** Builds a parser from config entries, skipping entries without a "regex" key. */
    def fromList(config: List[Map[String, String]]): UserAgentParser =
      UserAgentParser(config.flatMap(entry => UserAgentPattern.fromMap(entry)))
  }
}
Example 25
Source File: RegexConstraint.scala    From drunken-data-quality   with Apache License 2.0 5 votes vote down vote up
package de.frosner.ddq.constraints

import java.util.regex.Pattern

import org.apache.spark.sql.functions._
import org.apache.spark.sql.{Column, DataFrame}

import scala.util.Try

/** Constraint requiring the non-null values of `columnName` to contain a match of `regex`. */
case class RegexConstraint(columnName: String, regex: String) extends Constraint {

  // Counts the rows whose value is non-null and contains no match of the regex.
  // A failure while counting is captured in the Try and passed on to the result.
  val fun = (df: DataFrame) => {
    val compiled = Pattern.compile(regex)
    val violatesRegex = udf { (value: String) =>
      value != null && !compiled.matcher(value).find()
    }
    val violationCount = Try(df.filter(violatesRegex(new Column(columnName))).count)
    RegexConstraintResult(
      this,
      violationCount.toOption.map(RegexConstraintResultData),
      ConstraintUtil.tryToStatus[Long](violationCount, _ == 0)
    )
  }

}

/** Outcome of a [[RegexConstraint]] check together with a human-readable message. */
case class RegexConstraintResult(constraint: RegexConstraint,
                                 data: Option[RegexConstraintResultData],
                                 status: ConstraintStatus) extends ConstraintResult[RegexConstraint] {

  // Only three status/data combinations are legal; any other one is rejected.
  val message: String = {
    val columnName = constraint.columnName
    val regex = constraint.regex
    (status, data.map(_.failedRows)) match {
      case (ConstraintSuccess, Some(0)) =>
        s"Column $columnName matches $regex"
      case (ConstraintFailure, Some(failedRows)) =>
        // Singular or plural wording depending on the number of failing rows.
        val (pluralS, verb) = if (failedRows == 1) ("", "does") else ("s", "do")
        s"Column $columnName contains $failedRows row$pluralS that $verb not match $regex"
      case (ConstraintError(throwable), None) =>
        s"Checking whether column $columnName matches $regex failed: $throwable"
      case _ => throw IllegalConstraintResultException(this)
    }
  }

}

/** Payload of a regex constraint result: the number of rows that failed the check. */
case class RegexConstraintResultData(failedRows: Long)
Example 26
Source File: SearchPluginId.scala    From sbt-idea-plugin   with Apache License 2.0 5 votes vote down vote up
package org.jetbrains.sbtidea.tasks

import java.net.URLEncoder
import java.nio.file.Path
import java.util.regex.Pattern

import org.jetbrains.sbtidea.PluginLogger
import org.jetbrains.sbtidea.download.BuildInfo
import org.jetbrains.sbtidea.download.plugin.LocalPluginRegistry
import com.eclipsesource.json._
import scalaj.http.Http

import scala.collection.JavaConverters._

/**
 * Searches plugin ids by name or id, both among the plugins known to the local
 * registry under `ideaRoot` and in the remote JetBrains plugin repository.
 *
 * @param useBundled whether the local registry is searched
 * @param useRemote  whether the remote repository is queried
 */
class SearchPluginId(ideaRoot: Path, buildInfo: BuildInfo, useBundled: Boolean = true, useRemote: Boolean = true) {

  private val REPO_QUERY = "https://plugins.jetbrains.com/api/search/plugins?search=%s&build=%s"

  /**
   * Runs the search.
   *
   * @param query regular expression for the local search; sent URL-encoded to the remote one
   * @return plugin id -> (plugin name, flag); the flag is true if the plugin was
   *         found in the remote repo. Local entries win over remote ones with the same id.
   */
  def apply(query: String): Map[String, (String, Boolean)] = {
    val local  = if (useBundled) searchPluginIdLocal(query) else Map.empty
    val remote = if (useRemote) searchPluginIdRemote(query) else Map.empty
    remote ++ local
  }

  // Keeps the local descriptors whose name or id contains a match of `query`.
  private def searchPluginIdLocal(query: String): Map[String, (String, Boolean)] = {
    val pattern = Pattern.compile(query)
    val registry = new LocalPluginRegistry(ideaRoot)
    val allDescriptors = registry.getAllDescriptors
    allDescriptors
        .filter(descriptor => pattern.matcher(descriptor.name).find() || pattern.matcher(descriptor.id).find())
        .map(descriptor => descriptor.id -> (descriptor.name, false))
        .toMap
  }

  // Apparently we can't use json4s when cross-compiling for sbt because there are BOTH no shared versions AND binary compatibility
  // Queries the remote repository; any non-fatal failure is logged and yields an empty result.
  private def searchPluginIdRemote(query: String): Map[String, (String, Boolean)] = {
    try {
      val param = URLEncoder.encode(query, "UTF-8")
      val url = REPO_QUERY.format(param, s"${buildInfo.edition.edition}-${buildInfo.getActualIdeaBuild(ideaRoot)}")
      val data = Http(url).asString.body
      val json = Json.parse(data)
      val values = json.asArray().values().asScala.map(_.asObject())
      val names = values.map(_.getString("name", "") -> true)
      val ids = values.map(_.getString("xmlId", ""))
      ids.zip(names).toMap
    } catch {
      // NonFatal lets OutOfMemoryError, InterruptedException and the like propagate
      // instead of being downgraded to a warning.
      case scala.util.control.NonFatal(ex) =>
        PluginLogger.warn(s"Failed to query IJ plugin repo: $ex")
        Map.empty
    }
  }
}
Example 27
Source File: Constraint.scala    From exhibitor-mesos-framework   with Apache License 2.0 5 votes vote down vote up
package ly.stealth.mesos.exhibitor

import java.util.regex.{Pattern, PatternSyntaxException}

import scala.util.Try

/** A condition on a single value, optionally judged against a list of other values. */
trait Constraint {
  /** Whether `value` satisfies this constraint given `values` (empty by default). */
  def matches(value: String, values: List[String] = Nil): Boolean
}

object Constraint {
  /**
   * Parses one constraint expression.
   *
   * Supported forms: `like:<regex>`, `unlike:<regex>`, `unique`,
   * `cluster[:<value>]` and `groupBy[:<groups>]`.
   *
   * @throws IllegalArgumentException for an unsupported expression, an invalid
   *                                  regex or a non-numeric group count
   */
  def apply(value: String): Constraint = {
    if (value.startsWith("like:")) Constraint.Like(value.substring("like:".length))
    else if (value.startsWith("unlike:")) Constraint.Like(value.substring("unlike:".length), negated = true)
    else if (value == "unique") Constraint.Unique()
    else if (value.startsWith("cluster")) {
      val tail = value.substring("cluster".length)
      val cluster = if (tail.startsWith(":")) Some(tail.substring(1)) else None
      Cluster(cluster)
    } else if (value.startsWith("groupBy")) {
      val tail = value.substring("groupBy".length)
      val groups = if (tail.startsWith(":")) Try(tail.substring(1).toInt).toOption.getOrElse(throw new IllegalArgumentException(s"invalid condition $value"))
      else 1

      GroupBy(groups)
    }
    else throw new IllegalArgumentException(s"Unsupported condition: $value")
  }

  /**
   * Parses a constraint list into name -> constraints. Constraints sharing a
   * name are accumulated, the most recently parsed one first.
   */
  def parse(constraints: String): Map[String, List[Constraint]] = {
    Util.parseList(constraints).foldLeft[Map[String, List[Constraint]]](Map()) { case (all, (name, value)) =>
      all.get(name) match {
        case Some(values) => all.updated(name, Constraint(value) :: values)
        case None => all.updated(name, List(Constraint(value)))
      }
    }
  }

  /**
   * Matches when the whole value matches `regex` (or does not match it, when `negated`).
   *
   * @throws IllegalArgumentException on construction if `regex` is not a valid pattern
   */
  case class Like(regex: String, negated: Boolean = false) extends Constraint {
    val pattern = try {
      // Validate the expression on its own first, so that the wrapping below
      // cannot accidentally balance stray parentheses.
      Pattern.compile(regex)
      // The non-capturing group makes the anchors apply to the whole expression;
      // without it "a|b" would become "^a|b$", i.e. "starts with a OR ends with b".
      Pattern.compile(s"^(?:$regex)$$")
    } catch {
      case e: PatternSyntaxException => throw new IllegalArgumentException(s"Invalid $name: ${e.getMessage}")
    }

    private def name: String = if (negated) "unlike" else "like"

    def matches(value: String, values: List[String]): Boolean = negated ^ pattern.matcher(value).find()

    override def toString: String = s"$name:$regex"
  }

  /** Matches when `value` does not already occur in `values`. */
  case class Unique() extends Constraint {
    def matches(value: String, values: List[String]): Boolean = !values.contains(value)

    override def toString: String = "unique"
  }

  /**
   * With a fixed value: matches exactly that value. Without one: matches anything
   * while `values` is empty, afterwards only the first element of `values`.
   */
  case class Cluster(value: Option[String]) extends Constraint {
    def matches(value: String, values: List[String]): Boolean = this.value match {
      case Some(v) => v == value
      case None => values.isEmpty || values.head == value
    }

    override def toString: String = "cluster" + value.map(":" + _).getOrElse("")
  }

  /**
   * While fewer than `groups` distinct values are present, only unseen values
   * match; afterwards a value matches when its occurrence count equals the
   * smallest count among the values present.
   */
  case class GroupBy(groups: Int = 1) extends Constraint {
    def matches(value: String, values: List[String]): Boolean = {
      val counts = values.groupBy(identity).mapValues(_.size)
      if (counts.size < groups) !counts.contains(value)
      else {
        val minCount = counts.values.reduceOption(_ min _).getOrElse(0)
        counts.getOrElse(value, 0) == minCount
      }
    }

    override def toString: String = "groupBy" + (if (groups > 1) s":$groups" else "")
  }

}
Example 28
Source File: TypeScopeNameObjectNameFactory.scala    From scala-server-toolkit   with MIT License 5 votes vote down vote up
package com.avast.sst.micrometer.jmx

import java.util
import java.util.regex.Pattern

import cats.syntax.either._
import com.codahale.metrics.jmx.{DefaultObjectNameFactory, ObjectNameFactory}
import javax.management.ObjectName


      val map = new java.util.LinkedHashMap[String, String](parts.length)
      val properties = new java.util.Hashtable[String, String](parts.length) {
        override def entrySet(): util.Set[util.Map.Entry[String, String]] = map.entrySet()
      }

      parts.zip(partNames).foreach {
        case (part, partName) =>
          val quoted = quote(part)
          properties.put(partName, quoted)
          map.put(partName, quoted)
      }

      new ObjectName(domain, properties)
    }

  /** Replaces every `.`, `?`, `*` and `"` in `objectName` with an underscore. */
  private def quote(objectName: String): String =
    Pattern.compile("[\\Q.?*\"\\E]").matcher(objectName).replaceAll("_")

} 
Example 29
Source File: Configuration.scala    From tapir   with Apache License 2.0 5 votes vote down vote up
package sttp.tapir.generic

import java.util.regex.Pattern

/** Naming configuration: `toLowLevelName` maps a member name to its low-level form. */
final case class Configuration(toLowLevelName: String => String) {
  /** Same configuration, but with member names rendered in snake_case. */
  def withSnakeCaseMemberNames: Configuration =
    copy(toLowLevelName = Configuration.snakeCaseTransformation)

  /** Same configuration, but with member names rendered in kebab-case. */
  def withKebabCaseMemberNames: Configuration =
    copy(toLowLevelName = Configuration.kebabCaseTransformation)
}

object Configuration {
  /** Default configuration: names are used unchanged. */
  implicit val default: Configuration = Configuration(Predef.identity)

  // An upper-case run followed by a capitalised word, e.g. "HTTP" + "Se" in "HTTPServer".
  private val basePattern: Pattern = Pattern.compile("([A-Z]+)([A-Z][a-z])")
  // A lower-case letter or digit directly followed by an upper-case letter.
  private val swapPattern: Pattern = Pattern.compile("([a-z\\d])([A-Z])")

  // Inserts `delimiter` at both kinds of word boundary, then lower-cases the result.
  private def delimited(delimiter: String): String => String = {
    val replacement = "$1" + delimiter + "$2"
    name => {
      val acronymsSplit = basePattern.matcher(name).replaceAll(replacement)
      val wordsSplit = swapPattern.matcher(acronymsSplit).replaceAll(replacement)
      wordsSplit.toLowerCase
    }
  }

  private val snakeCaseTransformation: String => String = delimited("_")

  private val kebabCaseTransformation: String => String = delimited("-")
}
Example 30
Source File: ColumnMetadataTest.scala    From spark-vector   with Apache License 2.0 5 votes vote down vote up
package com.actian.spark_vector.vector

import java.util.regex.Pattern

import org.apache.spark.sql.types.DecimalType

import org.scalacheck.Arbitrary.arbitrary
import org.scalacheck.Gen
import org.scalacheck.Gen.{choose, identifier}
import org.scalacheck.Prop.{forAll, propBoolean}
import org.scalatest.{FunSuite, Matchers}

import com.actian.spark_vector.test.tags.RandomizedTest

/** Property-based checks that generated `ColumnMetadata` yields a consistent `StructField`. */
class ColumnMetadataTest extends FunSuite with Matchers {
  // Generate random column metadata and ensure the resultant StructField's are valid
  test("generated", RandomizedTest) {
    forAll(columnMetadataGen)(colMD => {
      assertColumnMetadata(colMD)
    }).check
  }

  // NOTE(review): not referenced anywhere in this class as shown here.
  val milliSecsPattern = Pattern.compile(".*\\.(S*)")

  /** Checks the struct field derived from `columnMD`; always returns true so it can serve as a property. */
  def assertColumnMetadata(columnMD: ColumnMetadata): Boolean = {
    val structField = columnMD.structField
    structField.dataType match {
      // For decimal type, ensure the scale and precision match
      case decType: DecimalType =>
        decType.precision should be(columnMD.precision)
        decType.scale should be(columnMD.scale)
      case _ =>
    }
    true
  }

  // Random metadata: precision is drawn from 0..20 and scale from 0..min(20, precision).
  val columnMetadataGen: Gen[ColumnMetadata] =
    for {
      name <- identifier
      typeName <- VectorTypeGen.vectorJdbcTypeGen
      nullable <- arbitrary[Boolean]
      precision <- choose(0, 20)
      scale <- choose(0, Math.min(20, precision))
    } yield ColumnMetadata(name, typeName, nullable, precision, scale)
}
Example 31
Source File: BigQueryPartitionUtil.scala    From scio   with Apache License 2.0 5 votes vote down vote up
package com.spotify.scio.bigquery

import java.util.regex.Pattern

import com.google.api.services.bigquery.model.TableReference
import com.spotify.scio.bigquery.client.BigQuery
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers

import scala.util.Try

/** Resolves the `$LATEST` table suffix in queries and table specs to a concrete partition suffix. */
private[bigquery] object BigQueryPartitionUtil {
  // Ported from com.google.cloud.dataflow.sdk.io.BigQueryHelpers

  private[this] val PROJECT_ID_REGEXP = "[a-z][-a-z0-9:.]{4,61}[a-z0-9]"
  private[this] val DATASET_REGEXP = "[-\\w.]{1,1024}"
  // Plain (non-interpolated) string: "$LATEST" here is literal regex text.
  private[this] val TABLE_REGEXP = "[-\\w$@]{1,1024}($LATEST)?"
  // Legacy SQL form: optional "project:" prefix, then "dataset.table".
  private[this] val DATASET_TABLE_REGEXP_LEGACY =
    s"((?<PROJECT>$PROJECT_ID_REGEXP):)?(?<DATASET>$DATASET_REGEXP)\\.(?<TABLE>$TABLE_REGEXP)"
  // Standard SQL form: optional project prefix, then "dataset.table".
  // NOTE(review): the separator after PROJECT is an unescaped "." (any character) - confirm intended.
  private[this] val DATASET_TABLE_REGEXP_STANDARD =
    s"((?<PROJECT>$PROJECT_ID_REGEXP).)?(?<DATASET>$DATASET_REGEXP)\\.(?<TABLE>$TABLE_REGEXP)"
  // Table spec enclosed in [ ] (the brackets themselves are not part of the match).
  private[this] val QUERY_TABLE_SPEC_LEGACY =
    Pattern.compile(s"(?<=\\[)$DATASET_TABLE_REGEXP_LEGACY(?=\\])")
  // Table spec enclosed in backticks (the backticks themselves are not part of the match).
  private[this] val QUERY_TABLE_SPEC_STANDARD =
    Pattern.compile(s"(?<=\\`)$DATASET_TABLE_REGEXP_STANDARD(?=\\`)")

  // Maps every table spec found in the query, including its delimiters, to its parsed reference.
  private def extractTables(sqlQuery: String): Map[String, TableReference] = {
    val b = Map.newBuilder[String, TableReference]
    val m1 = QUERY_TABLE_SPEC_LEGACY.matcher(sqlQuery)
    while (m1.find()) {
      val t = m1.group(0)
      b += (s"[$t]" -> BigQueryHelpers.parseTableSpec(t))
    }
    val m2 = QUERY_TABLE_SPEC_STANDARD.matcher(sqlQuery)
    while (m2.find()) {
      val t = m2.group(0)
      // The first "." is turned into ":" before parsing.
      // NOTE(review): this presumes the first dot separates project and dataset - verify for specs without a project.
      b += (s"`$t`" -> BigQueryHelpers.parseTableSpec(t.replaceFirst("\\.", ":")))
    }
    b.result()
  }

  // Suffixes of the tables in the same project/dataset whose id starts with the
  // part of `tableRef`'s id before "$", restricted to all-numeric suffixes.
  private def getPartitions(bq: BigQuery, tableRef: TableReference): Set[String] = {
    val prefix = tableRef.getTableId.split('$')(0)
    bq.tables
      .tableReferences(tableRef.getProjectId, tableRef.getDatasetId)
      .filter(_.getTableId.startsWith(prefix))
      .map(_.getTableId.substring(prefix.length))
      .toSet
      // get all table with prefix and filter only the day/date partitioned tables. Current
      // format for date partition is YYYYMMDD, thus all numeric.
      .filter(e => Try(e.toLong).isSuccess)
  }

  /**
   * Replaces `$LATEST` in every table spec of `sqlQuery` with the greatest
   * partition suffix common to all such tables. Queries without a `$LATEST`
   * table are returned unchanged; the absence of a common suffix fails the `require`.
   */
  def latestQuery(bq: BigQuery, sqlQuery: String): String = {
    val tables =
      extractTables(sqlQuery).filter(_._2.getTableId.endsWith("$LATEST"))
    if (tables.isEmpty) {
      sqlQuery
    } else {
      val overlaps = tables
        .map(t => getPartitions(bq, t._2))
        .reduce(_ intersect _)
      require(
        overlaps.nonEmpty,
        "Cannot find latest common partition for " + tables.keys.mkString(", ")
      )
      // Maximum by String ordering.
      val latest = overlaps.max
      tables.foldLeft(sqlQuery) {
        case (q, (spec, _)) =>
          q.replace(spec, spec.replace("$LATEST", latest))
      }
    }
  }

  /**
   * Replaces `$LATEST` in `tableSpec` with the greatest partition suffix found
   * for that table. Specs whose table id does not end with `$LATEST` are
   * returned unchanged; the absence of any partition fails the `require`.
   */
  def latestTable(bq: BigQuery, tableSpec: String): String = {
    val ref = BigQueryHelpers.parseTableSpec(tableSpec)
    if (ref.getTableId.endsWith("$LATEST")) {
      val partitions = getPartitions(bq, ref)
      require(partitions.nonEmpty, s"Cannot find latest partition for $tableSpec")
      tableSpec.replace("$LATEST", partitions.max)
    } else {
      tableSpec
    }
  }
}
Example 32
Source File: CompilationErrorParser.scala    From scala-clippy   with Apache License 2.0 5 votes vote down vote up
package com.softwaremill.clippy

import java.util.regex.Pattern

/**
 * Parses the text of a compiler error message into a structured [[CompilationError]].
 *
 * The kind of error is chosen by looking for a fixed marker phrase in the message; the branches
 * in `parse` are tried in order, so the first marker found decides how the message is parsed.
 */
object CompilationErrorParser {
  private val FoundRegexp            = """found\s*:\s*([^\n]+)""".r
  private val RequiredPrefixRegexp   = """required\s*:""".r
  private val AfterRequiredRegexp    = """required\s*:\s*([^\n]+)""".r
  private val WhichExpandsToRegexp   = """\s*\(which expands to\)\s*([^\n]+)""".r
  private val NotFoundRegexp         = """not found\s*:\s*([^\n]+)""".r
  private val NotAMemberRegexp       = """:?\s*([^\n:]+) is not a member of""".r
  private val NotAMemberOfRegexp     = """is not a member of\s*([^\n]+)""".r
  private val ImplicitNotFoundRegexp = """could not find implicit value for parameter\s*([^:]+):\s*([^\n]+)""".r
  private val DivergingImplicitExpansionRegexp =
    """diverging implicit expansion for type\s*([^\s]+)\s*.*\s*starting with method\s*([^\s]+)\s*in\s*([^\n]+)""".r
  private val TypeArgumentsDoNotConformToOverloadedBoundsRegexp =
    """type arguments \[([^\]]+)\] conform to the bounds of none of the overloaded alternatives of\s*([^:\n]+)[^:]*: ([^\n]+)""".r
  private val TypeclassNotFoundRegexp = """No implicit (.*) defined for ([^\n]+)""".r

  /**
   * Parses a single error message.
   *
   * @param e the raw error text; every literal `[error]` marker is removed before parsing
   * @return the parsed error, or `None` when no marker phrase is present or the matching
   *         regular expression does not find the expected parts
   */
  def parse(e: String): Option[CompilationError[ExactT]] = {
    val error = e.replaceAll(Pattern.quote("[error]"), "")
    if (error.contains("type mismatch")) {
      // Only messages with exactly one "required:" are handled: the text before it holds the
      // "found" type, the text after it holds the required type.
      RequiredPrefixRegexp.split(error).toList match {
        case List(beforeReq, afterReq) =>
          for {
            found <- FoundRegexp.findFirstMatchIn(beforeReq)
            foundExpandsTo = WhichExpandsToRegexp.findFirstMatchIn(beforeReq)
            required <- AfterRequiredRegexp.findFirstMatchIn(error)
            requiredExpandsTo = WhichExpandsToRegexp.findFirstMatchIn(afterReq)
          } yield {
            // Notes are whatever follows the last recognised part: the "which expands to"
            // clause of the required type if there is one, otherwise the required type itself.
            val notes = requiredExpandsTo match {
              case Some(et) => getNotesFromIndex(afterReq, et.end)
              case None     => getNotesFromIndex(error, required.end)
            }

            TypeMismatchError[ExactT](
              ExactT(found.group(1)),
              foundExpandsTo.map(m => ExactT(m.group(1))),
              ExactT(required.group(1)),
              requiredExpandsTo.map(m => ExactT(m.group(1))),
              notes
            )
          }

        case _ =>
          None
      }
    } else if (error.contains("not found")) {
      for {
        what <- NotFoundRegexp.findFirstMatchIn(error)
      } yield NotFoundError[ExactT](ExactT(what.group(1)))
    } else if (error.contains("is not a member of")) {
      for {
        what         <- NotAMemberRegexp.findFirstMatchIn(error)
        notAMemberOf <- NotAMemberOfRegexp.findFirstMatchIn(error)
      } yield NotAMemberError[ExactT](ExactT(what.group(1)), ExactT(notAMemberOf.group(1)))
    } else if (error.contains("could not find implicit value for parameter")) {
      for {
        inf <- ImplicitNotFoundRegexp.findFirstMatchIn(error)
      } yield ImplicitNotFoundError[ExactT](ExactT(inf.group(1)), ExactT(inf.group(2)))
    } else if (error.contains("diverging implicit expansion for type")) {
      for {
        inf <- DivergingImplicitExpansionRegexp.findFirstMatchIn(error)
      } yield DivergingImplicitExpansionError[ExactT](ExactT(inf.group(1)), ExactT(inf.group(2)), ExactT(inf.group(3)))
    } else if (error.contains("conform to the bounds of none of the overloaded alternatives")) {
      for {
        inf <- TypeArgumentsDoNotConformToOverloadedBoundsRegexp.findFirstMatchIn(error)
      } yield
        TypeArgumentsDoNotConformToOverloadedBoundsError[ExactT](
          ExactT(inf.group(1)),
          ExactT(inf.group(2)),
          // The alternatives are listed separated by the literal text " <and> ".
          inf.group(3).split(Pattern.quote(" <and> ")).toSet.map(ExactT.apply)
        )
    } else if (error.contains("No implicit")) {
      for {
        inf <- TypeclassNotFoundRegexp.findFirstMatchIn(error)
        group2 = inf.group(2)
      } yield
        TypeclassNotFoundError(
          ExactT(inf.group(1)),
          // Drop a single trailing '.' from the captured type.
          ExactT(if (group2.endsWith(".")) group2.substring(0, group2.length - 1) else group2)
        )
    } else None
  }

  /**
   * Returns the trimmed text of `msg` that starts one character after `afterIdx`.
   *
   * @return `None` when `msg` has no character at that position or the remaining text is blank
   */
  private def getNotesFromIndex(msg: String, afterIdx: Int): Option[String] = {
    val fromIdx = afterIdx + 1
    if (msg.length >= fromIdx + 1) {
      val notes = msg.substring(fromIdx).trim
      if (notes == "") None else Some(notes)
    } else None
  }
}
Example 33
Source File: Template.scala    From scala-clippy   with Apache License 2.0 5 votes vote down vote up
package com.softwaremill.clippy

import java.util.regex.Pattern

import scala.util.Try
import scala.util.matching.Regex

/** A template wrapping a single string value `v`. */
sealed trait Template {
  def v: String
}

/** A template holding a literal string; its string form is that string. */
case class ExactT(v: String) extends Template {
  override def toString: String = v
}

/** A template whose value `v` is a regular expression; its string form is the expression. */
case class RegexT(v: String) extends Template {
  // An expression that cannot be compiled falls back to "^$", which only matches empty input.
  lazy val regex: Regex = Try(new Regex(v)).getOrElse(new Regex("^$"))

  /** True when the whole value of `e` matches this regular expression. */
  def matches(e: ExactT): Boolean = regex.pattern.matcher(e.v).matches()

  override def toString: String = v
}

object RegexT {

  /**
   * Builds a [[RegexT]] from a wildcard pattern: every `*` becomes `.*` and all other text is
   * quoted so that it matches literally.
   */
  def fromPattern(pattern: String): RegexT = {
    // Limit -1 keeps leading and trailing empty segments, so "*" at either end is preserved.
    val quotedSegments = pattern.split("\\*", -1).map { segment =>
      if (segment.isEmpty) segment else Pattern.quote(segment)
    }
    fromRegex(quotedSegments.mkString(".*"))
  }

  /** Wraps an already written regular expression. */
  def fromRegex(v: String): RegexT =
    new RegexT(v)

  /**
   * True when both sets have the same size and every regex in `rr` matches at least one value
   * in `ee`.
   */
  def setMatches(rr: Set[RegexT], ee: Set[ExactT]): Boolean =
    rr.size == ee.size && rr.forall(r => ee.exists(r.matches))
}
Example 34
Source File: VwFeatureNormalizer.scala    From aloha   with MIT License 5 votes vote down vote up
package com.eharmony.aloha.dataset.vw

import java.text.DecimalFormat
import java.util.regex.Pattern

/**
 * Rewrites every namespace section of a VW-formatted line (`|<char> <features>`) so that the
 * namespace carries a weight: `|<char>:<weight> <features>`.
 *
 * The weight is `1 / sqrt(sum of squared feature weights)`, formatted with five decimals.
 * A feature written as `name:weight` contributes `weight * weight`; any other feature
 * contributes 1.
 */
class VwFeatureNormalizer extends (CharSequence => CharSequence) with java.io.Serializable {
    private[this] val lineRegex = Pattern.compile("\\|(\\w)\\s+([^\\|]+)")
    private[this] val namespaceRegex = ".+:(.+)".r
    private[this] val format = new DecimalFormat("0.00000")

    /**
     * @param vwLine the line to normalize
     * @return the line with a weight added to every matched namespace; text that does not match
     *         is copied unchanged
     */
    def apply(vwLine: CharSequence): CharSequence = {
        val matcher = lineRegex.matcher(vwLine)
        val sb = new StringBuffer
        while (matcher.find) {
            val namespace = matcher.group(1)
            val features = matcher.group(2)
            val rewritten = "|" + namespace + ":" + format.format(normalizeNamespace(features)) + " " + features
            // The rewritten text is literal: without quoting, a '$' or '\' inside a feature
            // name would be interpreted by appendReplacement as a group reference or escape.
            matcher.appendReplacement(sb, java.util.regex.Matcher.quoteReplacement(rewritten))
        }
        matcher.appendTail(sb)
        sb
    }

    /** Computes the normalization factor for the whitespace-separated features of a namespace. */
    private[this] def normalizeNamespace(namespace: String): Double = {
        val sumOfSquares = namespace.split("\\s+").foldLeft(0d) {
            case (acc, namespaceRegex(w)) =>
                val weight = w.toDouble
                acc + weight * weight
            case (acc, _) => acc + 1
        }
        if (sumOfSquares == 0) 0
        else 1.0 / math.sqrt(sumOfSquares)
    }
}

object VwFeatureNormalizer {
    /** A shared normalizer instance. */
    val instance = new VwFeatureNormalizer
}

 
Example 35
Source File: Constraint.scala    From zipkin-mesos-framework   with Apache License 2.0 5 votes vote down vote up
package net.elodina.mesos.zipkin.components

import java.util.regex.{Pattern, PatternSyntaxException}

import net.elodina.mesos.zipkin.utils.Util

import scala.util.Try

/**
 * A placement rule evaluated against one attribute value.
 */
trait Constraint {
  /**
   * @param value  the candidate value
   * @param values values that were already accepted (empty by default)
   * @return true when `value` satisfies the constraint given `values`
   */
  def matches(value: String, values: List[String] = Nil): Boolean
}

object Constraint {
  /**
   * Parses one constraint expression. Supported forms, checked in this order:
   * `like:<regex>`, `unlike:<regex>`, `unique`, `cluster[:<value>]`, `groupBy[:<int>]`.
   *
   * @throws IllegalArgumentException for an unsupported expression or a non-numeric group count
   */
  def apply(value: String): Constraint = {
    if (value.startsWith("like:")) Constraint.Like(value.substring("like:".length))
    else if (value.startsWith("unlike:")) Constraint.Like(value.substring("unlike:".length), negated = true)
    else if (value == "unique") Constraint.Unique()
    else if (value.startsWith("cluster")) {
      // Text after "cluster" that does not start with ':' is ignored.
      val tail = value.substring("cluster".length)
      val cluster = if (tail.startsWith(":")) Some(tail.substring(1)) else None
      Cluster(cluster)
    } else if (value.startsWith("groupBy")) {
      // Text after "groupBy" that does not start with ':' is ignored and one group is used.
      val tail = value.substring("groupBy".length)
      val groups = if (tail.startsWith(":")) Try(tail.substring(1).toInt).toOption.getOrElse(throw new IllegalArgumentException(s"invalid condition $value"))
      else 1

      GroupBy(groups)
    }
    else throw new IllegalArgumentException(s"Unsupported condition: $value")
  }

  /**
   * Parses a list of `name`/`value` pairs (as produced by `Util.parseList`) into constraints
   * grouped by name. Constraints of the same name are prepended, so the last one parsed comes
   * first in the list.
   */
  def parse(constraints: String): Map[String, List[Constraint]] = {
    Util.parseList(constraints).foldLeft[Map[String, List[Constraint]]](Map()) { case (all, (name, value)) =>
      all.get(name) match {
        case Some(values) => all.updated(name, Constraint(value) :: values)
        case None => all.updated(name, List(Constraint(value)))
      }
    }
  }

  /**
   * Accepts values that match `regex` as a whole (the expression is wrapped in `^...$`), or
   * values that do not match when `negated` is set.
   *
   * @throws IllegalArgumentException on construction when `regex` is not a valid expression
   */
  case class Like(regex: String, negated: Boolean = false) extends Constraint {
    val pattern = try {
      Pattern.compile(s"^$regex$$")
    } catch {
      case e: PatternSyntaxException => throw new IllegalArgumentException(s"Invalid $name: ${e.getMessage}")
    }

    private def name: String = if (negated) "unlike" else "like"

    // XOR flips the result of the match for the negated ("unlike") form.
    def matches(value: String, values: List[String]): Boolean = negated ^ pattern.matcher(value).find()

    override def toString: String = s"$name:$regex"
  }

  /** Accepts a value only when it is not already among `values`. */
  case class Unique() extends Constraint {
    def matches(value: String, values: List[String]): Boolean = !values.contains(value)

    override def toString: String = "unique"
  }

  /**
   * With an explicit value, accepts only that value. Without one, accepts anything while
   * `values` is empty and afterwards only the first element of `values`.
   */
  case class Cluster(value: Option[String]) extends Constraint {
    def matches(value: String, values: List[String]): Boolean = this.value match {
      case Some(v) => v == value
      case None => values.isEmpty || values.head == value
    }

    override def toString: String = "cluster" + value.map(":" + _).getOrElse("")
  }

  /**
   * Spreads values over `groups` distinct values: while fewer than `groups` distinct values
   * were seen only unseen values are accepted; afterwards a value is accepted when its number
   * of occurrences equals the smallest occurrence count (an unseen value counts as 0).
   */
  case class GroupBy(groups: Int = 1) extends Constraint {
    def matches(value: String, values: List[String]): Boolean = {
      val counts = values.groupBy(identity).mapValues(_.size)
      if (counts.size < groups) !counts.contains(value)
      else {
        val minCount = counts.values.reduceOption(_ min _).getOrElse(0)
        counts.getOrElse(value, 0) == minCount
      }
    }

    override def toString: String = "groupBy" + (if (groups > 1) s":$groups" else "")
  }

}
Example 36
Source File: I18NProcessor.scala    From ez-framework   with Apache License 2.0 5 votes vote down vote up
package com.ecfront.ez.framework.core.i18n

import java.io.File
import java.util.regex.Pattern

import com.ecfront.common.Resp
import com.ecfront.ez.framework.core.EZ
import com.ecfront.ez.framework.core.logger.Logging

import scala.io.Source


  /** Stores `_language` in `EZ.Info.language`, which `i18n` reads to choose the translation. */
  def setLanguage(_language: String): Unit = {
    EZ.Info.language = _language
  }

  // Regular expression (a literal tab) that is replaced by a space before translating.
  private val tabR = "\t"

  /**
   * Translates the message of `resp` in place.
   *
   * A null or empty message is left untouched; otherwise tabs are replaced by spaces and the
   * result of `i18n` is written back to `resp.message`.
   */
  def process(resp: Resp[_]): Unit =
    Option(resp.message).filter(_.nonEmpty).foreach { message =>
      resp.message = i18n(message.replaceAll(tabR, " "))
    }

  /**
   * Translates `str` using the first entry of `i18nInfo` whose pattern matches the whole
   * string.
   *
   * @return the pattern's replacement for the language in `EZ.Info.language` applied to `str`,
   *         or `str` itself when no pattern matches
   */
  def i18n(str: String): String =
    i18nInfo
      .find(_._1.matcher(str).matches())
      .fold(str) { matchedItem =>
        matchedItem._1.matcher(str).replaceAll(matchedItem._2(EZ.Info.language))
      }

  /** Adds `.x` to strings as a shorthand for `i18n(str)`. */
  implicit class Impl(val str: String) {
    def x: String = i18n(str)
  }

} 
Example 37
Source File: OS.scala    From uap-scala   with Do What The F*ck You Want To Public License 5 votes vote down vote up
package org.uaparser.scala

import MatcherOps._
import java.util.regex.{ Matcher, Pattern }
import scala.util.control.Exception.allCatch

/**
 * Operating system information extracted from a user agent string.
 *
 * @param family     operating system family
 * @param major      major version, when available
 * @param minor      minor version, when available
 * @param patch      patch version, when available
 * @param patchMinor minor patch version, when available
 */
case class OS(family: String, major: Option[String] = None, minor: Option[String] = None, patch: Option[String] = None,
              patchMinor: Option[String] = None)

object OS {
  /** Builds an [[OS]] from the keys `family`, `major`, `minor`, `patch` and `patch_minor`; `None` without `family`. */
  private[scala] def fromMap(m: Map[String, String]) = m.get("family").map { family =>
    OS(family, m.get("major"), m.get("minor"), m.get("patch"), m.get("patch_minor"))
  }

  // Matches the literal text "$1" inside a replacement template.
  private[this] val quotedBack1: Pattern = Pattern.compile(s"(${Pattern.quote("$1")})")

  /**
   * Substitutes every literal `$1` in `replacement` with the text captured by group 1.
   *
   * The captured text is inserted literally: it is quoted so that a `$` or `\` in the user
   * agent is not interpreted as replacement syntax, and a group that did not take part in the
   * match is substituted with the empty string instead of failing with a null replacement.
   */
  private[this] def replacementBack1(matcher: Matcher)(replacement: String): String =
    if (matcher.groupCount() >= 1) {
      val captured = Option(matcher.group(1)).getOrElse("")
      quotedBack1.matcher(replacement).replaceAll(Matcher.quoteReplacement(captured))
    } else replacement

  /**
   * Resolves a version replacement: a back-reference such as `$2` yields that group of the
   * match (looked up with `groupAt`), anything else is returned as a literal.
   */
  private[this] def replaceBackreference(matcher: Matcher)(replacement: String): Option[String] =
    getBackreferenceGroup(replacement) match {
      case Some(group) => matcher.groupAt(group)
      case None        => Some(replacement)
    }

  /**
   * Returns the group number of a back-reference: the replacement must contain `$` and
   * everything after its first character must parse as an integer.
   */
  private[this] def getBackreferenceGroup(replacement: String): Option[Int] =
    for {
      ref <- Option(replacement).filter(_.contains("$"))
      groupOpt = allCatch opt ref.substring(1).toInt
      group <- groupOpt
    } yield group


  /** A regular expression together with optional replacements for the family and versions. */
  private[scala] case class OSPattern(
    pattern: Pattern,
    osReplacement: Option[String],
    v1Replacement: Option[String],
    v2Replacement: Option[String],
    v3Replacement: Option[String],
    v4Replacement: Option[String]
  ) {
    /**
     * Applies the pattern to `agent`.
     *
     * @return `None` when the pattern is not found or no family can be determined; otherwise
     *         the OS, where each part comes from its replacement if one is configured and from
     *         the corresponding match group (1 to 5) if not
     */
    def process(agent: String): Option[OS] = {
      val matcher = pattern.matcher(agent)
      if (!matcher.find()) None else {
        osReplacement
          .map(replacementBack1(matcher))
          .orElse(matcher.groupAt(1)).map { family =>
            val major = v1Replacement.flatMap(replaceBackreference(matcher)).orElse(matcher.groupAt(2))
            val minor = v2Replacement.flatMap(replaceBackreference(matcher)).orElse(matcher.groupAt(3))
            val patch = v3Replacement.flatMap(replaceBackreference(matcher)).orElse(matcher.groupAt(4))
            val patchMinor = v4Replacement.flatMap(replaceBackreference(matcher)).orElse(matcher.groupAt(5))
            OS(family, major, minor, patch, patchMinor)
        }
      }
    }
  }

  private object OSPattern {
    /** Builds a pattern from a configuration entry; `None` when the entry has no `regex` key. */
    def fromMap(m: Map[String, String]): Option[OSPattern] = m.get("regex").map { r =>
      OSPattern(Pattern.compile(r), m.get("os_replacement"), m.get("os_v1_replacement"), m.get("os_v2_replacement"),
        m.get("os_v3_replacement"), m.get("os_v4_replacement"))
    }
  }

  /** Tries the patterns in order and returns the first result, or `OS("Other")` when none applies. */
  case class OSParser(patterns: List[OSPattern]) {
    def parse(agent: String): OS = patterns.foldLeft[Option[OS]](None) {
      case (None, pattern) => pattern.process(agent)
      case (result, _) => result
    }.getOrElse(OS("Other"))
  }

  object OSParser {
    /** Builds a parser from configuration entries, skipping entries without a `regex` key. */
    def fromList(config: List[Map[String, String]]): OSParser = OSParser(config.map(OSPattern.fromMap).flatten)
  }
}
Example 38
Source File: ByteUnit.scala    From aloha   with Apache License 2.0 5 votes vote down vote up
package me.jrwang.aloha.common.util

import java.util.regex.Pattern

/**
 * A unit of data size.
 *
 * @param multiplier number of bytes in one unit
 */
sealed abstract class ByteUnit(val multiplier: Long) {

  /**
   * Interprets the number `d`, given in unit `u`, as a number of this unit.
   * E.g. `KiB.convertFrom(1, MiB)` expresses 1 MiB in KiB, which is 1024.
   */
  def convertFrom(d: Long, u: ByteUnit): Long = u.convertTo(d, this)

  /**
   * Converts the number `d`, given in this unit, to unit `u`.
   *
   * @throws IllegalArgumentException when converting to a smaller unit would exceed the range
   *                                  of `Long`
   */
  def convertTo(d: Long, u: ByteUnit): Long =
    if (multiplier <= u.multiplier) {
      // Divide by the ratio of the units instead of multiplying `d` first, which avoids a
      // potential overflow of d * multiplier.
      d / (u.multiplier / multiplier)
    } else {
      val ratio = multiplier / u.multiplier
      if (Long.MaxValue / ratio < d) {
        throw new IllegalArgumentException(s"Conversion of $d exceeds Long.MAX_VALUE in ${name()}. Try a larger unit (e.g. MiB instead of KiB)")
      }
      d * ratio
    }

  /** Display name of the unit. */
  def name(): String
}

/** The available units; each one is 1024 times the previous one. */
object ByteUnit {
  case object BYTE extends ByteUnit(1L) {
    override def name(): String = "BYTE"
  }

  case object KiB extends ByteUnit(1L << 10) {
    override def name(): String = "KB"
  }

  case object MiB extends ByteUnit(1L << 20) {
    override def name(): String = "MB"
  }

  case object GiB extends ByteUnit(1L << 30) {
    override def name(): String = "GB"
  }

  case object TiB extends ByteUnit(1L << 40) {
    override def name(): String = "TB"
  }

  case object PiB extends ByteUnit(1L << 50) {
    override def name(): String = "PB"
  }
}


/** Helpers for reading sizes written as strings. */
object ByteUtils {
  // Lower-case size suffixes and the unit each one stands for; the one-letter and the
  // two-letter form map to the same unit.
  private val byteSuffixes =
    Map(
      "b" -> ByteUnit.BYTE,
      "k" -> ByteUnit.KiB,
      "kb" -> ByteUnit.KiB,
      "m" -> ByteUnit.MiB,
      "mb" -> ByteUnit.MiB,
      "g" -> ByteUnit.GiB,
      "gb" -> ByteUnit.GiB,
      "t" -> ByteUnit.TiB,
      "tb" -> ByteUnit.TiB,
      "p" -> ByteUnit.PiB,
      "pb" -> ByteUnit.PiB
    )


  
  /**
   * Converts a size string to a number of `ByteUnit.GiB` by delegating to `byteStringAs`.
   * NOTE(review): `byteStringAs` is not defined in this excerpt; the accepted string format is
   * presumably a number followed by one of the suffixes above - confirm against its definition.
   */
  def byteStringAsGb(str: String): Long = {
    byteStringAs(str, ByteUnit.GiB)
  }
}
Example 39
Source File: TimeUtils.scala    From aloha   with Apache License 2.0 5 votes vote down vote up
package me.jrwang.aloha.common.util

import java.util.concurrent.TimeUnit
import java.util.regex.Pattern

/** Helpers for reading durations written as strings. */
object TimeUtils {
  // Duration suffixes and the time unit each one stands for; "m" and "min" both mean minutes.
  private val timeSuffixes =
    Map(
      "us" -> TimeUnit.MICROSECONDS,
      "ms" -> TimeUnit.MILLISECONDS,
      "s" -> TimeUnit.SECONDS,
      "m" -> TimeUnit.MINUTES,
      "min" -> TimeUnit.MINUTES,
      "h" -> TimeUnit.HOURS,
      "d" -> TimeUnit.DAYS
    )

  
  /**
   * Converts a duration string to seconds by delegating to `timeStringAs`.
   * NOTE(review): `timeStringAs` is not defined in this excerpt; the accepted string format is
   * presumably a number followed by one of the suffixes above - confirm against its definition.
   */
  def timeStringAsSeconds(str: String): Long =
    timeStringAs(str, TimeUnit.SECONDS)
}
Example 40
Source File: OutputTransformer.scala    From play-swagger   with Apache License 2.0 5 votes vote down vote up
package com.iheart.playSwagger

import java.util.regex.Pattern

import com.iheart.playSwagger.OutputTransformer.SimpleOutputTransformer
import play.api.libs.json.{ JsArray, JsString, JsValue, JsObject }

import scala.util.matching.Regex
import scala.util.{ Success, Failure, Try }


  /**
   * Chains this transformer with `b`: the result applies this transformer first and, when that
   * succeeds, passes its output to `b`.
   */
  def >=>(b: JsObject ⇒ Try[JsObject]): OutputTransformer =
    SimpleOutputTransformer(value ⇒ this.apply(value).flatMap(b))
}

object OutputTransformer {

  /** An [[OutputTransformer]] backed by the function `run`. */
  final case class SimpleOutputTransformer(run: (JsObject ⇒ Try[JsObject])) extends OutputTransformer {
    override def apply(value: JsObject): Try[JsObject] = run(value)
  }

  /**
   * Applies `transformer` to every element of `vals`, descending into nested objects and
   * arrays.
   *
   * @return the transformed array, or the first failure in element order
   */
  def traverseTransformer(vals: JsArray)(transformer: JsValue ⇒ Try[JsValue]): Try[JsArray] = {
    val results = vals.value.map {
      case nested: JsObject ⇒ traverseTransformer(nested)(transformer)
      case nested: JsArray  ⇒ traverseTransformer(nested)(transformer)
      case leaf: JsValue    ⇒ transformer(leaf)
    }

    results.collectFirst { case Failure(cause) ⇒ cause } match {
      case Some(cause) ⇒ Failure(cause)
      case None        ⇒ Success(JsArray(results.collect { case Success(transformed) ⇒ transformed }))
    }
  }

  /**
   * Applies `transformer` to every field value of `obj`, descending into nested objects and
   * arrays; field names are kept.
   *
   * @return the transformed object, or the first failure in field order
   */
  def traverseTransformer(obj: JsObject)(transformer: JsValue ⇒ Try[JsValue]): Try[JsObject] = {
    val results = obj.fields.map {
      case (key, nested: JsObject) ⇒ (key, traverseTransformer(nested)(transformer))
      case (key, nested: JsArray)  ⇒ (key, traverseTransformer(nested)(transformer))
      case (key, leaf: JsValue)    ⇒ (key, transformer(leaf))
    }

    results.collectFirst { case (_, Failure(cause)) ⇒ cause } match {
      case Some(cause) ⇒ Failure(cause)
      case None ⇒
        Success(JsObject(results.collect { case (key, Success(transformed)) ⇒ (key, transformed) }))
    }
  }
}

/**
 * Replaces every string value that matches `pattern` as a whole with the value that `map`
 * returns for the captured key.
 *
 * @param map     looks up the value of a variable
 * @param pattern expression with one group capturing the variable name; by default a string of
 *                the form `${name}`
 */
class PlaceholderVariablesTransformer(map: String ⇒ Option[String], pattern: Regex = "^\\$\\{(.*)\\}$".r) extends OutputTransformer {

  /**
   * @return the object with all placeholders replaced, or a failure holding an
   *         `IllegalStateException` for the first variable that `map` does not know
   */
  def apply(value: JsObject): Try[JsObject] = {
    def resolve(key: String): Try[JsValue] =
      map(key)
        .map(result ⇒ Success(JsString(result)))
        .getOrElse(Failure(new IllegalStateException(s"Unable to find variable $key")))

    OutputTransformer.traverseTransformer(value) {
      case JsString(pattern(key)) ⇒ resolve(key)
      case other: JsValue         ⇒ Success(other)
    }
  }
}

/** A [[PlaceholderVariablesTransformer]] that looks variables up in `map`. */
final case class MapVariablesTransformer(map: Map[String, String]) extends PlaceholderVariablesTransformer(map.get)
/** A [[PlaceholderVariablesTransformer]] that looks variables up with `System.getenv`; an unset variable counts as unknown. */
class EnvironmentVariablesTransformer extends PlaceholderVariablesTransformer((key: String) ⇒ Option(System.getenv(key))) 
Example 41
Source File: QuicklensRelease.scala    From quicklens   with Apache License 2.0 5 votes vote down vote up
import java.util.regex.Pattern

import sbt._
import sbtrelease.ReleasePlugin.autoImport._
import sbtrelease.ReleasePlugin.autoImport.ReleaseKeys._
import sbtrelease.ReleasePlugin.autoImport.ReleaseStep
import sbtrelease.ReleaseStateTransformations._

/** The release process of the project: the standard steps plus an update of the README. */
object QuicklensRelease {
  def steps: Seq[ReleaseStep] = Seq(
    checkSnapshotDependencies,
    inquireVersions,
    // publishing locally so that the pgp password prompt is displayed early
    // in the process
    releaseStepCommand("publishLocalSigned"),
    runClean,
    runTest,
    setReleaseVersion,
    updateVersionInReadme,
    commitReleaseVersion,
    tagRelease,
    publishArtifacts,
    setNextVersion,
    commitNextVersion,
    releaseStepCommand("sonatypeReleaseAll"),
    pushChanges
  )

  /**
   * Release step that replaces the version found in the dependency line of `README.md` with
   * the version being released and adds the file to version control.
   *
   * Fails the release with a descriptive error when the README has no dependency line, when no
   * versions have been set, or when no version control system is configured.
   */
  // based on https://github.com/EECOLOR/sbt-release-custom-steps/blob/master/src/main/scala/org/qirx/sbtrelease/UpdateVersionInFiles.scala
  private def updateVersionInReadme: ReleaseStep = { s: State =>
    val readmeFile = file("README.md")
    val readme = IO.read(readmeFile)

    val currentVersionPattern = """"com.softwaremill.quicklens" %% "quicklens" % "([\w\.-]+)"""".r
    val currentVersionInReadme = currentVersionPattern
      .findFirstMatchIn(readme)
      .map(_.group(1))
      .getOrElse(sys.error(s"Cannot find the quicklens dependency line in ${readmeFile.name}"))

    val releaseVersion = s
      .get(versions)
      .map(_._1)
      .getOrElse(sys.error("No versions are set! Was this release part executed before inquireVersions?"))

    s.log.info(s"Replacing $currentVersionInReadme with $releaseVersion in ${readmeFile.name}")

    // A literal replacement: both the old and the new version are plain text, so neither may
    // be interpreted as a regular expression or as replacement syntax.
    val newReadme = readme.replace(currentVersionInReadme, releaseVersion)
    IO.write(readmeFile, newReadme)

    val settings = Project.extract(s)
    val vcs = settings
      .get(releaseVcs)
      .getOrElse(sys.error("Aborting release. Working directory is not a repository of a recognized VCS."))
    vcs.add(readmeFile.getAbsolutePath) !! s.log

    s
  }
}
Example 42
Source File: StaxEncoder.scala    From scalaz-deriving   with GNU Lesser General Public License v3.0 5 votes vote down vote up
// Copyright: 2017 - 2020 Sam Halliday
// License: http://www.gnu.org/licenses/lgpl-3.0.en.html

package xmlformat
package stax

import java.io.StringWriter
import java.util.regex.Pattern

import javax.xml.stream.{ XMLOutputFactory, XMLStreamWriter }
import scalaz._, Scalaz._

import com.ctc.wstx.stax.WstxOutputFactory

/** Serializes an `XTag` tree to an indented XML document using a StAX stream writer. */
object StaxEncoder {
  // must not escape the code in this module
  // One factory per thread, configured for speed and without namespace support.
  private[this] val factory = new ThreadLocal[XMLOutputFactory] {
    override def initialValue: WstxOutputFactory = {
      val f = new com.ctc.wstx.stax.WstxOutputFactory
      f.configureForSpeed()
      f.getConfig.doSupportNamespaces(false)
      f
    }
  }

  /**
   * Encodes `t` and its descendants.
   *
   * @return the document start followed by the tag tree, as a string
   */
  def encode(t: XTag): String = {
    val output = new StringWriter

    val x = factory.get.createXMLStreamWriter(output)
    x.writeStartDocument()
    writeTag(x, t, 0)
    x.writeEndDocument()
    output.toString()
  }

  /**
   * Writes one tag on a new line, indented by two spaces per `level`: the start element, its
   * attributes, its children (one level deeper), its body and the end element.
   */
  private[this] def writeTag(x: XMLStreamWriter, t: XTag, level: Int): Unit = {
    x.writeCharacters("\n")
    x.writeCharacters(" " * 2 * level)
    x.writeStartElement(t.name)

    t.attrs.toList.foreach { a =>
      x.writeAttribute(a.name, a.value.text)
    }

    t.children.toList.foreach { c =>
      writeTag(x, c, level + 1)
    }

    t.body.toList.foreach { s =>
      // A body that follows child tags goes on its own line, indented like the children.
      if (t.children.nonEmpty) {
        x.writeCharacters("\n")
        x.writeCharacters(" " * 2 * (level + 1))
      }
      if (!containsXmlEntities(s.text))
        x.writeCharacters(s.text)
      else {
        // Text with XML special characters is written as CDATA. A "]]>" inside the text would
        // end the section early, so it is split across two CDATA sections.
        val clean =
          if (!s.text.contains("]]>")) s.text
          else s.text.replace("]]>", "]]]]><![CDATA[>")
        x.writeCData(clean)
      }
    }

    // With children present the end element goes on its own line at this tag's indentation.
    if (t.children.nonEmpty) {
      x.writeCharacters("\n")
      x.writeCharacters(" " * 2 * level)
    }

    x.writeEndElement()
  }

  // The five characters with a predefined XML entity: " & ' < >
  private[this] val entities                      = Pattern.compile("""("|&|'|<|>)""")

  /** True when `input` contains at least one of the characters `"`, `&`, `'`, `<` or `>`. */
  def containsXmlEntities(input: String): Boolean =
    entities.matcher(input).find()

}
Example 43
Source File: StringReplacement.scala    From Mastering-Spark-for-Data-Science   with MIT License 5 votes vote down vote up
package com.gravity.goose.text



import java.util.regex.Pattern

object StringReplacement {

  /**
   * Compiles `pattern` and pairs it with the text that replaces its matches.
   *
   * @throws IllegalArgumentException when `pattern` is null or empty
   */
  def compile(pattern: String, replaceWith: String): StringReplacement = {
    if (string.isNullOrEmpty(pattern)) throw new IllegalArgumentException("Patterns must not be null or empty!")
    new StringReplacement(Pattern.compile(pattern), replaceWith)
  }
}

/** A compiled pattern together with its replacement text; created with `StringReplacement.compile`. */
class StringReplacement {
  private var pattern: Pattern = null
  private var replaceWith: String = null

  private def this(pattern: Pattern, replaceWith: String) = {
    this()
    this.pattern = pattern
    this.replaceWith = replaceWith
  }

  /**
   * Replaces every match of the pattern in `input` with the replacement text.
   *
   * @return `string.empty` for a null or empty input, otherwise the input with all matches
   *         replaced
   */
  def replaceAll(input: String): String =
    if (string.isNullOrEmpty(input)) string.empty
    else pattern.matcher(input).replaceAll(replaceWith)
}
Example 44
Source File: ESSearchSanitizer.scala    From recogito2   with Apache License 2.0 5 votes vote down vote up
package storage.es


  /**
   * Sanitizes a search term in four steps: escape special characters, escape the set operators
   * AND/OR/NOT, collapse whitespace, and escape an unbalanced double quote.
   */
  def sanitize(term: String): String = {
    val withEscapedCharacters = escapeSpecialCharacters(term)
    val withEscapedOperators = escapeSetOperators(withEscapedCharacters)
    val withSingleSpaces = collapseWhiteSpaces(withEscapedOperators)
    escapeOddQuote(withSingleSpaces)
  }

  /** Prefixes each of the characters `\ / + - & | ! ( ) { } [ ] ^ ~ * ? :` with a backslash. */
  private def escapeSpecialCharacters(term: String): String = {
    // The quoted list is placed inside a character class, so each character matches literally.
    val characterClass = "[" + Pattern.quote("""\/+-&|!(){}[]^~*?:""") + "]"
    term.replaceAll("(" + characterClass + ")", "\\\\$1")
  }

  /** Escapes the whole words AND, OR and NOT so that they are not read as set operators. */
  private def escapeSetOperators(term: String): String =
    Seq("AND", "OR", "NOT").foldLeft(term) { (escapedSoFar, op) =>
      escapedSoFar.replaceAll(s"""\\b($op)\\b""", escapeEachCharacter(op))
    }

  /**
   * Builds a regex replacement string in which every character of `op` is preceded by an
   * escaped backslash, so the replaced text shows one backslash before each character.
   */
  private def escapeEachCharacter(op: String): String =
    op.iterator.map(ch => s"""\\\\$ch""").mkString

  /** Replaces every run of whitespace with a single space. */
  private def collapseWhiteSpaces(term: String): String =
    """\s+""".r.replaceAllIn(term, " ")

  /**
   * Escapes the last double quote of `term` with a backslash when the number of double quotes
   * is odd; a term with an even number of quotes is returned unchanged.
   */
  private def escapeOddQuote(term: String): String = {
    val quoteCount = term.count(_ == '"')
    if (quoteCount % 2 == 0) term
    else term.replaceAll("""(.*)"(.*)""", """$1\\"$2""")
  }

} 
Example 45
Source File: LimitedSharingClassLoader.scala    From polynote   with Apache License 2.0 5 votes vote down vote up
package polynote.kernel.util

import java.net.URL
import java.util.regex.Pattern


// TODO: should resource loading be similarly altered?
/**
 * A URL class loader that prefers its own URLs over the parent loader, except for classes whose
 * name matches `shareRegex`, which are taken from the parent first.
 *
 * @param shareRegex expression tested against the class name; a class is shared when the
 *                   expression is found anywhere in the name
 * @param urls       locations this loader loads classes from
 * @param parent     loader that provides the shared classes and serves as fallback
 */
class LimitedSharingClassLoader(
  shareRegex: String,
  urls: Seq[URL],
  parent: ClassLoader
) extends scala.reflect.internal.util.ScalaClassLoader.URLClassLoader(urls, parent) {

  private val share = Pattern.compile(shareRegex).asPredicate()

  /**
   * Loads `name` while holding the class loading lock for that name.
   *
   * A shared class is loaded with the inherited `loadClass` and, if that does not find it, with
   * `findClass`. Any other class is looked up with `findClass` first and falls back to the
   * inherited `loadClass` when it is not found or fails to link.
   */
  override def loadClass(name: String, resolve: Boolean): Class[_] =  getClassLoadingLock(name).synchronized {
    val c = if (share.test(name)) {
      //System.err.println(s"Delegating class $name")
      try {
        super.loadClass(name, false)
      } catch {
        case err: ClassNotFoundException => findClass(name)
      }
    } else try {
      findClass(name)
    } catch {
      case _: ClassNotFoundException | _: LinkageError =>
        super.loadClass(name, resolve)
    }

    if (resolve) {
      resolveClass(c)
    }

    c
  }


}
Example 46
Source File: OrcAcidUtil.scala    From spark-acid   with Apache License 2.0 5 votes vote down vote up
package com.qubole.shaded.hadoop.hive.ql.io.orc

import java.util.regex.Pattern

import com.qubole.shaded.hadoop.hive.ql.io.AcidUtils
import org.apache.hadoop.fs.Path

object OrcAcidUtil {
  /** File name of a bucket file: `bucket_` followed by five digits. */
  val BUCKET_PATTERN = Pattern.compile("bucket_[0-9]{5}$")

  /**
   * Returns, for every delete delta directory of `orcSplit`, the path of the bucket file that
   * has the same bucket id as the split.
   *
   * Asserts that the file name of the split matches [[BUCKET_PATTERN]] and that a bucket id can
   * be parsed from its path.
   */
  def getDeleteDeltaPaths(orcSplit: OrcSplit): Array[Path] = {
    val splitPath = orcSplit.getPath
    assert(BUCKET_PATTERN.matcher(splitPath.getName).matches())
    val bucketId = AcidUtils.parseBucketId(splitPath)
    assert(bucketId != -1)
    VectorizedOrcAcidRowBatchReader
      .getDeleteDeltaDirsFromSplit(orcSplit)
      .map(deleteDir => AcidUtils.createBucketFile(deleteDir, bucketId))
  }
}
Example 47
Source File: TestFilter.scala    From stryker4s   with Apache License 2.0 5 votes vote down vote up
package stryker4s.config

import java.util.regex.Pattern
import scala.util.Try
import stryker4s.config.TestFilter.wildcardToRegex

/**
 * Decides which tests run, based on the wildcard patterns in `config.testFilter`. A pattern
 * starting with `!` excludes the tests it matches; every other pattern includes them.
 */
class TestFilter(implicit config: Config) {

  val exclamationMark = "!"

  /** The configured patterns converted to regular expressions, split into excluding and including ones. */
  lazy val partition: Partition = {
    val (excluding, including) = config.testFilter.partition(_.startsWith(exclamationMark))
    Partition(
      // The leading '!' is not part of the pattern itself.
      excluding.map(pattern => Regex(wildcardToRegex(pattern.substring(1)))),
      including.map(pattern => Regex(wildcardToRegex(pattern)))
    )
  }

  /**
   * @return false when an excluding pattern matches `testName`; otherwise true when there are
   *         no including patterns or one of them matches
   */
  def filter(testName: String): Boolean = {
    def matchesAny(regexes: Seq[Regex]): Boolean = regexes.exists(_.matches(testName))

    !matchesAny(partition.negative) &&
    (partition.positive.isEmpty || matchesAny(partition.positive))
  }
}

/** Test filter expressions split into excluding (`negative`) and including (`positive`) ones. */
case class Partition(negative: Seq[Regex], positive: Seq[Regex])

/** A regular expression kept as text and compiled each time it is used. */
case class Regex(regex: String) {

  /** True when the whole of `testName` matches; an expression that fails to compile matches nothing. */
  def matches(testName: String): Boolean =
    Try(Pattern.matches(regex, testName)).getOrElse(false)
}

object TestFilter {

  // Characters with a special meaning in a regular expression. '*' and '?' are not listed
  // because they are the wildcards and get translated instead of escaped.
  private val regexChars: Set[Char] =
    Set('(', ')', '[', ']', '$', '^', '.', '{', '}', '|', '\\', '+')

  /**
   * Converts a wildcard expression to an anchored regular expression: `*` matches any text,
   * `?` matches one character and everything else matches literally.
   */
  def wildcardToRegex(wildcard: String): String = s"^${wildcard.toList.map(convertChar).mkString}$$"

  /** Converts one character of a wildcard expression to its regular expression equivalent. */
  def convertChar(c: Char): String =
    c match {
      case '*'                 => ".*"
      case '?'                 => "."
      case _ if isRegexChar(c) => s"\\${c.toString}"
      case c                   => c.toString
    }

  /**
   * True when `c` has to be escaped to match literally in a regular expression. This includes
   * `+`: left unescaped, a test name such as `a+b` would be read as "one or more a, then b".
   */
  def isRegexChar(c: Char): Boolean = regexChars.contains(c)
}
Example 48
Source File: ImapResponses.scala    From gatling-imap   with GNU Affero General Public License v3.0 5 votes vote down vote up
package com.linagora.gatling.imap.protocol

import java.util.regex.Pattern

import com.sun.mail.imap.protocol.IMAPResponse

import scala.collection.immutable.Seq

/** The responses received for one IMAP command, with helpers to inspect them. */
case class ImapResponses(responses: Seq[IMAPResponse]) {

  import ImapResponses._

  /** All responses joined with `separator`. */
  def mkString(separator: String = ",") = {
    responses.mkString(separator)
  }

  // The status checks below only look at the last response.
  def isBad = responses.lastOption.exists(_.isBAD)

  def isOk = responses.lastOption.exists(_.isOK)

  def isNo = responses.lastOption.exists(_.isNO)

  /** The number reported by the first RECENT response, if there is one. */
  def countRecent: Option[Int] =
    responses.iterator
      .map(_.toString)
      .collectFirst { case Recent(count) => count.toInt }

  /** The mailbox names of all LIST responses; a quoted name is returned without its quotes. */
  def folderList: Seq[String] =
    responses
      .map(_.toString)
      .collect {
        // Exactly one of the two groups is set: the quoted or the unquoted form of the name.
        case List(name, null) => name
        case List(null, name) => name
      }

  /** The UIDs of all responses that contain one. */
  def uidList: Seq[Uid] =
    responses
      .map(_.toString)
      .collect { case UidRegex(uid) => Uid(uid.toInt) }

  /** True when the text of any response contains `content`. */
  def contains(content: String): Boolean =
    responses.exists(_.toString.contains(content))
}

object ImapResponses {
  /** A value without any responses. */
  val empty = ImapResponses(Seq.empty)

  // Flag that lets '.' also match line terminators.
  private[this] val dotAllFlag = """(?s)"""
  // Start of an untagged response: '*', optionally preceded by ", ".
  private[this] val startWithStar = """^(?:(?:, )?\*)"""
  // A mailbox name: either quoted (group 1, without the quotes) or unquoted (group 2).
  private[this] val mailboxName = """(?:"([^"]*)"|([^"\s]*))"""

  // "* <n> RECENT", capturing the number.
  private val Recent = (dotAllFlag + startWithStar + """ (\d+) RECENT\s*$""").r
  // "* LIST ... <mailbox name>", capturing the name at the end of the response.
  private val List = ("""^\* LIST .*? """ + mailboxName + """\s*$""").r
  // Any untagged response containing "UID <n>", capturing the number.
  private val UidRegex = (dotAllFlag + startWithStar + """ .*UID (\d+).*$""").r
}
Example 49
Source File: ShouldNotTypecheck.scala    From lagom   with Apache License 2.0 5 votes vote down vote up
package com.lightbend.lagom.macrotestkit

import scala.language.experimental.macros
import java.util.regex.Pattern

import scala.reflect.macros.TypecheckException
import scala.reflect.macros.blackbox


/**
 * Macros asserting that a piece of code does not typecheck.
 *
 * `name` identifies the check in the reported messages, `code` is the source to typecheck and
 * `expected`, when given, is a regular expression that the error message has to match.
 */
object ShouldNotTypecheck {
  def apply(name: String, code: String): Unit = macro ShouldNotTypecheck.applyImplNoExp
  def apply(name: String, code: String, expected: String): Unit = macro ShouldNotTypecheck.applyImpl
}

/** Implementation of the `ShouldNotTypecheck` macros. */
final class ShouldNotTypecheck(val c: blackbox.Context) {
  import c.universe._

  /** Variant without an expected message: any typecheck error is accepted. */
  def applyImplNoExp(name: Expr[String], code: Expr[String]): Expr[Unit] = applyImpl(name, code, c.Expr(EmptyTree))

  /**
   * Typechecks `code` at compile time and aborts compilation unless it fails with an error
   * whose whole message matches `expected`.
   *
   * `name` and `code` have to be string literals; `expected` has to be a string literal or
   * empty.
   */
  def applyImpl(name: Expr[String], code: Expr[String], expected: Expr[String]): Expr[Unit] = {
    val Expr(Literal(Constant(codeStr: String))) = code
    val Expr(Literal(Constant(nameStr: String))) = name
    val (expPat, expMsg) = expected.tree match {
      // "Any error" has to accept messages that span several lines, so '.' must also match
      // line terminators; the pattern is applied to the whole message with matches().
      case EmptyTree => (Pattern.compile(".*", Pattern.DOTALL), "Expected some error.")
      case Literal(Constant(s: String)) =>
        (Pattern.compile(s, Pattern.CASE_INSENSITIVE), "Expected error matching: " + s)
    }

    try c.typecheck(c.parse("{ " + codeStr + " }"))
    catch {
      case e: TypecheckException =>
        val msg = e.getMessage
        if (!expPat.matcher(msg).matches) {
          c.abort(c.enclosingPosition, s"$nameStr failed in an unexpected way.\n$expMsg\nActual error: $msg")
        } else {
          println(s"$nameStr passed.")
          return reify(())
        }
    }

    // Reaching this point means the code typechecked without an error.
    c.abort(c.enclosingPosition, s"$nameStr succeeded unexpectedly.\n$expMsg")
  }
}
Example 50
Source File: MetadataActor.scala    From sparta   with Apache License 2.0 5 votes vote down vote up
package com.stratio.sparta.serving.api.actor

import java.util.regex.Pattern

import akka.actor.Actor
import com.github.nscala_time.time.Imports.{DateTime, DateTimeFormat}
import com.stratio.sparta.serving.api.actor.MetadataActor.ExecuteBackup
import com.stratio.sparta.serving.api.actor.MetadataActor._
import com.stratio.sparta.serving.api.constants.HttpConstant
import com.stratio.sparta.serving.api.utils.{BackupRestoreUtils, FileActorUtils}
import com.stratio.sparta.serving.core.config.SpartaConfig
import com.stratio.sparta.serving.core.constants.AppConstant._
import com.stratio.sparta.serving.core.exception.ServingCoreException
import com.stratio.sparta.serving.core.helpers.InfoHelper
import com.stratio.sparta.serving.core.models.SpartaSerializer
import com.stratio.sparta.serving.core.models.files.{BackupRequest, SpartaFilesResponse}
import spray.http.BodyPart
import spray.httpx.Json4sJacksonSupport

import scala.util.{Failure, Success, Try}

/**
 * Actor handling backup/restore requests for the metadata: it dispatches each incoming
 * message to the matching helper and replies to the sender with a `BackupResponse` or a
 * `SpartaFilesResponse`.
 */
class MetadataActor extends Actor with Json4sJacksonSupport with BackupRestoreUtils with SpartaSerializer
  with FileActorUtils {

  // Directory where the backups are saved; falls back to the default when it is not configured.
  val targetDir = Try(SpartaConfig.getDetailConfig.get.getString(BackupsLocation)).getOrElse(DefaultBackupsLocation)
  override val apiPath = HttpConstant.MetadataPath
  // Only names ending in ".json" pass this predicate.
  override val patternFileName = Option(Pattern.compile(""".*\.json""").asPredicate())

  // Zookeeper settings. The configuration section itself is mandatory; individual keys have defaults.
  val zkConfig = Try(SpartaConfig.getZookeeperConfig.get)
    .getOrElse(throw new ServingCoreException("Zookeeper configuration is mandatory"))
  override val uri = Try(zkConfig.getString("connectionString")).getOrElse(DefaultZKConnection)
  override val connectionTimeout = Try(zkConfig.getInt("connectionTimeout")).getOrElse(DefaultZKConnectionTimeout)
  override val sessionTimeout = Try(zkConfig.getInt("sessionTimeout")).getOrElse(DefaultZKSessionTimeout)

  override def receive: Receive = {
    case UploadBackups(files) => if (files.isEmpty) errorResponse() else uploadBackups(files)
    case ListBackups => browseBackups()
    case BuildBackup => buildBackup()
    case DeleteBackups => deleteBackups()
    case CleanMetadata => cleanMetadata()
    case DeleteBackup(fileName) => deleteBackup(fileName)
    case ExecuteBackup(backupRequest) => executeBackup(backupRequest)
    case _ => log.info("Unrecognized message in Backup/Restore Actor")
  }

  /** Imports the requested backup file from `targetDir` and replies with the outcome. */
  def executeBackup(backupRequest: BackupRequest): Unit =
    sender ! BackupResponse(Try {
      importer("/", s"$targetDir/${backupRequest.fileName}", backupRequest.deleteAllBefore)
    })

  /** Replies with a failure because an upload request carried no files. */
  def errorResponse(): Unit =
    sender ! SpartaFilesResponse(Failure(new IllegalArgumentException(s"At least one file is expected")))

  def deleteBackups(): Unit = sender ! BackupResponse(deleteFiles())

  def cleanMetadata(): Unit = sender ! BackupResponse(Try(cleanZk(BaseZKPath)))

  /**
   * Dumps `BaseZKPath` into a new timestamped JSON file under `targetDir`. On success the
   * sender receives the directory listing, on failure a failed `BackupResponse`.
   */
  def buildBackup(): Unit = {
    // "HH" is the 24-hour clock. The previous "hh" (12-hour clock, no AM/PM marker) gave
    // morning and afternoon backups indistinguishable, mis-ordered names.
    val format = DateTimeFormat.forPattern("yyyy-MM-dd-HH:mm:ss")
    val appInfo = InfoHelper.getAppInfo
    Try {
      dump(BaseZKPath, s"$targetDir/backup-${format.print(DateTime.now)}-${appInfo.pomVersion}.json")
    } match {
      case Success(_) =>
        sender ! SpartaFilesResponse(browseDirectory())
      case Failure(e) =>
        sender ! BackupResponse(Failure(e))
    }
  }

  def deleteBackup(fileName: String): Unit = sender ! BackupResponse(deleteFile(fileName))

  def browseBackups(): Unit = sender ! SpartaFilesResponse(browseDirectory())

  def uploadBackups(files: Seq[BodyPart]): Unit = sender ! SpartaFilesResponse(uploadFiles(files))
}

/** Message protocol understood (and emitted) by the `MetadataActor` class. */
object MetadataActor {

  // Request: upload the given multipart body parts as backup files.
  case class UploadBackups(files: Seq[BodyPart])

  // Reply: wraps the outcome of a backup operation.
  case class BackupResponse(status: Try[_])

  // Request: execute (import) the backup described by `backupRequest`.
  case class ExecuteBackup(backupRequest: BackupRequest)

  // Request: list the available backups.
  case object ListBackups

  // Request: build a new backup.
  case object BuildBackup

  // Request: delete all backups.
  case object DeleteBackups

  // Request: clean the metadata.
  case object CleanMetadata

  // Request: delete the single backup named `fileName`.
  case class DeleteBackup(fileName: String)

} 
Example 51
Source File: KStreamBuilderS.scala    From kafka-streams-scala   with Apache License 2.0 5 votes vote down vote up
package com.github.aseigneurin.kafka.streams.scala

import java.util.regex.Pattern

import com.github.aseigneurin.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.common.serialization.Serde
import org.apache.kafka.streams.kstream.{GlobalKTable, KStreamBuilder}
import org.apache.kafka.streams.processor.TopologyBuilder

/**
 * Scala-friendly facade over a single shared `KStreamBuilder` instance (`inner`).
 *
 * Every method forwards to the corresponding method on `inner`, taking the key/value
 * serdes as implicit parameters instead of explicit arguments.
 * NOTE(review): the declared `KStreamS`/`KTableS` return types presumably rely on the
 * implicit conversions imported from `ImplicitConversions` -- confirm there.
 */
object KStreamBuilderS {

  // The wrapped Java builder; all streams and tables created here are registered on it.
  val inner = new KStreamBuilder

  // Stream over one or more named topics.
  def stream[K, V](topics: String*)
                  (implicit keySerde: Serde[K], valSerde: Serde[V]): KStreamS[K, V] =
    inner.stream[K, V](keySerde, valSerde, topics: _*)

  // Stream over named topics with an explicit offset-reset policy.
  def stream[K, V](offsetReset: TopologyBuilder.AutoOffsetReset,
                   topics: String*)
                  (implicit keySerde: Serde[K], valSerde: Serde[V]): KStreamS[K, V] =
    inner.stream[K, V](offsetReset, keySerde, valSerde, topics: _*)

  // Stream over topics selected by a regex pattern.
  def stream[K, V](topicPattern: Pattern)
                  (implicit keySerde: Serde[K], valSerde: Serde[V]): KStreamS[K, V] =
    inner.stream[K, V](keySerde, valSerde, topicPattern)

  // Stream over pattern-selected topics with an explicit offset-reset policy.
  def stream[K, V](offsetReset: TopologyBuilder.AutoOffsetReset,
                   topicPattern: Pattern)
                  (implicit keySerde: Serde[K], valSerde: Serde[V]): KStreamS[K, V] =
    inner.stream[K, V](offsetReset, keySerde, valSerde, topicPattern)

  // Table over `topic`, associated with the store named `storeName`.
  def table[K, V](topic: String,
                  storeName: String)
                 (implicit keySerde: Serde[K], valSerde: Serde[V]): KTableS[K, V] =
    inner.table[K, V](keySerde, valSerde, topic, storeName)

  // Same as above with an explicit offset-reset policy.
  def table[K, V](offsetReset: TopologyBuilder.AutoOffsetReset,
                  topic: String,
                  storeName: String)
                 (implicit keySerde: Serde[K], valSerde: Serde[V]): KTableS[K, V] =
    inner.table[K, V](offsetReset, keySerde, valSerde, topic, storeName)


  // Global table over `topic`; returns the Java `GlobalKTable` directly (no Scala wrapper).
  def globalTable[K, V](topic: String,
                        storeName: String)
                       (implicit keySerde: Serde[K], valSerde: Serde[V]): GlobalKTable[K, V] =
    inner.globalTable(keySerde, valSerde, topic, storeName)

  // Merges several wrapped streams by unwrapping each to its `inner` stream first.
  def merge[K, V](streams: KStreamS[K, V]*): KStreamS[K, V] = {
    val streamsJ = streams.map { streamS => streamS.inner }
    inner.merge(streamsJ: _*)
  }

} 
Example 52
Source File: typechecking.scala    From perf_tester   with Apache License 2.0 5 votes vote down vote up
package shapeless.test

import scala.language.experimental.macros

import java.util.regex.Pattern

import scala.reflect.macros.{ whitebox, ParseException, TypecheckException }


/**
 * Compile-time check that a code snippet is ill-typed.
 * Both overloads are macros implemented in `IllTypedMacros`.
 */
object illTyped {
  // Expands through `applyImplNoExp`: no expected error pattern is supplied.
  def apply(code: String): Unit = macro IllTypedMacros.applyImplNoExp
  // Expands through `applyImpl`: `expected` is the pattern the type error is checked against.
  def apply(code: String, expected: String): Unit = macro IllTypedMacros.applyImpl
}

/**
 * Macro implementations behind `illTyped`: parse and typecheck a literal code snippet at
 * expansion time, and report a compile error unless typechecking fails as expected.
 */
@macrocompat.bundle
class IllTypedMacros(val c: whitebox.Context) {
  import c.universe._

  // No expected pattern: `null` is the sentinel that applyImpl reads as "any type error is acceptable".
  def applyImplNoExp(code: Tree): Tree = applyImpl(code, null)

  // `code` must be a string literal; `expected` is either null or a string literal holding a regex.
  def applyImpl(code: Tree, expected: Tree): Tree = {
    val Literal(Constant(codeStr: String)) = code
    val (expPat, expMsg) = expected match {
      case null => (null, "Expected some error.")
      case Literal(Constant(s: String)) =>
        // Case-insensitive, and DOTALL so that `.` also spans line breaks in the error message.
        (Pattern.compile(s, Pattern.CASE_INSENSITIVE | Pattern.DOTALL), "Expected error matching: "+s)
    }

    try {
      // Wrap the snippet in a freshly named object/val before typechecking it.
      val dummy0 = TermName(c.freshName)
      val dummy1 = TermName(c.freshName)
      c.typecheck(c.parse(s"object $dummy0 { val $dummy1 = { $codeStr } }"))
      // Only reached when typechecking did not throw, i.e. the snippet was well-typed.
      c.error(c.enclosingPosition, "Type-checking succeeded unexpectedly.\n"+expMsg)
    } catch {
      case e: TypecheckException =>
        val msg = e.getMessage
        // With an expected pattern, the whole message must match it; without one any type error passes.
        if((expected ne null) && !(expPat.matcher(msg)).matches)
          c.error(c.enclosingPosition, "Type-checking failed in an unexpected way.\n"+expMsg+"\nActual error: "+msg)
      case e: ParseException =>
        // A snippet that does not even parse is reported as an error, not as "ill-typed".
        c.error(c.enclosingPosition, s"Parsing failed.\n${e.getMessage}")
    }

    // The macro always expands to the unit value.
    q"()"
  }
} 
Example 53
Source File: Configuration.scala    From circe-magnolia   with Apache License 2.0 5 votes vote down vote up
package io.circe.magnolia.configured

import java.util.regex.Pattern


/**
 * Derivation settings.
 *
 * @param transformMemberNames      maps a member name to the name used in the encoded form
 * @param transformConstructorNames maps a constructor name to the name used in the encoded form
 * @param useDefaults               flag toggled by [[withDefaults]]
 * @param discriminator             optional discriminator field name, set by [[withDiscriminator]]
 */
final case class Configuration(
  transformMemberNames: String => String,
  transformConstructorNames: String => String,
  useDefaults: Boolean,
  discriminator: Option[String]
) {
  def withSnakeCaseMemberNames: Configuration = copy(
    transformMemberNames = Configuration.snakeCaseTransformation
  )

  def withKebabCaseMemberNames: Configuration = copy(
    transformMemberNames = Configuration.kebabCaseTransformation
  )

  def withSnakeCaseConstructorNames: Configuration = copy(
    transformConstructorNames = Configuration.snakeCaseTransformation
  )

  def withKebabCaseConstructorNames: Configuration = copy(
    transformConstructorNames = Configuration.kebabCaseTransformation
  )

  def withDefaults: Configuration = copy(useDefaults = true)
  def withDiscriminator(discriminator: String): Configuration = copy(discriminator = Some(discriminator))
}

object Configuration {

  /** Identity name transformations, no defaults, no discriminator. */
  val default: Configuration =
    Configuration(Predef.identity, Predef.identity, useDefaults = false, discriminator = None)

  // An upper-case run followed by a capitalised word, e.g. "HTTPServer" -> "HTTP" | "Server".
  private val basePattern: Pattern = Pattern.compile("([A-Z]+)([A-Z][a-z])")
  // A lower-case letter or digit followed by an upper-case letter, e.g. "fooBar" -> "foo" | "Bar".
  private val swapPattern: Pattern = Pattern.compile("([a-z\\d])([A-Z])")

  /** Inserts `separator` at the word boundaries found by the two patterns, then lower-cases. */
  private def separateWords(s: String, separator: String): String = {
    val replacement = "$1" + separator + "$2"
    val partial = basePattern.matcher(s).replaceAll(replacement)
    // Locale.ROOT keeps the result independent of the JVM default locale; under e.g. a
    // Turkish locale a plain toLowerCase turns "I" into a dotless "ı".
    swapPattern.matcher(partial).replaceAll(replacement).toLowerCase(java.util.Locale.ROOT)
  }

  /** `fooBarBaz` becomes `foo_bar_baz`. */
  val snakeCaseTransformation: String => String = s => separateWords(s, "_")

  /** `fooBarBaz` becomes `foo-bar-baz`. */
  val kebabCaseTransformation: String => String = s => separateWords(s, "-")
}

/** Import `defaults._` to bring the default [[Configuration]] into implicit scope. */
object defaults {
  implicit val defaultGenericConfiguration: Configuration = Configuration.default
}
Example 54
Source File: package.scala    From tscfg   with Apache License 2.0 5 votes vote down vote up
package tscfg

import java.util.regex.Pattern

import scala.util.control.NonFatal

/**
 * Named code templates loaded from classpath resources.
 *
 * In a resource, a template starts on a line matching `//<key>` and ends on a line
 * containing `//</key>`; the lines in between form the template text.
 */
package object codeDefs {
  // Start marker: optional leading whitespace, then `//<key>`; group 1 captures the key.
  private val beginTemplatePattern = Pattern.compile("\\s*//<([^>]+)>.*$")

  private val javaMap = getMap("codeDefs/JavaDefs.java")
  private val scalaMap = getMap("codeDefs/ScalaDefs.scala")

  /** Returns the Java template registered under `key`. */
  def javaDef(key: String): String = getDef("java", javaMap, key)

  /** Returns the Scala template registered under `key`. */
  def scalaDef(key: String): String = getDef("scala", scalaMap, key)

  /** Looks up `key`; a missing key is reported as a RuntimeException listing the defined keys. */
  private def getDef(lang: String, map: Map[String, String], key: String): String = {
    try map(key)
    catch {
      // $COVERAGE-OFF$
      case NonFatal(e) =>
        val keys = map.keySet.toList.sorted
        val msg = s"Unexpected: undefined key '$key' for $lang. Defined keys: $keys. Please report this bug"
        throw new RuntimeException(msg, e)
      // $COVERAGE-ON$
    }
  }

  /**
   * Parses the classpath resource `resourceName` into a map from template key to template
   * text. Any non-fatal failure is rethrown as a RuntimeException naming the resource.
   */
  private def getMap(resourceName: String): Map[String, String] = try {
    //println(s"codeDefs.getMap $resourceName")
    val map = collection.mutable.HashMap[String, String]()
    val is = getClass.getClassLoader.getResourceAsStream(resourceName)
    assert(is != null)
    // Close the stream even when reading fails part-way through.
    try {
      val source = io.Source.fromInputStream(is, "utf-8")
      var key: String = null
      val template = new StringBuilder
      for (line <- source.getLines()) {
        if (key == null) {
          // Outside a template: only a start marker is of interest.
          val m = beginTemplatePattern.matcher(line)
          if (m.find) {
            key = m.group(1)
          }
        }
        else if (line.contains("//</" + key + ">")) {
          // End marker: store the accumulated text and reset for the next template.
          map.update(key, template.toString)
          key = null
          template.setLength(0)
        }
        else template.append(line).append("\n")
      }
    }
    finally is.close()
    map.toMap
  }
  catch {
    case NonFatal(ex) =>
      throw new RuntimeException(
        s"Unexpected exception in getMap(resourceName=$resourceName)." +
        " Please report this bug.", ex)
  }
}
Example 55
Source File: DebuggingWordCount.scala    From beam-scala-examples   with Apache License 2.0 5 votes vote down vote up
package org.apache.beam.examples

import java.util.regex.Pattern

import org.apache.beam.sdk.Pipeline
import org.apache.beam.sdk.io.TextIO
import org.apache.beam.sdk.metrics.Metrics
import org.apache.beam.sdk.options.{Default, Description, PipelineOptionsFactory}
import org.apache.beam.sdk.transforms.DoFn.ProcessElement
import org.apache.beam.sdk.transforms.{Count, DoFn, MapElements, ParDo}
import org.apache.beam.sdk.values.KV
import org.slf4j.LoggerFactory

/**
 * Word-count pipeline entry point: reads text, extracts and counts words, keeps only the
 * counts whose word matches the configured filter pattern, and writes the formatted results.
 */
object DebuggingWordCount {

  def main(args: Array[String]): Unit = {
    // Parse and validate the command-line arguments into the pipeline options.
    val options = PipelineOptionsFactory
      .fromArgs(args: _*)
      .withValidation()
      .as(classOf[DebuggingWordCountOptions])

    val pipeline = Pipeline.create(options)

    // read -> extract words -> count per word -> filter by pattern -> format -> write
    pipeline.apply("ReadFiles", TextIO.read().from(options.getInputFile))
      .apply(ParDo.of(new ExtractWords))
      .apply(Count.perElement())
      .apply(ParDo.of(new FilterTextFn(options.getFilterPattern)))
      .apply(MapElements.via(new FormatResult))
      .apply("WriteWords", TextIO.write().to(options.getOutput))

    // Run the pipeline and wait for it to finish.
    pipeline.run().waitUntilFinish()
  }
}

// ======================================== UDFs ===============================================

/**
 * Passes through only the word counts whose word fully matches `pattern`, and keeps
 * counters of matched and unmatched words.
 *
 * @param pattern regular expression the whole word has to match
 */
class FilterTextFn(pattern: String) extends DoFn[KV[String, java.lang.Long], KV[String, java.lang.Long]] {
  private val logger = LoggerFactory.getLogger(getClass)
  // Initialised lazily, on first use.
  lazy val filter = Pattern.compile(pattern)
  lazy val matchedWords = Metrics.counter(classOf[FilterTextFn], "matchedWords")
  lazy val unmatchedWords = Metrics.counter(classOf[FilterTextFn], "unmatchedWords")

  /** Emits the element when its key matches the filter; otherwise only counts it. */
  @ProcessElement
  def processElement(c: ProcessContext): Unit = {
    if (filter.matcher(c.element.getKey).matches()) {
      logger.debug("Matched: " + c.element.getKey)
      matchedWords.inc()
      c.output(c.element)
    } else {
      logger.trace("Did not match: " + c.element.getKey)
      unmatchedWords.inc()
    }
  }
}

// ======================================= Options ==============================================

/** Pipeline options for `DebuggingWordCount`: the word-count options plus a regex filter. */
trait DebuggingWordCountOptions extends WordCountOptions {
  // Getter/setter pair for the filter regex; the default pattern is "Flourish|stomach".
  @Description("Regex filter pattern to use in DebuggingWordCount. Only words matching this pattern will be counted.")
  @Default.String("Flourish|stomach")
  def getFilterPattern: String
  def setFilterPattern(value: String): Unit
} 
Example 56
Source File: YamlProjectOperationInfoParser.scala    From rug   with GNU General Public License v3.0 5 votes vote down vote up
package com.atomist.project.common.yaml

import java.util.regex.{Pattern, PatternSyntaxException}

import com.atomist.param._
import com.atomist.project.common.template.{InvalidTemplateException, TemplateBasedProjectOperationInfo}
import com.fasterxml.jackson.annotation.JsonProperty
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper
import org.apache.commons.lang3.builder.ReflectionToStringBuilder

import scala.util.{Failure, Success, Try}


/** Parses the YAML descriptor of a template into a [[TemplateBasedProjectOperationInfo]]. */
object YamlProjectOperationInfoParser {

  private val mapper = new ObjectMapper(new YAMLFactory()) with ScalaObjectMapper
  mapper.registerModule(DefaultScalaModule)

  /**
   * Reads `yaml` and validates the regular expression of every declared parameter.
   *
   * @param yaml descriptor content; must be neither null nor empty
   * @return the parsed operation info
   * @throws InvalidYamlDescriptorException if the content is missing, cannot be parsed, or
   *                                        declares a parameter with an invalid pattern
   */
  @throws[InvalidYamlDescriptorException]
  def parse(yaml: String): TemplateBasedProjectOperationInfo = {
    if (yaml == null || yaml.isEmpty)
      throw new InvalidYamlDescriptorException("YAML content required in template metadata file")

    Try(mapper.readValue(yaml, classOf[BoundProjectOperationInfo])) match {
      case Failure(cause) =>
        throw new InvalidYamlDescriptorException(s"Failed to parse YAML [$yaml]: ${cause.getMessage}", cause)
      case Success(info) =>
        val badPatterns = info.parameters.flatMap(p => patternError(p))
        if (badPatterns.nonEmpty)
          throw new InvalidYamlDescriptorException(s"Bad regexp patterns: ${badPatterns.mkString(",")}")
        info
    }
  }

  /** Returns a description of the problem when the parameter's pattern does not compile. */
  private def patternError(p: Parameter): Option[String] =
    try {
      Pattern.compile(p.getPattern)
      None
    } catch {
      case pse: PatternSyntaxException =>
        Some(s"${p.getName}: Bad regular expression pattern: ${pse.getMessage}")
    }
}

/**
 * Mutable binding target for the YAML descriptor: the annotated fields name the
 * descriptor properties ("name", "description", "template_name", "type", "parameters",
 * "tags") and the overrides expose them through `TemplateBasedProjectOperationInfo`.
 */
private class BoundProjectOperationInfo extends TemplateBasedProjectOperationInfo {

  @JsonProperty("name")
  var name: String = _

  @JsonProperty("description")
  var description: String = _

  @JsonProperty("template_name")
  var templateName: String = _

  // Raw "type" property; exposed as an Option through templateType below.
  @JsonProperty("type")
  var _templateType: String = _

  // A null or empty "type" is reported as None.
  override def templateType: Option[String] =
    if (_templateType == null || "".equals(_templateType)) None
    else Some(_templateType)

  // Defaults to Nil.
  @JsonProperty("parameters")
  private var _params: Seq[Parameter] = Nil

  // Defaults to Nil.
  @JsonProperty("tags")
  private var _tags: Seq[TagHolder] = Nil

  override def parameters: Seq[Parameter] = _params

  // Converts each bound TagHolder into a Tag.
  override def tags: Seq[Tag] = _tags.map(tw => tw.toTag)

  // Reflection-based string form (commons-lang3 ReflectionToStringBuilder).
  override def toString = ReflectionToStringBuilder.toString(this)
}

/** Mutable binding target for one entry of the descriptor's "tags" list. */
private class TagHolder {

  @JsonProperty
  var name: String = _

  @JsonProperty
  var description: String = _

  // Builds a Tag from the bound name and description.
  def toTag = Tag(name, description)
}

/** Raised for a missing, unparseable or invalid YAML descriptor; `ex` is the optional cause. */
class InvalidYamlDescriptorException(msg: String, ex: Throwable = null) extends InvalidTemplateException(msg, ex) 
Example 57
Source File: DynamicConfigurationUtils.scala    From maha   with Apache License 2.0 5 votes vote down vote up
// Copyright 2017, Yahoo Holdings Inc.
// Licensed under the terms of the Apache License 2.0. Please see LICENSE file in project root for terms.
package com.yahoo.maha.service.config.dynamic

import java.util.regex.Pattern

import grizzled.slf4j.Logging
import org.json4s.JsonAST.JString
import org.json4s.{JField, JValue}

import scala.collection.mutable

/**
 * Helpers for locating "dynamic" configuration values in a JSON document, i.e. string
 * fields containing a `<%(name, default)%>` placeholder.
 */
object DynamicConfigurationUtils extends Logging {
  private val START = Pattern.quote("<%(")
  private val END = Pattern.quote(")%>")
  // Group 1 is the text before the last comma (property name), group 2 the text after it (default).
  val DYNAMIC_CONFIG_PATTERN = Pattern.compile(s"$START(.*),(.*)$END")

  /**
   * Collects every dynamic field of `json`.
   *
   * @return map from the trimmed property name to (JSON field name, trimmed default value);
   *         when a property name occurs more than once, the last occurrence wins
   */
  def extractDynamicFields(json: JValue): Map[String, (String, String)] = {
    implicit val formats = org.json4s.DefaultFormats
    getDynamicFields(json).map { case field @ (fieldName, fieldValue) =>
      require(fieldValue.isInstanceOf[JString], s"Cannot extract dynamic property from non-string field: $field")
      val matcher = DYNAMIC_CONFIG_PATTERN.matcher(fieldValue.extract[String])
      require(matcher.find(), s"Field does not contain dynamic property $field. Pattern - $DYNAMIC_CONFIG_PATTERN")
      require(matcher.groupCount() == 2, s"Expected name and default value in dynamic property field: $field")
      val propertyKey = matcher.group(1).trim
      val defaultValue = matcher.group(2).trim
      propertyKey -> ((fieldName, defaultValue))
    }.toMap
  }

  /** Returns the fields of `json` whose value is a string containing a dynamic placeholder. */
  def getDynamicFields(json: JValue): List[JField] = {
    implicit val formats = org.json4s.DefaultFormats
    json.filterField {
      case (_, JString(text)) => DYNAMIC_CONFIG_PATTERN.matcher(text).find()
      case _ => false
    }
  }
}
Example 58
Source File: DataSourceV2Utils.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.datasources.v2

import java.util.regex.Pattern

import org.apache.spark.internal.Logging
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.sources.v2.{DataSourceV2, SessionConfigSupport}

private[sql] object DataSourceV2Utils extends Logging {

  /**
   * Extracts the session configs that belong to the given data source.
   *
   * For a source implementing `SessionConfigSupport`, every config whose key has the form
   * `spark.datasource.<keyPrefix>.<name>` is returned under the key `<name>`. Any other
   * source yields an empty map.
   *
   * @param ds   the data source
   * @param conf the session configuration to scan
   * @return the matching configs, keyed by the part of the key that follows the prefix
   */
  def extractSessionConfigs(ds: DataSourceV2, conf: SQLConf): Map[String, String] = ds match {
    case cs: SessionConfigSupport =>
      val keyPrefix = cs.keyPrefix()
      require(keyPrefix != null, "The data source config key prefix can't be null.")

      // The prefix is data, not a regex: quote it so that characters such as '.' or '+'
      // inside it are matched literally.
      val pattern = Pattern.compile(s"^spark\\.datasource\\.${Pattern.quote(keyPrefix)}\\.(.+)")

      conf.getAllConfs.flatMap { case (key, value) =>
        val m = pattern.matcher(key)
        // The pattern has exactly one group, so a full match always provides group(1).
        if (m.matches()) Seq((m.group(1), value)) else Seq.empty
      }

    case _ => Map.empty
  }
}
Example 59
Source File: StringUtils.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.util

import java.util.regex.{Pattern, PatternSyntaxException}

import org.apache.spark.sql.AnalysisException
import org.apache.spark.unsafe.types.UTF8String

object StringUtils {

  /**
   * Filters `names` with a simple wildcard expression.
   *
   * `pattern` is trimmed and split on `|` into alternatives; in each alternative `*` stands
   * for any sequence of characters and matching is case-insensitive against the whole name.
   * An alternative that is not a valid regular expression is ignored.
   *
   * @param names   candidate names
   * @param pattern wildcard expression, e.g. `"ab*|xyz"`
   * @return the matching names, de-duplicated and sorted
   */
  def filterPattern(names: Seq[String], pattern: String): Seq[String] = {
    val alternatives = pattern.trim().split("\\|")
    val matched = alternatives.foldLeft(scala.collection.immutable.SortedSet.empty[String]) {
      (found, alternative) =>
        try {
          val compiled = Pattern.compile("(?i)" + alternative.replaceAll("\\*", ".*"))
          found ++ names.filter(candidate => compiled.matcher(candidate).matches())
        } catch {
          // Deliberately best-effort: an invalid alternative contributes nothing.
          case _: PatternSyntaxException => found
        }
    }
    matched.toSeq
  }
}
Example 60
Source File: Block.scala    From sort-imports   with MIT License 5 votes vote down vote up
package fix

import java.util.regex.Pattern

/** A group selector: `string` is its textual form, `matches` decides whether a name belongs to it. */
sealed trait Block {
  def string: String
  def matches(s: String): Boolean
}

object Block {

  /** Selects every name that starts with the literal prefix `string`. */
  final class StaticPrefix(val string: String) extends Block {
    override def matches(s: String): Boolean =
      s.startsWith(string)
  }

  object RegexPrefix {
    /** Marker prefix for regex blocks. */
    val Prefix: String = "re:"
  }

  /** Selects every name that has a prefix matching the regular expression `string`. */
  final class RegexPrefix(val string: String) extends Block {
    private val compiled = Pattern.compile(string)

    // lookingAt anchors the match at the start of the input but not at its end.
    override def matches(s: String): Boolean =
      compiled.matcher(s).lookingAt
  }

  /** Catch-all block, written as `*`; it accepts every name. */
  object Default extends Block {
    val string = "*"

    override def matches(s: String): Boolean = true
  }
}
Example 61
Source File: DefaultInstanceAliasConverter.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.rpc.instancealias.impl

import java.util.Base64
import java.util.regex.Pattern

import com.webank.wedatasphere.linkis.rpc.instancealias.InstanceAliasConverter
import org.apache.commons.lang.StringUtils
import org.springframework.stereotype.Component


/** Converts between instance names and their aliases; the alias is the Base64 form of the instance. */
@Component
class DefaultInstanceAliasConverter extends InstanceAliasConverter  {

  // The characters of the standard Base64 alphabet, including the '=' padding character.
  val pattern = Pattern.compile("[a-zA-Z\\d=\\+/]+")

  // todo use base64 for the moment
  override def instanceToAlias(instance: String): String = {
    new String(Base64.getEncoder.encode(instance.getBytes()))
  }

  override def aliasToInstance(alias: String): String = {
    new String(Base64.getDecoder.decode(alias))
  }

  /**
   * Checks that `alias` is non-blank and consists only of Base64 characters.
   *
   * The whole alias has to match: `matches()` is used rather than `find()`, which would
   * accept any string that merely contains one valid character (e.g. "!!a!!").
   */
  override def checkAliasFormatValid(alias: String): Boolean =
    !StringUtils.isBlank(alias) && pattern.matcher(alias).matches()
}
Example 62
Source File: CSResourceParser.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.engine.cs


import java.util
import java.util.regex.Pattern

import com.webank.wedatasphere.linkis.cs.client.service.CSResourceService
import com.webank.wedatasphere.linkis.engine.PropertiesExecuteRequest
import org.apache.commons.lang.StringUtils

import scala.collection.JavaConversions._
import scala.collection.mutable.ArrayBuffer


/**
 * Rewrites `cs://<name>` references in a piece of code to plain resource names and attaches
 * the referenced resources to the execute request.
 */
class CSResourceParser {

  private val pb = Pattern.compile("cs://[^\\s\"]+[$\\s]{0,1}", Pattern.CASE_INSENSITIVE)

  private val PREFIX = "cs://"

  /** All `cs://...` references found in `code`, trimmed, in order of appearance. */
  private def getPreFixResourceNames(code: String): Array[String] = {
    val matcher = pb.matcher(code)
    Iterator.continually(matcher).takeWhile(_.find).map(_.group.trim).toArray
  }

  /**
   * @param executeRequest    request whose "resources" property receives the resolved resources
   * @param code              code that may contain `cs://` references
   * @param contextIDValueStr context id used to look up the upstream resources
   * @param nodeNameStr       node name used to look up the upstream resources
   * @return `code` with every resolved `cs://<name>` reference replaced by `<name>`
   */
  def parse(executeRequest: PropertiesExecuteRequest, code: String, contextIDValueStr: String, nodeNameStr: String): String = {

    //TODO getBMLResource peaceWong
    val bmlResourceList = CSResourceService.getInstance().getUpstreamBMLResource(contextIDValueStr, nodeNameStr)

    val parsedResources = new util.ArrayList[util.Map[String, Object]]()
    val references = new ArrayBuffer[String]()
    val replacements = new ArrayBuffer[String]()

    for (reference <- getPreFixResourceNames(code)) {
      val resourceName = reference.replace(PREFIX, "").trim
      // References without a matching upstream resource are left untouched in the code.
      bmlResourceList.find(_.getDownloadedFileName.equals(resourceName)).foreach { bmlResource =>
        val descriptor = new util.HashMap[String, Object]()
        descriptor.put("resourceId", bmlResource.getResourceId)
        descriptor.put("version", bmlResource.getVersion)
        descriptor.put("fileName", resourceName)
        parsedResources.add(descriptor)
        references.append(reference)
        replacements.append(resourceName)
      }
    }

    executeRequest.properties.put("resources", parsedResources)
    StringUtils.replaceEach(code, references.toArray, replacements.toArray)
  }

}
Example 63
Source File: CommentInterceptor.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.entrance.interceptor.impl

import java.lang
import java.util.regex.Pattern

import com.webank.wedatasphere.linkis.entrance.interceptor.EntranceInterceptor
import com.webank.wedatasphere.linkis.protocol.query.RequestPersistTask
import com.webank.wedatasphere.linkis.protocol.task.Task
import org.slf4j.{Logger, LoggerFactory}

import scala.util.matching.Regex

"
  /** Replaces every match of `scalaCommentPattern` in `code` with the match's first capture group. */
  override def dealComment(code: String): String =
    Pattern.compile(scalaCommentPattern).matcher(code).replaceAll("$1")
}


/** Manual smoke test: prints a sample SQL statement after comment handling. */
object CommentMain {
  def main(args: Array[String]): Unit = {
    // A `--` comment that follows a terminated statement.
    val commentAfterStatement = "select * from default.user;--你好;show tables"
    // A `--` comment that directly follows an unterminated statement (not printed here).
    val commentInsideStatement = "select * from default.user--你好;show tables"
    println(SQLCommentHelper.dealComment(commentAfterStatement))
  }
}
Example 64
Source File: CSTableParser.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.engine.cs

import java.util.regex.Pattern

import com.webank.wedatasphere.linkis.common.utils.Logging
import com.webank.wedatasphere.linkis.cs.client.service.CSTableService
import com.webank.wedatasphere.linkis.cs.common.entity.metadata.CSTable
import com.webank.wedatasphere.linkis.cs.common.utils.CSCommonUtils
import com.webank.wedatasphere.linkis.engine.exception.ExecuteError
import com.webank.wedatasphere.linkis.engine.execute.EngineExecutorContext
import org.apache.commons.lang.StringUtils
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.datasources.csv.DolphinToSpark

import scala.collection.mutable.ArrayBuffer


  /**
   * Looks up the upstream table for `csTempTable` by delegating to
   * `CSTableService.getUpstreamSuitableTable` with the given context id and node name.
   */
  def getCSTable(csTempTable:String,  contextIDValueStr: String, nodeNameStr: String):CSTable = {
    CSTableService.getInstance().getUpstreamSuitableTable(contextIDValueStr, nodeNameStr, csTempTable)
  }

  /**
   * Creates a temp view for `csTable` in the Hive-enabled SparkSession (obtained or created
   * on demand), using the table's name as the view name and its location as the source.
   */
  def registerTempTable(csTable: CSTable):Unit = {
    val spark = SparkSession.builder().enableHiveSupport().getOrCreate()
    info(s"Start to create  tempView to sparkSession viewName(${csTable.getName}) location(${csTable.getLocation})")
    // NOTE(review): the meaning of the trailing `true` flag is defined by DolphinToSpark.createTempView -- confirm there.
    DolphinToSpark.createTempView(spark, csTable.getName, csTable.getLocation, true)
    info(s"Finished to create  tempView to sparkSession viewName(${csTable.getName}) location(${csTable.getLocation})")
  }
} 
Example 65
Source File: ConfManager.scala    From HadoopLearning   with MIT License 5 votes vote down vote up
package com.utils

import java.util.regex.Pattern

import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer

import scala.collection.mutable.HashMap

/**
  * Spark Streaming configuration helpers.
  *
  * @author liumm
  * @since 2018-07-27 20:27
  */
object ConfManager {

  /**
    * Maximum number of records written to storage per batch.
    */
  val maxRecords = 1000

  /**
    * Builds the Kafka settings for a stream.
    *
    * @param streamConf stream settings (brokers, group id, topic regex)
    * @return the consumer configuration together with the compiled topic pattern
    */
  def kafkaParam(streamConf: StreamConf): (Map[String, Object], Pattern) = {
    (getConsumerConfig(streamConf.brokers, streamConf.groupId), Pattern.compile(streamConf.topics))
  }

  /**
    * Builds the string-valued Kafka settings used for metadata access.
    *
    * @param streamConf stream settings (brokers, group id)
    * @return the configuration map
    */
  def kafkaParamForMetadata(streamConf: StreamConf): Map[String, String] =
    Map[String, String](
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> streamConf.brokers,
      "metadata.broker.list" -> streamConf.brokers,
      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "smallest",
      ConsumerConfig.GROUP_ID_CONFIG -> streamConf.groupId
    )

  /**
    * Generates the Kafka consumer configuration.
    *
    * @return the Kafka consumer configuration
    */
  private def getConsumerConfig(brokers: String, groupId: String): Map[String, Object] =
    Map[String, Object](
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokers,
      ConsumerConfig.GROUP_ID_CONFIG -> groupId,
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      // Integer.valueOf replaces the deprecated `new Integer(int)` boxing constructor.
      ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG -> Integer.valueOf(3 * 1024 * 1024),
      ConsumerConfig.MAX_POLL_RECORDS_CONFIG -> Integer.valueOf(100),
      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "latest",
      // Disable Kafka's automatic offset commit.
      ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> java.lang.Boolean.FALSE
    )

  /** Creates a `StreamConf` pre-filled with the hard-coded sample settings below. */
  def newStreamConf(): StreamConf = {
    val conf = new StreamConf()
    conf.zkUrl = "hdp01:2181"
    conf.brokers = "hdp01:9092"
    conf.groupId = "liumm_group"
    conf.topics = "i57_.*"
    conf
  }

}
Example 66
Source File: PlainEnum.scala    From docless   with MIT License 5 votes vote down vote up
package com.timeout.docless.schema

import java.util.regex.Pattern

import enumeratum.EnumEntry
import shapeless._
import shapeless.labelled._
import shapeless.ops.hlist

/** Type class exposing the list of string ids associated with the type `A`. */
trait PlainEnum[A] {
  def ids: List[String]
}

/**
 * Instances and derivation of [[PlainEnum]], plus the [[PlainEnum.IdFormat]] strategies that
 * turn a constructor name into an id.
 */
object PlainEnum {
  /** Strategy for formatting a constructor name as an id. */
  sealed trait IdFormat {
    def apply(s: String): String
  }

  // All case conversions below use Locale.ROOT so that the generated ids do not depend on
  // the JVM default locale (a Turkish locale, for instance, lower-cases "I" to a dotless "ı").
  object IdFormat {
    case object SnakeCase extends IdFormat {
      override def apply(s: String) = snakify(s)
    }

    case object UpperSnakeCase extends IdFormat {
      override def apply(s: String) = snakify(s).toUpperCase(java.util.Locale.ROOT)
    }

    case object UpperCase extends IdFormat {
      override def apply(s: String) = s.toUpperCase(java.util.Locale.ROOT)
    }

    case object LowerCase extends IdFormat {
      override def apply(s: String) = s.toLowerCase(java.util.Locale.ROOT)
    }

    case object Default extends IdFormat {
      override def apply(s: String) = s
    }

    // An upper-case run followed by a capitalised word, e.g. "HTTPServer" -> "HTTP_Server".
    private val snakifyRegexp1 = Pattern.compile("([A-Z]+)([A-Z][a-z])")
    // A lower-case letter or digit followed by an upper-case letter, e.g. "fooBar" -> "foo_Bar".
    private val snakifyRegexp2 = Pattern.compile("([a-z\\d])([A-Z])")
    private val snakifyReplacement = "$1_$2"

    /** Converts a camel-case name to lower snake case. */
    private def snakify(s: String): String = {
      val first = snakifyRegexp1.matcher(s).replaceAll(snakifyReplacement)
      snakifyRegexp2.matcher(first).replaceAll(snakifyReplacement).toLowerCase(java.util.Locale.ROOT)
    }

    /** The format used when no other one is in implicit scope: names are kept as they are. */
    implicit val default: IdFormat = Default
  }

  /** Builds an instance from a fixed list of ids. */
  def instance[A](_ids: List[String]): PlainEnum[A] = new PlainEnum[A] {
    override def ids = _ids
  }

  // Base case of the derivation: the empty coproduct has no ids.
  implicit val cnilEnum: PlainEnum[CNil] = instance(Nil)

  // Inductive case: the head constructor must have a generic representation of length 0
  // (`zeroLen`); its formatted label is prepended to the ids of the tail.
  implicit def coprodEnum[K <: Symbol, H, T <: Coproduct, HL <: HList, N <: Nat](
    implicit
    witness: Witness.Aux[K],
    gen: Generic.Aux[H, HL],
    hLen: hlist.Length.Aux[HL, N],
    lazyEnum: Lazy[PlainEnum[T]],
    zeroLen: N =:= Nat._0,
    format: IdFormat
  ): PlainEnum[FieldType[K, H] :+: T] =
    instance(format(witness.value.name) :: lazyEnum.value.ids)

  // Instance for a type `A` with a labelled coproduct representation; the `ev` evidence
  // excludes subtypes of enumeratum's `EnumEntry`.
  implicit def genericPlainEnum[A, R <: Coproduct](
    implicit
    gen: LabelledGeneric.Aux[A, R],
    enum: PlainEnum[R],
    format: IdFormat,
    ev: A <:!< EnumEntry
  ): PlainEnum[A] = instance(enum.ids)

  /** Summons the instance for `A`. */
  def apply[A](implicit ev: PlainEnum[A]): PlainEnum[A] = ev
}
Example 67
Source File: ConfigReader.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.internal.config

import java.util.{Map => JMap}
import java.util.regex.Pattern

import scala.collection.mutable.HashMap
import scala.util.matching.Regex

private object ConfigReader {

  // Matches references of the form `${name}` or `${prefix:name}`.
  // Group 1 is the optional prefix (word characters); group 2 is the name
  // (non-whitespace characters, matched reluctantly up to the closing brace).
  private val REF_RE = "\\$\\{(?:(\\w+?):)?(\\S+?)\\}".r

}


  // Expands the references in `input`, starting with an empty set of references in flight.
  def substitute(input: String): String = substitute(input, Set())

  /**
   * Recursively expands every `ConfigReader.REF_RE` reference found in `input`.
   *
   * A reference whose prefix/name has no binding is left in the output as-is.
   * `usedRefs` holds the references currently being expanded; meeting one of
   * them again fails the `require` with a "Circular reference" message.
   * A null `input` is returned unchanged.
   */
  private def substitute(input: String, usedRefs: Set[String]): String = {
    if (input == null) {
      input
    } else {
      ConfigReader.REF_RE.replaceAllIn(input, { found =>
        val prefix = found.group(1)
        val name = found.group(2)
        val ref = if (prefix == null) name else s"$prefix:$name"
        require(!usedRefs.contains(ref), s"Circular reference in $input: $ref")

        val expanded = bindings.get(prefix).flatMap(_.get(name)) match {
          case Some(value) => substitute(value, usedRefs + ref)
          case None => found.matched
        }
        // The expansion is literal text: stop `$` and `\` being read as group references.
        Regex.quoteReplacement(expanded)
      })
    }
  }

} 
Example 68
Source File: StringUtils.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.util

import java.util.regex.{Pattern, PatternSyntaxException}

import org.apache.spark.unsafe.types.UTF8String

object StringUtils {

  /**
   * Translates a SQL LIKE pattern into a Java regular expression string.
   *
   *  - `_` becomes `.` (exactly one character) and `%` becomes `.*`;
   *  - `\_` and `\%` become a literal underscore / percent sign;
   *  - `\\` becomes a literal backslash, and the character after it is
   *    interpreted normally (previously the pair was dropped and the next
   *    character was wrongly treated as escaped);
   *  - a backslash before any other character, or at the very end of the
   *    pattern, is kept as a literal backslash;
   *  - every other character is quoted with `Pattern.quote`.
   *
   * The result is prefixed with `(?s)` so that `.` also matches line
   * terminators. An empty pattern is returned unchanged.
   */
  def escapeLikeRegex(v: String): String = {
    if (v.isEmpty) {
      v
    } else {
      val out = new StringBuilder("(?s)")
      val in = v.iterator
      while (in.hasNext) {
        in.next() match {
          case '\\' if in.hasNext =>
            // Consume the escaped character together with its backslash.
            in.next() match {
              case '_' => out.append("_")
              case '%' => out.append("%")
              case '\\' => out.append(Pattern.quote("\\"))
              case c => out.append(Pattern.quote("\\" + c))
            }
          case '\\' => out.append(Pattern.quote("\\")) // dangling escape at end of pattern
          case '_' => out.append(".")
          case '%' => out.append(".*")
          case c => out.append(Pattern.quote(Character.toString(c)))
        }
      }
      out.result()
    }
  }

  // Lower-case spellings accepted as boolean true / false.
  private[this] val trueStrings = Set("t", "true", "y", "yes", "1").map(UTF8String.fromString)
  private[this] val falseStrings = Set("f", "false", "n", "no", "0").map(UTF8String.fromString)

  // Both checks lower-case the input first, so they are case-insensitive.
  def isTrueString(s: UTF8String): Boolean = trueStrings.contains(s.toLowerCase)
  def isFalseString(s: UTF8String): Boolean = falseStrings.contains(s.toLowerCase)

  
  /**
   * Returns the names matching `pattern`, sorted and without duplicates.
   *
   * `pattern` is trimmed and split on `|`. In each alternative `*` stands for
   * any sequence of characters, and matching is case-insensitive and must
   * cover the whole name. Alternatives that are not valid regular expressions
   * are silently ignored.
   */
  def filterPattern(names: Seq[String], pattern: String): Seq[String] = {
    val hits = pattern.trim().split("\\|").flatMap { alternative =>
      try {
        val compiled = Pattern.compile("(?i)" + alternative.replaceAll("\\*", ".*"))
        names.filter(candidate => compiled.matcher(candidate).matches())
      } catch {
        case _: PatternSyntaxException => Seq.empty[String]
      }
    }
    hits.distinct.sorted.toSeq
  }
} 
Example 69
Source File: EntityFilter.scala    From prometheus-akka   with Apache License 2.0 5 votes vote down vote up
package com.workday.prometheus.akka.impl

import java.util.regex.Pattern

/** Accepts a name when at least one include filter accepts it and no exclude filter does. */
private[akka] case class EntityFilter(includes: List[PathFilter], excludes: List[PathFilter]) {
  def accept(name: String): Boolean = {
    val included = includes.exists(_.accept(name))
    // Excludes are consulted only for names that were included.
    included && excludes.forall(filter => !filter.accept(name))
  }
}

/** Predicate over path strings. */
private[akka] trait PathFilter {
  /** Returns true when `path` is accepted by this filter. */
  def accept(path: String): Boolean
}

/** Accepts a path when the whole path matches the regular expression `path`. */
private[akka] case class RegexPathFilter(path: String) extends PathFilter {
  private val compiled = path.r

  // The parameter deliberately shadows the constructor field: the regex is
  // already compiled and only the candidate path is needed here.
  override def accept(path: String): Boolean =
    compiled.unapplySeq(path).isDefined
}


/**
 * Accepts paths that fully match a glob expression.
 *
 * The glob is translated to a regular expression token by token:
 *  - `**` matches any sequence of characters, including `/`;
 *  - `*` matches any sequence of characters other than `/`;
 *  - `?` matches a single character other than `/`;
 *  - a backslash followed by a character matches that character literally;
 *  - a run of `/` matches one or more `/`;
 *  - any other text matches itself literally.
 * When the last token is a run of `/`, an implicit `**` is appended.
 */
private[akka] case class GlobPathFilter(glob: String) extends PathFilter {
  private val GLOB_PATTERN = Pattern.compile("(\\*\\*?)|(\\?)|(\\\\.)|(/+)|([^*?]+)")
  private val pattern = getGlobPattern(glob)

  def accept(path: String): Boolean = pattern.matcher(path).matches

  private def getGlobPattern(glob: String) = {
    val regex = new StringBuilder
    val tokens = GLOB_PATTERN.matcher(glob)
    var endsWithSlashRun = false
    while (tokens.find()) {
      endsWithSlashRun = false
      val stars = tokens.group(1)
      if (stars != null) {
        // "**" crosses path separators, a single "*" does not
        regex.append(if (stars.length == 2) ".*" else "[^/]*")
      } else if (tokens.group(2) != null) {
        // '?': any single non-slash character
        regex.append("[^/]")
      } else if (tokens.group(3) != null) {
        // backslash-escaped character: drop the backslash, match the rest literally
        regex.append(Pattern.quote(tokens.group().substring(1)))
      } else if (tokens.group(4) != null) {
        // any number of '/' characters
        regex.append("/+")
        endsWithSlashRun = true
      } else {
        // plain text
        regex.append(Pattern.quote(tokens.group()))
      }
    }
    if (endsWithSlashRun) {
      // the glob ends in '/': behave as if "**" followed
      regex.append(".*")
    }
    Pattern.compile(regex.toString)
  }
}
Example 70
Source File: Assertions.scala    From daml   with Apache License 2.0 5 votes vote down vote up
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.api.testtool.infrastructure

import ai.x.diff.DiffShow
import com.daml.grpc.{GrpcException, GrpcStatus}
import java.util.regex.Pattern
import io.grpc.Status

import scala.language.higherKinds
import scala.util.control.NonFatal

object Assertions extends DiffExtensions {
  /** Fails the current assertion by throwing an `AssertionError` with `message`. */
  def fail(message: String): Nothing =
    throw new AssertionError(message)

  /** Same as `fail(message)`, additionally recording `cause` on the error. */
  def fail(message: String, cause: Throwable): Nothing =
    throw new AssertionError(message, cause)

  /**
   * Asserts that `as` contains exactly `length` elements and returns it
   * unchanged, so the call can be chained. `context` prefixes the failure message.
   */
  def assertLength[A, F[_] <: Seq[_]](context: String, length: Int, as: F[A]): F[A] = {
    val actual = as.length
    assert(actual == length, s"$context: expected $length item(s), got $actual")
    as
  }

  /** Asserts that `as` has exactly one element and returns that element. */
  def assertSingleton[A](context: String, as: Seq[A]): A =
    assertLength(context, 1, as).head

  /**
   * Compares `actual` and `expected` via their `DiffShow` instance. When they
   * are not identical, throws an error carrying the rendered diff and a
   * message prefixed with `context`.
   */
  def assertEquals[T: DiffShow](context: String, actual: T, expected: T): Unit = {
    val comparison = DiffShow.diff(actual, expected)
    if (comparison.isIdentical) ()
    else
      throw AssertionErrorWithPreformattedMessage(
        comparison.string,
        s"$context: two objects are supposed to be equal but they are not",
      )
  }

  
  /**
   * Convenience overload taking the expected text as a plain string.
   * An empty `pattern` means "no pattern" (`None`); otherwise the string is
   * quoted so that it is matched literally rather than as a regex.
   */
  def assertGrpcError(t: Throwable, expectedCode: Status.Code, pattern: String): Unit = {
    val literalPattern: Option[Pattern] =
      if (pattern.isEmpty) None
      else Some(Pattern.compile(Pattern.quote(pattern)))
    assertGrpcError(t, expectedCode, literalPattern)
  }
} 
Example 71
Source File: FilteringTest.scala    From scala-commons   with MIT License 5 votes vote down vote up
package com.avsystem.commons
package mongo.core.ops

import java.util.regex.Pattern

import com.avsystem.commons.mongo.BsonRef
import com.avsystem.commons.serialization.GenCodec
import com.mongodb.client.model.Filters
import org.bson.BsonType
import org.bson.conversions.Bson
import org.scalatest.funsuite.AnyFunSuite

/**
 * Checks that each filtering operator produces the same `Bson` as the
 * corresponding `com.mongodb.client.model.Filters` factory.
 */
class FilteringTest extends AnyFunSuite {

  import Filtering._
  import FilteringTest._

  // Registers a test comparing a filter built from `sRef` with the Bson
  // produced by `verify` for the key "s".
  private def testCase(name: String)(filter: (Ref[String]) => Bson)(verify: (String) => Bson): Unit = {
    import BsonEquality.bsonEquality

    test(name) {
      assert(filter(sRef) === verify("s"))
    }
  }

  // Like `testCase`, for operators that take a value; a fixed sample value is used.
  private def testValue(name: String)(filter: (Ref[String], String) => Bson)(verify: (String, String) => Bson): Unit = {
    val someValue = "someValue"
    testCase(name)(filter(_, someValue))(verify(_, someValue))
  }

  testValue("equal")(_ equal _)(Filters.eq)
  testValue("notEqual")(_ notEqual _)(Filters.ne)

  testValue("gt")(_ gt _)(Filters.gt)
  testValue("lt")(_ lt _)(Filters.lt)
  testValue("gte")(_ gte _)(Filters.gte)
  testValue("lte")(_ lte _)(Filters.lte)

  testValue("in")(_ in _)(Filters.in(_, _))
  testValue("nin")(_ nin _)(Filters.nin(_, _))

  testCase("exists")(_.exists())(Filters.exists)
  testCase("notExists")(_.exists(false))(Filters.exists(_, false))

  testCase("ofType")(_.ofType("someTypeName"))(Filters.`type`(_, "someTypeName"))
  testCase("ofTypeEnum")(_.ofType(BsonType.STRING))(Filters.`type`(_, BsonType.STRING))

  testCase("mod")(_.mod(313, 131))(Filters.mod(_, 313, 131))

  // The same expression as a string, a Scala Regex and a Java Pattern.
  private val regexString = "\\d"
  private val regexScala = regexString.r
  private val regexJava = Pattern.compile(regexString)
  testCase("regexScala")(_ regex regexScala)(Filters.regex(_, regexString))
  testCase("regexJava")(_ regex regexJava)(Filters.regex(_, regexJava))
  testCase("regexString")(_ regex regexString)(Filters.regex(_, regexString))
  testCase("regexOptions")(_.regex(regexString, "ops"))(Filters.regex(_, regexString, "ops"))

  import BsonEquality.bsonEquality

  // Operators on the list-valued reference `aRef` (key "a").
  test("contains") {
    assert(aRef.contains("elem") === Filters.eq("a", "elem"))
  }

  private val simpleFilter = Filters.eq("key", "value")
  test("elemMatch") {
    assert(aRef.elemMatch(simpleFilter) === Filters.elemMatch("a", simpleFilter))
  }

  test("size") {
    assert(aRef.size(131) === Filters.size("a", 131))
  }

  test("all") {
    assert(aRef.all("e1", "e2") === Filters.all("a", "e1", "e2"))
  }

  // Logical combinators.
  private val otherFilter = Filters.eq("key2", "value2")
  test("and") {
    assert(and(simpleFilter, otherFilter) === Filters.and(simpleFilter, otherFilter))
  }

  test("or") {
    assert(or(simpleFilter, otherFilter) === Filters.or(simpleFilter, otherFilter))
  }

  test("nor") {
    assert(nor(simpleFilter, otherFilter) === Filters.nor(simpleFilter, otherFilter))
  }

  test("not") {
    assert(not(simpleFilter) === Filters.not(simpleFilter))
  }
}

// Fixture: references into `SomeEntity` used by the tests above.
object FilteringTest extends BsonRef.Creator[SomeEntity] {
  implicit val codec: GenCodec[SomeEntity] = GenCodec.materialize
  val sRef: Ref[String] = ref(_.s)
  val aRef: Ref[List[String]] = ref(_.a)
}
Example 72
Source File: URLInString.scala    From Argus-SAF   with Apache License 2.0 5 votes vote down vote up
package org.argus.jawa.core.util

import java.util.regex.Pattern

 
/** Extracts URL-looking substrings from arbitrary text. */
object URLInString {

  // Compiled once: the expression is large, and java.util.regex.Pattern
  // instances are immutable and safe to share between calls.
  private val UrlPattern: Pattern = Pattern.compile(
    "\\b(((ht|f)tp(s?)\\:\\/\\/|~\\/|\\/)|www.)" +
      "(\\w+:\\w+@)?(([-\\w]+\\.)+(com|org|net|gov" +
      "|mil|biz|info|mobi|name|aero|jobs|museum" +
      "|travel|[a-z]{2}))(:[\\d]{1,5})?" +
      "(((\\/([-\\w~!$+|.,=]|%[a-f\\d]{2})+)+|\\/)+|\\?|#)?" +
      "((\\?([-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" +
      "([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)" +
      "(&(?:[-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" +
      "([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)*)*" +
      "(#([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)?\\b")

  /**
   * Returns the distinct substrings of `str` that match the URL pattern.
   *
   * @param str text to scan
   * @return every match of the pattern, without duplicates
   */
  def extract(str: String): Set[String] = {
    val found = Set.newBuilder[String]
    val m = UrlPattern.matcher(str)
    while (m.find()) {
      val hit = m.group()
      // Kept from the original: strip one pair of enclosing parentheses.
      // NOTE(review): the pattern does not appear able to match a leading "(",
      // so this branch looks unreachable; confirm before removing.
      val url =
        if (hit.startsWith("(") && hit.endsWith(")")) hit.substring(1, hit.length() - 1)
        else hit
      found += url
    }
    found.result()
  }
}
Example 73
Source File: PScoutTranslator.scala    From Argus-SAF   with Apache License 2.0 5 votes vote down vote up
package org.argus.amandroid.core.util

import org.argus.jawa.core.util._
import java.util.regex.Pattern

import org.argus.jawa.core.elements.{JavaKnowledge, Signature}


object PScoutTranslator {
  /** Entry point: translates the PScout file whose path is the first argument. */
  def main(args: Array[String]): Unit = {
    val sourceUri = FileUtil.toUri(args(0))
    translate(sourceUri)
  }
  
  /**
   * Reads the file at `uri` and groups the method signatures it lists by permission.
   *
   * A line starting with "Permission:" sets the current permission; each
   * following line starting with "<" is parsed with `formatSignature` and,
   * when it parses, added under that permission. Other lines are ignored.
   *
   * The underlying `Source` is closed once all lines are consumed, including
   * when reading or parsing fails (previously it was never closed).
   */
  def translate(uri: FileResourceUri): IMap[String, ISet[Signature]] = {
    val permissionMap: MMap[String, MSet[Signature]] = mmapEmpty
    // NOTE(review): signatures seen before the first "Permission:" line are
    // stored under a null key; confirm whether such input can occur.
    var currentPermission: String = null
    val source = scala.io.Source.fromFile(FileUtil.toFile(uri))
    try {
      source.getLines().foreach {
        case permission if permission.startsWith("Permission:") =>
          currentPermission = permission.replace("Permission:", "")
        case sigstr if sigstr.startsWith("<") =>
          val sig = formatSignature(sigstr)
          permissionMap.getOrElseUpdate(currentPermission, msetEmpty) ++= sig
        case _ =>
      }
    } finally {
      source.close()
    }
    permissionMap.map{case (k, v) => (k, v.toSet)}.toMap
  }
  // Capture groups: 1 = class name, 2 = return type, 3 = method name, 4 = comma-separated parameter types.
  //                                    1            2                     3                     4
  private val regex = "<([[^\\s]&&[^:]]+):\\s([^\\s]+)\\s([[^\\s]&&[^\\(]]+)\\(([[^\\s]&&[^\\)]]*)\\)>\\s+\\(.*\\)"
  // Compiled once instead of once per input line; Pattern instances are immutable.
  private val signaturePattern: Pattern = Pattern.compile(regex)

  /**
   * Parses one PScout signature line into a `Signature`.
   *
   * @param sigstr a line expected to match `regex`
   * @return the signature, or `None` (after logging to stderr) when the line does not match
   */
  private def formatSignature(sigstr: String): Option[Signature] = {
    val m = signaturePattern.matcher(sigstr)
    if(m.find()){
      val classTypStr = m.group(1)
      val retTypStr = m.group(2)
      val methodName = m.group(3)
      val paramTypStrList = m.group(4).split(",")
      val classTyp = JavaKnowledge.getTypeFromJawaName(classTypStr)
      // Build "(<param signatures>)<return signature>"; an empty parameter
      // list yields a single empty string, which is skipped.
      val protosb = new StringBuilder
      protosb.append("(")
      paramTypStrList.filter(_.nonEmpty).foreach { paramTypStr =>
        protosb.append(JavaKnowledge.formatTypeToSignature(JavaKnowledge.getTypeFromJawaName(paramTypStr)))
      }
      protosb.append(")")
      protosb.append(JavaKnowledge.formatTypeToSignature(JavaKnowledge.getTypeFromJawaName(retTypStr)))
      Some(new Signature(classTyp, methodName, protosb.toString()))
    } else {
      System.err.println("PScoutTranslator, does not match: " + sigstr)
      None
    }
  }
} 
Example 74
Source File: ARSCFileParser_apktool.scala    From Argus-SAF   with Apache License 2.0 5 votes vote down vote up
package org.argus.amandroid.core.parser

import org.argus.jawa.core.util.FileResourceUri
import brut.androlib.res.decoder.ARSCDecoder
import brut.androlib.res.decoder.ARSCDecoder.ARSCData
import brut.androlib.res.data.ResID
import brut.androlib.res.data.ResResSpec
import brut.androlib.res.data.ResPackage
import java.util.regex.Pattern
import org.argus.jawa.core.util._
import java.util.zip.ZipFile

class ARSCFileParser_apktool {
//  final private val TITLE = "ARSCFileParser_apktool"
  private var data: ARSCData = _
  
  /**
   * Opens the APK at `apkUri` and, when it contains a "resources.arsc" entry,
   * decodes that entry into `data`. When the entry is missing, `data` is left
   * untouched. The zip file is always closed.
   */
  def parse(apkUri: FileResourceUri): Unit = {
    val archive = new ZipFile(FileUtil.toFilePath(apkUri))
    try {
      val entry = archive.getEntry("resources.arsc")
      if (entry != null) {
        val stream = archive.getInputStream(entry)
        this.data = ARSCDecoder.decode(stream, false, false)
      }
    } finally {
      archive.close()
    }
  }
  
  /**
   * Looks up the resource spec for `resourceId` across all decoded packages.
   *
   * @return the spec from the last package that has it, or null when nothing
   *         has been parsed yet or no package contains the id
   */
  def findResource(resourceId: Int): ResResSpec = {
    val id = new ResID(resourceId)
    var found: ResResSpec = null
    if (this.data != null) {
      for (pkg <- this.data.getPackages if pkg.hasResSpec(id)) {
        found = pkg.getResSpec(id)
      }
    }
    found
  }
  
  /** The decoded packages as a set; empty when nothing has been parsed yet. */
  def getPackages: Set[ResPackage] =
    Option(this.data).fold(Set.empty[ResPackage])(_.getPackages.toSet)
  
  /** The decoded resource table; null until `parse` has decoded one. */
  def getData: ARSCData = this.data
  
  /**
   * Collects the string resources of all decoded packages as a map from
   * resource id to resource name.
   *
   * Each package's resource table is rendered with `toString`, stripped of its
   * first and last character and split on ", ". Entries of the form
   * "<id> string/<name>" contribute `id -> name`, where the id is parsed as
   * hexadecimal after dropping its first two characters.
   *
   * Fix: the original declared a second, inner `matches` inside the loop that
   * shadowed the accumulator, so the method always returned an empty map.
   */
  def getGlobalStringPool: Map[Int, String] = {
    // Compiled once rather than once per package.
    val entryPattern = Pattern.compile("(.+)\\sstring\\/(.+)")
    getPackages.toSeq.flatMap { pkg =>
      val table = pkg.getResTable.toString
      table.substring(1, table.length() - 1).split(", ").toSeq.flatMap { entry =>
        val m = entryPattern.matcher(entry)
        if (m.find()) Some(Integer.parseInt(m.group(1).substring(2), 16) -> m.group(2))
        else None
      }
    }.toMap
  }
} 
Example 75
Source File: TokenParser.scala    From spark-nlp   with Apache License 2.0 5 votes vote down vote up
package com.johnsnowlabs.nlp.annotators.common

import java.util.regex.Pattern

/** Rewrites a token; the implementations in this file insert spaces around affixes. */
trait PreprocessingParser {
  def separate(token:String): String
}


/** Splits known suffixes off the end of a token, separating them with spaces. */
class SuffixedToken(suffixes:Array[String]) extends PreprocessingParser {

  /** The first configured suffix that `token` ends with, if any. */
  def belongs(token: String): Option[String] =
    suffixes.find(suffix => token.endsWith(suffix))

  /** Repeatedly peels suffixes off the end of `token`: "stem suffix1 suffix2". */
  override def separate(token:String): String =
    belongs(token) match {
      case Some(suffix) =>
        val stem = token.dropRight(suffix.length)
        s"""${separate(stem)} $suffix"""
      case None =>
        token
    }

}

object SuffixedToken {
  def apply(suffixes:Array[String]) = new SuffixedToken(suffixes)
}


/** Splits known single-character prefixes off the start of a token. */
class PrefixedToken(prefixes:Array[String]) extends PreprocessingParser {

  /**
   * True when `token` is longer than one character and its first character
   * equals one of the configured prefixes.
   *
   * Uses `contains` rather than `map(...).reduce(_ || _)`, which threw
   * `UnsupportedOperationException` when `prefixes` was empty.
   */
  def belongs(token: String): Boolean =
    token.length > 1 && prefixes.contains(token.head.toString)

  /** Repeatedly peels leading prefix characters off `token`, separating them with spaces. */
  override def separate(token:String): String = {
    if (belongs(token))
      s"""${token.head} ${separate(token.tail)}"""
    else
      token
  }
}

object PrefixedToken {
  def apply(prefixes:Array[String]) = new PrefixedToken(prefixes)
}


/** Surrounds known infix strings inside a token with spaces. */
class InfixToken(tokens:Array[String]) extends PreprocessingParser {

  /**
   * True when `token` has more than two characters and one of the configured
   * infixes occurs strictly inside it (first and last character excluded).
   */
  def belongs(token: String): Boolean = {
    if(token.length > 2) {
      val insideChunk = token.tail.dropRight(1)
      tokens.exists(insideChunk.contains)
    }else{
      false
    }
  }

  /**
   * When `belongs(token)` holds, replaces every occurrence of every infix with
   * the infix padded by one space on each side; otherwise returns `token`.
   *
   * `String.replace` treats both arguments literally. The previous
   * `replaceAll` interpreted `$` and `\` in the replacement, so an infix such
   * as "$" raised "Illegal group reference".
   */
  override def separate(token:String): String =
    if (belongs(token))
      tokens.foldLeft(token) { (acc, infix) => acc.replace(infix, s" $infix ") }
    else
      token
}

object InfixToken {
  def apply(infixes:Array[String]) = new InfixToken(infixes)
}
Example 76
Source File: ModuleMatcher.scala    From coursier   with Apache License 2.0 5 votes vote down vote up
package coursier.util

import java.util.regex.Pattern

import coursier.core.{Module, ModuleName, Organization}
import dataclass.data

import scala.annotation.tailrec
import scala.util.matching.Regex

/**
 * Matches modules against `matcher`, whose organization, name and attribute
 * values are glob expressions in which `*` stands for any sequence of characters.
 */
@data class ModuleMatcher(matcher: Module) {

  import ModuleMatcher.blobToPattern

  lazy val orgPattern = blobToPattern(matcher.organization.value)
  lazy val namePattern = blobToPattern(matcher.name.value)
  lazy val attributesPattern = matcher
    .attributes
    .map { case (key, blob) => key -> blobToPattern(blob) }

  /**
   * True when organization and name fully match their patterns, `module` has
   * exactly the same attribute keys as `matcher`, and every attribute value
   * fully matches the corresponding pattern.
   */
  def matches(module: Module): Boolean = {
    def fullyMatches(regex: Regex, value: String): Boolean =
      regex.pattern.matcher(value).matches()

    fullyMatches(orgPattern, module.organization.value) &&
      fullyMatches(namePattern, module.name.value) &&
      module.attributes.keySet == attributesPattern.keySet &&
      attributesPattern.forall {
        case (key, regex) =>
          module.attributes.get(key).exists(fullyMatches(regex, _))
      }
  }

}

object ModuleMatcher {

  /** Builds a matcher from glob expressions for organization, name and (optionally) attributes. */
  def apply(org: Organization, name: ModuleName, attributes: Map[String, String] = Map.empty): ModuleMatcher =
    ModuleMatcher(Module(org, name, attributes))

  /** Matcher with `*` for both organization and name, and no attributes. */
  def all: ModuleMatcher =
    ModuleMatcher(Module(Organization("*"), ModuleName("*"), Map.empty))

  /**
   * Converts a glob in which `*` means "any sequence of characters" into a
   * `Regex`: text between stars is quoted literally and each star becomes `.*`.
   * The expression is appended to `b`, whose whole content is compiled.
   */
  private def blobToPattern(s: String, b: StringBuilder = new StringBuilder): Regex = {
    // A limit of -1 keeps trailing empty chunks, so stars at the end are preserved.
    val literalChunks = s.split("\\*", -1)
    val quoted = literalChunks.map(chunk => if (chunk.isEmpty) "" else Pattern.quote(chunk))
    b ++= quoted.mkString(".*")
    b.result().r
  }

} 
Example 77
Source File: CacheChecksum.scala    From coursier   with Apache License 2.0 5 votes vote down vote up
package coursier.cache

import java.math.BigInteger
import java.nio.charset.StandardCharsets
import java.util.regex.Pattern

/** Helpers to extract a checksum value from the content of a checksum file. */
object CacheChecksum {

  // Accepted lengths, in hexadecimal characters, of a textual checksum.
  private val checksumLength = Set(
    32, // md5
    40, // sha-1
    64, // sha-256
    128 // sha-512
  )

  // True when `s` consists only of digits and the letters a-f. Callers
  // lower-case their input first. The upper bound used to be 'z', which let
  // non-hexadecimal words through and made `new BigInteger(_, 16)` throw
  // NumberFormatException.
  private def ifHexString(s: String) =
    s.forall(c => c.isDigit || c >= 'a' && c <= 'f')

  // First element that is a hex string of an accepted length, parsed as base 16.
  private def findChecksum(elems: Seq[String]): Option[BigInteger] =
    elems.collectFirst {
      case rawSum if ifHexString(rawSum) && checksumLength.contains(rawSum.length) =>
        new BigInteger(rawSum, 16)
    }

  // Treats each whole line, with all whitespace removed, as a candidate checksum.
  private def parseChecksumLine(lines: Seq[String]): Option[BigInteger] =
    findChecksum(lines.map(_.toLowerCase.replaceAll("\\s", "")))

  // Fallbacks: first any single whitespace-separated word, then, per line,
  // the concatenation of all its hexadecimal-looking words.
  private def parseChecksumAlternative(lines: Seq[String]): Option[BigInteger] =
    findChecksum(lines.flatMap(_.toLowerCase.split("\\s+"))).orElse {
      findChecksum(
        lines.map { line =>
          line
            .toLowerCase
            .split("\\s+")
            .filter(ifHexString)
            .mkString
        }
      )
    }

  /**
   * Extracts a checksum from the textual content of a checksum file.
   *
   * @return the checksum value, or `None` when no candidate of an accepted length is found
   */
  def parseChecksum(content: String): Option[BigInteger] = {
    val lines = Predef.augmentString(content)
      .lines
      .toVector

    parseChecksumLine(lines).orElse(parseChecksumAlternative(lines))
  }

  /**
   * Extracts a checksum from raw file content. Content of exactly 16 or 20
   * bytes is taken to be the binary checksum itself; anything else is decoded
   * as UTF-8 text and parsed like `parseChecksum`.
   */
  def parseRawChecksum(content: Array[Byte]): Option[BigInteger] =
    if (content.length == 16 || content.length == 20)
      // NOTE(review): this is the signed, two's-complement interpretation, so
      // a digest whose first byte is >= 0x80 yields a negative value, whereas
      // values parsed from hex text are never negative; confirm against the
      // code that compares the two.
      Some(new BigInteger(content))
    else {
      val s = new String(content, StandardCharsets.UTF_8)
      val lines = Predef.augmentString(s)
        .lines
        .toVector

      parseChecksumLine(lines) orElse parseChecksumAlternative(lines)
    }

}
Example 78
Source File: MergeRule.scala    From coursier   with Apache License 2.0 5 votes vote down vote up
package coursier.launcher

import java.util.jar.JarFile
import java.util.regex.Pattern
import dataclass.data

/** A rule, keyed by an exact path or by a path pattern, saying that an entry is excluded or appended. */
sealed abstract class MergeRule extends Product with Serializable

object MergeRule {
  /** A rule that applies to one exact path. */
  sealed abstract class PathRule extends MergeRule {
    def path: String
  }

  // Exclusion rules: by exact path or by regular expression.
  @data class Exclude(path: String) extends PathRule
  @data class ExcludePattern(path: Pattern) extends MergeRule

  object ExcludePattern {
    /** Compiles `s` as a regular expression. */
    def apply(s: String): ExcludePattern =
      ExcludePattern(Pattern.compile(s))
  }

  // TODO Accept a separator: Array[Byte] argument in these
  // (to separate content with a line return in particular)
  @data class Append(path: String) extends PathRule
  @data class AppendPattern(path: Pattern) extends MergeRule

  object AppendPattern {
    /** Compiles `s` as a regular expression. */
    def apply(s: String): AppendPattern =
      AppendPattern(Pattern.compile(s))
  }

  // Default rules: append reference.conf and META-INF/services entries;
  // exclude log4j.properties, the jar manifest and META-INF signature files
  // (.SF / .DSA / .RSA, in any letter case).
  val default = Seq(
    MergeRule.Append("reference.conf"),
    MergeRule.AppendPattern("META-INF/services/.*"),
    MergeRule.Exclude("log4j.properties"),
    MergeRule.Exclude(JarFile.MANIFEST_NAME),
    MergeRule.ExcludePattern("META-INF/.*\\.[sS][fF]"),
    MergeRule.ExcludePattern("META-INF/.*\\.[dD][sS][aA]"),
    MergeRule.ExcludePattern("META-INF/.*\\.[rR][sS][aA]")
  )
}
Example 79
Source File: package.scala    From pureconfig   with Mozilla Public License 2.0 5 votes vote down vote up
package pureconfig

import java.util.regex.Pattern

import scala.util.matching.Regex

import org.scalactic.Equality
import org.scalactic.TypeCheckedTripleEquals._

/** ScalaTest/Scalactic equalities for regex types, which do not define structural `equals`. */
package object equality {

  /**
   * Two patterns are equal when their source expressions are equal.
   * NOTE(review): match flags are not compared; confirm that is intended.
   */
  implicit final val PatternEquality: Equality[Pattern] = new Equality[Pattern] {
    def areEqual(a: Pattern, b: Any): Boolean = b match {
      case bp: Pattern => a.pattern === bp.pattern
      case _ => false
    }
  }

  /** Two `Regex` values are equal when their underlying patterns are equal per `PatternEquality`. */
  implicit final val RegexEquality: Equality[Regex] = new Equality[Regex] {
    override def areEqual(a: Regex, b: Any): Boolean = b match {
      case r: Regex => PatternEquality.areEqual(a.pattern, r.pattern)
      case _ => false
    }
  }

}
Example 80
Source File: KanjiCharacter.scala    From scalastringcourseday7   with Apache License 2.0 5 votes vote down vote up
package text.kanji

import java.io.File
import java.util.regex.Pattern

import util.Config

import scala.collection.mutable.ListBuffer
import scala.io.Source


/** A set of kanji, exposed as a regex character class and membership tests. */
trait KanjiCharacter {
  /** The kanji making up this set, one string per entry. */
  val kanji: Seq[String]

  /** A character class `[...]` containing every entry of `kanji`; empty when `kanji` is empty. */
  lazy val regex: String =
    if (kanji.isEmpty) "" else kanji.mkString("[", "", "]")

  lazy val pattern: Pattern = Pattern.compile(regex)

  /** True when `codePoint` is a valid code point and its string form fully matches `pattern`. */
  def isDefined(codePoint: Int): Boolean =
    Character.isValidCodePoint(codePoint) &&
      pattern.matcher(new String(Array(codePoint), 0, 1)).matches

  def notDefined(codePoint: Int): Boolean = {
    !isDefined(codePoint)
  }

  /** True when the single character fully matches `pattern`. */
  def isDefined(char: Char): Boolean = {
    pattern.matcher(char.toString).matches
  }

  def notDefined(char: Char): Boolean = {
    !isDefined(char)
  }

  /**
   * Reads the resource file "kanji/<fileName>.csv" and returns the second
   * column of every line that has exactly two comma-separated fields.
   * Returns an empty list when the file is not a readable regular file.
   *
   * The file handle is closed once reading ends, including on failure
   * (previously it was never closed).
   */
  protected def readKanjiCSV(fileName: String): Seq[String] = {
    val file: File = Config.resourceFile("kanji", fileName.concat(".csv")).toFile
    if (file.canRead && file.isFile) {
      val source = Source.fromFile(file)
      try {
        source.getLines()
          .map(_.split(","))
          .collect { case Array(_, kanji) => kanji }
          .toList // force evaluation before the source is closed
      } finally {
        source.close()
      }
    } else {
      Nil
    }
  }
}
Example 81
Source File: PatternChecker.scala    From incubator-daffodil   with Apache License 2.0 5 votes vote down vote up
package org.apache.daffodil.grammar.primitives

import org.apache.daffodil.exceptions.SavesErrorsAndWarnings
import java.util.regex.Pattern
import org.apache.daffodil.util.Misc
import java.util.regex.PatternSyntaxException
import org.apache.daffodil.api.WarnID


/** Compile-time sanity checks for regular expression patterns. */
object PatternChecker {

  /**
   * Compiles `pattern`; reports an error on `context` when it is not a valid
   * regular expression and a warning when it can match with zero length.
   */
  def checkPattern(pattern: String,
    context: SavesErrorsAndWarnings): Unit = {
    try {
      val compiled = Pattern.compile(pattern)
      val onEmpty = compiled.matcher("")
      val onObscure = compiled.matcher("\uFFFE") // obscure enough?
      // The pattern matches the empty string and also matches with zero
      // length at the start of an unrelated character: it can always match.
      // That is a flawed pattern for an assert and dubious generally.
      val alwaysMatches =
        onEmpty.matches() && onObscure.lookingAt() && onObscure.group().length() == 0
      if (alwaysMatches) {
        context.SDW(WarnID.RegexPatternZeroLength, "Regular expression pattern '%s'.\n" +
          "This pattern will match with zero length, so it can always match.%s", pattern, missingCdataHint(pattern))
      }
    } catch {
      case e: PatternSyntaxException =>
        context.SDE("Invalid regular expression pattern '%s'.\nReason: %s.", pattern, Misc.getSomeMessage(e).get)
    }
  }

  // A free-form "(?x)" pattern that sits on a single line and contains "#"
  // (comment to end of line) is almost certainly missing its CDATA wrapper;
  // returns the extra hint for that case, otherwise the empty string.
  private def missingCdataHint(pattern: String): String = {
    val looksUnwrapped =
      pattern.startsWith("(?x)") && !pattern.contains("\n") && pattern.contains("#")
    if (looksUnwrapped)
      "\nMissing <![CDATA[...]]> around the regular expression." +
        "\nThis is required for free-form regular expression syntax with comments."
    else ""
  }
}
Example 82
Source File: ShouldNotTypecheck.scala    From scala-parallel-collections   with Apache License 2.0 5 votes vote down vote up
package testutil

import scala.language.experimental.macros
import scala.reflect.macros.blackbox.Context
import scala.reflect.macros.TypecheckException
import java.util.regex.Pattern


/**
 * Macro asserting, at compile time, that a snippet of code does NOT type-check.
 * The one-argument form accepts any type error; the two-argument form also
 * requires the error message to match the `expected` regular expression.
 */
object ShouldNotTypecheck {
  def apply(code: String): Unit = macro applyImplNoExp
  def apply(code: String, expected: String): Unit = macro applyImpl

  // A null `expected` stands for "no expected message".
  def applyImplNoExp(ctx: Context)(code: ctx.Expr[String]) = applyImpl(ctx)(code, null)

  def applyImpl(ctx: Context)(code: ctx.Expr[String], expected: ctx.Expr[String]): ctx.Expr[Unit] = {
    import ctx.universe._

    // Both arguments must be string literals; anything else fails these patterns.
    val Expr(Literal(Constant(codeStr: String))) = code
    val (expPat, expMsg) = expected match {
      case null => (null, "Expected some error.")
      case Expr(Literal(Constant(s: String))) =>
        // The message must match entirely; case-insensitive, and "." also matches newlines.
        (Pattern.compile(s, Pattern.CASE_INSENSITIVE | Pattern.DOTALL), "Expected error matching: "+s)
    }

    try ctx.typecheck(ctx.parse("{ "+codeStr+" }")) catch { case e: TypecheckException =>
      val msg = e.getMessage
      if((expected ne null) && !(expPat.matcher(msg)).matches)
        ctx.abort(ctx.enclosingPosition, "Type-checking failed in an unexpected way.\n"+
          expMsg+"\nActual error: "+msg)
      // Expected failure: expand to the unit expression and leave the macro here.
      else return reify(())
    }

    // Reached only when type-checking succeeded, which is the failure case.
    ctx.abort(ctx.enclosingPosition, "Type-checking succeeded unexpectedly.\n"+expMsg)
  }
}
Example 83
Source File: ClassUtils.scala    From ohara   with Apache License 2.0 5 votes vote down vote up
package oharastream.ohara.it.code

import java.io.FileInputStream
import java.lang.reflect.Modifier
import java.util.jar.JarInputStream
import java.util.regex.Pattern

import org.junit.Test

import scala.jdk.CollectionConverters._

/** Discovers the classes packaged in the jars on the system class path. */
private[code] object ClassUtils {

  /** All classes from jars whose file name does not contain "tests.jar". */
  def classesInProductionScope(): Seq[Class[_]] = allClasses(n => !n.contains("tests.jar"))

  /**
   * Loads every class found in the jars that contain the "oharastream/ohara"
   * resource and whose path passes `fileNameFilter`.
   */
  def allClasses(fileNameFilter: String => Boolean): Seq[Class[_]] = {
    val classLoader = ClassLoader.getSystemClassLoader
    val root        = "oharastream/ohara"
    // Resource URLs look like "file:<jar path>!/<root>"; group 1 is the jar path.
    val jarOfRoot   = Pattern.compile("^file:(.+\\.jar)!/" + root + "$")
    val jarPaths = classLoader
      .getResources(root)
      .asScala
      .map(url => jarOfRoot.matcher(url.getFile))
      .filter(_.find())
      .map(_.group(1))
      .filter(fileNameFilter)

    jarPaths.flatMap { jarPath =>
      val jar = new JarInputStream(new FileInputStream(jarPath))
      try {
        // Materialise the entry names before the stream is closed.
        val entryNames = Iterator
          .continually(jar.getNextJarEntry)
          .takeWhile(_ != null)
          .map(_.getName)
          .toArray
        entryNames
          .filter(_.endsWith(".class"))
          .map(_.replace('/', '.'))
          .map(_.stripSuffix(".class"))
          .map(Class.forName)
      } finally jar.close()
    }.toSeq
  }
}
Example 84
Source File: BaseFiltering.scala    From scala-commons   with MIT License 5 votes vote down vote up
package com.avsystem.commons
package mongo.core.ops

import java.util.regex.Pattern

import com.avsystem.commons.mongo.text.TextSearchLanguage
import com.google.common.collect.ImmutableList
import com.mongodb.client.model.geojson.{Geometry, Point}
import com.mongodb.client.model.{Filters, TextSearchOptions}
import org.bson.BsonType
import org.bson.conversions.Bson

import scala.util.matching.Regex

/**
 * Filter-building operations for the field identified by `key`. Each method
 * delegates to the `com.mongodb.client.model.Filters` factory of the same
 * purpose, passing `key` (and encoded values where applicable).
 */
trait BaseFiltering[T] extends Any with KeyValueHandling[T] {
  // Comparison operators.
  def equal(t: T): Bson = use(t)(Filters.eq)
  def notEqual(t: T): Bson = use(t)(Filters.ne)

  def gt(t: T): Bson = use(t)(Filters.gt)
  def lt(t: T): Bson = use(t)(Filters.lt)
  def gte(t: T): Bson = use(t)(Filters.gte)
  def lte(t: T): Bson = use(t)(Filters.lte)

  // Membership in a set of encoded values.
  def in(ts: T*): Bson = Filters.in(key, ts.map(encode).asJava)
  def nin(ts: T*): Bson = Filters.nin(key, ts.map(encode).asJava)

  def exists(exists: Boolean = true): Bson = Filters.exists(key, exists)

  def ofType(bsonType: BsonType): Bson = Filters.`type`(key, bsonType)
  def ofType(typeName: String): Bson = Filters.`type`(key, typeName)

  def mod(divisor: Long, remainder: Long): Bson = Filters.mod(key, divisor, remainder)

  // Regular expressions; the Scala `Regex` overload forwards its underlying Java pattern.
  def regex(re: Regex): Bson = regex(re.pattern)
  def regex(pattern: Pattern): Bson = Filters.regex(key, pattern)
  def regex(patternStr: String): Bson = Filters.regex(key, patternStr)
  def regex(patternStr: String, options: String): Bson = Filters.regex(key, patternStr, options)

  // Text search; only the options that were supplied are set. This filter does not use `key`.
  def text(str: String, caseSensitive: OptArg[Boolean] = OptArg.Empty,
    language: OptArg[TextSearchLanguage] = OptArg.Empty, diacriticSensitive: OptArg[Boolean] = OptArg.Empty): Bson = {
    val searchOptions = new TextSearchOptions().setup { options =>
      caseSensitive.foreach(b => options.caseSensitive(b))
      language.foreach(l => options.language(l.code))
      diacriticSensitive.foreach(b => options.diacriticSensitive(b))
    }
    Filters.text(str, searchOptions)
  }

  // Bitwise tests.
  def bitsAllClear(bitMask: Long): Bson = Filters.bitsAllClear(key, bitMask)
  def bitsAllSet(bitMask: Long): Bson = Filters.bitsAllSet(key, bitMask)
  def bitsAnyClear(bitMask: Long): Bson = Filters.bitsAnyClear(key, bitMask)
  def bitsAnySet(bitMask: Long): Bson = Filters.bitsAnySet(key, bitMask)

  // Geospatial containment.
  def geoWithinBson(geometryBson: Bson): Bson = Filters.geoWithin(key, geometryBson)
  def geoWithin(geometry: Geometry): Bson = Filters.geoWithin(key, geometry)
  def geoWithinBox(lowerLeftX: Double, lowerLeftY: Double, upperRightX: Double, upperRightY: Double): Bson = {
    Filters.geoWithinBox(key, lowerLeftX, lowerLeftY, upperRightX, upperRightY)
  }
  def geoWithinPolygon(points: (Double, Double)*): Bson = {
    // Each point becomes a two-element Java list of boxed doubles.
    val javaPoints = points.map {
      case (x, y) => ImmutableList.of(x: JDouble, y: JDouble): JList[JDouble]
    }.asJava
    Filters.geoWithinPolygon(key, javaPoints)
  }
  def geoWithinCenter(x: Double, y: Double, radius: Double): Bson = Filters.geoWithinCenter(key, x, y, radius)
  def geoWithinCenterSphere(x: Double, y: Double, radius: Double): Bson = Filters.geoWithinCenterSphere(key, x, y, radius)

  // Geospatial intersection.
  def geoIntersectsBson(geometryBson: Bson): Bson = Filters.geoIntersects(key, geometryBson)
  def geoIntersects(geometry: Geometry): Bson = Filters.geoIntersects(key, geometry)

  // Converts an optional distance to a boxed double, using null for "absent".
  private def jDouble(doubleOpt: Opt[Double]): JDouble = doubleOpt.map(d => d: JDouble).orNull
  private def useMinMax(min: Opt[Double], max: Opt[Double])(f: (JDouble, JDouble) => Bson): Bson = {
    f(jDouble(min), jDouble(max))
  }

  // Proximity queries. Note the argument order: the public methods take
  // (max, min), while `useMinMax` takes (min, max) and passes them to `f` in that order.
  def nearBson(geometryBson: Bson, maxDistance: Opt[Double] = Opt.empty, minDistance: Opt[Double] = Opt.empty): Bson = {
    useMinMax(minDistance, maxDistance)(Filters.near(key, geometryBson, _, _))
  }
  def nearPoint(point: Point, maxDistance: Opt[Double] = Opt.empty, minDistance: Opt[Double] = Opt.empty): Bson = {
    useMinMax(minDistance, maxDistance)(Filters.near(key, point, _, _))
  }
  def nearXY(x: Double, y: Double, maxDistance: Opt[Double] = Opt.empty, minDistance: Opt[Double] = Opt.empty): Bson = {
    useMinMax(minDistance, maxDistance)(Filters.near(key, x, y, _, _))
  }

  def nearSphereBson(geometryBson: Bson, maxDistance: Opt[Double] = Opt.empty, minDistance: Opt[Double] = Opt.empty): Bson = {
    useMinMax(minDistance, maxDistance)(Filters.nearSphere(key, geometryBson, _, _))
  }
  def nearSpherePoint(point: Point, maxDistance: Opt[Double] = Opt.empty, minDistance: Opt[Double] = Opt.empty): Bson = {
    useMinMax(minDistance, maxDistance)(Filters.nearSphere(key, point, _, _))
  }
  def nearSphereXY(x: Double, y: Double, maxDistance: Opt[Double] = Opt.empty, minDistance: Opt[Double] = Opt.empty): Bson = {
    useMinMax(minDistance, maxDistance)(Filters.nearSphere(key, x, y, _, _))
  }
}
Example 85
Source File: FieldStyle.scala    From tethys   with Apache License 2.0 5 votes vote down vote up
package tethys.derivation.builder

import java.util.regex.Pattern

import scala.annotation.StaticAnnotation

/**
  * A composable transformation of a field name.
  *
  * Implementations supply `applyStyle`; the combinators below chain another
  * transformation after this one.
  */
trait FieldStyle { current =>

  /** Returns the styled form of `field`. */
  def applyStyle(field: String): String

  /** Returns a style that applies this style first and then `that` to the result. */
  def andThen(that: FieldStyle): FieldStyle = new FieldStyle {
    override def applyStyle(field: String): String = {
      val styledByCurrent = current.applyStyle(field)
      that.applyStyle(styledByCurrent)
    }
  }

  /** Returns a style that applies this style first and then the plain function `that`. */
  def andThen(that: String => String): FieldStyle = new FieldStyle {
    override def applyStyle(field: String): String = {
      val styledByCurrent = current.applyStyle(field)
      that(styledByCurrent)
    }
  }

  /** Operator alias for `andThen(that: FieldStyle)`. */
  def >>(that: FieldStyle): FieldStyle = andThen(that)

  /** Operator alias for `andThen(that: String => String)`. */
  def >>(that: String => String): FieldStyle = andThen(that)
}


/** Factory and predefined instances for [[FieldStyle]]. */
object FieldStyle {

  /** Wraps a plain `String => String` function as a [[FieldStyle]]. */
  def apply(fun: String => String): FieldStyle = new FieldStyle {
    override def applyStyle(field: String): String = fun(field)
  }

  // Annotation that carries a FieldStyle argument; how it is consumed is not visible in this file.
  class Ref(fieldStyle: FieldStyle) extends StaticAnnotation

  /** A style whose `applyStyle` always throws: it is not meant to be invoked at runtime. */
  trait StyleReference extends FieldStyle {
    final override def applyStyle(field: String): String = throw new RuntimeException("StyleReference should not be used at runtime")
  }

  // Names transformations adopted from scala enumeratum
  // regexp1 separates a run of capitals from a following capitalised word ("HTTPServer" -> "HTTP_Server").
  private val regexp1: Pattern = Pattern.compile("([A-Z]+)([A-Z][a-z])")
  // regexp2 separates a lowercase letter or digit from a following capital ("fooBar" -> "foo_Bar").
  private val regexp2: Pattern = Pattern.compile("([a-z\\d])([A-Z])")
  private val replacement: String = "$1_$2"

  /** Inserts underscores at the word boundaries found by both patterns, then splits on `_`. */
  private def splitName(name: String): List[String] = {
    val first = regexp1.matcher(name).replaceAll(replacement)
    regexp2.matcher(first).replaceAll(replacement).split("_").toList
  }

  /** Joins the detected words with underscores, leaving letter case untouched. */
  val snakecase: FieldStyle = new FieldStyle {
    override def applyStyle(field: String): String = splitName(field).mkString("_")
  }

  /**
    * Lower-cases the field name.
    *
    * Uses `Locale.ROOT` so the result does not depend on the JVM default locale
    * (for example, under a Turkish locale "ID" would otherwise become "ıd").
    */
  val lowercase: FieldStyle = new FieldStyle {
    override def applyStyle(field: String): String = field.toLowerCase(java.util.Locale.ROOT)
  }

  /**
    * Upper-cases the field name.
    *
    * Uses `Locale.ROOT` so the result does not depend on the JVM default locale
    * (for example, under a Turkish locale "id" would otherwise become "İD").
    */
  val uppercase: FieldStyle = new FieldStyle {
    override def applyStyle(field: String): String = field.toUpperCase(java.util.Locale.ROOT)
  }

  /** `snakecase` followed by `lowercase`. */
  val lowerSnakecase: FieldStyle = snakecase >> lowercase

  /** `snakecase` followed by `uppercase`. */
  val upperSnakecase: FieldStyle = snakecase >> uppercase
}
Example 86
Source File: ScalastyleSettings.scala    From intellij-lsp   with Apache License 2.0 5 votes vote down vote up
package org.jetbrains.plugins.scala.editor.importOptimizer

import java.util.regex.Pattern

import org.scalastyle.ConfigurationChecker

import scala.util.Try


  /**
    * Compares two names, treating the wildcard `_` and the case of the first character specially.
    *
    *  - If `name1` is `_`, the result is -1 for imports and 1 otherwise.
    *  - If only `name2` is `_`, the result is the negated comparison with the arguments swapped.
    *  - If both first characters have the same case, the names are compared ignoring case.
    *  - Otherwise the result is 1 only when `name1` starts with an upper-case character and
    *    `isImport` is false; in every other mixed-case situation it is -1.
    *
    * Both names are expected to be non-empty: the first code point is read unconditionally
    * once neither name is `_`.
    */
  def compareNames(name1: String, name2: String, isImport: Boolean): Int = {
    if (name1 == "_") {
      if (isImport) -1 else 1
    } else if (name2 == "_") {
      -compareNames(name2, name1, isImport)
    } else {
      val firstStartsUpper = Character.isUpperCase(name1.codePointAt(0))
      val secondStartsUpper = Character.isUpperCase(name2.codePointAt(0))

      if (firstStartsUpper != secondStartsUpper) {
        if (firstStartsUpper && !isImport) 1 else -1
      } else {
        name1.compareToIgnoreCase(name2)
      }
    }
  }

  /**
    * Reads the comma-separated `groups` parameter of the checker and compiles the
    * `group.<name>` parameter of every listed group into a `Pattern`.
    *
    * @return the compiled patterns in listing order, or `None` if anything inside the
    *         `Try` fails (for example a missing parameter or an invalid regex)
    */
  def groups(checker: ConfigurationChecker): Option[Seq[Pattern]] = {
    val compiled = Try {
      val groupNames = checker.parameters("groups").split(",").toSeq
      groupNames.map { groupName =>
        val regexSource = checker.parameters(s"group.$groupName")
        Pattern.compile(regexSource)
      }
    }
    compiled.toOption
  }

  /** Ordering in which `left` precedes `right` when `compareNames` (non-import mode) is negative. */
  val nameOrdering: Ordering[String] = Ordering.fromLessThan { (left, right) =>
    compareNames(left, right, isImport = false) < 0
  }
}

/**
  * Import-ordering settings.
  *
  * @param scalastyleOrder flag for scalastyle-style ordering
  * @param groups          optional import-group patterns
  */
case class ScalastyleSettings(
  scalastyleOrder: Boolean,
  groups: Option[Seq[Pattern]]
)
Example 87
Source File: RegexBenchmark.scala    From chronicler   with Apache License 2.0 5 votes vote down vote up
package com.github.fsanaulla.chronicler.benchmark

import java.util.concurrent.TimeUnit
import java.util.regex.Pattern

import com.github.fsanaulla.chronicler.benchmark.RegexBenchmark.CompiledPattern
import com.github.fsanaulla.chronicler.core.regex
import org.openjdk.jmh.annotations._

// JMH benchmark (average time, reported in milliseconds) comparing two ways of running the
// same replacement over the sample string "My=, Name":
// a Pattern held in benchmark state versus String.replaceAll with a regex literal.
@BenchmarkMode(Array(Mode.AverageTime))
@OutputTimeUnit(TimeUnit.MILLISECONDS)
class RegexBenchmark {

  // Author's note: about 10x faster (presumably relative to uncompiledPattern below).
  // Reuses the Pattern stored in the CompiledPattern state object.
  // The replacement "\\\\$1" is the regex replacement `\\$1`: a literal backslash followed by group 1.
  @Benchmark
  def compiledPattern(state: CompiledPattern): Unit =
    state.pattern.matcher("My=, Name").replaceAll("\\\\$1")

  // String.replaceAll compiles the regex "([ ,=])" on every invocation.
  // NOTE(review): assumes regex.tagPattern is the same expression; confirm before comparing results.
  @Benchmark
  def uncompiledPattern(): Unit =
    "My=, Name".replaceAll("([ ,=])", "\\\\$1")
}

object RegexBenchmark {

  /**
    * Benchmark-scoped JMH state holding the `Pattern` used by `compiledPattern`.
    * The pattern is assigned from `regex.tagPattern` in the setup hook.
    */
  @State(Scope.Benchmark)
  class CompiledPattern {
    var pattern: Pattern = _

    @Setup
    def up(): Unit = {
      pattern = regex.tagPattern
    }

    // Nothing to release.
    @TearDown
    def close(): Unit = ()
  }
}
Example 88
Source File: CarbonGlobalDictionaryRDD.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.carbondata.spark.rdd

import java.util.regex.Pattern

import org.apache.spark.sql.Row

/**
  * Delimiters with their compiled patterns, plus the index of the delimiter currently in use.
  *
  * @param delimiters     delimiter strings, one per level
  * @param delimiterIndex index into `patterns` used by `getSplits`
  * @param patterns       compiled patterns, indexed like `delimiters`
  */
case class DataFormat(delimiters: Array[String],
    var delimiterIndex: Int,
    patterns: Array[Pattern]) extends Serializable {

  /** Splits `input` with the pattern selected by `delimiterIndex`. */
  def getSplits(input: String): Array[String] = {
    // A limit of -1 keeps trailing empty strings: when the last column is empty after
    // splitting, a surrogate key has to be generated for the empty value too.
    val activePattern = patterns(delimiterIndex)
    activePattern.split(input, -1)
  }

  /** Returns a copy that points at the next delimiter, capped at the last available one. */
  def cloneAndIncreaseIndex: DataFormat = {
    val lastIndex = delimiters.length - 1
    val nextIndex = Math.min(delimiterIndex + 1, lastIndex)
    copy(delimiterIndex = nextIndex)
  }
}

/**
  * A `Row` backed by a reusable array of strings.
  *
  * `setValues` overwrites the backing array in place instead of allocating a new one.
  */
class StringArrayRow(var values: Array[String]) extends Row {

  override def length: Int = values.length

  override def get(i: Int): Any = values(i)

  override def getString(i: Int): String = values(i)

  /** Nulls out every slot of the backing array. */
  private def reset(): Unit = {
    var idx = values.length - 1
    while (idx >= 0) {
      values(idx) = null
      idx -= 1
    }
  }

  /** Returns a new row backed by a fresh copy of the current array. */
  override def copy(): Row = new StringArrayRow(values.clone())

  /**
    * Clears the backing array, then copies in as many elements of `values` as fit.
    * A `null` argument leaves the row cleared.
    *
    * @return this row, for chaining
    */
  def setValues(values: Array[String]): StringArrayRow = {
    reset()
    if (values != null) {
      // Here `values` is the argument and `this.values` the backing array.
      val copyCount = Math.min(this.values.length, values.length)
      System.arraycopy(values, 0, this.values, 0, copyCount)
    }
    this
  }
}
Example 89
Source File: CodeDumperTests.scala    From codepropertygraph   with Apache License 2.0 5 votes vote down vote up
package io.shiftleft.semanticcpg.codedumper

import java.util.regex.Pattern

import org.scalatest.{Matchers, WordSpec}
import io.shiftleft.semanticcpg.language._
import io.shiftleft.semanticcpg.testfixtures.CodeToCpgFixture

/** Specification for `CodeDumper`, run against a CPG built from a small C snippet. */
class CodeDumperTests extends WordSpec with Matchers {

  // Source snippet handed to the fixture; the tests below query the graph built from it.
  val code = """
                | // A comment
                |int my_func(int param1)
                |{
                |   int x = foo(param1);
                |}""".stripMargin

  CodeToCpgFixture(code) { cpg =>
    "should return empty string for empty traversal" in {
      val dumped = CodeDumper.dump(cpg.method.name("notinthere"), false).mkString("\n")
      dumped shouldBe ""
    }

    "should be able to dump complete function" in {
      val dumped = CodeDumper.dump(cpg.method.name("my_func"), false).mkString("\n")
      dumped should startWith("int my_func")
      dumped should include("foo(param1)")
      dumped should endWith("}")
    }

    "should dump method with arrow for expression (a call)" in {
      val dumped = CodeDumper.dump(cpg.call.name("foo"), false).mkString("\n")
      // The arrow text is quoted so it is matched literally inside the regex.
      val quotedArrow = Pattern.quote(CodeDumper.arrow.toString)
      dumped should startWith("int")
      dumped should include regex (".*" + "int x = foo" + ".*" + quotedArrow + ".*")
      dumped should endWith("}")
    }

    "methodCode should return nothing on invalid filename" in {
      val loaded = CodeDumper.code("fooNonexisting", 1, 2)
      loaded shouldBe ""
    }

    "should allow dumping via .dump" in {
      val dumped = cpg.method.name("my_func").dumpRaw.mkString("\n")
      dumped should startWith("int my_func")
    }

    "should allow dumping callIn" in {
      implicit val resolver: ICallResolver = NoResolve
      val dumped = cpg.method.name("foo").callIn.dumpRaw.mkString("\n")
      dumped should startWith("int")
    }

  }

}
Example 90
Source File: FilterJsonLigatures.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.eidos.apps

import java.io.File
import java.io.PrintWriter
import java.util.regex.Pattern

import org.clulab.wm.eidos.utils.Closer.AutoCloser
import org.clulab.wm.eidos.utils.FileUtils
import org.clulab.wm.eidos.utils.Sinker
import org.clulab.wm.eidos.utils.TsvWriter
import org.json4s.DefaultFormats
import org.json4s.JString
import org.json4s.JValue
import org.json4s.jackson.JsonMethods

/**
  * Scans JSON files for word pairs matched by `pattern` and writes them to a TSV file.
  *
  * Arguments: input directory, file extension, output file.
  */
object FilterJsonLigatures extends App {
  // Group 1: a word ending in "f" plus one of b/h/k/l/f/t/i/j, or ending in "ij".
  // Group 4: the word after a single space.
  // Presumably these are ligatures split apart during text extraction (inferred from the object name).
  val pattern: Pattern = Pattern.compile("([A-Za-z]+(f([bhkl]|[ft]|[ij])|ij)) ([A-Za-z]+)")

  /** Writes one TSV row per pattern match; the header row is written on construction. */
  class Filter(tsvWriter: TsvWriter) {
    implicit val formats: DefaultFormats.type = org.json4s.DefaultFormats

    tsvWriter.println("file", "left", "right")

    /**
      * Reads `_source.extracted_text` from `jValue` and emits (file name, left word, right word)
      * for every match. Throws a `RuntimeException` when that field is not a JSON string.
      */
    def filter(jValue: JValue, inputFile: File): Unit = {
      val extractions: JValue = jValue \ "_source" \ "extracted_text"

      extractions match {
        case text: JString =>
          val content = text.extract[String]
          val hits = pattern.matcher(content)

          while (hits.find) {
            val left = hits.group(1)
            val right = hits.group(4)
            tsvWriter.println(inputFile.getName, left, right)
          }
        case _ => throw new RuntimeException(s"Unexpected extractions value: $extractions")
      }
    }
  }

  val inputDir = args(0)
  val extension = args(1)
  val outputFile = args(2)

  new TsvWriter(Sinker.printWriterFromFile(outputFile)).autoClose { tsvWriter =>
    val filter = new Filter(tsvWriter)
    val inputFiles = FileUtils.findFiles(inputDir, extension)

    // Process files in name order so the output is stable across runs.
    for (inputFile <- inputFiles.sortBy(_.getName)) {
      val json = JsonMethods.parse(FileUtils.getTextFromFile(inputFile))
      filter.filter(json, inputFile)
    }
  }
}
Example 91
Source File: TelegramBot4sRelease.scala    From telegram   with Apache License 2.0 5 votes vote down vote up
import java.util.regex.Pattern

import sbt._
import sbtrelease.ReleasePlugin.autoImport.ReleaseKeys._
import sbtrelease.ReleasePlugin.autoImport.{ReleaseStep, _}
import sbtrelease.ReleaseStateTransformations._

/** sbt-release process for the project, including a step that bumps the version shown in the README. */
object TelegramBot4sRelease {

  /** The ordered release steps. */
  def steps: Seq[ReleaseStep] = Seq(
    checkSnapshotDependencies,
    inquireVersions,
    // publishing locally so that the pgp password prompt is displayed early
    // in the process
    releaseStepCommand("publishLocalSigned"),
    runClean,
    runTest,
    setReleaseVersion,
    updateVersionInReadme,
    commitReleaseVersion,
    tagRelease,
    publishArtifacts,
    setNextVersion,
    commitNextVersion,
    releaseStepCommand("sonatypeReleaseAll"),
    pushChanges
  )

  /**
    * Replaces the version found in the README's dependency line with the release version
    * and stages the README in the release VCS.
    *
    * Fails the release with a descriptive error when the dependency line cannot be found,
    * when no versions have been set yet, or when no VCS is configured.
    */
  private def updateVersionInReadme: ReleaseStep = { s: State =>
    val readmeFile = file("README.md")
    val readme = IO.read(readmeFile)

    val currentVersionPattern = """"info.mukel" %% "telegrambot4s" % "([\w\.-]+)"""".r
    val currentVersionInReadme = currentVersionPattern
      .findFirstMatchIn(readme)
      .map(_.group(1))
      .getOrElse(sys.error(s"Could not find the telegrambot4s dependency line in ${readmeFile.name}"))

    val releaseVersion = s.get(versions)
      .map(_._1)
      .getOrElse(sys.error("No versions are set. Was this step run before inquireVersions?"))

    s.log.info(s"Replacing $currentVersionInReadme with $releaseVersion in ${readmeFile.name}")

    // Literal replacement of every occurrence: unlike replaceAll, neither argument is
    // interpreted as a regex, so '$' or '\' in the release version cannot corrupt the result.
    val newReadme = readme.replace(currentVersionInReadme, releaseVersion)
    IO.write(readmeFile, newReadme)

    val vcs = Project.extract(s).get(releaseVcs)
      .getOrElse(sys.error("Aborting release: no VCS is configured for this project."))
    vcs.add(readmeFile.getAbsolutePath) !! s.log

    s
  }
}
Example 92
Source File: ConfigReader.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.internal.config

import java.util.{Map => JMap}
import java.util.regex.Pattern

import scala.collection.mutable.HashMap
import scala.util.matching.Regex

private object ConfigReader {

  // Matches references of the form ${name} or ${prefix:name}.
  // Group 1 is the optional prefix (word characters, without the colon); group 2 is the name.
  private val REF_RE = """\$\{(?:(\w+?):)?(\S+?)\}""".r

}


  /** Expands the references in `input`, starting with no references marked as already used. */
  def substitute(input: String): String = {
    substitute(input, Set.empty[String])
  }

  /**
    * Replaces every reference matched by `ConfigReader.REF_RE` in `input` with its resolved value.
    *
    * A reference is looked up in `bindings` by its prefix (which is `null` when absent) and then
    * by name; the value found is itself expanded recursively. References that cannot be
    * resolved are left as they are. A `null` input is returned unchanged.
    *
    * @param usedRefs references already being expanded on this path; meeting one of them again
    *                 fails the `require` with a circular-reference message
    */
  private def substitute(input: String, usedRefs: Set[String]): String = {
    if (input == null) {
      input
    } else {
      ConfigReader.REF_RE.replaceAllIn(input, { found =>
        val prefix = found.group(1)
        val key = found.group(2)
        val fullRef = if (prefix == null) key else s"$prefix:$key"
        require(!usedRefs.contains(fullRef), s"Circular reference in $input: $fullRef")

        val resolved = for {
          provider <- bindings.get(prefix)
          value <- provider.get(key)
        } yield substitute(value, usedRefs + fullRef)

        // Quote the result so '$' and '\' in it are not treated as replacement syntax.
        Regex.quoteReplacement(resolved.getOrElse(found.matched))
      })
    }
  }

} 
Example 93
Source File: StringUtils.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.util

import java.util.regex.{Pattern, PatternSyntaxException}

import org.apache.spark.unsafe.types.UTF8String

object StringUtils {

  /**
    * Translates a SQL LIKE pattern into a Java regular expression string.
    *
    *  - an unescaped `_` becomes `.` (exactly one character)
    *  - an unescaped `%` becomes `.*` (zero or more characters)
    *  - `\_` and `\%` match a literal `_` / `%`
    *  - `\\` matches a single literal backslash
    *  - a backslash before any other character, or at the very end of the pattern,
    *    is kept as a literal backslash
    *  - every other character is quoted with `Pattern.quote`
    *
    * The pattern is consumed strictly left to right, so a backslash that has itself been
    * escaped never escapes the character after it.
    *
    * @param v the LIKE pattern
    * @return the regex prefixed with `(?s)`, or the empty string when `v` is empty
    */
  def escapeLikeRegex(v: String): String = {
    if (v.isEmpty) {
      v
    } else {
      val in = v.iterator
      // (?s) turns on dotall mode, so the "." produced for "_" and "%" also matches line breaks
      val out = new StringBuilder("(?s)")
      while (in.hasNext) {
        in.next() match {
          case '\\' if in.hasNext =>
            in.next() match {
              case '_' => out.append("_")
              case '%' => out.append("%")
              case '\\' => out.append(Pattern.quote("\\"))
              case c => out.append(Pattern.quote("\\" + c))
            }
          // a lone trailing backslash has nothing to escape: match it literally
          case '\\' => out.append(Pattern.quote("\\"))
          case '_' => out.append(".")
          case '%' => out.append(".*")
          case c => out.append(Pattern.quote(Character.toString(c)))
        }
      }
      out.toString
    }
  }

  private[this] val trueStrings = Set("t", "true", "y", "yes", "1").map(UTF8String.fromString)
  private[this] val falseStrings = Set("f", "false", "n", "no", "0").map(UTF8String.fromString)

  /** True when the lower-cased `s` is one of "t", "true", "y", "yes", "1". */
  def isTrueString(s: UTF8String): Boolean = trueStrings.contains(s.toLowerCase)

  /** True when the lower-cased `s` is one of "f", "false", "n", "no", "0". */
  def isFalseString(s: UTF8String): Boolean = falseStrings.contains(s.toLowerCase)

  /**
    * Returns the names that fully match at least one sub-pattern of `pattern`.
    *
    * `pattern` is trimmed and split on `|`; in each sub-pattern `*` stands for any run of
    * characters, and matching is case-insensitive. The result is sorted and free of duplicates.
    *
    * @param names   candidate names
    * @param pattern `|`-separated sub-patterns
    */
  def filterPattern(names: Seq[String], pattern: String): Seq[String] = {
    val funcNames = scala.collection.mutable.SortedSet.empty[String]
    pattern.trim().split("\\|").foreach { subPattern =>
      try {
        val regex = ("(?i)" + subPattern.replaceAll("\\*", ".*")).r
        funcNames ++= names.filter{ name => regex.pattern.matcher(name).matches() }
      } catch {
        // Best effort: a sub-pattern that is not a valid regex simply contributes no names.
        case _: PatternSyntaxException =>
      }
    }
    funcNames.toSeq
  }
}