java.util.regex.PatternSyntaxException Scala Examples

The following examples show how to use java.util.regex.PatternSyntaxException. Each example notes its source file, the project it comes from, and that project's license.
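Before the project examples, here is a minimal sketch of the idiom they all share: compile a possibly user-supplied regex and handle the PatternSyntaxException that Pattern.compile throws for malformed input. The object and method names are illustrative only, not taken from any project below.

import java.util.regex.{Pattern, PatternSyntaxException}

object PatternSyntaxDemo {
  // Returns the compiled pattern, or None if the regex is syntactically invalid.
  def compileSafely(regex: String): Option[Pattern] =
    try {
      Some(Pattern.compile(regex))
    } catch {
      case e: PatternSyntaxException =>
        // getIndex and getDescription pinpoint where and why compilation failed.
        println(s"Invalid regex at index ${e.getIndex}: ${e.getDescription}")
        None
    }

  def main(args: Array[String]): Unit = {
    println(compileSafely("[a-z]+").isDefined) // true
    println(compileSafely("[a-z").isDefined)   // false: unclosed character class
  }
}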
Example 1
Source File: StringUtils.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.sql.catalyst.util

import java.util.regex.{Pattern, PatternSyntaxException}

import org.apache.spark.unsafe.types.UTF8String

object StringUtils {

  // replace _ with ., matching exactly one arbitrary character
  // replace % with .*, matching zero or more arbitrary characters
  def escapeLikeRegex(v: String): String = {
    if (!v.isEmpty) {
      "(?s)" + (' ' +: v.init).zip(v).flatMap {
        case (prev, '\\') => ""
        case ('\\', c) =>
          c match {
            case '_' => "_"
            case '%' => "%"
            case _ => Pattern.quote("\\" + c)
          }
        case (prev, c) =>
          c match {
            case '_' => "."
            case '%' => ".*"
            case _ => Pattern.quote(Character.toString(c))
          }
      }.mkString
    } else {
      v
    }
  }

  private[this] val trueStrings = Set("t", "true", "y", "yes", "1").map(UTF8String.fromString)
  private[this] val falseStrings = Set("f", "false", "n", "no", "0").map(UTF8String.fromString)

  def isTrueString(s: UTF8String): Boolean = trueStrings.contains(s.toLowerCase)
  def isFalseString(s: UTF8String): Boolean = falseStrings.contains(s.toLowerCase)

  /** Returns the names matching any of the '|'-separated sub-patterns ('*' is a wildcard, matching is
   *  case-insensitive); sub-patterns that fail to compile are silently ignored. */
  def filterPattern(names: Seq[String], pattern: String): Seq[String] = {
    val funcNames = scala.collection.mutable.SortedSet.empty[String]
    pattern.trim().split("\\|").foreach { subPattern =>
      try {
        val regex = ("(?i)" + subPattern.replaceAll("\\*", ".*")).r
        funcNames ++= names.filter{ name => regex.pattern.matcher(name).matches() }
      } catch {
        case _: PatternSyntaxException =>
      }
    }
    funcNames.toSeq
  }
} 
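A rough usage sketch for the two helpers above; the expected results are my reading of the code, not output captured from the project.

// Hypothetical REPL-style usage of StringUtils.
val names = Seq("toDegrees", "toRadians", "tan", "tanh")
StringUtils.filterPattern(names, "to*|tan")
// -> Seq("tan", "toDegrees", "toRadians"): '*' widens to '.*', and "tan" must match fully, so "tanh" is excluded
StringUtils.escapeLikeRegex("a%b")
// -> "(?s)" + Pattern.quote("a") + ".*" + Pattern.quote("b"), i.e. a regex for the SQL LIKE pattern a%b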
Example 2
Source File: PipeReaderGoldstandardClusterOutput.scala    From sddf   with GNU General Public License v3.0
package de.unihamburg.vsis.sddf.reading.goldstandard

import java.util.regex.PatternSyntaxException

import org.apache.spark.rdd.RDD
import org.apache.spark.rdd.RDD.rddToPairRDDFunctions

import de.unihamburg.vsis.sddf.SddfContext.rddToRdd
import de.unihamburg.vsis.sddf.pipe.PipeElement
import de.unihamburg.vsis.sddf.pipe.context.AbstractPipeContext
import de.unihamburg.vsis.sddf.reading.IdConverter
import de.unihamburg.vsis.sddf.reading.IdConverterBasic
import de.unihamburg.vsis.sddf.reading.SymPair

/** Reads a gold standard in which each CSV line holds a cluster id and a tuple id,
 *  and emits the tuple ids grouped per cluster. */
class PipeReaderGoldstandardClusterOutput(
  separator: Char = ',',
  clusterIdIndex: Int = 0,
  tupleIdIndex: Int = 1,
  idConverter: IdConverter = IdConverterBasic)
  extends PipeElement[RDD[String], RDD[Seq[Long]]] {

  override def step(inputRdd: RDD[String])(implicit pipeContext: AbstractPipeContext): RDD[Seq[Long]] = {
    // parse tuple ids
    val clusterIdTupleIdRdd = inputRdd.map(line => {
      val parts = line.split(separator)
      val tupleId = idConverter.convert(parts(tupleIdIndex).replaceAll("[^0-9]",""))
      val clusterId = idConverter.convert(parts(clusterIdIndex).replaceAll("[^0-9]",""))
      (clusterId, tupleId)
    })
    clusterIdTupleIdRdd.groupByKey().map(_._2.toSeq)
  }

}

object PipeReaderGoldstandardClusterOutput {
  
  def apply(
      separator: Char = ',',
      clusterIdIndex: Int = 0,
      tupleIdIndex: Int = 1,
      idConverter: IdConverter = IdConverterBasic) = {
    new PipeReaderGoldstandardClusterOutput(separator, clusterIdIndex, tupleIdIndex, idConverter)
  }

} 
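Stripped of the Spark plumbing, the heart of step is the per-line parse; a small standalone sketch with a made-up input line (the id format is an assumption, and toLong stands in for idConverter.convert):

// Hypothetical line: cluster id in column 0, tuple id in column 1.
val line = "cluster-42,rec-7"
val parts = line.split(',')
val clusterId = parts(0).replaceAll("[^0-9]", "").toLong // 42
val tupleId   = parts(1).replaceAll("[^0-9]", "").toLong // 7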
Example 3
Source File: StringUtils.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.catalyst.util

import java.util.regex.{Pattern, PatternSyntaxException}

import org.apache.spark.sql.AnalysisException
import org.apache.spark.unsafe.types.UTF8String

object StringUtils {

  /** Returns the names matching any of the '|'-separated sub-patterns ('*' is a wildcard, matching is
   *  case-insensitive); sub-patterns that fail to compile are silently ignored. */
  def filterPattern(names: Seq[String], pattern: String): Seq[String] = {
    val funcNames = scala.collection.mutable.SortedSet.empty[String]
    pattern.trim().split("\\|").foreach { subPattern =>
      try {
        val regex = ("(?i)" + subPattern.replaceAll("\\*", ".*")).r
        funcNames ++= names.filter{ name => regex.pattern.matcher(name).matches() }
      } catch {
        case _: PatternSyntaxException =>
      }
    }
    funcNames.toSeq
  }
} 
Example 4
Source File: YamlProjectOperationInfoParser.scala    From rug   with GNU General Public License v3.0
package com.atomist.project.common.yaml

import java.util.regex.{Pattern, PatternSyntaxException}

import com.atomist.param._
import com.atomist.project.common.template.{InvalidTemplateException, TemplateBasedProjectOperationInfo}
import com.fasterxml.jackson.annotation.JsonProperty
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper
import org.apache.commons.lang3.builder.ReflectionToStringBuilder

import scala.util.{Failure, Success, Try}

/** Parses a YAML descriptor into a TemplateBasedProjectOperationInfo, rejecting descriptors whose
 *  parameters declare regular expression patterns that do not compile. */
object YamlProjectOperationInfoParser {

  private val mapper = new ObjectMapper(new YAMLFactory()) with ScalaObjectMapper
  mapper.registerModule(DefaultScalaModule)

  @throws[InvalidYamlDescriptorException]
  def parse(yaml: String): TemplateBasedProjectOperationInfo = {
    if (yaml == null || "".equals(yaml))
      throw new InvalidYamlDescriptorException("YAML content required in template metadata file")

    Try(mapper.readValue(yaml, classOf[BoundProjectOperationInfo])) match {
      case s: Success[BoundProjectOperationInfo] =>
        val badPatterns = s.value.parameters.flatMap(p => patternError(p))
        if (badPatterns.nonEmpty)
          throw new InvalidYamlDescriptorException(s"Bad regexp patterns: ${badPatterns.mkString(",")}")
        s.value
      case f: Failure[BoundProjectOperationInfo] =>
        throw new InvalidYamlDescriptorException(s"Failed to parse YAML [$yaml]: ${f.exception.getMessage}", f.exception)
    }
  }

  private def patternError(p: Parameter): Option[String] = {
    try {
      Pattern.compile(p.getPattern)
      None
    } catch {
      case pse: PatternSyntaxException => Some(s"${p.getName}: Bad regular expression pattern: ${pse.getMessage}")
    }
  }
}

private class BoundProjectOperationInfo extends TemplateBasedProjectOperationInfo {

  @JsonProperty("name")
  var name: String = _

  @JsonProperty("description")
  var description: String = _

  @JsonProperty("template_name")
  var templateName: String = _

  @JsonProperty("type")
  var _templateType: String = _

  override def templateType: Option[String] =
    if (_templateType == null || "".equals(_templateType)) None
    else Some(_templateType)

  @JsonProperty("parameters")
  private var _params: Seq[Parameter] = Nil

  @JsonProperty("tags")
  private var _tags: Seq[TagHolder] = Nil

  override def parameters: Seq[Parameter] = _params

  override def tags: Seq[Tag] = _tags.map(tw => tw.toTag)

  override def toString = ReflectionToStringBuilder.toString(this)
}

private class TagHolder {

  @JsonProperty
  var name: String = _

  @JsonProperty
  var description: String = _

  def toTag = Tag(name, description)
}

class InvalidYamlDescriptorException(msg: String, ex: Throwable = null) extends InvalidTemplateException(msg, ex) 
Example 5
Source File: StringUtils.scala    From sparkoscope   with Apache License 2.0
package org.apache.spark.sql.catalyst.util

import java.util.regex.{Pattern, PatternSyntaxException}

import org.apache.spark.unsafe.types.UTF8String

object StringUtils {

  // replace _ with ., matching exactly one arbitrary character
  // replace % with .*, matching zero or more arbitrary characters
  def escapeLikeRegex(v: String): String = {
    if (!v.isEmpty) {
      "(?s)" + (' ' +: v.init).zip(v).flatMap {
        case (prev, '\\') => ""
        case ('\\', c) =>
          c match {
            case '_' => "_"
            case '%' => "%"
            case _ => Pattern.quote("\\" + c)
          }
        case (prev, c) =>
          c match {
            case '_' => "."
            case '%' => ".*"
            case _ => Pattern.quote(Character.toString(c))
          }
      }.mkString
    } else {
      v
    }
  }

  private[this] val trueStrings = Set("t", "true", "y", "yes", "1").map(UTF8String.fromString)
  private[this] val falseStrings = Set("f", "false", "n", "no", "0").map(UTF8String.fromString)

  def isTrueString(s: UTF8String): Boolean = trueStrings.contains(s.toLowerCase)
  def isFalseString(s: UTF8String): Boolean = falseStrings.contains(s.toLowerCase)

  /** Returns the names matching any of the '|'-separated sub-patterns ('*' is a wildcard, matching is
   *  case-insensitive); sub-patterns that fail to compile are silently ignored. */
  def filterPattern(names: Seq[String], pattern: String): Seq[String] = {
    val funcNames = scala.collection.mutable.SortedSet.empty[String]
    pattern.trim().split("\\|").foreach { subPattern =>
      try {
        val regex = ("(?i)" + subPattern.replaceAll("\\*", ".*")).r
        funcNames ++= names.filter{ name => regex.pattern.matcher(name).matches() }
      } catch {
        case _: PatternSyntaxException =>
      }
    }
    funcNames.toSeq
  }
} 
Example 6
Source File: PatternChecker.scala    From incubator-daffodil   with Apache License 2.0
package org.apache.daffodil.grammar.primitives

import org.apache.daffodil.exceptions.SavesErrorsAndWarnings
import java.util.regex.Pattern
import org.apache.daffodil.util.Misc
import java.util.regex.PatternSyntaxException
import org.apache.daffodil.api.WarnID

/** Validates a regular expression pattern: invalid patterns are reported as errors, and patterns
 *  that can match zero-length input produce a warning, since such a pattern can always match. */
object PatternChecker {
  def checkPattern(pattern: String,
    context: SavesErrorsAndWarnings): Unit = {
    try {
      val pat = Pattern.compile(pattern)
      val m1 = pat.matcher("")
      val m2 = pat.matcher("\uFFFE") // obscure enough?
      if (m1.matches() && m2.lookingAt() && m2.group().length() == 0) {
        // the pattern will match with zero length, anything or nothing
        // This is a flawed pattern for an assert and dubious
        // generally. The pattern should have to match something.
        val needCDATA =
          if (pattern.startsWith("(?x)") &&
            !pattern.contains("\n") &&
            pattern.contains("#")) {
            // it's free form regex notation
            // it's all on one line,
            // and it contains a comment (# to end of line)
            // Almost guaranteed you are missing a CDATA wrapper.
            "\nMissing <![CDATA[...]]> around the regular expression." +
              "\nThis is required for free-form regular expression syntax with comments."
          } else ""
        context.SDW(WarnID.RegexPatternZeroLength, "Regular expression pattern '%s'.\n" +
          "This pattern will match with zero length, so it can always match.%s", pattern, needCDATA)
      }
    } catch {
      case e: PatternSyntaxException => {
        context.SDE("Invalid regular expression pattern '%s'.\nReason: %s.", pattern, Misc.getSomeMessage(e).get)
      }
    }
  }
} 
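The zero-length test above is easy to reproduce in isolation; a small sketch of why a pattern such as a* would trip the warning (my own illustration, not Daffodil code):

import java.util.regex.Pattern

val pat = Pattern.compile("a*")
pat.matcher("").matches()              // true: the pattern accepts the empty string
val m = pat.matcher("\uFFFE")
m.lookingAt() && m.group().length == 0 // true: it also matches zero characters of arbitrary input
// Both together mean an assert using this pattern can succeed without consuming anything.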
Example 7
Source File: StringUtils.scala    From multi-tenancy-spark   with Apache License 2.0
package org.apache.spark.sql.catalyst.util

import java.util.regex.{Pattern, PatternSyntaxException}

import org.apache.spark.unsafe.types.UTF8String

object StringUtils {

  // replace _ with ., matching exactly one arbitrary character
  // replace % with .*, matching zero or more arbitrary characters
  def escapeLikeRegex(v: String): String = {
    if (!v.isEmpty) {
      "(?s)" + (' ' +: v.init).zip(v).flatMap {
        case (prev, '\\') => ""
        case ('\\', c) =>
          c match {
            case '_' => "_"
            case '%' => "%"
            case _ => Pattern.quote("\\" + c)
          }
        case (prev, c) =>
          c match {
            case '_' => "."
            case '%' => ".*"
            case _ => Pattern.quote(Character.toString(c))
          }
      }.mkString
    } else {
      v
    }
  }

  private[this] val trueStrings = Set("t", "true", "y", "yes", "1").map(UTF8String.fromString)
  private[this] val falseStrings = Set("f", "false", "n", "no", "0").map(UTF8String.fromString)

  def isTrueString(s: UTF8String): Boolean = trueStrings.contains(s.toLowerCase)
  def isFalseString(s: UTF8String): Boolean = falseStrings.contains(s.toLowerCase)

  /** Returns the names matching any of the '|'-separated sub-patterns ('*' is a wildcard, matching is
   *  case-insensitive); sub-patterns that fail to compile are silently ignored. */
  def filterPattern(names: Seq[String], pattern: String): Seq[String] = {
    val funcNames = scala.collection.mutable.SortedSet.empty[String]
    pattern.trim().split("\\|").foreach { subPattern =>
      try {
        val regex = ("(?i)" + subPattern.replaceAll("\\*", ".*")).r
        funcNames ++= names.filter{ name => regex.pattern.matcher(name).matches() }
      } catch {
        case _: PatternSyntaxException =>
      }
    }
    funcNames.toSeq
  }
} 
Example 8
Source File: StringUtils.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.sql.catalyst.util

import java.util.regex.{Pattern, PatternSyntaxException}

import org.apache.spark.sql.AnalysisException
import org.apache.spark.unsafe.types.UTF8String

object StringUtils {

  /** Returns the names matching any of the '|'-separated sub-patterns ('*' is a wildcard, matching is
   *  case-insensitive); sub-patterns that fail to compile are silently ignored. */
  def filterPattern(names: Seq[String], pattern: String): Seq[String] = {
    val funcNames = scala.collection.mutable.SortedSet.empty[String]
    pattern.trim().split("\\|").foreach { subPattern =>
      try {
        val regex = ("(?i)" + subPattern.replaceAll("\\*", ".*")).r
        funcNames ++= names.filter{ name => regex.pattern.matcher(name).matches() }
      } catch {
        case _: PatternSyntaxException =>
      }
    }
    funcNames.toSeq
  }
} 
Example 9
Source File: Constraint.scala    From zipkin-mesos-framework   with Apache License 2.0
package net.elodina.mesos.zipkin.components

import java.util.regex.{Pattern, PatternSyntaxException}

import net.elodina.mesos.zipkin.utils.Util

import scala.util.Try

trait Constraint {
  def matches(value: String, values: List[String] = Nil): Boolean
}

object Constraint {
  def apply(value: String): Constraint = {
    if (value.startsWith("like:")) Constraint.Like(value.substring("like:".length))
    else if (value.startsWith("unlike:")) Constraint.Like(value.substring("unlike:".length), negated = true)
    else if (value == "unique") Constraint.Unique()
    else if (value.startsWith("cluster")) {
      val tail = value.substring("cluster".length)
      val cluster = if (tail.startsWith(":")) Some(tail.substring(1)) else None
      Cluster(cluster)
    } else if (value.startsWith("groupBy")) {
      val tail = value.substring("groupBy".length)
      val groups = if (tail.startsWith(":")) Try(tail.substring(1).toInt).toOption.getOrElse(throw new IllegalArgumentException(s"invalid condition $value"))
      else 1

      GroupBy(groups)
    }
    else throw new IllegalArgumentException(s"Unsupported condition: $value")
  }

  def parse(constraints: String): Map[String, List[Constraint]] = {
    Util.parseList(constraints).foldLeft[Map[String, List[Constraint]]](Map()) { case (all, (name, value)) =>
      all.get(name) match {
        case Some(values) => all.updated(name, Constraint(value) :: values)
        case None => all.updated(name, List(Constraint(value)))
      }
    }
  }

  case class Like(regex: String, negated: Boolean = false) extends Constraint {
    val pattern = try {
      Pattern.compile(s"^$regex$$")
    } catch {
      case e: PatternSyntaxException => throw new IllegalArgumentException(s"Invalid $name: ${e.getMessage}")
    }

    private def name: String = if (negated) "unlike" else "like"

    def matches(value: String, values: List[String]): Boolean = negated ^ pattern.matcher(value).find()

    override def toString: String = s"$name:$regex"
  }

  case class Unique() extends Constraint {
    def matches(value: String, values: List[String]): Boolean = !values.contains(value)

    override def toString: String = "unique"
  }

  case class Cluster(value: Option[String]) extends Constraint {
    def matches(value: String, values: List[String]): Boolean = this.value match {
      case Some(v) => v == value
      case None => values.isEmpty || values.head == value
    }

    override def toString: String = "cluster" + value.map(":" + _).getOrElse("")
  }

  case class GroupBy(groups: Int = 1) extends Constraint {
    def matches(value: String, values: List[String]): Boolean = {
      val counts = values.groupBy(identity).mapValues(_.size)
      if (counts.size < groups) !counts.contains(value)
      else {
        val minCount = counts.values.reduceOption(_ min _).getOrElse(0)
        counts.getOrElse(value, 0) == minCount
      }
    }

    override def toString: String = "groupBy" + (if (groups > 1) s":$groups" else "")
  }

} 
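A brief usage sketch for the constraint DSL above; the expected values follow from my reading of the code, not from output captured from the project.

val like = Constraint("like:slave[0-9]")
like.matches("slave1")                   // true
like.matches("master1")                  // false

val unique = Constraint("unique")
unique.matches("host-a", List("host-b")) // true: "host-a" has not been seen yet

// A malformed regex surfaces the PatternSyntaxException as an IllegalArgumentException:
// Constraint("like:slave[0-9")          // throws IllegalArgumentException("Invalid like: ...")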
Example 10
Source File: Constraint.scala    From exhibitor-mesos-framework   with Apache License 2.0
package ly.stealth.mesos.exhibitor

import java.util.regex.{Pattern, PatternSyntaxException}

import scala.util.Try

trait Constraint {
  def matches(value: String, values: List[String] = Nil): Boolean
}

object Constraint {
  def apply(value: String): Constraint = {
    if (value.startsWith("like:")) Constraint.Like(value.substring("like:".length))
    else if (value.startsWith("unlike:")) Constraint.Like(value.substring("unlike:".length), negated = true)
    else if (value == "unique") Constraint.Unique()
    else if (value.startsWith("cluster")) {
      val tail = value.substring("cluster".length)
      val cluster = if (tail.startsWith(":")) Some(tail.substring(1)) else None
      Cluster(cluster)
    } else if (value.startsWith("groupBy")) {
      val tail = value.substring("groupBy".length)
      val groups = if (tail.startsWith(":")) Try(tail.substring(1).toInt).toOption.getOrElse(throw new IllegalArgumentException(s"invalid condition $value"))
      else 1

      GroupBy(groups)
    }
    else throw new IllegalArgumentException(s"Unsupported condition: $value")
  }

  def parse(constraints: String): Map[String, List[Constraint]] = {
    Util.parseList(constraints).foldLeft[Map[String, List[Constraint]]](Map()) { case (all, (name, value)) =>
      all.get(name) match {
        case Some(values) => all.updated(name, Constraint(value) :: values)
        case None => all.updated(name, List(Constraint(value)))
      }
    }
  }

  case class Like(regex: String, negated: Boolean = false) extends Constraint {
    val pattern = try {
      Pattern.compile(s"^$regex$$")
    } catch {
      case e: PatternSyntaxException => throw new IllegalArgumentException(s"Invalid $name: ${e.getMessage}")
    }

    private def name: String = if (negated) "unlike" else "like"

    def matches(value: String, values: List[String]): Boolean = negated ^ pattern.matcher(value).find()

    override def toString: String = s"$name:$regex"
  }

  case class Unique() extends Constraint {
    def matches(value: String, values: List[String]): Boolean = !values.contains(value)

    override def toString: String = "unique"
  }

  case class Cluster(value: Option[String]) extends Constraint {
    def matches(value: String, values: List[String]): Boolean = this.value match {
      case Some(v) => v == value
      case None => values.isEmpty || values.head == value
    }

    override def toString: String = "cluster" + value.map(":" + _).getOrElse("")
  }

  case class GroupBy(groups: Int = 1) extends Constraint {
    def matches(value: String, values: List[String]): Boolean = {
      val counts = values.groupBy(identity).mapValues(_.size)
      if (counts.size < groups) !counts.contains(value)
      else {
        val minCount = counts.values.reduceOption(_ min _).getOrElse(0)
        counts.getOrElse(value, 0) == minCount
      }
    }

    override def toString: String = "groupBy" + (if (groups > 1) s":$groups" else "")
  }

}