Python re.U Examples

The following are 30 code examples of the re.U flag. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the re module, or try the search function.
Example #1
Source File: misc.py    From SEM with MIT License 7 votes vote down vote up
def add_all(self, event):
        """Annotate every occurrence of the currently selected text.

        Builds a boundary-aware regex from the tracked selection (or the
        Tk "sel" range when nothing is tracked), adds an annotation of
        ``self.type`` for each match in the document, then resets the
        frame's selection/typing state.
        """
        if self.frame.current_selection is not None:
            # Tracked selection: convert character offsets to Tk indices.
            start = self.frame.charindex2position(self.frame.current_selection.lb)
            end = self.frame.charindex2position(self.frame.current_selection.ub)
        else:
            # Fall back to the text widget's own selection marks.
            start, end = ("sel.first", "sel.last")
        try:
            target = re.escape(self.frame.text.get(start, end).strip())
            # Anchor with \b only when the edge character is alphanumeric;
            # otherwise require surrounding whitespace or string start/end.
            pattern = (u"\\b" if target[0].isalnum() else u"((?<=\\s)|(?<=^))") + target + (u"\\b" if target[-1].isalnum() else u"(?=\\s|$)")
            regex = re.compile(pattern, re.U + re.M)
            for match in regex.finditer(self.frame.doc.content):
                cur_start, cur_end = self.frame.charindex2position(match.start()), self.frame.charindex2position(match.end())
                # Skip occurrences that already carry this annotation type.
                if Tag(self.type, match.start(), match.end()) not in self.frame.current_annotations:
                    self.frame.wish_to_add = [self.type, cur_start, cur_end]
                    self.frame.add_annotation(None, remove_focus=False)
        except tkinter.TclError:
            # NOTE(review): catch-and-reraise is a no-op; presumably kept
            # as a marker that "sel.first"/"sel.last" may not exist.
            raise
        # Reset selection/typing state regardless of what was added.
        self.frame.type_combos[self.level].current(0)
        self.frame.wish_to_add = None
        self.frame.current_selection = None
        self.frame.current_type_hierarchy_level = 0
        self.frame.update_level()
        self.frame.text.tag_remove("BOLD",  "1.0", 'end')
Example #2
Source File: test_re.py    From oss-ftp with MIT License 6 votes vote down vote up
def test_ignore_case_set(self):
        """Case-insensitive matching inside character classes, including
        the Unicode folds KELVIN SIGN (U+212A) with 'k' and LATIN SMALL
        LETTER LONG S (U+017F) with 's'.

        Python 2 test: uses ``ur''`` raw-unicode literals and a ``u()``
        helper; not valid Python 3 syntax.
        """
        self.assertTrue(re.match(r'[19A]', 'A', re.I))
        self.assertTrue(re.match(r'[19a]', 'a', re.I))
        self.assertTrue(re.match(r'[19a]', 'A', re.I))
        self.assertTrue(re.match(r'[19A]', 'a', re.I))
        if have_unicode:
            self.assertTrue(re.match(ur'[19A]', u'A', re.U | re.I))
            self.assertTrue(re.match(ur'[19a]', u'a', re.U | re.I))
            self.assertTrue(re.match(ur'[19a]', u'A', re.U | re.I))
            self.assertTrue(re.match(ur'[19A]', u'a', re.U | re.I))
            assert u(r'\u212a').lower() == u'k' # 'K'
            self.assertTrue(re.match(u(r'[19K]'), u(r'\u212a'), re.U | re.I))
            self.assertTrue(re.match(u(r'[19k]'), u(r'\u212a'), re.U | re.I))
            self.assertTrue(re.match(u(r'[19\u212a]'), u'K', re.U | re.I))
            self.assertTrue(re.match(u(r'[19\u212a]'), u'k', re.U | re.I))
            assert u(r'\u017f').upper() == u'S' # 'ſ'
            self.assertTrue(re.match(ur'[19S]', u(r'\u017f'), re.U | re.I))
            self.assertTrue(re.match(ur'[19s]', u(r'\u017f'), re.U | re.I))
            self.assertTrue(re.match(u(r'[19\u017f]'), u'S', re.U | re.I))
            self.assertTrue(re.match(u(r'[19\u017f]'), u's', re.U | re.I))
Example #3
Source File: tweets_talk.py    From t-hoarder with GNU General Public License v3.0 6 votes vote down vote up
def token_words (source):
  """Tokenise a tweet into a list of lowercase plain words.

  URLs, @user mentions and #hashtags are discarded; every remaining
  word token is lowercased and returned in order of appearance.

  Fix: URLs are now removed with a single ``re.sub``.  The previous
  manual slicing (``source[0:start-1]``) dropped the character that
  preceded each URL and mangled the text when a URL appeared at
  position 0 (``start-1 == -1`` slices almost the whole string).

  :param source: tweet text (unicode string).
  :returns: list of lowercase word tokens.
  """
  # Remove URLs from the tweet in one pass.
  source_without_urls = re.sub(r'http[s]*://\S+', u'', source, flags=re.U)
  list_words = []
  # Tokens keep their @/# prefix so mentions and hashtags can be filtered.
  list_tokens = re.findall(r'[@#]*\w+', source_without_urls, re.U)
  for token in list_tokens:
    if (token.find(u'#') == -1) and (token.find(u'@') == -1):
      list_words.append(token.lower())
  return list_words
Example #4
Source File: tools.py    From hadrian with Apache License 2.0 6 votes vote down vote up
def getmatch(self, haystack):
        """Apply this pattern, with its configured flag letters, to *haystack*.

        Returns ``None`` for non-string input or when the pattern does not
        match at the start of *haystack*.  Otherwise returns a ``Match``
        holding *haystack* and either *haystack* itself (no ``to``
        template) or the result of substituting ``self.to``.
        """
        if not isinstance(haystack, basestring):
            return None
        flags = 0
        if self.flags is not None:
            # Each configured letter (either case) switches on one re flag.
            for letters, re_flag in (("iI", re.I), ("lL", re.L), ("mM", re.M),
                                     ("sS", re.S), ("uU", re.U), ("xX", re.X)):
                if letters[0] in self.flags or letters[1] in self.flags:
                    flags |= re_flag
        if re.match(self.pattern, haystack, flags=flags) is None:
            return None
        if self.to is None:
            return Match(haystack, haystack)
        return Match(haystack, re.sub(self.pattern, self.to, haystack, flags=flags))
Example #5
Source File: find_entities.py    From gransk with Apache License 2.0 6 votes vote down vote up
def setup(self, config):
    """
    Compile configured regular expressions.

    Each configured entity type becomes a named group wrapped in word
    boundaries; all entity patterns are joined into one alternation and
    compiled case-insensitively with Unicode matching.

    :param config: Configuration object.
    :type config: ``dict``
    """
    self.matches = {}

    entity_patterns = [
        r'\b(?P<{}>{})\b'.format(entity_type, entity_conf[helper.PATTERN])
        for entity_type, entity_conf in config.get(helper.ENTITIES, {}).items()
    ]

    self.pattern = regex.compile('|'.join(entity_patterns), regex.I | regex.U)
Example #6
Source File: tweets_location.py    From t-hoarder with GNU General Public License v3.0 6 votes vote down vote up
def get_tweet (tweet):
   # Parse one tab-separated tweet record into its typed fields.
   # Python 2 code: uses the print statement below.
   data = tweet.split('\t')
   try:
     id_tweet = data[0]
     timestamp = data[1]
     # Expect "YYYY-MM-DD hh:mm:ss"; findall yields the six captured parts.
     date_hour =re.findall(r'(\d\d\d\d)-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)',timestamp,re.U)
     (year,month,day,hour,minutes,seconds) = date_hour[0]
     author= data[2]
     text = data[3]
     app = data[4]
     # NOTE(review): data[5] is skipped and data[6] is used both as
     # user_id and as the followers count -- looks like an off-by-one;
     # confirm the column layout against the producer of these records.
     user_id = data[6]
     followers=get_number(data[6])
     following=get_number(data[7])
     statuses=get_number(data[8])
     loc = data[9]
     return (year,month,day,hour,minutes,seconds, author,text,app,user_id,followers,following,statuses,loc)
   except:
     # Bare except: any malformed line is reported and dropped (None).
     print ' tweet not match'
     return None
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# main
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
Example #7
Source File: tasks.py    From django-idcops with Apache License 2.0 6 votes vote down vote up
def get_dell_model(sn, model):
    """
    返回Dell设备SN号对应的设备型号
    (Return the device model for a Dell service-tag number.)

    If *model* does not look like a Dell product the recorded model is
    returned unchanged with a ``None`` code; otherwise Dell's support
    page for *sn* is fetched and scraped for the product name.

    :param sn: service tag / serial number used to build the query URL.
    :param model: the locally recorded model string.
    :returns: tuple ``(_model, _code)``.
    """
    query_url = "http://www.dell.com/support/home/cn/zh/cnbsdt1/product-support/servicetag/"
    pattern = "deel|dell|PowerEdge|R7|R8|R6|R4|戴尔"
    if re.findall(pattern, model, re.M | re.I | re.U):
        try:
            url = "{0}{1}".format(query_url, sn)
            # BUG FIX: urllib.request is a module, not a callable --
            # use urlopen(), and decode the bytes body before regex
            # matching (str patterns cannot search bytes).
            response = urllib.request.urlopen(url, timeout=30)
            html = response.read().decode('utf-8', 'replace')
            _model = ''.join(re.findall(r'productName:"(.*?)"', html))
            _code = ''.join(re.findall(
                r'<span class="beforeCaptcha">(.*?)</span>', html))
        except Exception:
            # Network or parse failure: fall back to the recorded model.
            _model = model
            _code = None
    else:
        _model = model
        _code = None
    return _model, _code
Example #8
Source File: label_consistency.py    From SEM with MIT License 6 votes vote down vote up
# Translation table built once at import time: curly apostrophe to ASCII
# apostrophe, and accented a/e/i variants (including the ae ligatures)
# to their base letter.  Replaces seven regex compilations per call with
# a single C-level translate() pass.
_NORMALIZE_TABLE = {}
for _chars, _repl in ((u"\u2019", u"'"),
                      (u"àáâãäåæ", u"a"), (u"ÀÁÂÃÄÅÆ", u"A"),
                      (u"éèêë", u"e"), (u"ÉÈÊË", u"E"),
                      (u"ìíîï", u"i"), (u"ÌÍÎÏ", u"I")):
    for _c in _chars:
        _NORMALIZE_TABLE[ord(_c)] = _repl


def normalize(token):
    """Return *token* with apostrophe variants folded to "'" and accented
    a/e/i characters folded to their unaccented equivalents."""
    return token.translate(_NORMALIZE_TABLE)
Example #9
Source File: test_re.py    From jawfish with MIT License 6 votes vote down vote up
def test_sre_character_class_literals(self):
        """Numeric escapes (octal, hex, 4-digit and 8-digit Unicode forms)
        inside character classes match the corresponding characters across
        the full Unicode range; malformed escapes raise re.error."""
        for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
            if i < 256:
                # Octal and 2-digit hex escapes only cover code points < 256.
                self.assertIsNotNone(re.match(r"[\%o]" % i, chr(i)))
                self.assertIsNotNone(re.match(r"[\%o8]" % i, chr(i)))
                self.assertIsNotNone(re.match(r"[\%03o]" % i, chr(i)))
                self.assertIsNotNone(re.match(r"[\%03o0]" % i, chr(i)))
                self.assertIsNotNone(re.match(r"[\%03o8]" % i, chr(i)))
                self.assertIsNotNone(re.match(r"[\x%02x]" % i, chr(i)))
                self.assertIsNotNone(re.match(r"[\x%02x0]" % i, chr(i)))
                self.assertIsNotNone(re.match(r"[\x%02xz]" % i, chr(i)))
            if i < 0x10000:
                # 4-digit Unicode escapes cover the BMP only.
                self.assertIsNotNone(re.match(r"[\u%04x]" % i, chr(i)))
                self.assertIsNotNone(re.match(r"[\u%04x0]" % i, chr(i)))
                self.assertIsNotNone(re.match(r"[\u%04xz]" % i, chr(i)))
            self.assertIsNotNone(re.match(r"[\U%08x]" % i, chr(i)))
            self.assertIsNotNone(re.match(r"[\U%08x0]" % i, chr(i)+"0"))
            self.assertIsNotNone(re.match(r"[\U%08xz]" % i, chr(i)+"z"))
        # Ranges over astral-plane characters work inside classes.
        self.assertIsNotNone(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e"))
        # Invalid group references, truncated escapes and out-of-range
        # code points are pattern errors.
        self.assertRaises(re.error, re.match, r"[\911]", "")
        self.assertRaises(re.error, re.match, r"[\x1z]", "")
        self.assertRaises(re.error, re.match, r"[\u123z]", "")
        self.assertRaises(re.error, re.match, r"[\U0001234z]", "")
        self.assertRaises(re.error, re.match, r"[\U00110000]", "")
Example #10
Source File: fr.py    From SEM with MIT License 6 votes vote down vote up
def __init__(self):
        """French tokeniser setup: compile the clitic, abbreviation and
        punctuation patterns used to force or forbid token boundaries.

        Fix: the whitespace and word patterns are now raw strings; the
        previous non-raw literals relied on invalid string escapes (a
        DeprecationWarning that newer Python versions turn into an error).
        The compiled pattern text is unchanged.
        """
        super(Tokeniser, self).__init__()
        
        # Hyphenated clitic pronouns ("-je", "-t-il", ...) force a split.
        self._cls         = re.compile(r"(-je|-tu|-nous|-vous|(:?-t)?-(:?on|ils?|elles?))\b", re.U + re.I)
        # Abbreviated titles (dr, m(el)le(s), mme(s), mr(s), st), optional dot.
        self._is_abn      = re.compile(r"\b(dr|me?lles?|mme?s?|mr?s?|st)\b\.?", re.U + re.I)
        self._abbrev      = re.compile(r"\b(i\.e\.|e\.g\.|c-à-d)", re.U + re.I)
        self._digit_valid = set(u"0123456789,.-")
        
        self._forbidden.append(self._is_abn)
        self._forbidden.append(self._abbrev)
        
        self._force.append(self._cls)
        
        self._spaces = re.compile(r"\s+", re.U+re.M)
        self._word = re.compile(r"^[^\W\d]+$", re.U + re.M)
        self._number_with_unit = re.compile(u"([0-9][^0-9,.])|([^0-9,.][0-9])")
        self._atomic = re.compile(u"[;:«»()\\[\\]{}=+*$£€/\\\"?!…%€$£]")
        self._comma_not_number = re.compile(u"(?<=[^0-9]),(?![0-9])", re.U + re.M)
        self._apostrophe = re.compile(u"(?=['ʼ’])", re.U + re.M)
        self._clitics = re.compile(r"(-je|-tu|-nous|-vous|(:?-t)?-(:?on|ils?|elles?))$", re.U + re.I)
Example #11
Source File: test_re.py    From jawfish with MIT License 6 votes vote down vote up
def test_sre_byte_literals(self):
        """Octal and hex escapes in bytes patterns match the raw byte;
        the u/U escape letters have no special meaning in bytes patterns
        (they match literal 'u'/'U'); malformed escapes raise re.error."""
        for i in [0, 8, 16, 32, 64, 127, 128, 255]:
            self.assertIsNotNone(re.match((r"\%03o" % i).encode(), bytes([i])))
            self.assertIsNotNone(re.match((r"\%03o0" % i).encode(), bytes([i])+b"0"))
            self.assertIsNotNone(re.match((r"\%03o8" % i).encode(), bytes([i])+b"8"))
            self.assertIsNotNone(re.match((r"\x%02x" % i).encode(), bytes([i])))
            self.assertIsNotNone(re.match((r"\x%02x0" % i).encode(), bytes([i])+b"0"))
            self.assertIsNotNone(re.match((r"\x%02xz" % i).encode(), bytes([i])+b"z"))
        self.assertIsNotNone(re.match(br"\u", b'u'))
        self.assertIsNotNone(re.match(br"\U", b'U'))
        self.assertIsNotNone(re.match(br"\0", b"\000"))
        self.assertIsNotNone(re.match(br"\08", b"\0008"))
        self.assertIsNotNone(re.match(br"\01", b"\001"))
        self.assertIsNotNone(re.match(br"\018", b"\0018"))
        self.assertIsNotNone(re.match(br"\567", bytes([0o167])))
        self.assertRaises(re.error, re.match, br"\911", b"")
        self.assertRaises(re.error, re.match, br"\x1", b"")
        self.assertRaises(re.error, re.match, br"\x1z", b"")
Example #12
Source File: test_re.py    From Fluid-Designer with GNU General Public License v3.0 6 votes vote down vote up
def test_sre_byte_class_literals(self):
        """Octal/hex escapes inside bytes-pattern character classes match
        the raw byte; Unicode-style escapes in bytes classes warn, and
        out-of-range octal values or bad/incomplete escapes are errors."""
        for i in [0, 8, 16, 32, 64, 127, 128, 255]:
            self.assertTrue(re.match((r"[\%o]" % i).encode(), bytes([i])))
            self.assertTrue(re.match((r"[\%o8]" % i).encode(), bytes([i])))
            self.assertTrue(re.match((r"[\%03o]" % i).encode(), bytes([i])))
            self.assertTrue(re.match((r"[\%03o0]" % i).encode(), bytes([i])))
            self.assertTrue(re.match((r"[\%03o8]" % i).encode(), bytes([i])))
            self.assertTrue(re.match((r"[\x%02x]" % i).encode(), bytes([i])))
            self.assertTrue(re.match((r"[\x%02x0]" % i).encode(), bytes([i])))
            self.assertTrue(re.match((r"[\x%02xz]" % i).encode(), bytes([i])))
        # In bytes classes the u/U escapes are deprecated, not supported.
        with self.assertWarns(DeprecationWarning):
            self.assertTrue(re.match(br"[\u1234]", b'u'))
        with self.assertWarns(DeprecationWarning):
            self.assertTrue(re.match(br"[\U00012345]", b'U'))
        self.checkPatternError(br"[\567]",
                               r'octal escape value \567 outside of '
                               r'range 0-0o377', 1)
        self.checkPatternError(br"[\911]", r'bad escape \9', 1)
        self.checkPatternError(br"[\x1z]", r'incomplete escape \x1', 1)
Example #13
Source File: test_re.py    From Fluid-Designer with GNU General Public License v3.0 6 votes vote down vote up
def test_getattr(self):
        """Compiled-pattern attributes (pattern, flags, groups, groupindex)
        and match-object attributes (pos, endpos, string, regs, re).
        Note that (?i) on a str pattern implies re.U in the reported
        flags.  groupindex must be a read-only mapping (issue 14260)."""
        self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
        self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)
        self.assertEqual(re.compile("(?i)(a)(b)").groups, 2)
        self.assertEqual(re.compile("(?i)(a)(b)").groupindex, {})
        self.assertEqual(re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,
                         {'first': 1, 'other': 2})

        self.assertEqual(re.match("(a)", "a").pos, 0)
        self.assertEqual(re.match("(a)", "a").endpos, 1)
        self.assertEqual(re.match("(a)", "a").string, "a")
        self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
        self.assertTrue(re.match("(a)", "a").re)

        # Issue 14260. groupindex should be non-modifiable mapping.
        p = re.compile(r'(?i)(?P<first>a)(?P<other>b)')
        self.assertEqual(sorted(p.groupindex), ['first', 'other'])
        self.assertEqual(p.groupindex['other'], 2)
        with self.assertRaises(TypeError):
            p.groupindex['other'] = 0
        self.assertEqual(p.groupindex['other'], 2)
Example #14
Source File: upload.py    From zulip with Apache License 2.0 6 votes vote down vote up
def sanitize_name(value: str) -> str:
    """
    Sanitizes a value to be safe to store in a Linux filesystem, in
    S3, and in a URL.  So unicode is allowed, but not special
    characters other than ".", "-", and "_".

    This implementation is based on django.utils.text.slugify; it is
    modified by:
    * adding '.' and '_' to the list of allowed characters.
    * preserving the case of the value.
    """
    normalized = unicodedata.normalize('NFKC', value)
    # Drop every character that is not a word char, whitespace, '.',
    # '_' or '-', then trim surrounding whitespace.
    stripped = re.sub(r'[^\w\s._-]', '', normalized, flags=re.U).strip()
    # Collapse runs of whitespace/hyphens into a single hyphen.
    collapsed = re.sub(r'[-\s]+', '-', stripped, flags=re.U)
    # Refuse values that would be dangerous as path components.
    assert collapsed not in {'', '.', '..'}
    return mark_safe(collapsed)
Example #15
Source File: test_re.py    From ironpython2 with Apache License 2.0 6 votes vote down vote up
def test_ignore_case(self):
        """re.I matching for literals, classes and backreferences; the
        Unicode section checks the KELVIN SIGN (U+212A) and LONG S
        (U+017F) case folds.  Python 2 test: uses ur'' literals and a
        u() helper."""
        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
        self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
        self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
        self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
        self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
        self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
        self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
        self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
        self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
        self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")

        if have_unicode:
            assert u(r'\u212a').lower() == u'k' # 'K'
            self.assertTrue(re.match(ur'K', u(r'\u212a'), re.U | re.I))
            self.assertTrue(re.match(ur'k', u(r'\u212a'), re.U | re.I))
            self.assertTrue(re.match(u(r'\u212a'), u'K', re.U | re.I))
            self.assertTrue(re.match(u(r'\u212a'), u'k', re.U | re.I))
            assert u(r'\u017f').upper() == u'S' # 'ſ'
            self.assertTrue(re.match(ur'S', u(r'\u017f'), re.U | re.I))
            self.assertTrue(re.match(ur's', u(r'\u017f'), re.U | re.I))
            self.assertTrue(re.match(u(r'\u017f'), u'S', re.U | re.I))
            self.assertTrue(re.match(u(r'\u017f'), u's', re.U | re.I))
Example #16
Source File: test_re.py    From ironpython2 with Apache License 2.0 6 votes vote down vote up
def test_ignore_case_set(self):
        """Case-insensitive matching inside character classes, including
        the KELVIN SIGN (U+212A) and LONG S (U+017F) Unicode case folds.
        Python 2 test: uses ur'' literals and a u() helper."""
        self.assertTrue(re.match(r'[19A]', 'A', re.I))
        self.assertTrue(re.match(r'[19a]', 'a', re.I))
        self.assertTrue(re.match(r'[19a]', 'A', re.I))
        self.assertTrue(re.match(r'[19A]', 'a', re.I))
        if have_unicode:
            self.assertTrue(re.match(ur'[19A]', u'A', re.U | re.I))
            self.assertTrue(re.match(ur'[19a]', u'a', re.U | re.I))
            self.assertTrue(re.match(ur'[19a]', u'A', re.U | re.I))
            self.assertTrue(re.match(ur'[19A]', u'a', re.U | re.I))
            assert u(r'\u212a').lower() == u'k' # 'K'
            self.assertTrue(re.match(u(r'[19K]'), u(r'\u212a'), re.U | re.I))
            self.assertTrue(re.match(u(r'[19k]'), u(r'\u212a'), re.U | re.I))
            self.assertTrue(re.match(u(r'[19\u212a]'), u'K', re.U | re.I))
            self.assertTrue(re.match(u(r'[19\u212a]'), u'k', re.U | re.I))
            assert u(r'\u017f').upper() == u'S' # 'ſ'
            self.assertTrue(re.match(ur'[19S]', u(r'\u017f'), re.U | re.I))
            self.assertTrue(re.match(ur'[19s]', u(r'\u017f'), re.U | re.I))
            self.assertTrue(re.match(u(r'[19\u017f]'), u'S', re.U | re.I))
            self.assertTrue(re.match(u(r'[19\u017f]'), u's', re.U | re.I))
Example #17
Source File: replacers.py    From fingerprints with MIT License 6 votes vote down vote up
def remove_types(text, clean=clean_strict):
    """Remove company type names from a piece of text.

    WARNING: This converts to ASCII by default, pass in a different
    `clean` function if you need a different behaviour."""
    # Per-`clean` cache of the compiled removal regex, stored as a
    # function attribute so the data file is read only once per cleaner.
    if not hasattr(remove_types, '_remove'):
        remove_types._remove = {}
    if clean not in remove_types._remove:
        names = set()
        with open(DATA_PATH, 'r') as fh:
            types = json.load(fh).get('types', {})
            # Compile person prefixes into a regular expression.
            # NOTE(review): this feeds both the key and the value of each
            # items() pair to clean(); assumes the JSON maps names to
            # names (not to lists) -- confirm against the data file.
            # NOTE(review): names are not re.escape()d, so entries with
            # regex metacharacters would alter the pattern's meaning.
            for items in types.items():
                for item in items:
                    item = clean(item)
                    if item is not None:
                        names.add(item)
        forms = '(%s)' % '|'.join(names)
        remove_types._remove[clean] = re.compile(forms, re.U)
    text = clean(text)
    if text is not None:
        # Implicitly returns None when clean() rejects the text.
        return remove_types._remove[clean].sub('', text).strip()
Example #18
Source File: tweets_talk.py    From t-hoarder with GNU General Public License v3.0 6 votes vote down vote up
def get_tweet_source (text):
  """Extract the retweeted user from a tweet text.

  Repeatedly scans "RT @user:" prefixes and returns ``(source,
  text_aux)`` where *source* is the last ``@user`` found after an "RT"
  marker (``None`` when there is none) and *text_aux* is the text
  following that prefix.

  Fix: the previous pattern ``'[RT[\\s]*(@\\w+)[:]*'`` opened an
  accidental character class ``[RT[\\s]`` (matching any run of R, T,
  '[' or whitespace); it is replaced by a literal "RT" prefix match.
  """
  source=None
  text_aux=text
  start=text_aux.find('RT')
  while  start !=  -1:
    text=text_aux[start:]
    RT= re.match(r'RT\s*(@\w+):*',text,re.U)
    if RT:
      source=RT.group(1)
      text_aux=text[len(RT.group(0)):]
      start=text_aux.find('RT')
    else:
      break
  return (source, text_aux)
Example #19
Source File: test_re.py    From ironpython2 with Apache License 2.0 6 votes vote down vote up
def test_cp16657(self):
        """IronPython issue CP16657: inline flags embedded in the pattern
        -- (?m), (?u), (?x) -- behave like the corresponding re.M / re.U
        / re.X compile-time flags."""
        # NOTE(review): inline flags at a non-start position are
        # deprecated in CPython 3.6+ and an error in 3.11+; this test
        # targets Python 2 / IronPython semantics.
        self.assertTrue(re.compile(r'^bar', re.M).search('foo\nbar') != None)
        self.assertTrue(re.compile(r'^bar(?m)').search('foo\nbar') != None)
        self.assertTrue(re.compile(r'^bar', re.M).search('foo\nbaar') == None)
        self.assertTrue(re.compile(r'^bar(?m)').search('foo\nbaar') == None)

        self.assertTrue(re.compile(r'^bar', re.U).search('bar') != None)
        self.assertTrue(re.compile(r'^bar(?u)').search('bar') != None)
        self.assertTrue(re.compile(r'^bar', re.U).search('baar') == None)
        self.assertTrue(re.compile(r'^bar(?u)').search('baar') == None)

        # re.X ignores unescaped whitespace inside the pattern.
        self.assertTrue(re.compile(r'     b ar   ', re.X).search('bar') != None)
        self.assertTrue(re.compile(r'b ar(?x)').search('bar') != None)
        self.assertTrue(re.compile(r'     b ar   ', re.X).search('baar') == None)
        self.assertTrue(re.compile(r'b ar(?x)').search('baar') == None)
        self.assertTrue(re.compile(r'b ar').search('bar') == None)
Example #20
Source File: utils.py    From faces with GNU General Public License v2.0 6 votes vote down vote up
def add_refs(self, data):
        """Modify data according to the expected output."""
        # Python 2 code: ur'' raw-unicode literals are not valid Python 3.
        if self.getRefs:
            # Build one alternation regex per reference kind (titles,
            # names, characters), each alternative re.escape()d.  An
            # empty collection produces the pattern u"()" and is skipped
            # by passing None to _putRefs instead.
            titl_re = ur'(%s)' % '|'.join([re.escape(x) for x
                                            in self._titlesRefs.keys()])
            if titl_re != ur'()': re_titles = re.compile(titl_re, re.U)
            else: re_titles = None
            nam_re = ur'(%s)' % '|'.join([re.escape(x) for x
                                            in self._namesRefs.keys()])
            if nam_re != ur'()': re_names = re.compile(nam_re, re.U)
            else: re_names = None
            chr_re = ur'(%s)' % '|'.join([re.escape(x) for x
                                            in self._charactersRefs.keys()])
            if chr_re != ur'()': re_characters = re.compile(chr_re, re.U)
            else: re_characters = None
            _putRefs(data, re_titles, re_names, re_characters)
        return {'data': data, 'titlesRefs': self._titlesRefs,
                'namesRefs': self._namesRefs,
                'charactersRefs': self._charactersRefs}
Example #21
Source File: expression_parser.py    From plaso with Apache License 2.0 6 votes vote down vote up
def __init__(self, state, regex, actions, next_state):
    """Initializes an event filter expressions parser token.

    Args:
      state (str): parser state within the token should be applied or None if
          the token should be applied regardless of the parser state.
      regex (str): regular expression to try and match from the current point.
      actions (str): separator-delimited string of method names in the
          EventFilterExpressionParser to call; split below on
          ``self._ACTION_SEPARATOR``.  (The previous ``list[str]``
          description did not match the ``split`` call.)
      next_state (str): next state we transition to if this Token matches.
    """
    super(Token, self).__init__()
    # NOTE(review): re.DOTALL and re.S are the same flag, so one of the
    # two is redundant here.
    self._regex = re.compile(regex, re.DOTALL | re.I | re.M | re.S | re.U)
    self.actions = []
    self.next_state = next_state
    self.state = state

    if actions:
      self.actions = actions.split(self._ACTION_SEPARATOR)
Example #22
Source File: test_re.py    From oss-ftp with MIT License 6 votes vote down vote up
def test_ignore_case(self):
        """re.I matching for literals, classes and backreferences; the
        Unicode section checks the KELVIN SIGN (U+212A) and LONG S
        (U+017F) case folds.  Python 2 test: uses ur'' literals and a
        u() helper."""
        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
        self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
        self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
        self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
        self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
        self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
        self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
        self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
        self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
        self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")

        if have_unicode:
            assert u(r'\u212a').lower() == u'k' # 'K'
            self.assertTrue(re.match(ur'K', u(r'\u212a'), re.U | re.I))
            self.assertTrue(re.match(ur'k', u(r'\u212a'), re.U | re.I))
            self.assertTrue(re.match(u(r'\u212a'), u'K', re.U | re.I))
            self.assertTrue(re.match(u(r'\u212a'), u'k', re.U | re.I))
            assert u(r'\u017f').upper() == u'S' # 'ſ'
            self.assertTrue(re.match(ur'S', u(r'\u017f'), re.U | re.I))
            self.assertTrue(re.match(ur's', u(r'\u017f'), re.U | re.I))
            self.assertTrue(re.match(u(r'\u017f'), u'S', re.U | re.I))
            self.assertTrue(re.match(u(r'\u017f'), u's', re.U | re.I))
Example #23
Source File: markdown2zim.py    From markdown2zim with MIT License 5 votes vote down vote up
def _strip_img_definitions(self, text):
        # Strips img definitions from text, stores the URLs and titles in
        # hash references.

        # Link defs are in the form:
        #   ![id]: url "optional title"
        _link_def_re = re.compile(r"""
            ![ ]*\[(.*?)\]     # id = \1
              [ \t]*
            \((.+?)\)           # url = \2
              [ \t]*
            (?:\n+|\Z)
            """, re.X | re.M | re.U | re.S)
        return _link_def_re.sub(self._extract_img_def_sub, text) 
Example #24
Source File: test_re.py    From jawfish with MIT License 5 votes vote down vote up
def test_sre_character_literals(self):
        """Numeric escapes outside character classes: octal and hex for
        code points < 256, 4-digit Unicode escapes within the BMP, and
        8-digit escapes for the full range; truncated or out-of-range
        escapes raise re.error."""
        for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
            if i < 256:
                self.assertIsNotNone(re.match(r"\%03o" % i, chr(i)))
                self.assertIsNotNone(re.match(r"\%03o0" % i, chr(i)+"0"))
                self.assertIsNotNone(re.match(r"\%03o8" % i, chr(i)+"8"))
                self.assertIsNotNone(re.match(r"\x%02x" % i, chr(i)))
                self.assertIsNotNone(re.match(r"\x%02x0" % i, chr(i)+"0"))
                self.assertIsNotNone(re.match(r"\x%02xz" % i, chr(i)+"z"))
            if i < 0x10000:
                self.assertIsNotNone(re.match(r"\u%04x" % i, chr(i)))
                self.assertIsNotNone(re.match(r"\u%04x0" % i, chr(i)+"0"))
                self.assertIsNotNone(re.match(r"\u%04xz" % i, chr(i)+"z"))
            self.assertIsNotNone(re.match(r"\U%08x" % i, chr(i)))
            self.assertIsNotNone(re.match(r"\U%08x0" % i, chr(i)+"0"))
            self.assertIsNotNone(re.match(r"\U%08xz" % i, chr(i)+"z"))
        # Short octal escapes and digit-suffixed forms.
        self.assertIsNotNone(re.match(r"\0", "\000"))
        self.assertIsNotNone(re.match(r"\08", "\0008"))
        self.assertIsNotNone(re.match(r"\01", "\001"))
        self.assertIsNotNone(re.match(r"\018", "\0018"))
        self.assertIsNotNone(re.match(r"\567", chr(0o167)))
        # Invalid group references and truncated escapes are errors.
        self.assertRaises(re.error, re.match, r"\911", "")
        self.assertRaises(re.error, re.match, r"\x1", "")
        self.assertRaises(re.error, re.match, r"\x1z", "")
        self.assertRaises(re.error, re.match, r"\u123", "")
        self.assertRaises(re.error, re.match, r"\u123z", "")
        self.assertRaises(re.error, re.match, r"\U0001234", "")
        self.assertRaises(re.error, re.match, r"\U0001234z", "")
        self.assertRaises(re.error, re.match, r"\U00110000", "")
Example #25
Source File: markdown2zim.py    From markdown2zim with MIT License 5 votes vote down vote up
def _strip_link_definitions(self, text):
        # Strips link definitions from text, stores the URLs and titles in
        # hash references.
        less_than_tab = self.tab_width - 1

        # Link defs are in the form:
        #   [id]: url "optional title"
        _link_def_re = re.compile(r"""
            ^[ ]{0,%d}\[(.+)\]: # id = \1
              [ \t]*
              \n?               # maybe *one* newline
              [ \t]*
            <?(.+?)>?           # url = \2
              [ \t]*
            (?:
                \n?             # maybe one newline
                [ \t]*
                (?<=\s)         # lookbehind for whitespace
                ['"(]
                ([^\n]*)        # title = \3
                ['")]
                [ \t]*
            )?  # title is optional
            (?:\n+|\Z)
            """ % less_than_tab, re.X | re.M | re.U)
        return _link_def_re.sub(self._extract_link_def_sub, text) 
Example #26
Source File: rulelang.py    From abusehelper with MIT License 5 votes vote down vote up
# Matches runs of "/" that are not already backslash-escaped: group 1 is
# the preceding context (start of string, or a non-backslash followed by
# balanced pairs of backslashes), group 2 the slashes to escape.
# Compiled once at import time instead of on every call.
_ESCAPE_SLASH_REX = re.compile(r"((?:^|[^\\])(?:\\\\)*?)(/+)", re.U)


def format_regexp(format, regexp):
    """Yield the rule-language form of *regexp*.

    Produces "/pattern/" with unescaped slashes backslash-escaped, plus
    an "i" suffix when the regexp is case-insensitive.

    :param format: formatter callback (unused here; part of the
        formatter interface).
    :param regexp: object exposing ``pattern`` and ``ignore_case``.
    """
    def escape_slash(match):
        return match.group(1) + match.group(2).replace("/", "\\/")

    pattern = _ESCAPE_SLASH_REX.sub(escape_slash, regexp.pattern)

    result = "/" + pattern + "/"
    if regexp.ignore_case:
        result += "i"
    yield result
Example #27
Source File: test_atoms.py    From abusehelper with MIT License 5 votes vote down vote up
def test_from_re(self):
        """RegExp.from_re: re.U and re.S are accepted silently (they are
        implied by the RegExp atom), re.I maps to ignore_case=True, and
        re.M / re.L / re.X are rejected with ValueError."""
        # re.U and re.S flags are implicitly set
        self.assertEqual(RegExp.from_re(re.compile("a", re.U)), RegExp("a"))
        self.assertEqual(RegExp.from_re(re.compile("a", re.S)), RegExp("a"))

        # re.I flag can be set explicitly
        self.assertEqual(
            RegExp.from_re(re.compile("a", re.I)),
            RegExp("a", ignore_case=True))

        # re.M, re.L and re.X are forbidden
        for flag in [re.M, re.L, re.X]:
            self.assertRaises(ValueError, RegExp.from_re, re.compile("a", flag))
Example #28
Source File: util.py    From coconut with Apache License 2.0 5 votes vote down vote up
def compile_regex(regex):
    """Compile *regex* with Unicode-aware matching (``re.U``) enabled."""
    return re.compile(regex, flags=re.U)
Example #29
Source File: _config.py    From flocker with Apache License 2.0 5 votes vote down vote up
def parse_storage_string(value):
    """
    Converts a string representing a quantity and a unit identifier in to
    an integer value representing the number of bytes in that quantity, e.g.
    an input of "1G" is parsed to 1073741824. Raises ``ValueError`` if
    value cannot be converted.

    An int is always returned, so conversions resulting in a floating-point
    are always rounded UP to ensure sufficient bytes for the specified storage
    size, e.g. input of "2.1M" is converted to 2202010 bytes, not 2202009.

    :param StringTypes value: The string value to convert to integer bytes.

    :returns: ``int`` representing the supplied value converted to bytes, e.g.
        input of "2G" (2 gigabytes) returns 2147483648.
    """
    # Unit multipliers are binary (powers of 1024), not SI powers of 1000.
    byte_multipliers = {
        'K': 1024, 'M': 1048576,
        'G': 1073741824, 'T': 1099511627776
    }
    # Python 2 API: types.StringTypes covers both str and unicode.
    if not isinstance(value, types.StringTypes):
        raise ValueError("Value must be string, got {type}.".format(
            type=type(value).__name__))
    # NOTE(review): non-raw literal relies on the invalid escape "\d";
    # prefer a raw string.  re.I also admits lowercase unit letters,
    # normalised via upper() below.
    pattern = re.compile("^(\d+\.?\d*)(K|M|G|T)?$", re.I | re.U)
    parsed = pattern.match(value)
    if not parsed:
        raise ValueError(
            "Value '{value}' could not be parsed as a storage quantity.".
            format(value=value)
        )
    quantity, unit = parsed.groups()
    quantity = float(quantity)
    if unit is not None:
        unit = unit.upper()
        quantity = quantity * byte_multipliers[unit]
    # Round UP so the byte count always covers the requested quantity.
    quantity = int(math.ceil(quantity))
    return quantity
Example #30
Source File: util.py    From coconut with Apache License 2.0 5 votes vote down vote up
def keyword(name):
    """Construct a grammar element matching *name* as a whole Python
    keyword (i.e. followed by a word boundary), with Unicode matching."""
    pattern = name + r"\b"
    return Regex(pattern, re.U)