Python re.X Examples

The following are 30 code examples that use the re.X flag (an alias of re.VERBOSE). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module re, or try the search function.
Example #1
Source File: markdown2.py    From dl with Apache License 2.0 6 votes vote down vote up
def _do_code_blocks(self, text):
        """Rewrite indented Markdown code blocks found in `text`.

        A code block is a run of lines that each start with a tab or with
        `self.tab_width` spaces, preceded by a blank line or the start of
        the document.  Every match is handed to `self._code_block_sub` and
        replaced by whatever that callback returns; the rewritten text is
        returned.
        """
        indent = self.tab_width
        pattern = r'''
            (?:\n\n|\A\n?)
            (               # $1 = the code block -- one or more lines, starting with a space/tab
              (?:
                (?:[ ]{%d} | \t)  # Lines must start with a tab or a tab-width of spaces
                .*\n+
              )+
            )
            ((?=^[ ]{0,%d}\S)|\Z)   # Lookahead for non-space at line-start, or end of doc
            # Lookahead to make sure this block isn't already in a code block.
            # Needed when syntax highlighting is being used.
            (?![^<]*\</code\>)
            ''' % (indent, indent)
        return re.sub(pattern, self._code_block_sub, text, flags=re.M | re.X)
Example #2
Source File: tools.py    From hadrian with Apache License 2.0 6 votes vote down vote up
def getmatch(self, haystack):
        """Match `self.pattern` against `haystack` and wrap the outcome in a `Match`.

        `self.flags`, when not None, is checked for the letters i, l, m, s,
        u and x (either case); each one present switches on the `re` flag
        of the same name.

        Returns None when `haystack` is not a string or when the pattern
        does not match at its start.  Otherwise returns
        `Match(haystack, haystack)` if `self.to` is None, else
        `Match(haystack, rewritten)` where `rewritten` is `haystack` with
        the pattern substituted by `self.to`.
        """
        if not isinstance(haystack, basestring):
            return None

        # Lower-case letter -> re flag; the upper-case spelling is accepted too.
        letter_to_flag = (
            ("i", re.I),
            ("l", re.L),
            ("m", re.M),
            ("s", re.S),
            ("u", re.U),
            ("x", re.X),
        )
        flags = 0
        if self.flags is not None:
            for letter, bit in letter_to_flag:
                if letter in self.flags or letter.upper() in self.flags:
                    flags |= bit

        if re.match(self.pattern, haystack, flags=flags) is None:
            return None
        if self.to is None:
            return Match(haystack, haystack)
        rewritten = re.sub(self.pattern, self.to, haystack, flags=flags)
        return Match(haystack, rewritten)
Example #3
Source File: markdown2.py    From dl with Apache License 2.0 6 votes vote down vote up
def _xml_oneliner_re_from_tab_width(tab_width):
    """Build the regex that recognises a standalone one-line XML construct.

    The compiled (verbose) pattern matches an XML processing instruction or
    a self-closed namespaced tag that starts after a blank line or at the
    beginning of the document, is indented by at most `tab_width - 1`
    spaces, and is followed by a blank line or the end of the document.
    Group 1 holds the matched line, including its indentation.
    """
    max_indent = tab_width - 1
    pattern = r"""
        (?:
            (?<=\n\n)       # Starting after a blank line
            |               # or
            \A\n?           # the beginning of the doc
        )
        (                           # save in $1
            [ ]{0,%d}
            (?:
                <\?\w+\b\s+.*?\?>   # XML processing instruction
                |
                <\w+:\w+\b\s+.*?/>  # namespaced single tag
            )
            [ \t]*
            (?=\n{2,}|\Z)       # followed by a blank line or end of document
        )
        """ % max_indent
    return re.compile(pattern, flags=re.X)
Example #4
Source File: lexer.py    From teleport with Apache License 2.0 6 votes vote down vote up
def match_text(self):
        """Try to consume a run of plain text via `self.match`.

        The pattern captures (lazily) everything up to one of: a line that
        starts with `%` or `##` after optional whitespace, a `${`
        expression, a `<%` / `</%` / `<&` / `</&` tag, a backslash-escaped
        newline, or the end of the string.

        When the pattern matches and the captured text is non-empty, a
        `parsetree.Text` node is appended through `self.append_node`.

        Returns True when the pattern matched, False otherwise.
        """
        text_pattern = r"""
                (.*?)         # anything, followed by:
                (
                 (?<=\n)(?=[ \t]*(?=%|\#\#)) # an eval or line-based
                                             # comment preceded by a
                                             # consumed newline and whitespace
                 |
                 (?=\${)      # an expression
                 |
                 (?=</?[%&])  # a substitution or block or call start or end
                              # - don't consume
                 |
                 (\\\r?\n)    # an escaped newline  - throw away
                 |
                 \Z           # end of string
                )"""
        found = self.match(text_pattern, re.X | re.S)
        if not found:
            return False

        content = found.group(1)
        if content:
            self.append_node(parsetree.Text, content)
        return True
Example #5
Source File: markdown2.py    From dl with Apache License 2.0 6 votes vote down vote up
def _hr_tag_re_from_tab_width(tab_width):
    """Build the regex that recognises a standalone `<hr>` tag line.

    The compiled (verbose) pattern matches an `<hr ...>` or `<hr ... />`
    tag that starts after a blank line or at the beginning of the
    document, is indented by at most `tab_width - 1` spaces, and is
    followed by a blank line or the end of the document.  Group 1 is the
    whole line and group 2 is the tag name.
    """
    max_indent = tab_width - 1
    pattern = r"""
        (?:
            (?<=\n\n)       # Starting after a blank line
            |               # or
            \A\n?           # the beginning of the doc
        )
        (                       # save in \1
            [ ]{0,%d}
            <(hr)               # start tag = \2
            \b                  # word break
            ([^<>])*?           #
            /?>                 # the matching end tag
            [ \t]*
            (?=\n{2,}|\Z)       # followed by a blank line or end of document
        )
        """ % max_indent
    return re.compile(pattern, flags=re.X)
Example #6
Source File: markdown2.py    From dl with Apache License 2.0 6 votes vote down vote up
def _xml_oneliner_re_from_tab_width(tab_width):
    """Build the regex that recognises a standalone one-line XML construct.

    The compiled (verbose) pattern matches an XML processing instruction or
    a self-closed namespaced tag that starts after a blank line or at the
    beginning of the document, is indented by at most `tab_width - 1`
    spaces, and is followed by a blank line or the end of the document.
    Group 1 holds the matched line, including its indentation.
    """
    max_indent = tab_width - 1
    pattern = r"""
        (?:
            (?<=\n\n)       # Starting after a blank line
            |               # or
            \A\n?           # the beginning of the doc
        )
        (                           # save in $1
            [ ]{0,%d}
            (?:
                <\?\w+\b\s+.*?\?>   # XML processing instruction
                |
                <\w+:\w+\b\s+.*?/>  # namespaced single tag
            )
            [ \t]*
            (?=\n{2,}|\Z)       # followed by a blank line or end of document
        )
        """ % max_indent
    return re.compile(pattern, flags=re.X)
Example #7
Source File: markdown2.py    From dl with Apache License 2.0 6 votes vote down vote up
def _do_code_blocks(self, text):
        """Rewrite indented Markdown code blocks found in `text`.

        A code block is a run of lines that each start with a tab or with
        `self.tab_width` spaces, preceded by a blank line or the start of
        the document.  Every match is handed to `self._code_block_sub` and
        replaced by whatever that callback returns; the rewritten text is
        returned.
        """
        indent = self.tab_width
        pattern = r'''
            (?:\n\n|\A\n?)
            (               # $1 = the code block -- one or more lines, starting with a space/tab
              (?:
                (?:[ ]{%d} | \t)  # Lines must start with a tab or a tab-width of spaces
                .*\n+
              )+
            )
            ((?=^[ ]{0,%d}\S)|\Z)   # Lookahead for non-space at line-start, or end of doc
            # Lookahead to make sure this block isn't already in a code block.
            # Needed when syntax highlighting is being used.
            (?![^<]*\</code\>)
            ''' % (indent, indent)
        return re.sub(pattern, self._code_block_sub, text, flags=re.M | re.X)
Example #8
Source File: core.py    From teleport with Apache License 2.0 6 votes vote down vote up
def __bytes__(self):
        """
        Produce (and cache in self._bytes) the byte string for this value.

        When self.contents is None an empty byte string is returned and
        nothing is cached. Otherwise the cached self._bytes is reused if it
        is already set; if not, it is built either from the single chunk
        returned by self._as_chunk() or by joining the chunks returned by
        self._merge_chunks(), and self._unused_bits is updated alongside.

        :raises:
            ValueError - when a chunk other than the last has unused bits

        :return:
            A byte string
        """

        if self.contents is None:
            return b''

        if self._bytes is not None:
            # Already computed on an earlier call
            return self._bytes

        if self._indefinite:
            pieces = self._merge_chunks()
            self._unused_bits = ()
            for piece in pieces:
                if self._unused_bits:
                    # Disallowed by X.690 §8.6.4
                    raise ValueError('Only last chunk in a bit string may have unused bits')
                self._unused_bits = piece[1]
            self._bytes = b''.join(piece[0] for piece in pieces)
        else:
            self._bytes, self._unused_bits = self._as_chunk()[0]

        return self._bytes
Example #9
Source File: utils.py    From schemathesis with MIT License 6 votes vote down vote up
def make_loader(*tags_to_remove: str) -> Type[yaml.SafeLoader]:
    """Build a `SafeLoader` subclass with selected implicit resolvers removed.

    A new class named "YAMLLoader" is derived from `SafeLoader`.  Its
    implicit-resolver table is rebuilt without any entry whose tag appears
    in `tags_to_remove`, and a replacement resolver for
    ``tag:yaml.org,2002:float`` is then registered on it.  The new class is
    returned.
    """
    cls: Type[yaml.SafeLoader] = type("YAMLLoader", (SafeLoader,), {})

    # Rebuild the resolver table, dropping the unwanted tags.
    kept_resolvers = {}
    for key, mapping in cls.yaml_implicit_resolvers.copy().items():
        kept_resolvers[key] = [(tag, regexp) for tag, regexp in mapping if tag not in tags_to_remove]
    cls.yaml_implicit_resolvers = kept_resolvers

    # Fix pyyaml scientific notation parse bug
    # See PR: https://github.com/yaml/pyyaml/pull/174 for upstream fix
    float_regexp = re.compile(
        r"""^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+]?[0-9]+)?
                       |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+)
                       |\.[0-9_]+(?:[eE][-+]?[0-9]+)?
                       |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]*
                       |[-+]?\.(?:inf|Inf|INF)
                       |\.(?:nan|NaN|NAN))$""",
        re.X,
    )
    first_characters = list("-+0123456789.")
    cls.add_implicit_resolver(  # type: ignore
        "tag:yaml.org,2002:float", float_regexp, first_characters
    )

    return cls
Example #10
Source File: config.py    From Pixel2MeshPlusPlus with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def parse_args(parser):
    """Parse the command line and fill still-default options from a YAML config.

    `parser.parse_args()` is called first.  When the resulting
    `config_file` attribute is falsy the namespace is returned untouched.
    Otherwise a float resolver is registered on `yaml.SafeLoader` itself
    (no subclass is created), `config_file` is loaded with it, the
    `config_file` attribute is removed from the namespace, and every
    remaining argument that still equals the parser default and has a key
    in the loaded data is overwritten with the configured value.

    Returns the (possibly updated) argument namespace.
    """
    args = parser.parse_args()
    if not args.config_file:
        return args

    loader = yaml.SafeLoader
    float_pattern = re.compile(u'''^(?:
        [-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)?
        |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+)
        |\\.[0-9_]+(?:[eE][-+][0-9]+)?
        |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*
        |[-+]?\\.(?:inf|Inf|INF)
        |\\.(?:nan|NaN|NAN))$''', re.X)
    loader.add_implicit_resolver(
        u'tag:yaml.org,2002:float',
        float_pattern,
        list(u'-+0123456789.'))

    data = yaml.load(args.config_file, Loader=loader)
    delattr(args, 'config_file')

    # Only values are reassigned below, so iterating the live dict is safe.
    arg_dict = args.__dict__
    for key, value in arg_dict.items():
        if value == parser.get_default(key) and key in data:
            arg_dict[key] = data[key]
    return args
Example #11
Source File: lexer.py    From jbox with MIT License 6 votes vote down vote up
def match_text(self):
        """Try to consume a run of plain text via `self.match`.

        The pattern captures (lazily) everything up to one of: a line that
        starts with `%` or `##` after optional whitespace, a `${`
        expression, a `<%` / `</%` / `<&` / `</&` tag, a backslash-escaped
        newline, or the end of the string.

        When the pattern matches and the captured text is non-empty, a
        `parsetree.Text` node is appended through `self.append_node`.

        Returns True when the pattern matched, False otherwise.
        """
        text_pattern = r"""
                (.*?)         # anything, followed by:
                (
                 (?<=\n)(?=[ \t]*(?=%|\#\#)) # an eval or line-based
                                             # comment preceded by a
                                             # consumed newline and whitespace
                 |
                 (?=\${)      # an expression
                 |
                 (?=</?[%&])  # a substitution or block or call start or end
                              # - don't consume
                 |
                 (\\\r?\n)    # an escaped newline  - throw away
                 |
                 \Z           # end of string
                )"""
        found = self.match(text_pattern, re.X | re.S)
        if not found:
            return False

        content = found.group(1)
        if content:
            self.append_node(parsetree.Text, content)
        return True
Example #12
Source File: markdown2.py    From python-webapp-blog with GNU General Public License v3.0 6 votes vote down vote up
def _xml_oneliner_re_from_tab_width(tab_width):
    """Build the regex that recognises a standalone one-line XML construct.

    The compiled (verbose) pattern matches an XML processing instruction or
    a self-closed namespaced tag that starts after a blank line or at the
    beginning of the document, is indented by at most `tab_width - 1`
    spaces, and is followed by a blank line or the end of the document.
    Group 1 holds the matched line, including its indentation.
    """
    max_indent = tab_width - 1
    pattern = r"""
        (?:
            (?<=\n\n)       # Starting after a blank line
            |               # or
            \A\n?           # the beginning of the doc
        )
        (                           # save in $1
            [ ]{0,%d}
            (?:
                <\?\w+\b\s+.*?\?>   # XML processing instruction
                |
                <\w+:\w+\b\s+.*?/>  # namespaced single tag
            )
            [ \t]*
            (?=\n{2,}|\Z)       # followed by a blank line or end of document
        )
        """ % max_indent
    return re.compile(pattern, flags=re.X)
Example #13
Source File: markdown2.py    From python-webapp-blog with GNU General Public License v3.0 6 votes vote down vote up
def _hr_tag_re_from_tab_width(tab_width):
    """Build the regex that recognises a standalone `<hr>` tag line.

    The compiled (verbose) pattern matches an `<hr ...>` or `<hr ... />`
    tag that starts after a blank line or at the beginning of the
    document, is indented by at most `tab_width - 1` spaces, and is
    followed by a blank line or the end of the document.  Group 1 is the
    whole line and group 2 is the tag name.
    """
    max_indent = tab_width - 1
    pattern = r"""
        (?:
            (?<=\n\n)       # Starting after a blank line
            |               # or
            \A\n?           # the beginning of the doc
        )
        (                       # save in \1
            [ ]{0,%d}
            <(hr)               # start tag = \2
            \b                  # word break
            ([^<>])*?           #
            /?>                 # the matching end tag
            [ \t]*
            (?=\n{2,}|\Z)       # followed by a blank line or end of document
        )
        """ % max_indent
    return re.compile(pattern, flags=re.X)
Example #14
Source File: zim2markdown.py    From markdown2zim with MIT License 5 votes vote down vote up
def _strip_footnote_definitions(self, text):
        """Replace every footnote definition in `text` via a callback.

        The shape being matched is:

            [^note-id]: Text of the note.

                May include one or more indented paragraphs.

        Notes:
        - 'note-id' may be almost any text (group 1 of the pattern).
        - The note text (group 2) may begin on the line after the marker:

            [^note-id]:
                Text of the note.

        - Lines after the first must be indented by a tab or by
          `self.tab_width` spaces to belong to the note.

        Each match is passed to `self._extract_footnote_def_sub` and
        replaced by its return value; the resulting text is returned.
        """
        tab = self.tab_width
        pattern = r'''
            ^[ ]{0,%d}\[\^(.+)\]:   # id = \1
            [ \t]*
            (                       # footnote text = \2
              # First line need not start with the spaces.
              (?:\s*.*\n+)
              (?:
                (?:[ ]{%d} | \t)  # Subsequent lines must be indented.
                .*\n+
              )*
            )
            # Lookahead for non-space at line-start, or end of doc.
            (?:(?=^[ ]{0,%d}\S)|\Z)
            ''' % (tab - 1, tab, tab)
        return re.sub(pattern, self._extract_footnote_def_sub, text,
                      flags=re.X | re.M)
Example #15
Source File: markdown2.py    From python-webapp-blog with GNU General Public License v3.0 5 votes vote down vote up
def _do_wiki_tables(self, text):
        """Replace `||cell||cell||` wiki-style tables in `text` via a callback.

        A table starts after a blank line (or at the start of the document)
        with a line of the form `||...||`, indented by at most
        `self.tab_width - 1` spaces, and continues over following `||...||`
        lines that carry the same indentation.  Each table is passed to
        `self._wiki_table_sub` and replaced by its return value.
        """
        # Cheap pre-check: without a "||" there cannot be any table.
        if "||" not in text:
            return text

        max_indent = self.tab_width - 1
        table_pattern = r'''
            (?:(?<=\n\n)|\A\n?)            # leading blank line
            ^([ ]{0,%d})\|\|.+?\|\|[ ]*\n  # first line
            (^\1\|\|.+?\|\|\n)*        # any number of subsequent lines
            ''' % max_indent
        return re.sub(table_pattern, self._wiki_table_sub, text,
                      flags=re.M | re.X)
Example #16
Source File: filters.py    From teleport with Apache License 2.0 5 votes vote down vote up
def __escape(self, m):
        """Return the replacement text for the single character matched by `m`.

        The character's code point is looked up in `self.codepoint2entity`;
        if the lookup raises KeyError or IndexError, a hexadecimal numeric
        character reference (`&#xHH;`) is produced instead.
        """
        code = ord(m.group())
        table = self.codepoint2entity
        try:
            replacement = table[code]
        except (KeyError, IndexError):
            replacement = '&#x%X;' % code
        return replacement
Example #17
Source File: yaml_patch.py    From pyGeoStatistics with MIT License 5 votes vote down vote up
def loader_patched():
    """Register a float resolver on `yaml.SafeLoader` and return that class.

    The resolver is added to `yaml.SafeLoader` itself (no subclass is
    created), for the tag ``tag:yaml.org,2002:float`` and for scalars whose
    first character is one of ``-+0123456789.``.
    """
    patched = yaml.SafeLoader
    float_pattern = re.compile(u'''^(?:
        [-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)?
        |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+)
        |\\.[0-9_]+(?:[eE][-+][0-9]+)?
        |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*
        |[-+]?\\.(?:inf|Inf|INF)
        |\\.(?:nan|NaN|NAN))$''', re.X)
    first_characters = list(u'-+0123456789.')
    patched.add_implicit_resolver(
        u'tag:yaml.org,2002:float', float_pattern, first_characters)
    return patched
Example #18
Source File: test_atoms.py    From abusehelper with MIT License 5 votes vote down vote up
def test_from_re(self):
        """Check how `RegExp.from_re` treats the flags of a compiled pattern."""
        # Patterns compiled with re.U or re.S compare equal to a plain RegExp.
        for implied in (re.U, re.S):
            self.assertEqual(RegExp.from_re(re.compile("a", implied)), RegExp("a"))

        # re.I is carried over as ignore_case=True.
        converted = RegExp.from_re(re.compile("a", re.I))
        self.assertEqual(converted, RegExp("a", ignore_case=True))

        # re.M, re.L and re.X must be rejected with ValueError.
        for forbidden in (re.M, re.L, re.X):
            compiled = re.compile("a", forbidden)
            self.assertRaises(ValueError, RegExp.from_re, compiled)
Example #19
Source File: markdown2zim.py    From markdown2zim with MIT License 5 votes vote down vote up
def _strip_img_definitions(self, text):
        """Replace image references in `text` via a callback.

        The pattern matches text of the form

            ![id](url)

        with optional spaces after the `!` and optional spaces/tabs between
        and after the two bracketed parts, followed by newlines or the end
        of the text.  Group 1 is the id and group 2 the url.  Each match is
        passed to `self._extract_img_def_sub` and replaced by its return
        value.
        """
        img_def_pattern = r"""
            ![ ]*\[(.*?)\]     # id = \1
              [ \t]*
            \((.+?)\)           # url = \2
              [ \t]*
            (?:\n+|\Z)
            """
        return re.sub(img_def_pattern, self._extract_img_def_sub, text,
                      flags=re.X | re.M | re.U | re.S)
Example #20
Source File: markdown2zim.py    From markdown2zim with MIT License 5 votes vote down vote up
def _strip_link_definitions(self, text):
        """Replace link definitions in `text` via a callback.

        Link definitions have the form

            [id]: url "optional title"

        and may be indented by at most `self.tab_width - 1` spaces.  The
        pattern captures the id (group 1), the url (group 2) and the
        optional title (group 3).  Each match is passed to
        `self._extract_link_def_sub` and replaced by its return value;
        presumably that callback is where urls and titles get recorded.
        """
        max_indent = self.tab_width - 1
        link_def_pattern = r"""
            ^[ ]{0,%d}\[(.+)\]: # id = \1
              [ \t]*
              \n?               # maybe *one* newline
              [ \t]*
            <?(.+?)>?           # url = \2
              [ \t]*
            (?:
                \n?             # maybe one newline
                [ \t]*
                (?<=\s)         # lookbehind for whitespace
                ['"(]
                ([^\n]*)        # title = \3
                ['")]
                [ \t]*
            )?  # title is optional
            (?:\n+|\Z)
            """ % max_indent
        return re.sub(link_def_pattern, self._extract_link_def_sub, text,
                      flags=re.X | re.M | re.U)
Example #21
Source File: filters.py    From teleport with Apache License 2.0 5 votes vote down vote up
def __escape(self, m):
        """Return the replacement text for the single character matched by `m`.

        The character's code point is looked up in `self.codepoint2entity`;
        if the lookup raises KeyError or IndexError, a hexadecimal numeric
        character reference (`&#xHH;`) is produced instead.
        """
        code = ord(m.group())
        table = self.codepoint2entity
        try:
            replacement = table[code]
        except (KeyError, IndexError):
            replacement = "&#x%X;" % code
        return replacement
Example #22
Source File: core.py    From teleport with Apache License 2.0 5 votes vote down vote up
def _chunks_to_int(self):
        """
        Fold all chunks of this value into a single integer.

        :raises:
            ValueError - when the bits accumulated before a further chunk
            are not a multiple of eight

        :return:
            A tuple (value, bits, unused_bits) where value is an integer with the
            value of the BitString, bits is the bit count of value and unused_bits
            is a tuple of 1s and 0s.
        """

        if not self._indefinite:
            # Fast path
            return self._as_chunk()[0]

        combined = 0
        bit_total = 0
        trailing = ()

        # X.690 §8.6.3 allows empty indefinite encodings
        for piece, piece_bits, trailing in self._merge_chunks():
            if bit_total & 7:
                # Disallowed by X.690 §8.6.4
                raise ValueError('Only last chunk in a bit string may have unused bits')
            combined = (combined << piece_bits) | piece
            bit_total += piece_bits

        return combined, bit_total, trailing
Example #23
Source File: markdown2.py    From python-webapp-blog with GNU General Public License v3.0 5 votes vote down vote up
def _strip_link_definitions(self, text):
        """Replace link definitions in `text` via a callback.

        Link definitions have the form

            [id]: url "optional title"

        and may be indented by at most `self.tab_width - 1` spaces.  The
        pattern captures the id (group 1), the url (group 2) and the
        optional title (group 3).  Each match is passed to
        `self._extract_link_def_sub` and replaced by its return value;
        presumably that callback is where urls and titles get recorded.
        """
        max_indent = self.tab_width - 1
        link_def_pattern = r"""
            ^[ ]{0,%d}\[(.+)\]: # id = \1
              [ \t]*
              \n?               # maybe *one* newline
              [ \t]*
            <?(.+?)>?           # url = \2
              [ \t]*
            (?:
                \n?             # maybe one newline
                [ \t]*
                (?<=\s)         # lookbehind for whitespace
                ['"(]
                ([^\n]*)        # title = \3
                ['")]
                [ \t]*
            )?  # title is optional
            (?:\n+|\Z)
            """ % max_indent
        return re.sub(link_def_pattern, self._extract_link_def_sub, text,
                      flags=re.X | re.M | re.U)
Example #24
Source File: test_re.py    From BinderFilter with MIT License 5 votes vote down vote up
def test_flags(self):
        """Compile a simple pattern once with each single-letter flag."""
        for flag in (re.I, re.M, re.X, re.S, re.L):
            compiled = re.compile('^pattern$', flag)
            self.assertNotEqual(compiled, None)
Example #25
Source File: test_re.py    From BinderFilter with MIT License 5 votes vote down vote up
def test_constants(self):
        """Check that each one-letter flag equals its long-named counterpart."""
        aliases = (
            ("I", "IGNORECASE"),
            ("L", "LOCALE"),
            ("M", "MULTILINE"),
            ("S", "DOTALL"),
            ("X", "VERBOSE"),
        )
        for short_name, long_name in aliases:
            self.assertEqual(getattr(re, short_name), getattr(re, long_name))
Example #26
Source File: test_re.py    From BinderFilter with MIT License 5 votes vote down vote up
def test_basic_re_sub(self):
        """Exercise `re.sub` with string, callable and group-reference replacements.

        Relies on `self.bump_num` as the callable replacement for the
        digit-run cases.  Replacement templates and patterns that contain
        backslash sequences which are not Python string escapes are written
        as raw strings; their values are the same as the non-raw spelling.
        """
        # Inline flag, callable replacement, and the `count` argument.
        self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                         '9.3 -3 24x100y')
        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
                         '9.3 -3 23x99y')

        # A callable's return value is inserted literally; a template string
        # has its escapes processed.
        self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
        self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

        s = r"\1\1"
        self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
        self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
        self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

        # Named and numbered group references in the template.
        self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
        self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
        self.assertEqual(re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
        self.assertEqual(re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')

        # Escape handling inside the replacement template.
        self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
                         '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
        self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
        self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
                         (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

        self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')
Example #27
Source File: base64unpack.py    From msticpy with MIT License 5 votes vote down vote up
def _is_known_b64_prefix(
    input_string: str,
) -> Union[Tuple[str, str], Tuple[None, None]]:
    """
    Identify the file type from the start of `input_string`.

    Only the first 160 characters are inspected, with newlines and
    carriage returns removed. Literal prefixes from `_BASE64_HEADER_TYPES`
    are tried first; then each key of `_BASE64_HEADER_OFFSET_TYPES` is
    searched for as a case-insensitive, verbose regular expression.

    Returns the matching key and its file type, or (None, None) when
    nothing matches.
    """
    head = input_string[0:160].replace("\n", "").replace("\r", "")

    for prefix, file_type in _BASE64_HEADER_TYPES.items():
        if head.startswith(prefix):
            return prefix, file_type

    for pattern, file_type in _BASE64_HEADER_OFFSET_TYPES.items():
        if re.search(pattern, head, re.I | re.X):
            return pattern, file_type

    return None, None
Example #28
Source File: iocextract.py    From msticpy with MIT License 5 votes vote down vote up
def _compile_regex(regex):
    """Compile `regex` as a case-insensitive, verbose, multi-line pattern."""
    flags = re.I | re.X | re.M
    return re.compile(regex, flags)
Example #29
Source File: core.py    From teleport with Apache License 2.0 5 votes vote down vote up
def _as_chunk(self):
        """
        Decode the contents of a primitive BitString encoding into an integer.
        Used when reconstructing indefinite length values.

        The first byte of self.contents gives the number of unused bits; the
        remaining bytes are converted with int_from_bytes().

        :raises:
            ValueError - when the unused-bit count is non-zero for an empty
            bit string, or is greater than 7

        :return:
            A list with one tuple (value, bits, unused_bits) where value is an integer
            with the value of the BitString, bits is the bit count of value and
            unused_bits is a tuple of 1s and 0s. The list is empty when the
            value is flagged as indefinite.
        """

        if self._indefinite:
            # return an empty chunk, for cases like \x23\x80\x00\x00
            return []

        if _PY2:
            pad_len = ord(self.contents[0])
        else:
            pad_len = self.contents[0]
        number = int_from_bytes(self.contents[1:])
        bit_count = (len(self.contents) - 1) * 8

        if not pad_len:
            return [(number, bit_count, ())]

        if len(self.contents) == 1:
            # Disallowed by X.690 §8.6.2.3
            raise ValueError('Empty bit string has {0} unused bits'.format(pad_len))

        if pad_len > 7:
            # Disallowed by X.690 §8.6.2.2
            raise ValueError('Bit string has {0} unused bits'.format(pad_len))

        # The low pad_len bits are the unused ones; split them off the value.
        low_mask = (1 << pad_len) - 1
        padding = _int_to_bit_tuple(number & low_mask, pad_len)

        return [(number >> pad_len, bit_count - pad_len, padding)]
Example #30
Source File: markdown2.py    From dl with Apache License 2.0 5 votes vote down vote up
def _strip_link_definitions(self, text):
        """Replace link definitions in `text` via a callback.

        Link definitions have the form

            [id]: url "optional title"

        and may be indented by at most `self.tab_width - 1` spaces.  The
        pattern captures the id (group 1), the url (group 2) and the
        optional title (group 3).  Each match is passed to
        `self._extract_link_def_sub` and replaced by its return value;
        presumably that callback is where urls and titles get recorded.
        """
        max_indent = self.tab_width - 1
        link_def_pattern = r"""
            ^[ ]{0,%d}\[(.+)\]: # id = \1
              [ \t]*
              \n?               # maybe *one* newline
              [ \t]*
            <?(.+?)>?           # url = \2
              [ \t]*
            (?:
                \n?             # maybe one newline
                [ \t]*
                (?<=\s)         # lookbehind for whitespace
                ['"(]
                ([^\n]*)        # title = \3
                ['")]
                [ \t]*
            )?  # title is optional
            (?:\n+|\Z)
            """ % max_indent
        return re.sub(link_def_pattern, self._extract_link_def_sub, text,
                      flags=re.X | re.M | re.U)