Python regex.VERBOSE Examples

The following are 3 code examples of regex.VERBOSE. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module regex, or try the search function.

Example #1

Source File: parsed_text_corrector.py From lexpredict-contraxsuite with GNU Affero General Public License v3.0

5 votes

def setup_parser():
    """Format and compile the money-break pattern on ``ParsedTextCorrector``.

    Builds an alternation from the keys of lexnlp's ``CURRENCY_SYMBOL_MAP``,
    substitutes it into ``ParsedTextCorrector.PATTERN_MONEY_BREAK`` via
    ``str.format(symbols=...)``, stores the formatted text back on the class,
    and stores the compiled regex in ``ParsedTextCorrector.REGEX_MONEY_BREAK``.

    Returns:
        None. The function works purely through the two class attributes.
    """
    # Function-scope import, as in the original code.
    from lexnlp.extract.en.amounts import CURRENCY_SYMBOL_MAP

    # Escape every key in full rather than only '$': a symbol containing any
    # other metacharacter ('.', '|', '#', whitespace under VERBOSE, ...)
    # would otherwise change the meaning of the alternation.
    symbols = '|'.join(re.escape(symbol) for symbol in CURRENCY_SYMBOL_MAP)

    # NOTE(review): the template is overwritten with its formatted form, so a
    # second call would run ``format`` on already-formatted text -- confirm
    # this function is only invoked once.
    ParsedTextCorrector.PATTERN_MONEY_BREAK = \
        ParsedTextCorrector.PATTERN_MONEY_BREAK.format(symbols=symbols)

    flags = re.IGNORECASE | re.DOTALL | re.MULTILINE | re.VERBOSE | re.UNICODE
    ParsedTextCorrector.REGEX_MONEY_BREAK = re.compile(
        ParsedTextCorrector.PATTERN_MONEY_BREAK, flags)

Example #2

Source File: tokenizer.py From segtok with MIT License

5 votes

def _matches(regex):
    """Regular expression compiling function decorator."""
    def match_decorator(fn):
        automaton = compile(regex, UNICODE | VERBOSE)
        fn.split = automaton.split
        fn.match = automaton.match
        return fn

    return match_decorator

Example #3

Source File: autosum_arxiv.py From autosum with MIT License

5 votes

def split_sentences(text):
    """Split *text* into sentences and return them as a list of strings.

    A split happens on a run of whitespace that follows sentence-ending
    punctuation (``.``, ``!`` or ``?``, optionally followed by a quote),
    unless the punctuation belongs to one of the listed abbreviations.
    The separating whitespace is dropped; an empty input yields ``['']``.

    Each abbreviation is anchored with ``\\b`` so that it only matches as a
    whole word. Without the anchor (and with IGNORECASE) "Ms." also matched
    the tail of "items." and "p." the tail of "stop.", which suppressed
    legitimate sentence breaks.

    Reference:
    http://stackoverflow.com/questions/8465335/a-regex-for-extracting-sentence-from-a-paragraph-in-python
    """
    sentence_enders = regex.compile(r"""
        # Split sentences on whitespace between them.
        (?:               # Group for two positive lookbehinds.
          (?<=[.!?])      # Either an end of sentence punct,
        | (?<=[.!?]['"])  # or end of sentence punct and quote.
        )                 # End group of two positive lookbehinds.
        (?<!  \bMr\.   )    # Don't end sentence on "Mr."
        (?<!  \bMrs\.  )    # Don't end sentence on "Mrs."
        (?<!  \bJr\.   )    # Don't end sentence on "Jr."
        (?<!  \bDr\.   )    # Don't end sentence on "Dr."
        (?<!  \bProf\. )    # Don't end sentence on "Prof."
        (?<!  \bSr\.   )    # Don't end sentence on "Sr."
        (?<!  \bSen\.  )    # Don't end sentence on "Sen."
        (?<!  \bMs\.   )    # Don't end sentence on "Ms."
        (?<!  \bRep\.  )    # Don't end sentence on "Rep."
        (?<!  \bGov\.  )    # Don't end sentence on "Gov."
        (?<!  \bet\ al\.  ) # Don't end sentence on "et al."
        (?<!  \bi\.e\.  )   # Don't end sentence on "i.e."
        (?<!  \bU\.S\.  )   # Don't end sentence on "U.S."
        (?<!  \bp\.  )      # Don't end sentence on "p." (page)
        \s+               # Split on whitespace between sentences.
        """, regex.IGNORECASE | regex.VERBOSE)
    return sentence_enders.split(text)