Python pygments.lex() Examples
The following are 27 code examples of pygments.lex(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pygments, or try the search function.
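Before the project examples, here is a minimal, self-contained sketch of the pattern most of them follow: build a lexer instance, pass it to pygments.lex() together with the source text, and consume the resulting (token_type, value) pairs. This snippet is illustrative only and is not taken from any of the projects below.

import pygments
from pygments.lexers import PythonLexer

code = 'print("Hello, world")\n'

# pygments.lex() yields (token_type, value) tuples; note that it expects
# a lexer *instance*, not the lexer class itself (see Example #16 below).
for token_type, value in pygments.lex(code, PythonLexer()):
    print(token_type, repr(value))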
Example #1
Source File: pygments_code_block_directive.py From rst2pdf with MIT License | 6 votes |
def __iter__(self): """parse code string and yield "clasified" tokens """ try: tokens = self.lex() except IOError: log.info("Pygments lexer not found, using fallback") # TODO: write message to INFO yield ('', self.code) return for ttype, value in self.join(tokens): yield (_get_ttype_class(ttype), value) # code_block_directive # -------------------- # ::
Example #2
Source File: utils.py From aries-cloudagent-python with Apache License 2.0 | 6 votes |
def print_lexer(
    body: str, lexer: Lexer, label: str = None, prefix: str = None, indent: int = None
):
    if COLORIZE:
        prefix_str = prefix + " " if prefix else ""
        if prefix_str or indent:
            prefix_body = prefix_str + " " * (indent or 0)
            lexer.add_filter(PrefixFilter(prefix=prefix_body))
        tokens = list(pygments.lex(body, lexer=lexer))
        if label:
            fmt_label = [("fg:ansimagenta", label)]
            if prefix_str:
                fmt_label.insert(0, ("", prefix_str))
            print_formatted(FormattedText(fmt_label))
        print_formatted(PygmentsTokens(tokens))
    else:
        print_ext(body, label=label, prefix=prefix)
Example #3
Source File: snippets.py From diff_cover with Apache License 2.0 | 6 votes |
def _parse_src(cls, src_contents, src_filename):
    """
    Return a stream of `(token_type, value)` tuples
    parsed from `src_contents` (str)

    Uses `src_filename` to guess the type of file
    so it can highlight syntax correctly.
    """
    # Parse the source into tokens
    try:
        lexer = guess_lexer_for_filename(src_filename, src_contents)
    except ClassNotFound:
        lexer = TextLexer()

    # Ensure that we don't strip newlines from
    # the source file when lexing.
    lexer.stripnl = False

    return pygments.lex(src_contents, lexer)
Example #4
Source File: disassembly.py From gxf with MIT License | 6 votes |
def __init__(self, disassembly, lexer=lexer, msg=None):

    self.lines = []

    if isinstance(disassembly, list):
        self.lines = disassembly
    elif disassembly:
        line = []

        if msg:
            current_function = msg.rsplit(None, 1)[-1][:-1]
        else:
            current_function = None

        with currentfunctiontfilter.current_function(current_function):
            for ttype, value in pygments.lex(disassembly, lexer):
                if '\n' in value:
                    self.lines.append(DisassemblyLine(line))
                    line = []
                else:
                    line.append((ttype, value))

    self.linenos = {}
    for i, line in enumerate(self.lines):
        self.linenos[line.address] = line, i

    self.lexer = lexer
    self.msg = msg
Example #5
Source File: code_manager.py From stata_kernel with GNU General Public License v3.0 | 6 votes |
def tokenize_first_pass(self, code):
    """Tokenize input code for Comments and Delimit blocks

    Args:
        code (str): Input string. Should use `\\n` for end of lines.

    Return:
        (List[Tuple[Token, str]]):
            List of token tuples. The only token types currently used in the
            lexer are:
            - Text (plain text)
            - Comment.Single (// and *)
            - Comment.Special (///)
            - Comment.Multiline (/* */)
            - Keyword.Namespace (code inside #delimit ; block)
            - Keyword.Reserved (; delimiter)
    """
    comment_lexer = CommentAndDelimitLexer(stripall=False, stripnl=False)
    return [x for x in lex(code, comment_lexer)]
Example #6
Source File: code_manager.py From stata_kernel with GNU General Public License v3.0 | 6 votes |
def tokenize_second_pass(self, code):
    """Tokenize clean code for syntactic blocks

    Args:
        code (str): Input string. Should have `\\n` as the delimiter.
            Should have no comments. Should use `\\n` for end of lines.

    Return:
        (List[Tuple[Token, str]]):
            List of token tuples. Some of the token types used in the
            lexer are:
            - Text (plain text)
            - Comment.Single (// and *)
            - Comment.Special (///)
            - Comment.Multiline (/* */)
            - Keyword.Namespace (code inside #delimit ; block)
            - Keyword.Reserved (; delimiter)
    """
    block_lexer = StataLexer(stripall=False, stripnl=False)
    return [x for x in lex(code, block_lexer)]
Example #7
Source File: printers.py From fuzzowski with GNU General Public License v2.0 | 6 votes |
def print_packets(path: list, nodes: dict) -> None:
    tokens = []
    for e in path[:-1]:
        node = nodes[e.dst]
        p = node.render()
        line = '{} = {}'.format(node.name.replace('-', '_'), repr(p))
        tokens.extend(list(pygments.lex(line, lexer=Python3Lexer())))

    # p = self.fuzz_node.render()
    node = nodes[path[-1].dst]
    p = node.render()
    line = '{} = {}'.format(node.name.replace('-', '_'), repr(p))
    print(pygments.highlight(line, Python3Lexer(), Terminal256Formatter(style='rrt')))
    # tokens.extend(list(pygments.lex(line, lexer=Python3Lexer())))

    # style = style_from_pygments_cls(get_style_by_name('colorful'))
    # print_formatted_text(PygmentsTokens(tokens), style=style)

# --------------------------------------------------------------- #
Example #8
Source File: code_analyzer.py From aws-extender with MIT License | 5 votes |
def __iter__(self): """Parse self.code and yield "classified" tokens. """ if self.lexer is None: yield ([], self.code) return tokens = pygments.lex(self.code, self.lexer) for tokentype, value in self.merge(tokens): if self.tokennames == 'long': # long CSS class args classes = str(tokentype).lower().split('.') else: # short CSS class args classes = [_get_ttype_class(tokentype)] classes = [cls for cls in classes if cls not in unstyled_tokens] yield (classes, value)
Example #9
Source File: pygments_code_block_directive.py From rst2pdf with MIT License | 5 votes |
def lex(self):
    # Get lexer for language (use text as fallback)
    try:
        if self.language and str(self.language).lower() != 'none':
            lexer = get_lexer_by_name(self.language.lower(), **self.custom_args)
        else:
            lexer = get_lexer_by_name('text', **self.custom_args)
    except ValueError:
        log.info("no pygments lexer for %s, using 'text'" % self.language)
        # what happens if pygment isn't present ?
        lexer = get_lexer_by_name('text')
    return pygments.lex(self.code, lexer)
Example #10
Source File: code_analyzer.py From aws-builders-fair-projects with Apache License 2.0 | 5 votes |
def __iter__(self): """Parse self.code and yield "classified" tokens. """ if self.lexer is None: yield ([], self.code) return tokens = pygments.lex(self.code, self.lexer) for tokentype, value in self.merge(tokens): if self.tokennames == 'long': # long CSS class args classes = str(tokentype).lower().split('.') else: # short CSS class args classes = [_get_ttype_class(tokentype)] classes = [cls for cls in classes if cls not in unstyled_tokens] yield (classes, value)
Example #11
Source File: printers.py From fuzzowski with GNU General Public License v2.0 | 5 votes |
def print_poc(target: Target, path: list,
              receive_data_after_each_request, receive_data_after_fuzz) -> None:
    tokens = []

    exploit_code = get_exploit_code(target, path, receive_data_after_each_request,
                                    receive_data_after_fuzz)
    print(pygments.highlight(exploit_code, Python3Lexer(), Terminal256Formatter(style='rrt')))
    # tokens.extend(list(pygments.lex(exploit_code, lexer=Python3Lexer())))
    # print_formatted_text(PygmentsTokens(tokens))

# --------------------------------------------------------------- #
Example #12
Source File: printers.py From fuzzowski with GNU General Public License v2.0 | 5 votes |
def print_python(path: list) -> None:
    tokens = []
    block_code = path_to_python(path)
    print(pygments.highlight(block_code, Python3Lexer(), Terminal256Formatter(style='rrt')))
    # tokens.extend(list(pygments.lex(block_code, lexer=Python3Lexer())))
    # print_formatted_text(PygmentsTokens(tokens))

# --------------------------------------------------------------- #
Example #13
Source File: lexer.py From suplemon with MIT License | 5 votes |
def lex(self, code, lex):
    """Return tokenified code.

    Return a list of tuples (scope, word) where word is the word to be
    printed and scope the scope name representing the context.

    :param str code: Code to tokenify.
    :param lex: Lexer to use.
    :return:
    """
    if lex is None:
        if not type(code) is str:
            # if not suitable lexer is found, return decoded code
            code = code.decode("utf-8")
        return (("global", code),)
    words = pygments.lex(code, lex)
    scopes = []
    for word in words:
        token = word[0]
        scope = "global"
        if token in self.token_map.keys():
            scope = self.token_map[token]
        scopes.append((scope, word[1]))
    return scopes
Example #14
Source File: test__meta.py From libnl with GNU Lesser General Public License v2.1 | 5 votes |
def test_print_hunter():
    """Verify that there are no print statements in the codebase."""
    root_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    assert 'tests' in os.listdir(root_directory)
    generator = (os.path.join(r, s) for r, d, f in os.walk(root_directory)
                 if '.egg/' not in r and '/.tox/' not in r
                 for s in f if s.endswith('.py') and not s.startswith('example_'))
    regex_print = re.compile(r'^(.*)(?<!\w)print(\(|\s)(.*)$', re.MULTILINE)

    # Find all potential prints in Python files. May or may not be in strings.
    potential_prints = set()
    for file_path in generator:
        with open(file_path) as f:
            for line in f:
                if regex_print.search(line):
                    potential_prints.add(file_path)
                    break
    if not potential_prints:
        return

    # Perform lexical analysis on the source code and find all valid print statements/function calls.
    current_line = list()
    actual_prints = dict()
    for file_path in potential_prints:
        with open(file_path) as f:
            code = f.read(52428800)  # Up to 50 MiB.
        for token, code_piece in lex(code, get_lexer_by_name('Python')):
            if code_piece == '\n':
                current_line = list()  # References new list, doesn't necessarily remove old list.
                continue
            current_line.append(code_piece)
            if (str(token), code_piece) != ('Token.Keyword', 'print'):
                continue
            # If this is reached, there is a print statement in the library!
            if file_path not in actual_prints:
                actual_prints[file_path] = list()
            actual_prints[file_path].append(current_line)  # Keeps reference to current list() alive.

    actual_prints = dict((f, [''.join(l) for l in lst]) for f, lst in actual_prints.items())
    assert not actual_prints
Example #15
Source File: pygments-tokens.py From python-prompt-toolkit with BSD 3-Clause "New" or "Revised" License | 5 votes |
def main():
    # Printing a manually constructed list of (Token, text) tuples.
    text = [
        (Token.Keyword, "print"),
        (Token.Punctuation, "("),
        (Token.Literal.String.Double, '"'),
        (Token.Literal.String.Double, "hello"),
        (Token.Literal.String.Double, '"'),
        (Token.Punctuation, ")"),
        (Token.Text, "\n"),
    ]
    print_formatted_text(PygmentsTokens(text))

    # Printing the output of a pygments lexer.
    tokens = list(pygments.lex('print("Hello")', lexer=PythonLexer()))
    print_formatted_text(PygmentsTokens(tokens))

    # With a custom style.
    style = Style.from_dict(
        {
            "pygments.keyword": "underline",
            "pygments.literal.string": "bg:#00ff00 #ffffff",
        }
    )
    print_formatted_text(PygmentsTokens(tokens), style=style)
Example #16
Source File: test_basic_api.py From pygments with BSD 2-Clause "Simplified" License | 5 votes |
def test_bare_class_handler():
    from pygments.formatters import HtmlFormatter
    from pygments.lexers import PythonLexer
    try:
        lex('test\n', PythonLexer)
    except TypeError as e:
        assert 'lex() argument must be a lexer instance' in str(e)
    else:
        assert False, 'nothing raised'
    try:
        format([], HtmlFormatter)
    except TypeError as e:
        assert 'format() argument must be a formatter instance' in str(e)
    else:
        assert False, 'nothing raised'
Example #17
Source File: code_analyzer.py From faces with GNU General Public License v2.0 | 5 votes |
def __iter__(self): """Parse self.code and yield "classified" tokens. """ if self.lexer is None: yield ([], self.code) return tokens = pygments.lex(self.code, self.lexer) for tokentype, value in self.merge(tokens): if self.tokennames == 'long': # long CSS class args classes = str(tokentype).lower().split('.') else: # short CSS class args classes = [_get_ttype_class(tokentype)] classes = [cls for cls in classes if cls not in unstyled_tokens] yield (classes, value)
Example #18
Source File: code_analyzer.py From blackmamba with MIT License | 5 votes |
def __iter__(self): """Parse self.code and yield "classified" tokens. """ if self.lexer is None: yield ([], self.code) return tokens = pygments.lex(self.code, self.lexer) for tokentype, value in self.merge(tokens): if self.tokennames == 'long': # long CSS class args classes = str(tokentype).lower().split('.') else: # short CSS class args classes = [_get_ttype_class(tokentype)] classes = [cls for cls in classes if cls not in unstyled_tokens] yield (classes, value)
Example #19
Source File: code_analyzer.py From AWS-Transit-Gateway-Demo-MultiAccount with MIT License | 5 votes |
def __iter__(self): """Parse self.code and yield "classified" tokens. """ if self.lexer is None: yield ([], self.code) return tokens = pygments.lex(self.code, self.lexer) for tokentype, value in self.merge(tokens): if self.tokennames == 'long': # long CSS class args classes = str(tokentype).lower().split('.') else: # short CSS class args classes = [_get_ttype_class(tokentype)] classes = [cls for cls in classes if cls not in unstyled_tokens] yield (classes, value)
Example #20
Source File: code_analyzer.py From AWS-Transit-Gateway-Demo-MultiAccount with MIT License | 5 votes |
def __iter__(self): """Parse self.code and yield "classified" tokens. """ if self.lexer is None: yield ([], self.code) return tokens = pygments.lex(self.code, self.lexer) for tokentype, value in self.merge(tokens): if self.tokennames == 'long': # long CSS class args classes = str(tokentype).lower().split('.') else: # short CSS class args classes = [_get_ttype_class(tokentype)] classes = [cls for cls in classes if cls not in unstyled_tokens] yield (classes, value)
Example #21
Source File: code_analyzer.py From cadquery-freecad-module with GNU Lesser General Public License v3.0 | 5 votes |
def __iter__(self): """Parse self.code and yield "classified" tokens. """ if self.lexer is None: yield ([], self.code) return tokens = pygments.lex(self.code, self.lexer) for tokentype, value in self.merge(tokens): if self.tokennames == 'long': # long CSS class args classes = str(tokentype).lower().split('.') else: # short CSS class args classes = [_get_ttype_class(tokentype)] classes = [cls for cls in classes if cls not in unstyled_tokens] yield (classes, value)
Example #22
Source File: code_analyzer.py From bash-lambda-layer with MIT License | 5 votes |
def __iter__(self): """Parse self.code and yield "classified" tokens. """ if self.lexer is None: yield ([], self.code) return tokens = pygments.lex(self.code, self.lexer) for tokentype, value in self.merge(tokens): if self.tokennames == 'long': # long CSS class args classes = str(tokentype).lower().split('.') else: # short CSS class args classes = [_get_ttype_class(tokentype)] classes = [cls for cls in classes if cls not in unstyled_tokens] yield (classes, value)
Example #23
Source File: code_analyzer.py From deepWordBug with Apache License 2.0 | 5 votes |
def __iter__(self): """Parse self.code and yield "classified" tokens. """ if self.lexer is None: yield ([], self.code) return tokens = pygments.lex(self.code, self.lexer) for tokentype, value in self.merge(tokens): if self.tokennames == 'long': # long CSS class args classes = str(tokentype).lower().split('.') else: # short CSS class args classes = [_get_ttype_class(tokentype)] classes = [cls for cls in classes if cls not in unstyled_tokens] yield (classes, value)
Example #24
Source File: code_analyzer.py From faces with GNU General Public License v2.0 | 5 votes |
def __iter__(self): """Parse self.code and yield "classified" tokens. """ if self.lexer is None: yield ([], self.code) return tokens = pygments.lex(self.code, self.lexer) for tokentype, value in self.merge(tokens): if self.tokennames == 'long': # long CSS class args classes = str(tokentype).lower().split('.') else: # short CSS class args classes = [_get_ttype_class(tokentype)] classes = [cls for cls in classes if cls not in unstyled_tokens] yield (classes, value)
Example #25
Source File: test__meta.py From libnl with GNU Lesser General Public License v2.1 | 4 votes |
def test_todo_issue_validator():
    """Verify that each T.O.D.O is associated with an open GitHub issue."""
    root_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    assert 'tests' in os.listdir(root_directory)
    generator = (os.path.join(r, s) for r, d, f in os.walk(root_directory) if '.tox' not in r
                 for s in f if s.endswith('.py') and not s.startswith('example_'))
    regex_todo = re.compile(r'^(.*)(?<!\w)(TODO|FIXME)(?!\w)(.*)$', re.IGNORECASE | re.MULTILINE)

    # Find all potential TODOs in Python files. May or may not be in comments/docstrings.
    potential_todos = set()
    for file_path in generator:
        with open(file_path) as f:
            for line in f:
                if regex_todo.search(line):
                    potential_todos.add(file_path)
                    break
    if not potential_todos:
        return

    # Get all open issues.
    repo_slug = os.environ['TRAVIS_REPO_SLUG']
    assert re.match(r'^[a-zA-Z0-9_-]+/[a-zA-Z0-9_-]+$', repo_slug)
    response = urlopen('https://api.github.com/repos/{0}/issues'.format(repo_slug))
    raw_data = response.read().decode('utf-8')
    parsed_data = json.loads(raw_data)
    open_issues = set(['issues/{0:d}'.format(int(i.get('number'))) for i in parsed_data
                       if i.get('state') == 'open'])

    # Perform lexical analysis on the source code and find all docstrings and comments with TODOs.
    todos_with_no_issues = dict()
    for file_path in potential_todos:
        with open(file_path) as f:
            code = f.read(52428800)  # Up to 50 MiB.
        for token, code_piece in lex(code, get_lexer_by_name('Python')):
            if str(token) not in ('Token.Comment', 'Token.Literal.String.Doc'):
                continue
            if not regex_todo.search(code_piece):
                continue
            code_line = ''.join(b for a in regex_todo.findall(code_piece) for b in a)
            has_issue = bool([i for i in open_issues if i in code_line])
            if has_issue:
                continue  # This t.o.d.o has an open issue, skipping.
            # If this is reached, there is a t.o.d.o without an open issue!
            if file_path not in todos_with_no_issues:
                todos_with_no_issues[file_path] = list()
            todos_with_no_issues[file_path].append(code_line)

    assert not todos_with_no_issues
Example #26
Source File: bib_manager.py From bibmanager with MIT License | 4 votes |
def display_bibs(labels, bibs, meta=False):
    r"""
    Display a list of bib entries on screen with flying colors.

    Parameters
    ----------
    labels: List of Strings
        Header labels to show above each Bib() entry.
    bibs: List of Bib() objects
        BibTeX entries to display.
    meta: Bool
        If True, also display the meta-information.

    Examples
    --------
    >>> import bibmanager.bib_manager as bm
    >>> e1 = '''@Misc{JonesEtal2001scipy,
           author = {Eric Jones and Travis Oliphant and Pearu Peterson},
           title = {{SciPy}: Open source scientific tools for {Python}},
           year = {2001},
         }'''
    >>> e2 = '''@Misc{Jones2001,
           author = {Eric Jones and Travis Oliphant and Pearu Peterson},
           title = {SciPy: Open source scientific tools for Python},
           year = {2001},
         }'''
    >>> bibs = [bm.Bib(e1), bm.Bib(e2)]
    >>> bm.display_bibs(["DATABASE:\n", "NEW:\n"], bibs)
    ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    DATABASE:
    @Misc{JonesEtal2001scipy,
           author = {Eric Jones and Travis Oliphant and Pearu Peterson},
           title = {{SciPy}: Open source scientific tools for {Python}},
           year = {2001},
    }

    NEW:
    @Misc{Jones2001,
           author = {Eric Jones and Travis Oliphant and Pearu Peterson},
           title = {SciPy: Open source scientific tools for Python},
           year = {2001},
    }
    """
    style = prompt_toolkit.styles.style_from_pygments_cls(
        pygments.styles.get_style_by_name(cm.get('style')))
    if labels is None:
        labels = ["" for _ in bibs]
    tokens = [(Token.Comment, u.BANNER)]
    for label, bib in zip(labels, bibs):
        tokens += [(Token.Text, label)]
        if meta:
            tokens += [(Token.Comment, bib.meta())]
        tokens += list(pygments.lex(bib.content, lexer=BibTeXLexer()))
        tokens += [(Token.Text, "\n")]
    print_formatted_text(PygmentsTokens(tokens), end="", style=style,
                         output=create_output(sys.stdout))
Example #27
Source File: __init__.py From autostack with MIT License | 4 votes |
def print_code_block(code_block):
    '''
    Prints a code block from Stack Overflow with syntax highlighting.

    On Stack Overflow, the code in a HTML 'code' element contains
    a 'span' element for each token. Because of this, it's necessary
    to grab each of the 'code' element's 'span' elements' values
    to get the actual code.

    Parameter {bs4.Tag} code_block: 'soup' of a HTML
    'code' element from a Stack Overflow post.
    '''

    token_colors = {
        'Token.Keyword': 'blue',
        'Token.Name.Builtin.Pseudo': 'blue',
        'Token.Literal.Number.Integer': 'green',
        'Token.Literal.Number.Float': 'green',
        'Token.Comment.Single': 'green',
        'Token.Comment.Hashbang': 'green',
        'Token.Literal.String.Single': 'yellow',
        'Token.Literal.String.Double': 'yellow',
        'Token.Literal.String.Doc': 'yellow'
    }

    print('')

    # Store the code's text.
    code = get_src_code(code_block)

    # Loop over code, and highlight.
    for token, content in pygments.lex(code, PythonLexer()):
        try:
            print(
                colored(content, token_colors[str(token)]),
                end=''
            )
        except KeyError:
            print(
                content,
                end=''
            )

    print('')