Python pygments.lex() Examples
The following are 27 code examples of pygments.lex(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pygments, or try the search function.
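Before the project examples, here is a minimal, self-contained sketch of the pattern most of them follow: build a lexer instance, pass it to pygments.lex() together with the source text, and consume the resulting (token_type, value) pairs. This snippet is illustrative only and is not taken from any of the projects below.

import pygments
from pygments.lexers import PythonLexer

code = 'print("Hello, world")\n'

# pygments.lex() yields (token_type, value) tuples; note that it expects
# a lexer *instance*, not the lexer class itself (see Example #16 below).
for token_type, value in pygments.lex(code, PythonLexer()):
    print(token_type, repr(value))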
Example #1
Source File: pygments_code_block_directive.py From rst2pdf with MIT License | 6 votes |
def __iter__(self): """parse code string and yield "clasified" tokens """ try: tokens = self.lex() except IOError: log.info("Pygments lexer not found, using fallback") # TODO: write message to INFO yield ('', self.code) return for ttype, value in self.join(tokens): yield (_get_ttype_class(ttype), value) # code_block_directive # -------------------- # ::
Example #2
Source File: utils.py From aries-cloudagent-python with Apache License 2.0 | 6 votes |
def print_lexer(
    body: str, lexer: Lexer, label: str = None, prefix: str = None, indent: int = None
):
    if COLORIZE:
        prefix_str = prefix + " " if prefix else ""
        if prefix_str or indent:
            prefix_body = prefix_str + " " * (indent or 0)
            lexer.add_filter(PrefixFilter(prefix=prefix_body))
        tokens = list(pygments.lex(body, lexer=lexer))
        if label:
            fmt_label = [("fg:ansimagenta", label)]
            if prefix_str:
                fmt_label.insert(0, ("", prefix_str))
            print_formatted(FormattedText(fmt_label))
        print_formatted(PygmentsTokens(tokens))
    else:
        print_ext(body, label=label, prefix=prefix)
Example #3
Source File: snippets.py From diff_cover with Apache License 2.0 | 6 votes |
def _parse_src(cls, src_contents, src_filename):
    """
    Return a stream of `(token_type, value)` tuples
    parsed from `src_contents` (str)

    Uses `src_filename` to guess the type of file
    so it can highlight syntax correctly.
    """
    # Parse the source into tokens
    try:
        lexer = guess_lexer_for_filename(src_filename, src_contents)
    except ClassNotFound:
        lexer = TextLexer()

    # Ensure that we don't strip newlines from
    # the source file when lexing.
    lexer.stripnl = False

    return pygments.lex(src_contents, lexer)
Example #4
Source File: disassembly.py From gxf with MIT License | 6 votes |
def __init__(self, disassembly, lexer=lexer, msg=None):

    self.lines = []

    if isinstance(disassembly, list):
        self.lines = disassembly
    elif disassembly:
        line = []

        if msg:
            current_function = msg.rsplit(None, 1)[-1][:-1]
        else:
            current_function = None

        with currentfunctiontfilter.current_function(current_function):
            for ttype, value in pygments.lex(disassembly, lexer):
                if '\n' in value:
                    self.lines.append(DisassemblyLine(line))
                    line = []
                else:
                    line.append((ttype, value))

    self.linenos = {}
    for i, line in enumerate(self.lines):
        self.linenos[line.address] = line, i

    self.lexer = lexer
    self.msg = msg
Example #5
Source File: code_manager.py From stata_kernel with GNU General Public License v3.0 | 6 votes |
def tokenize_first_pass(self, code):
    """Tokenize input code for Comments and Delimit blocks

    Args:
        code (str): Input string. Should use `\\n` for end of lines.

    Return:
        (List[Tuple[Token, str]]):
            List of token tuples. The only token types currently used in the
            lexer are:
            - Text (plain text)
            - Comment.Single (// and *)
            - Comment.Special (///)
            - Comment.Multiline (/* */)
            - Keyword.Namespace (code inside #delimit ; block)
            - Keyword.Reserved (; delimiter)
    """
    comment_lexer = CommentAndDelimitLexer(stripall=False, stripnl=False)
    return [x for x in lex(code, comment_lexer)]
Example #6
Source File: code_manager.py From stata_kernel with GNU General Public License v3.0 | 6 votes |
def tokenize_second_pass(self, code):
    """Tokenize clean code for syntactic blocks

    Args:
        code (str): Input string. Should have `\\n` as the delimiter.
            Should have no comments. Should use `\\n` for end of lines.

    Return:
        (List[Tuple[Token, str]]):
            List of token tuples. Some of the token types used in the
            lexer are:
            - Text (plain text)
            - Comment.Single (// and *)
            - Comment.Special (///)
            - Comment.Multiline (/* */)
            - Keyword.Namespace (code inside #delimit ; block)
            - Keyword.Reserved (; delimiter)
    """
    block_lexer = StataLexer(stripall=False, stripnl=False)
    return [x for x in lex(code, block_lexer)]
Example #7
Source File: printers.py From fuzzowski with GNU General Public License v2.0 | 6 votes |
def print_packets(path: list, nodes: dict) -> None:
    tokens = []
    for e in path[:-1]:
        node = nodes[e.dst]
        p = node.render()
        line = '{} = {}'.format(node.name.replace('-', '_'), repr(p))
        tokens.extend(list(pygments.lex(line, lexer=Python3Lexer())))

    # p = self.fuzz_node.render()
    node = nodes[path[-1].dst]
    p = node.render()
    line = '{} = {}'.format(node.name.replace('-', '_'), repr(p))
    print(pygments.highlight(line, Python3Lexer(), Terminal256Formatter(style='rrt')))
    # tokens.extend(list(pygments.lex(line, lexer=Python3Lexer())))

    # style = style_from_pygments_cls(get_style_by_name('colorful'))
    # print_formatted_text(PygmentsTokens(tokens), style=style)

# --------------------------------------------------------------- #
Example #8
Source File: code_analyzer.py From aws-extender with MIT License | 5 votes |
def __iter__(self): """Parse self.code and yield "classified" tokens. """ if self.lexer is None: yield ([], self.code) return tokens = pygments.lex(self.code, self.lexer) for tokentype, value in self.merge(tokens): if self.tokennames == 'long': # long CSS class args classes = str(tokentype).lower().split('.') else: # short CSS class args classes = [_get_ttype_class(tokentype)] classes = [cls for cls in classes if cls not in unstyled_tokens] yield (classes, value)
Example #9
Source File: pygments_code_block_directive.py From rst2pdf with MIT License | 5 votes |
def lex(self):
    # Get lexer for language (use text as fallback)
    try:
        if self.language and str(self.language).lower() != 'none':
            lexer = get_lexer_by_name(self.language.lower(), **self.custom_args)
        else:
            lexer = get_lexer_by_name('text', **self.custom_args)
    except ValueError:
        log.info("no pygments lexer for %s, using 'text'" % self.language)
        # what happens if pygment isn't present ?
        lexer = get_lexer_by_name('text')
    return pygments.lex(self.code, lexer)
Example #10
Source File: code_analyzer.py From aws-builders-fair-projects with Apache License 2.0 | 5 votes |
def __iter__(self): """Parse self.code and yield "classified" tokens. """ if self.lexer is None: yield ([], self.code) return tokens = pygments.lex(self.code, self.lexer) for tokentype, value in self.merge(tokens): if self.tokennames == 'long': # long CSS class args classes = str(tokentype).lower().split('.') else: # short CSS class args classes = [_get_ttype_class(tokentype)] classes = [cls for cls in classes if cls not in unstyled_tokens] yield (classes, value)
Example #11
Source File: printers.py From fuzzowski with GNU General Public License v2.0 | 5 votes |
def print_poc(target: Target, path: list,
              receive_data_after_each_request, receive_data_after_fuzz) -> None:
    tokens = []

    exploit_code = get_exploit_code(target, path, receive_data_after_each_request,
                                    receive_data_after_fuzz)
    print(pygments.highlight(exploit_code, Python3Lexer(), Terminal256Formatter(style='rrt')))
    # tokens.extend(list(pygments.lex(exploit_code, lexer=Python3Lexer())))
    # print_formatted_text(PygmentsTokens(tokens))

# --------------------------------------------------------------- #
Example #12
Source File: printers.py From fuzzowski with GNU General Public License v2.0 | 5 votes |
def print_python(path: list) -> None:
    tokens = []
    block_code = path_to_python(path)
    print(pygments.highlight(block_code, Python3Lexer(), Terminal256Formatter(style='rrt')))
    # tokens.extend(list(pygments.lex(block_code, lexer=Python3Lexer())))
    # print_formatted_text(PygmentsTokens(tokens))

# --------------------------------------------------------------- #
Example #13
Source File: lexer.py From suplemon with MIT License | 5 votes |
def lex(self, code, lex):
    """Return tokenified code.

    Return a list of tuples (scope, word) where word is the word to be
    printed and scope the scope name representing the context.

    :param str code: Code to tokenify.
    :param lex: Lexer to use.
    :return:
    """
    if lex is None:
        if not type(code) is str:
            # if not suitable lexer is found, return decoded code
            code = code.decode("utf-8")
        return (("global", code),)
    words = pygments.lex(code, lex)
    scopes = []
    for word in words:
        token = word[0]
        scope = "global"
        if token in self.token_map.keys():
            scope = self.token_map[token]
        scopes.append((scope, word[1]))
    return scopes
Example #14
Source File: test__meta.py From libnl with GNU Lesser General Public License v2.1 | 5 votes |
def test_print_hunter():
    """Verify that there are no print statements in the codebase."""
    root_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    assert 'tests' in os.listdir(root_directory)
    generator = (os.path.join(r, s) for r, d, f in os.walk(root_directory)
                 if '.egg/' not in r and '/.tox/' not in r
                 for s in f if s.endswith('.py') and not s.startswith('example_'))
    regex_print = re.compile(r'^(.*)(?<!\w)print(\(|\s)(.*)$', re.MULTILINE)

    # Find all potential prints in Python files. May or may not be in strings.
    potential_prints = set()
    for file_path in generator:
        with open(file_path) as f:
            for line in f:
                if regex_print.search(line):
                    potential_prints.add(file_path)
                    break
    if not potential_prints:
        return

    # Perform lexical analysis on the source code and find all valid print statements/function calls.
    current_line = list()
    actual_prints = dict()
    for file_path in potential_prints:
        with open(file_path) as f:
            code = f.read(52428800)  # Up to 50 MiB.
        for token, code_piece in lex(code, get_lexer_by_name('Python')):
            if code_piece == '\n':
                current_line = list()  # References new list, doesn't necessarily remove old list.
                continue
            current_line.append(code_piece)
            if (str(token), code_piece) != ('Token.Keyword', 'print'):
                continue
            # If this is reached, there is a print statement in the library!
            if file_path not in actual_prints:
                actual_prints[file_path] = list()
            actual_prints[file_path].append(current_line)  # Keeps reference to current list() alive.

    actual_prints = dict((f, [''.join(l) for l in lst]) for f, lst in actual_prints.items())
    assert not actual_prints
Example #15
Source File: pygments-tokens.py From python-prompt-toolkit with BSD 3-Clause "New" or "Revised" License | 5 votes |
def main():
    # Printing a manually constructed list of (Token, text) tuples.
    text = [
        (Token.Keyword, "print"),
        (Token.Punctuation, "("),
        (Token.Literal.String.Double, '"'),
        (Token.Literal.String.Double, "hello"),
        (Token.Literal.String.Double, '"'),
        (Token.Punctuation, ")"),
        (Token.Text, "\n"),
    ]
    print_formatted_text(PygmentsTokens(text))

    # Printing the output of a pygments lexer.
    tokens = list(pygments.lex('print("Hello")', lexer=PythonLexer()))
    print_formatted_text(PygmentsTokens(tokens))

    # With a custom style.
    style = Style.from_dict(
        {
            "pygments.keyword": "underline",
            "pygments.literal.string": "bg:#00ff00 #ffffff",
        }
    )
    print_formatted_text(PygmentsTokens(tokens), style=style)
Example #16
Source File: test_basic_api.py From pygments with BSD 2-Clause "Simplified" License | 5 votes |
def test_bare_class_handler():
    from pygments.formatters import HtmlFormatter
    from pygments.lexers import PythonLexer
    try:
        lex('test\n', PythonLexer)
    except TypeError as e:
        assert 'lex() argument must be a lexer instance' in str(e)
    else:
        assert False, 'nothing raised'
    try:
        format([], HtmlFormatter)
    except TypeError as e:
        assert 'format() argument must be a formatter instance' in str(e)
    else:
        assert False, 'nothing raised'
Example #17
Source File: code_analyzer.py From faces with GNU General Public License v2.0 | 5 votes |
def __iter__(self): """Parse self.code and yield "classified" tokens. """ if self.lexer is None: yield ([], self.code) return tokens = pygments.lex(self.code, self.lexer) for tokentype, value in self.merge(tokens): if self.tokennames == 'long': # long CSS class args classes = str(tokentype).lower().split('.') else: # short CSS class args classes = [_get_ttype_class(tokentype)] classes = [cls for cls in classes if cls not in unstyled_tokens] yield (classes, value)
Example #18
Source File: code_analyzer.py From blackmamba with MIT License | 5 votes |
def __iter__(self): """Parse self.code and yield "classified" tokens. """ if self.lexer is None: yield ([], self.code) return tokens = pygments.lex(self.code, self.lexer) for tokentype, value in self.merge(tokens): if self.tokennames == 'long': # long CSS class args classes = str(tokentype).lower().split('.') else: # short CSS class args classes = [_get_ttype_class(tokentype)] classes = [cls for cls in classes if cls not in unstyled_tokens] yield (classes, value)
Example #19
Source File: code_analyzer.py From AWS-Transit-Gateway-Demo-MultiAccount with MIT License | 5 votes |
def __iter__(self): """Parse self.code and yield "classified" tokens. """ if self.lexer is None: yield ([], self.code) return tokens = pygments.lex(self.code, self.lexer) for tokentype, value in self.merge(tokens): if self.tokennames == 'long': # long CSS class args classes = str(tokentype).lower().split('.') else: # short CSS class args classes = [_get_ttype_class(tokentype)] classes = [cls for cls in classes if cls not in unstyled_tokens] yield (classes, value)
Example #20
Source File: code_analyzer.py From AWS-Transit-Gateway-Demo-MultiAccount with MIT License | 5 votes |
def __iter__(self): """Parse self.code and yield "classified" tokens. """ if self.lexer is None: yield ([], self.code) return tokens = pygments.lex(self.code, self.lexer) for tokentype, value in self.merge(tokens): if self.tokennames == 'long': # long CSS class args classes = str(tokentype).lower().split('.') else: # short CSS class args classes = [_get_ttype_class(tokentype)] classes = [cls for cls in classes if cls not in unstyled_tokens] yield (classes, value)
Example #21
Source File: code_analyzer.py From cadquery-freecad-module with GNU Lesser General Public License v3.0 | 5 votes |
def __iter__(self): """Parse self.code and yield "classified" tokens. """ if self.lexer is None: yield ([], self.code) return tokens = pygments.lex(self.code, self.lexer) for tokentype, value in self.merge(tokens): if self.tokennames == 'long': # long CSS class args classes = str(tokentype).lower().split('.') else: # short CSS class args classes = [_get_ttype_class(tokentype)] classes = [cls for cls in classes if cls not in unstyled_tokens] yield (classes, value)
Example #22
Source File: code_analyzer.py From bash-lambda-layer with MIT License | 5 votes |
def __iter__(self): """Parse self.code and yield "classified" tokens. """ if self.lexer is None: yield ([], self.code) return tokens = pygments.lex(self.code, self.lexer) for tokentype, value in self.merge(tokens): if self.tokennames == 'long': # long CSS class args classes = str(tokentype).lower().split('.') else: # short CSS class args classes = [_get_ttype_class(tokentype)] classes = [cls for cls in classes if cls not in unstyled_tokens] yield (classes, value)
Example #23
Source File: code_analyzer.py From deepWordBug with Apache License 2.0 | 5 votes |
def __iter__(self): """Parse self.code and yield "classified" tokens. """ if self.lexer is None: yield ([], self.code) return tokens = pygments.lex(self.code, self.lexer) for tokentype, value in self.merge(tokens): if self.tokennames == 'long': # long CSS class args classes = str(tokentype).lower().split('.') else: # short CSS class args classes = [_get_ttype_class(tokentype)] classes = [cls for cls in classes if cls not in unstyled_tokens] yield (classes, value)
Example #24
Source File: code_analyzer.py From faces with GNU General Public License v2.0 | 5 votes |
def __iter__(self): """Parse self.code and yield "classified" tokens. """ if self.lexer is None: yield ([], self.code) return tokens = pygments.lex(self.code, self.lexer) for tokentype, value in self.merge(tokens): if self.tokennames == 'long': # long CSS class args classes = str(tokentype).lower().split('.') else: # short CSS class args classes = [_get_ttype_class(tokentype)] classes = [cls for cls in classes if cls not in unstyled_tokens] yield (classes, value)
Example #25
Source File: test__meta.py From libnl with GNU Lesser General Public License v2.1 | 4 votes |
def test_todo_issue_validator():
    """Verify that each T.O.D.O is associated with an open GitHub issue."""
    root_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    assert 'tests' in os.listdir(root_directory)
    generator = (os.path.join(r, s) for r, d, f in os.walk(root_directory) if '.tox' not in r
                 for s in f if s.endswith('.py') and not s.startswith('example_'))
    regex_todo = re.compile(r'^(.*)(?<!\w)(TODO|FIXME)(?!\w)(.*)$', re.IGNORECASE | re.MULTILINE)

    # Find all potential TODOs in Python files. May or may not be in comments/docstrings.
    potential_todos = set()
    for file_path in generator:
        with open(file_path) as f:
            for line in f:
                if regex_todo.search(line):
                    potential_todos.add(file_path)
                    break
    if not potential_todos:
        return

    # Get all open issues.
    repo_slug = os.environ['TRAVIS_REPO_SLUG']
    assert re.match(r'^[a-zA-Z0-9_-]+/[a-zA-Z0-9_-]+$', repo_slug)
    response = urlopen('https://api.github.com/repos/{0}/issues'.format(repo_slug))
    raw_data = response.read().decode('utf-8')
    parsed_data = json.loads(raw_data)
    open_issues = set(['issues/{0:d}'.format(int(i.get('number'))) for i in parsed_data
                       if i.get('state') == 'open'])

    # Perform lexical analysis on the source code and find all docstrings and comments with TODOs.
    todos_with_no_issues = dict()
    for file_path in potential_todos:
        with open(file_path) as f:
            code = f.read(52428800)  # Up to 50 MiB.
        for token, code_piece in lex(code, get_lexer_by_name('Python')):
            if str(token) not in ('Token.Comment', 'Token.Literal.String.Doc'):
                continue
            if not regex_todo.search(code_piece):
                continue
            code_line = ''.join(b for a in regex_todo.findall(code_piece) for b in a)
            has_issue = bool([i for i in open_issues if i in code_line])
            if has_issue:
                continue  # This t.o.d.o has an open issue, skipping.
            # If this is reached, there is a t.o.d.o without an open issue!
            if file_path not in todos_with_no_issues:
                todos_with_no_issues[file_path] = list()
            todos_with_no_issues[file_path].append(code_line)

    assert not todos_with_no_issues
Example #26
Source File: bib_manager.py From bibmanager with MIT License | 4 votes |
def display_bibs(labels, bibs, meta=False):
    r"""
    Display a list of bib entries on screen with flying colors.

    Parameters
    ----------
    labels: List of Strings
        Header labels to show above each Bib() entry.
    bibs: List of Bib() objects
        BibTeX entries to display.
    meta: Bool
        If True, also display the meta-information.

    Examples
    --------
    >>> import bibmanager.bib_manager as bm
    >>> e1 = '''@Misc{JonesEtal2001scipy,
           author = {Eric Jones and Travis Oliphant and Pearu Peterson},
           title = {{SciPy}: Open source scientific tools for {Python}},
           year = {2001},
         }'''
    >>> e2 = '''@Misc{Jones2001,
           author = {Eric Jones and Travis Oliphant and Pearu Peterson},
           title = {SciPy: Open source scientific tools for Python},
           year = {2001},
         }'''
    >>> bibs = [bm.Bib(e1), bm.Bib(e2)]
    >>> bm.display_bibs(["DATABASE:\n", "NEW:\n"], bibs)
    ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    DATABASE:
    @Misc{JonesEtal2001scipy,
           author = {Eric Jones and Travis Oliphant and Pearu Peterson},
           title = {{SciPy}: Open source scientific tools for {Python}},
           year = {2001},
    }

    NEW:
    @Misc{Jones2001,
           author = {Eric Jones and Travis Oliphant and Pearu Peterson},
           title = {SciPy: Open source scientific tools for Python},
           year = {2001},
    }
    """
    style = prompt_toolkit.styles.style_from_pygments_cls(
        pygments.styles.get_style_by_name(cm.get('style')))
    if labels is None:
        labels = ["" for _ in bibs]
    tokens = [(Token.Comment, u.BANNER)]
    for label, bib in zip(labels, bibs):
        tokens += [(Token.Text, label)]
        if meta:
            tokens += [(Token.Comment, bib.meta())]
        tokens += list(pygments.lex(bib.content, lexer=BibTeXLexer()))
        tokens += [(Token.Text, "\n")]
    print_formatted_text(PygmentsTokens(tokens), end="", style=style,
                         output=create_output(sys.stdout))
Example #27
Source File: __init__.py From autostack with MIT License | 4 votes |
def print_code_block(code_block):
    '''
    Prints a code block from Stack Overflow with syntax highlighting.

    On Stack Overflow, the code in a HTML 'code' element contains
    a 'span' element for each token. Because of this, it's necessary
    to grab each of the 'code' element's 'span' elements' values
    to get the actual code.

    Parameter {bs4.Tag} code_block: 'soup' of a HTML
    'code' element from a Stack Overflow post.
    '''

    token_colors = {
        'Token.Keyword': 'blue',
        'Token.Name.Builtin.Pseudo': 'blue',
        'Token.Literal.Number.Integer': 'green',
        'Token.Literal.Number.Float': 'green',
        'Token.Comment.Single': 'green',
        'Token.Comment.Hashbang': 'green',
        'Token.Literal.String.Single': 'yellow',
        'Token.Literal.String.Double': 'yellow',
        'Token.Literal.String.Doc': 'yellow'
    }

    print('')

    # Store the code's text.
    code = get_src_code(code_block)

    # Loop over code, and highlight.
    for token, content in pygments.lex(code, PythonLexer()):
        try:
            print(
                colored(content, token_colors[str(token)]),
                end=''
            )
        except KeyError:
            print(
                content,
                end=''
            )

    print('')