Python tokenize.generate_tokens() Examples
The following are 30 code examples of tokenize.generate_tokens(), collected from open-source projects. Each example lists its source file, the project it comes from, and that project's license. You may also want to check out the other functions and classes available in the tokenize module.
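
For orientation before the examples: tokenize.generate_tokens(readline) takes a readline callable that returns successive lines of source text and yields 5-tuples of (token type, token string, start position, end position, physical line). Below is a minimal sketch of that calling pattern; the code string being tokenized is purely illustrative.

import io
import tokenize

code = "total = price * quantity  # compute the cost\n"

# generate_tokens() expects a readline callable that returns str lines,
# so wrap the source string in io.StringIO and pass its readline method.
for tok_type, tok_string, start, end, line in tokenize.generate_tokens(
        io.StringIO(code).readline):
    print(tokenize.tok_name[tok_type], repr(tok_string), start, end)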
Example #1
Source File: cgitb.py From BinderFilter with MIT License | 6 votes |
def scanvars(reader, frame, locals):
    """Scan one logical line of Python and look up values of variables used."""
    vars, lasttoken, parent, prefix, value = [], None, None, '', __UNDEF__
    for ttype, token, start, end, line in tokenize.generate_tokens(reader):
        if ttype == tokenize.NEWLINE:
            break
        if ttype == tokenize.NAME and token not in keyword.kwlist:
            if lasttoken == '.':
                if parent is not __UNDEF__:
                    value = getattr(parent, token, __UNDEF__)
                vars.append((prefix + token, prefix, value))
            else:
                where, value = lookup(token, frame, locals)
                vars.append((token, where, value))
        elif token == '.':
            prefix += lasttoken + '.'
            parent = value
        else:
            parent, prefix = None, ''
        lasttoken = token
    return vars
Example #2
Source File: trace.py From Computable with MIT License | 6 votes |
def find_strings(filename):
    """Return a dict of possible docstring positions.

    The dict maps line numbers to strings.  There is an entry for
    line that contains only a string or a part of a triple-quoted
    string.
    """
    d = {}
    # If the first token is a string, then it's the module docstring.
    # Add this special case so that the test in the loop passes.
    prev_ttype = token.INDENT
    f = open(filename)
    for ttype, tstr, start, end, line in tokenize.generate_tokens(f.readline):
        if ttype == token.STRING:
            if prev_ttype == token.INDENT:
                sline, scol = start
                eline, ecol = end
                for i in range(sline, eline + 1):
                    d[i] = 1
        prev_ttype = ttype
    f.close()
    return d
Example #3
Source File: autopep8.py From python-netsurv with MIT License | 6 votes |
def fix_e402(self, result):
    (line_index, offset, target) = get_index_offset_contents(result,
                                                             self.source)
    for i in range(1, 100):
        line = "".join(self.source[line_index:line_index+i])
        try:
            generate_tokens("".join(line))
        except (SyntaxError, tokenize.TokenError):
            continue
        break
    if not (target in self.imports and self.imports[target] != line_index):
        mod_offset = get_module_imports_on_top_of_file(self.source,
                                                       line_index)
        self.source[mod_offset] = line + self.source[mod_offset]
    for offset in range(i):
        self.source[line_index+offset] = ''
Example #4
Source File: importer.py From importmagic with BSD 2-Clause "Simplified" License | 6 votes |
def _parse(self, source):
    reader = StringIO(source)
    # parse until EOF or TokenError (allows incomplete modules)
    tokens = []
    try:
        tokens.extend(tokenize.generate_tokens(reader.readline))
    except tokenize.TokenError:
        # TokenError happens always at EOF, for unclosed strings or brackets.
        # We don't care about that here, since we still can recover the whole
        # source code.
        pass
    self._tokens = tokens
    it = Iterator(self._tokens)
    self._imports_begin, self._imports_end = self._find_import_range(it)
    it = Iterator(self._tokens, start=self._imports_begin,
                  end=self._imports_end)
    self._parse_imports(it)
Example #5
Source File: trace.py From meddle with MIT License | 6 votes |
def find_strings(filename):
    """Return a dict of possible docstring positions.

    The dict maps line numbers to strings.  There is an entry for
    line that contains only a string or a part of a triple-quoted
    string.
    """
    d = {}
    # If the first token is a string, then it's the module docstring.
    # Add this special case so that the test in the loop passes.
    prev_ttype = token.INDENT
    f = open(filename)
    for ttype, tstr, start, end, line in tokenize.generate_tokens(f.readline):
        if ttype == token.STRING:
            if prev_ttype == token.INDENT:
                sline, scol = start
                eline, ecol = end
                for i in range(sline, eline + 1):
                    d[i] = 1
        prev_ttype = ttype
    f.close()
    return d
Example #6
Source File: cgitb.py From meddle with MIT License | 6 votes |
def scanvars(reader, frame, locals):
    """Scan one logical line of Python and look up values of variables used."""
    vars, lasttoken, parent, prefix, value = [], None, None, '', __UNDEF__
    for ttype, token, start, end, line in tokenize.generate_tokens(reader):
        if ttype == tokenize.NEWLINE:
            break
        if ttype == tokenize.NAME and token not in keyword.kwlist:
            if lasttoken == '.':
                if parent is not __UNDEF__:
                    value = getattr(parent, token, __UNDEF__)
                vars.append((prefix + token, prefix, value))
            else:
                where, value = lookup(token, frame, locals)
                vars.append((token, where, value))
        elif token == '.':
            prefix += lasttoken + '.'
            parent = value
        else:
            parent, prefix = None, ''
        lasttoken = token
    return vars
Example #7
Source File: __init__.py From pyta with GNU General Public License v3.0 | 6 votes |
def _verify_pre_check(filepath):
    """Check student code for certain issues."""
    # Make sure the program doesn't crash for students.
    # Could use some improvement for better logging and error reporting.
    try:
        # Check for inline "pylint:" comment, which may indicate a student
        # trying to disable a check.
        with tokenize.open(os.path.expanduser(filepath)) as f:
            for tok_type, content, _, _, _ in tokenize.generate_tokens(f.readline):
                if tok_type != tokenize.COMMENT:
                    continue
                match = pylint.constants.OPTION_RGX.search(content)
                if match is not None:
                    print('[ERROR] String "pylint:" found in comment. ' +
                          'No check run on file `{}.`\n'.format(filepath))
                    return False
    except IndentationError as e:
        print('[ERROR] python_ta could not check your code due to an ' +
              'indentation error at line {}.'.format(e.lineno))
        return False
    except tokenize.TokenError as e:
        print('[ERROR] python_ta could not check your code due to a ' +
              'syntax error in your file.')
        return False
    return True
Example #8
Source File: autopep8.py From python-netsurv with MIT License | 6 votes |
def _find_logical(source_lines):
    # Make a variable which is the index of all the starts of lines.
    logical_start = []
    logical_end = []
    last_newline = True
    parens = 0
    for t in generate_tokens(''.join(source_lines)):
        if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
                    tokenize.INDENT, tokenize.NL,
                    tokenize.ENDMARKER]:
            continue
        if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
            last_newline = True
            logical_end.append((t[3][0] - 1, t[2][1]))
            continue
        if last_newline and not parens:
            logical_start.append((t[2][0] - 1, t[2][1]))
            last_newline = False
        if t[0] == tokenize.OP:
            if t[1] in '([{':
                parens += 1
            elif t[1] in '}])':
                parens -= 1
    return (logical_start, logical_end)
Example #9
Source File: trace.py From ironpython2 with Apache License 2.0 | 6 votes |
def find_strings(filename):
    """Return a dict of possible docstring positions.

    The dict maps line numbers to strings.  There is an entry for
    line that contains only a string or a part of a triple-quoted
    string.
    """
    d = {}
    # If the first token is a string, then it's the module docstring.
    # Add this special case so that the test in the loop passes.
    prev_ttype = token.INDENT
    f = open(filename)
    for ttype, tstr, start, end, line in tokenize.generate_tokens(f.readline):
        if ttype == token.STRING:
            if prev_ttype == token.INDENT:
                sline, scol = start
                eline, ecol = end
                for i in range(sline, eline + 1):
                    d[i] = 1
        prev_ttype = ttype
    f.close()
    return d
Example #10
Source File: cgitb.py From ironpython2 with Apache License 2.0 | 6 votes |
def scanvars(reader, frame, locals):
    """Scan one logical line of Python and look up values of variables used."""
    vars, lasttoken, parent, prefix, value = [], None, None, '', __UNDEF__
    for ttype, token, start, end, line in tokenize.generate_tokens(reader):
        if ttype == tokenize.NEWLINE:
            break
        if ttype == tokenize.NAME and token not in keyword.kwlist:
            if lasttoken == '.':
                if parent is not __UNDEF__:
                    value = getattr(parent, token, __UNDEF__)
                vars.append((prefix + token, prefix, value))
            else:
                where, value = lookup(token, frame, locals)
                vars.append((token, where, value))
        elif token == '.':
            prefix += lasttoken + '.'
            parent = value
        else:
            parent, prefix = None, ''
        lasttoken = token
    return vars
Example #11
Source File: cgitb.py From Computable with MIT License | 6 votes |
def scanvars(reader, frame, locals):
    """Scan one logical line of Python and look up values of variables used."""
    vars, lasttoken, parent, prefix, value = [], None, None, '', __UNDEF__
    for ttype, token, start, end, line in tokenize.generate_tokens(reader):
        if ttype == tokenize.NEWLINE:
            break
        if ttype == tokenize.NAME and token not in keyword.kwlist:
            if lasttoken == '.':
                if parent is not __UNDEF__:
                    value = getattr(parent, token, __UNDEF__)
                vars.append((prefix + token, prefix, value))
            else:
                where, value = lookup(token, frame, locals)
                vars.append((token, where, value))
        elif token == '.':
            prefix += lasttoken + '.'
            parent = value
        else:
            parent, prefix = None, ''
        lasttoken = token
    return vars
Example #12
Source File: autopep8.py From python-netsurv with MIT License | 6 votes |
def fix_e402(self, result):
    (line_index, offset, target) = get_index_offset_contents(result,
                                                             self.source)
    for i in range(1, 100):
        line = "".join(self.source[line_index:line_index+i])
        try:
            generate_tokens("".join(line))
        except (SyntaxError, tokenize.TokenError):
            continue
        break
    if not (target in self.imports and self.imports[target] != line_index):
        mod_offset = get_module_imports_on_top_of_file(self.source,
                                                       line_index)
        self.source[mod_offset] = line + self.source[mod_offset]
    for offset in range(i):
        self.source[line_index+offset] = ''
Example #13
Source File: processor.py From linter-pylama with MIT License | 6 votes |
def file_tokens(self):
    """The complete set of tokens for a file.

    Accessing this attribute *may* raise an InvalidSyntax exception.

    :raises: flake8.exceptions.InvalidSyntax
    """
    if self._file_tokens is None:
        line_iter = iter(self.lines)
        try:
            self._file_tokens = list(tokenize.generate_tokens(
                lambda: next(line_iter)
            ))
        except tokenize.TokenError as exc:
            raise exceptions.InvalidSyntax(exc.message, exception=exc)
    return self._file_tokens
Example #14
Source File: check_whitespace.py From D-VAE with MIT License | 6 votes |
def get_parse_error(code):
    """
    Checks code for ambiguous tabs or other basic parsing issues.

    :param code: a string containing a file's worth of Python code
    :returns: a string containing a description of the first parse error encountered,
              or None if the code is ok
    """
    # note that this uses non-public elements from stdlib's tabnanny, because tabnanny
    # is (very frustratingly) written only to be used as a script, but using it that way
    # in this context requires writing temporarily files, running subprocesses, blah blah blah
    code_buffer = StringIO(code)
    try:
        tabnanny.process_tokens(tokenize.generate_tokens(code_buffer.readline))
    except tokenize.TokenError as err:
        return "Could not parse code: %s" % err
    except IndentationError as err:
        return "Indentation error: %s" % err
    except tabnanny.NannyNag as err:
        return "Ambiguous tab at line %d; line is '%s'." % (err.get_lineno(), err.get_line())
    return None
Example #15
Source File: trace.py From BinderFilter with MIT License | 6 votes |
def find_strings(filename):
    """Return a dict of possible docstring positions.

    The dict maps line numbers to strings.  There is an entry for
    line that contains only a string or a part of a triple-quoted
    string.
    """
    d = {}
    # If the first token is a string, then it's the module docstring.
    # Add this special case so that the test in the loop passes.
    prev_ttype = token.INDENT
    f = open(filename)
    for ttype, tstr, start, end, line in tokenize.generate_tokens(f.readline):
        if ttype == token.STRING:
            if prev_ttype == token.INDENT:
                sline, scol = start
                eline, ecol = end
                for i in range(sline, eline + 1):
                    d[i] = 1
        prev_ttype = ttype
    f.close()
    return d
Example #16
Source File: source.py From py with MIT License | 6 votes |
def deindent(lines, offset=None):
    if offset is None:
        for line in lines:
            line = line.expandtabs()
            s = line.lstrip()
            if s:
                offset = len(line)-len(s)
                break
        else:
            offset = 0
    if offset == 0:
        return list(lines)
    newlines = []

    def readline_generator(lines):
        for line in lines:
            yield line + '\n'
        while True:
            yield ''

    it = readline_generator(lines)

    try:
        for _, _, (sline, _), (eline, _), _ in tokenize.generate_tokens(lambda: next(it)):
            if sline > len(lines):
                break  # End of input reached
            if sline > len(newlines):
                line = lines[sline - 1].expandtabs()
                if line.lstrip() and line[:offset].isspace():
                    line = line[offset:]  # Deindent
                newlines.append(line)

            for i in range(sline, eline):
                # Don't deindent continuing lines of
                # multiline tokens (i.e. multiline strings)
                newlines.append(lines[i])
    except (IndentationError, tokenize.TokenError):
        pass
    # Add any lines we didn't see. E.g. if an exception was raised.
    newlines.extend(lines[len(newlines):])
    return newlines
Example #17
Source File: utils.py From typed-argument-parser with MIT License | 5 votes |
def tokenize_source(obj: object) -> Generator:
    """Returns a generator for the tokens of the object's source code."""
    source = inspect.getsource(obj)
    token_generator = tokenize.generate_tokens(StringIO(source).readline)
    return token_generator
Example #18
Source File: tokens.py From vnpy_crypto with MIT License | 5 votes |
def normalize_token_spacing(code):
    tokens = [(t[0], t[1])
              for t in tokenize.generate_tokens(StringIO(code).readline)]
    return pretty_untokenize(tokens)
Example #19
Source File: tokens.py From vnpy_crypto with MIT License | 5 votes |
def python_tokenize(code):
    # Since formulas can only contain Python expressions, and Python
    # expressions cannot meaningfully contain newlines, we'll just remove all
    # the newlines up front to avoid any complications:
    code = code.replace("\n", " ").strip()
    it = tokenize.generate_tokens(StringIO(code).readline)
    try:
        for (pytype, string, (_, start), (_, end), code) in it:
            if pytype == tokenize.ENDMARKER:
                break
            origin = Origin(code, start, end)
            assert pytype not in (tokenize.NL, tokenize.NEWLINE)
            if pytype == tokenize.ERRORTOKEN:
                raise PatsyError("error tokenizing input "
                                 "(maybe an unclosed string?)",
                                 origin)
            if pytype == tokenize.COMMENT:
                raise PatsyError("comments are not allowed", origin)
            yield (pytype, string, origin)
        else:  # pragma: no cover
            raise ValueError("stream ended without ENDMARKER?!?")
    except tokenize.TokenError as e:
        # TokenError is raised iff the tokenizer thinks that there is
        # some sort of multi-line construct in progress (e.g., an
        # unclosed parentheses, which in Python lets a virtual line
        # continue past the end of the physical line), and it hits the
        # end of the source text. We have our own error handling for
        # such cases, so just treat this as an end-of-stream.
        #
        # Just in case someone adds some other error case:
        assert e.args[0].startswith("EOF in multi-line")
        return
Example #20
Source File: expr.py From vnpy_crypto with MIT License | 5 votes |
def tokenize_string(source):
    """Tokenize a Python source code string.

    Parameters
    ----------
    source : str
        A Python source code string
    """
    line_reader = StringIO(source).readline
    for toknum, tokval, _, _, _ in tokenize.generate_tokens(line_reader):
        yield toknum, tokval
Example #21
Source File: template.py From nightmare with GNU General Public License v2.0 | 5 votes |
def __init__(self, text):
    self.text = text
    readline = iter([text]).next
    self.tokens = tokenize.generate_tokens(readline)
    self.index = 0
Example #22
Source File: source.py From python-netsurv with MIT License | 5 votes |
def deindent(lines, offset=None):
    if offset is None:
        for line in lines:
            line = line.expandtabs()
            s = line.lstrip()
            if s:
                offset = len(line)-len(s)
                break
        else:
            offset = 0
    if offset == 0:
        return list(lines)
    newlines = []

    def readline_generator(lines):
        for line in lines:
            yield line + '\n'
        while True:
            yield ''

    it = readline_generator(lines)

    try:
        for _, _, (sline, _), (eline, _), _ in tokenize.generate_tokens(lambda: next(it)):
            if sline > len(lines):
                break  # End of input reached
            if sline > len(newlines):
                line = lines[sline - 1].expandtabs()
                if line.lstrip() and line[:offset].isspace():
                    line = line[offset:]  # Deindent
                newlines.append(line)

            for i in range(sline, eline):
                # Don't deindent continuing lines of
                # multiline tokens (i.e. multiline strings)
                newlines.append(lines[i])
    except (IndentationError, tokenize.TokenError):
        pass
    # Add any lines we didn't see. E.g. if an exception was raised.
    newlines.extend(lines[len(newlines):])
    return newlines
Example #23
Source File: autopep8.py From python-netsurv with MIT License | 5 votes |
def fix_w605(self, result):
    (line_index, _, target) = get_index_offset_contents(result,
                                                        self.source)
    try:
        tokens = list(generate_tokens(target))
    except (SyntaxError, tokenize.TokenError):
        return
    for (pos, _msg) in get_w605_position(tokens):
        self.source[line_index] = '{}r{}'.format(
            target[:pos], target[pos:])
Example #24
Source File: autopep8.py From python-netsurv with MIT License | 5 votes |
def join_logical_line(logical_line):
    """Return single line based on logical line input."""
    indentation = _get_indentation(logical_line)

    return indentation + untokenize_without_newlines(
        generate_tokens(logical_line.lstrip())) + '\n'
Example #25
Source File: pycodestyle.py From python-netsurv with MIT License | 5 votes |
def generate_tokens(self):
    """Tokenize file, run physical line checks and yield tokens."""
    if self._io_error:
        self.report_error(1, 0, 'E902 %s' % self._io_error, readlines)
    tokengen = tokenize.generate_tokens(self.readline)
    try:
        for token in tokengen:
            if token[2][0] > self.total_lines:
                return
            self.noqa = token[4] and noqa(token[4])
            self.maybe_check_physical(token)
            yield token
    except (SyntaxError, tokenize.TokenError):
        self.report_invalid_syntax()
Example #26
Source File: autopep8.py From python-netsurv with MIT License | 5 votes |
def _get_indentword(source):
    """Return indentation type."""
    indent_word = '    '  # Default in case source has no indentation
    try:
        for t in generate_tokens(source):
            if t[0] == token.INDENT:
                indent_word = t[1]
                break
    except (SyntaxError, tokenize.TokenError):
        pass
    return indent_word
Example #27
Source File: autopep8.py From python-netsurv with MIT License | 5 votes |
def multiline_string_lines(source, include_docstrings=False):
    """Return line numbers that are within multiline strings.

    The line numbers are indexed at 1.

    Docstrings are ignored.
    """
    line_numbers = set()
    previous_token_type = ''
    try:
        for t in generate_tokens(source):
            token_type = t[0]
            start_row = t[2][0]
            end_row = t[3][0]

            if token_type == tokenize.STRING and start_row != end_row:
                if (
                    include_docstrings or
                    previous_token_type != tokenize.INDENT
                ):
                    # We increment by one since we want the contents of the
                    # string.
                    line_numbers |= set(range(1 + start_row, 1 + end_row))

            previous_token_type = token_type
    except (SyntaxError, tokenize.TokenError):
        pass

    return line_numbers
Example #28
Source File: source.py From python-netsurv with MIT License | 5 votes |
def getstatementrange_ast(lineno, source, assertion=False, astnode=None):
    if astnode is None:
        content = str(source)
        # See #4260:
        # don't produce duplicate warnings when compiling source to find ast
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            astnode = compile(content, "source", "exec", _AST_FLAG)

    start, end = get_statement_startend2(lineno, astnode)
    # we need to correct the end:
    # - ast-parsing strips comments
    # - there might be empty lines
    # - we might have lesser indented code blocks at the end
    if end is None:
        end = len(source.lines)

    if end > start + 1:
        # make sure we don't span differently indented code blocks
        # by using the BlockFinder helper used which inspect.getsource() uses itself
        block_finder = inspect.BlockFinder()
        # if we start with an indented line, put blockfinder to "started" mode
        block_finder.started = source.lines[start][0].isspace()
        it = ((x + "\n") for x in source.lines[start:end])
        try:
            for tok in tokenize.generate_tokens(lambda: next(it)):
                block_finder.tokeneater(*tok)
        except (inspect.EndOfBlock, IndentationError):
            end = block_finder.last + start
        except Exception:
            pass

    # the end might still point to a comment or empty line, correct it
    while end:
        line = source.lines[end - 1].lstrip()
        if line.startswith("#") or not line:
            end -= 1
        else:
            break
    return astnode, start, end
Example #29
Source File: autopep8.py From python-netsurv with MIT License | 5 votes |
def generate_tokens(self, text):
    """A stand-in for tokenize.generate_tokens()."""
    if text != self.last_text:
        string_io = io.StringIO(text)
        self.last_tokens = list(
            tokenize.generate_tokens(string_io.readline)
        )
        self.last_text = text
    return self.last_tokens
Example #30
Source File: autopep8.py From python-netsurv with MIT License | 5 votes |
def commented_out_code_lines(source):
    """Return line numbers of comments that are likely code.

    Commented-out code is bad practice, but modifying it just adds even
    more clutter.
    """
    line_numbers = []
    try:
        for t in generate_tokens(source):
            token_type = t[0]
            token_string = t[1]
            start_row = t[2][0]
            line = t[4]

            # Ignore inline comments.
            if not line.lstrip().startswith('#'):
                continue

            if token_type == tokenize.COMMENT:
                stripped_line = token_string.lstrip('#').strip()
                if (
                    ' ' in stripped_line and
                    '#' not in stripped_line and
                    check_syntax(stripped_line)
                ):
                    line_numbers.append(start_row)
    except (SyntaxError, tokenize.TokenError):
        pass

    return line_numbers