Python tokenize.tokenize() Examples
The following are 30 code examples of tokenize.tokenize().
The original project and source file for each example are noted in the header above it.
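Before the examples, here is a minimal sketch of the call pattern they all share: tokenize.tokenize() takes a readline callable that returns bytes and yields TokenInfo tuples. The sample source string below is made up for illustration.

import io
import tokenize

# Made-up sample source; tokenize.tokenize() expects a readline that returns *bytes*.
source = b"def add(a, b):\n    return a + b\n"

for tok in tokenize.tokenize(io.BytesIO(source).readline):
    # Each item is a TokenInfo namedtuple: (type, string, start, end, line).
    print(tokenize.tok_name[tok.type], repr(tok.string))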
Example #1
Source File: pythondoc.py From InternationalizationScript-iOS with MIT License | 6 votes |
def skip_decorator(self, type, token, start, end, line):
    if token == "(":
        self.decorator_parens = self.decorator_parens + 1
    elif token == ")":
        self.decorator_parens = self.decorator_parens - 1
    if self.decorator_parens or type != tokenize.NEWLINE:
        return self.skip_decorator
    return self.process_subject

##
# (Token handler helper) Processes a PythonDoc comment.  This
# method creates an "info" element based on the current comment,
# and attaches it to the current subject element.
#
# @param subject_name Subject name (or None if the name is not known).
# @param subject_elem The current subject element.
# @return The info element.  Note that this element has already
#     been attached to the subject element.
# @defreturn Element
Example #2
Source File: AutoIndent.py From ironpython2 with Apache License 2.0 | 6 votes |
def run(self):
    OPENERS = ('class', 'def', 'for', 'if', 'try', 'while')
    INDENT = tokenize.INDENT
    NAME = tokenize.NAME

    save_tabsize = tokenize.tabsize
    tokenize.tabsize = self.tabwidth
    try:
        try:
            for (typ, token, start, end, line) in token_generator(self.readline):
                if typ == NAME and token in OPENERS:
                    self.blkopenline = line
                elif typ == INDENT and self.blkopenline:
                    self.indentedline = line
                    break
        except (tokenize.TokenError, IndentationError):
            # since we cut off the tokenizer early, we can trigger
            # spurious errors
            pass
    finally:
        tokenize.tabsize = save_tabsize
    return self.blkopenline, self.indentedline
Example #3
Source File: config_scope.py From sacred with MIT License | 6 votes |
def find_doc_for(ast_entry, body_lines):
    lineno = ast_entry.lineno - 1
    line_io = io.BytesIO(body_lines[lineno].encode())
    try:
        tokens = tokenize(line_io.readline) or []
        line_comments = [t.string for t in tokens if t.type == COMMENT]
        if line_comments:
            formatted_lcs = [l[1:].strip() for l in line_comments]
            filtered_lcs = [l for l in formatted_lcs if not is_ignored(l)]
            if filtered_lcs:
                return filtered_lcs[0]
    except TokenError:
        pass

    lineno -= 1
    while lineno >= 0:
        if iscomment(body_lines[lineno]):
            comment = body_lines[lineno].strip("# ")
            if not is_ignored(comment):
                return comment
        if not body_lines[lineno].strip() == "":
            return None
        lineno -= 1
    return None
Example #4
Source File: gftools-rangify.py From gftools with Apache License 2.0 | 6 votes |
def main():
    if len(sys.argv) != 2:
        sys.exit("Usage: rangify <nam file>")

    codepoints_data = list(tokenize.tokenize(open(sys.argv[1], 'rb').readline))
    codepoints = get_codepoints(codepoints_data)
    codepoints.sort()

    seqs = []
    seq = (None,)
    for cp in codepoints:
        if seq[0] is None:
            seq = (cp, cp)
        elif seq[1] == cp - 1:
            seq = (seq[0], cp)
        else:
            seqs.append(seq)
            seq = (None,)

    for seq in seqs:
        print(seq)
Example #5
Source File: reindent.py From D-VAE with MIT License | 6 votes |
def __init__(self, f):
    self.find_stmt = 1   # next token begins a fresh stmt?
    self.level = 0       # current indent level

    # Raw file lines.
    self.raw = f.readlines()

    # File lines, rstripped & tab-expanded.  Dummy at start is so
    # that we can use tokenize's 1-based line numbering easily.
    # Note that a line is all-blank iff it's "\n".
    self.lines = [_rstrip(line).expandtabs() + "\n"
                  for line in self.raw]
    self.lines.insert(0, None)
    self.index = 1  # index into self.lines of next line

    # List of (lineno, indentlevel) pairs, one for each stmt and
    # comment line.  indentlevel is -1 for comment lines, as a
    # signal that tokenize doesn't know what to do about them;
    # indeed, they're our headache!
    self.stats = []
Example #6
Source File: pygettext.py From oss-ftp with MIT License | 6 votes |
def __waiting(self, ttype, tstring, lineno):
    opts = self.__options
    # Do docstring extractions, if enabled
    if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
        # module docstring?
        if self.__freshmodule:
            if ttype == tokenize.STRING:
                self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                self.__freshmodule = 0
            elif ttype not in (tokenize.COMMENT, tokenize.NL):
                self.__freshmodule = 0
            return
        # class docstring?
        if ttype == tokenize.NAME and tstring in ('class', 'def'):
            self.__state = self.__suiteseen
            return
    if ttype == tokenize.NAME and tstring in opts.keywords:
        self.__state = self.__keywordseen
Example #7
Source File: pygettext.py From oss-ftp with MIT License | 6 votes |
def __openseen(self, ttype, tstring, lineno):
    if ttype == tokenize.OP and tstring == ')':
        # We've seen the last of the translatable strings.  Record the
        # line number of the first line of the strings and update the list
        # of messages seen.  Reset state for the next batch.  If there
        # were no strings inside _(), then just ignore this entry.
        if self.__data:
            self.__addentry(EMPTYSTRING.join(self.__data))
        self.__state = self.__waiting
    elif ttype == tokenize.STRING:
        self.__data.append(safe_eval(tstring))
    elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                       token.NEWLINE, tokenize.NL]:
        # warn if we see anything else than STRING or whitespace
        print >> sys.stderr, _(
            '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
            ) % {
            'token': tstring,
            'file': self.__curfile,
            'lineno': self.__lineno
            }
        self.__state = self.__waiting
Example #8
Source File: evaluate.py From python_autocomplete with MIT License | 6 votes |
def __init__(self, model, lstm_layers, lstm_size):
    self.__model = model

    # Initial state
    self._h0 = torch.zeros((lstm_layers, 1, lstm_size), device=device)
    self._c0 = torch.zeros((lstm_layers, 1, lstm_size), device=device)

    # Last line of source code read
    self._last_line = ""

    self._tokens: List[tokenize.TokenInfo] = []

    # Last token, because we need to input that to the model for inference
    self._last_token = 0

    # Last bit of the input string
    self._untokenized = ""

    # For timing
    self.time_add = 0
    self.time_predict = 0
    self.time_check = 0
Example #9
Source File: reader.py From py2nb with BSD 3-Clause "New" or "Revised" License | 6 votes |
def read(filename):
    """
    Read a regular Python file with special formatting and performance
    preprocessing on it.  The result is a string that conforms to the
    IPython notebook version 3 python script format.
    """
    with open(filename, 'rb') as fin:
        token_gen = _generate_tokens(fin.readline)
        cvt_docstr_gen = convert_toplevel_docstring(token_gen)
        nl_gen = fix_newlines(cvt_docstr_gen)
        out = list(nl_gen)
        formatted = tokenize.untokenize(out).decode('utf-8')
    return fix_empty_lines(formatted)


# =============================================================================
# Helpers
# =============================================================================
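As a quick illustration of the round trip Example #9 relies on, the sketch below tokenizes a made-up one-line snippet and rebuilds it with tokenize.untokenize(); because the token stream starts with an ENCODING token, untokenize() returns bytes.

import io
import tokenize

src = b"x = 1  # keep this comment\n"   # made-up snippet, not from py2nb

tokens = list(tokenize.tokenize(io.BytesIO(src).readline))
rebuilt = tokenize.untokenize(tokens)   # bytes, since the stream begins with ENCODING
print(rebuilt.decode("utf-8"))          # x = 1  # keep this comment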
Example #10
Source File: reader.py From py2nb with BSD 3-Clause "New" or "Revised" License | 6 votes |
def convert_toplevel_docstring(tokens):
    for token in tokens:
        # For each string
        if token.type == tokenize.STRING:
            text = token.string
            # Must be a docstring
            if text.startswith('"""') or text.startswith("'''"):
                startline, startcol = token.start
                # Starting column MUST be 0
                if startcol == 0:
                    endline, endcol = token.end
                    lines = ['# ' + line
                             for line in text.strip('"\' \n').split('\n')]
                    text = '\n'.join(lines)
                    fmt = '# <markdowncell>\n{0}\n# <codecell>'.format(text)
                    yield TokenInfo(type=tokenize.COMMENT,
                                    start=(startline, startcol),
                                    end=(endline, endcol),
                                    string=fmt,
                                    line='#')
                    # To next token
                    continue
        # Return untouched
        yield token
Example #11
Source File: reindent.py From attention-lvcsr with MIT License | 6 votes |
def __init__(self, f):
    self.find_stmt = 1   # next token begins a fresh stmt?
    self.level = 0       # current indent level

    # Raw file lines.
    self.raw = f.readlines()

    # File lines, rstripped & tab-expanded.  Dummy at start is so
    # that we can use tokenize's 1-based line numbering easily.
    # Note that a line is all-blank iff it's "\n".
    self.lines = [_rstrip(line).expandtabs() + "\n"
                  for line in self.raw]
    self.lines.insert(0, None)
    self.index = 1  # index into self.lines of next line

    # List of (lineno, indentlevel) pairs, one for each stmt and
    # comment line.  indentlevel is -1 for comment lines, as a
    # signal that tokenize doesn't know what to do about them;
    # indeed, they're our headache!
    self.stats = []
Example #12
Source File: split.py From flynt with MIT License | 6 votes |
def get_chunks(code) -> Generator[Chunk, None, None]:
    g = tokenize.tokenize(io.BytesIO(code.encode("utf-8")).readline)
    chunk = Chunk()

    try:
        for item in g:
            t = PyToken(item)
            reuse = chunk.append(t)

            if chunk.complete:
                yield chunk
                chunk = Chunk()
                if reuse:
                    reuse = chunk.append(t)
                    # assert not reuse
                    if chunk.complete:
                        yield chunk
                        chunk = Chunk()

        yield chunk
    except tokenize.TokenError as e:
        if state.verbose:
            traceback.print_exc()
            print(e)
Example #13
Source File: aot.py From learn_python3_spider with MIT License | 6 votes |
def indentify(s):
    out = []
    stack = []
    l = ['', s]
    for (tokenType, tokenString, (startRow, startColumn),
         (endRow, endColumn), logicalLine) in tokenize(l.pop):
        if tokenString in ['[', '(', '{']:
            stack.append(tokenString)
        elif tokenString in [']', ')', '}']:
            stack.pop()
        if tokenString == '\0':
            out.append(' ' * len(stack))
        else:
            out.append(tokenString)
    return ''.join(out)


###########
# Unjelly #
###########
Example #14
Source File: recipe-491274.py From code with MIT License | 6 votes |
def format(self):
    """ Parse and send the colored source.
    """
    # store line offsets in self.lines
    self.lines = [0, 0]
    pos = 0
    while 1:
        pos = string.find(self.raw, '\n', pos) + 1
        if not pos:
            break
        self.lines.append(pos)
    self.lines.append(len(self.raw))

    # parse the source and write it
    self.pos = 0
    text = cStringIO.StringIO(self.raw)
    self.out.write('<pre class="code">\n')
    try:
        tokenize.tokenize(text.readline, self)
    except tokenize.TokenError, ex:
        msg = ex[0]
        line = ex[1][0]
        self.out.write("<h3>ERROR: %s</h3>%s\n" % (
            msg, self.raw[self.lines[line]:]))
Example #15
Source File: pythondoc.py From InternationalizationScript-iOS with MIT License | 6 votes |
def handle_token(self, *args):
    # dispatch incoming tokens to the current handler
    if DEBUG > 1:
        print self.handler.im_func.func_name, self.indent,
        print tokenize.tok_name[args[0]], repr(args[1])
    if args[0] == tokenize.DEDENT:
        self.indent = self.indent - 1
        while self.scope and self.scope[-1][0] >= self.indent:
            del self.scope[-1]
            del self.stack[-1]
    self.handler = apply(self.handler, args)
    if args[0] == tokenize.INDENT:
        self.indent = self.indent + 1

##
# (Token handler) Scans for encoding directive.
Example #16
Source File: pythondoc.py From InternationalizationScript-iOS with MIT License | 6 votes |
def look_for_pythondoc(self, type, token, start, end, line):
    if type == tokenize.COMMENT and string.rstrip(token) == "##":
        # found a comment: set things up for comment processing
        self.comment_start = start
        self.comment = []
        return self.process_comment_body
    else:
        # deal with "bare" subjects
        if token == "def" or token == "class":
            self.subject_indent = self.indent
            self.subject_parens = 0
            self.subject_start = self.comment_start = None
            self.subject = []
            return self.process_subject(type, token, start, end, line)
        return self.look_for_pythondoc

##
# (Token handler) Processes a comment body.  This handler adds
# comment lines to the current comment.
Example #17
Source File: pythondoc.py From InternationalizationScript-iOS with MIT License | 6 votes |
def handle_token(self, *args):
    # dispatch incoming tokens to the current handler
    if DEBUG > 1:
        print self.handler.im_func.func_name, self.indent,
        print tokenize.tok_name[args[0]], repr(args[1])
    if args[0] == tokenize.DEDENT:
        self.indent = self.indent - 1
        while self.scope and self.scope[-1][0] >= self.indent:
            del self.scope[-1]
            del self.stack[-1]
    self.handler = apply(self.handler, args)
    if args[0] == tokenize.INDENT:
        self.indent = self.indent + 1

##
# (Token handler) Scans for encoding directive.
Example #18
Source File: pythondoc.py From InternationalizationScript-iOS with MIT License | 6 votes |
def look_for_pythondoc(self, type, token, start, end, line):
    if type == tokenize.COMMENT and string.rstrip(token) == "##":
        # found a comment: set things up for comment processing
        self.comment_start = start
        self.comment = []
        return self.process_comment_body
    else:
        # deal with "bare" subjects
        if token == "def" or token == "class":
            self.subject_indent = self.indent
            self.subject_parens = 0
            self.subject_start = self.comment_start = None
            self.subject = []
            return self.process_subject(type, token, start, end, line)
        return self.look_for_pythondoc

##
# (Token handler) Processes a comment body.  This handler adds
# comment lines to the current comment.
Example #19
Source File: reader.py From py2nb with BSD 3-Clause "New" or "Revised" License | 6 votes |
def fix_newlines(tokens):
    first = True
    curline = 1
    for token in tokens:
        if first:
            first = False
            curline = token.end[0] + 1
        else:
            # Fill NEWLINE token in between
            while curline < token.start[0]:
                yield TokenInfo(type=tokenize.NEWLINE,
                                string='\n',
                                start=(curline, 0),
                                end=(curline, 0),
                                line='\n', )
                curline += 1
            curline = token.end[0] + 1
        yield token
Example #20
Source File: aot.py From Safejumper-for-Desktop with GNU General Public License v2.0 | 6 votes |
def indentify(s):
    out = []
    stack = []
    l = ['', s]
    for (tokenType, tokenString, (startRow, startColumn),
         (endRow, endColumn), logicalLine) in tokenize(l.pop):
        if tokenString in ['[', '(', '{']:
            stack.append(tokenString)
        elif tokenString in [']', ')', '}']:
            stack.pop()
        if tokenString == '\0':
            out.append(' ' * len(stack))
        else:
            out.append(tokenString)
    return ''.join(out)


###########
# Unjelly #
###########
Example #21
Source File: inspect.py From ironpython2 with Apache License 2.0 | 5 votes |
def tokeneater(self, type, token, srow_scol, erow_ecol, line):
    srow, scol = srow_scol
    erow, ecol = erow_ecol
    if not self.started:
        # look for the first "def", "class" or "lambda"
        if token in ("def", "class", "lambda"):
            if token == "lambda":
                self.islambda = True
            self.started = True
        self.passline = True    # skip to the end of the line
    elif type == tokenize.NEWLINE:
        self.passline = False   # stop skipping when a NEWLINE is seen
        self.last = srow
        if self.islambda:       # lambdas always end at the first NEWLINE
            raise EndOfBlock
    elif self.passline:
        pass
    elif type == tokenize.INDENT:
        self.indent = self.indent + 1
        self.passline = True
    elif type == tokenize.DEDENT:
        self.indent = self.indent - 1
        # the end of matching indent/dedent pairs end a block
        # (note that this only works for "def"/"class" blocks,
        # not e.g. for "if: else:" or "try: finally:" blocks)
        if self.indent <= 0:
            raise EndOfBlock
    elif self.indent == 0 and type not in (tokenize.COMMENT, tokenize.NL):
        # any other token on the same indentation level end the previous
        # block as well, except the pseudo-tokens COMMENT and NL.
        raise EndOfBlock
Example #22
Source File: inspect.py From meddle with MIT License | 5 votes |
def getblock(lines):
    """Extract the block of code at the top of the given list of lines."""
    blockfinder = BlockFinder()
    try:
        tokenize.tokenize(iter(lines).next, blockfinder.tokeneater)
    except (EndOfBlock, IndentationError):
        pass
    return lines[:blockfinder.last]
Example #23
Source File: reader.py From py2nb with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _generate_tokens(readline):
    return map(lambda x: TokenInfo(*x), tokenize.generate_tokens(readline))
Example #24
Source File: reindent.py From attention-lvcsr with MIT License | 5 votes |
def getline(self):
    if self.index >= len(self.lines):
        line = ""
    else:
        line = self.lines[self.index]
        self.index += 1
    return line

# Line-eater for tokenize.
Example #25
Source File: reindent.py From attention-lvcsr with MIT License | 5 votes |
def tokeneater(self, type, token, pos, end, line,
               INDENT=tokenize.INDENT,
               DEDENT=tokenize.DEDENT,
               NEWLINE=tokenize.NEWLINE,
               COMMENT=tokenize.COMMENT,
               NL=tokenize.NL):
    sline, scol = pos
    if type == NEWLINE:
        # A program statement, or ENDMARKER, will eventually follow,
        # after some (possibly empty) run of tokens of the form
        #     (NL | COMMENT)* (INDENT | DEDENT+)?
        self.find_stmt = 1

    elif type == INDENT:
        self.find_stmt = 1
        self.level += 1

    elif type == DEDENT:
        self.find_stmt = 1
        self.level -= 1

    elif type == COMMENT:
        if self.find_stmt:
            self.stats.append((sline, -1))
            # but we're still looking for a new stmt, so leave
            # find_stmt alone

    elif type == NL:
        pass

    elif self.find_stmt:
        # This is the first "real token" following a NEWLINE, so it
        # must be the first token of the next program statement, or an
        # ENDMARKER.
        self.find_stmt = 0
        if line:   # not endmarker
            self.stats.append((sline, self.level))

# Count number of leading blanks.
Example #26
Source File: inspect.py From Fluid-Designer with GNU General Public License v3.0 | 5 votes |
def getblock(lines):
    """Extract the block of code at the top of the given list of lines."""
    blockfinder = BlockFinder()
    try:
        tokens = tokenize.generate_tokens(iter(lines).__next__)
        for _token in tokens:
            blockfinder.tokeneater(*_token)
    except (EndOfBlock, IndentationError):
        pass
    return lines[:blockfinder.last]
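Example #26 uses tokenize.generate_tokens(), which takes a readline returning str, in contrast to tokenize.tokenize(), which needs bytes. Below is a minimal sketch of the same pattern over a made-up list of source lines.

import tokenize

lines = ["def f():\n", "    return 1\n"]   # made-up source lines

# iter(lines).__next__ serves as the readline callable; generate_tokens() accepts str input.
for tok in tokenize.generate_tokens(iter(lines).__next__):
    print(tokenize.tok_name[tok.type], repr(tok.string))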
Example #27
Source File: inspect.py From Imogen with MIT License | 5 votes |
def getblock(lines):
    """Extract the block of code at the top of the given list of lines."""
    blockfinder = BlockFinder()
    try:
        tokens = tokenize.generate_tokens(iter(lines).__next__)
        for _token in tokens:
            blockfinder.tokeneater(*_token)
    except (EndOfBlock, IndentationError):
        pass
    return lines[:blockfinder.last]
Example #28
Source File: manager.py From bandit with Apache License 2.0 | 5 votes |
def _parse_file(self, fname, fdata, new_files_list):
    try:
        # parse the current file
        data = fdata.read()
        lines = data.splitlines()
        self.metrics.begin(fname)
        self.metrics.count_locs(lines)
        if self.ignore_nosec:
            nosec_lines = set()
        else:
            try:
                fdata.seek(0)
                if six.PY2:
                    tokens = tokenize.generate_tokens(fdata.readline)
                else:
                    tokens = tokenize.tokenize(fdata.readline)
                nosec_lines = set(
                    lineno for toktype, tokval, (lineno, _), _, _ in tokens
                    if toktype == tokenize.COMMENT and
                    '#nosec' in tokval or '# nosec' in tokval)
            except tokenize.TokenError:
                nosec_lines = set()
        score = self._execute_ast_visitor(fname, data, nosec_lines)
        self.scores.append(score)
        self.metrics.count_issues([score, ])
    except KeyboardInterrupt:
        sys.exit(2)
    except SyntaxError:
        self.skipped.append((fname,
                             "syntax error while parsing AST from file"))
        new_files_list.remove(fname)
    except Exception as e:
        LOG.error("Exception occurred when executing tests against "
                  "%s. Run \"bandit --debug %s\" to see the full "
                  "traceback.", fname, fname)
        self.skipped.append((fname, 'exception while scanning file'))
        new_files_list.remove(fname)
        LOG.debug("  Exception string: %s", e)
        LOG.debug("  Exception traceback: %s", traceback.format_exc())
Example #29
Source File: tokenizer.py From python_autocomplete with MIT License | 5 votes |
def parse_string(content: str) -> List[ParsedToken]:
    """
    Encode source code
    """
    g = tokenize.tokenize(BytesIO(content.encode('utf-8')).readline)
    return parse(g)
Example #30
Source File: reindent.py From attention-lvcsr with MIT License | 5 votes |
def write(self, f):
    f.writelines(self.after)

# Line-getter for tokenize.