Python tokenize.generate_tokens() Examples
The following are 30 code examples of tokenize.generate_tokens(), collected from open-source projects. Each example lists its source file, the project it comes from, and that project's license. You may also want to check out the other functions and classes available in the tokenize module.
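
For orientation before the examples: tokenize.generate_tokens(readline) takes a readline callable that returns successive lines of source text and yields 5-tuples of (token type, token string, start position, end position, physical line). Below is a minimal sketch of that calling pattern; the code string being tokenized is purely illustrative.

import io
import tokenize

code = "total = price * quantity  # compute the cost\n"

# generate_tokens() expects a readline callable that returns str lines,
# so wrap the source string in io.StringIO and pass its readline method.
for tok_type, tok_string, start, end, line in tokenize.generate_tokens(
        io.StringIO(code).readline):
    print(tokenize.tok_name[tok_type], repr(tok_string), start, end)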
Example #1
Source File: cgitb.py From BinderFilter with MIT License | 6 votes |
def scanvars(reader, frame, locals):
    """Scan one logical line of Python and look up values of variables used."""
    vars, lasttoken, parent, prefix, value = [], None, None, '', __UNDEF__
    for ttype, token, start, end, line in tokenize.generate_tokens(reader):
        if ttype == tokenize.NEWLINE:
            break
        if ttype == tokenize.NAME and token not in keyword.kwlist:
            if lasttoken == '.':
                if parent is not __UNDEF__:
                    value = getattr(parent, token, __UNDEF__)
                vars.append((prefix + token, prefix, value))
            else:
                where, value = lookup(token, frame, locals)
                vars.append((token, where, value))
        elif token == '.':
            prefix += lasttoken + '.'
            parent = value
        else:
            parent, prefix = None, ''
        lasttoken = token
    return vars
Example #2
Source File: trace.py From Computable with MIT License | 6 votes |
def find_strings(filename):
    """Return a dict of possible docstring positions.

    The dict maps line numbers to strings.  There is an entry for
    line that contains only a string or a part of a triple-quoted
    string.
    """
    d = {}
    # If the first token is a string, then it's the module docstring.
    # Add this special case so that the test in the loop passes.
    prev_ttype = token.INDENT
    f = open(filename)
    for ttype, tstr, start, end, line in tokenize.generate_tokens(f.readline):
        if ttype == token.STRING:
            if prev_ttype == token.INDENT:
                sline, scol = start
                eline, ecol = end
                for i in range(sline, eline + 1):
                    d[i] = 1
        prev_ttype = ttype
    f.close()
    return d
Example #3
Source File: autopep8.py From python-netsurv with MIT License | 6 votes |
def fix_e402(self, result):
    (line_index, offset, target) = get_index_offset_contents(result,
                                                             self.source)
    for i in range(1, 100):
        line = "".join(self.source[line_index:line_index+i])
        try:
            generate_tokens("".join(line))
        except (SyntaxError, tokenize.TokenError):
            continue
        break
    if not (target in self.imports and self.imports[target] != line_index):
        mod_offset = get_module_imports_on_top_of_file(self.source,
                                                       line_index)
        self.source[mod_offset] = line + self.source[mod_offset]
    for offset in range(i):
        self.source[line_index+offset] = ''
Example #4
Source File: importer.py From importmagic with BSD 2-Clause "Simplified" License | 6 votes |
def _parse(self, source):
    reader = StringIO(source)
    # parse until EOF or TokenError (allows incomplete modules)
    tokens = []
    try:
        tokens.extend(tokenize.generate_tokens(reader.readline))
    except tokenize.TokenError:
        # TokenError happens always at EOF, for unclosed strings or brackets.
        # We don't care about that here, since we still can recover the whole
        # source code.
        pass
    self._tokens = tokens
    it = Iterator(self._tokens)
    self._imports_begin, self._imports_end = self._find_import_range(it)
    it = Iterator(self._tokens, start=self._imports_begin,
                  end=self._imports_end)
    self._parse_imports(it)
Example #5
Source File: trace.py From meddle with MIT License | 6 votes |
def find_strings(filename):
    """Return a dict of possible docstring positions.

    The dict maps line numbers to strings.  There is an entry for
    line that contains only a string or a part of a triple-quoted
    string.
    """
    d = {}
    # If the first token is a string, then it's the module docstring.
    # Add this special case so that the test in the loop passes.
    prev_ttype = token.INDENT
    f = open(filename)
    for ttype, tstr, start, end, line in tokenize.generate_tokens(f.readline):
        if ttype == token.STRING:
            if prev_ttype == token.INDENT:
                sline, scol = start
                eline, ecol = end
                for i in range(sline, eline + 1):
                    d[i] = 1
        prev_ttype = ttype
    f.close()
    return d
Example #6
Source File: cgitb.py From meddle with MIT License | 6 votes |
def scanvars(reader, frame, locals):
    """Scan one logical line of Python and look up values of variables used."""
    vars, lasttoken, parent, prefix, value = [], None, None, '', __UNDEF__
    for ttype, token, start, end, line in tokenize.generate_tokens(reader):
        if ttype == tokenize.NEWLINE:
            break
        if ttype == tokenize.NAME and token not in keyword.kwlist:
            if lasttoken == '.':
                if parent is not __UNDEF__:
                    value = getattr(parent, token, __UNDEF__)
                vars.append((prefix + token, prefix, value))
            else:
                where, value = lookup(token, frame, locals)
                vars.append((token, where, value))
        elif token == '.':
            prefix += lasttoken + '.'
            parent = value
        else:
            parent, prefix = None, ''
        lasttoken = token
    return vars
Example #7
Source File: __init__.py From pyta with GNU General Public License v3.0 | 6 votes |
def _verify_pre_check(filepath):
    """Check student code for certain issues."""
    # Make sure the program doesn't crash for students.
    # Could use some improvement for better logging and error reporting.
    try:
        # Check for inline "pylint:" comment, which may indicate a student
        # trying to disable a check.
        with tokenize.open(os.path.expanduser(filepath)) as f:
            for tok_type, content, _, _, _ in tokenize.generate_tokens(f.readline):
                if tok_type != tokenize.COMMENT:
                    continue
                match = pylint.constants.OPTION_RGX.search(content)
                if match is not None:
                    print('[ERROR] String "pylint:" found in comment. ' +
                          'No check run on file `{}.`\n'.format(filepath))
                    return False
    except IndentationError as e:
        print('[ERROR] python_ta could not check your code due to an ' +
              'indentation error at line {}.'.format(e.lineno))
        return False
    except tokenize.TokenError as e:
        print('[ERROR] python_ta could not check your code due to a ' +
              'syntax error in your file.')
        return False
    return True
Example #8
Source File: autopep8.py From python-netsurv with MIT License | 6 votes |
def _find_logical(source_lines):
    # Make a variable which is the index of all the starts of lines.
    logical_start = []
    logical_end = []
    last_newline = True
    parens = 0
    for t in generate_tokens(''.join(source_lines)):
        if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
                    tokenize.INDENT, tokenize.NL,
                    tokenize.ENDMARKER]:
            continue
        if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
            last_newline = True
            logical_end.append((t[3][0] - 1, t[2][1]))
            continue
        if last_newline and not parens:
            logical_start.append((t[2][0] - 1, t[2][1]))
            last_newline = False
        if t[0] == tokenize.OP:
            if t[1] in '([{':
                parens += 1
            elif t[1] in '}])':
                parens -= 1
    return (logical_start, logical_end)
Example #9
Source File: trace.py From ironpython2 with Apache License 2.0 | 6 votes |
def find_strings(filename):
    """Return a dict of possible docstring positions.

    The dict maps line numbers to strings.  There is an entry for
    line that contains only a string or a part of a triple-quoted
    string.
    """
    d = {}
    # If the first token is a string, then it's the module docstring.
    # Add this special case so that the test in the loop passes.
    prev_ttype = token.INDENT
    f = open(filename)
    for ttype, tstr, start, end, line in tokenize.generate_tokens(f.readline):
        if ttype == token.STRING:
            if prev_ttype == token.INDENT:
                sline, scol = start
                eline, ecol = end
                for i in range(sline, eline + 1):
                    d[i] = 1
        prev_ttype = ttype
    f.close()
    return d
Example #10
Source File: cgitb.py From ironpython2 with Apache License 2.0 | 6 votes |
def scanvars(reader, frame, locals):
    """Scan one logical line of Python and look up values of variables used."""
    vars, lasttoken, parent, prefix, value = [], None, None, '', __UNDEF__
    for ttype, token, start, end, line in tokenize.generate_tokens(reader):
        if ttype == tokenize.NEWLINE:
            break
        if ttype == tokenize.NAME and token not in keyword.kwlist:
            if lasttoken == '.':
                if parent is not __UNDEF__:
                    value = getattr(parent, token, __UNDEF__)
                vars.append((prefix + token, prefix, value))
            else:
                where, value = lookup(token, frame, locals)
                vars.append((token, where, value))
        elif token == '.':
            prefix += lasttoken + '.'
            parent = value
        else:
            parent, prefix = None, ''
        lasttoken = token
    return vars
Example #11
Source File: cgitb.py From Computable with MIT License | 6 votes |
def scanvars(reader, frame, locals):
    """Scan one logical line of Python and look up values of variables used."""
    vars, lasttoken, parent, prefix, value = [], None, None, '', __UNDEF__
    for ttype, token, start, end, line in tokenize.generate_tokens(reader):
        if ttype == tokenize.NEWLINE:
            break
        if ttype == tokenize.NAME and token not in keyword.kwlist:
            if lasttoken == '.':
                if parent is not __UNDEF__:
                    value = getattr(parent, token, __UNDEF__)
                vars.append((prefix + token, prefix, value))
            else:
                where, value = lookup(token, frame, locals)
                vars.append((token, where, value))
        elif token == '.':
            prefix += lasttoken + '.'
            parent = value
        else:
            parent, prefix = None, ''
        lasttoken = token
    return vars
Example #12
Source File: autopep8.py From python-netsurv with MIT License | 6 votes |
def fix_e402(self, result):
    (line_index, offset, target) = get_index_offset_contents(result,
                                                             self.source)
    for i in range(1, 100):
        line = "".join(self.source[line_index:line_index+i])
        try:
            generate_tokens("".join(line))
        except (SyntaxError, tokenize.TokenError):
            continue
        break
    if not (target in self.imports and self.imports[target] != line_index):
        mod_offset = get_module_imports_on_top_of_file(self.source,
                                                       line_index)
        self.source[mod_offset] = line + self.source[mod_offset]
    for offset in range(i):
        self.source[line_index+offset] = ''
Example #13
Source File: processor.py From linter-pylama with MIT License | 6 votes |
def file_tokens(self):
    """The complete set of tokens for a file.

    Accessing this attribute *may* raise an InvalidSyntax exception.

    :raises: flake8.exceptions.InvalidSyntax
    """
    if self._file_tokens is None:
        line_iter = iter(self.lines)
        try:
            self._file_tokens = list(tokenize.generate_tokens(
                lambda: next(line_iter)
            ))
        except tokenize.TokenError as exc:
            raise exceptions.InvalidSyntax(exc.message, exception=exc)
    return self._file_tokens
Example #14
Source File: check_whitespace.py From D-VAE with MIT License | 6 votes |
def get_parse_error(code):
    """
    Checks code for ambiguous tabs or other basic parsing issues.

    :param code: a string containing a file's worth of Python code
    :returns: a string containing a description of the first parse error encountered,
              or None if the code is ok
    """
    # note that this uses non-public elements from stdlib's tabnanny, because tabnanny
    # is (very frustratingly) written only to be used as a script, but using it that way
    # in this context requires writing temporarily files, running subprocesses, blah blah blah
    code_buffer = StringIO(code)
    try:
        tabnanny.process_tokens(tokenize.generate_tokens(code_buffer.readline))
    except tokenize.TokenError as err:
        return "Could not parse code: %s" % err
    except IndentationError as err:
        return "Indentation error: %s" % err
    except tabnanny.NannyNag as err:
        return "Ambiguous tab at line %d; line is '%s'." % (err.get_lineno(), err.get_line())
    return None
Example #15
Source File: trace.py From BinderFilter with MIT License | 6 votes |
def find_strings(filename):
    """Return a dict of possible docstring positions.

    The dict maps line numbers to strings.  There is an entry for
    line that contains only a string or a part of a triple-quoted
    string.
    """
    d = {}
    # If the first token is a string, then it's the module docstring.
    # Add this special case so that the test in the loop passes.
    prev_ttype = token.INDENT
    f = open(filename)
    for ttype, tstr, start, end, line in tokenize.generate_tokens(f.readline):
        if ttype == token.STRING:
            if prev_ttype == token.INDENT:
                sline, scol = start
                eline, ecol = end
                for i in range(sline, eline + 1):
                    d[i] = 1
        prev_ttype = ttype
    f.close()
    return d
Example #16
Source File: source.py From py with MIT License | 6 votes |
def deindent(lines, offset=None):
    if offset is None:
        for line in lines:
            line = line.expandtabs()
            s = line.lstrip()
            if s:
                offset = len(line)-len(s)
                break
        else:
            offset = 0
    if offset == 0:
        return list(lines)
    newlines = []

    def readline_generator(lines):
        for line in lines:
            yield line + '\n'
        while True:
            yield ''

    it = readline_generator(lines)

    try:
        for _, _, (sline, _), (eline, _), _ in tokenize.generate_tokens(lambda: next(it)):
            if sline > len(lines):
                break  # End of input reached
            if sline > len(newlines):
                line = lines[sline - 1].expandtabs()
                if line.lstrip() and line[:offset].isspace():
                    line = line[offset:]  # Deindent
                newlines.append(line)

            for i in range(sline, eline):
                # Don't deindent continuing lines of
                # multiline tokens (i.e. multiline strings)
                newlines.append(lines[i])
    except (IndentationError, tokenize.TokenError):
        pass
    # Add any lines we didn't see. E.g. if an exception was raised.
    newlines.extend(lines[len(newlines):])
    return newlines
Example #17
Source File: utils.py From typed-argument-parser with MIT License | 5 votes |
def tokenize_source(obj: object) -> Generator:
    """Returns a generator for the tokens of the object's source code."""
    source = inspect.getsource(obj)
    token_generator = tokenize.generate_tokens(StringIO(source).readline)
    return token_generator
Example #18
Source File: tokens.py From vnpy_crypto with MIT License | 5 votes |
def normalize_token_spacing(code):
    tokens = [(t[0], t[1])
              for t in tokenize.generate_tokens(StringIO(code).readline)]
    return pretty_untokenize(tokens)
Example #19
Source File: tokens.py From vnpy_crypto with MIT License | 5 votes |
def python_tokenize(code):
    # Since formulas can only contain Python expressions, and Python
    # expressions cannot meaningfully contain newlines, we'll just remove all
    # the newlines up front to avoid any complications:
    code = code.replace("\n", " ").strip()
    it = tokenize.generate_tokens(StringIO(code).readline)
    try:
        for (pytype, string, (_, start), (_, end), code) in it:
            if pytype == tokenize.ENDMARKER:
                break
            origin = Origin(code, start, end)
            assert pytype not in (tokenize.NL, tokenize.NEWLINE)
            if pytype == tokenize.ERRORTOKEN:
                raise PatsyError("error tokenizing input "
                                 "(maybe an unclosed string?)",
                                 origin)
            if pytype == tokenize.COMMENT:
                raise PatsyError("comments are not allowed", origin)
            yield (pytype, string, origin)
        else:  # pragma: no cover
            raise ValueError("stream ended without ENDMARKER?!?")
    except tokenize.TokenError as e:
        # TokenError is raised iff the tokenizer thinks that there is
        # some sort of multi-line construct in progress (e.g., an
        # unclosed parentheses, which in Python lets a virtual line
        # continue past the end of the physical line), and it hits the
        # end of the source text. We have our own error handling for
        # such cases, so just treat this as an end-of-stream.
        #
        # Just in case someone adds some other error case:
        assert e.args[0].startswith("EOF in multi-line")
        return
Example #20
Source File: expr.py From vnpy_crypto with MIT License | 5 votes |
def tokenize_string(source):
    """Tokenize a Python source code string.

    Parameters
    ----------
    source : str
        A Python source code string
    """
    line_reader = StringIO(source).readline
    for toknum, tokval, _, _, _ in tokenize.generate_tokens(line_reader):
        yield toknum, tokval
Example #21
Source File: template.py From nightmare with GNU General Public License v2.0 | 5 votes |
def __init__(self, text):
    self.text = text
    readline = iter([text]).next
    self.tokens = tokenize.generate_tokens(readline)
    self.index = 0
Example #22
Source File: source.py From python-netsurv with MIT License | 5 votes |
def deindent(lines, offset=None):
    if offset is None:
        for line in lines:
            line = line.expandtabs()
            s = line.lstrip()
            if s:
                offset = len(line)-len(s)
                break
        else:
            offset = 0
    if offset == 0:
        return list(lines)
    newlines = []

    def readline_generator(lines):
        for line in lines:
            yield line + '\n'
        while True:
            yield ''

    it = readline_generator(lines)

    try:
        for _, _, (sline, _), (eline, _), _ in tokenize.generate_tokens(lambda: next(it)):
            if sline > len(lines):
                break  # End of input reached
            if sline > len(newlines):
                line = lines[sline - 1].expandtabs()
                if line.lstrip() and line[:offset].isspace():
                    line = line[offset:]  # Deindent
                newlines.append(line)

            for i in range(sline, eline):
                # Don't deindent continuing lines of
                # multiline tokens (i.e. multiline strings)
                newlines.append(lines[i])
    except (IndentationError, tokenize.TokenError):
        pass
    # Add any lines we didn't see. E.g. if an exception was raised.
    newlines.extend(lines[len(newlines):])
    return newlines
Example #23
Source File: autopep8.py From python-netsurv with MIT License | 5 votes |
def fix_w605(self, result):
    (line_index, _, target) = get_index_offset_contents(result,
                                                        self.source)
    try:
        tokens = list(generate_tokens(target))
    except (SyntaxError, tokenize.TokenError):
        return
    for (pos, _msg) in get_w605_position(tokens):
        self.source[line_index] = '{}r{}'.format(
            target[:pos], target[pos:])
Example #24
Source File: autopep8.py From python-netsurv with MIT License | 5 votes |
def join_logical_line(logical_line):
    """Return single line based on logical line input."""
    indentation = _get_indentation(logical_line)

    return indentation + untokenize_without_newlines(
        generate_tokens(logical_line.lstrip())) + '\n'
Example #25
Source File: pycodestyle.py From python-netsurv with MIT License | 5 votes |
def generate_tokens(self):
    """Tokenize file, run physical line checks and yield tokens."""
    if self._io_error:
        self.report_error(1, 0, 'E902 %s' % self._io_error, readlines)
    tokengen = tokenize.generate_tokens(self.readline)
    try:
        for token in tokengen:
            if token[2][0] > self.total_lines:
                return
            self.noqa = token[4] and noqa(token[4])
            self.maybe_check_physical(token)
            yield token
    except (SyntaxError, tokenize.TokenError):
        self.report_invalid_syntax()
Example #26
Source File: autopep8.py From python-netsurv with MIT License | 5 votes |
def _get_indentword(source):
    """Return indentation type."""
    indent_word = '    '  # Default in case source has no indentation
    try:
        for t in generate_tokens(source):
            if t[0] == token.INDENT:
                indent_word = t[1]
                break
    except (SyntaxError, tokenize.TokenError):
        pass
    return indent_word
Example #27
Source File: autopep8.py From python-netsurv with MIT License | 5 votes |
def multiline_string_lines(source, include_docstrings=False):
    """Return line numbers that are within multiline strings.

    The line numbers are indexed at 1.

    Docstrings are ignored.
    """
    line_numbers = set()
    previous_token_type = ''
    try:
        for t in generate_tokens(source):
            token_type = t[0]
            start_row = t[2][0]
            end_row = t[3][0]

            if token_type == tokenize.STRING and start_row != end_row:
                if (
                    include_docstrings or
                    previous_token_type != tokenize.INDENT
                ):
                    # We increment by one since we want the contents of the
                    # string.
                    line_numbers |= set(range(1 + start_row, 1 + end_row))

            previous_token_type = token_type
    except (SyntaxError, tokenize.TokenError):
        pass

    return line_numbers
Example #28
Source File: source.py From python-netsurv with MIT License | 5 votes |
def getstatementrange_ast(lineno, source, assertion=False, astnode=None):
    if astnode is None:
        content = str(source)
        # See #4260:
        # don't produce duplicate warnings when compiling source to find ast
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            astnode = compile(content, "source", "exec", _AST_FLAG)

    start, end = get_statement_startend2(lineno, astnode)
    # we need to correct the end:
    # - ast-parsing strips comments
    # - there might be empty lines
    # - we might have lesser indented code blocks at the end
    if end is None:
        end = len(source.lines)

    if end > start + 1:
        # make sure we don't span differently indented code blocks
        # by using the BlockFinder helper used which inspect.getsource() uses itself
        block_finder = inspect.BlockFinder()
        # if we start with an indented line, put blockfinder to "started" mode
        block_finder.started = source.lines[start][0].isspace()
        it = ((x + "\n") for x in source.lines[start:end])
        try:
            for tok in tokenize.generate_tokens(lambda: next(it)):
                block_finder.tokeneater(*tok)
        except (inspect.EndOfBlock, IndentationError):
            end = block_finder.last + start
        except Exception:
            pass

    # the end might still point to a comment or empty line, correct it
    while end:
        line = source.lines[end - 1].lstrip()
        if line.startswith("#") or not line:
            end -= 1
        else:
            break
    return astnode, start, end
Example #29
Source File: autopep8.py From python-netsurv with MIT License | 5 votes |
def generate_tokens(self, text):
    """A stand-in for tokenize.generate_tokens()."""
    if text != self.last_text:
        string_io = io.StringIO(text)
        self.last_tokens = list(
            tokenize.generate_tokens(string_io.readline)
        )
        self.last_text = text
    return self.last_tokens
Example #30
Source File: autopep8.py From python-netsurv with MIT License | 5 votes |
def commented_out_code_lines(source):
    """Return line numbers of comments that are likely code.

    Commented-out code is bad practice, but modifying it just adds even
    more clutter.
    """
    line_numbers = []
    try:
        for t in generate_tokens(source):
            token_type = t[0]
            token_string = t[1]
            start_row = t[2][0]
            line = t[4]

            # Ignore inline comments.
            if not line.lstrip().startswith('#'):
                continue

            if token_type == tokenize.COMMENT:
                stripped_line = token_string.lstrip('#').strip()
                if (
                    ' ' in stripped_line and
                    '#' not in stripped_line and
                    check_syntax(stripped_line)
                ):
                    line_numbers.append(start_row)
    except (SyntaxError, tokenize.TokenError):
        pass

    return line_numbers