Python tokenize.ENDMARKER Examples

The following are 19 code examples of tokenize.ENDMARKER, the token type that the tokenize module emits as the final token of a stream. The examples are taken from open-source projects; the source file and originating project are noted above each example. You may also want to check out the other functions and constants of the tokenize module.
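Before the examples, here is a minimal, self-contained sketch of where ENDMARKER shows up: it is the last token the tokenizer produces for a complete source, which is why the examples below use it as a loop sentinel or end-of-stream check.

import io
import tokenize

# Tokenize a one-line source; ENDMARKER is the final token produced.
source = "x = 1 + 2\n"
for tok in tokenize.generate_tokens(io.StringIO(source).readline):
    print(tokenize.tok_name[tok.type], repr(tok.string))
    if tok.type == tokenize.ENDMARKER:
        break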
Example #1
Source File: autopep8.py    From filmkodi with Apache License 2.0
def _find_logical(source_lines):
    # Make a variable which is the index of all the starts of lines.
    logical_start = []
    logical_end = []
    last_newline = True
    parens = 0
    for t in generate_tokens(''.join(source_lines)):
        if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
                    tokenize.INDENT, tokenize.NL,
                    tokenize.ENDMARKER]:
            continue
        if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
            last_newline = True
            logical_end.append((t[3][0] - 1, t[2][1]))
            continue
        if last_newline and not parens:
            logical_start.append((t[2][0] - 1, t[2][1]))
            last_newline = False
        if t[0] == tokenize.OP:
            if t[1] in '([{':
                parens += 1
            elif t[1] in '}])':
                parens -= 1
    return (logical_start, logical_end) 
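A possible way to exercise _find_logical outside autopep8, using a stand-in for autopep8's generate_tokens() helper (which is called with the joined source text rather than a readline callable); the helper definition and the sample input below are assumptions for illustration:

import io
import tokenize

# Stand-in for autopep8's generate_tokens() helper (assumption for this sketch).
def generate_tokens(source):
    return tokenize.generate_tokens(io.StringIO(source).readline)

source_lines = ["def f(a,\n", "      b):\n", "    return a + b\n"]
starts, ends = _find_logical(source_lines)
print(starts)  # zero-based (row, column) of each logical line start
print(ends)    # zero-based (row, column) of each logical line end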
Example #2
Source File: autopep8.py    From python-netsurv with MIT License
def _find_logical(source_lines):
    # Make a variable which is the index of all the starts of lines.
    logical_start = []
    logical_end = []
    last_newline = True
    parens = 0
    for t in generate_tokens(''.join(source_lines)):
        if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
                    tokenize.INDENT, tokenize.NL,
                    tokenize.ENDMARKER]:
            continue
        if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
            last_newline = True
            logical_end.append((t[3][0] - 1, t[2][1]))
            continue
        if last_newline and not parens:
            logical_start.append((t[2][0] - 1, t[2][1]))
            last_newline = False
        if t[0] == tokenize.OP:
            if t[1] in '([{':
                parens += 1
            elif t[1] in '}])':
                parens -= 1
    return (logical_start, logical_end) 
Example #3
Source File: autopep8.py    From python-netsurv with MIT License
def _find_logical(source_lines):
    # Make a variable which is the index of all the starts of lines.
    logical_start = []
    logical_end = []
    last_newline = True
    parens = 0
    for t in generate_tokens(''.join(source_lines)):
        if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
                    tokenize.INDENT, tokenize.NL,
                    tokenize.ENDMARKER]:
            continue
        if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
            last_newline = True
            logical_end.append((t[3][0] - 1, t[2][1]))
            continue
        if last_newline and not parens:
            logical_start.append((t[2][0] - 1, t[2][1]))
            last_newline = False
        if t[0] == tokenize.OP:
            if t[1] in '([{':
                parens += 1
            elif t[1] in '}])':
                parens -= 1
    return (logical_start, logical_end) 
Example #4
Source File: autopep8.py    From PyDev.Debugger with Eclipse Public License 1.0
def _find_logical(source_lines):
    # Make a variable which is the index of all the starts of lines.
    logical_start = []
    logical_end = []
    last_newline = True
    parens = 0
    for t in generate_tokens(''.join(source_lines)):
        if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
                    tokenize.INDENT, tokenize.NL,
                    tokenize.ENDMARKER]:
            continue
        if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
            last_newline = True
            logical_end.append((t[3][0] - 1, t[2][1]))
            continue
        if last_newline and not parens:
            logical_start.append((t[2][0] - 1, t[2][1]))
            last_newline = False
        if t[0] == tokenize.OP:
            if t[1] in '([{':
                parens += 1
            elif t[1] in '}])':
                parens -= 1
    return (logical_start, logical_end) 
Example #5
Source File: parser.py    From linter-pylama with MIT License
def _parse_from_import_names(self, is_future_import):
        """Parse the 'y' part in a 'from x import y' statement."""
        if self.current.value == '(':
            self.consume(tk.OP)
            expected_end_kinds = (tk.OP, )
        else:
            expected_end_kinds = (tk.NEWLINE, tk.ENDMARKER)
        while self.current.kind not in expected_end_kinds and not (
                    self.current.kind == tk.OP and self.current.value == ';'):
            if self.current.kind != tk.NAME:
                self.stream.move()
                continue
            self.log.debug("parsing import, token is %r (%s)",
                           self.current.kind, self.current.value)
            if is_future_import:
                self.log.debug('found future import: %s', self.current.value)
                self.future_imports.add(self.current.value)
            self.consume(tk.NAME)
            self.log.debug("parsing import, token is %r (%s)",
                           self.current.kind, self.current.value)
            if self.current.kind == tk.NAME and self.current.value == 'as':
                self.consume(tk.NAME)  # as
                if self.current.kind == tk.NAME:
                    self.consume(tk.NAME)  # new name, irrelevant
            if self.current.value == ',':
                self.consume(tk.OP)
            self.log.debug("parsing import, token is %r (%s)",
                           self.current.kind, self.current.value) 
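The parser above walks raw tokens, where tk appears to be an alias for the tokenize module, and treats NEWLINE or ENDMARKER as the end of a bare import, but the closing OP token as the end of a parenthesized import list. As a point of reference, this is roughly the token stream such a line produces; the sample line is made up for illustration:

import io
import tokenize as tk

# A parenthesized "from x import (...)" line: the names are bracketed by
# OP '(' and OP ')', followed by NEWLINE and finally ENDMARKER.
line = "from os import (path, sep)\n"
for tok in tk.generate_tokens(io.StringIO(line).readline):
    print(tk.tok_name[tok.type], repr(tok.string))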
Example #6
Source File: tokens.py    From Splunking-Crime with GNU Affero General Public License v3.0
def python_tokenize(code):
    # Since formulas can only contain Python expressions, and Python
    # expressions cannot meaningfully contain newlines, we'll just remove all
    # the newlines up front to avoid any complications:
    code = code.replace("\n", " ").strip()
    it = tokenize.generate_tokens(StringIO(code).readline)
    try:
        for (pytype, string, (_, start), (_, end), code) in it:
            if pytype == tokenize.ENDMARKER:
                break
            origin = Origin(code, start, end)
            assert pytype not in (tokenize.NL, tokenize.NEWLINE)
            if pytype == tokenize.ERRORTOKEN:
                raise PatsyError("error tokenizing input "
                                 "(maybe an unclosed string?)",
                                 origin)
            if pytype == tokenize.COMMENT:
                raise PatsyError("comments are not allowed", origin)
            yield (pytype, string, origin)
        else: # pragma: no cover
            raise ValueError("stream ended without ENDMARKER?!?")
    except tokenize.TokenError as e:
        # TokenError is raised iff the tokenizer thinks that there is
        # some sort of multi-line construct in progress (e.g., an
        # unclosed parentheses, which in Python lets a virtual line
        # continue past the end of the physical line), and it hits the
        # end of the source text. We have our own error handling for
        # such cases, so just treat this as an end-of-stream.
        # 
        # Just in case someone adds some other error case:
        assert e.args[0].startswith("EOF in multi-line")
        return 
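The function above also relies on patsy's Origin and PatsyError classes. A stripped-down, dependency-free sketch of the same break-on-ENDMARKER pattern (the name simple_tokenize is made up for this illustration):

import tokenize
from io import StringIO

def simple_tokenize(code):
    # Single-line expressions only: fold newlines away, then stop at ENDMARKER.
    code = code.replace("\n", " ").strip()
    for tok in tokenize.generate_tokens(StringIO(code).readline):
        if tok.type == tokenize.ENDMARKER:
            break
        if tok.type in (tokenize.NL, tokenize.NEWLINE):
            continue
        yield tok.type, tok.string

print(list(simple_tokenize("a + np.log(b)")))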
Example #7
Source File: QuestParser.py    From Pirates-Online-Rewritten with BSD 3-Clause "New" or "Revised" License
def getLineOfTokens(gen):
    tokens = []
    nextNeg = 0
    token = gen.next()
    if token[0] == tokenize.ENDMARKER:
        return None
    while token[0] != tokenize.NEWLINE and token[0] != tokenize.NL:
        if token[0] == tokenize.COMMENT:
            pass
        elif token[0] == tokenize.OP and token[1] == '-':
            nextNeg = 1
        elif token[0] == tokenize.NUMBER:
            if nextNeg:
                tokens.append(-eval(token[1]))
                nextNeg = 0
            else:
                tokens.append(eval(token[1]))
        elif token[0] == tokenize.STRING:
            tokens.append(eval(token[1]))
        elif token[0] == tokenize.NAME:
            tokens.append(token[1])
        else:
            notify.warning('Ignored token type: %s on line: %s' % (tokenize.tok_name[token[0]], token[2][0]))
        token = gen.next()

    return tokens 
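The QuestParser code above is Python 2 (gen.next(), eval on NUMBER/STRING tokens). Under Python 3 the same token stream would be created and advanced roughly like this; the sample quest-script line is made up:

import io
import tokenize

source = "setQuest 101 'Find the treasure'\n"
gen = tokenize.generate_tokens(io.StringIO(source).readline)
token = next(gen)  # Python 3 replaces gen.next() with next(gen)
while token[0] not in (tokenize.NEWLINE, tokenize.NL, tokenize.ENDMARKER):
    print(tokenize.tok_name[token[0]], token[1])
    token = next(gen)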
Example #8
Source File: tokens.py    From vnpy_crypto with MIT License
def python_tokenize(code):
    # Since formulas can only contain Python expressions, and Python
    # expressions cannot meaningfully contain newlines, we'll just remove all
    # the newlines up front to avoid any complications:
    code = code.replace("\n", " ").strip()
    it = tokenize.generate_tokens(StringIO(code).readline)
    try:
        for (pytype, string, (_, start), (_, end), code) in it:
            if pytype == tokenize.ENDMARKER:
                break
            origin = Origin(code, start, end)
            assert pytype not in (tokenize.NL, tokenize.NEWLINE)
            if pytype == tokenize.ERRORTOKEN:
                raise PatsyError("error tokenizing input "
                                 "(maybe an unclosed string?)",
                                 origin)
            if pytype == tokenize.COMMENT:
                raise PatsyError("comments are not allowed", origin)
            yield (pytype, string, origin)
        else: # pragma: no cover
            raise ValueError("stream ended without ENDMARKER?!?")
    except tokenize.TokenError as e:
        # TokenError is raised iff the tokenizer thinks that there is
        # some sort of multi-line construct in progress (e.g., an
        # unclosed parentheses, which in Python lets a virtual line
        # continue past the end of the physical line), and it hits the
        # end of the source text. We have our own error handling for
        # such cases, so just treat this as an end-of-stream.
        # 
        # Just in case someone adds some other error case:
        assert e.args[0].startswith("EOF in multi-line")
        return 
Example #9
Source File: raw.py    From linter-pylama with MIT License
def is_single_token(token_number, tokens):
    '''Is this a single token matching token_number followed by ENDMARKER or NL
    tokens.
    '''
    return (TOKEN_NUMBER(tokens[0]) == token_number and
            all(TOKEN_NUMBER(t) in (tokenize.ENDMARKER, tokenize.NL)
                for t in tokens[1:])) 
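In radon's raw.py, TOKEN_NUMBER extracts the token type from a token tuple; assuming it is essentially operator.itemgetter(0), a quick usage sketch:

import io
import operator
import tokenize

TOKEN_NUMBER = operator.itemgetter(0)  # assumed equivalent of radon's helper

tokens = list(tokenize.generate_tokens(io.StringIO("# just a comment\n").readline))
# True when nothing but NL/ENDMARKER tokens follow the COMMENT token.
print(is_single_token(tokenize.COMMENT, tokens))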
Example #10
Source File: tokens.py    From vnpy_crypto with MIT License
def pretty_untokenize(typed_tokens):
    text = []
    prev_was_space_delim = False
    prev_wants_space = False
    prev_was_open_paren_or_comma = False
    prev_was_object_like = False
    brackets = []
    for token_type, token in typed_tokens:
        assert token_type not in (tokenize.INDENT, tokenize.DEDENT,
                                  tokenize.NEWLINE, tokenize.NL)
        if token_type == tokenize.ENDMARKER:
            continue
        if token_type in (tokenize.NAME, tokenize.NUMBER, tokenize.STRING):
            if prev_wants_space or prev_was_space_delim:
                text.append(" ")
            text.append(token)
            prev_wants_space = False
            prev_was_space_delim = True
        else:
            if token in ("(", "[", "{"):
                brackets.append(token)
            elif brackets and token in (")", "]", "}"):
                brackets.pop()
            this_wants_space_before = (token in _python_space_before)
            this_wants_space_after = (token in _python_space_after)
            # Special case for slice syntax: foo[:10]
            # Otherwise ":" is spaced after, like: "{1: ...}", "if a: ..."
            if token == ":" and brackets and brackets[-1] == "[":
                this_wants_space_after = False
            # Special case for foo(*args), foo(a, *args):
            if token in ("*", "**") and prev_was_open_paren_or_comma:
                this_wants_space_before = False
                this_wants_space_after = False
            # Special case for "a = foo(b=1)":
            if token == "=" and not brackets:
                this_wants_space_before = True
                this_wants_space_after = True
            # Special case for unary -, +. Our heuristic is that if we see the
            # + or - after something that looks like an object (a NAME,
            # NUMBER, STRING, or close paren) then it is probably binary,
            # otherwise it is probably unary.
            if token in ("+", "-") and not prev_was_object_like:
                this_wants_space_before = False
                this_wants_space_after = False
            if prev_wants_space or this_wants_space_before:
                text.append(" ")
            text.append(token)
            prev_wants_space = this_wants_space_after
            prev_was_space_delim = False
        if (token_type in (tokenize.NAME, tokenize.NUMBER, tokenize.STRING)
            or token == ")"):
            prev_was_object_like = True
        else:
            prev_was_object_like = False
        prev_was_open_paren_or_comma = token in ("(", ",")
    return "".join(text) 
Example #11
Source File: autopep8.py    From python-netsurv with MIT License
def _reindent_stats(tokens):
    """Return list of (lineno, indentlevel) pairs.

    One for each stmt and comment line. indentlevel is -1 for comment
    lines, as a signal that tokenize doesn't know what to do about them;
    indeed, they're our headache!

    """
    find_stmt = 1  # Next token begins a fresh stmt?
    level = 0  # Current indent level.
    stats = []

    for t in tokens:
        token_type = t[0]
        sline = t[2][0]
        line = t[4]

        if token_type == tokenize.NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            find_stmt = 1

        elif token_type == tokenize.INDENT:
            find_stmt = 1
            level += 1

        elif token_type == tokenize.DEDENT:
            find_stmt = 1
            level -= 1

        elif token_type == tokenize.COMMENT:
            if find_stmt:
                stats.append((sline, -1))
                # But we're still looking for a new stmt, so leave
                # find_stmt alone.

        elif token_type == tokenize.NL:
            pass

        elif find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            find_stmt = 0
            if line:   # Not endmarker.
                stats.append((sline, level))

    return stats 
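A small check of what _reindent_stats reports for a three-line snippet; the expected output noted below follows from the docstring (comment lines get indent level -1):

import io
import tokenize

source = (
    "def f():\n"
    "    # a comment\n"
    "    return 1\n"
)
tokens = tokenize.generate_tokens(io.StringIO(source).readline)
print(_reindent_stats(tokens))  # expected [(1, 0), (2, -1), (3, 1)]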
Example #12
Source File: autopep8.py    From PyDev.Debugger with Eclipse Public License 1.0
def _reindent_stats(tokens):
    """Return list of (lineno, indentlevel) pairs.

    One for each stmt and comment line. indentlevel is -1 for comment
    lines, as a signal that tokenize doesn't know what to do about them;
    indeed, they're our headache!

    """
    find_stmt = 1  # Next token begins a fresh stmt?
    level = 0  # Current indent level.
    stats = []

    for t in tokens:
        token_type = t[0]
        sline = t[2][0]
        line = t[4]

        if token_type == tokenize.NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            find_stmt = 1

        elif token_type == tokenize.INDENT:
            find_stmt = 1
            level += 1

        elif token_type == tokenize.DEDENT:
            find_stmt = 1
            level -= 1

        elif token_type == tokenize.COMMENT:
            if find_stmt:
                stats.append((sline, -1))
                # But we're still looking for a new stmt, so leave
                # find_stmt alone.

        elif token_type == tokenize.NL:
            pass

        elif find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            find_stmt = 0
            if line:   # Not endmarker.
                stats.append((sline, level))

    return stats 
Example #13
Source File: rewrite.py    From pytest with MIT License
def _get_assertion_exprs(src: bytes) -> Dict[int, str]:
    """Returns a mapping from {lineno: "assertion test expression"}"""
    ret = {}  # type: Dict[int, str]

    depth = 0
    lines = []  # type: List[str]
    assert_lineno = None  # type: Optional[int]
    seen_lines = set()  # type: Set[int]

    def _write_and_reset() -> None:
        nonlocal depth, lines, assert_lineno, seen_lines
        assert assert_lineno is not None
        ret[assert_lineno] = "".join(lines).rstrip().rstrip("\\")
        depth = 0
        lines = []
        assert_lineno = None
        seen_lines = set()

    tokens = tokenize.tokenize(io.BytesIO(src).readline)
    for tp, source, (lineno, offset), _, line in tokens:
        if tp == tokenize.NAME and source == "assert":
            assert_lineno = lineno
        elif assert_lineno is not None:
            # keep track of depth for the assert-message `,` lookup
            if tp == tokenize.OP and source in "([{":
                depth += 1
            elif tp == tokenize.OP and source in ")]}":
                depth -= 1

            if not lines:
                lines.append(line[offset:])
                seen_lines.add(lineno)
            # a non-nested comma separates the expression from the message
            elif depth == 0 and tp == tokenize.OP and source == ",":
                # one line assert with message
                if lineno in seen_lines and len(lines) == 1:
                    offset_in_trimmed = offset + len(lines[-1]) - len(line)
                    lines[-1] = lines[-1][:offset_in_trimmed]
                # multi-line assert with message
                elif lineno in seen_lines:
                    lines[-1] = lines[-1][:offset]
                # multi-line assert with escaped newline before message
                else:
                    lines.append(line[:offset])
                _write_and_reset()
            elif tp in {tokenize.NEWLINE, tokenize.ENDMARKER}:
                _write_and_reset()
            elif lines and lineno not in seen_lines:
                lines.append(line)
                seen_lines.add(lineno)

    return ret 
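A quick check of the mapping this builds, assuming the excerpt above has its io/tokenize/typing imports in place:

src = b"assert x == 1, 'x must be one'\nassert (y\n        > 0)\n"
print(_get_assertion_exprs(src))
# roughly {1: "x == 1", 2: "(y\n        > 0)"}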
Example #14
Source File: rewrite.py    From python-netsurv with MIT License
def _get_assertion_exprs(src: bytes):  # -> Dict[int, str]
    """Returns a mapping from {lineno: "assertion test expression"}"""
    ret = {}

    depth = 0
    lines = []
    assert_lineno = None
    seen_lines = set()

    def _write_and_reset() -> None:
        nonlocal depth, lines, assert_lineno, seen_lines
        ret[assert_lineno] = "".join(lines).rstrip().rstrip("\\")
        depth = 0
        lines = []
        assert_lineno = None
        seen_lines = set()

    tokens = tokenize.tokenize(io.BytesIO(src).readline)
    for tp, src, (lineno, offset), _, line in tokens:
        if tp == tokenize.NAME and src == "assert":
            assert_lineno = lineno
        elif assert_lineno is not None:
            # keep track of depth for the assert-message `,` lookup
            if tp == tokenize.OP and src in "([{":
                depth += 1
            elif tp == tokenize.OP and src in ")]}":
                depth -= 1

            if not lines:
                lines.append(line[offset:])
                seen_lines.add(lineno)
            # a non-nested comma separates the expression from the message
            elif depth == 0 and tp == tokenize.OP and src == ",":
                # one line assert with message
                if lineno in seen_lines and len(lines) == 1:
                    offset_in_trimmed = offset + len(lines[-1]) - len(line)
                    lines[-1] = lines[-1][:offset_in_trimmed]
                # multi-line assert with message
                elif lineno in seen_lines:
                    lines[-1] = lines[-1][:offset]
                # multi-line assert with escaped newline before message
                else:
                    lines.append(line[:offset])
                _write_and_reset()
            elif tp in {tokenize.NEWLINE, tokenize.ENDMARKER}:
                _write_and_reset()
            elif lines and lineno not in seen_lines:
                lines.append(line)
                seen_lines.add(lineno)

    return ret 
Example #15
Source File: config_parser.py    From gin-config with Apache License 2.0
def parse_statement(self):
    """Parse a single statement.

    Returns:
      Either a `BindingStatement`, `ImportStatement`, `IncludeStatement`, or
      `None` if no more statements can be parsed (EOF reached).
    """
    self._skip_whitespace_and_comments()
    if self._current_token.kind == tokenize.ENDMARKER:
      return None

    # Save off location, but ignore char_num for any statement-level errors.
    stmt_loc = self._current_location(ignore_char_num=True)
    binding_key_or_keyword = self._parse_selector()
    statement = None
    if self._current_token.value != '=':
      if binding_key_or_keyword == 'import':
        module = self._parse_selector(scoped=False)
        statement = ImportStatement(module, stmt_loc)
      elif binding_key_or_keyword == 'include':
        str_loc = self._current_location()
        success, filename = self._maybe_parse_basic_type()
        if not success or not isinstance(filename, str):
          self._raise_syntax_error('Expected file path as string.', str_loc)
        statement = IncludeStatement(filename, stmt_loc)
      else:
        self._raise_syntax_error("Expected '='.")
    else:  # We saw an '='.
      self._advance_one_token()
      value = self.parse_value()
      scope, selector, arg_name = parse_binding_key(binding_key_or_keyword)
      statement = BindingStatement(scope, selector, arg_name, value, stmt_loc)

    assert statement, 'Internal parsing error.'

    if (self._current_token.kind != tokenize.NEWLINE and
        self._current_token.kind != tokenize.ENDMARKER):
      self._raise_syntax_error('Expected newline.')
    elif self._current_token.kind == tokenize.NEWLINE:
      self._advance_one_token()

    return statement 
Example #16
Source File: autopep8.py    From python-netsurv with MIT License
def _reindent_stats(tokens):
    """Return list of (lineno, indentlevel) pairs.

    One for each stmt and comment line. indentlevel is -1 for comment
    lines, as a signal that tokenize doesn't know what to do about them;
    indeed, they're our headache!

    """
    find_stmt = 1  # Next token begins a fresh stmt?
    level = 0  # Current indent level.
    stats = []

    for t in tokens:
        token_type = t[0]
        sline = t[2][0]
        line = t[4]

        if token_type == tokenize.NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            find_stmt = 1

        elif token_type == tokenize.INDENT:
            find_stmt = 1
            level += 1

        elif token_type == tokenize.DEDENT:
            find_stmt = 1
            level -= 1

        elif token_type == tokenize.COMMENT:
            if find_stmt:
                stats.append((sline, -1))
                # But we're still looking for a new stmt, so leave
                # find_stmt alone.

        elif token_type == tokenize.NL:
            pass

        elif find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            find_stmt = 0
            if line:   # Not endmarker.
                stats.append((sline, level))

    return stats 
Example #17
Source File: tokens.py    From Splunking-Crime with GNU Affero General Public License v3.0
def pretty_untokenize(typed_tokens):
    text = []
    prev_was_space_delim = False
    prev_wants_space = False
    prev_was_open_paren_or_comma = False
    prev_was_object_like = False
    brackets = []
    for token_type, token in typed_tokens:
        assert token_type not in (tokenize.INDENT, tokenize.DEDENT,
                                  tokenize.NEWLINE, tokenize.NL)
        if token_type == tokenize.ENDMARKER:
            continue
        if token_type in (tokenize.NAME, tokenize.NUMBER, tokenize.STRING):
            if prev_wants_space or prev_was_space_delim:
                text.append(" ")
            text.append(token)
            prev_wants_space = False
            prev_was_space_delim = True
        else:
            if token in ("(", "[", "{"):
                brackets.append(token)
            elif brackets and token in (")", "]", "}"):
                brackets.pop()
            this_wants_space_before = (token in _python_space_before)
            this_wants_space_after = (token in _python_space_after)
            # Special case for slice syntax: foo[:10]
            # Otherwise ":" is spaced after, like: "{1: ...}", "if a: ..."
            if token == ":" and brackets and brackets[-1] == "[":
                this_wants_space_after = False
            # Special case for foo(*args), foo(a, *args):
            if token in ("*", "**") and prev_was_open_paren_or_comma:
                this_wants_space_before = False
                this_wants_space_after = False
            # Special case for "a = foo(b=1)":
            if token == "=" and not brackets:
                this_wants_space_before = True
                this_wants_space_after = True
            # Special case for unary -, +. Our heuristic is that if we see the
            # + or - after something that looks like an object (a NAME,
            # NUMBER, STRING, or close paren) then it is probably binary,
            # otherwise it is probably unary.
            if token in ("+", "-") and not prev_was_object_like:
                this_wants_space_before = False
                this_wants_space_after = False
            if prev_wants_space or this_wants_space_before:
                text.append(" ")
            text.append(token)
            prev_wants_space = this_wants_space_after
            prev_was_space_delim = False
        if (token_type in (tokenize.NAME, tokenize.NUMBER, tokenize.STRING)
            or token == ")"):
            prev_was_object_like = True
        else:
            prev_was_object_like = False
        prev_was_open_paren_or_comma = token in ("(", ",")
    return "".join(text) 
Example #18
Source File: autopep8.py    From filmkodi with Apache License 2.0
def _reindent_stats(tokens):
    """Return list of (lineno, indentlevel) pairs.

    One for each stmt and comment line. indentlevel is -1 for comment lines, as
    a signal that tokenize doesn't know what to do about them; indeed, they're
    our headache!

    """
    find_stmt = 1  # Next token begins a fresh stmt?
    level = 0  # Current indent level.
    stats = []

    for t in tokens:
        token_type = t[0]
        sline = t[2][0]
        line = t[4]

        if token_type == tokenize.NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            find_stmt = 1

        elif token_type == tokenize.INDENT:
            find_stmt = 1
            level += 1

        elif token_type == tokenize.DEDENT:
            find_stmt = 1
            level -= 1

        elif token_type == tokenize.COMMENT:
            if find_stmt:
                stats.append((sline, -1))
                # But we're still looking for a new stmt, so leave
                # find_stmt alone.

        elif token_type == tokenize.NL:
            pass

        elif find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            find_stmt = 0
            if line:   # Not endmarker.
                stats.append((sline, level))

    return stats 
Example #19
Source File: typechecks.py    From h2o4gpu with Apache License 2.0
def _get_lambda_source_code(lambda_fn, src):
    """Attempt to find the source code of the ``lambda_fn`` within the string ``src``."""

    def gen_lambdas():

        def gen():
            yield src + "\n"

        g = gen()
        step = 0
        tokens = []
        for tok in tokenize.generate_tokens(
                getattr(g, "next", getattr(g, "__next__", None))):
            if step == 0:
                if tok[0] == tokenize.NAME and tok[1] == "lambda":
                    step = 1
                    tokens = [tok]
                    level = 0
            elif step == 1:
                if tok[0] == tokenize.NAME:
                    tokens.append(tok)
                    step = 2
                else:
                    step = 0
            elif step == 2:
                if tok[0] == tokenize.OP and tok[1] == ":":
                    tokens.append(tok)
                    step = 3
                else:
                    step = 0
            elif step == 3:
                if level == 0 and (tok[0] == tokenize.OP and tok[1] in ",)" or
                                   tok[0] == tokenize.ENDMARKER):
                    yield tokenize.untokenize(tokens).strip()
                    step = 0
                else:
                    tokens.append(tok)
                    if tok[0] == tokenize.OP:
                        if tok[1] in "[({": level += 2
                        if tok[1] in "])}": level -= 1
        assert not tokens

    actual_code = lambda_fn.__code__.co_code
    for lambda_src in gen_lambdas():
        try:
            fn = eval(lambda_src, globals(), locals())
            if fn.__code__.co_code == actual_code:
                return lambda_src.split(":", 1)[1].strip()
        except Exception:
            pass
    return "<lambda>"