Python tokenize.COMMENT Examples

The following are 27 code examples of tokenize.COMMENT, the token type that the tokenize module assigns to comments. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tokenize, or try the search function.
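Before the examples, a minimal self-contained sketch of the common pattern they all share: iterate the token stream and compare each token's type against tokenize.COMMENT. The source string is invented for illustration.

import io
import tokenize

source = "x = 1  # increment later\nprint(x)\n"
for tok in tokenize.generate_tokens(io.StringIO(source).readline):
    if tok.type == tokenize.COMMENT:
        print(tok.start, tok.string)  # prints: (1, 7) # increment later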
Example #1
Source File: raw_metrics.py    From linter-pylama with MIT License
def get_type(tokens, start_index):
    """return the line type : docstring, comment, code, empty"""
    i = start_index
    tok_type = tokens[i][0]
    start = tokens[i][2]
    pos = start
    line_type = None
    while i < len(tokens) and tokens[i][2][0] == start[0]:
        tok_type = tokens[i][0]
        pos = tokens[i][3]
        if line_type is None:
            if tok_type == tokenize.STRING:
                line_type = 'docstring_lines'
            elif tok_type == tokenize.COMMENT:
                line_type = 'comment_lines'
            elif tok_type in JUNK:
                pass
            else:
                line_type = 'code_lines'
        i += 1
    if line_type is None:
        line_type = 'empty_lines'
    elif i < len(tokens) and tokens[i][0] == tokenize.NEWLINE:
        i += 1
    return i, pos[0] - start[0] + 1, line_type 
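A hedged usage sketch for get_type: the tokens argument is the full token list of a module, for example from tokenize.generate_tokens. JUNK is defined elsewhere in raw_metrics.py; it is assumed here to be (tokenize.NL, tokenize.INDENT).

import io
import tokenize

JUNK = (tokenize.NL, tokenize.INDENT)  # assumption; see raw_metrics.py

source = '"""Module docstring."""\nx = 1  # a comment\n'
tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
print(get_type(tokens, 0))  # e.g. (2, 1, 'docstring_lines')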
Example #2
Source File: __init__.py    From pyta with GNU General Public License v3.0
def _verify_pre_check(filepath):
    """Check student code for certain issues."""
    # Make sure the program doesn't crash for students.
    # Could use some improvement for better logging and error reporting.
    try:
        # Check for inline "pylint:" comment, which may indicate a student
        # trying to disable a check.
        with tokenize.open(os.path.expanduser(filepath)) as f:
            for tok_type, content, _, _, _ in tokenize.generate_tokens(f.readline):
                if tok_type != tokenize.COMMENT:
                    continue
                match = pylint.constants.OPTION_RGX.search(content)
                if match is not None:
                    print('[ERROR] String "pylint:" found in comment. ' +
                          'No check run on file `{}`.\n'.format(filepath))
                    return False
    except IndentationError as e:
        print('[ERROR] python_ta could not check your code due to an ' +
              'indentation error at line {}.'.format(e.lineno))
        return False
    except tokenize.TokenError as e:
        print('[ERROR] python_ta could not check your code due to a ' +
              'syntax error in your file.')
        return False
    return True 
Example #3
Source File: config_scope.py    From sacred with MIT License
def find_doc_for(ast_entry, body_lines):
    lineno = ast_entry.lineno - 1
    line_io = io.BytesIO(body_lines[lineno].encode())
    try:
        tokens = tokenize(line_io.readline) or []
        line_comments = [t.string for t in tokens if t.type == COMMENT]

        if line_comments:
            formatted_lcs = [l[1:].strip() for l in line_comments]
            filtered_lcs = [l for l in formatted_lcs if not is_ignored(l)]
            if filtered_lcs:
                return filtered_lcs[0]
    except TokenError:
        pass

    lineno -= 1
    while lineno >= 0:
        if iscomment(body_lines[lineno]):
            comment = body_lines[lineno].strip("# ")
            if not is_ignored(comment):
                return comment
        if not body_lines[lineno].strip() == "":
            return None
        lineno -= 1
    return None 
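The inline-comment half of this lookup can be tried in isolation; is_ignored and iscomment are sacred's own helpers and are not needed for this sketch.

import io
from tokenize import tokenize, COMMENT

line = b"batch_size = 32  # size of each training batch\n"
tokens = tokenize(io.BytesIO(line).readline)
print([t.string for t in tokens if t.type == COMMENT])
# ['# size of each training batch']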
Example #4
Source File: strings.py    From python-netsurv with MIT License
def process_tokens(self, tokens):
        encoding = "ascii"
        for i, (tok_type, token, start, _, line) in enumerate(tokens):
            if tok_type == tokenize.ENCODING:
                # this is always the first token processed
                encoding = token
            elif tok_type == tokenize.STRING:
                # 'token' is the whole un-parsed token; we can look at the start
                # of it to see whether it's a raw or unicode string etc.
                self.process_string_token(token, start[0])
                # We figure the next token, ignoring comments & newlines:
                j = i + 1
                while j < len(tokens) and tokens[j].type in (
                    tokenize.NEWLINE,
                    tokenize.NL,
                    tokenize.COMMENT,
                ):
                    j += 1
                next_token = tokens[j] if j < len(tokens) else None
                if encoding != "ascii":
                    # We convert `tokenize` character count into a byte count,
                    # to match with astroid `.col_offset`
                    start = (start[0], len(line[: start[1]].encode(encoding)))
                self.string_tokens[start] = (str_eval(token), next_token) 
Example #5
Source File: pygettext.py    From oss-ftp with MIT License
def __openseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings.  Record the
            # line number of the first line of the strings and update the list
            # of messages seen.  Reset state for the next batch.  If there
            # were no strings inside _(), then just ignore this entry.
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
        elif ttype == tokenize.STRING:
            self.__data.append(safe_eval(tstring))
        elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                           token.NEWLINE, tokenize.NL]:
            # warn if we see anything else than STRING or whitespace
            print >> sys.stderr, _(
                '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
                ) % {
                'token': tstring,
                'file': self.__curfile,
                'lineno': self.__lineno
                }
            self.__state = self.__waiting 
Example #6
Source File: pycodestyle.py    From linter-pylama with MIT License
def _break_around_binary_operators(tokens):
    """Private function to reduce duplication.

    This factors out the shared details between
    :func:`break_before_binary_operator` and
    :func:`break_after_binary_operator`.
    """
    line_break = False
    unary_context = True
    # Previous non-newline token types and text
    previous_token_type = None
    previous_text = None
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.COMMENT:
            continue
        if ('\n' in text or '\r' in text) and token_type != tokenize.STRING:
            line_break = True
        else:
            yield (token_type, text, previous_token_type, previous_text,
                   line_break, unary_context, start)
            unary_context = text in '([{,;'
            line_break = False
            previous_token_type = token_type
            previous_text = text 
Example #7
Source File: autopep8.py    From python-netsurv with MIT License
def _find_logical(source_lines):
    # Make a variable which is the index of all the starts of lines.
    logical_start = []
    logical_end = []
    last_newline = True
    parens = 0
    for t in generate_tokens(''.join(source_lines)):
        if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
                    tokenize.INDENT, tokenize.NL,
                    tokenize.ENDMARKER]:
            continue
        if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
            last_newline = True
            logical_end.append((t[3][0] - 1, t[2][1]))
            continue
        if last_newline and not parens:
            logical_start.append((t[2][0] - 1, t[2][1]))
            last_newline = False
        if t[0] == tokenize.OP:
            if t[1] in '([{':
                parens += 1
            elif t[1] in '}])':
                parens -= 1
    return (logical_start, logical_end) 
Example #8
Source File: pycodestyle.py    From python-netsurv with MIT License
def _break_around_binary_operators(tokens):
    """Private function to reduce duplication.

    This factors out the shared details between
    :func:`break_before_binary_operator` and
    :func:`break_after_binary_operator`.
    """
    line_break = False
    unary_context = True
    # Previous non-newline token types and text
    previous_token_type = None
    previous_text = None
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.COMMENT:
            continue
        if ('\n' in text or '\r' in text) and token_type != tokenize.STRING:
            line_break = True
        else:
            yield (token_type, text, previous_token_type, previous_text,
                   line_break, unary_context, start)
            unary_context = text in '([{,;'
            line_break = False
            previous_token_type = token_type
            previous_text = text 
Example #9
Source File: gradelib.py    From xqueue-watcher with GNU Affero General Public License v3.0
def _count_tokens(code, string):
    """
    Return a count of how many times `string` appears as a keyword in `code`.
    """
    count = 0

    try:
        for ttyp, ttok, __, __, __ in _tokens(code):
            if ttyp in (tokenize.COMMENT, tokenize.STRING):
                continue
            if ttok == string:
                count += 1
    except:
        # The input code was bad in some way. It will fail later on.
        pass
    return count 
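A self-contained variant of the same idea, using tokenize.generate_tokens directly instead of the module's private _tokens helper (count_tokens is a hypothetical name):

import io
import tokenize

def count_tokens(code, string):
    # Count occurrences of `string` as an actual token, skipping
    # comments and string literals, as _count_tokens above does.
    count = 0
    for tok in tokenize.generate_tokens(io.StringIO(code).readline):
        if tok.type in (tokenize.COMMENT, tokenize.STRING):
            continue
        if tok.string == string:
            count += 1
    return count

print(count_tokens("x = 1  # if only\n", "if"))  # 0: 'if' occurs only in a comment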
Example #10
Source File: autopep8.py    From PyDev.Debugger with Eclipse Public License 1.0
def _find_logical(source_lines):
    # Make a variable which is the index of all the starts of lines.
    logical_start = []
    logical_end = []
    last_newline = True
    parens = 0
    for t in generate_tokens(''.join(source_lines)):
        if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
                    tokenize.INDENT, tokenize.NL,
                    tokenize.ENDMARKER]:
            continue
        if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
            last_newline = True
            logical_end.append((t[3][0] - 1, t[2][1]))
            continue
        if last_newline and not parens:
            logical_start.append((t[2][0] - 1, t[2][1]))
            last_newline = False
        if t[0] == tokenize.OP:
            if t[1] in '([{':
                parens += 1
            elif t[1] in '}])':
                parens -= 1
    return (logical_start, logical_end) 
Example #11
Source File: gradelib.py    From xqueue-watcher with GNU Affero General Public License v3.0
def count_non_comment_lines(at_least=None, at_most=None, exactly=None, error_msg=None):
    """
    Returns an input check function that checks that the number of non-comment,
    non-blank source lines conforms to the rules in the arguments.
    """
    def check(code):
        linenums = set()
        for ttyp, ttok, (srow, __), __, __ in _tokens(code):
            if ttyp in (tokenize.COMMENT, tokenize.STRING):
                # Comments and strings don't count toward line count. If a string
                # is the only thing on a line, then it's probably a docstring, so
                # don't count it.
                continue
            if not ttok.strip():
                # Tokens that are only whitespace don't count.
                continue
            linenums.add(srow)
        num = len(linenums)
        return _check_occurs(None, num, at_least, at_most, exactly, error_msg)
    return check 
Example #12
Source File: pygettext.py    From oss-ftp with MIT License
def __waiting(self, ttype, tstring, lineno):
        opts = self.__options
        # Do docstring extractions, if enabled
        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
            # module docstring?
            if self.__freshmodule:
                if ttype == tokenize.STRING:
                    self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                    self.__freshmodule = 0
                elif ttype not in (tokenize.COMMENT, tokenize.NL):
                    self.__freshmodule = 0
                return
            # class docstring?
            if ttype == tokenize.NAME and tstring in ('class', 'def'):
                self.__state = self.__suiteseen
                return
        if ttype == tokenize.NAME and tstring in opts.keywords:
            self.__state = self.__keywordseen 
Example #13
Source File: reindent.py    From Upgrade-to-Python3 with Apache License 2.0
def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((slinecol[0], -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((slinecol[0], self.level))
Example #14
Source File: codecontainer.py    From ironpython2 with Apache License 2.0
def _ProcessToken(self, type, token, spos, epos, line):
        srow, scol = spos
        erow, ecol = epos
        self.GetText() # Prime us.
        linenum = srow - 1 # Lines zero based for us too.
        realCharPos = self.lineOffsets[linenum] + scol
        numskipped = realCharPos - self.lastPos
        if numskipped==0:
            pass
        elif numskipped==1:
            self.attrs.append(axdebug.SOURCETEXT_ATTR_COMMENT)
        else:
            self.attrs.append((axdebug.SOURCETEXT_ATTR_COMMENT, numskipped))
        kwSize = len(token)
        self.lastPos = realCharPos + kwSize
        attr = 0

        if type==tokenize.NAME:
            if token in _keywords:
                attr = axdebug.SOURCETEXT_ATTR_KEYWORD
        elif type==tokenize.STRING:
            attr = axdebug.SOURCETEXT_ATTR_STRING
        elif type==tokenize.NUMBER:
            attr = axdebug.SOURCETEXT_ATTR_NUMBER
        elif type==tokenize.OP:
            attr = axdebug.SOURCETEXT_ATTR_OPERATOR
        elif type==tokenize.COMMENT:
            attr = axdebug.SOURCETEXT_ATTR_COMMENT
        # else attr remains zero...
        if kwSize==0:
            pass
        elif kwSize==1:
            self.attrs.append(attr)
        else:
            self.attrs.append((attr, kwSize)) 
Example #15
Source File: inspect.py    From BinderFilter with MIT License
def tokeneater(self, type, token, srow_scol, erow_ecol, line):
        srow, scol = srow_scol
        erow, ecol = erow_ecol
        if not self.started:
            # look for the first "def", "class" or "lambda"
            if token in ("def", "class", "lambda"):
                if token == "lambda":
                    self.islambda = True
                self.started = True
            self.passline = True    # skip to the end of the line
        elif type == tokenize.NEWLINE:
            self.passline = False   # stop skipping when a NEWLINE is seen
            self.last = srow
            if self.islambda:       # lambdas always end at the first NEWLINE
                raise EndOfBlock
        elif self.passline:
            pass
        elif type == tokenize.INDENT:
            self.indent = self.indent + 1
            self.passline = True
        elif type == tokenize.DEDENT:
            self.indent = self.indent - 1
            # the end of matching indent/dedent pairs end a block
            # (note that this only works for "def"/"class" blocks,
            #  not e.g. for "if: else:" or "try: finally:" blocks)
            if self.indent <= 0:
                raise EndOfBlock
        elif self.indent == 0 and type not in (tokenize.COMMENT, tokenize.NL):
            # any other token on the same indentation level end the previous
            # block as well, except the pseudo-tokens COMMENT and NL.
            raise EndOfBlock 
Example #16
Source File: tokens.py    From vnpy_crypto with MIT License
def python_tokenize(code):
    # Since formulas can only contain Python expressions, and Python
    # expressions cannot meaningfully contain newlines, we'll just remove all
    # the newlines up front to avoid any complications:
    code = code.replace("\n", " ").strip()
    it = tokenize.generate_tokens(StringIO(code).readline)
    try:
        for (pytype, string, (_, start), (_, end), code) in it:
            if pytype == tokenize.ENDMARKER:
                break
            origin = Origin(code, start, end)
            assert pytype not in (tokenize.NL, tokenize.NEWLINE)
            if pytype == tokenize.ERRORTOKEN:
                raise PatsyError("error tokenizing input "
                                 "(maybe an unclosed string?)",
                                 origin)
            if pytype == tokenize.COMMENT:
                raise PatsyError("comments are not allowed", origin)
            yield (pytype, string, origin)
        else: # pragma: no cover
            raise ValueError("stream ended without ENDMARKER?!?")
    except tokenize.TokenError as e:
        # TokenError is raised iff the tokenizer thinks that there is
        # some sort of multi-line construct in progress (e.g., an
        # unclosed parentheses, which in Python lets a virtual line
        # continue past the end of the physical line), and it hits the
        # end of the source text. We have our own error handling for
        # such cases, so just treat this as an end-of-stream.
        # 
        # Just in case someone adds some other error case:
        assert e.args[0].startswith("EOF in multi-line")
        return 
Example #17
Source File: reindent.py    From D-VAE with MIT License
def tokeneater(self, type, token, pos, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        sline, scol = pos
        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))
Example #18
Source File: inspect.py    From ironpython2 with Apache License 2.0
def tokeneater(self, type, token, srow_scol, erow_ecol, line):
        srow, scol = srow_scol
        erow, ecol = erow_ecol
        if not self.started:
            # look for the first "def", "class" or "lambda"
            if token in ("def", "class", "lambda"):
                if token == "lambda":
                    self.islambda = True
                self.started = True
            self.passline = True    # skip to the end of the line
        elif type == tokenize.NEWLINE:
            self.passline = False   # stop skipping when a NEWLINE is seen
            self.last = srow
            if self.islambda:       # lambdas always end at the first NEWLINE
                raise EndOfBlock
        elif self.passline:
            pass
        elif type == tokenize.INDENT:
            self.indent = self.indent + 1
            self.passline = True
        elif type == tokenize.DEDENT:
            self.indent = self.indent - 1
            # the end of matching indent/dedent pairs end a block
            # (note that this only works for "def"/"class" blocks,
            #  not e.g. for "if: else:" or "try: finally:" blocks)
            if self.indent <= 0:
                raise EndOfBlock
        elif self.indent == 0 and type not in (tokenize.COMMENT, tokenize.NL):
            # any other token on the same indentation level end the previous
            # block as well, except the pseudo-tokens COMMENT and NL.
            raise EndOfBlock 
Example #19
Source File: utils.py    From typed-argument-parser with MIT License
def get_class_variables(cls: type) -> OrderedDict:
    """Returns an OrderedDict mapping class variables to their additional information (currently just comments)."""
    # Get mapping from line number to tokens
    line_to_tokens = source_line_to_tokens(cls)

    # Get class variable column number
    class_variable_column = get_class_column(cls)

    # Extract class variables
    variable_to_comment = OrderedDict()
    for tokens in line_to_tokens.values():
        for i, token in enumerate(tokens):

            # Skip whitespace
            if token['token'].strip() == '':
                continue

            # Match class variable
            if (token['token_type'] == tokenize.NAME and
                    token['start_column'] == class_variable_column and
                    len(tokens) > i + 1 and
                    tokens[i + 1]['token'] in ['=', ':']):

                class_variable = token['token']
                variable_to_comment[class_variable] = {'comment': ''}

                # Find the comment (if it exists)
                for j in range(i + 1, len(tokens)):
                    if tokens[j]['token_type'] == tokenize.COMMENT:
                        # Leave out "#" and whitespace from comment
                        variable_to_comment[class_variable]['comment'] = tokens[j]['token'][1:].strip()
                        break

            break

    return variable_to_comment 
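A hedged usage sketch; the class body is invented, and the output shape is inferred from the code above rather than taken from the project's documentation.

class MyArgs:
    name: str  # the name to greet
    times: int = 1  # how many times to greet

print(get_class_variables(MyArgs))
# expected shape:
# OrderedDict([('name', {'comment': 'the name to greet'}),
#              ('times', {'comment': 'how many times to greet'})])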
Example #20
Source File: pycodestyle.py    From python-netsurv with MIT License
def whitespace_before_comment(logical_line, tokens):
    r"""Separate inline comments by at least two spaces.

    An inline comment is a comment on the same line as a statement.
    Inline comments should be separated by at least two spaces from the
    statement. They should start with a # and a single space.

    Each line of a block comment starts with a # and a single space
    (unless it is indented text inside the comment).

    Okay: x = x + 1  # Increment x
    Okay: x = x + 1    # Increment x
    Okay: # Block comment
    E261: x = x + 1 # Increment x
    E262: x = x + 1  #Increment x
    E262: x = x + 1  #  Increment x
    E265: #Block comment
    E266: ### Block comment
    """
    prev_end = (0, 0)
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.COMMENT:
            inline_comment = line[:start[1]].strip()
            if inline_comment:
                if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
                    yield (prev_end,
                           "E261 at least two spaces before inline comment")
            symbol, sp, comment = text.partition(' ')
            bad_prefix = symbol not in '#:' and (symbol.lstrip('#')[:1] or '#')
            if inline_comment:
                if bad_prefix or comment[:1] in WHITESPACE:
                    yield start, "E262 inline comment should start with '# '"
            elif bad_prefix and (bad_prefix != '!' or start[0] > 1):
                if bad_prefix != '#':
                    yield start, "E265 block comment should start with '# '"
                elif comment:
                    yield start, "E266 too many leading '#' for block comment"
        elif token_type != tokenize.NL:
            prev_end = end 
Example #21
Source File: format.py    From python-netsurv with MIT License
def _token_followed_by_eol(tokens, position):
    return (
        tokens.type(position + 1) == tokenize.NL
        or tokens.type(position + 1) == tokenize.COMMENT
        and tokens.type(position + 2) == tokenize.NL
    ) 
Example #22
Source File: format.py    From python-netsurv with MIT License
def _last_token_on_line_is(tokens, line_end, token):
    return (
        line_end > 0
        and tokens.token(line_end - 1) == token
        or line_end > 1
        and tokens.token(line_end - 2) == token
        and tokens.type(line_end - 1) == tokenize.COMMENT
    ) 
Example #23
Source File: misc.py    From python-netsurv with MIT License
def process_tokens(self, tokens):
        """inspect the source to find fixme problems"""
        if not self.config.notes:
            return
        comments = (
            token_info for token_info in tokens if token_info.type == tokenize.COMMENT
        )
        for comment in comments:
            comment_text = comment.string[1:].lstrip()  # trim '#' and whitespaces

            # handle pylint disable clauses
            disable_option_match = OPTION_RGX.search(comment_text)
            if disable_option_match:
                try:
                    _, value = disable_option_match.group(1).split("=", 1)
                    values = [_val.strip().upper() for _val in value.split(",")]
                    if set(values) & set(self.config.notes):
                        continue
                except ValueError:
                    self.add_message(
                        "bad-inline-option",
                        args=disable_option_match.group(1).strip(),
                        line=comment.start[0],
                    )
                    continue

            # emit warnings if necessary
            match = self._fixme_pattern.search("#" + comment_text.lower())
            if match:
                note = match.group(1)
                self.add_message(
                    "fixme",
                    col_offset=comment.string.lower().index(note.lower()),
                    args=comment_text,
                    line=comment.start[0],
                ) 
Example #24
Source File: autopep8.py    From python-netsurv with MIT License
def commented_out_code_lines(source):
    """Return line numbers of comments that are likely code.

    Commented-out code is bad practice, but modifying it just adds even
    more clutter.

    """
    line_numbers = []
    try:
        for t in generate_tokens(source):
            token_type = t[0]
            token_string = t[1]
            start_row = t[2][0]
            line = t[4]

            # Ignore inline comments.
            if not line.lstrip().startswith('#'):
                continue

            if token_type == tokenize.COMMENT:
                stripped_line = token_string.lstrip('#').strip()
                if (
                    ' ' in stripped_line and
                    '#' not in stripped_line and
                    check_syntax(stripped_line)
                ):
                    line_numbers.append(start_row)
    except (SyntaxError, tokenize.TokenError):
        pass

    return line_numbers 
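A hedged usage sketch; generate_tokens and check_syntax are autopep8's own wrappers, so this assumes the surrounding module is imported.

source = (
    "# print('hello world')\n"     # commented-out code: should be flagged
    "# just a note for readers\n"  # prose comment: should not be flagged
    "x = 1\n"
)
print(commented_out_code_lines(source))  # e.g. [1]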
Example #25
Source File: autopep8.py    From python-netsurv with MIT License
def reflow(
        self, reflowed_lines, continued_indent, extent,
        break_after_open_bracket=False,
        is_list_comp_or_if_expr=False,
        next_is_dot=False
    ):
        if self._atom.token_type == tokenize.COMMENT:
            reflowed_lines.add_comment(self)
            return

        total_size = extent if extent else self.size

        if self._atom.token_string not in ',:([{}])':
            # Some atoms will need an extra 1-sized space token after them.
            total_size += 1

        prev_item = reflowed_lines.previous_item()
        if (
            not is_list_comp_or_if_expr and
            not reflowed_lines.fits_on_current_line(total_size) and
            not (next_is_dot and
                 reflowed_lines.fits_on_current_line(self.size + 1)) and
            not reflowed_lines.line_empty() and
            not self.is_colon and
            not (prev_item and prev_item.is_name and
                 unicode(self) == '(')
        ):
            # Start a new line if there is already something on the line and
            # adding this atom would make it go over the max line length.
            reflowed_lines.add_line_break(continued_indent)
        else:
            reflowed_lines.add_space_if_needed(unicode(self))

        reflowed_lines.add(self, len(continued_indent),
                           break_after_open_bracket) 
Example #26
Source File: inspect.py    From jawfish with MIT License
def tokeneater(self, type, token, srowcol, erowcol, line):
        if not self.started:
            # look for the first "def", "class" or "lambda"
            if token in ("def", "class", "lambda"):
                if token == "lambda":
                    self.islambda = True
                self.started = True
            self.passline = True    # skip to the end of the line
        elif type == tokenize.NEWLINE:
            self.passline = False   # stop skipping when a NEWLINE is seen
            self.last = srowcol[0]
            if self.islambda:       # lambdas always end at the first NEWLINE
                raise EndOfBlock
        elif self.passline:
            pass
        elif type == tokenize.INDENT:
            self.indent = self.indent + 1
            self.passline = True
        elif type == tokenize.DEDENT:
            self.indent = self.indent - 1
            # the end of matching indent/dedent pairs end a block
            # (note that this only works for "def"/"class" blocks,
            #  not e.g. for "if: else:" or "try: finally:" blocks)
            if self.indent <= 0:
                raise EndOfBlock
        elif self.indent == 0 and type not in (tokenize.COMMENT, tokenize.NL):
            # any other token on the same indentation level end the previous
            # block as well, except the pseudo-tokens COMMENT and NL.
            raise EndOfBlock 
Example #27
Source File: pycodestyle.py    From python-netsurv with MIT License
def explicit_line_join(logical_line, tokens):
    r"""Avoid explicit line join between brackets.

    The preferred way of wrapping long lines is by using Python's
    implied line continuation inside parentheses, brackets and braces.
    Long lines can be broken over multiple lines by wrapping expressions
    in parentheses.  These should be used in preference to using a
    backslash for line continuation.

    E502: aaa = [123, \\n       123]
    E502: aaa = ("bbb " \\n       "ccc")

    Okay: aaa = [123,\n       123]
    Okay: aaa = ("bbb "\n       "ccc")
    Okay: aaa = "bbb " \\n    "ccc"
    Okay: aaa = 123  # \\
    """
    prev_start = prev_end = parens = 0
    comment = False
    backslash = None
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.COMMENT:
            comment = True
        if start[0] != prev_start and parens and backslash and not comment:
            yield backslash, "E502 the backslash is redundant between brackets"
        if end[0] != prev_end:
            if line.rstrip('\r\n').endswith('\\'):
                backslash = (end[0], len(line.splitlines()[-1]) - 1)
            else:
                backslash = None
            prev_start = prev_end = end[0]
        else:
            prev_start = start[0]
        if token_type == tokenize.OP:
            if text in '([{':
                parens += 1
            elif text in ')]}':
                parens -= 1