Python sqlparse.tokens.Punctuation() Examples
The following are 30 code examples of sqlparse.tokens.Punctuation().
You may also want to check out all available functions/classes of the module sqlparse.tokens, or try the search function.
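Before the examples, a minimal sketch of how sqlparse.tokens.Punctuation is typically used: parse a statement, walk the flattened token stream, and compare each token's ttype against the Punctuation type (or use token.match for a value-specific test). The query string here is only an illustrative assumption, not taken from any of the projects below.

import sqlparse
from sqlparse import tokens as T

stmt = sqlparse.parse("SELECT a, b FROM t;")[0]
for token in stmt.flatten():
    if token.ttype is T.Punctuation:              # matches ',' and ';' here
        print(repr(token.value))
    # token.match(T.Punctuation, ',') is the value-specific equivalent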
Example #1
Source File: parseutils.py From litecli with BSD 3-Clause "New" or "Revised" License | 7 votes |
def extract_tables(sql):
    """Extract the table names from an SQL statement.

    Returns a list of (schema, table, alias) tuples
    """
    parsed = sqlparse.parse(sql)
    if not parsed:
        return []

    # INSERT statements must stop looking for tables at the sign of first
    # Punctuation. eg: INSERT INTO abc (col1, col2) VALUES (1, 2)
    # abc is the table name, but if we don't stop at the first lparen, then
    # we'll identify abc, col1 and col2 as table names.
    insert_stmt = parsed[0].token_first().value.lower() == "insert"
    stream = extract_from_part(parsed[0], stop_at_punctuation=insert_stmt)
    return list(extract_table_identifiers(stream))
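The helpers extract_from_part and extract_table_identifiers are defined elsewhere in litecli and are not shown here. As a hedged, self-contained illustration of why the INSERT case stops at the first Punctuation token, this sketch collects names from the flattened token stream only up to the first lparen:

import sqlparse
from sqlparse import tokens as T

stmt = sqlparse.parse("INSERT INTO abc (col1, col2) VALUES (1, 2)")[0]

names = []
for token in stmt.flatten():
    if token.ttype is T.Punctuation:
        break                      # stop at '(' -- col1 and col2 are never seen
    if token.ttype is T.Name:
        names.append(token.value)

print(names)  # ['abc'] -- only the table name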
Example #2
Source File: grouping.py From SublimeText-SQLTools with GNU General Public License v3.0 | 7 votes |
def group_identifier_list(tlist):
    m_role = T.Keyword, ('null', 'role')
    sqlcls = (sql.Function, sql.Case, sql.Identifier, sql.Comparison,
              sql.IdentifierList, sql.Operation)
    ttypes = (T_NUMERICAL + T_STRING + T_NAME +
              (T.Keyword, T.Comment, T.Wildcard))

    def match(token):
        return token.match(T.Punctuation, ',')

    def valid(token):
        return imt(token, i=sqlcls, m=m_role, t=ttypes)

    def post(tlist, pidx, tidx, nidx):
        return pidx, nidx

    valid_prev = valid_next = valid
    _group(tlist, sql.IdentifierList, match,
           valid_prev, valid_next, post, extend=True)
Example #3
Source File: filters.py From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _process(self, tlist):
    token = self._get_next_comment(tlist)
    while token:
        tidx = tlist.token_index(token)
        prev = tlist.token_prev(tidx, False)
        next_ = tlist.token_next(tidx, False)
        # Replace by whitespace if prev and next exist and if they're not
        # whitespaces. This doesn't apply if prev or next is a parenthesis.
        if (prev is not None and next_ is not None
                and not prev.is_whitespace() and not next_.is_whitespace()
                and not (prev.match(T.Punctuation, '(')
                         or next_.match(T.Punctuation, ')'))):
            tlist.tokens[tidx] = sql.Token(T.Whitespace, ' ')
        else:
            tlist.tokens.pop(tidx)
        token = self._get_next_comment(tlist)
Example #4
Source File: filters.py From codenn with MIT License | 6 votes |
def _process(self, tlist):
    token = self._get_next_comment(tlist)
    while token:
        tidx = tlist.token_index(token)
        prev = tlist.token_prev(tidx, False)
        next_ = tlist.token_next(tidx, False)
        # Replace by whitespace if prev and next exist and if they're not
        # whitespaces. This doesn't apply if prev or next is a parenthesis.
        if (prev is not None and next_ is not None
                and not prev.is_whitespace() and not next_.is_whitespace()
                and not (prev.match(T.Punctuation, '(')
                         or next_.match(T.Punctuation, ')'))):
            tlist.tokens[tidx] = sql.Token(T.Whitespace, ' ')
        else:
            tlist.tokens.pop(tidx)
        token = self._get_next_comment(tlist)
Example #5
Source File: mysql_handler.py From schematizer with Apache License 2.0 | 6 votes |
def _get_create_definition_tokens(self, stmt):
    lparen_token = stmt.token_next_by_type(0, T.Punctuation)
    if not lparen_token or lparen_token.value != '(':
        yield

    index = stmt.token_index(lparen_token)
    def_tokens = []
    for token in stmt.tokens[index + 1:]:
        if token.value == ')':
            break

        if isinstance(token, sql.ColumnsDefinition):
            yield token
        elif token.match(T.Punctuation, ','):
            yield def_tokens
            def_tokens = []
        elif not token.is_whitespace():
            def_tokens.append(token)

    if def_tokens:
        yield def_tokens
Example #6
Source File: grouping.py From SublimeText-SQLTools with GNU General Public License v3.0 | 6 votes |
def group_period(tlist):
    def match(token):
        return token.match(T.Punctuation, '.')

    def valid_prev(token):
        sqlcls = sql.SquareBrackets, sql.Identifier
        ttypes = T.Name, T.String.Symbol
        return imt(token, i=sqlcls, t=ttypes)

    def valid_next(token):
        # issue261, allow invalid next token
        return True

    def post(tlist, pidx, tidx, nidx):
        # next_ validation is being performed here. issue261
        sqlcls = sql.SquareBrackets, sql.Function
        ttypes = T.Name, T.String.Symbol, T.Wildcard
        next_ = tlist[nidx] if nidx is not None else None
        valid_next = imt(next_, i=sqlcls, t=ttypes)
        return (pidx, nidx) if valid_next else (pidx, tidx)

    _group(tlist, sql.Identifier, match, valid_prev, valid_next, post)
Example #7
Source File: sql_util.py From editsql with MIT License | 6 votes |
def strip_whitespace_front(token_list):
    new_token_list = []
    found_valid = False

    for token in token_list:
        if not (token.is_whitespace or token.ttype == token_types.Punctuation) or found_valid:
            found_valid = True
            new_token_list.append(token)

    return new_token_list

# strip_whitespace
# Strips whitespace from a token list.
#
# Inputs:
#    token_list: the token list.
#
# Outputs:
#    new token list with no whitespace/punctuation surrounding.
Example #8
Source File: sql_util.py From atis with MIT License | 6 votes |
def strip_whitespace_front(token_list):
    new_token_list = []
    found_valid = False

    for token in token_list:
        if not (token.is_whitespace or token.ttype == token_types.Punctuation) or found_valid:
            found_valid = True
            new_token_list.append(token)

    return new_token_list

# strip_whitespace
# Strips whitespace from a token list.
#
# Inputs:
#    token_list: the token list.
#
# Outputs:
#    new token list with no whitespace/punctuation surrounding.
Example #9
Source File: tables.py From pgcli with BSD 3-Clause "New" or "Revised" License | 6 votes |
def extract_tables(sql):
    """Extract the table names from an SQL statement.

    Returns a list of TableReference namedtuples
    """
    parsed = sqlparse.parse(sql)
    if not parsed:
        return ()

    # INSERT statements must stop looking for tables at the sign of first
    # Punctuation. eg: INSERT INTO abc (col1, col2) VALUES (1, 2)
    # abc is the table name, but if we don't stop at the first lparen, then
    # we'll identify abc, col1 and col2 as table names.
    insert_stmt = parsed[0].token_first().value.lower() == "insert"
    stream = extract_from_part(parsed[0], stop_at_punctuation=insert_stmt)

    # Kludge: sqlparse mistakenly identifies insert statements as
    # function calls due to the parenthesized column list, e.g. interprets
    # "insert into foo (bar, baz)" as a function call to foo with arguments
    # (bar, baz). So don't allow any identifiers in insert statements
    # to have is_function=True
    identifiers = extract_table_identifiers(stream, allow_functions=not insert_stmt)
    # In the case 'sche.<cursor>', we get an empty TableReference; remove that
    return tuple(i for i in identifiers if i.name)
Example #10
Source File: extract_tables.py From Archery with Apache License 2.0 | 6 votes |
def extract_tables(sql):
    """Extract the table names from an SQL statement.

    Returns a list of TableReference namedtuples
    """
    parsed = sqlparse.parse(sql)
    if not parsed:
        return ()

    # INSERT statements must stop looking for tables at the sign of first
    # Punctuation. eg: INSERT INTO abc (col1, col2) VALUES (1, 2)
    # abc is the table name, but if we don't stop at the first lparen, then
    # we'll identify abc, col1 and col2 as table names.
    insert_stmt = parsed[0].token_first().value.lower() == "insert"
    stream = extract_from_part(parsed[0], stop_at_punctuation=insert_stmt)

    # Kludge: sqlparse mistakenly identifies insert statements as
    # function calls due to the parenthesized column list, e.g. interprets
    # "insert into foo (bar, baz)" as a function call to foo with arguments
    # (bar, baz). So don't allow any identifiers in insert statements
    # to have is_function=True
    identifiers = extract_table_identifiers(stream, allow_functions=not insert_stmt)
    # In the case 'sche.<cursor>', we get an empty TableReference; remove that
    return tuple(i for i in identifiers if i.name)
Example #11
Source File: sql_parse.py From incubator-superset with Apache License 2.0 | 6 votes |
def _extract_limit_from_query(statement: TokenList) -> Optional[int]:
    """
    Extract limit clause from SQL statement.

    :param statement: SQL statement
    :return: Limit extracted from query, None if no limit present in statement
    """
    idx, _ = statement.token_next_by(m=(Keyword, "LIMIT"))
    if idx is not None:
        _, token = statement.token_next(idx=idx)
        if token:
            if isinstance(token, IdentifierList):
                # In case of "LIMIT <offset>, <limit>", find comma and extract
                # first succeeding non-whitespace token
                idx, _ = token.token_next_by(m=(sqlparse.tokens.Punctuation, ","))
                _, token = token.token_next(idx=idx)
            if token and token.ttype == sqlparse.tokens.Literal.Number.Integer:
                return int(token.value)
    return None
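A hedged usage sketch, assuming _extract_limit_from_query is importable alongside sqlparse; both LIMIT forms should yield the trailing limit value, provided sqlparse groups "1000, 100" into an IdentifierList as the code above expects:

import sqlparse

stmt = sqlparse.parse("SELECT * FROM logs LIMIT 100")[0]
print(_extract_limit_from_query(stmt))   # 100

stmt = sqlparse.parse("SELECT * FROM logs LIMIT 1000, 100")[0]
print(_extract_limit_from_query(stmt))   # 100 -- the value after the comma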
Example #12
Source File: others.py From SublimeText-SQLTools with GNU General Public License v3.0 | 6 votes |
def _process(tlist):
    def get_next_comment():
        # TODO(andi) Comment types should be unified, see related issue38
        return tlist.token_next_by(i=sql.Comment, t=T.Comment)

    tidx, token = get_next_comment()
    while token:
        pidx, prev_ = tlist.token_prev(tidx, skip_ws=False)
        nidx, next_ = tlist.token_next(tidx, skip_ws=False)
        # Replace by whitespace if prev and next exist and if they're not
        # whitespaces. This doesn't apply if prev or next is a parenthesis.
        if (prev_ is None or next_ is None
                or prev_.is_whitespace or prev_.match(T.Punctuation, '(')
                or next_.is_whitespace or next_.match(T.Punctuation, ')')):
            tlist.tokens.remove(token)
        else:
            tlist.tokens[tidx] = sql.Token(T.Whitespace, ' ')

        tidx, token = get_next_comment()
Example #13
Source File: filters.py From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License | 6 votes |
def __custom_process_parenthesis_order(self, parenthesis):
    open_punc = parenthesis.token_next_match(0, T.Punctuation, '(')
    close_punc = parenthesis.token_next_match(open_punc, T.Punctuation, ')')

    self.indent += 2
    parenthesis.insert_after(open_punc, self.nl())

    for token in parenthesis.tokens_between(open_punc, close_punc)[1:-1]:
        if isinstance(token, Phrase):
            parenthesis.insert_before(token, self.nl())
            self._process_phrase(token, kwds=False)
            parenthesis.insert_after(token, self.nl_with_indent(1))
        elif isinstance(token, sql.Identifier) and len(token.tokens) == 1 \
                and isinstance(token.tokens[0], Phrase):
            # an Identifier whose contents are a Phrase
            child_token = token.tokens[0]
            parenthesis.insert_before(token, self.nl())
            self._process_phrase(child_token, kwds=False)
            parenthesis.insert_after(token, self.nl_with_indent(1))
        elif token.is_group():
            self._process(token)

    self.indent -= 1
    parenthesis.insert_before(close_punc, self.nl())
    self.indent -= 1
Example #14
Source File: filters.py From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _stripws_default(self, tlist):
    last_was_ws = False
    last_ws_token = None
    last_was_punctuation = False
    for token in tlist.tokens[:]:
        if token.is_whitespace():
            if last_was_ws or last_was_punctuation:
                # If the previous token is whitespace or Punctuation, drop this whitespace
                tlist.tokens.remove(token)
                continue
            else:
                token.value = "\t"
        if tu.is_punctuation(token):
            if last_ws_token:
                tlist.tokens.remove(last_ws_token)  # remove the whitespace before the Punctuation
        last_was_ws = token.is_whitespace()
        last_ws_token = token if last_was_ws else None
        last_was_punctuation = tu.is_punctuation(token)

    self.__custom_stripws_tokenlist(tlist)
Example #15
Source File: tokenutils.py From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License | 6 votes |
def is_dmlddl_parenthesis(token):
    """
    Check whether the parenthesis encloses a DML or DDL statement.
    """
    if not is_parenthesis(token):
        return False

    open_punc = token.token_next_match(0, T.Punctuation, '(')
    first = token_next_enable(token, open_punc)
    if first and first.ttype in (T.Keyword.DML, T.Keyword.DDL):
        return True

    if is_with(first):
        return True

    if is_parenthesis(first):
        return is_dmlddl_parenthesis(first)

    return False
Example #16
Source File: filters.py From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __process_parenthesis_for_join_using(self, tlist):
    open_punc = tlist.token_next_match(0, T.Punctuation, '(')
    tlist.insert_after(open_punc, self.nl_with_indent(1))
    self._process_default(tlist)

    close_punc = tlist.token_next_match(open_punc, T.Punctuation, ')')
    tlist.insert_before(close_punc, self.nl())
Example #17
Source File: parseutils.py From athenacli with BSD 3-Clause "New" or "Revised" License | 5 votes |
def extract_tables(sql):
    """Extract the table names from an SQL statement.

    Returns a list of (schema, table, alias) tuples
    """
    parsed = sqlparse.parse(sql)
    if not parsed:
        return []

    # INSERT statements must stop looking for tables at the sign of first
    # Punctuation. eg: INSERT INTO abc (col1, col2) VALUES (1, 2)
    # abc is the table name, but if we don't stop at the first lparen, then
    # we'll identify abc, col1 and col2 as table names.
    insert_stmt = parsed[0].token_first().value.lower() == 'insert'
    stream = extract_from_part(parsed[0], stop_at_punctuation=insert_stmt)
    return list(extract_table_identifiers(stream))
Example #18
Source File: filters.py From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __process_parenthesis_for_with_query_cols(self, tlist):
    """
    Column names of a WITH query.
    """
    open_punc = tlist.token_next_match(0, T.Punctuation, '(')
    self.indent += 1
    tlist.insert_after(open_punc, self.nl())
    self._process_default(tlist)

    close_punc = tlist.token_next_match(open_punc, T.Punctuation, ')')
    tlist.insert_before(close_punc, self.nl())
    self.indent -= 1
Example #19
Source File: filters.py From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __process_parenthesis_for_insert(self, tlist):
    open_punc = tlist.token_next_match(0, T.Punctuation, '(')
    tlist.insert_after(open_punc, self.nl())
    self._process_default(tlist)

    close_punc = tlist.token_next_match(open_punc, T.Punctuation, ')')
    tlist.insert_before(close_punc, self.nl())
Example #20
Source File: sql.py From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License | 5 votes |
def get_identifiers(self):
    """Returns the identifiers.

    Whitespaces and punctuations are not included in this generator.
    """
    for x in self.tokens:
        if not x.is_whitespace() and not x.match(T.Punctuation, ','):
            yield x
Example #21
Source File: sql.py From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License | 5 votes |
def get_typecast(self):
    """Returns the typecast or ``None`` of this object as a string."""
    marker = self.token_next_match(0, T.Punctuation, '::')
    if marker is None:
        return None
    next_ = self.token_next(self.token_index(marker), False)
    if next_ is None:
        return None
    return str(next_)
Example #22
Source File: sql.py From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License | 5 votes |
def get_parent_name(self):
    """Return name of the parent object if any.

    A parent object is identified by the first occurring dot.
    """
    dot = self.token_next_match(0, T.Punctuation, '.')
    if dot is None:
        return None
    prev_ = self.token_prev(self.token_index(dot))
    if prev_ is None:  # something must be very wrong here..
        return None
    return self._remove_quotes(prev_.value)
Example #23
Source File: sql.py From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License | 5 votes |
def get_real_name(self):
    """Returns the real name (object name) of this identifier."""
    # a.b
    dot = self.token_next_match(0, T.Punctuation, '.')
    if dot is not None:
        return self._get_first_name(self.token_index(dot))
    return self._get_first_name()
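Example #22 and Example #23 both split an identifier on the '.' Punctuation token. A hedged sketch of the same behaviour through the corresponding sqlparse Identifier API (assuming default grouping of the schema-qualified name):

import sqlparse
from sqlparse.sql import Identifier

stmt = sqlparse.parse("SELECT col FROM myschema.mytable tbl")[0]
ident = [t for t in stmt.tokens if isinstance(t, Identifier)][-1]

print(ident.get_parent_name())  # 'myschema' -- the name before the first '.'
print(ident.get_real_name())    # 'mytable'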
Example #24
Source File: grouping.py From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _group_matching(tlist, start_ttype, start_value, end_ttype, end_value,
                    cls, include_semicolon=False, recurse=False):
    # bugfix recurse
    # [_group_matching(sgroup, start_ttype, start_value, end_ttype, end_value,
    #                  cls, include_semicolon) for sgroup in tlist.get_sublists()
    [_group_matching(sgroup, start_ttype, start_value, end_ttype, end_value,
                     cls, include_semicolon, recurse)
     for sgroup in tlist.get_sublists() if recurse]

    if isinstance(tlist, cls):
        idx = 1
    else:
        idx = 0

    token = tlist.token_next_match(idx, start_ttype, start_value)
    while token:
        tidx = tlist.token_index(token)
        end = _find_matching(tidx, tlist, start_ttype, start_value,
                             end_ttype, end_value)
        if end is None:
            idx = tidx + 1
        else:
            if include_semicolon:
                next_ = tlist.token_next(tlist.token_index(end))
                if next_ and next_.match(T.Punctuation, ';'):
                    end = next_
            group = tlist.group_tokens(cls, tlist.tokens_between(token, end))
            _group_matching(group, start_ttype, start_value,
                            end_ttype, end_value, cls, include_semicolon)
            idx = tlist.token_index(group) + 1
        token = tlist.token_next_match(idx, start_ttype, start_value)
Example #25
Source File: grouping.py From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _group_left_right(tlist, ttype, value, cls,
                      check_right=lambda t: True,
                      check_left=lambda t: True,
                      include_semicolon=False):
    [_group_left_right(sgroup, ttype, value, cls, check_right, check_left,
                       include_semicolon) for sgroup in tlist.get_sublists()
     if not isinstance(sgroup, cls)]
    idx = 0
    token = tlist.token_next_match(idx, ttype, value)
    while token:
        right = tlist.token_next(tlist.token_index(token))
        left = tlist.token_prev(tlist.token_index(token))
        if right is None or not check_right(right):
            token = tlist.token_next_match(tlist.token_index(token) + 1,
                                           ttype, value)
        elif left is None or not check_left(left):
            token = tlist.token_next_match(tlist.token_index(token) + 1,
                                           ttype, value)
        else:
            if include_semicolon:
                sright = tlist.token_next_match(tlist.token_index(right),
                                                T.Punctuation, ';')
                if sright is not None:
                    # only overwrite "right" if a semicolon is actually
                    # present.
                    right = sright
            tokens = tlist.tokens_between(left, right)[1:]
            if not isinstance(left, cls):
                new = cls([left])
                new_idx = tlist.token_index(left)
                tlist.tokens.remove(left)
                tlist.tokens.insert(new_idx, new)
                left = new
            left.tokens.extend(tokens)
            for t in tokens:
                tlist.tokens.remove(t)
            token = tlist.token_next_match(tlist.token_index(left) + 1,
                                           ttype, value)
Example #26
Source File: filter.py From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License | 5 votes |
def process(self, stack, stream):
    "Process the stream"
    consume_ws = False
    splitlevel = 0
    stmt = None
    stmt_tokens = []

    # Run over all stream tokens
    for ttype, value in stream:
        # Yield token if we finished a statement and there's no whitespaces
        if consume_ws and ttype not in (T.Whitespace, T.Comment.Single):
            stmt.tokens = stmt_tokens
            yield stmt

            # Reset filter and prepare to process next statement
            self._reset()
            consume_ws = False
            splitlevel = 0
            stmt = None

        # Create a new statement if we are not currently in one of them
        if stmt is None:
            stmt = Statement()
            stmt_tokens = []

        # Change current split level (increase, decrease or remain equal)
        splitlevel += self._change_splitlevel(ttype, value)

        # Append the token to the current statement
        stmt_tokens.append(Token(ttype, value))

        # Check if we get the end of a statement
        if splitlevel <= 0 and ttype is T.Punctuation and value == ';':
            consume_ws = True

    # Yield pending statement (if any)
    if stmt is not None:
        stmt.tokens = stmt_tokens
        yield stmt
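This filter is the low-level machinery behind statement splitting: a statement is closed when a ';' Punctuation token appears at split level zero. The same behaviour is reachable through the public sqlparse API; a brief illustration:

import sqlparse

print(sqlparse.split("SELECT 1; SELECT 2;"))
# ['SELECT 1;', 'SELECT 2;']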
Example #27
Source File: filters.py From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _process_parenthesis(self, tlist):
    first = tlist.token_next(0)
    indented = False
    if first and first.ttype in (T.Keyword.DML, T.Keyword.DDL):
        self.indent += 1
        tlist.tokens.insert(0, self.nl())
        indented = True
    num_offset = self._get_offset(
        tlist.token_next_match(0, T.Punctuation, '('))
    self.offset += num_offset
    self._process_default(tlist, stmts=not indented)
    if indented:
        self.indent -= 1
    self.offset -= num_offset
Example #28
Source File: filters.py From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _stripws_identifierlist(self, tlist):
    # Removes newlines before commas, see issue140
    last_nl = None
    for token in tlist.tokens[:]:
        if (token.ttype is T.Punctuation and token.value == ','
                and last_nl is not None):
            tlist.tokens.remove(last_nl)
        if token.is_whitespace():
            last_nl = token
        else:
            last_nl = None
    return self._stripws_default(tlist)
Example #29
Source File: sql_parse.py From incubator-superset with Apache License 2.0 | 5 votes |
def _get_table(tlist: TokenList) -> Optional[Table]:
    """
    Return the table if valid, i.e., conforms to the [[catalog.]schema.]table
    construct.

    :param tlist: The SQL tokens
    :returns: The table if the name conforms
    """

    # Strip the alias if present.
    idx = len(tlist.tokens)

    if tlist.has_alias():
        ws_idx, _ = tlist.token_next_by(t=Whitespace)

        if ws_idx != -1:
            idx = ws_idx

    tokens = tlist.tokens[:idx]

    if (
        len(tokens) in (1, 3, 5)
        and all(imt(token, t=[Name, String]) for token in tokens[::2])
        and all(imt(token, m=(Punctuation, ".")) for token in tokens[1::2])
    ):
        return Table(*[remove_quotes(token.value) for token in tokens[::-2]])

    return None
Example #30
Source File: sql_metadata.py From sql-metadata with MIT License | 5 votes |
def get_query_limit_and_offset(query: str) -> Optional[Tuple[int, int]]:
    """
    :type query str
    :rtype: (int, int)
    """
    limit = None
    offset = None
    last_keyword = None
    last_token = None

    # print(query)
    for token in get_query_tokens(query):
        # print([token, token.ttype, last_keyword])

        if token.is_keyword and token.value.upper() in ['LIMIT', 'OFFSET']:
            last_keyword = token.value.upper()
        elif token.ttype is Number.Integer:
            # print([token, last_keyword, last_token_was_integer])

            if last_keyword == 'LIMIT':
                # LIMIT <limit>
                limit = int(token.value)
                last_keyword = None
            elif last_keyword == 'OFFSET':
                # OFFSET <offset>
                offset = int(token.value)
                last_keyword = None
            elif last_token and last_token.ttype is Punctuation:
                # LIMIT <offset>,<limit>
                offset = limit
                limit = int(token.value)

        last_token = token

    if limit is None:
        return None

    return limit, offset or 0

# SQL queries normalization (#16)
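A hedged usage sketch, assuming get_query_limit_and_offset is imported from sql_metadata (it relies on get_query_tokens from the same module):

print(get_query_limit_and_offset("SELECT * FROM events LIMIT 50"))             # (50, 0)
print(get_query_limit_and_offset("SELECT * FROM events LIMIT 50 OFFSET 20"))   # (50, 20)
print(get_query_limit_and_offset("SELECT * FROM events LIMIT 20, 50"))         # (50, 20)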