Python sqlparse.sql.IdentifierList() Examples
The following are 30
code examples of sqlparse.sql.IdentifierList().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
sqlparse.sql
, or try the search function
.
Example #1
Source File: grouping.py From SublimeText-SQLTools with GNU General Public License v3.0 | 7 votes |
def group_identifier_list(tlist):
    """Group comma-separated tokens of *tlist* into sql.IdentifierList nodes.

    Delegates to the generic ``_group()`` helper: tokens around each ','
    punctuation are merged when both neighbours are valid list members.
    """
    # NULL/ROLE keywords may legitimately appear as members of a list.
    m_role = T.Keyword, ('null', 'role')
    # Grouped token classes that can be members of an identifier list.
    sqlcls = (sql.Function, sql.Case, sql.Identifier, sql.Comparison,
              sql.IdentifierList, sql.Operation)
    # Plain token types that can be members as well.
    ttypes = (T_NUMERICAL + T_STRING + T_NAME
              + (T.Keyword, T.Comment, T.Wildcard))

    def match(token):
        # The separator that triggers grouping.
        return token.match(T.Punctuation, ',')

    def valid(token):
        # imt() tests instance / match / ttype against the sets above.
        return imt(token, i=sqlcls, m=m_role, t=ttypes)

    def post(tlist, pidx, tidx, nidx):
        # Extend the new group to include both neighbours of the comma.
        return pidx, nidx

    # The same validity test applies on both sides of the separator.
    valid_prev = valid_next = valid
    _group(tlist, sql.IdentifierList, match,
           valid_prev, valid_next, post, extend=True)
Example #2
Source File: ParseUtils.py From SublimeText-SQLTools with GNU General Public License v3.0 | 6 votes |
def _extract_table_identifiers(token_stream):
    """Yield one Reference(schema, name, alias, is_function) per table token."""
    for tok in token_stream:
        if isinstance(tok, IdentifierList):
            for ident in tok.get_identifiers():
                # Keywords occasionally get classified as identifiers and
                # lack the name accessors; skip those members.
                try:
                    alias = ident.get_alias()
                    schema_name = ident.get_parent_name()
                    real_name = ident.get_real_name()
                except AttributeError:
                    continue
                if real_name:
                    yield Reference(schema_name, real_name, alias,
                                    _identifier_is_function(ident))
        elif isinstance(tok, (Identifier, Function)):
            # Both plain identifiers and function calls are reported the
            # same way, so the two cases share one branch.
            yield Reference(tok.get_parent_name(), tok.get_real_name(),
                            tok.get_alias(), _identifier_is_function(tok))
Example #3
Source File: sql_parse.py From incubator-superset with Apache License 2.0 | 6 votes |
def _extract_limit_from_query(statement: TokenList) -> Optional[int]:
    """
    Extract limit clause from SQL statement.

    :param statement: SQL statement
    :return: Limit extracted from query, None if no limit present in statement
    """
    idx, _ = statement.token_next_by(m=(Keyword, "LIMIT"))
    if idx is None:
        return None
    _, token = statement.token_next(idx=idx)
    if token is None:
        return None
    if isinstance(token, IdentifierList):
        # "LIMIT <offset>, <limit>": locate the comma and take the first
        # succeeding non-whitespace token as the limit value.
        comma_idx, _ = token.token_next_by(
            m=(sqlparse.tokens.Punctuation, ",")
        )
        _, token = token.token_next(idx=comma_idx)
    if token and token.ttype == sqlparse.tokens.Literal.Number.Integer:
        return int(token.value)
    return None
Example #4
Source File: grouping.py From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License | 6 votes |
def init_group_token(self, token):
    """Split a WITH-style clause into Identifiers grouped as an IdentifierList.

    Walks *token*'s target tokens using each comma as a boundary: tokens
    between boundaries become one sql.Identifier, and finally everything
    after the first token is wrapped into a sql.IdentifierList.
    """
    tokens = token.get_target_tokens()
    # presumably the leading WITH keyword — first target token; verify
    with_token = tokens[0]
    start_prev = with_token
    end = None
    for tkn in tokens[1:]:
        if tu.is_comma(tkn):
            # First enabled (non-whitespace/comment) token after the
            # previous boundary starts this identifier.
            start = tu.token_next_enable(token, start_prev)
            token.group_tokens(sql.Identifier,
                               token.tokens_between(start, end))
            start_prev = tkn
            continue
        end = tkn
    # Wrap everything between the first token and the last enabled token.
    start = tu.token_next_enable(token, with_token)
    end = tu.token_prev_enable(token)
    token.group_tokens(sql.IdentifierList, token.tokens_between(start, end))
Example #5
Source File: test_grouping.py From codenn with MIT License | 5 votes |
def test_identifier_consumes_ordering():
    # Regression test for issue89: ORDER BY terms keep their ordering
    # attached to the identifier inside the grouped list.
    parsed = sqlparse.parse('select * from foo order by c1 desc, c2, c3')[0]
    id_list = parsed.tokens[-1]
    assert isinstance(id_list, sql.IdentifierList)
    identifiers = list(id_list.get_identifiers())
    assert len(identifiers) == 3
    assert identifiers[0].get_name() == 'c1'
    assert identifiers[0].get_ordering() == 'DESC'
    assert identifiers[1].get_name() == 'c2'
    assert identifiers[1].get_ordering() is None
Example #6
Source File: sql_tokens.py From djongo with GNU Affero General Public License v3.0 | 5 votes |
def __iter__(self):
    """Yield the extracted value of each identifier wrapped by this token."""
    # Strip the enclosing parenthesis tokens and take the first child.
    inner = self._token[1:-1][0]
    if isinstance(inner, IdentifierList):
        for ident in inner.get_identifiers():
            yield self.get_value(ident)
    else:
        yield self.get_value(inner)
Example #7
Source File: sql_tokens.py From djongo with GNU Affero General Public License v3.0 | 5 votes |
def tokens2sql(token: Token,
               query: 'query_module.BaseQuery'
               ) -> Iterator[all_token_types]:
    """Convert a sqlparse *token* into this package's SQL wrapper objects.

    Dispatches on the concrete sqlparse node type and yields one or more
    wrapper instances; identifier lists are flattened recursively.
    """
    from .functions import SQLFunc
    if isinstance(token, Identifier):
        # Bug fix for sql parse
        if isinstance(token[0], Parenthesis):
            # Only a parenthesised integer constant, e.g. "(1)", is
            # accepted here; anything else re-raises the ValueError.
            try:
                int(token[0][1].value)
            except ValueError:
                raise
            else:
                yield SQLConstIdentifier(token, query)
        elif isinstance(token[0], Function):
            yield SQLFunc.token2sql(token, query)
        else:
            yield SQLIdentifier(token, query)
    elif isinstance(token, Function):
        yield SQLFunc.token2sql(token, query)
    elif isinstance(token, Comparison):
        yield SQLComparison(token, query)
    elif isinstance(token, IdentifierList):
        # Flatten the list by recursing into each member.
        for tok in token.get_identifiers():
            yield from SQLToken.tokens2sql(tok, query)
    elif isinstance(token, Parenthesis):
        yield SQLPlaceholder(token, query)
    else:
        raise SQLDecodeError(f'Unsupported: {token.value}')
Example #8
Source File: extract_tables.py From Archery with Apache License 2.0 | 5 votes |
def extract_from_part(parsed, stop_at_punctuation=True):
    """Yield the tokens that make up the FROM part of *parsed*.

    :param parsed: a sqlparse statement (TokenList)
    :param stop_at_punctuation: stop scanning at the first punctuation
        token after the FROM prefix has been seen
    """
    tbl_prefix_seen = False
    for item in parsed.tokens:
        if tbl_prefix_seen:
            if is_subselect(item):
                for x in extract_from_part(item, stop_at_punctuation):
                    yield x
            elif stop_at_punctuation and item.ttype is Punctuation:
                # BUG FIX: this used `raise StopIteration`, which PEP 479
                # turns into a RuntimeError inside generators on
                # Python 3.7+; a plain return ends the generator cleanly.
                return
            # An incomplete nested select won't be recognized correctly as a
            # sub-select. eg: 'SELECT * FROM (SELECT id FROM user'. This causes
            # the second FROM to trigger this elif condition and end the
            # iteration. So we need to ignore the keyword if the keyword is
            # FROM.
            # Also 'SELECT * FROM abc JOIN def' will trigger this elif
            # condition. So we need to ignore the keyword JOIN and its variants
            # INNER JOIN, FULL OUTER JOIN, etc.
            elif item.ttype is Keyword and (not item.value.upper() == "FROM") and (
                not item.value.upper().endswith("JOIN")
            ):
                tbl_prefix_seen = False
            else:
                yield item
        elif item.ttype is Keyword or item.ttype is Keyword.DML:
            item_val = item.value.upper()
            if (
                item_val in ("COPY", "FROM", "INTO", "UPDATE", "TABLE")
                or item_val.endswith("JOIN")
            ):
                tbl_prefix_seen = True
        # 'SELECT a, FROM abc' will detect FROM as part of the column list.
        # So this check here is necessary.
        elif isinstance(item, IdentifierList):
            for identifier in item.get_identifiers():
                if identifier.ttype is Keyword and identifier.value.upper() == "FROM":
                    tbl_prefix_seen = True
                    break
Example #9
Source File: sql_parse.py From incubator-superset with Apache License 2.0 | 5 votes |
def _is_identifier(token: Token) -> bool:
    """Return True if *token* is a single identifier or an identifier list."""
    return isinstance(token, (Identifier, IdentifierList))
Example #10
Source File: tables.py From mssql-cli with BSD 3-Clause "New" or "Revised" License | 5 votes |
def extract_from_part(parsed, stop_at_punctuation=True):
    """Yield the tokens that belong to the FROM part of *parsed*."""
    from_seen = False
    for tok in parsed.tokens:
        if from_seen:
            if is_subselect(tok):
                for sub in extract_from_part(tok, stop_at_punctuation):
                    yield sub
            elif stop_at_punctuation and tok.ttype is Punctuation:
                # Punctuation ends the clause. An incomplete nested select
                # ('SELECT * FROM (SELECT id FROM user') would otherwise let
                # the second FROM fall through to the keyword branch below.
                return
            elif tok.ttype is Keyword and tok.value.upper() != 'FROM' \
                    and not tok.value.upper().endswith('JOIN'):
                # FROM itself and all JOIN variants (INNER JOIN, FULL OUTER
                # JOIN, ...) continue the clause; any other keyword ends it.
                from_seen = False
            else:
                yield tok
        elif tok.ttype is Keyword or tok.ttype is Keyword.DML:
            upper = tok.value.upper()
            if upper in ('COPY', 'FROM', 'INTO', 'UPDATE', 'TABLE') \
                    or upper.endswith('JOIN'):
                from_seen = True
        elif isinstance(tok, IdentifierList):
            # 'SELECT a, FROM abc' misparses FROM into the column list,
            # so look for it inside identifier lists as well.
            for ident in tok.get_identifiers():
                if ident.ttype is Keyword and ident.value.upper() == 'FROM':
                    from_seen = True
                    break
Example #11
Source File: grouping.py From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _adjust_identifier(self, gptoken, tlist):
    """Merge *tlist* into the IdentifierList that follows *gptoken*.

    If the next enabled comma after *gptoken* belongs to an
    IdentifierList, move that list's tokens into *tlist*'s parent list
    (creating one around *tlist* if necessary) and re-process the result.
    """
    if not tu.is_identifier(tlist):
        return

    def token_next_enable(token, func):
        # Walk the flattened tokens after *token* and return the first
        # enabled one, lifted to the top-level token matching *func*.
        nexts = tu.flatten_tokens_next(self.curr_stmt, token)
        for tkn in nexts:
            if tu.is_enable(tkn):
                return tu.token_top_matching(tkn, token, func)

    next_comma = token_next_enable(gptoken, tu.is_comma)
    if not next_comma or not tu.is_identifier_list(next_comma.parent):
        return
    if tu.is_identifier_list(tlist.parent):
        if tlist.parent == next_comma.parent:
            # Already members of the same list — nothing to do.
            return
        identifier_list = tlist.parent
        # Absorb every token of the comma's list into tlist's list.
        # Iterate a copy: _move_append_token mutates the source list.
        for tkn in next_comma.parent.tokens[:]:
            _move_append_token(identifier_list, tkn)
    else:
        # Wrap tlist in a fresh IdentifierList first, then absorb the
        # comma's list into it.
        identifier_list = tlist.parent.group_tokens(sql.IdentifierList,
                                                    [tlist])
        for tkn in next_comma.parent.tokens[:]:
            _move_append_token(identifier_list, tkn)
    self._process(identifier_list)
Example #12
Source File: tokenutils.py From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License | 5 votes |
def is_identifier_list(token):
    """Return True if *token* is a ``sql.IdentifierList``."""
    return isinstance(token, sql.IdentifierList)
Example #13
Source File: parseutils.py From athenacli with BSD 3-Clause "New" or "Revised" License | 5 votes |
def extract_table_identifiers(token_stream):
    """yields tuples of (schema_name, table_name, table_alias)"""
    for item in token_stream:
        if isinstance(item, IdentifierList):
            for ident in item.get_identifiers():
                # Keywords (such as FROM) are sometimes classified as
                # identifiers but lack get_real_name(); skip them.
                try:
                    schema = ident.get_parent_name()
                    real = ident.get_real_name()
                except AttributeError:
                    continue
                if real:
                    yield (schema, real, ident.get_alias())
        elif isinstance(item, Identifier):
            real = item.get_real_name()
            if real:
                yield (item.get_parent_name(), real, item.get_alias())
            else:
                # No real name: fall back to the plain name, which also
                # serves as the alias when none is given.
                name = item.get_name()
                yield (None, name, item.get_alias() or name)
        elif isinstance(item, Function):
            yield (None, item.get_name(), item.get_name())

# extract_tables is inspired from examples in the sqlparse lib.
Example #14
Source File: parseutils.py From athenacli with BSD 3-Clause "New" or "Revised" License | 5 votes |
def extract_from_part(parsed, stop_at_punctuation=True):
    """Yield the tokens that form the FROM part of *parsed*."""
    seen_prefix = False
    for tok in parsed.tokens:
        if seen_prefix:
            if is_subselect(tok):
                for sub in extract_from_part(tok, stop_at_punctuation):
                    yield sub
            elif stop_at_punctuation and tok.ttype is Punctuation:
                return
            elif tok.ttype is Keyword and tok.value.upper() != 'FROM' \
                    and not tok.value.upper().endswith('JOIN'):
                # Any other keyword terminates the FROM part.  FROM itself
                # and JOIN variants (INNER JOIN, FULL OUTER JOIN, ...) are
                # ignored so an incomplete nested select such as
                # 'SELECT * FROM (SELECT id FROM user' doesn't cut the
                # scan short.
                return
            else:
                yield tok
        elif ((tok.ttype is Keyword or tok.ttype is Keyword.DML)
              and tok.value.upper() in ('COPY', 'FROM', 'INTO', 'UPDATE',
                                        'TABLE', 'JOIN',)):
            seen_prefix = True
        elif isinstance(tok, IdentifierList):
            # 'SELECT a, FROM abc' parses FROM into the column list, so
            # look for it inside identifier lists too.
            for ident in tok.get_identifiers():
                if ident.ttype is Keyword and ident.value.upper() == 'FROM':
                    seen_prefix = True
                    break
Example #15
Source File: test_grouping.py From codenn with MIT License | 5 votes |
def test_idlist_function(self):
    # A function call with an alias plus a bare name must still be
    # grouped as an identifier list (see issue10 too).
    parsed = sqlparse.parse('foo(1) x, bar')[0]
    self.assert_(isinstance(parsed.tokens[0], sql.IdentifierList))
Example #16
Source File: test_grouping.py From codenn with MIT License | 5 votes |
def test_identifier_list_other(self):
    # issue2: wildcards, NULL, numbers and strings all belong to the list.
    parsed = sqlparse.parse("select *, null, 1, 'foo', bar from mytable, x")[0]
    id_list = parsed.tokens[2]
    self.assert_(isinstance(id_list, sql.IdentifierList))
    self.assertEqual(len(id_list.tokens), 13)
Example #17
Source File: test_grouping.py From codenn with MIT License | 5 votes |
def test_identifier_list_case(self):
    # A CASE expression with an alias is a valid identifier-list member,
    # both at top level and inside parentheses.
    parsed = sqlparse.parse('a, case when 1 then 2 else 3 end as b, c')[0]
    self.assert_(isinstance(parsed.tokens[0], sql.IdentifierList))
    parsed = sqlparse.parse('(a, case when 1 then 2 else 3 end as b, c)')[0]
    self.assert_(isinstance(parsed.tokens[0].tokens[1], sql.IdentifierList))
Example #18
Source File: parseutils.py From litecli with BSD 3-Clause "New" or "Revised" License | 5 votes |
def extract_from_part(parsed, stop_at_punctuation=True):
    """Yield the tokens making up the FROM part of *parsed*."""
    prefix_seen = False
    for tok in parsed.tokens:
        if prefix_seen:
            if is_subselect(tok):
                yield from extract_from_part(tok, stop_at_punctuation)
            elif stop_at_punctuation and tok.ttype is Punctuation:
                return
            elif (
                tok.ttype is Keyword
                and tok.value.upper() != "FROM"
                and not tok.value.upper().endswith("JOIN")
            ):
                # An unrelated keyword ends the FROM part.  FROM itself and
                # JOIN variants (INNER JOIN, FULL OUTER JOIN, ...) continue
                # it, so an incomplete nested select such as
                # 'SELECT * FROM (SELECT id FROM user' doesn't end early.
                return
            else:
                yield tok
        elif (tok.ttype is Keyword or tok.ttype is Keyword.DML) \
                and tok.value.upper() in ("COPY", "FROM", "INTO", "UPDATE",
                                          "TABLE", "JOIN"):
            prefix_seen = True
        elif isinstance(tok, IdentifierList):
            # 'SELECT a, FROM abc' misparses FROM into the column list.
            for ident in tok.get_identifiers():
                if ident.ttype is Keyword and ident.value.upper() == "FROM":
                    prefix_seen = True
                    break
Example #19
Source File: test_grouping.py From codenn with MIT License | 5 votes |
def test_identifier_wildcard(self):
    # A qualified wildcard ('a.*') groups like a regular identifier.
    parsed = sqlparse.parse('a.*, b.id')[0]
    id_list = parsed.tokens[0]
    self.assert_(isinstance(id_list, sql.IdentifierList))
    self.assert_(isinstance(id_list.tokens[0], sql.Identifier))
    self.assert_(isinstance(id_list.tokens[-1], sql.Identifier))
Example #20
Source File: test_grouping.py From codenn with MIT License | 5 votes |
def test_identifiers(self):
    """Identifier grouping across several statement shapes.

    NOTE(review): written against the Python 2-era sqlparse API
    (``unicode()``, ``is_whitespace()`` as a method, ``assertEquals``);
    it will not run unmodified on Python 3 / current sqlparse.
    """
    # Qualified name, quoted schema/table, and a WHERE with a dangling dot.
    s = 'select foo.bar from "myscheme"."table" where fail. order'
    parsed = sqlparse.parse(s)[0]
    self.ndiffAssertEqual(s, unicode(parsed))
    self.assert_(isinstance(parsed.tokens[2], sql.Identifier))
    self.assert_(isinstance(parsed.tokens[6], sql.Identifier))
    self.assert_(isinstance(parsed.tokens[8], sql.Where))
    s = 'select * from foo where foo.id = 1'
    parsed = sqlparse.parse(s)[0]
    self.ndiffAssertEqual(s, unicode(parsed))
    self.assert_(isinstance(parsed.tokens[-1].tokens[-1].tokens[0],
                            sql.Identifier))
    s = 'select * from (select "foo"."id" from foo)'
    parsed = sqlparse.parse(s)[0]
    self.ndiffAssertEqual(s, unicode(parsed))
    self.assert_(isinstance(parsed.tokens[-1].tokens[3], sql.Identifier))
    # INSERT: only the token types are checked, whitespace ignored.
    s = "INSERT INTO `test` VALUES('foo', 'bar');"
    parsed = sqlparse.parse(s)[0]
    types = [l.ttype for l in parsed.tokens if not l.is_whitespace()]
    self.assertEquals(types, [T.DML, T.Keyword, None,
                              T.Keyword, None, T.Punctuation])
    # Arithmetic expressions with aliases form a two-member list.
    s = "select 1.0*(a+b) as col, sum(c)/sum(d) from myschema.mytable"
    parsed = sqlparse.parse(s)[0]
    self.assertEqual(len(parsed.tokens), 7)
    self.assert_(isinstance(parsed.tokens[2], sql.IdentifierList))
    self.assertEqual(len(parsed.tokens[2].tokens), 4)
    identifiers = list(parsed.tokens[2].get_identifiers())
    self.assertEqual(len(identifiers), 2)
    self.assertEquals(identifiers[0].get_alias(), u"col")
Example #21
Source File: test_regressions.py From codenn with MIT License | 5 votes |
def test_issue40(self):
    """Identifier lists inside subselects must be grouped (issue 40)."""
    # make sure identifier lists in subselects are grouped
    p = sqlparse.parse(('SELECT id, name FROM '
                        '(SELECT id, name FROM bar) as foo'))[0]
    self.assertEqual(len(p.tokens), 7)
    self.assertEqual(p.tokens[2].__class__, sql.IdentifierList)
    self.assertEqual(p.tokens[-1].__class__, sql.Identifier)
    self.assertEqual(p.tokens[-1].get_name(), u'foo')
    sp = p.tokens[-1].tokens[0]
    self.assertEqual(sp.tokens[3].__class__, sql.IdentifierList)
    # make sure that formatting works as expected
    # NOTE(review): the expected strings below appear to have had their
    # internal alignment spaces collapsed in transcription — confirm
    # against the upstream test before relying on them.
    self.ndiffAssertEqual(
        sqlparse.format(('SELECT id, name FROM '
                         '(SELECT id, name FROM bar)'),
                        reindent=True),
        ('SELECT id,\n'
         ' name\n'
         'FROM\n'
         ' (SELECT id,\n'
         ' name\n'
         ' FROM bar)'))
    self.ndiffAssertEqual(
        sqlparse.format(('SELECT id, name FROM '
                         '(SELECT id, name FROM bar) as foo'),
                        reindent=True),
        ('SELECT id,\n'
         ' name\n'
         'FROM\n'
         ' (SELECT id,\n'
         ' name\n'
         ' FROM bar) as foo'))
Example #22
Source File: test_grouping.py From codenn with MIT License | 5 votes |
def test_identifier_list_with_inline_comments(self):
    # issue163: an inline comment between members must not break grouping.
    parsed = sqlparse.parse('foo /* a comment */, bar')[0]
    id_list = parsed.tokens[0]
    self.assert_(isinstance(id_list, sql.IdentifierList))
    self.assert_(isinstance(id_list.tokens[0], sql.Identifier))
    self.assert_(isinstance(id_list.tokens[3], sql.Identifier))
Example #23
Source File: extract_table_names.py From codenn with MIT License | 5 votes |
def extract_table_identifiers(token_stream):
    """Yield the name of every table-like identifier in *token_stream*."""
    for tok in token_stream:
        if isinstance(tok, IdentifierList):
            for ident in tok.get_identifiers():
                yield ident.get_name()
        elif isinstance(tok, Identifier):
            yield tok.get_name()
        elif tok.ttype is Keyword:
            # It's a bug to check for Keyword here, but in practice some
            # table names are (mis)identified as keywords, so keep it.
            yield tok.value
Example #24
Source File: ParseUtils.py From SublimeText-SQLTools with GNU General Public License v3.0 | 5 votes |
def _extract_from_part(parsed):
    """Yield FROM-part tokens of *parsed*, recursing into every group."""
    tbl_prefix = False
    for tok in parsed.tokens:
        if tok.is_group:
            # Recurse into each grouped token first so nested FROM
            # clauses are found regardless of the current state.
            for sub in _extract_from_part(tok):
                yield sub
        if tbl_prefix:
            if _is_subselect(tok):
                for sub in _extract_from_part(tok):
                    yield sub
            elif tok.ttype is Keyword and tok.value.upper() != 'FROM' \
                    and not tok.value.upper().endswith('JOIN'):
                # FROM itself and JOIN variants (INNER JOIN, FULL OUTER
                # JOIN, ...) continue the clause — needed because an
                # incomplete nested select ('SELECT * FROM (SELECT id
                # FROM user') re-emits FROM; any other keyword ends it.
                tbl_prefix = False
            else:
                yield tok
        elif tok.ttype is Keyword or tok.ttype is Keyword.DML:
            upper = tok.value.upper()
            if upper in ('COPY', 'FROM', 'INTO', 'UPDATE', 'TABLE') \
                    or upper.endswith('JOIN'):
                tbl_prefix = True
        elif isinstance(tok, IdentifierList):
            # 'SELECT a, FROM abc' parses FROM into the column list.
            for ident in tok.get_identifiers():
                if ident.ttype is Keyword and ident.value.upper() == 'FROM':
                    tbl_prefix = True
                    break
Example #25
Source File: parseutils.py From litecli with BSD 3-Clause "New" or "Revised" License | 5 votes |
def extract_table_identifiers(token_stream):
    """yields tuples of (schema_name, table_name, table_alias)"""
    for entry in token_stream:
        if isinstance(entry, IdentifierList):
            for ident in entry.get_identifiers():
                # Keywords (such as FROM) sometimes masquerade as
                # identifiers and lack get_real_name(); skip them.
                try:
                    schema_name = ident.get_parent_name()
                    real_name = ident.get_real_name()
                except AttributeError:
                    continue
                if real_name:
                    yield (schema_name, real_name, ident.get_alias())
        elif isinstance(entry, Identifier):
            real_name = entry.get_real_name()
            if real_name:
                yield (entry.get_parent_name(), real_name, entry.get_alias())
            else:
                # No real name available: use the plain name for both the
                # table and (absent an explicit alias) the alias slot.
                name = entry.get_name()
                yield (None, name, entry.get_alias() or name)
        elif isinstance(entry, Function):
            yield (None, entry.get_name(), entry.get_name())

# extract_tables is inspired from examples in the sqlparse lib.
Example #26
Source File: tables.py From mssql-cli with BSD 3-Clause "New" or "Revised" License | 4 votes |
def extract_table_identifiers(token_stream, allow_functions=True): """yields tuples of TableReference namedtuples""" # We need to do some massaging of the names because postgres is case- # insensitive and '"Foo"' is not the same table as 'Foo' (while 'foo' is) def parse_identifier(item): name = item.get_real_name() schema_name = item.get_parent_name() alias = item.get_alias() if not name: schema_name = None name = item.get_name() alias = alias or name schema_quoted = schema_name and item.value[0] == '"' if schema_name and not schema_quoted: schema_name = schema_name.lower() quote_count = item.value.count('"') name_quoted = quote_count > 2 or (quote_count and not schema_quoted) alias_quoted = alias and item.value[-1] == '"' if alias_quoted or name_quoted and not alias and name.islower(): alias = '"' + (alias or name) + '"' if name and not name_quoted and not name.islower(): if not alias: alias = name name = name.lower() return schema_name, name, alias for item in token_stream: if isinstance(item, IdentifierList): for identifier in item.get_identifiers(): # Sometimes Keywords (such as FROM ) are classified as # identifiers which don't have the get_real_name() method. try: schema_name = identifier.get_parent_name() real_name = identifier.get_real_name() is_function = (allow_functions and _identifier_is_function(identifier)) except AttributeError: continue if real_name: yield TableReference(schema_name, real_name, identifier.get_alias(), is_function) elif isinstance(item, Identifier): schema_name, real_name, alias = parse_identifier(item) is_function = allow_functions and _identifier_is_function(item) yield TableReference(schema_name, real_name, alias, is_function) elif isinstance(item, Function): schema_name, real_name, alias = parse_identifier(item) yield TableReference(None, real_name, alias, allow_functions) # extract_tables is inspired from examples in the sqlparse lib.
Example #27
Source File: tables.py From pgcli with BSD 3-Clause "New" or "Revised" License | 4 votes |
def extract_table_identifiers(token_stream, allow_functions=True): """yields tuples of TableReference namedtuples""" # We need to do some massaging of the names because postgres is case- # insensitive and '"Foo"' is not the same table as 'Foo' (while 'foo' is) def parse_identifier(item): name = item.get_real_name() schema_name = item.get_parent_name() alias = item.get_alias() if not name: schema_name = None name = item.get_name() alias = alias or name schema_quoted = schema_name and item.value[0] == '"' if schema_name and not schema_quoted: schema_name = schema_name.lower() quote_count = item.value.count('"') name_quoted = quote_count > 2 or (quote_count and not schema_quoted) alias_quoted = alias and item.value[-1] == '"' if alias_quoted or name_quoted and not alias and name.islower(): alias = '"' + (alias or name) + '"' if name and not name_quoted and not name.islower(): if not alias: alias = name name = name.lower() return schema_name, name, alias try: for item in token_stream: if isinstance(item, IdentifierList): for identifier in item.get_identifiers(): # Sometimes Keywords (such as FROM ) are classified as # identifiers which don't have the get_real_name() method. try: schema_name = identifier.get_parent_name() real_name = identifier.get_real_name() is_function = allow_functions and _identifier_is_function( identifier ) except AttributeError: continue if real_name: yield TableReference( schema_name, real_name, identifier.get_alias(), is_function ) elif isinstance(item, Identifier): schema_name, real_name, alias = parse_identifier(item) is_function = allow_functions and _identifier_is_function(item) yield TableReference(schema_name, real_name, alias, is_function) elif isinstance(item, Function): schema_name, real_name, alias = parse_identifier(item) yield TableReference(None, real_name, alias, allow_functions) except StopIteration: return # extract_tables is inspired from examples in the sqlparse lib.
Example #28
Source File: operators.py From djongo with GNU Affero General Public License v3.0 | 4 votes |
def _token2op(self,
              tok: Token,
              statement: SQLStatement) -> '_Op':
    """Map a single sqlparse token to the corresponding _Op instance.

    Returns None for tokens that need no operator of their own (stray
    parentheses, bare identifiers, grouped placeholders).  May consume
    extra tokens from *statement* via skip() for multi-token operators.
    """
    op = None
    kw = {'statement': statement, 'query': self.query}
    if tok.match(tokens.Keyword, 'AND'):
        op = AndOp(**kw)
    elif tok.match(tokens.Keyword, 'OR'):
        op = OrOp(**kw)
    elif tok.match(tokens.Keyword, 'IN'):
        op = InOp(**kw)
    elif tok.match(tokens.Keyword, 'NOT'):
        # "NOT IN" is one operator; a lone NOT is another.
        if statement.next_token.match(tokens.Keyword, 'IN'):
            op = NotInOp(**kw)
            statement.skip(1)
        else:
            op = NotOp(**kw)
    elif tok.match(tokens.Keyword, 'LIKE'):
        op = LikeOp(**kw)
    elif tok.match(tokens.Keyword, 'iLIKE'):
        op = iLikeOp(**kw)
    elif tok.match(tokens.Keyword, 'BETWEEN'):
        op = BetweenOp(**kw)
        # BETWEEN consumes the following "<low> AND <high>" tokens.
        statement.skip(3)
    elif tok.match(tokens.Keyword, 'IS'):
        op = IsOp(**kw)
    elif isinstance(tok, Comparison):
        op = CmpOp(tok, self.query)
    elif isinstance(tok, Parenthesis):
        if (tok[1].match(tokens.Name.Placeholder, '.*', regex=True)
                or tok[1].match(tokens.Keyword, 'Null')
                or isinstance(tok[1], IdentifierList)
                or tok[1].ttype == tokens.DML
        ):
            # Placeholder / NULL / value list / nested DML inside the
            # parentheses — no operator of its own.
            pass
        else:
            op = ParenthesisOp(SQLStatement(tok), self.query)
    elif tok.match(tokens.Punctuation, (')', '(')):
        pass
    elif isinstance(tok, Identifier):
        pass
    else:
        raise SQLDecodeError
    return op
Example #29
Source File: extract_tables.py From Archery with Apache License 2.0 | 4 votes |
def extract_table_identifiers(token_stream, allow_functions=True): """yields tuples of TableReference namedtuples""" # We need to do some massaging of the names because postgres is case- # insensitive and '"Foo"' is not the same table as 'Foo' (while 'foo' is) def parse_identifier(item): name = item.get_real_name() schema_name = item.get_parent_name() alias = item.get_alias() if not name: schema_name = None name = item.get_name() alias = alias or name schema_quoted = schema_name and item.value[0] == '"' if schema_name and not schema_quoted: schema_name = schema_name.lower() quote_count = item.value.count('"') name_quoted = quote_count > 2 or (quote_count and not schema_quoted) alias_quoted = alias and item.value[-1] == '"' if alias_quoted or name_quoted and not alias and name.islower(): alias = '"' + (alias or name) + '"' if name and not name_quoted and not name.islower(): if not alias: alias = name name = name.lower() return schema_name, name, alias for item in token_stream: if isinstance(item, IdentifierList): for identifier in item.get_identifiers(): # Sometimes Keywords (such as FROM ) are classified as # identifiers which don't have the get_real_name() method. try: schema_name = identifier.get_parent_name() real_name = identifier.get_real_name() is_function = ( allow_functions and _identifier_is_function(identifier) ) except AttributeError: continue if real_name: yield TableReference( schema_name, real_name, identifier.get_alias(), is_function ) elif isinstance(item, Identifier): schema_name, real_name, alias = parse_identifier(item) is_function = allow_functions and _identifier_is_function(item) yield TableReference(schema_name, real_name, alias, is_function) elif isinstance(item, Function): schema_name, real_name, alias = parse_identifier(item) yield TableReference(None, real_name, alias, allow_functions) # extract_tables is inspired from examples in the sqlparse lib.
Example #30
Source File: sql_parse.py From incubator-superset with Apache License 2.0 | 4 votes |
def _extract_from_token(  # pylint: disable=too-many-branches
    self, token: Token
) -> None:
    """
    <Identifier> store a list of subtokens and <IdentifierList> store
    lists of subtoken list.

    It extracts <IdentifierList> and <Identifier> from :param token: and loops
    through all subtokens recursively. It finds table_name_preceding_token and
    passes <IdentifierList> and <Identifier> to self._process_tokenlist to populate
    self._tables.

    :param token: instance of Token or child class, e.g. TokenList, to be processed
    """
    if not hasattr(token, "tokens"):
        # Leaf token — nothing to recurse into.
        return
    table_name_preceding_token = False
    for item in token.tokens:
        if item.is_group and not self._is_identifier(item):
            # Recurse into non-identifier groups (parentheses, etc.).
            self._extract_from_token(item)
        if item.ttype in Keyword and (
            item.normalized in PRECEDES_TABLE_NAME
            or item.normalized.endswith(" JOIN")
        ):
            # e.g. FROM / JOIN variants: the next identifier is a table.
            table_name_preceding_token = True
            continue
        if item.ttype in Keyword:
            # Any other keyword resets the "expecting a table name" state.
            table_name_preceding_token = False
            continue
        if table_name_preceding_token:
            if isinstance(item, Identifier):
                self._process_tokenlist(item)
            elif isinstance(item, IdentifierList):
                for token2 in item.get_identifiers():
                    if isinstance(token2, TokenList):
                        self._process_tokenlist(token2)
        elif isinstance(item, IdentifierList):
            # A list containing non-identifiers (e.g. a misparsed FROM)
            # still needs to be walked for nested tables.
            if any(not self._is_identifier(token2) for token2 in item.tokens):
                self._extract_from_token(item)