Python Examples of pyparsing.Combine

Source File: macro_expander.py From rekall with GNU General Public License v2.0

6 votes

def expression(self):
        """Build the parser for a (possibly nested) expression.

        An expression is one or more of: a known function call such as
        ``FOO(2 , 3)``, a parenthesised group such as ``(1 + (2 + 3))``,
        a token, or a non-token.

        Returns:
            A (pyparsing) parser whose matched pieces are combined into a
            single string joined by single spaces.
        """
        expr = pyparsing.Forward()

        # Parenthesised groups: (1 + (2 + 3))
        parenthesised = pyparsing.nestedExpr("(", ")", expr)
        parenthesised.setParseAction(self._combine_lists)

        # Function calls: FOO(2 , 3)
        function_name = _TOKEN().setResultsName("function")
        opening = _OPEN_PARENTHESIS()
        single_argument = pyparsing.Combine(
            expr, adjacent=False, joinString=" ")
        arguments = pyparsing.delimitedList(
            single_argument, delim=",").setResultsName("func_args")
        closing = _CLOSE_PARENTHESIS()
        function_call = function_name + opening + arguments + closing
        # The parse action is attached in place to the call parser.
        function_call.setParseAction(self._is_known_function)

        term = (
            function_call
            | pyparsing.Group(parenthesised)
            | _TOKEN()
            | _NOT_TOKEN()
        )
        expr << pyparsing.OneOrMore(term)

        return pyparsing.Combine(expr, adjacent=False, joinString=" ")

Source File: yara_support.py From rekall with GNU General Public License v2.0

6 votes

def anything_beetween(opener_and_closer):
    """Create a (pyparsing) parser for whatever sits between two delimiters.

    Nested, balanced occurrences of the delimiters are accepted as part of
    the content.

    Args:
      opener_and_closer: a two-element string; element 0 is the opening
        delimiter and element 1 is the closing delimiter.

    Returns:
      A (pyparsing) parser that yields the content as a single string.
    """
    # Every printable character except the delimiters themselves.
    delimiter_removal = {ord(char): None for char in opener_and_closer}
    allowed_chars = string.printable.translate(delimiter_removal)

    plain_word = pyparsing.Word(allowed_chars)
    plain_word.setName("word_without_delimiters")

    opening = pyparsing.Literal(opener_and_closer[0])
    closing = pyparsing.Literal(opener_and_closer[1])

    content = pyparsing.Forward()
    nested_block = opening + content + closing
    nested_block.setName("delimited_block")

    # pylint: disable=expression-not-assigned
    content << pyparsing.ZeroOrMore(plain_word | nested_block)

    # Glue every matched piece back together into one string.
    return pyparsing.Combine(content)

Source File: ldap3mock.py From privacyidea with GNU Affero General Public License v3.0

6 votes

def _parse_filter():
        """Build the pyparsing grammar used to parse search filter strings.

        A filter is a sequence of atoms. An atom is either a parenthesised
        operator (``!``, ``&`` or ``|``) applied to a nested filter, or a
        parenthesised ``key<relation>value`` comparison that is returned as
        one combined string.

        Returns:
            The (pyparsing) parser for a complete filter expression.
        """
        open_paren = pyparsing.Literal('(').suppress()
        close_paren = pyparsing.Literal(')').suppress()
        bool_operator = pyparsing.oneOf('! & |')

        attribute = pyparsing.Word(pyparsing.alphanums)
        relation = pyparsing.oneOf("= ~= >= <=")
        # NOTE: This character list may need to grow, but since this is not a
        # real LDAP server it should be sufficient.
        # A value may contain: digits, upper/lower case letters, asterisk,
        # at symbol, minus, full stop, backslash or a space.
        value = pyparsing.Word(pyparsing.alphanums + "-*@.\\ äöü")

        filter_expr = pyparsing.Forward()
        compound = pyparsing.Group(
            open_paren + bool_operator + filter_expr + close_paren)
        comparison = pyparsing.Combine(
            open_paren + attribute + relation + value + close_paren)
        atom = compound | comparison
        filter_expr << atom + pyparsing.ZeroOrMore(filter_expr)

        return filter_expr

Source File: expression_parser.py From rekall with GNU General Public License v2.0

5 votes

def _cast_transformer(self):
        """Build a parser that strips an obvious leading cast.

        The cast is a parenthesised group containing no nested parentheses;
        it is matched but suppressed. What follows it (an alphanumeric or
        underscore word, or an opening parenthesis) is kept.
        """
        suppressed_cast = pyparsing.Regex(r"\([^()]*\)").suppress()
        cast_target = (
            pyparsing.Word(pyparsing.alphanums + "_")
            | pyparsing.Literal("(")
        )
        return pyparsing.Combine(suppressed_cast + cast_target, adjacent=False)

Source File: expression_parser.py From rekall with GNU General Public License v2.0

5 votes

def XXXX_cast_expression(self):
        """Build a parser for cast expressions.

        A cast is a parenthesised type (either a plain, possibly nested,
        type or a ``typeof`` form) that is not followed by a plus or minus
        sign, followed by the expression being cast.

        Returns:
            A (pyparsing) parser for parsing cast expressions; matches are
            passed to ``self._create_cast_expression``.
        """
        type_word = pyparsing.Word(pyparsing.alphanums + '_*[]')

        # A parenthesised type; the parentheses are dropped and the content
        # is collapsed into one string.
        nested = pyparsing.Forward().setName("nested")
        opening = pyparsing.Literal('(').suppress()
        content = pyparsing.Combine(
            pyparsing.ZeroOrMore(self._integer() | type_word | nested))
        closing = pyparsing.Literal(')').suppress()
        nested << pyparsing.Combine(opening + content + closing)

        typeof_expression = (
            _OPEN_PARENTHESIS
            + pyparsing.Keyword('typeof')
            + nested("typeof_arg")
            + _CLOSE_PARENTHESIS
        )
        type_expression = typeof_expression | nested("simple_type")

        # The negative lookahead rejects a type followed by + or -.
        cast = (
            type_expression
            + ~(_PLUS | _MINUS)
            + self.expression("expression")
        )
        cast.setParseAction(self._create_cast_expression)
        return cast

Source File: expression_parser.py From rekall with GNU General Public License v2.0

5 votes

def _hexadecimal_as_string(self):
        """Return a parser matching a ``0x``-prefixed hex number as one string."""
        prefixed_digits = '0x' + pyparsing.Word(pyparsing.hexnums)
        return pyparsing.Combine(prefixed_digits)

Source File: c_parser.py From rekall with GNU General Public License v2.0

5 votes

def _typeof_expression(self):
        """Build a parser for ``typeof(...)`` / ``__typeof__(...)`` expressions.

        The keyword, both parentheses and everything between them are
        combined into a single string.
        """
        typeof_keyword = (
            pyparsing.Keyword('typeof')
            | pyparsing.Keyword('__typeof__')
        )
        opening = pyparsing.Literal('(')
        argument = parsers.anything_beetween('()')
        closing = pyparsing.Literal(')')
        return pyparsing.Combine(typeof_keyword + opening + argument + closing)

Source File: yara_support.py From rekall with GNU General Public License v2.0

5 votes

def statement():
    """Build a parser for a single ``identifier = value`` statement.

    The value is a curly-brace block, a single- or double-quoted string
    (quotes kept), or a regex, optionally followed by keywords. It is
    joined into one space-separated string and exposed as ``rhs``; the
    identifier is exposed as ``lhs``.
    """
    lhs = _IDENTIFIER.setResultsName("lhs")

    value = (
        anything_in_curly()
        | pyparsing.QuotedString("'", escChar="\\", unquoteResults=False)
        | pyparsing.QuotedString("\"", escChar="\\", unquoteResults=False)
        | _REGEX
    )
    trailing_keywords = pyparsing.ZeroOrMore(_KEYWORD)
    rhs = pyparsing.Combine(
        value + trailing_keywords,
        adjacent=False,
        joinString=" ",
    ).setResultsName("rhs")

    return pyparsing.Group(lhs + _EQUALS + rhs)

Source File: parsing.py From online-ratings with MIT License

5 votes

def _quoted(expr):
    """Wrap ``expr`` in single quotes; the quotes are matched but not returned."""
    opening_quote = Suppress(Literal("'"))
    closing_quote = Suppress(Literal("'"))
    return Combine(opening_quote + expr + closing_quote)

Source File: jsLiteralParse.py From ReadableWebProxy with BSD 3-Clause "New" or "Revised" License

4 votes

def jsParse(inStr):
	"""Parse a JavaScript object literal and return the equivalent Python value.

	This is a context-free grammar parser for javascript object literals.
	It needs to handle a lot of the definitional messes found in in-the-wild
	javascript object literals. Javascript is /way/ more tolerant than JSON
	when it comes to object literals, so python's `json` library cannot be
	used directly.

	Args:
		inStr: the text of the literal to parse.

	Returns:
		The last token of the parse: strings with their quotes removed,
		numbers as int or float, true/false/null as True/False/None,
		objects as dict and arrays as nested lists.

	Raises:
		pyparsing.ParseException: if inStr does not match the grammar.
	"""
	TRUE = pp.Keyword("true").setParseAction( pp.replaceWith(True) )
	FALSE = pp.Keyword("false").setParseAction( pp.replaceWith(False) )
	NULL = pp.Keyword("null").setParseAction( pp.replaceWith(None) )

	# setParseAction() modifies the element in place, so work on a copy to
	# avoid changing the shared module-level pp.quotedString for other users.
	jsonString = pp.quotedString.copy().setParseAction( pp.removeQuotes )
	jsonNumber = pp.Combine(
		pp.Optional('-') + ( '0' | pp.Word('123456789',pp.nums) ) +
		pp.Optional( '.' + pp.Word(pp.nums) ) +
		pp.Optional( pp.Word('eE',exact=1) + pp.Word(pp.nums+'+-',pp.nums) ) )

	jsonObject   = pp.Forward()
	jsonValue    = pp.Forward()
	jsonDict     = pp.Forward()
	jsonArray    = pp.Forward()
	jsonElements = pp.Forward()

	# Unquoted identifiers are accepted as object keys.
	rawText      = pp.Regex('[a-zA-Z_$][0-9a-zA-Z_$]*')

	# A comma with no element attached to it stands for a missing (None) entry.
	commaToNull = pp.Word(',,', exact=1).setParseAction(pp.replaceWith(None))
	jsonElements << pp.ZeroOrMore(commaToNull) + pp.Optional(jsonObject) + pp.ZeroOrMore((pp.Suppress(',') + jsonObject) | commaToNull)

	jsonValue << ( jsonString | jsonNumber | TRUE | FALSE | NULL )


	dictMembers = pp.delimitedList( pp.Group( (rawText | jsonString) + pp.Suppress(':') + (jsonValue | jsonDict | jsonArray)))
	# Trailing commas before the closing brace are tolerated.
	jsonDict << ( pp.Dict( pp.Suppress('{') + pp.Optional(dictMembers) + pp.ZeroOrMore(pp.Suppress(',')) + pp.Suppress('}') ) )
	jsonArray << ( pp.Group(pp.Suppress('[') + pp.Optional(jsonElements) + pp.Suppress(']') ) )
	jsonObject << (jsonValue | jsonDict | jsonArray)

	jsonComment = pp.cppStyleComment
	jsonObject.ignore( jsonComment )

	def convertDict(s, l, toks):
		# Each member is a [key, value] group; turn the groups into a dict.
		return dict(toks.asList())

	def convertNumbers(s,l,toks):
		# Prefer int; fall back to float for fractions and exponents.
		n = toks[0]
		try:
			return int(n)
		except ValueError:
			return float(n)

	jsonNumber.setParseAction(convertNumbers)
	jsonDict.setParseAction(convertDict)

	return jsonObject.parseString(inStr).pop()


# Stolen from http://stackoverflow.com/a/12017573/268006

Source File: searchparser.py From phpsploit with GNU General Public License v3.0

4 votes

def parser(self):
        """
        This function returns a parser.
        The grammar should be like most full text search engines (Google, Tsearch, Lucene).

        Grammar:
        - a query consists of alphanumeric words, with an optional '*' wildcard
          at the end of a word
        - a sequence of words between quotes is a literal string
        - words can be used together by using operators ('and' or 'or')
        - words with operators can be grouped with parenthesis
        - a word or group of words can be preceded by a 'not' operator
        - the 'and' operator precedes an 'or' operator
        - if an operator is missing, use an 'and' operator

        Operators are matched case-insensitively and only as whole words.

        Returns:
            The ``parseString`` method of the top-level ('or') parser.
        """
        operatorOr = Forward()

        operatorWord = Group(Combine(Word(alphanums) + Suppress('*'))).setResultsName('wordwildcard') | \
                            Group(Word(alphanums)).setResultsName('word')

        operatorQuotesContent = Forward()
        operatorQuotesContent << (
            (operatorWord + operatorQuotesContent) | operatorWord
        )

        operatorQuotes = Group(
            Suppress('"') + operatorQuotesContent + Suppress('"')
        ).setResultsName("quotes") | operatorWord

        operatorParenthesis = Group(
            (Suppress("(") + operatorOr + Suppress(")"))
        ).setResultsName("parenthesis") | operatorQuotes

        operatorNot = Forward()
        operatorNot << (Group(
            Suppress(Keyword("not", caseless=True)) + operatorNot
        ).setResultsName("not") | operatorParenthesis)

        # Lookahead used by the implicit 'and' rule. It must recognise the
        # operators exactly like the explicit rules do (caseless, whole
        # word): a plain case-sensitive literal match would treat 'OR' as an
        # ordinary word and would reject words that merely start with
        # 'and'/'or' (e.g. 'android', 'oracle').
        explicitOperator = Keyword("and", caseless=True) | Keyword("or", caseless=True)

        operatorAnd = Forward()
        operatorAnd << (Group(
            operatorNot + Suppress(Keyword("and", caseless=True)) + operatorAnd
        ).setResultsName("and") | Group(
            operatorNot + OneOrMore(~explicitOperator + operatorAnd)
        ).setResultsName("and") | operatorNot)

        operatorOr << (Group(
            operatorAnd + Suppress(Keyword("or", caseless=True)) + operatorOr
        ).setResultsName("or") | operatorAnd)

        return operatorOr.parseString

Source File: shparsers.py From stash with MIT License

4 votes

def __init__(self, debug=False):
        """Build the shell-command grammar and initialise the parser state.

        Two parsers are created: ``self.parser`` for a complete command line
        and ``self.parser_within_dq`` for the content of a double-quoted
        word.

        Args:
            debug: flag stored on ``self.debug``.
        """

        self.debug = debug
        self.logger = logging.getLogger('StaSh.Parser')

        # Escape sequences: a backslash followed by exactly one printable
        # character or space, by up to three octal digits, or by 'x' and
        # exactly two hex digits.
        escaped = pp.Combine("\\" + pp.Word(pp.printables + ' ', exact=1)).setParseAction(self.escaped_action)
        escaped_oct = pp.Combine("\\" + pp.Word('01234567', max=3)).setParseAction(self.escaped_oct_action)
        escaped_hex = pp.Combine("\\x" + pp.Word('0123456789abcdefABCDEF', exact=2)).setParseAction(self.escaped_hex_action)
        # Some special uq_word is needed, e.g. &3 for file descriptor of Pythonista interactive prompt
        uq_word = (pp.Literal('&3') | pp.Word(_WORD_CHARS)).setParseAction(self.uq_word_action)
        # Back-quoted, double-quoted and single-quoted words; the quote
        # characters are kept in the result (unquoteResults=False).
        bq_word = pp.QuotedString('`', escChar='\\', unquoteResults=False).setParseAction(self.bq_word_action)
        dq_word = pp.QuotedString('"', escChar='\\', unquoteResults=False).setParseAction(self.dq_word_action)
        sq_word = pp.QuotedString("'", escChar='\\', unquoteResults=False).setParseAction(self.sq_word_action)
        # The ^ operator means longest match (as opposed to | which means first match)
        word = pp.Combine(pp.OneOrMore(escaped ^ escaped_oct ^ escaped_hex
                                       ^ uq_word ^ bq_word ^ dq_word ^ sq_word))\
            .setParseAction(self.word_action)

        # NAME=word assignments; NAME starts with a letter or underscore.
        identifier = pp.Word(pp.alphas + '_', pp.alphas + pp.nums + '_').setParseAction(self.identifier_action)
        assign_op = pp.Literal('=').setParseAction(self.assign_op_action)
        assignment_word = pp.Combine(identifier + assign_op + word).setParseAction(self.assignment_word_action)

        # Command separators, pipe and output redirection operators.
        punctuator = pp.oneOf('; &').setParseAction(self.punctuator_action)
        pipe_op = pp.Literal('|').setParseAction(self.pipe_op_action)
        io_redirect_op = pp.oneOf('>> >').setParseAction(self.io_redirect_op_action)
        io_redirect = (io_redirect_op + word)('io_redirect')

        # The optional ' ' is a workaround to a possible bug in pyparsing.
        # The position of cmd_word after cmd_prefix is always reported 1 character ahead
        # of the correct value.
        cmd_prefix = (pp.OneOrMore(assignment_word) + pp.Optional(' '))('cmd_prefix')
        # Either arguments with an optional redirect, or a redirect alone.
        cmd_suffix = (pp.OneOrMore(word)('args') + pp.Optional(io_redirect)) ^ io_redirect

        # The command word may be preceded by a '!' or '\' modifier.
        modifier = pp.oneOf('! \\')
        cmd_word = (pp.Combine(pp.Optional(modifier) + word) ^ word)('cmd_word').setParseAction(self.cmd_word_action)

        # A simple command is either assignments optionally followed by a
        # command, or a command on its own.
        simple_command = \
            (cmd_prefix + pp.Optional(cmd_word) + pp.Optional(cmd_suffix)) \
            | (cmd_word + pp.Optional(cmd_suffix))
        simple_command = pp.Group(simple_command)

        # Simple commands chained with '|'.
        pipe_sequence = simple_command + pp.ZeroOrMore(pipe_op + simple_command)
        pipe_sequence = pp.Group(pipe_sequence)

        # Pipe sequences separated by punctuators; an empty line is allowed.
        complete_command = pp.Optional(pipe_sequence + pp.ZeroOrMore(punctuator + pipe_sequence) + pp.Optional(punctuator))

        # --- special parser for inside double quotes
        # The character set is pp.printables with the backtick swapped for a
        # space and the backslash removed.
        uq_word_in_dq = pp.Word(pp.printables.replace('`', ' ').replace('\\', ''))\
            .setParseAction(self.uq_word_action)
        word_in_dq = pp.Combine(pp.OneOrMore(escaped ^ escaped_oct ^ escaped_hex ^ bq_word ^ uq_word_in_dq))
        # ---

        # Python-style (#) comments are ignored by the main parser.
        self.parser = complete_command.parseWithTabs().ignore(pp.pythonStyleComment)
        self.parser_within_dq = word_in_dq.leaveWhitespace()
        # Initial parse state: the next word is a command and nothing has
        # been collected yet.
        self.next_word_type = ShParser._NEXT_WORD_CMD
        self.tokens = []
        self.parts = []

Source File: evaluator.py From manila with Apache License 2.0

4 votes

def _def_parser():
    """Build the pyparsing grammar for evaluator expressions.

    Operands are integers, reals, or names made of letters, underscores
    and dots. The operator table passed to ``operatorPrecedence`` lists
    the operators from highest to lowest precedence, each paired with the
    Eval* class that handles it.

    Returns:
        The (pyparsing) expression parser.
    """
    # Enabling packrat parsing greatly speeds up the parsing.
    pyparsing.ParserElement.enablePackrat()

    alphas = pyparsing.alphas
    Combine = pyparsing.Combine
    nums = pyparsing.nums
    oneOf = pyparsing.oneOf
    opAssoc = pyparsing.opAssoc
    operatorPrecedence = pyparsing.operatorPrecedence
    Word = pyparsing.Word

    integer = Word(nums)
    real = Combine(Word(nums) + '.' + Word(nums))
    variable = Word(alphas + '_' + '.')
    # 'real' must be tried before 'integer' so "1.5" is not split at the dot.
    number = real | integer
    fn = Word(alphas + '_' + '.')
    operand = number | variable | fn

    signop = oneOf('+ -')
    addop = oneOf('+ -')
    multop = oneOf('* /')
    comparisonop = oneOf(' '.join(EvalComparisonOp.operations.keys()))
    ternaryop = ('?', ':')
    boolandop = oneOf('AND and &&')
    boolorop = oneOf('OR or ||')
    negateop = oneOf('NOT not !')

    operand.setParseAction(EvalConstant)
    expr = operatorPrecedence(operand, [
        (fn, 1, opAssoc.RIGHT, EvalFunction),
        ("^", 2, opAssoc.RIGHT, EvalPowerOp),
        (signop, 1, opAssoc.RIGHT, EvalSignOp),
        (multop, 2, opAssoc.LEFT, EvalMultOp),
        (addop, 2, opAssoc.LEFT, EvalAddOp),
        (negateop, 1, opAssoc.RIGHT, EvalNegateOp),
        (comparisonop, 2, opAssoc.LEFT, EvalComparisonOp),
        (ternaryop, 3, opAssoc.LEFT, EvalTernaryOp),
        (boolandop, 2, opAssoc.LEFT, EvalBoolAndOp),
        (boolorop, 2, opAssoc.LEFT, EvalBoolOrOp),
        (',', 2, opAssoc.RIGHT, EvalCommaSeperator), ])

    return expr

Python pyparsing.Combine() Examples