Python pyparsing.Combine() Examples

The following are 13 code examples of pyparsing.Combine(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the pyparsing module, or try the search function.
Example #1
Source File: macro_expander.py    From rekall with GNU General Public License v2.0 6 votes vote down vote up
def expression(self):
    """Build the parser for an expression.

    An expression is one or more of the following, tried in this order:
    a function call (post-processed by ``self._is_known_function``), a
    grouped parenthesised sub-expression, a ``_TOKEN()`` match or a
    ``_NOT_TOKEN()`` match.

    Returns:
        A (pyparsing) parser whose matched pieces are combined into one
        string, joined with single spaces.
    """
    expr = pyparsing.Forward()

    # Parenthesised sub-expression, e.g. (1 + (2 + 3))
    parenthesised = pyparsing.nestedExpr("(", ")", expr)
    parenthesised = parenthesised.setParseAction(self._combine_lists)

    # Function call, e.g. FOO(2 , 3); every argument is itself an
    # expression collapsed into a single space-joined string.
    name = _TOKEN().setResultsName("function")
    opening = _OPEN_PARENTHESIS()
    argument = pyparsing.Combine(expr, adjacent=False, joinString=" ")
    arguments = pyparsing.delimitedList(
        argument, delim=",").setResultsName("func_args")
    closing = _CLOSE_PARENTHESIS()
    call = name + opening + arguments + closing

    alternatives = (
        call.setParseAction(self._is_known_function)
        | pyparsing.Group(parenthesised)
        | _TOKEN()
        | _NOT_TOKEN()
    )
    expr << pyparsing.OneOrMore(alternatives)

    return pyparsing.Combine(expr, adjacent=False, joinString=" ")
Example #2
Source File: yara_support.py    From rekall with GNU General Public License v2.0 6 votes vote down vote up
def anything_beetween(opener_and_closer):
    """Create a (pyparsing) parser for the text enclosed by a delimiter pair.

    Args:
      opener_and_closer: a string whose first element is the opening
        delimiter and whose second element is the closing delimiter.

    Returns:
      A (pyparsing) parser matching any sequence of delimiter-free words
      and nested delimited blocks, combined into a single string.
    """
    opener = pyparsing.Literal(opener_and_closer[0])
    closer = pyparsing.Literal(opener_and_closer[1])

    # Every printable character except the delimiters themselves.
    drop_delimiters = {ord(char): None for char in opener_and_closer}
    allowed_chars = str(string.printable).translate(drop_delimiters)
    plain_word = pyparsing.Word(allowed_chars).setName("other_chars")

    content = pyparsing.Forward()
    block = opener + content + closer
    # pylint: disable=expression-not-assigned
    content << pyparsing.ZeroOrMore(
        plain_word.setName("word_without_delimiters")
        | block.setName("delimited_block")
    )

    # Glue all matched parts back together into one string.
    return pyparsing.Combine(content)
Example #3
Source File: ldap3mock.py    From privacyidea with GNU Affero General Public License v3.0 6 votes vote down vote up
def _parse_filter():
    """Build a pyparsing grammar for LDAP-style search filters.

    A filter is a sequence of atoms. An atom is either a grouped
    ``(<op> <filter>)`` with ``<op>`` one of ``!``, ``&``, ``|``, or a
    ``(<key><rel><value>)`` comparison combined into one string.

    Returns:
        The (pyparsing) parser for a complete filter expression.
    """
    operator = pyparsing.oneOf('! & |')
    open_paren = pyparsing.Literal('(').suppress()
    close_paren = pyparsing.Literal(')').suppress()

    attribute = pyparsing.Word(pyparsing.alphanums)
    # NOTE: This character set may need extending, but since this is not a
    # real LDAP server it should be sufficient.
    # A value may contain: digits, upper/lower case letters, asterisk,
    # at sign, minus, full stop, backslash, space and the umlauts below.
    value = pyparsing.Word(pyparsing.alphanums + "-*@.\\ äöü")
    relation = pyparsing.oneOf("= ~= >= <=")

    filter_expr = pyparsing.Forward()
    compound = pyparsing.Group(
        open_paren + operator + filter_expr + close_paren)
    comparison = pyparsing.Combine(
        open_paren + attribute + relation + value + close_paren)
    item = compound | comparison
    filter_expr << item + pyparsing.ZeroOrMore(filter_expr)

    return filter_expr
Example #4
Source File: expression_parser.py    From rekall with GNU General Public License v2.0 5 votes vote down vote up
def _cast_transformer(self):
    """Return a parser that strips an obvious leading cast.

    The cast is a parenthesised group without nested parentheses; it is
    suppressed, and only the identifier or opening parenthesis that
    follows it is kept in the combined result.
    """
    cast = pyparsing.Regex(r"\([^()]*\)").suppress()
    identifier = pyparsing.Word(pyparsing.alphanums + "_")
    open_paren = pyparsing.Literal("(")
    return pyparsing.Combine(cast + (identifier | open_paren), adjacent=False)
Example #5
Source File: expression_parser.py    From rekall with GNU General Public License v2.0 5 votes vote down vote up
def XXXX_cast_expression(self):
    """Return a parser for cast expressions.

    A cast expression is a type, given either as a parenthesised
    ``typeof`` form or as a parenthesised simple type, that is not
    followed by ``_PLUS`` or ``_MINUS``, and then the expression being
    cast (obtained from ``self.expression``).

    Returns:
        A (pyparsing) parser for cast expressions; its matches are passed
        to ``self._create_cast_expression``.
    """
    type_word = pyparsing.Word(pyparsing.alphanums + '_*[]')

    # Parenthesised type text; may contain further parenthesised groups.
    parenthesised = pyparsing.Forward().setName("nested")
    open_paren = pyparsing.Literal('(').suppress()
    inner = pyparsing.Combine(
        pyparsing.ZeroOrMore(self._integer() | type_word | parenthesised))
    close_paren = pyparsing.Literal(')').suppress()
    parenthesised << pyparsing.Combine(open_paren + inner + close_paren)

    typeof_form = (
        _OPEN_PARENTHESIS
        + pyparsing.Keyword('typeof')
        + parenthesised("typeof_arg")
        + _CLOSE_PARENTHESIS
    )
    cast_type = typeof_form | parenthesised("simple_type")

    # Negative lookahead: the type must not be followed by a plus or minus.
    no_sign = ~(_PLUS | _MINUS)
    cast = cast_type + no_sign + self.expression("expression")
    return cast.setParseAction(self._create_cast_expression)
Example #6
Source File: expression_parser.py    From rekall with GNU General Public License v2.0 5 votes vote down vote up
def _hexadecimal_as_string(self):
    """Return a parser matching ``0x`` plus hex digits as one string."""
    hex_digits = pyparsing.Word(pyparsing.hexnums)
    return pyparsing.Combine('0x' + hex_digits)
Example #7
Source File: c_parser.py    From rekall with GNU General Public License v2.0 5 votes vote down vote up
def _typeof_expression(self):
    """Return a parser matching ``typeof(...)`` / ``__typeof__(...)``.

    The keyword, both parentheses and everything between them are
    combined into a single string.
    """
    typeof_keyword = (
        pyparsing.Keyword('typeof') | pyparsing.Keyword('__typeof__'))
    open_paren = pyparsing.Literal('(')
    argument = parsers.anything_beetween('()')
    close_paren = pyparsing.Literal(')')
    return pyparsing.Combine(
        typeof_keyword + open_paren + argument + close_paren)
Example #8
Source File: yara_support.py    From rekall with GNU General Public License v2.0 5 votes vote down vote up
def statement():
    """Return a parser for one ``<identifier> = <value>`` statement.

    The identifier is stored under the results name "lhs". The value is a
    curly-brace block, a single- or double-quoted string (quotes kept) or
    a ``_REGEX`` match, optionally followed by ``_KEYWORD`` matches; it is
    combined into one space-joined string stored under "rhs".
    """
    lhs = _IDENTIFIER.setResultsName("lhs")
    value = (
        anything_in_curly()
        | pyparsing.QuotedString("'", escChar="\\", unquoteResults=False)
        | pyparsing.QuotedString("\"", escChar="\\", unquoteResults=False)
        | _REGEX
    )
    rhs = pyparsing.Combine(
        value + pyparsing.ZeroOrMore(_KEYWORD),
        adjacent=False,
        joinString=" ",
    ).setResultsName("rhs")
    return pyparsing.Group(lhs + _EQUALS + rhs)
Example #9
Source File: parsing.py    From online-ratings with MIT License 5 votes vote down vote up
def _quoted(expr):
    """Wrap *expr* in single quotes; the quotes are dropped from the result."""
    opening_quote = Suppress(Literal("'"))
    closing_quote = Suppress(Literal("'"))
    return Combine(opening_quote + expr + closing_quote)
Example #10
Source File: jsLiteralParse.py    From ReadableWebProxy with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def jsParse(inStr):
	"""Parse a javascript object literal and return the resulting value.

	This is a context-free grammar parser for javascript object literals.
	It has to cope with a lot of the definitional messes found in
	in-the-wild javascript: Javascript is /way/ more tolerant than JSON
	when it comes to object literals, so python's `json` library cannot
	be used directly.

	Handled beyond plain JSON: unquoted identifier keys, trailing commas
	in objects, elided array elements (bare commas, yielded as None) and
	C/C++ style comments.

	Args:
		inStr: the text of the javascript literal to parse.

	Returns:
		The last top-level token produced by the grammar. Objects are
		converted to `dict`, numbers to `int` or `float`, and
		true/false/null to True/False/None.

	Raises:
		Whatever `parseString` raises when the text does not match the
		grammar (a pyparsing parse exception).
	"""

	TRUE = pp.Keyword("true").setParseAction( pp.replaceWith(True) )
	FALSE = pp.Keyword("false").setParseAction( pp.replaceWith(False) )
	NULL = pp.Keyword("null").setParseAction( pp.replaceWith(None) )

	jsonString = pp.quotedString.setParseAction( pp.removeQuotes )
	# Optional sign, integer part, optional fraction, optional exponent.
	jsonNumber = pp.Combine(
		pp.Optional('-') + ( '0' | pp.Word('123456789',pp.nums) ) +
		pp.Optional( '.' + pp.Word(pp.nums) ) +
		pp.Optional( pp.Word('eE',exact=1) + pp.Word(pp.nums+'+-',pp.nums) ) )

	jsonObject   = pp.Forward()
	jsonValue    = pp.Forward()
	jsonDict     = pp.Forward()
	jsonArray    = pp.Forward()
	jsonElements = pp.Forward()

	# Unquoted identifier, accepted as a dictionary key.
	rawText      = pp.Regex('[a-zA-Z_$][0-9a-zA-Z_$]*')

	# A bare comma inside an array stands for an elided element -> None.
	commaToNull = pp.Word(',,', exact=1).setParseAction(pp.replaceWith(None))
	jsonElements << pp.ZeroOrMore(commaToNull) + pp.Optional(jsonObject) + pp.ZeroOrMore((pp.Suppress(',') + jsonObject) | commaToNull)

	jsonValue << ( jsonString | jsonNumber | TRUE | FALSE | NULL )

	dictMembers = pp.delimitedList( pp.Group( (rawText | jsonString) + pp.Suppress(':') + (jsonValue | jsonDict | jsonArray)))
	# Any number of trailing commas is tolerated before the closing brace.
	jsonDict << ( pp.Dict( pp.Suppress('{') + pp.Optional(dictMembers) + pp.ZeroOrMore(pp.Suppress(',')) + pp.Suppress('}') ) )
	jsonArray << ( pp.Group(pp.Suppress('[') + pp.Optional(jsonElements) + pp.Suppress(']') ) )
	jsonObject << (jsonValue | jsonDict | jsonArray)

	jsonComment = pp.cppStyleComment
	jsonObject.ignore( jsonComment )

	def convertDict(s, l, toks):
		# Each member is a [key, value] group; turn them into a real dict.
		return dict(toks.asList())

	def convertNumbers(s,l,toks):
		# Prefer int; fall back to float for fractions and exponents.
		n = toks[0]
		try:
			return int(n)
		except ValueError:
			return float(n)

	jsonNumber.setParseAction(convertNumbers)
	jsonDict.setParseAction(convertDict)

	# To trace matching while debugging, call jsonObject.setDebug() here.
	# The original code also parsed the constant text '"inStr"' on every
	# call and threw the result away; that leftover has been dropped.
	return jsonObject.parseString(inStr).pop()


# Stolen from http://stackoverflow.com/a/12017573/268006 
Example #11
Source File: searchparser.py    From phpsploit with GNU General Public License v3.0 4 votes vote down vote up
def parser(self):
    """
    Build the search-query grammar and return its parse function.
    The grammar is meant to resemble that of common full text search
    engines (Google, Tsearch, Lucene).

    Grammar:
    - a query is made of alphanumeric words, each optionally ending with
      a '*' wildcard
    - a run of words between double quotes is a literal string
    - words can be joined with the operators 'and' and 'or'
    - operator expressions can be grouped with parentheses
    - a word or a group can be preceded by a 'not' operator
    - 'and' binds tighter than 'or'
    - when no operator is given between two terms, 'and' is assumed

    Returns the bound ``parseString`` method of the top-level expression.
    """
    or_expr = Forward()

    # Single word, with or without a trailing '*'.
    wildcard_word = Group(
        Combine(Word(alphanums) + Suppress('*'))
    ).setResultsName('wordwildcard')
    plain_word = Group(Word(alphanums)).setResultsName('word')
    word = wildcard_word | plain_word

    # One or more words, as found between double quotes.
    quoted_content = Forward()
    quoted_content << ((word + quoted_content) | word)

    quoted = Group(
        Suppress('"') + quoted_content + Suppress('"')
    ).setResultsName("quotes")
    quoted_or_word = quoted | word

    parenthesised = Group(
        Suppress("(") + or_expr + Suppress(")")
    ).setResultsName("parenthesis")
    primary = parenthesised | quoted_or_word

    not_expr = Forward()
    negated = Group(
        Suppress(Keyword("not", caseless=True)) + not_expr
    ).setResultsName("not")
    not_expr << (negated | primary)

    and_expr = Forward()
    explicit_and = Group(
        not_expr + Suppress(Keyword("and", caseless=True)) + and_expr
    ).setResultsName("and")
    # Adjacent terms without an operator are treated as an 'and'.
    implicit_and = Group(
        not_expr + OneOrMore(~oneOf("and or") + and_expr)
    ).setResultsName("and")
    and_expr << (explicit_and | implicit_and | not_expr)

    either = Group(
        and_expr + Suppress(Keyword("or", caseless=True)) + or_expr
    ).setResultsName("or")
    or_expr << (either | and_expr)

    return or_expr.parseString
Example #12
Source File: shparsers.py    From stash with MIT License 4 votes vote down vote up
def __init__(self, debug=False):
        """Build the pyparsing grammars and initialise the parser state.

        Two parsers are stored on the instance: ``self.parser`` for a
        complete command line and ``self.parser_within_dq`` for the text
        found inside double quotes. Most grammar elements report their
        matches to a ``self.*_action`` callback.

        Args:
            debug: stored as ``self.debug``; not otherwise used here.
        """

        self.debug = debug
        self.logger = logging.getLogger('StaSh.Parser')

        # Backslash escapes: a single printable character or space, up to
        # three octal digits, or "x" followed by exactly two hex digits.
        escaped = pp.Combine("\\" + pp.Word(pp.printables + ' ', exact=1)).setParseAction(self.escaped_action)
        escaped_oct = pp.Combine("\\" + pp.Word('01234567', max=3)).setParseAction(self.escaped_oct_action)
        escaped_hex = pp.Combine("\\x" + pp.Word('0123456789abcdefABCDEF', exact=2)).setParseAction(self.escaped_hex_action)
        # Some special uq_word is needed, e.g. &3 for file descriptor of Pythonista interactive prompt
        uq_word = (pp.Literal('&3') | pp.Word(_WORD_CHARS)).setParseAction(self.uq_word_action)
        # Back-, double- and single-quoted fragments; the quote characters
        # are kept in the matched text (unquoteResults=False).
        bq_word = pp.QuotedString('`', escChar='\\', unquoteResults=False).setParseAction(self.bq_word_action)
        dq_word = pp.QuotedString('"', escChar='\\', unquoteResults=False).setParseAction(self.dq_word_action)
        sq_word = pp.QuotedString("'", escChar='\\', unquoteResults=False).setParseAction(self.sq_word_action)
        # The ^ operator means longest match (as opposed to | which means first match)
        # A word is one or more of the fragments above, combined into one token.
        word = pp.Combine(pp.OneOrMore(escaped ^ escaped_oct ^ escaped_hex
                                       ^ uq_word ^ bq_word ^ dq_word ^ sq_word))\
            .setParseAction(self.word_action)

        # NAME=word assignment, combined into a single token.
        identifier = pp.Word(pp.alphas + '_', pp.alphas + pp.nums + '_').setParseAction(self.identifier_action)
        assign_op = pp.Literal('=').setParseAction(self.assign_op_action)
        assignment_word = pp.Combine(identifier + assign_op + word).setParseAction(self.assignment_word_action)

        punctuator = pp.oneOf('; &').setParseAction(self.punctuator_action)
        pipe_op = pp.Literal('|').setParseAction(self.pipe_op_action)
        io_redirect_op = pp.oneOf('>> >').setParseAction(self.io_redirect_op_action)
        io_redirect = (io_redirect_op + word)('io_redirect')

        # The optional ' ' is a workaround to a possible bug in pyparsing.
        # The position of cmd_word after cmd_prefix is always reported 1 character ahead
        # of the correct value.
        cmd_prefix = (pp.OneOrMore(assignment_word) + pp.Optional(' '))('cmd_prefix')
        # Arguments with an optional redirect, or a redirect on its own.
        cmd_suffix = (pp.OneOrMore(word)('args') + pp.Optional(io_redirect)) ^ io_redirect

        # A command word may carry a leading '!' or '\' modifier.
        modifier = pp.oneOf('! \\')
        cmd_word = (pp.Combine(pp.Optional(modifier) + word) ^ word)('cmd_word').setParseAction(self.cmd_word_action)

        # Either assignments optionally followed by a command, or a command.
        simple_command = \
            (cmd_prefix + pp.Optional(cmd_word) + pp.Optional(cmd_suffix)) \
            | (cmd_word + pp.Optional(cmd_suffix))
        simple_command = pp.Group(simple_command)

        # Simple commands joined by '|'.
        pipe_sequence = simple_command + pp.ZeroOrMore(pipe_op + simple_command)
        pipe_sequence = pp.Group(pipe_sequence)

        # Pipe sequences separated by ';' or '&'; the whole line may be empty.
        complete_command = pp.Optional(pipe_sequence + pp.ZeroOrMore(punctuator + pipe_sequence) + pp.Optional(punctuator))

        # --- special parser for inside double quotes
        # Character set: printables with the backtick swapped for a space
        # and the backslash removed.
        uq_word_in_dq = pp.Word(pp.printables.replace('`', ' ').replace('\\', ''))\
            .setParseAction(self.uq_word_action)
        word_in_dq = pp.Combine(pp.OneOrMore(escaped ^ escaped_oct ^ escaped_hex ^ bq_word ^ uq_word_in_dq))
        # ---

        # Tabs in the input are not expanded, and Python-style (#) comments
        # are ignored.
        self.parser = complete_command.parseWithTabs().ignore(pp.pythonStyleComment)
        # Whitespace is significant inside double quotes.
        self.parser_within_dq = word_in_dq.leaveWhitespace()
        # Initial parse state; NOTE(review): presumably updated by the
        # *_action callbacks while parsing - confirm against the class body.
        self.next_word_type = ShParser._NEXT_WORD_CMD
        self.tokens = []
        self.parts = [] 
Example #13
Source File: evaluator.py    From manila with Apache License 2.0 4 votes vote down vote up
def _def_parser():
    """Build and return the pyparsing grammar for evaluator expressions.

    Operands are integers, reals, or names made of letters, '_' and '.';
    each matched operand is passed to ``EvalConstant``. The operator table
    handed to ``operatorPrecedence`` is listed from highest to lowest
    precedence: function application, power, sign, multiplication and
    division, addition and subtraction, negation, comparison, ternary,
    boolean and, boolean or, and finally the comma separator.

    Returns:
        The (pyparsing) parser for a complete expression.
    """
    # Enabling packrat parsing greatly speeds up the parsing.
    pyparsing.ParserElement.enablePackrat()

    alphas = pyparsing.alphas
    Combine = pyparsing.Combine
    nums = pyparsing.nums
    oneOf = pyparsing.oneOf
    opAssoc = pyparsing.opAssoc
    operatorPrecedence = pyparsing.operatorPrecedence
    Word = pyparsing.Word

    integer = Word(nums)
    real = Combine(Word(nums) + '.' + Word(nums))
    variable = Word(alphas + '_' + '.')
    # `real` must be tried before `integer`, otherwise "1.5" would match
    # only its integer part.
    number = real | integer
    # Same character set as `variable`; also used below as a unary
    # operator for function application.
    fn = Word(alphas + '_' + '.')
    operand = number | variable | fn

    signop = oneOf('+ -')
    addop = oneOf('+ -')
    multop = oneOf('* /')
    comparisonop = oneOf(' '.join(EvalComparisonOp.operations.keys()))
    # The ternary operator is given as its pair of symbols.
    ternaryop = ('?', ':')
    boolandop = oneOf('AND and &&')
    boolorop = oneOf('OR or ||')
    negateop = oneOf('NOT not !')

    operand.setParseAction(EvalConstant)
    # The original created a placeholder `expr = Forward()` that was
    # overwritten here without ever being used; it has been removed.
    expr = operatorPrecedence(operand, [
        (fn, 1, opAssoc.RIGHT, EvalFunction),
        ("^", 2, opAssoc.RIGHT, EvalPowerOp),
        (signop, 1, opAssoc.RIGHT, EvalSignOp),
        (multop, 2, opAssoc.LEFT, EvalMultOp),
        (addop, 2, opAssoc.LEFT, EvalAddOp),
        (negateop, 1, opAssoc.RIGHT, EvalNegateOp),
        (comparisonop, 2, opAssoc.LEFT, EvalComparisonOp),
        (ternaryop, 3, opAssoc.LEFT, EvalTernaryOp),
        (boolandop, 2, opAssoc.LEFT, EvalBoolAndOp),
        (boolorop, 2, opAssoc.LEFT, EvalBoolOrOp),
        (',', 2, opAssoc.RIGHT, EvalCommaSeperator), ])

    return expr