Python xml.parsers.expat.ParserCreate() Examples

The following are 30 code examples of xml.parsers.expat.ParserCreate(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module xml.parsers.expat, or try the search function.
Example #1
Source File: test_pyexpat.py    From BinderFilter with MIT License 6 votes vote down vote up
def test_ignore_use_foreign_dtd(self):
        """
        With UseForeignDTD(True) and a document whose DOCTYPE already names
        an external subset, ExternalEntityRefHandler must be invoked with
        the public and system ids written in that document.
        """
        seen_ids = []

        def record_entity(context, base, system_id, public_id):
            # Keep the ids in (public, system) order for the final assertion.
            seen_ids.append((public_id, system_id))
            return 1

        document = (
            "<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")

        dtd_parser = expat.ParserCreate()
        dtd_parser.UseForeignDTD(True)
        dtd_parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        dtd_parser.ExternalEntityRefHandler = record_entity
        dtd_parser.Parse(document)
        self.assertEqual(seen_ids, [("bar", "baz")])
Example #2
Source File: test_pyexpat.py    From ironpython2 with Apache License 2.0 6 votes vote down vote up
def test_unchanged_size(self):
        """
        Assigning buffer_size the value it already has must leave the
        handler call count untouched, and parsing must still continue
        normally afterwards.
        """
        head = ("<?xml version='1.0' encoding='iso8859'?><s>%s" % ('a' * 512))
        tail = 'a'*512 + '</s>'

        p = expat.ParserCreate()
        p.CharacterDataHandler = self.counting_handler
        p.buffer_size = 512
        p.buffer_text = 1
        self.n = 0

        # The first chunk carries 512 characters of text, which is expected
        # to produce exactly one handler call.
        p.Parse(head)
        self.assertEqual(self.n, 1)

        # Write the current size back unchanged; the count must stay at 1.
        same_size = p.buffer_size
        p.buffer_size = same_size
        self.assertEqual(self.n, 1)

        # Feeding the remainder of the document adds one more call.
        p.Parse(tail)
        self.assertEqual(self.n, 2)
Example #3
Source File: test_pyexpat.py    From ironpython2 with Apache License 2.0 6 votes vote down vote up
def test_ignore_use_foreign_dtd(self):
        """
        When the parsed document declares its own external subset,
        enabling UseForeignDTD must still hand ExternalEntityRefHandler
        the public and system ids found in the document.
        """
        recorded = []

        def on_external_entity(context, base, system_id, public_id):
            recorded.append((public_id, system_id))
            return 1

        xml_parser = expat.ParserCreate()
        xml_parser.UseForeignDTD(True)
        xml_parser.SetParamEntityParsing(
            expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        xml_parser.ExternalEntityRefHandler = on_external_entity

        source = (
            "<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")
        xml_parser.Parse(source)

        self.assertEqual(recorded, [("bar", "baz")])
Example #4
Source File: test_pyexpat.py    From ironpython2 with Apache License 2.0 6 votes vote down vote up
def test_parse_only_xml_data(self):
        """
        Regression test for http://python.org/sf/1296433: an exception
        raised inside CharacterDataHandler must surface from Parse().
        """
        class SpecificException(Exception):
            pass

        def raising_handler(text):
            raise SpecificException

        # The variant "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000),
        # i.e. without the encoding declaration, did not crash.
        payload = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)

        xml_parser = expat.ParserCreate()
        xml_parser.CharacterDataHandler = raising_handler

        # https://github.com/IronLanguages/ironpython2/issues/464
        # On the 'cli' platform the final-chunk flag is passed explicitly.
        if sys.platform == 'cli':
            parse_args = (payload, True)
        else:
            parse_args = (payload,)
        self.assertRaises(Exception, xml_parser.Parse, *parse_args)
Example #5
Source File: crawlerpersister.py    From watchdog with Apache License 2.0 6 votes vote down vote up
def loadXML(self, fileName):
        """
        Loads the crawler parameters from an XML file.

        A new UTF-8 expat parser is created and wired to this object's
        start-element, end-element and character-data callbacks. The file
        is read in one go, newline characters are removed, and the result
        is passed to the feed routine.

        @param fileName The file from which the crawler data is loaded
        """
        self._parser = expat.ParserCreate("UTF-8")
        self._parser.StartElementHandler = self.__start_element
        self._parser.EndElementHandler = self.__end_element
        self._parser.CharacterDataHandler = self.__char_data
        self._parser.returns_unicode = False

        # The context manager closes the file even if read() raises, and the
        # handle is released before the (possibly slow) parse starts.
        with open(fileName) as f:
            content = f.read()
        self.__feed(content.replace("\n", ""))
Example #6
Source File: test_pyexpat.py    From BinderFilter with MIT License 6 votes vote down vote up
def test_unchanged_size(self):
        """
        Re-assigning buffer_size to its current value must not change the
        number of CharacterDataHandler calls seen so far.
        """
        first_part = ("<?xml version='1.0' encoding='iso8859'?><s>%s" % ('a' * 512))
        second_part = 'a'*512 + '</s>'

        xml_parser = expat.ParserCreate()
        xml_parser.CharacterDataHandler = self.counting_handler
        xml_parser.buffer_size = 512
        xml_parser.buffer_text = 1

        self.n = 0

        # 512 characters of text in the first part: one handler call expected.
        xml_parser.Parse(first_part)
        self.assertEqual(self.n, 1)

        # Same size written back: the counter must not move.
        unchanged = xml_parser.buffer_size
        xml_parser.buffer_size = unchanged
        self.assertEqual(self.n, 1)

        # The rest of the document yields one additional call.
        xml_parser.Parse(second_part)
        self.assertEqual(self.n, 2)
Example #7
Source File: crawlerpersister.py    From watchdog with Apache License 2.0 6 votes vote down vote up
def loadXML(self, fileName):
        """
        Loads the crawler parameters from an XML file.

        Creates a UTF-8 expat parser bound to this object's element and
        character-data callbacks, reads the whole file, strips newline
        characters from its content and feeds the result to the parser.

        @param fileName The file from which the crawler data is loaded
        """
        self._parser = expat.ParserCreate("UTF-8")
        self._parser.StartElementHandler = self.__start_element
        self._parser.EndElementHandler = self.__end_element
        self._parser.CharacterDataHandler = self.__char_data
        self._parser.returns_unicode = False

        # "with" guarantees the handle is closed even when read() fails;
        # feeding happens after the file has been released.
        with open(fileName) as f:
            content = f.read()
        self.__feed(content.replace("\n", ""))
Example #8
Source File: test_pyexpat.py    From BinderFilter with MIT License 5 votes vote down vote up
def test_change_size_1(self):
        """
        Double buffer_size between two Parse() calls and check both the
        reported size and the total number of handler calls (two).
        """
        first_chunk = "<?xml version='1.0' encoding='iso8859'?><a><s>%s" % ('a' * 1024)
        last_chunk = "aaa</s><s>%s</s></a>" % ('a' * 1025)

        p = expat.ParserCreate()
        p.CharacterDataHandler = self.counting_handler
        p.buffer_text = 1
        p.buffer_size = 1024
        self.assertEqual(p.buffer_size, 1024)

        self.n = 0
        p.Parse(first_chunk, 0)

        # Grow the buffer while the document is still open.
        p.buffer_size = p.buffer_size * 2
        self.assertEqual(p.buffer_size, 2048)

        p.Parse(last_chunk, 1)
        self.assertEqual(self.n, 2)
Example #9
Source File: expatreader.py    From BinderFilter with MIT License 5 votes vote down vote up
def reset(self):
        """Create a fresh expat parser and attach this reader's handlers.

        The parser is namespace-aware (separator " ", namespace_prefixes
        enabled) when self._namespaces is true, and a plain parser
        otherwise. Parsing state (_parsing, _entity_stack) is cleared at
        the end.
        """
        encoding = self._source.getEncoding()
        if self._namespaces:
            self._parser = expat.ParserCreate(encoding, " ",
                                              intern=self._interning)
            self._parser.namespace_prefixes = 1
            element_hooks = (self.start_element_ns, self.end_element_ns)
        else:
            self._parser = expat.ParserCreate(encoding,
                                              intern=self._interning)
            element_hooks = (self.start_element, self.end_element)
        self._parser.StartElementHandler = element_hooks[0]
        self._parser.EndElementHandler = element_hooks[1]

        self._reset_cont_handler()
        for hook, callback in (
                ("UnparsedEntityDeclHandler", self.unparsed_entity_decl),
                ("NotationDeclHandler", self.notation_decl),
                ("StartNamespaceDeclHandler", self.start_namespace_decl),
                ("EndNamespaceDeclHandler", self.end_namespace_decl)):
            setattr(self._parser, hook, callback)

        self._decl_handler_prop = None
        if self._lex_handler_prop:
            self._reset_lex_handler_prop()

        # DefaultHandler, DefaultHandlerExpand and NotStandaloneHandler are
        # intentionally left unset.
        self._parser.ExternalEntityRefHandler = self.external_entity_ref
        try:
            self._parser.SkippedEntityHandler = self.skipped_entity_handler
        except AttributeError:
            # This pyexpat build has no SkippedEntityHandler support.
            pass
        self._parser.SetParamEntityParsing(
            expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)

        self._parsing = 0
        self._entity_stack = []

    # Locator methods 
Example #10
Source File: plistlib.py    From BinderFilter with MIT License 5 votes vote down vote up
def parse(self, fileobj):
        """Parse the XML read from *fileobj* and return self.root.

        A new expat parser is created for each call and bound to this
        object's begin-element, end-element and data callbacks.
        """
        from xml.parsers.expat import ParserCreate

        callbacks = (
            ("StartElementHandler", self.handleBeginElement),
            ("EndElementHandler", self.handleEndElement),
            ("CharacterDataHandler", self.handleData),
        )
        xml_parser = ParserCreate()
        for hook, callback in callbacks:
            setattr(xml_parser, hook, callback)
        xml_parser.ParseFile(fileobj)
        return self.root
Example #11
Source File: test_pyexpat.py    From BinderFilter with MIT License 5 votes vote down vote up
def test_utf8(self):
        """
        Parse the module-level data with returns_unicode switched off and
        compare every recorded event with its expected textual form.
        """
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        for handler_name in self.handler_names:
            setattr(parser, handler_name, getattr(out, handler_name))
        parser.returns_unicode = 0
        parser.Parse(data, 1)

        # Expected output, in the order the events are reported.
        expected = [
            'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'',
            "Comment: ' comment data '",
            "Notation declared: ('notation', None, 'notation.jpeg', None)",
            "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')",
            "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\\xe1\\xbd\\x80'}",
            "NS decl: 'myns' 'http://www.python.org/namespace'",
            "Start element: 'http://www.python.org/namespace!subelement' {}",
            "Character data: 'Contents of subelements'",
            "End element: 'http://www.python.org/namespace!subelement'",
            "End of NS decl: 'myns'",
            "Start element: 'sub2' {}",
            'Start of CDATA section',
            "Character data: 'contents of CDATA section'",
            'End of CDATA section',
            "End element: 'sub2'",
            "External entity ref: (None, 'entity.file', None)",
            "End element: 'root'",
        ]
        op = out.out
        for index, line in enumerate(expected):
            self.assertEqual(op[index], line)
Example #12
Source File: test_pyexpat.py    From BinderFilter with MIT License 5 votes vote down vote up
def test_unicode(self):
        """
        Repeat the parse of the module-level data with returns_unicode
        switched on and compare every recorded event with its expected
        unicode-flavoured textual form.
        """
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        parser.returns_unicode = 1
        for handler_name in self.handler_names:
            setattr(parser, handler_name, getattr(out, handler_name))

        parser.Parse(data, 1)

        # Expected output, in the order the events are reported.
        expected = [
            'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'',
            "Comment: u' comment data '",
            "Notation declared: (u'notation', None, u'notation.jpeg', None)",
            "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')",
            "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}",
            "NS decl: u'myns' u'http://www.python.org/namespace'",
            "Start element: u'http://www.python.org/namespace!subelement' {}",
            "Character data: u'Contents of subelements'",
            "End element: u'http://www.python.org/namespace!subelement'",
            "End of NS decl: u'myns'",
            "Start element: u'sub2' {}",
            'Start of CDATA section',
            "Character data: u'contents of CDATA section'",
            'End of CDATA section',
            "End element: u'sub2'",
            "External entity ref: (None, u'entity.file', None)",
            "End element: u'root'",
        ]
        op = out.out
        for index, line in enumerate(expected):
            self.assertEqual(op[index], line)
Example #13
Source File: test_pyexpat.py    From BinderFilter with MIT License 5 votes vote down vote up
def test_use_foreign_dtd(self):
        """
        With UseForeignDTD enabled and a document that has no external
        entity reference, ExternalEntityRefHandler must first be called
        with None for both the public and the system id.
        """
        calls = []

        def resolve_entity(context, base, system_id, public_id):
            calls.append((public_id, system_id))
            return 1

        # UseForeignDTD() without an argument must behave exactly like
        # UseForeignDTD(True), so the scenario runs once per call form.
        for dtd_args in ((True,), ()):
            del calls[:]

            parser = expat.ParserCreate()
            parser.UseForeignDTD(*dtd_args)
            parser.SetParamEntityParsing(
                expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
            parser.ExternalEntityRefHandler = resolve_entity
            parser.Parse("<?xml version='1.0'?><element/>")
            self.assertEqual(calls, [(None, None)])
Example #14
Source File: test_pyexpat.py    From BinderFilter with MIT License 5 votes vote down vote up
def small_buffer_test(self, buffer_len):
        """
        Parse one element holding buffer_len 'a' characters with a 1024
        buffer and text buffering on; return the resulting self.n.
        """
        payload = 'a' * buffer_len
        document = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % payload

        p = expat.ParserCreate()
        p.CharacterDataHandler = self.counting_handler
        p.buffer_size = 1024
        p.buffer_text = 1

        # Reset the counter right before parsing so only this run is counted.
        self.n = 0
        p.Parse(document)
        return self.n
Example #15
Source File: expatbuilder.py    From Computable with MIT License 5 votes vote down vote up
def createParser(self):
        """Return a new expat parser with namespace processing enabled.

        The parser uses a single space as namespace separator and has
        namespace_prefixes switched on.
        """
        ns_parser = expat.ParserCreate(None, " ")
        ns_parser.namespace_prefixes = True
        return ns_parser
Example #16
Source File: test_pyexpat.py    From BinderFilter with MIT License 5 votes vote down vote up
def test1(self):
        """
        Parsing a NUL character followed by CR LF must raise ExpatError
        reporting an unclosed token at line 2, column 0.
        """
        bad_input = "\0\r\n"
        xml_parser = expat.ParserCreate()
        try:
            xml_parser.Parse(bad_input, True)
        except expat.ExpatError as err:
            self.assertEqual(str(err), 'unclosed token: line 2, column 0')
        else:
            # No exception at all is a failure as well.
            self.fail()
Example #17
Source File: test_pyexpat.py    From BinderFilter with MIT License 5 votes vote down vote up
def test2(self):
        """
        An XML declaration with stray characters after "version" must
        raise ExpatError reporting the problem at line 1, column 14.
        """
        bad_input = "<?xml version\xc2\x85='1.0'?>\r\n"
        xml_parser = expat.ParserCreate()
        try:
            xml_parser.Parse(bad_input, True)
        except expat.ExpatError as err:
            self.assertEqual(str(err), 'XML declaration not well-formed: line 1, column 14')
        else:
            # No exception at all is a failure as well.
            self.fail()
Example #18
Source File: expatbuilder.py    From Computable with MIT License 5 votes vote down vote up
def createParser(self):
        """Return a new expat parser created with default settings."""
        plain_parser = expat.ParserCreate()
        return plain_parser
Example #19
Source File: test_pyexpat.py    From BinderFilter with MIT License 5 votes vote down vote up
def test_illegal(self):
        try:
            expat.ParserCreate(namespace_separator=42)
            self.fail()
        except TypeError, e:
            self.assertEqual(str(e),
                'ParserCreate() argument 2 must be string or None, not int') 
Example #20
Source File: expatbuilder.py    From BinderFilter with MIT License 5 votes vote down vote up
def createParser(self):
        """Build and return a namespace-handling expat parser.

        A single space separates namespace parts, and namespace_prefixes
        is enabled on the returned parser.
        """
        result = expat.ParserCreate(None, " ")
        result.namespace_prefixes = True
        return result
Example #21
Source File: expatbuilder.py    From BinderFilter with MIT License 5 votes vote down vote up
def createParser(self):
        """Build and return an expat parser with default settings."""
        new_parser = expat.ParserCreate()
        return new_parser
Example #22
Source File: disk.py    From USBMap with MIT License 5 votes vote down vote up
def _get_plist(self, s):
        p = {}
        try:
            if sys.version_info >= (3, 0):
                p = plistlib.loads(s.encode("utf-8"))
            else:
                # p = plistlib.readPlistFromString(s)
                # We avoid using readPlistFromString() as that uses
                # cStringIO and fails when Unicode strings are detected
                # Don't subclass - keep the parser local
                from xml.parsers.expat import ParserCreate
                # Create a new PlistParser object - then we need to set up
                # the values and parse.
                pa = plistlib.PlistParser()
                # We also monkey patch this to encode unicode as utf-8
                def end_string():
                    d = pa.getData()
                    if isinstance(d,unicode):
                        d = d.encode("utf-8")
                    pa.addObject(d)
                pa.end_string = end_string
                parser = ParserCreate()
                parser.StartElementHandler = pa.handleBeginElement
                parser.EndElementHandler = pa.handleEndElement
                parser.CharacterDataHandler = pa.handleData
                if isinstance(s, unicode):
                    # Encode unicode -> string; use utf-8 for safety
                    s = s.encode("utf-8")
                # Parse the string
                parser.Parse(s, 1)
                p = pa.root
        except Exception as e:
            print(e)
            pass
        return p 
Example #23
Source File: vulnerabilityxmlparser.py    From ITWSV with MIT License 5 votes vote down vote up
def __init__(self):
        """Create the expat parser and initialise the empty parsing state."""
        xml_parser = expat.ParserCreate()
        xml_parser.StartElementHandler = self.start_element
        xml_parser.EndElementHandler = self.end_element
        xml_parser.CharacterDataHandler = self.char_data
        self._parser = xml_parser

        # Initial (empty) state.
        self.vulnerabilities = []
        self.vul = None
        self.references = {}
        self.title = self.url = self.tag = ""
Example #24
Source File: anomalyxmlparser.py    From ITWSV with MIT License 5 votes vote down vote up
def __init__(self):
        """Create the expat parser and initialise the empty parsing state."""
        xml_parser = expat.ParserCreate()
        xml_parser.StartElementHandler = self.start_element
        xml_parser.EndElementHandler = self.end_element
        xml_parser.CharacterDataHandler = self.char_data
        self._parser = xml_parser

        # Initial (empty) state.
        self.anomalies = []
        self.anom = None
        self.references = {}
        self.title = self.url = self.tag = ""
Example #25
Source File: reportgeneratorsxmlparser.py    From ITWSV with MIT License 5 votes vote down vote up
def __init__(self):
        """Create the expat parser and initialise the empty parsing state."""
        xml_parser = expat.ParserCreate()
        xml_parser.StartElementHandler = self.start_element
        xml_parser.EndElementHandler = self.end_element
        xml_parser.CharacterDataHandler = self.char_data
        self._parser = xml_parser

        # Initial (empty) state.
        self.reportGenerators = []
        self.repGen = None
        self.tag = ""
Example #26
Source File: element_tree.py    From avocado-vt with GNU General Public License v2.0 5 votes vote down vote up
def __init__(self, html=0, target=None):
        """Build the expat-backed parser and connect it to *target*.

        html is accepted but not referenced anywhere in this constructor.
        target defaults to a new TreeBuilder() when None is given.

        Raises ImportError if xml.parsers.expat cannot be imported.
        """
        try:
            from xml.parsers import expat
        except ImportError:
            raise ImportError(
                "No module named expat; use SimpleXMLTreeBuilder instead"
            )
        xml_parser = expat.ParserCreate(None, "}")
        self._parser = xml_parser
        self._target = TreeBuilder() if target is None else target
        self._names = {}  # name memo cache

        # callbacks
        xml_parser.DefaultHandlerExpand = self._default
        xml_parser.StartElementHandler = self._start
        xml_parser.EndElementHandler = self._end
        xml_parser.CharacterDataHandler = self._data

        # let expat do the buffering, if supported
        try:
            xml_parser.buffer_text = 1
        except AttributeError:
            pass

        # use new-style attribute handling, if supported; in that case the
        # list-based start handler replaces the one installed above
        try:
            xml_parser.ordered_attributes = 1
            xml_parser.specified_attributes = 1
            xml_parser.StartElementHandler = self._start_list
        except AttributeError:
            pass

        # NOTE(review): encoding is computed here but nothing below reads
        # it; confirm whether it can be dropped.
        byte_output = (hasattr(xml_parser, "returns_unicode")
                       and not xml_parser.returns_unicode)
        encoding = "utf-8" if byte_output else None

        self._doctype = None
        self.entity = {}
Example #27
Source File: wildfirelib.py    From ACE with Apache License 2.0 5 votes vote down vote up
def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
          namespace_separator=':', **kwargs):
    """Parse *xml_input* and return the item built by the SAX-style handler.

    xml_input may be a file-like object or an in-memory document. Text
    input is encoded first, using *encoding* or 'utf-8' when none is given.
    namespace_separator is always forwarded to the handler, but reaches the
    expat parser only when process_namespaces is true. Extra keyword
    arguments are passed on to the handler.
    """
    sax_handler = _DictSAXHandler(namespace_separator=namespace_separator,
                                  **kwargs)

    if isinstance(xml_input, _unicode):
        encoding = encoding or 'utf-8'
        xml_input = xml_input.encode(encoding)

    parser_separator = namespace_separator if process_namespaces else None
    xml_parser = expat.ParserCreate(encoding, parser_separator)

    try:
        xml_parser.ordered_attributes = True
    except AttributeError:
        # Jython's expat does not support ordered_attributes
        pass

    xml_parser.StartElementHandler = sax_handler.startElement
    xml_parser.EndElementHandler = sax_handler.endElement
    xml_parser.CharacterDataHandler = sax_handler.characters
    xml_parser.buffer_text = True

    # Try the file-like path first; fall back to treating the input as one
    # complete in-memory document.
    try:
        xml_parser.ParseFile(xml_input)
    except (TypeError, AttributeError):
        xml_parser.Parse(xml_input, True)
    return sax_handler.item
Example #28
Source File: test_pyexpat.py    From ironpython2 with Apache License 2.0 5 votes vote down vote up
def test_use_foreign_dtd(self):
        """
        When UseForeignDTD is enabled and the document carries no external
        entity reference, the first ExternalEntityRefHandler call must
        receive None as both public and system id.
        """
        recorded = []

        def resolve_entity(context, base, system_id, public_id):
            recorded.append((public_id, system_id))
            return 1

        def check_with(*dtd_args):
            # Run the scenario on a brand-new parser, starting from an
            # empty record of handler calls.
            recorded[:] = []
            parser = expat.ParserCreate()
            parser.UseForeignDTD(*dtd_args)
            parser.SetParamEntityParsing(
                expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
            parser.ExternalEntityRefHandler = resolve_entity
            parser.Parse("<?xml version='1.0'?><element/>")
            self.assertEqual(recorded, [(None, None)])

        check_with(True)

        # test UseForeignDTD() is equal to UseForeignDTD(True)
        check_with()
Example #29
Source File: test_pyexpat.py    From ironpython2 with Apache License 2.0 5 votes vote down vote up
def test2(self):
        """
        An XML declaration with stray characters after "version" must
        raise ExpatError with a "not well-formed" message for line 1;
        any column number is accepted.
        """
        bad_input = "<?xml version\xc2\x85='1.0'?>\r\n"
        expected_error = (
            expat.ExpatError,
            r'XML declaration not well-formed: line 1, column \d+',
        )
        xml_parser = expat.ParserCreate()
        with self.assertRaisesRegexp(*expected_error):
            xml_parser.Parse(bad_input, True)
Example #30
Source File: test_pyexpat.py    From ironpython2 with Apache License 2.0 5 votes vote down vote up
def test1(self):
        """
        A document consisting of a NUL character and CR LF must raise
        ExpatError with an "unclosed token" message at line 2, column 0.
        """
        broken_document = "\0\r\n"
        p = expat.ParserCreate()
        try:
            p.Parse(broken_document, True)
        except expat.ExpatError as exc:
            self.assertEqual(str(exc), 'unclosed token: line 2, column 0')
        else:
            # Parsing succeeded, which is itself a failure.
            self.fail()