Python xml.parsers.expat.ParserCreate() Examples
The following are 30 code examples of xml.parsers.expat.ParserCreate().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module xml.parsers.expat, or try the search function.
Example #1
Source File: test_pyexpat.py From BinderFilter with MIT License | 6 votes |
def test_ignore_use_foreign_dtd(self):
    """
    With UseForeignDTD(True) and a document that carries its own external
    DOCTYPE reference, ExternalEntityRefHandler must be invoked with the
    public and system ids taken from that document.
    """
    seen_ids = []

    def record_ids(context, base, system_id, public_id):
        # Stored in (public, system) order; the non-zero return value is
        # what the original handler returned.
        seen_ids.append((public_id, system_id))
        return 1

    dtd_parser = expat.ParserCreate()
    dtd_parser.UseForeignDTD(True)
    dtd_parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
    dtd_parser.ExternalEntityRefHandler = record_ids

    document = "<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>"
    dtd_parser.Parse(document)
    self.assertEqual(seen_ids, [("bar", "baz")])
Example #2
Source File: test_pyexpat.py From ironpython2 with Apache License 2.0 | 6 votes |
def test_unchanged_size(self):
    """Check that re-assigning buffer_size to its current value leaves the handler count alone."""
    filler = 'a' * 512
    first_chunk = "<?xml version='1.0' encoding='iso8859'?><s>%s" % filler
    second_chunk = filler + '</s>'

    parser = expat.ParserCreate()
    parser.CharacterDataHandler = self.counting_handler
    parser.buffer_size = 512
    parser.buffer_text = 1

    # Feed 512 bytes of character data: the handler should be called
    # once.
    self.n = 0
    parser.Parse(first_chunk)
    self.assertEqual(self.n, 1)

    # Write the very same size back; the count must not move.
    parser.buffer_size = parser.buffer_size
    self.assertEqual(self.n, 1)

    # Parse the remainder of the document.
    parser.Parse(second_chunk)
    self.assertEqual(self.n, 2)
Example #3
Source File: test_pyexpat.py From ironpython2 with Apache License 2.0 | 6 votes |
def test_ignore_use_foreign_dtd(self):
    """
    With UseForeignDTD(True) and a document that carries its own external
    DOCTYPE reference, ExternalEntityRefHandler must be invoked with the
    public and system ids taken from that document.
    """
    seen_ids = []

    def record_ids(context, base, system_id, public_id):
        # Stored in (public, system) order; the non-zero return value is
        # what the original handler returned.
        seen_ids.append((public_id, system_id))
        return 1

    dtd_parser = expat.ParserCreate()
    dtd_parser.UseForeignDTD(True)
    dtd_parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
    dtd_parser.ExternalEntityRefHandler = record_ids

    document = "<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>"
    dtd_parser.Parse(document)
    self.assertEqual(seen_ids, [("bar", "baz")])
Example #4
Source File: test_pyexpat.py From ironpython2 with Apache License 2.0 | 6 votes |
def test_parse_only_xml_data(self):
    """Check that an exception raised by the character-data handler escapes Parse()."""
    # http://python.org/sf/1296433
    #
    xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)
    # this one doesn't crash
    #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

    class SpecificException(Exception):
        pass

    def handler(text):
        raise SpecificException

    parser = expat.ParserCreate()
    parser.CharacterDataHandler = handler

    # https://github.com/IronLanguages/ironpython2/issues/464
    # On the 'cli' platform Parse() is additionally given isfinal=True.
    parse_args = (xml, True) if sys.platform == 'cli' else (xml,)
    self.assertRaises(Exception, parser.Parse, *parse_args)
Example #5
Source File: crawlerpersister.py From watchdog with Apache License 2.0 | 6 votes |
def loadXML(self, fileName):
    """
    Load the crawler parameters from an XML file.

    A fresh UTF-8 expat parser is created and wired to this object's
    element and character-data callbacks. The file is then read in full,
    newline characters are removed, and the result is handed to
    ``self.__feed``.

    @param fileName Path of the file the crawler data is loaded from
    """
    self._parser = expat.ParserCreate("UTF-8")
    self._parser.StartElementHandler = self.__start_element
    self._parser.EndElementHandler = self.__end_element
    self._parser.CharacterDataHandler = self.__char_data
    # NOTE(review): returns_unicode looks like the Python 2 pyexpat
    # attribute -- confirm the target interpreter before porting.
    self._parser.returns_unicode = False

    # The context manager closes the file even when reading or feeding
    # raises, replacing the manual f = None / try / finally bookkeeping.
    with open(fileName) as f:
        content = f.read()
        self.__feed(content.replace("\n", ""))
Example #6
Source File: test_pyexpat.py From BinderFilter with MIT License | 6 votes |
def test_unchanged_size(self):
    """Check that re-assigning buffer_size to its current value leaves the handler count alone."""
    filler = 'a' * 512
    first_chunk = "<?xml version='1.0' encoding='iso8859'?><s>%s" % filler
    second_chunk = filler + '</s>'

    parser = expat.ParserCreate()
    parser.CharacterDataHandler = self.counting_handler
    parser.buffer_size = 512
    parser.buffer_text = 1

    # Feed 512 bytes of character data: the handler should be called
    # once.
    self.n = 0
    parser.Parse(first_chunk)
    self.assertEqual(self.n, 1)

    # Write the very same size back; the count must not move.
    parser.buffer_size = parser.buffer_size
    self.assertEqual(self.n, 1)

    # Parse the remainder of the document.
    parser.Parse(second_chunk)
    self.assertEqual(self.n, 2)
Example #7
Source File: crawlerpersister.py From watchdog with Apache License 2.0 | 6 votes |
def loadXML(self, fileName):
    """
    Load the crawler parameters from an XML file.

    A fresh UTF-8 expat parser is created and wired to this object's
    element and character-data callbacks. The file is then read in full,
    newline characters are removed, and the result is handed to
    ``self.__feed``.

    @param fileName Path of the file the crawler data is loaded from
    """
    self._parser = expat.ParserCreate("UTF-8")
    self._parser.StartElementHandler = self.__start_element
    self._parser.EndElementHandler = self.__end_element
    self._parser.CharacterDataHandler = self.__char_data
    # NOTE(review): returns_unicode looks like the Python 2 pyexpat
    # attribute -- confirm the target interpreter before porting.
    self._parser.returns_unicode = False

    # The context manager closes the file even when reading or feeding
    # raises, replacing the manual f = None / try / finally bookkeeping.
    with open(fileName) as f:
        content = f.read()
        self.__feed(content.replace("\n", ""))
Example #8
Source File: test_pyexpat.py From BinderFilter with MIT License | 5 votes |
def test_change_size_1(self):
    """Double buffer_size between two Parse() calls and check the handler call count."""
    head = "<?xml version='1.0' encoding='iso8859'?><a><s>%s" % ('a' * 1024)
    tail = "aaa</s><s>%s</s></a>" % ('a' * 1025)

    parser = expat.ParserCreate()
    parser.CharacterDataHandler = self.counting_handler
    parser.buffer_text = 1
    parser.buffer_size = 1024
    self.assertEqual(parser.buffer_size, 1024)

    self.n = 0
    parser.Parse(head, 0)

    # Grow the buffer while the first chunk's text is still pending.
    parser.buffer_size = parser.buffer_size * 2
    self.assertEqual(parser.buffer_size, 2048)

    parser.Parse(tail, 1)
    self.assertEqual(self.n, 2)
Example #9
Source File: expatreader.py From BinderFilter with MIT License | 5 votes |
def reset(self):
    """Create a fresh expat parser and re-attach every handler to it.

    When ``self._namespaces`` is truthy the parser is created with a
    single space as namespace separator and the ``*_ns`` element
    callbacks are used; otherwise the plain element callbacks are used.
    The parsing flag and the entity stack are cleared at the end.
    """
    if self._namespaces:
        # Second positional argument is the namespace separator.
        self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
                                          intern=self._interning)
        self._parser.namespace_prefixes = 1
        self._parser.StartElementHandler = self.start_element_ns
        self._parser.EndElementHandler = self.end_element_ns
    else:
        self._parser = expat.ParserCreate(self._source.getEncoding(),
                                          intern = self._interning)
        self._parser.StartElementHandler = self.start_element
        self._parser.EndElementHandler = self.end_element

    # Called only after self._parser has been replaced above.
    self._reset_cont_handler()
    self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
    self._parser.NotationDeclHandler = self.notation_decl
    self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
    self._parser.EndNamespaceDeclHandler = self.end_namespace_decl

    self._decl_handler_prop = None
    if self._lex_handler_prop:
        self._reset_lex_handler_prop()
#         self._parser.DefaultHandler =
#         self._parser.DefaultHandlerExpand =
#         self._parser.NotStandaloneHandler =
    self._parser.ExternalEntityRefHandler = self.external_entity_ref
    try:
        self._parser.SkippedEntityHandler = self.skipped_entity_handler
    except AttributeError:
        # This pyexpat does not support SkippedEntity
        pass
    self._parser.SetParamEntityParsing(
        expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)

    # Fresh parser: not parsing yet, no entities open.
    self._parsing = 0
    self._entity_stack = []

# Locator methods
Example #10
Source File: plistlib.py From BinderFilter with MIT License | 5 votes |
def parse(self, fileobj):
    """Run expat over *fileobj*, routing events to this object, and return ``self.root``."""
    from xml.parsers.expat import ParserCreate

    expat_parser = ParserCreate()
    # Attach callbacks one by one, in the same order as before.
    for expat_attr, method_name in (
        ("StartElementHandler", "handleBeginElement"),
        ("EndElementHandler", "handleEndElement"),
        ("CharacterDataHandler", "handleData"),
    ):
        setattr(expat_parser, expat_attr, getattr(self, method_name))
    expat_parser.ParseFile(fileobj)
    return self.root
Example #11
Source File: test_pyexpat.py From BinderFilter with MIT License | 5 votes |
def test_utf8(self):
    """Parse the sample document with returns_unicode = 0 and compare every recorded line."""
    recorder = self.Outputter()
    parser = expat.ParserCreate(namespace_separator='!')
    for handler_name in self.handler_names:
        setattr(parser, handler_name, getattr(recorder, handler_name))
    parser.returns_unicode = 0
    parser.Parse(data, 1)

    # Verify output, position by position.
    expected = [
        'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'',
        "Comment: ' comment data '",
        "Notation declared: ('notation', None, 'notation.jpeg', None)",
        "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')",
        "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\\xe1\\xbd\\x80'}",
        "NS decl: 'myns' 'http://www.python.org/namespace'",
        "Start element: 'http://www.python.org/namespace!subelement' {}",
        "Character data: 'Contents of subelements'",
        "End element: 'http://www.python.org/namespace!subelement'",
        "End of NS decl: 'myns'",
        "Start element: 'sub2' {}",
        'Start of CDATA section',
        "Character data: 'contents of CDATA section'",
        'End of CDATA section',
        "End element: 'sub2'",
        "External entity ref: (None, 'entity.file', None)",
        "End element: 'root'",
    ]
    op = recorder.out
    for position, line in enumerate(expected):
        self.assertEqual(op[position], line)
Example #12
Source File: test_pyexpat.py From BinderFilter with MIT License | 5 votes |
def test_unicode(self):
    """Parse the sample document with returns_unicode = 1 and compare every recorded line."""
    # Try the parse again, this time producing Unicode output
    recorder = self.Outputter()
    parser = expat.ParserCreate(namespace_separator='!')
    parser.returns_unicode = 1
    for handler_name in self.handler_names:
        setattr(parser, handler_name, getattr(recorder, handler_name))

    parser.Parse(data, 1)

    expected = [
        'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'',
        "Comment: u' comment data '",
        "Notation declared: (u'notation', None, u'notation.jpeg', None)",
        "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')",
        "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}",
        "NS decl: u'myns' u'http://www.python.org/namespace'",
        "Start element: u'http://www.python.org/namespace!subelement' {}",
        "Character data: u'Contents of subelements'",
        "End element: u'http://www.python.org/namespace!subelement'",
        "End of NS decl: u'myns'",
        "Start element: u'sub2' {}",
        'Start of CDATA section',
        "Character data: u'contents of CDATA section'",
        'End of CDATA section',
        "End element: u'sub2'",
        "External entity ref: (None, u'entity.file', None)",
        "End element: u'root'",
    ]
    op = recorder.out
    for position, line in enumerate(expected):
        self.assertEqual(op[position], line)
Example #13
Source File: test_pyexpat.py From BinderFilter with MIT License | 5 votes |
def test_use_foreign_dtd(self):
    """
    With UseForeignDTD enabled and a document that has no external entity
    reference, ExternalEntityRefHandler is first called with None for
    both the public and the system id.
    """
    calls = []

    def resolve_entity(context, base, system_id, public_id):
        calls.append((public_id, system_id))
        return 1

    # First pass: UseForeignDTD(True). Second pass: test UseForeignDTD()
    # is equal to UseForeignDTD(True).
    for foreign_dtd_args in ((True,), ()):
        calls[:] = []
        parser = expat.ParserCreate()
        parser.UseForeignDTD(*foreign_dtd_args)
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = resolve_entity
        parser.Parse("<?xml version='1.0'?><element/>")
        self.assertEqual(calls, [(None, None)])
Example #14
Source File: test_pyexpat.py From BinderFilter with MIT License | 5 votes |
def small_buffer_test(self, buffer_len):
    """
    Parse a document holding ``buffer_len`` characters of text through a
    parser whose buffer_size is 1024, and return ``self.n`` afterwards
    (presumably the call count kept by ``self.counting_handler``).
    """
    payload = 'a' * buffer_len
    document = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % payload

    parser = expat.ParserCreate()
    parser.CharacterDataHandler = self.counting_handler
    parser.buffer_size = 1024
    parser.buffer_text = 1

    self.n = 0
    parser.Parse(document)
    return self.n
Example #15
Source File: expatbuilder.py From Computable with MIT License | 5 votes |
def createParser(self):
    """Build an expat parser that splits namespaces on a space and reports prefixes."""
    ns_parser = expat.ParserCreate(namespace_separator=" ")
    ns_parser.namespace_prefixes = True
    return ns_parser
Example #16
Source File: test_pyexpat.py From BinderFilter with MIT License | 5 votes |
def test1(self):
    """Check the ExpatError message for a NUL character followed by CRLF."""
    xml = "\0\r\n"
    parser = expat.ParserCreate()
    try:
        parser.Parse(xml, True)
    except expat.ExpatError as e:
        self.assertEqual(str(e), 'unclosed token: line 2, column 0')
    else:
        # Parsing was expected to raise.
        self.fail()
Example #17
Source File: test_pyexpat.py From BinderFilter with MIT License | 5 votes |
def test2(self):
    """Check the ExpatError message for an XML declaration with stray bytes after 'version'."""
    xml = "<?xml version\xc2\x85='1.0'?>\r\n"
    parser = expat.ParserCreate()
    try:
        parser.Parse(xml, True)
    except expat.ExpatError as e:
        self.assertEqual(str(e), 'XML declaration not well-formed: line 1, column 14')
    else:
        # Parsing was expected to raise.
        self.fail()
Example #18
Source File: expatbuilder.py From Computable with MIT License | 5 votes |
def createParser(self):
    """Return a freshly created expat parser with default arguments."""
    plain_parser = expat.ParserCreate()
    return plain_parser
Example #19
Source File: test_pyexpat.py From BinderFilter with MIT License | 5 votes |
def test_illegal(self):
    """Check that ParserCreate() rejects a non-string namespace_separator with TypeError."""
    try:
        expat.ParserCreate(namespace_separator=42)
        self.fail()
    # "except TypeError, e" is Python 2-only syntax; the "as" form parses on
    # Python 2.6+ and Python 3 alike.
    except TypeError as e:
        # NOTE(review): this message text is interpreter-specific -- confirm
        # against the pyexpat build the suite targets.
        self.assertEqual(str(e),
            'ParserCreate() argument 2 must be string or None, not int')
Example #20
Source File: expatbuilder.py From BinderFilter with MIT License | 5 votes |
def createParser(self):
    """Build an expat parser that splits namespaces on a space and reports prefixes."""
    ns_parser = expat.ParserCreate(namespace_separator=" ")
    ns_parser.namespace_prefixes = True
    return ns_parser
Example #21
Source File: expatbuilder.py From BinderFilter with MIT License | 5 votes |
def createParser(self):
    """Return a freshly created expat parser with default arguments."""
    plain_parser = expat.ParserCreate()
    return plain_parser
Example #22
Source File: disk.py From USBMap with MIT License | 5 votes |
def _get_plist(self, s):
    """
    Parse plist XML and return the decoded object.

    This is best-effort: any exception raised while parsing is printed
    and an empty dict is returned instead.

    s -- the plist document as text; on Python 3, bytes are accepted too
    """
    p = {}
    try:
        if sys.version_info >= (3, 0):
            # plistlib.loads() takes bytes. Encode only when handed text,
            # so input that is already bytes no longer fails on .encode().
            raw = s if isinstance(s, bytes) else s.encode("utf-8")
            p = plistlib.loads(raw)
        else:
            # p = plistlib.readPlistFromString(s)
            # We avoid using readPlistFromString() as that uses
            # cStringIO and fails when Unicode strings are detected
            # Don't subclass - keep the parser local
            from xml.parsers.expat import ParserCreate
            # Create a new PlistParser object - then we need to set up
            # the values and parse.
            pa = plistlib.PlistParser()
            # We also monkey patch this to encode unicode as utf-8
            def end_string():
                d = pa.getData()
                if isinstance(d,unicode):
                    d = d.encode("utf-8")
                pa.addObject(d)
            pa.end_string = end_string
            parser = ParserCreate()
            parser.StartElementHandler = pa.handleBeginElement
            parser.EndElementHandler = pa.handleEndElement
            parser.CharacterDataHandler = pa.handleData
            if isinstance(s, unicode):
                # Encode unicode -> string; use utf-8 for safety
                s = s.encode("utf-8")
            # Parse the string
            parser.Parse(s, 1)
            p = pa.root
    except Exception as e:
        # Deliberately forgiving: report the problem and fall through to
        # return whatever p currently holds.
        print(e)
    return p
Example #23
Source File: vulnerabilityxmlparser.py From ITWSV with MIT License | 5 votes |
def __init__(self):
    """Create the expat parser, attach this object's callbacks, and clear the parse state."""
    self._parser = xml_parser = expat.ParserCreate()
    xml_parser.StartElementHandler = self.start_element
    xml_parser.EndElementHandler = self.end_element
    xml_parser.CharacterDataHandler = self.char_data

    # Parse state starts out empty.
    self.vulnerabilities = []
    self.vul = None
    self.references = {}
    self.title = self.url = self.tag = ""
Example #24
Source File: anomalyxmlparser.py From ITWSV with MIT License | 5 votes |
def __init__(self):
    """Create the expat parser, attach this object's callbacks, and clear the parse state."""
    self._parser = xml_parser = expat.ParserCreate()
    xml_parser.StartElementHandler = self.start_element
    xml_parser.EndElementHandler = self.end_element
    xml_parser.CharacterDataHandler = self.char_data

    # Parse state starts out empty.
    self.anomalies = []
    self.anom = None
    self.references = {}
    self.title = self.url = self.tag = ""
Example #25
Source File: reportgeneratorsxmlparser.py From ITWSV with MIT License | 5 votes |
def __init__(self):
    """Create the expat parser, attach this object's callbacks, and clear the parse state."""
    self._parser = xml_parser = expat.ParserCreate()
    xml_parser.StartElementHandler = self.start_element
    xml_parser.EndElementHandler = self.end_element
    xml_parser.CharacterDataHandler = self.char_data

    # Parse state starts out empty.
    self.reportGenerators = []
    self.repGen = None
    self.tag = ""
Example #26
Source File: element_tree.py From avocado-vt with GNU General Public License v2.0 | 5 votes |
def __init__(self, html=0, target=None):
    """
    Set up an expat-backed builder.

    html   -- accepted but not used by this method
    target -- object stored as ``self._target``; a ``TreeBuilder()`` is
              created when it is None

    Raises ImportError when ``xml.parsers.expat`` cannot be imported.
    """
    try:
        from xml.parsers import expat
    except ImportError:
        raise ImportError(
            "No module named expat; use SimpleXMLTreeBuilder instead"
        )
    xml_parser = expat.ParserCreate(None, "}")
    self._parser = xml_parser
    if target is None:
        target = TreeBuilder()
    self._target = target
    self._names = {}  # name memo cache

    # callbacks
    xml_parser.DefaultHandlerExpand = self._default
    xml_parser.StartElementHandler = self._start
    xml_parser.EndElementHandler = self._end
    xml_parser.CharacterDataHandler = self._data

    # let expat do the buffering, if supported
    try:
        xml_parser.buffer_text = 1
    except AttributeError:
        pass

    # use new-style attribute handling, if supported
    try:
        xml_parser.ordered_attributes = 1
        xml_parser.specified_attributes = 1
        xml_parser.StartElementHandler = self._start_list
    except AttributeError:
        pass

    # Only parsers that expose a false returns_unicode get an encoding.
    encoding = None if getattr(xml_parser, "returns_unicode", True) else "utf-8"
    # target.xml(encoding, None)
    self._doctype = None
    self.entity = {}
Example #27
Source File: wildfirelib.py From ACE with Apache License 2.0 | 5 votes |
def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
          namespace_separator=':', **kwargs):
    """
    Parse XML into the structure built by ``_DictSAXHandler`` and return
    the handler's ``item``.

    xml_input           -- text, bytes, or an object ParseFile() accepts
    encoding            -- passed to ParserCreate(); defaults to 'utf-8'
                           when text input has to be encoded
    expat               -- module providing ParserCreate()
    process_namespaces  -- when false, no namespace separator is given
                           to the parser
    namespace_separator -- separator for the handler (and for the parser
                           when process_namespaces is true)
    kwargs              -- forwarded to ``_DictSAXHandler``
    """
    handler = _DictSAXHandler(namespace_separator=namespace_separator,
                              **kwargs)
    if isinstance(xml_input, _unicode):
        encoding = encoding or 'utf-8'
        xml_input = xml_input.encode(encoding)

    parser_separator = namespace_separator if process_namespaces else None
    parser = expat.ParserCreate(encoding, parser_separator)
    try:
        parser.ordered_attributes = True
    except AttributeError:
        # Jython's expat does not support ordered_attributes
        pass
    parser.StartElementHandler = handler.startElement
    parser.EndElementHandler = handler.endElement
    parser.CharacterDataHandler = handler.characters
    parser.buffer_text = True

    # Try the input as a file-like object first; otherwise parse it as one
    # complete in-memory document.
    try:
        parser.ParseFile(xml_input)
    except (TypeError, AttributeError):
        parser.Parse(xml_input, True)
    return handler.item
Example #28
Source File: test_pyexpat.py From ironpython2 with Apache License 2.0 | 5 votes |
def test_use_foreign_dtd(self):
    """
    With UseForeignDTD enabled and a document that has no external entity
    reference, ExternalEntityRefHandler is first called with None for
    both the public and the system id.
    """
    calls = []

    def resolve_entity(context, base, system_id, public_id):
        calls.append((public_id, system_id))
        return 1

    # First pass: UseForeignDTD(True). Second pass: test UseForeignDTD()
    # is equal to UseForeignDTD(True).
    for foreign_dtd_args in ((True,), ()):
        calls[:] = []
        parser = expat.ParserCreate()
        parser.UseForeignDTD(*foreign_dtd_args)
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = resolve_entity
        parser.Parse("<?xml version='1.0'?><element/>")
        self.assertEqual(calls, [(None, None)])
Example #29
Source File: test_pyexpat.py From ironpython2 with Apache License 2.0 | 5 votes |
def test2(self):
    """Check that a malformed XML declaration raises ExpatError with the expected message shape."""
    xml = "<?xml version\xc2\x85='1.0'?>\r\n"
    parser = expat.ParserCreate()
    # The column is matched loosely with \d+.
    err_pattern = r'XML declaration not well-formed: line 1, column \d+'
    self.assertRaisesRegexp(expat.ExpatError, err_pattern,
                            parser.Parse, xml, True)
Example #30
Source File: test_pyexpat.py From ironpython2 with Apache License 2.0 | 5 votes |
def test1(self):
    """Check the ExpatError message for a NUL character followed by CRLF."""
    xml = "\0\r\n"
    parser = expat.ParserCreate()
    try:
        parser.Parse(xml, True)
    except expat.ExpatError as e:
        self.assertEqual(str(e), 'unclosed token: line 2, column 0')
    else:
        # Parsing was expected to raise.
        self.fail()