Python xml.etree.ElementTree.XMLParser() Examples
The following are 30 code examples of xml.etree.ElementTree.XMLParser().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module xml.etree.ElementTree, or try the search function.
Example #1
Source File: read.py From typhon with MIT License | 6 votes |
def parse(source, binaryfp=None):
    """Parse an ARTS XML file into an element tree.

    Args:
        source (str): Filename or file pointer.
        binaryfp: Value stored as the ``binaryfp`` class attribute of the
            element class used to build the tree. Defaults to None.

    Returns:
        xml.etree.ElementTree: XML Tree of the ARTS data file.
    """
    # Build a fresh copy of ARTSElement so that attaching ``binaryfp`` does
    # not modify the ARTSElement class itself.
    element_cls = type('ARTSElementBinaryFP',
                       ARTSElement.__bases__,
                       dict(ARTSElement.__dict__))
    element_cls.binaryfp = binaryfp

    builder = ElementTree.TreeBuilder(element_factory=element_cls)
    xml_parser = ElementTree.XMLParser(target=builder)
    return ElementTree.parse(source, parser=xml_parser)
Example #2
Source File: main.py From meshio with MIT License | 6 votes |
def read(self):
    """Parse ``self.filename`` as XDMF and dispatch on its major version.

    Returns:
        The result of ``self.read_xdmf2(root)`` for version 2.x files or
        ``self.read_xdmf3(root)`` for version 3.x files.

    Raises:
        ReadError: If the root tag is not ``Xdmf``, the ``Version``
            attribute is missing, or the major version is neither 2 nor 3.
    """
    parser = ET.XMLParser()
    root = ET.parse(self.filename, parser).getroot()

    if root.tag != "Xdmf":
        raise ReadError()

    version = root.get("Version")
    if version is None:
        # Without this guard the ``.split`` below raises AttributeError,
        # which callers catching ReadError would not see.
        raise ReadError("Missing XDMF version.")

    major = version.split(".")[0]
    if major == "2":
        return self.read_xdmf2(root)
    if major != "3":
        raise ReadError("Unknown XDMF version {}.".format(version))
    return self.read_xdmf3(root)
Example #3
Source File: test_xml_etree.py From gcblue with BSD 3-Clause "New" or "Revised" License | 6 votes |
def bug_200708_close():
    """
    Test default builder.

    >>> parser = ET.XMLParser() # default
    >>> parser.feed("<element>some text</element>")
    >>> summarize(parser.close())
    'element'

    Test custom builder.

    >>> class EchoTarget:
    ...     def close(self):
    ...         return ET.Element("element") # simulate root
    >>> parser = ET.XMLParser(EchoTarget())
    >>> parser.feed("<element>some text</element>")
    >>> summarize(parser.close())
    'element'

    """
Example #4
Source File: parser.py From fbchat-archive-parser with MIT License | 6 votes |
def parse_impl(self):
    """
    Walk the HTML in ``self.handle`` as a stream of start/end events.

    Streaming is far less memory intensive than loading the entire HTML
    file into memory, like BeautifulSoup does. The first closing ``h1``
    supplies ``self.user`` (if not already set); every opening ``div``
    whose class contains "thread" is parsed and saved as a thread.
    """
    # Cast to str to ensure not unicode under Python 2, as the parser
    # doesn't like that.
    xml_parser = XMLParser(encoding=str('UTF-8'))
    events = ET.iterparse(self.handle, events=("start", "end"),
                          parser=xml_parser)

    for event, node in events:
        tag, css_classes = _tag_and_class_attr(node)

        if tag == "h1" and event == "end":
            if not self.user:
                self.user = node.text.strip()
            continue

        if tag == "div" and "thread" in css_classes and event == "start":
            members = self.parse_participants(node)
            # The shared iterator is handed over so the thread parser can
            # keep consuming events from the same stream.
            parsed = self.parse_thread(members, events, True)
            self.save_thread(parsed)
Example #5
Source File: document_processor.py From mma-dexter with Apache License 2.0 | 6 votes |
def fetch_daily_feeds(self, day):
    """ Fetch the feed for +day+ and return the parsed XML root element.

    The request uses HTTP basic auth with FEED_USER / FEED_PASSWORD.
    Raises ValueError when FEED_PASSWORD is unset and propagates the HTTP
    error raised by ``raise_for_status`` on a failed request.
    """
    # Python 2 only: htmlentitydefs, unichr and dict.iteritems are used below.
    from xml.etree import ElementTree
    from htmlentitydefs import name2codepoint

    if self.FEED_PASSWORD is None:
        raise ValueError("%s.FEED_PASSWORD must be set." % self.__class__.__name__)

    url = self.FEED_URL % day.strftime('%d-%m-%Y')
    credentials = (self.FEED_USER, self.FEED_PASSWORD)
    # NOTE(review): verify=False turns off TLS certificate verification.
    response = requests.get(url, auth=credentials, verify=False, timeout=60)
    response.raise_for_status()

    xml_parser = ElementTree.XMLParser()
    # NOTE(review): presumably needed so that undeclared HTML entities are
    # resolved through ``xml_parser.entity`` instead of failing -- confirm.
    xml_parser.parser.UseForeignDTD(True)
    for entity_name, codepoint in name2codepoint.iteritems():
        xml_parser.entity[entity_name] = unichr(codepoint)

    return ElementTree.fromstring(response.text.encode('utf-8'), parser=xml_parser)
Example #6
Source File: document_processor.py From mma-dexter with Apache License 2.0 | 6 votes |
def fetch_filtered_daily_feeds(self, day, filter_parm):
    """ Fetch the filtered feed for +day+ and return the parsed XML root element.

    +filter_parm+ is substituted into FEED_FILTER_URL after the formatted
    date. Raises ValueError when FEED_PASSWORD is unset and propagates the
    HTTP error raised by ``raise_for_status`` on a failed request.
    """
    # Python 2 only: htmlentitydefs, unichr and dict.iteritems are used below.
    from xml.etree import ElementTree
    from htmlentitydefs import name2codepoint

    if self.FEED_PASSWORD is None:
        raise ValueError("%s.FEED_PASSWORD must be set." % self.__class__.__name__)

    url = self.FEED_FILTER_URL % (day.strftime('%d-%m-%Y'), filter_parm)
    credentials = (self.FEED_USER, self.FEED_PASSWORD)
    # NOTE(review): verify=False turns off TLS certificate verification.
    response = requests.get(url, auth=credentials, verify=False, timeout=60)
    response.raise_for_status()

    xml_parser = ElementTree.XMLParser()
    # NOTE(review): presumably needed so that undeclared HTML entities are
    # resolved through ``xml_parser.entity`` instead of failing -- confirm.
    xml_parser.parser.UseForeignDTD(True)
    for entity_name, codepoint in name2codepoint.iteritems():
        xml_parser.entity[entity_name] = unichr(codepoint)

    return ElementTree.fromstring(response.text.encode('utf-8'), parser=xml_parser)
Example #7
Source File: omexml.py From aicsimageio with BSD 3-Clause "New" or "Revised" License | 6 votes |
def __init__(self, xml=None, rootnode=None):
    """Wrap an OME-XML document.

    ``rootnode``, when given, is used directly as the DOM. Otherwise
    ``xml`` (or ``default_xml`` when ``xml`` is None) is parsed, using
    ISO-8859-1 on Windows and UTF-8 elsewhere. A ``UUID`` attribute is
    added to the root element if it has none.
    """
    if rootnode is not None:
        self.dom = rootnode
    else:
        if xml is None:
            xml = default_xml
        enc = 'ISO-8859-1' if sys.platform.startswith('win') else 'UTF-8'
        self.dom = ElementTree.fromstring(xml, ElementTree.XMLParser(encoding=enc))

    # determine OME namespaces
    self.ns = get_namespaces(self.dom)
    # The format check only runs when this module is executed as a script.
    if __name__ == '__main__' and self.ns['ome'] is None:
        raise Exception("Error: String not in OME-XML format")

    # generate a uuid if there is none
    # < OME UUID = "urn:uuid:ef8af211-b6c1-44d4-97de-daca46f16346"
    root = self.dom
    if not root.get('UUID'):
        root.set('UUID', 'urn:uuid:' + str(uuid.uuid4()))
    self.uuidStr = root.get('UUID')
Example #8
Source File: test_xml_etree.py From oss-ftp with MIT License | 6 votes |
def bug_200708_close():
    """
    Test default builder.

    >>> parser = ET.XMLParser() # default
    >>> parser.feed("<element>some text</element>")
    >>> summarize(parser.close())
    'element'

    Test custom builder.

    >>> class EchoTarget:
    ...     def close(self):
    ...         return ET.Element("element") # simulate root
    >>> parser = ET.XMLParser(EchoTarget())
    >>> parser.feed("<element>some text</element>")
    >>> summarize(parser.close())
    'element'

    """
Example #9
Source File: test_xml_etree.py From BinderFilter with MIT License | 6 votes |
def bug_200708_close():
    """
    Test default builder.

    >>> parser = ET.XMLParser() # default
    >>> parser.feed("<element>some text</element>")
    >>> summarize(parser.close())
    'element'

    Test custom builder.

    >>> class EchoTarget:
    ...     def close(self):
    ...         return ET.Element("element") # simulate root
    >>> parser = ET.XMLParser(EchoTarget())
    >>> parser.feed("<element>some text</element>")
    >>> summarize(parser.close())
    'element'

    """
Example #10
Source File: test_xml_etree.py From oss-ftp with MIT License | 5 votes |
def entity():
    """
    Test entity handling.

    1) good entities

    >>> e = ET.XML("<document title='&#x8230;'>test</document>")
    >>> serialize(e)
    '<document title="&#33328;">test</document>'

    2) bad entities

    >>> ET.XML("<document>&entity;</document>")
    Traceback (most recent call last):
    ParseError: undefined entity: line 1, column 10

    >>> ET.XML(ENTITY_XML)
    Traceback (most recent call last):
    ParseError: undefined entity &entity;: line 5, column 10

    3) custom entity

    >>> parser = ET.XMLParser()
    >>> parser.entity["entity"] = "text"
    >>> parser.feed(ENTITY_XML)
    >>> root = parser.close()
    >>> serialize(root)
    '<document>text</document>'

    """
Example #11
Source File: __init__.py From zim-desktop-wiki with GNU General Public License v2.0 | 5 votes |
def new_parsetree_from_xml(xml):
    """Parse the XML text ``xml`` and wrap the resulting root in a ParseTree."""
    # For some reason this does not work with cElementTree.XMLBuilder ...
    from xml.etree.ElementTree import XMLParser
    from zim.formats import ParseTree

    xml_parser = XMLParser()
    xml_parser.feed(xml)
    return ParseTree(xml_parser.close())
Example #12
Source File: test_xml_etree.py From gcblue with BSD 3-Clause "New" or "Revised" License | 5 votes |
def entity():
    """
    Test entity handling.

    1) good entities

    >>> e = ET.XML("<document title='&#x8230;'>test</document>")
    >>> serialize(e)
    '<document title="&#33328;">test</document>'

    2) bad entities

    >>> ET.XML("<document>&entity;</document>")
    Traceback (most recent call last):
    ParseError: undefined entity: line 1, column 10

    >>> ET.XML(ENTITY_XML)
    Traceback (most recent call last):
    ParseError: undefined entity &entity;: line 5, column 10

    3) custom entity

    >>> parser = ET.XMLParser()
    >>> parser.entity["entity"] = "text"
    >>> parser.feed(ENTITY_XML)
    >>> root = parser.close()
    >>> serialize(root)
    '<document>text</document>'

    """
Example #13
Source File: __init__.py From zim-desktop-wiki with GNU General Public License v2.0 | 5 votes |
def fromstring(self, string):
    '''Replace the root of this tree with the one parsed from the XML
    text C{string} and return C{self}.'''
    xml_parser = ElementTreeModule.XMLParser()
    xml_parser.feed(string)
    self._etree._setroot(xml_parser.close())
    return self  # allow ParseTree().fromstring(..)
Example #14
Source File: parser.py From fbchat-archive-parser with MIT License | 5 votes |
def _get_manifest_data(self):
    """Scan the manifest in ``self.handle`` for the user and thread links.

    Anchors are ignored until a ``div`` whose class contains "content" has
    opened. For each later anchor, the participants and the thread file
    path (relative ``href`` joined onto ``self.root``) are recorded.

    Returns:
        tuple: ``(user, thread_references)``. ``user`` is the stripped text
        of a closing ``h1`` (only read while ``self.user`` is unset, else
        None); ``thread_references`` is a list of
        ``(participants, path)`` tuples.

    Raises:
        UnsuitableParserError: If no anchor was seen after the content div.
    """
    user = None
    thread_references = []
    ignore_anchors = True
    saw_anchor = False

    # Cast to str to ensure not unicode under Python 2, as the parser
    # doesn't like that.
    parser = XMLParser(encoding=str('UTF-8'))
    element_iter = ET.iterparse(self.handle, events=("start", "end"),
                                parser=parser)

    for pos, element in element_iter:
        tag, class_attr = _tag_and_class_attr(element)
        if tag == "h1" and pos == "end":
            if not self.user:
                user = element.text.strip()
        elif tag == "div" and "content" in class_attr and pos == "start":
            ignore_anchors = False
        elif tag == "a" and pos == "start":
            if ignore_anchors:
                continue
            saw_anchor = True
            participants = self.parse_participants(element)
            # Dots are escaped so only a literal leading "../" is removed;
            # unescaped, the pattern matched any two characters before "/".
            thread_path = re.sub(r'^\.\./', '', element.attrib['href'])
            if using_windows():
                thread_path = thread_path.replace('/', '\\')
            thread_references.append(
                (participants, os.path.join(self.root, thread_path)))

    if not saw_anchor:
        # Indicator of a `messages.htm` file that is probably in the legacy format.
        raise UnsuitableParserError
    return user, thread_references
Example #15
Source File: parser.py From fbchat-archive-parser with MIT License | 5 votes |
def process_thread(self, participants, thread_path):
    """Parse the thread file at ``thread_path`` (relative to ``self.root``)
    and save the resulting thread.

    Raises:
        MissingReferenceError: If the thread file does not exist.
    """
    file_path = os.path.join(self.root, thread_path)
    try:
        with io.open(file_path, 'rt', encoding='utf8') as thread_file:
            xml_parser = XMLParser(encoding=str('UTF-8'))
            events = ET.iterparse(SafeXMLStream(thread_file),
                                  events=("start", "end"),
                                  parser=xml_parser)
            # Parsing must finish while the file is still open.
            thread = self.parse_thread(participants, events, False)
    except FileNotFoundError:
        raise MissingReferenceError(file_path)
    else:
        self.save_thread(thread)
Example #16
Source File: en_fr_to_raw.py From AmusingPythonCodes with MIT License | 5 votes |
def read_and_write_xml_data(filenames, output_path):
    """Collect the text of every ``seg`` element and write one per line.

    For each file in ``filenames`` the ``srcset`` child of the root is
    read; the stripped text of each ``seg`` found directly under each of
    its children is gathered in document order. The result is written to
    ``output_path`` as UTF-8, joined with newlines (no trailing newline).
    """
    segments = []
    for path in filenames:
        xml_parser = etree.XMLParser(encoding='utf-8')
        srcset = etree.parse(path, parser=xml_parser).getroot().find('srcset')
        segments.extend(
            seg.text.strip()
            for doc in srcset
            for seg in doc.findall('seg')
        )

    with codecs.open(output_path, 'w', encoding='utf-8') as out:
        out.write('\n'.join(segments))
Example #17
Source File: xform2json.py From pyxform with BSD 2-Clause "Simplified" License | 5 votes |
def __init__(self, root):
    """Build the dictionary view of an XML document.

    Args:
        root: Either a string handed to ``_try_parse`` together with a
            UTF-8 parser, or an already parsed ``ETree.Element``.

    Raises:
        TypeError: If ``root`` is neither a string nor an Element.
    """
    if isinstance(root, basestring):
        parser = ETree.XMLParser(encoding="UTF-8")
        self._root = _try_parse(root, parser)
    elif isinstance(root, ETree.Element):
        # Previously an Element passed the type check but left ``_root``
        # and ``_dict`` unset, so the instance was unusable.
        self._root = root
    else:
        raise TypeError("Expected ElementTree.Element or file path string")

    self._dict = XmlDictObject(
        {
            self._root.tag: _convert_xml_to_dict_recurse(
                self._root, XmlDictObject
            )
        }
    )
Example #18
Source File: rest_server_class.py From warriorframework with Apache License 2.0 | 5 votes |
def verify_xml(self, incoming_xml, respond_obj, file=False):
    """
    Verify the incoming_xml data against either
        a. whole xml files, or
        b. tag/text pairs
    :param:
        incoming_xml: an xml string
        respond_obj: contains the verification detail from datafile
        file: indicate if comparing whole file or just pairs
    :return:
        True if whole file match/all pairs match
        False if not match
    """
    if not file:
        parsed_xml = ET.fromstring(incoming_xml,
                                   parser=ET.XMLParser(encoding="utf-8"))
        for element_pair in respond_obj["request_verify"]:
            fields = element_pair.split(",")
            # Fixed-width prefixes are cut off the two comma-separated fields.
            xpath = fields[0][4:]
            value = fields[1][6:]
            found = getChildElementWithSpecificXpath(parsed_xml, xpath)
            if found is None or value != found.text:
                return False
        return True

    status = False
    # NOTE(review): only the result of the last comparison is returned.
    for expect_xml_file in respond_obj["request_verify_data"]:
        expect_path = getAbsPath(expect_xml_file, getDirName(self.datafile))
        status, _, _, _ = compare_xml(incoming_xml, expect_path,
                                      output_file=False,
                                      sorted_json=False,
                                      remove_namespaces=True)
    return status
Example #19
Source File: base.py From syntribos with Apache License 2.0 | 5 votes |
def _xml_to_obj(cls, serialized_str, encoding="iso-8859-2"): parser = ET.XMLParser(encoding=encoding) element = ET.fromstring(serialized_str, parser=parser) return cls._xml_ele_to_obj(cls._remove_xml_namespaces(element))
Example #20
Source File: test_xml_etree.py From BinderFilter with MIT License | 5 votes |
def entity():
    """
    Test entity handling.

    1) good entities

    >>> e = ET.XML("<document title='&#x8230;'>test</document>")
    >>> serialize(e)
    '<document title="&#33328;">test</document>'

    2) bad entities

    >>> ET.XML("<document>&entity;</document>")
    Traceback (most recent call last):
    ParseError: undefined entity: line 1, column 10

    >>> ET.XML(ENTITY_XML)
    Traceback (most recent call last):
    ParseError: undefined entity &entity;: line 5, column 10

    3) custom entity

    >>> parser = ET.XMLParser()
    >>> parser.entity["entity"] = "text"
    >>> parser.feed(ENTITY_XML)
    >>> root = parser.close()
    >>> serialize(root)
    '<document>text</document>'

    """
Example #21
Source File: document_processor.py From mma-dexter with Apache License 2.0 | 5 votes |
def fetch_daily_feeds(self, day):
    """ Fetch the feed for +day+ and return the parsed XML root element.

    Credentials are sent as PHP_AUTH_USER / PHP_AUTH_PW request headers.
    Raises ValueError when FEED_PASSWORD is unset and propagates the HTTP
    error raised by ``raise_for_status`` on a failed request.
    """
    # Python 2 only: htmlentitydefs, unichr and dict.iteritems are used below.
    from xml.etree import ElementTree
    from htmlentitydefs import name2codepoint

    if self.FEED_PASSWORD is None:
        raise ValueError("%s.FEED_PASSWORD must be set." % self.__class__.__name__)

    auth_headers = {'PHP_AUTH_USER': self.FEED_USER,
                    'PHP_AUTH_PW': self.FEED_PASSWORD}
    url = self.FEED_URL % day.strftime('%d-%m-%Y')
    # NOTE(review): verify=False turns off TLS certificate verification.
    response = requests.get(url, headers=auth_headers, verify=False, timeout=60)
    response.raise_for_status()

    xml_parser = ElementTree.XMLParser()
    # NOTE(review): presumably needed so that undeclared HTML entities are
    # resolved through ``xml_parser.entity`` instead of failing -- confirm.
    xml_parser.parser.UseForeignDTD(True)
    for entity_name, codepoint in name2codepoint.iteritems():
        xml_parser.entity[entity_name] = unichr(codepoint)

    return ElementTree.fromstring(response.text.encode('utf-8'), parser=xml_parser)
Example #22
Source File: xmlparser.py From edl with MIT License | 5 votes |
def getlog(self, input):
    """Return the ``value`` attribute of every ``log`` element found
    directly under the root of each XML document in ``input``.

    ``input`` is a bytes buffer holding zero or more concatenated
    documents, each starting with an ``<?xml`` declaration.
    """
    values = []
    for fragment in input.split(b"<?xml"):
        if not fragment:
            continue
        # split() consumed the declaration prefix; put it back.
        document = b"<?xml" + fragment
        root = ET.fromstring(document, parser=ET.XMLParser(encoding="utf-8"))
        values.extend(
            node.attrib['value']
            for node in root.findall('log')
            if 'value' in node.attrib
        )
    return values
Example #23
Source File: xmlparser.py From edl with MIT License | 5 votes |
def getresponse(self, input):
    """Merge the attributes of every ``response`` element found directly
    under the root of each XML document in ``input`` into one dict.

    ``input`` is a bytes buffer holding zero or more concatenated
    documents, each starting with an ``<?xml`` declaration. Later
    attributes overwrite earlier ones with the same name.
    """
    merged = {}
    for fragment in input.split(b"<?xml"):
        if not fragment:
            continue
        # split() consumed the declaration prefix; put it back.
        document = b"<?xml" + fragment
        root = ET.fromstring(document, parser=ET.XMLParser(encoding="utf-8"))
        for node in root.findall('response'):
            merged.update(node.attrib)
    return merged
Example #24
Source File: confluence.py From confluence-publisher with MIT License | 5 votes |
def _init_parser(self): self._original_parser = etree.XMLParser(*self.args, **self.kwargs) self._original_parser.entity.update(self.known_entity)
Example #25
Source File: advancedsettings.py From script.artwork.beef with MIT License | 5 votes |
def read_xml():
    """Return the root element of the file at FILENAME.

    Returns a new, empty ROOT_TAG element when the file does not exist,
    and None (after logging a warning) when the file cannot be parsed.
    """
    if xbmcvfs.exists(FILENAME):
        tree_parser = ET.XMLParser(target=CommentedTreeBuilder())
        with closing(xbmcvfs.File(FILENAME)) as handle:
            try:
                tree = ET.parse(handle, tree_parser)
            except ET.ParseError:
                log("Can't parse advancedsettings.xml", xbmc.LOGWARNING)
                return None
            return tree.getroot()
    return ET.Element(ROOT_TAG)
Example #26
Source File: xml_util.py From pyxcli with Apache License 2.0 | 5 votes |
def __init__(self):
    """Create a termination-detecting tree builder and an XMLParser that
    feeds parse events into it."""
    builder = _TerminationDetectingTreeBuilder()
    self.tree_builder = builder
    self.xml_tree_builder = et.XMLParser(target=builder)
Example #27
Source File: util.py From sphinxcontrib-needs with MIT License | 5 votes |
def extract_needs_from_html(html):
    """Return an ``HtmlNeed`` for every need table found in ``html``.

    The markup is parsed as XML. Tables are looked up in the namespace map
    ``NS`` first and, if none are found, with an empty namespace. Only
    tables whose ``class`` attribute contains "need" are returned.
    """
    # Replace entities, which elementTree can not handle.
    # Only these two entity references are removed: stripping bare "&"
    # would also break the "&nbsp;" references that are mapped below.
    html = html.replace('&copy;', '')
    html = html.replace('&amp;', '')

    if sys.version_info >= (3, 0):
        source = StringIO(html)
    else:  # Python 2.x
        source = StringIO(html.encode("utf-8"))
    parser = ElementTree.XMLParser(encoding="utf-8")

    # XML knows not nbsp definition, which comes from HTML.
    # So we need to add it
    parser.entity["nbsp"] = ' '

    etree = ElementTree.ElementTree()
    document = etree.parse(source, parser=parser)
    tables = document.findall(".//html:table", NS)

    # Sphinx <3.0 start html-code with:
    #    <html xmlns="http://www.w3.org/1999/xhtml">
    # Sphinx >= 3.0 starts it with:
    #    <html>
    # So above search will not work for Sphinx >= 3.0 and we try a new one
    if len(tables) == 0:
        tables = document.findall(".//html:table", {'html': ''})

    return [HtmlNeed(table) for table in tables if 'need' in table.get('class', '')]
Example #28
Source File: util.py From aamo with MIT License | 5 votes |
def load_xml(file_name):
    """Load the XML file ``file_name`` (relative to ``base_dir()``).

    Returns the parsed tree. Raises ``e.FileNotFound`` when the IOError
    has errno 2 and ``e.LoadFileException`` for any other IOError.
    """
    try:
        xml_parser = ET.XMLParser(encoding="utf-8")
        full_path = base_dir() + file_name
        return ET.parse(full_path, parser=xml_parser)
    except IOError as ex:
        if ex.errno != 2:
            raise e.LoadFileException(str(ex)+'\nUnable to load XML ' + base_dir() + file_name)
        raise e.FileNotFound
Example #29
Source File: data_process_i2b2.py From MedicalRelationExtraction with MIT License | 5 votes |
def data_process(inDIR, outFile):
    """Extract TLINK relations from the XML files in ``inDIR``.

    For every file returned by ``file_name(inDIR)`` the document text and
    each ``TLINK`` under ``TAGS`` are read; one tab-separated line
    ``id, "fromText toText", text, LABEL`` is written to ``outFile`` per
    link whose ``type`` is non-empty. Progress is printed to stdout.

    Args:
        inDIR: Directory prefix; file names are appended to it directly.
        outFile: Path of the output file (overwritten).
    """
    fileList = file_name(inDIR)
    print(len(fileList))

    # Context managers make sure the output (previously never closed) and
    # every input file are closed even when parsing fails.
    with open(outFile, "w") as out:
        for f in fileList:
            print(f, end=' ')
            linkNO = 0

            with open(inDIR + f, "r") as inFile:
                # Bare ampersands are not well-formed XML; rewrite them
                # before parsing.
                xmlString = inFile.read().replace(" & ", " ").replace("&", " and ")

            parser = ET.XMLParser(encoding="utf-8")
            root = ET.fromstring(xmlString, parser=parser)

            text = root.find("TEXT").text.replace("\n", " ").strip()
            tags = root.find("TAGS")
            for tlink in tags.findall("TLINK"):
                # f[:-4] drops the last four characters of the file name.
                link_id = f[:-4] + "_" + str(tlink.attrib['id'])
                target = tlink.attrib['fromText'] + " " + tlink.attrib['toText']
                label = tlink.attrib['type'].upper()
                if label == '':
                    continue
                out.write(link_id + "\t" + target + "\t" + text + "\t" + label + "\n")
                linkNO += 1

            print("linkNO = " + str(linkNO))
            print("*"*80)
Example #30
Source File: DoxygenDB.py From CodeAtlasSublime with Eclipse Public License 1.0 | 5 votes |
def _getXmlDocumentItem(self, fileName):
    """Return the cached or freshly parsed document item for ``fileName``.

    The file ``<self._dbFolder>/<fileName>.xml`` is parsed with each
    candidate encoding in turn until one succeeds. A successful parse is
    stored in ``self.xmlCache``; a failed one returns ``XmlDocItem(None)``
    without caching it.
    """
    filePath = '%s/%s.xml' % (self._dbFolder, fileName)
    xmlDoc = self.xmlCache.get(filePath)
    if xmlDoc:
        return xmlDoc

    doc = None
    # try different encoding and configurations
    for docEncoding in ('utf-8', 'iso-8859-5'):
        try:
            doc = ET.parse(filePath, parser=ET.XMLParser(encoding=docEncoding))
        except Exception:
            # Exception instead of a bare except, so KeyboardInterrupt and
            # SystemExit are no longer swallowed while retrying encodings.
            print('parse %s failed. encoding = %s'% (fileName, docEncoding))
            continue
        print('parse %s success. encoding = %s' % (fileName, docEncoding))
        break

    if doc is None:
        print('parse %s failed'% (fileName, ))
        return XmlDocItem(None)

    xmlDoc = XmlDocItem(doc)
    self.xmlCache[filePath] = xmlDoc
    return xmlDoc