Python xml.etree.cElementTree.iterparse() Examples
The following are 30 code examples of xml.etree.cElementTree.iterparse().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module xml.etree.cElementTree, or try the search function.
Example #1
Source File: path-flowmon-parse-result.py From ns3-load-balance with GNU General Public License v2.0 | 6 votes |
def main(argv):
    """Read a FlowMonitor XML file and print the paths of every flow.

    Args:
        argv: command-line argument list; ``argv[1]`` is the path of the XML
            file to read.
    """
    sys.stdout.write("Reading XML file ")
    sys.stdout.flush()

    level = 0
    sim_list = []
    # The context manager closes the file even when parsing raises; binary
    # mode lets the parser honour the encoding declared in the document.
    with open(argv[1], "rb") as file_obj:
        for event, elem in ElementTree.iterparse(file_obj, events=("start", "end")):
            if event == "start":
                level += 1
            elif event == "end":
                level -= 1
                # level 0 on an "end" event: a top-level element just closed.
                if level == 0 and elem.tag == 'FlowMonitor':
                    sim_list.append(Simulation(elem))
                    elem.clear()  # won't need this any more
                    sys.stdout.write(".")
                    sys.stdout.flush()
    print(" done.")

    for sim in sim_list:
        for flow in sim.flows:
            # Single-argument print() behaves the same on Python 2 and 3.
            print("FlowID: %i %s" % (flow.flowId, flow.paths))
Example #2
Source File: writexl.py From pylightxl with MIT License | 6 votes |
def xml_namespace(file):
    """
    Takes an xml file and returns its namespace declarations as a dict

    :param str file: xml file path
    :return dict: dictionary mapping every declared namespace prefix to its
        URI; the default (empty) prefix is stored under the key 'default'.
        When a prefix is declared more than once the last declaration wins.
    """
    ns_map = {}
    # Only "start-ns" events carry (prefix, uri) pairs, so nothing else is
    # requested from the parser.
    for _, (prefix, uri) in ET.iterparse(file, events=("start-ns",)):
        ns_map['default' if prefix == '' else prefix] = uri
    return ns_map
Example #3
Source File: cloudstorage_api.py From luci-py with Apache License 2.0 | 6 votes |
def _find_elements(self, result, elements):
    """Find interesting elements from XML.

    This function tries to only look for specified elements
    without parsing the entire XML. The specified elements are best
    located near the beginning.

    Args:
      result: response XML.
      elements: a set of interesting element tags. It is not modified.

    Returns:
      A dict from element tag to element value.
    """
    element_mapping = {}
    # Work on a copy so the caller's set is left untouched.
    remaining = set(elements)
    if not remaining:
        return element_mapping
    for _, e in ET.iterparse(StringIO.StringIO(result), events=('end',)):
        if e.tag in remaining:
            element_mapping[e.tag] = e.text
            remaining.remove(e.tag)
            if not remaining:
                # Everything requested was found; skip the rest of the XML.
                break
    return element_mapping
Example #4
Source File: cloudstorage_api.py From luci-py with Apache License 2.0 | 6 votes |
def _find_elements(self, result, elements):
    """Find interesting elements from XML.

    This function tries to only look for specified elements
    without parsing the entire XML. The specified elements are best
    located near the beginning.

    Args:
      result: response XML.
      elements: a set of interesting element tags. It is not modified.

    Returns:
      A dict from element tag to element value.
    """
    element_mapping = {}
    # Work on a copy so the caller's set is left untouched.
    remaining = set(elements)
    if not remaining:
        return element_mapping
    for _, e in ET.iterparse(StringIO.StringIO(result), events=('end',)):
        if e.tag in remaining:
            element_mapping[e.tag] = e.text
            remaining.remove(e.tag)
            if not remaining:
                # Everything requested was found; skip the rest of the XML.
                break
    return element_mapping
Example #5
Source File: cloudstorage_api.py From MyLife with MIT License | 6 votes |
def _find_elements(self, result, elements):
    """Find interesting elements from XML.

    This function tries to only look for specified elements
    without parsing the entire XML. The specified elements are best
    located near the beginning.

    Args:
      result: response XML.
      elements: a set of interesting element tags. It is not modified.

    Returns:
      A dict from element tag to element value.
    """
    element_mapping = {}
    # Work on a copy so the caller's set is left untouched.
    remaining = set(elements)
    if not remaining:
        return element_mapping
    for _, e in ET.iterparse(StringIO.StringIO(result), events=('end',)):
        if e.tag in remaining:
            element_mapping[e.tag] = e.text
            remaining.remove(e.tag)
            if not remaining:
                # Everything requested was found; skip the rest of the XML.
                break
    return element_mapping
Example #6
Source File: Bootstrapper.py From discograph with MIT License | 6 votes |
def iterparse(source, tag):
    """Lazily yield each outermost ``tag`` element parsed from ``source``.

    Elements named ``tag`` that are nested inside another ``tag`` element are
    not yielded on their own. After the consumer resumes the generator, the
    element captured from the first parse event is cleared.
    """
    events = iter(ElementTree.iterparse(source, events=('start', 'end')))
    # The very first event is the start of the document's root element.
    root = next(events)[1]
    nesting = 0
    for kind, node in events:
        if node.tag != tag:
            continue
        nesting += 1 if kind == 'start' else -1
        if nesting == 0:
            yield node
            root.clear()
Example #7
Source File: cloudstorage_api.py From billing-export-python with Apache License 2.0 | 6 votes |
def _find_elements(self, result, elements):
    """Find interesting elements from XML.

    This function tries to only look for specified elements
    without parsing the entire XML. The specified elements are best
    located near the beginning.

    Args:
      result: response XML.
      elements: a set of interesting element tags. It is not modified.

    Returns:
      A dict from element tag to element value.
    """
    element_mapping = {}
    # Work on a copy so the caller's set is left untouched.
    remaining = set(elements)
    if not remaining:
        return element_mapping
    for _, e in ET.iterparse(StringIO.StringIO(result), events=('end',)):
        if e.tag in remaining:
            element_mapping[e.tag] = e.text
            remaining.remove(e.tag)
            if not remaining:
                # Everything requested was found; skip the rest of the XML.
                break
    return element_mapping
Example #8
Source File: lhe.py From madminer with MIT License | 5 votes |
def _untar_and_parse_lhe_file(filename, tags=None):
    """Yield parsed XML elements from an event file, gunzipping it if needed.

    When ``filename`` ends in ``.gz`` it is decompressed to the same path
    without that extension (unless that file already exists) and the
    decompressed file is parsed instead.

    Args:
        filename: path of the event file, optionally gzip-compressed.
        tags: optional collection of tag names. When given and non-empty,
            only elements whose tag is in it are yielded.

    Yields:
        Each completed element. A yielded element is cleared as soon as the
        consumer asks for the next one.
    """
    # Local imports keep this block self-contained.
    import gzip
    import shutil

    new_filename, extension = os.path.splitext(filename)
    if extension == ".gz":
        if not os.path.exists(new_filename):
            # Decompress with the standard library instead of a shell string,
            # so file names with spaces or shell metacharacters are safe.
            with gzip.open(filename, "rb") as src, open(new_filename, "wb") as dst:
                shutil.copyfileobj(src, dst)
        filename = new_filename

    for _, elem in ET.iterparse(filename):
        if tags and elem.tag not in tags:
            # Left intact: clearing here could empty a child of a wanted
            # element before that parent is yielded.
            continue
        yield elem
        elem.clear()
Example #9
Source File: flowmon-parse-results.py From ns3-load-balance with GNU General Public License v2.0 | 5 votes |
def main(argv):
    """Read a FlowMonitor XML file and print a summary of every flow.

    Args:
        argv: command-line argument list; ``argv[1]`` is the path of the XML
            file to read.
    """
    sys.stdout.write("Reading XML file ")
    sys.stdout.flush()

    level = 0
    sim_list = []
    # The context manager closes the file even when parsing raises; binary
    # mode lets the parser honour the encoding declared in the document.
    with open(argv[1], "rb") as file_obj:
        for event, elem in ElementTree.iterparse(file_obj, events=("start", "end")):
            if event == "start":
                level += 1
            elif event == "end":
                level -= 1
                # level 0 on an "end" event: a top-level element just closed.
                if level == 0 and elem.tag == 'FlowMonitor':
                    sim_list.append(Simulation(elem))
                    elem.clear()  # won't need this any more
                    sys.stdout.write(".")
                    sys.stdout.flush()
    print(" done.")

    protocol_names = {6: 'TCP', 17: 'UDP'}
    for sim in sim_list:
        for flow in sim.flows:
            t = flow.fiveTuple
            # Fall back to the raw protocol number instead of raising
            # KeyError for anything other than TCP/UDP.
            proto = protocol_names.get(t.protocol, t.protocol)
            # Single-argument print() behaves the same on Python 2 and 3.
            print("FlowID: %i (%s %s/%s --> %s/%i)" %
                  (flow.flowId, proto, t.sourceAddress, t.sourcePort,
                   t.destinationAddress, t.destinationPort))
            print("\tTX bitrate: %.2f kbit/s" % (flow.txBitrate*1e-3,))
            print("\tRX bitrate: %.2f kbit/s" % (flow.rxBitrate*1e-3,))
            print("\tMean Delay: %.2f ms" % (flow.delayMean*1e3,))
            print("\tPacket Loss Ratio: %.2f %%" % (flow.packetLossRatio*100))
Example #10
Source File: flowmon-parse-results.py From 802.11ah-ns3 with GNU General Public License v2.0 | 5 votes |
def main(argv):
    """Read a FlowMonitor XML file and print a summary of every flow.

    Args:
        argv: command-line argument list; ``argv[1]`` is the path of the XML
            file to read.
    """
    sys.stdout.write("Reading XML file ")
    sys.stdout.flush()

    level = 0
    sim_list = []
    # The context manager closes the file even when parsing raises; binary
    # mode lets the parser honour the encoding declared in the document.
    with open(argv[1], "rb") as file_obj:
        for event, elem in ElementTree.iterparse(file_obj, events=("start", "end")):
            if event == "start":
                level += 1
            elif event == "end":
                level -= 1
                # level 0 on an "end" event: a top-level element just closed.
                if level == 0 and elem.tag == 'FlowMonitor':
                    sim_list.append(Simulation(elem))
                    elem.clear()  # won't need this any more
                    sys.stdout.write(".")
                    sys.stdout.flush()
    print(" done.")

    protocol_names = {6: 'TCP', 17: 'UDP'}
    for sim in sim_list:
        for flow in sim.flows:
            t = flow.fiveTuple
            # Fall back to the raw protocol number instead of raising
            # KeyError for anything other than TCP/UDP.
            proto = protocol_names.get(t.protocol, t.protocol)
            # Single-argument print() behaves the same on Python 2 and 3.
            print("FlowID: %i (%s %s/%s --> %s/%i)" %
                  (flow.flowId, proto, t.sourceAddress, t.sourcePort,
                   t.destinationAddress, t.destinationPort))
            print("\tTX bitrate: %.2f kbit/s" % (flow.txBitrate*1e-3,))
            print("\tRX bitrate: %.2f kbit/s" % (flow.rxBitrate*1e-3,))
            print("\tMean Delay: %.2f ms" % (flow.delayMean*1e3,))
            print("\tPacket Loss Ratio: %.2f %%" % (flow.packetLossRatio*100))
Example #11
Source File: flowmon-parse-results.py From ns3-rdma with GNU General Public License v2.0 | 5 votes |
def main(argv):
    """Read a FlowMonitor XML file and print a summary of every flow.

    Args:
        argv: command-line argument list; ``argv[1]`` is the path of the XML
            file to read.
    """
    sys.stdout.write("Reading XML file ")
    sys.stdout.flush()

    level = 0
    sim_list = []
    # The context manager closes the file even when parsing raises; binary
    # mode lets the parser honour the encoding declared in the document.
    with open(argv[1], "rb") as file_obj:
        for event, elem in ElementTree.iterparse(file_obj, events=("start", "end")):
            if event == "start":
                level += 1
            elif event == "end":
                level -= 1
                # level 0 on an "end" event: a top-level element just closed.
                if level == 0 and elem.tag == 'FlowMonitor':
                    sim_list.append(Simulation(elem))
                    elem.clear()  # won't need this any more
                    sys.stdout.write(".")
                    sys.stdout.flush()
    print(" done.")

    protocol_names = {6: 'TCP', 17: 'UDP'}
    for sim in sim_list:
        for flow in sim.flows:
            t = flow.fiveTuple
            # Fall back to the raw protocol number instead of raising
            # KeyError for anything other than TCP/UDP.
            proto = protocol_names.get(t.protocol, t.protocol)
            # Single-argument print() behaves the same on Python 2 and 3.
            print("FlowID: %i (%s %s/%s --> %s/%i)" %
                  (flow.flowId, proto, t.sourceAddress, t.sourcePort,
                   t.destinationAddress, t.destinationPort))
            print("\tTX bitrate: %.2f kbit/s" % (flow.txBitrate*1e-3,))
            print("\tRX bitrate: %.2f kbit/s" % (flow.rxBitrate*1e-3,))
            print("\tMean Delay: %.2f ms" % (flow.delayMean*1e3,))
            print("\tPacket Loss Ratio: %.2f %%" % (flow.packetLossRatio*100))
Example #12
Source File: flowmon-parse-results.py From ns3-ecn-sharp with GNU General Public License v2.0 | 5 votes |
def main(argv):
    """Read a FlowMonitor XML file and print a summary of every flow.

    Args:
        argv: command-line argument list; ``argv[1]`` is the path of the XML
            file to read.
    """
    sys.stdout.write("Reading XML file ")
    sys.stdout.flush()

    level = 0
    sim_list = []
    # The context manager closes the file even when parsing raises; binary
    # mode lets the parser honour the encoding declared in the document.
    with open(argv[1], "rb") as file_obj:
        for event, elem in ElementTree.iterparse(file_obj, events=("start", "end")):
            if event == "start":
                level += 1
            elif event == "end":
                level -= 1
                # level 0 on an "end" event: a top-level element just closed.
                if level == 0 and elem.tag == 'FlowMonitor':
                    sim_list.append(Simulation(elem))
                    elem.clear()  # won't need this any more
                    sys.stdout.write(".")
                    sys.stdout.flush()
    print(" done.")

    protocol_names = {6: 'TCP', 17: 'UDP'}
    for sim in sim_list:
        for flow in sim.flows:
            t = flow.fiveTuple
            # Fall back to the raw protocol number instead of raising
            # KeyError for anything other than TCP/UDP.
            proto = protocol_names.get(t.protocol, t.protocol)
            # Single-argument print() behaves the same on Python 2 and 3.
            print("FlowID: %i (%s %s/%s --> %s/%i)" %
                  (flow.flowId, proto, t.sourceAddress, t.sourcePort,
                   t.destinationAddress, t.destinationPort))
            print("\tTX bitrate: %.2f kbit/s" % (flow.txBitrate*1e-3,))
            print("\tRX bitrate: %.2f kbit/s" % (flow.rxBitrate*1e-3,))
            print("\tMean Delay: %.2f ms" % (flow.delayMean*1e3,))
            print("\tPacket Loss Ratio: %.2f %%" % (flow.packetLossRatio*100))
Example #13
Source File: proxy_Jforests.py From rankeval with Mozilla Public License 2.0 | 5 votes |
def _count_nodes(file_path):
    """
    Count the trees and the nodes (split and leaf nodes together) stored in
    the model file identified by file_path.

    Parameters
    ----------
    file_path : str
        The path to the filename where the model has been saved

    Returns
    -------
    tuple(n_trees, n_nodes) : tuple(int, int)
        The total number of trees and nodes (both split and leaf nodes)
        in the model identified by file_path.
    """
    # NOTE: "start" events are requested too because, according to the
    # original author, selecting only terminal tags with events=("end",)
    # seemed to skip some tags (suspected lxml bug).
    events = etree.iterparse(file_path, events=("start", "end"))

    # the first event carries the root element
    root = next(events)[1]

    n_trees = n_nodes = 0
    for kind, node in events:
        if kind == "end":
            if node.tag == 'Tree':
                n_trees += 1
            elif node.tag in ('SplitFeatures', 'LeafOutputs'):
                # one node per space-separated value in the element text
                n_nodes += len(node.text.split(" "))
            node.clear()    # discard the element
            root.clear()    # remove root reference to the child
    return n_trees, n_nodes
Example #14
Source File: books_iterate_xml.py From Learning-Python-Networking-Second-Edition with MIT License | 5 votes |
def books(file):
    """Print one comma-separated summary line per book and per chapter.

    Args:
        file: filename or file object containing the XML to parse.

    For every ``book`` element the title, publisher, number of chapters,
    page count and author are printed; for every ``chapter`` element the
    chapter number, chapter title and page count are printed. Lines appear
    in the order the elements are closed in the document.
    """
    # iterparse() reports only "end" events by default, so every element
    # seen here is complete and no event check is needed.
    for _, elem in iterparse(file):
        if elem.tag == 'book':
            print('{0}, {1}, {2}, {3}, {4}'.format(
                elem.findtext('title'),
                elem.findtext('publisher'),
                elem.findtext('numberOfChapters'),
                elem.findtext('pageCount'),
                elem.findtext('author')))
        elif elem.tag == 'chapter':
            print('{0}, {1}, {2}'.format(
                elem.findtext('chapterNumber'),
                elem.findtext('chapterTitle'),
                elem.findtext('pageCount')))
Example #15
Source File: flowmon-parse-results.py From CRE-NS3 with GNU General Public License v2.0 | 5 votes |
def main(argv):
    """Read a FlowMonitor XML file and print a summary of every flow.

    Args:
        argv: command-line argument list; ``argv[1]`` is the path of the XML
            file to read.
    """
    sys.stdout.write("Reading XML file ")
    sys.stdout.flush()

    level = 0
    sim_list = []
    # The context manager closes the file even when parsing raises; binary
    # mode lets the parser honour the encoding declared in the document.
    with open(argv[1], "rb") as file_obj:
        for event, elem in ElementTree.iterparse(file_obj, events=("start", "end")):
            if event == "start":
                level += 1
            elif event == "end":
                level -= 1
                # level 0 on an "end" event: a top-level element just closed.
                if level == 0 and elem.tag == 'FlowMonitor':
                    sim_list.append(Simulation(elem))
                    elem.clear()  # won't need this any more
                    sys.stdout.write(".")
                    sys.stdout.flush()
    print(" done.")

    protocol_names = {6: 'TCP', 17: 'UDP'}
    for sim in sim_list:
        for flow in sim.flows:
            t = flow.fiveTuple
            # Fall back to the raw protocol number instead of raising
            # KeyError for anything other than TCP/UDP.
            proto = protocol_names.get(t.protocol, t.protocol)
            # Single-argument print() behaves the same on Python 2 and 3.
            print("FlowID: %i (%s %s/%s --> %s/%i)" %
                  (flow.flowId, proto, t.sourceAddress, t.sourcePort,
                   t.destinationAddress, t.destinationPort))
            print("\tTX bitrate: %.2f kbit/s" % (flow.txBitrate*1e-3,))
            print("\tRX bitrate: %.2f kbit/s" % (flow.rxBitrate*1e-3,))
            print("\tMean Delay: %.2f ms" % (flow.delayMean*1e3,))
            print("\tPacket Loss Ratio: %.2f %%" % (flow.packetLossRatio*100))
Example #16
Source File: xml_.py From ryu with Apache License 2.0 | 5 votes |
def parse_root(raw):
    "Efficiently parses the root element of a *raw* XML document, returning a tuple of its qualified name and attribute dictionary."
    stream = StringIO(raw)
    # Only the first "start" event (the root element) is consumed.
    first = next(iter(ET.iterparse(stream, events=('start',))), None)
    if first is None:
        return None
    root = first[1]
    return (root.tag, root.attrib)
Example #17
Source File: Bootstrapper.py From discograph with MIT License | 5 votes |
def get_iterator(tag):
    """Return an iterator over the cleaned ``tag`` elements of a gzipped XML file.

    The file path comes from ``Bootstrapper.get_xml_path(tag)``; the file is
    opened for reading and handed to ``Bootstrapper.iterparse``, whose
    output is wrapped by ``Bootstrapper.clean_elements``.
    """
    stream = gzip.GzipFile(Bootstrapper.get_xml_path(tag), 'r')
    return Bootstrapper.clean_elements(Bootstrapper.iterparse(stream, tag))
Example #18
Source File: element_iterator.py From python-mediawiki-utilities with MIT License | 5 votes |
def from_file(cls, f):
    """Build an ``EventPointer`` over the start/end parse events of ``f``."""
    events = etree.iterparse(f, events=("start", "end"))
    return EventPointer(events)
Example #19
Source File: wmt.py From datasets with Apache License 2.0 | 5 votes |
def _parse_tmx(path):
    """Generates examples from TMX file.

    Yields ``(index, {language: segment_text})`` for every ``tu`` element,
    where ``index`` is the position of that element's parse event.
    """

    def _get_tuv_lang(tuv):
        # The language is stored in an attribute whose name ends in "}lang".
        langs = [value for name, value in tuv.items() if name.endswith("}lang")]
        if not langs:
            raise AssertionError("Language not found in `tuv` attributes.")
        return langs[0]

    def _get_tuv_seg(tuv):
        segs = tuv.findall("seg")
        assert len(segs) == 1, "Invalid number of segments: %d" % len(segs)
        return segs[0].text

    with tf.io.gfile.GFile(path, "rb") as f:
        # Workaround due to: https://github.com/tensorflow/tensorflow/issues/33563
        utf_f = codecs.getreader("utf-8")(f) if six.PY3 else f
        events = ElementTree.iterparse(utf_f)  # pytype: disable=wrong-arg-types
        for line_id, (_, elem) in enumerate(events):
            if elem.tag != "tu":
                continue
            yield line_id, {
                _get_tuv_lang(tuv): _get_tuv_seg(tuv)
                for tuv in elem.iterfind("tuv")
            }
            elem.clear()
Example #20
Source File: load_dump.py From es-django-example with Apache License 2.0 | 5 votes |
def _parse_file(self, xml_file):
    """Yield one dict per ``row`` element of ``xml_file`` inside ``self.dir``.

    Args:
        xml_file: file name, joined onto ``self.dir``.

    Yields:
        A dict of the row's attributes; values made only of digits are
        converted to ``int``, all others stay strings.
    """
    # Binary mode lets the XML parser apply the document's own encoding
    # declaration instead of the platform default text encoding.
    with open(join(self.dir, xml_file), 'rb') as xml_stream:
        for _, e in cElementTree.iterparse(xml_stream, events=('end',)):
            if e.tag != 'row':
                continue
            yield dict(
                (k, int(v) if v.isdigit() else v) for (k, v) in e.items()
            )
            # The row has been copied into the dict; release its data so
            # memory does not grow with the size of the file.
            e.clear()
Example #21
Source File: tunein.py From AlexaPi with MIT License | 5 votes |
def parse_new_asx(data):
    """Yield the fixed-up ``href`` of each entry found while parsing ``data``.

    ``data`` is handed to ``elementtree.iterparse``. Every ``href`` value is
    stripped of surrounding whitespace and passed through ``fix_asf_uri``
    before being yielded. A ``ParseError`` ends the generator silently;
    anything yielded before the error has already been delivered.

    NOTE(review): the statement nesting was reconstructed from a flattened
    listing -- confirm that both ``findall`` loops belong inside the
    ``iterparse`` loop.
    """
    # Copied from mopidy.audio.playlists
    try:
        for _, element in elementtree.iterparse(data):
            element.tag = element.tag.lower()  # normalize
            # <entry> children that wrap a <ref href="..."> element
            for ref in element.findall('entry/ref[@href]'):
                yield fix_asf_uri(ref.get('href', '').strip())
            # <entry> children that carry the href attribute themselves
            for entry in element.findall('entry[@href]'):
                yield fix_asf_uri(entry.get('href', '').strip())
    except elementtree.ParseError:
        return
Example #22
Source File: proxy_QuickRank.py From rankeval with Mozilla Public License 2.0 | 5 votes |
def _count_nodes(file_path):
    """
    Count the total number of nodes (both split and leaf nodes) in the
    model identified by file_path.

    Parameters
    ----------
    file_path : str
        The path to the filename where the model has been saved

    Returns
    -------
    tuple(n_trees, n_nodes) : tuple(int, int)
        The total number of trees and nodes (both split and leaf nodes)
        in the model identified by file_path.
    """
    # "start" events are requested as well so that the first event really is
    # the root element. With events=("end",) alone the first event is the
    # first element to *close*, which would be consumed uncounted and
    # mistaken for the root.
    context = etree.iterparse(file_path, events=("start", "end"))

    # get the root element
    _, root = next(context)

    n_nodes = 0
    n_trees = 0
    for event, elem in context:
        if event != "end":
            continue
        if elem.tag == 'tree':
            n_trees += 1
        elif elem.tag in ('feature', 'output'):
            n_nodes += 1
        elem.clear()    # discard the element
        root.clear()    # remove root reference to the child
    return n_trees, n_nodes
Example #23
Source File: __init__.py From uniconvertor with GNU Affero General Public License v3.0 | 5 votes |
def check_svg(path):
    """Tell whether the first element parsed from ``path`` is an ``svg`` tag.

    The file is obtained through ``get_fileptr`` and always closed again.
    A ``ParseError`` before any element is seen gives ``False``.
    """
    fileptr = get_fileptr(path)
    root_tag = None
    try:
        # Only the first "start" event is needed to learn the root tag.
        for _, node in cElementTree.iterparse(fileptr, ('start',)):
            root_tag = node.tag
            break
    except cElementTree.ParseError:
        pass
    finally:
        fileptr.close()
    return root_tag in ('{http://www.w3.org/2000/svg}svg', 'svg')
Example #24
Source File: process_dataset.py From Multilingual-Model-Transfer with MIT License | 5 votes |
def parse(itemfile):
    """Yield ``processItem(elem)`` for every completed ``item`` element of ``itemfile``."""
    for _, node in iterparse(itemfile):
        if node.tag != "item":
            continue
        yield processItem(node)
        # Runs when the consumer resumes the generator: drop the item's data.
        node.clear()
Example #25
Source File: ShimCacheParser_ACP.py From appcompatprocessor with Apache License 2.0 | 5 votes |
def read_mir(xml_file, quiet=False):
    """Collect AppCompatCache rows from the ``RegistryItem`` elements of a MIR XML file.

    Args:
        xml_file: filename or file object handed to ``et.iterparse``.
        quiet: passed through unchanged to ``read_cache``.

    Returns:
        ``(error, None)`` when reading fails with ``AttributeError``,
        ``TypeError`` or ``IOError``; ``error`` is the message that was
        printed.

        NOTE(review): this listing has no return statement on the success
        path, so the rows gathered in ``out_list`` are not handed back from
        the visible code. The listing looks truncated -- confirm against
        the full source before relying on the return value.
    """
    out_list = []
    tmp_list = []
    error = ""

    # Open the MIR output file.
    try:
        for (_, reg_item) in et.iterparse(xml_file, events=('end',)):
            if reg_item.tag != 'RegistryItem':
                continue

            path_name = reg_item.find("Path").text
            if not path_name:
                # Single-argument print() behaves the same on Python 2 and 3.
                print("[-] Error XML missing Path")
                print(et.tostring(reg_item))
                reg_item.clear()
                continue
            path_name = path_name.lower()

            # Check to see that we have the right registry value.
            if 'control\\session manager\\appcompatcache\\appcompatcache' in path_name \
                    or 'control\\session manager\\appcompatibility\\appcompatcache' in path_name:
                # return the base64 decoded value data.
                bin_data = binascii.a2b_base64(reg_item.find('Value').text)
                tmp_list = read_cache(bin_data, quiet)

                if tmp_list:
                    for row in tmp_list:
                        if g_verbose:
                            row.append(path_name)
                        # keep each distinct row only once
                        if row not in out_list:
                            out_list.append(row)
            reg_item.clear()

    # "except ... as err" is accepted by Python 2.6+ and Python 3 alike,
    # unlike the comma form.
    except (AttributeError, TypeError, IOError) as err:
        error = "[-] Error reading MIR XML: %s" % str(err)
        print(error)
        return (error, None)
Example #26
Source File: parser.py From Yuki-Chan-The-Auto-Pentest with MIT License | 5 votes |
def extract_hostnames(file):
    """Return the distinct, non-empty host names found in an XML or CSV file.

    The file type is decided by ``detect_type(file)``. A host name is the
    part of a value that precedes its first dot.

    Args:
        file: path of the file to read.

    Returns:
        A list of unique host names, in no particular order. Files whose
        detected type is neither "xml" nor "csv" give an empty list.
    """
    host_names = []
    hostname_pattern = re.compile("(^[^.]*)")
    file_type = detect_type(file)
    if file_type == "xml":
        # (record-type regex, attribute holding the name), tried in order;
        # the first matching rule wins.
        record_rules = (
            (r'PTR|^[A]$|AAAA', 'name'),   # A, AAAA and PTR records
            (r'NS', 'target'),             # NS records
            (r'SOA', 'mname'),             # SOA records
            (r'MX', 'exchange'),           # MX records
            (r'SRV', 'target'),            # SRV records
        )
        for event, elem in cElementTree.iterparse(file):
            # Only records that carry an IP address are considered.
            if elem.tag != "record" or "address" not in elem.attrib:
                continue
            record_type = elem.attrib['type']
            for type_regex, attr_name in record_rules:
                if re.search(type_regex, record_type):
                    host_names.append(
                        hostname_pattern.search(elem.attrib[attr_name]).group(1))
                    break

    elif file_type == "csv":
        # The context manager closes the file once all rows are read.
        with open(file, 'r') as csv_file:
            reader = csv.reader(csv_file, delimiter=',')
            # Skip the first row (presumably a header); next() works on
            # both Python 2 and 3, unlike reader.next().
            next(reader)
            for row in reader:
                host_names.append(hostname_pattern.search(row[1]).group(1))

    # Deduplicate and drop empty values; a real list is returned on every
    # Python version (filter() is lazy on Python 3).
    return [name for name in set(host_names) if name]
Example #27
Source File: flowmon-parse-results.py From ntu-dsi-dcn with GNU General Public License v2.0 | 5 votes |
def main(argv):
    """Read a FlowMonitor XML file and print a summary of every flow.

    Args:
        argv: command-line argument list; ``argv[1]`` is the path of the XML
            file to read.
    """
    sys.stdout.write("Reading XML file ")
    sys.stdout.flush()

    level = 0
    sim_list = []
    # The context manager closes the file even when parsing raises; binary
    # mode lets the parser honour the encoding declared in the document.
    with open(argv[1], "rb") as file_obj:
        for event, elem in ElementTree.iterparse(file_obj, events=("start", "end")):
            if event == "start":
                level += 1
            elif event == "end":
                level -= 1
                # level 0 on an "end" event: a top-level element just closed.
                if level == 0 and elem.tag == 'FlowMonitor':
                    sim_list.append(Simulation(elem))
                    elem.clear()  # won't need this any more
                    sys.stdout.write(".")
                    sys.stdout.flush()
    print(" done.")

    protocol_names = {6: 'TCP', 17: 'UDP'}
    for sim in sim_list:
        for flow in sim.flows:
            t = flow.fiveTuple
            # Fall back to the raw protocol number instead of raising
            # KeyError for anything other than TCP/UDP.
            proto = protocol_names.get(t.protocol, t.protocol)
            # Single-argument print() behaves the same on Python 2 and 3.
            print("FlowID: %i (%s %s/%s --> %s/%i)" %
                  (flow.flowId, proto, t.sourceAddress, t.sourcePort,
                   t.destinationAddress, t.destinationPort))
            print("\tTX bitrate: %.2f kbit/s" % (flow.txBitrate*1e-3,))
            print("\tRX bitrate: %.2f kbit/s" % (flow.rxBitrate*1e-3,))
            print("\tMean Delay: %.2f ms" % (flow.delayMean*1e3,))
            print("\tPacket Loss Ratio: %.2f %%" % (flow.packetLossRatio*100))
Example #28
Source File: utils.py From dvhb-hybrid with MIT License | 5 votes |
def validate_svg_file(f):
    """Return ``f`` rewound to its start if its root element is a namespaced SVG tag.

    Raises ``ValidationError`` when the first element is anything else or
    the content cannot be parsed.
    """
    f.seek(0)
    root_tag = None
    try:
        # Only the first "start" event is needed to learn the root tag.
        for _, node in xml_et.iterparse(f, ('start',)):
            root_tag = node.tag
            break
    except xml_et.ParseError:
        # Unparsable input leaves root_tag as None and is rejected below.
        pass
    if root_tag == '{http://www.w3.org/2000/svg}svg':
        f.seek(0)
        return f
    raise ValidationError('Uploaded file is not an image or SVG file')
Example #29
Source File: flowmon-parse-results.py From IEEE-802.11ah-ns-3 with GNU General Public License v2.0 | 5 votes |
def main(argv):
    """Read a FlowMonitor XML file and print a summary of every flow.

    Args:
        argv: command-line argument list; ``argv[1]`` is the path of the XML
            file to read.
    """
    sys.stdout.write("Reading XML file ")
    sys.stdout.flush()

    level = 0
    sim_list = []
    # The context manager closes the file even when parsing raises; binary
    # mode lets the parser honour the encoding declared in the document.
    with open(argv[1], "rb") as file_obj:
        for event, elem in ElementTree.iterparse(file_obj, events=("start", "end")):
            if event == "start":
                level += 1
            elif event == "end":
                level -= 1
                # level 0 on an "end" event: a top-level element just closed.
                if level == 0 and elem.tag == 'FlowMonitor':
                    sim_list.append(Simulation(elem))
                    elem.clear()  # won't need this any more
                    sys.stdout.write(".")
                    sys.stdout.flush()
    print(" done.")

    protocol_names = {6: 'TCP', 17: 'UDP'}
    for sim in sim_list:
        for flow in sim.flows:
            t = flow.fiveTuple
            # Fall back to the raw protocol number instead of raising
            # KeyError for anything other than TCP/UDP.
            proto = protocol_names.get(t.protocol, t.protocol)
            # Single-argument print() behaves the same on Python 2 and 3.
            print("FlowID: %i (%s %s/%s --> %s/%i)" %
                  (flow.flowId, proto, t.sourceAddress, t.sourcePort,
                   t.destinationAddress, t.destinationPort))
            print("\tTX bitrate: %.2f kbit/s" % (flow.txBitrate*1e-3,))
            print("\tRX bitrate: %.2f kbit/s" % (flow.rxBitrate*1e-3,))
            print("\tMean Delay: %.2f ms" % (flow.delayMean*1e3,))
            print("\tPacket Loss Ratio: %.2f %%" % (flow.packetLossRatio*100))
Example #30
Source File: flowmon-parse-results.py From Tocino with GNU General Public License v2.0 | 5 votes |
def main(argv):
    """Read a FlowMonitor XML file and print a summary of every flow.

    Args:
        argv: command-line argument list; ``argv[1]`` is the path of the XML
            file to read.
    """
    sys.stdout.write("Reading XML file ")
    sys.stdout.flush()

    level = 0
    sim_list = []
    # The context manager closes the file even when parsing raises; binary
    # mode lets the parser honour the encoding declared in the document.
    with open(argv[1], "rb") as file_obj:
        for event, elem in ElementTree.iterparse(file_obj, events=("start", "end")):
            if event == "start":
                level += 1
            elif event == "end":
                level -= 1
                # level 0 on an "end" event: a top-level element just closed.
                if level == 0 and elem.tag == 'FlowMonitor':
                    sim_list.append(Simulation(elem))
                    elem.clear()  # won't need this any more
                    sys.stdout.write(".")
                    sys.stdout.flush()
    print(" done.")

    protocol_names = {6: 'TCP', 17: 'UDP'}
    for sim in sim_list:
        for flow in sim.flows:
            t = flow.fiveTuple
            # Fall back to the raw protocol number instead of raising
            # KeyError for anything other than TCP/UDP.
            proto = protocol_names.get(t.protocol, t.protocol)
            # Single-argument print() behaves the same on Python 2 and 3.
            print("FlowID: %i (%s %s/%s --> %s/%i)" %
                  (flow.flowId, proto, t.sourceAddress, t.sourcePort,
                   t.destinationAddress, t.destinationPort))
            print("\tTX bitrate: %.2f kbit/s" % (flow.txBitrate*1e-3,))
            print("\tRX bitrate: %.2f kbit/s" % (flow.rxBitrate*1e-3,))
            print("\tMean Delay: %.2f ms" % (flow.delayMean*1e3,))
            print("\tPacket Loss Ratio: %.2f %%" % (flow.packetLossRatio*100))