Python lxml.etree.XMLParser() Examples
The following are 30 code examples of lxml.etree.XMLParser().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all the available functions and classes of the lxml.etree module, or try the search function.
Example #1
Source File: sim_files.py From simLAB with GNU General Public License v2.0 | 6 votes |
def readXml(self, simType):
    """Load the XML description of the SIM file system for ``simType``.

    The description file is looked up next to this module:
    ``sim_files_3g.xml`` when ``simType`` equals ``types.TYPE_USIM``,
    ``sim_files_2g.xml`` for every other value. When the file does not
    exist, a warning is logged and a new, empty root element
    (``sim_3G`` or ``sim_2G``) is created instead of parsing.

    Returns:
        A ``(path, root)`` tuple: the path of the description file (which
        may not exist yet) and the root element.
    """
    is_usim = simType == types.TYPE_USIM
    file_name = "sim_files_3g.xml" if is_usim else "sim_files_2g.xml"
    path = os.path.join(os.path.dirname(__file__), file_name)

    if not os.path.exists(path):
        logging.warning("File %s not exists", path)
        logging.info("Create xml")
        root = etree.Element('sim_3G' if is_usim else 'sim_2G')
    else:
        # Drop whitespace-only text nodes so the tree can be pretty-printed
        # cleanly if it is serialized again.
        parser = etree.XMLParser(remove_blank_text=True)
        root = etree.parse(path, parser).getroot()
    return path, root
Example #2
Source File: scan.py From report-ng with GNU General Public License v2.0 | 6 votes |
def __init__(self, filename, requests_and_responses=False):
    """Load a scan from ``filename``, choosing the format by extension.

    ``.json`` and ``.yaml`` files are deserialized into ``self._scan``
    (using ``UnsortableOrderedDict`` for mappings). Any other file is
    parsed as XML into ``self._xml`` and dispatched to an importer based
    on the root tag (``Sessions``, ``issues`` or ``items``).

    Args:
        filename: Path of the scan file.
        requests_and_responses: Stored on the instance as
            ``_requests_and_responses``; not otherwise used here.

    Raises:
        Exception: If the XML root tag is not one of the known formats.
    """
    self._filename = filename
    self._requests_and_responses = requests_and_responses

    if filename.endswith('.json'):
        # Read inside ``with`` so the handle is closed deterministically.
        with open(filename) as handle:
            raw = handle.read()
        # NOTE(review): ``.decode`` on the result of a text-mode read only
        # works on Python 2 byte strings - confirm the target interpreter.
        self._scan = json.loads(raw.decode('utf-8-sig'),
                                object_pairs_hook=UnsortableOrderedDict)
    elif filename.endswith('.yaml'):
        with open(filename) as handle:
            raw = handle.read()
        self._scan = yaml_load(raw, yaml.SafeLoader, UnsortableOrderedDict)
    else:
        # xml
        # huge_tree lifts libxml2's size limits for very large reports.
        etree_parser = etree.XMLParser(huge_tree=True)
        self._xml = etree.parse(filename, parser=etree_parser)
        root = self._xml.getroot()
        if root.tag == 'Sessions':
            self._webinspect_import()
        elif root.tag == 'issues':
            self._burp_import()
        elif root.tag == 'items':
            self._burp_items_import()
        else:
            raise Exception('Unknown scan format!')
Example #3
Source File: sso.py From vsphere-automation-sdk-python with MIT License | 6 votes |
def _canonicalize(xml_string):
    '''
    Canonicalize an XML string per
    U{http://www.w3.org/2001/10/xml-exc-c14n#} (exclusive C14N, comments
    stripped). Whitespace-only text nodes are dropped while parsing.

    @type  xml_string: C{str}
    @param xml_string: The XML string that needs to be canonicalized.
    @rtype: C{str}
    @return: Canonicalized string in Unicode.
    '''
    root = etree.fromstring(xml_string,
                            parser=etree.XMLParser(remove_blank_text=True))
    buf = BytesIO()
    # write_c14n lives on the tree, not on the element.
    root.getroottree().write_c14n(buf, exclusive=True, with_comments=False)
    canonical_bytes = buf.getvalue()
    return canonical_bytes.decode(UTF_8)
Example #4
Source File: views.py From MobileSF with GNU General Public License v3.0 | 6 votes |
def findBodyType(request):
    """Guess the content type of a captured request body.

    The body is probed as JSON, then as XML, then as a URL-encoded query
    string. Each successful probe overwrites the previous result, so the
    precedence is ``"form"`` > ``"xml"`` > ``"json"``.

    Args:
        request: Mapping with a ``"body"`` entry.

    Returns:
        ``"json"``, ``"xml"``, ``"form"``, or ``"none"`` when the body is
        empty or matches nothing. ``None`` is returned (after reporting
        through ``PrintException``) if an unexpected error occurs, e.g. the
        ``"body"`` key is missing.
    """
    bd_typ = "none"
    try:
        body = request["body"]
        if body:
            # The probes are best-effort, but catch ``Exception`` rather than
            # using a bare ``except`` so KeyboardInterrupt/SystemExit still
            # propagate.
            try:
                json.loads(body)
                bd_typ = "json"
            except Exception:
                pass
            try:
                # Prevent Entity Expansion Attacks against the Framework
                config = etree.XMLParser(remove_blank_text=True, resolve_entities=False)
                etree.fromstring(body, config)
                bd_typ = "xml"
            except Exception:
                pass
            if parse_qs(body):
                bd_typ = "form"
        return bd_typ
    except Exception:
        PrintException("[ERROR] Finding Request Body type")
Example #5
Source File: site.py From shareplum with MIT License | 6 votes |
def get_site_templates(self, lcid="1033"):
    """Send a ``GetSiteTemplates`` SOAP request and return the raw response.

    The request carries ``lcid`` as its ``LCID`` parameter and is posted to
    the ``Sites`` service URL. The string form of the request is kept in
    ``self.last_request``.

    Returns:
        The object returned by ``post`` (the unparsed HTTP response).
    """
    # Build Request
    soap_request = Soap("GetSiteTemplates")
    soap_request.add_parameter("LCID", lcid)
    self.last_request = str(soap_request)
    # Send Request
    response = post(self._session, url=self._url("Sites"), headers=self._headers("GetSiteTemplates"), data=str(soap_request).encode("utf-8"), verify=self._verify_ssl, timeout=self.timeout)
    # NOTE(review): this early return makes everything below unreachable, so
    # callers receive the raw response instead of a list of attribute dicts.
    # It looks like a debugging leftover - confirm which return value callers
    # rely on before removing either the return or the dead code.
    return response
    envelope = etree.fromstring(response.text.encode("utf-8"), parser=etree.XMLParser(huge_tree=self.huge_tree, recover=True))
    lists = envelope[0][0][1]
    data = []
    for _list in lists:
        data.append({k: v for (k, v) in _list.items()})
    return data
Example #6
Source File: site.py From shareplum with MIT License | 6 votes |
def get_list_templates(self):
    """Fetch the list templates via a ``GetListTemplates`` SOAP call.

    The request is posted to the ``Webs`` service URL and its string form
    is kept in ``self.last_request``. The response is parsed leniently
    (``recover=True``).

    Returns:
        A list with one dict of XML attributes per element found under
        ``envelope[0][0][0][0]``.
    """
    # Build Request
    request = Soap("GetListTemplates")
    request.add_parameter("GetListTemplates")
    self.last_request = str(request)

    # Send Request
    response = post(
        self._session,
        url=self._url("Webs"),
        headers=self._headers("GetListTemplates"),
        data=str(request).encode("utf-8"),
        verify=self._verify_ssl,
        timeout=self.timeout,
    )

    # Parse Response
    xml_parser = etree.XMLParser(huge_tree=self.huge_tree, recover=True)
    envelope = etree.fromstring(response.text.encode("utf-8"), parser=xml_parser)
    templates = envelope[0][0][0][0]
    return [dict(template.items()) for template in templates]
Example #7
Source File: site.py From shareplum with MIT License | 6 votes |
def get_site(self):
    """Issue a ``GetSite`` SOAP call for ``self.site_url``.

    The request is posted to the ``Sites`` service URL and its string form
    is kept in ``self.last_request``. The response is parsed leniently
    (``recover=True``).

    Returns:
        The text of the element at ``envelope[0][0][0]``.
    """
    # Build Request
    request = Soap("GetSite")
    request.add_parameter("SiteUrl", self.site_url)
    self.last_request = str(request)

    # Send Request
    response = post(
        self._session,
        url=self._url("Sites"),
        headers=self._headers("GetSite"),
        data=str(request).encode("utf-8"),
        verify=self._verify_ssl,
        timeout=self.timeout,
    )

    # Parse Response
    xml_parser = etree.XMLParser(huge_tree=self.huge_tree, recover=True)
    envelope = etree.fromstring(response.text.encode("utf-8"), parser=xml_parser)
    result_node = envelope[0][0][0]
    # TODO: Not sure what to do with this, so just return the text
    return result_node.text
Example #8
Source File: romeo.py From dissemin with GNU Affero General Public License v3.0 | 6 votes |
def perform_romeo_query(self, search_terms):
    """Query the RoMEO API and return the parsed XML response.

    ``search_terms`` is copied before use, so the caller's dict is left
    untouched; the API key is added to the copy when one is configured.

    Raises:
        MetadataSourceException: If the HTTP request fails or the response
            is not valid XML. The message includes the URL, the parameters
            and the underlying error.
    """
    search_terms = search_terms.copy()
    if self.api_key:
        search_terms['ak'] = self.api_key

    def failure(headline, error):
        # Both failure modes share the same request-context trailer.
        return MetadataSourceException(
            headline +
            'URL was: '+self.base_url+'\n' +
            'Parameters were: '+str(search_terms)+'\n' +
            'Error is: '+str(error))

    # Perform the query
    try:
        req = requests.get(self.base_url, params=search_terms, timeout=20)
    except requests.exceptions.RequestException as e:
        raise failure('Error while querying RoMEO.\n', e)

    # Parse it
    try:
        parser = ET.XMLParser(encoding='ISO-8859-1')
        root = ET.parse(BytesIO(req.content), parser)
    except ET.ParseError as e:
        raise failure('RoMEO returned an invalid XML response.\n', e)

    return root
Example #9
Source File: youtube.py From xblock-video with GNU General Public License v3.0 | 6 votes |
def download_default_transcript(self, url=None, language_code=None):  # pylint: disable=unused-argument
    """
    Fetch the default transcript from ``url`` and return it as WebVTT-like unicode.

    Each child of the downloaded XML document is rendered with
    `format_transcript_element()` (numbered from 1), and the pieces are
    concatenated. A ``WEBVTT`` header is prepended unless the text already
    contains that marker.

    Reference to `get_transcripts_from_youtube()`:
        https://github.com/edx/edx-platform/blob/ecc3473d36b3c7a360e260f8962e21cb01eb1c39/common/lib/xmodule/xmodule/video_module/transcripts_utils.py#L122

    Raises:
        VideoXBlockException: if `url` is None.
    """
    if url is None:
        raise VideoXBlockException(_('`url` parameter is required.'))

    utf8_parser = etree.XMLParser(encoding='utf-8')
    response = requests.get(url)
    xmltree = etree.fromstring(response.content, parser=utf8_parser)

    pieces = []
    for position, element in enumerate(xmltree, 1):
        pieces.append(self.format_transcript_element(element, position))
    body = "".join(pieces)

    if "WEBVTT" not in body:
        return u"WEBVTT\n\n" + unicode(body)
    return unicode(body)
Example #10
Source File: generic.py From n6 with GNU Affero General Public License v3.0 | 6 votes |
def iter_entry(self, data):
    """
    Parse the raw data body as XML and return the resulting tree.

    Args:
        `data` (dict):
            As returned by prepare_data() (especially, its 'raw' item
            contains the raw data body).

    Returns:
        The root element produced by parsing ``data['raw']``.
    """
    body = StringIO(data['raw']).getvalue()
    xml_parser = etree.XMLParser(ns_clean=True, remove_blank_text=True)
    return etree.fromstring(str(body), xml_parser)
Example #11
Source File: protocol.py From dissemin with GNU Affero General Public License v3.0 | 6 votes |
def get_new_status(self, identifier):
    """
    Unconditionally fetch the current status of a deposit, by ID
    (e.g. hal-0001234).

    Returns 'deleted' on an HTTP 400 response; otherwise the <status>
    text of the receipt is translated to 'published', 'pending' or
    'refused'. Any other status value yields None. Other HTTP errors are
    raised by ``raise_for_status()``.
    """
    deposit_url = '%s%s' % (self.api_url, identifier)
    credentials = requests.auth.HTTPBasicAuth(self.username, self.password)
    req = requests.get(deposit_url, auth=credentials)
    if req.status_code == 400:
        return 'deleted'
    req.raise_for_status()

    parser = etree.XMLParser(encoding='utf-8')
    document = etree.parse(BytesIO(req.text.encode('utf-8')), parser)
    hal_status = document.getroot().find('status').text

    # Receipt status -> local status; unknown values map to None.
    translation = {
        'accept': 'published',
        'replace': 'published',
        'verify': 'pending',
        'update': 'pending',
        'delete': 'refused',
    }
    return translation.get(hal_status)
Example #12
Source File: files.py From janeway with GNU Affero General Public License v3.0 | 6 votes |
def transform_with_xsl(xml_path, xsl_path, recover=False):
    """Apply the XSL stylesheet at ``xsl_path`` to the XML at ``xml_path``.

    Args:
        xml_path: Path of the XML document to transform.
        xsl_path: Path of the XSLT stylesheet.
        recover: When true, a document with syntax errors is re-parsed with
            a recovering parser, and a failing transformation is logged
            instead of raised.

    Returns:
        The transformed document, or ``None`` when the transformation
        failed and ``recover`` is true.

    Raises:
        etree.XMLSyntaxError: If the XML is malformed and ``recover`` is
            false.
        Exception: Whatever the transformation raised, when ``recover`` is
            false.
    """
    try:
        xml_dom = etree.parse(xml_path)
    except etree.XMLSyntaxError as e:
        if not recover:
            raise
        logger.error(e)
        parser = etree.XMLParser(recover=True)
        xml_dom = etree.parse(xml_path, parser=parser)

    xsl_transform = etree.XSLT(etree.parse(xsl_path))

    # Pre-bind the result: previously a failed transform with recover=True
    # reached the return with the name unbound (UnboundLocalError).
    transformed_dom = None
    try:
        transformed_dom = xsl_transform(xml_dom)
    except Exception as err:
        logger.error(err)
        for xsl_error in xsl_transform.error_log:
            logger.error(xsl_error)
        if not recover:
            raise

    return transformed_dom
Example #13
Source File: intellij_set_default_inspection_profile.py From ansible-role-intellij with MIT License | 5 votes |
def pretty_print(elem):
    """Return ``elem`` serialized as indented ISO-8859-1 XML, without a declaration.

    The element is serialized and re-parsed with blank text removed first,
    so that existing whitespace-only nodes do not prevent re-indentation.
    """
    serialized = etree.tostring(elem, encoding='iso-8859-1')
    reparsed = etree.fromstring(serialized, etree.XMLParser(remove_blank_text=True))
    return etree.tostring(
        reparsed,
        encoding='iso-8859-1',
        pretty_print=True,
        xml_declaration=False,
    )
Example #14
Source File: pokerstars.py From poker with MIT License | 5 votes |
def __init__(self, notes: str):
    """Keep the raw notes text and parse it into an XML tree.

    The parser recovers from malformed markup and does not resolve
    entities. The text is stored in ``self.raw`` and the parsed root in
    ``self.root``.
    """
    self.raw = notes
    lenient_parser = etree.XMLParser(recover=True, resolve_entities=False)
    encoded_notes = notes.encode()
    self.root = etree.XML(encoded_notes, lenient_parser)
Example #15
Source File: intellij_configure_jdk.py From ansible-role-intellij with MIT License | 5 votes |
def pretty_print(elem):
    """Return ``elem`` serialized as indented ISO-8859-1 XML, without a declaration.

    The element is serialized and re-parsed with blank text removed first,
    so that existing whitespace-only nodes do not prevent re-indentation.
    """
    blank_stripping_parser = etree.XMLParser(remove_blank_text=True)
    normalized = etree.fromstring(
        etree.tostring(elem, encoding='iso-8859-1'),
        blank_stripping_parser,
    )
    output_options = dict(encoding='iso-8859-1', pretty_print=True, xml_declaration=False)
    return etree.tostring(normalized, **output_options)
Example #16
Source File: intellij_set_default_jdk.py From ansible-role-intellij with MIT License | 5 votes |
def pretty_print(elem):
    """Return ``elem`` serialized as indented ISO-8859-1 XML, without a declaration.

    The element is serialized and re-parsed with blank text removed first,
    so that existing whitespace-only nodes do not prevent re-indentation.
    """
    raw_bytes = etree.tostring(elem, encoding='iso-8859-1')
    clean_tree = etree.fromstring(raw_bytes, etree.XMLParser(remove_blank_text=True))
    pretty_bytes = etree.tostring(
        clean_tree, encoding='iso-8859-1', pretty_print=True, xml_declaration=False)
    return pretty_bytes
Example #17
Source File: selector.py From ChemDataExtractor with MIT License | 5 votes |
def from_xml_text(cls, text, base_url=None, namespaces=None, encoding=None):
    """Build an instance from XML text.

    Delegates to ``cls.from_text`` with the XML parser, the CSS-to-XML
    translator and ``fmt='xml'`` preselected; the remaining arguments are
    passed through unchanged.
    """
    xml_options = dict(
        base_url=base_url,
        parser=XMLParser,
        translator=CssXmlTranslator,
        fmt='xml',
        namespaces=namespaces,
        encoding=encoding,
    )
    return cls.from_text(text, **xml_options)
Example #18
Source File: intellij_set_default_maven.py From ansible-role-intellij with MIT License | 5 votes |
def pretty_print(elem):
    """Return ``elem`` serialized as indented ISO-8859-1 XML, without a declaration.

    The element is serialized and re-parsed with blank text removed first,
    so that existing whitespace-only nodes do not prevent re-indentation.
    """
    as_bytes = etree.tostring(elem, encoding='iso-8859-1')
    stripping_parser = etree.XMLParser(remove_blank_text=True)
    rebuilt = etree.fromstring(as_bytes, stripping_parser)
    return etree.tostring(rebuilt,
                          encoding='iso-8859-1',
                          pretty_print=True,
                          xml_declaration=False)
Example #19
Source File: main.py From sysmon-config-bypass-finder with GNU General Public License v3.0 | 5 votes |
def _read_config_to_json(sysmon_config):
    """Convert the ``EventFiltering`` section of a config file into plain dicts.

    The file is parsed with comments removed. Every child of
    ``EventFiltering`` becomes one rule dict holding its tag
    (``rule_type``), its ``onmatch`` attribute (``on_match``) and a list of
    ``conditions``; each condition records the child's ``condition``
    attribute (``operator``), its text (``content``) and its tag
    (``condition_type``).

    Returns:
        A list of rule dicts, in document order.
    """
    xml_parser = etree.XMLParser(remove_comments=True)
    root = objectify.parse(sysmon_config, parser=xml_parser).getroot()
    event_filtering = root.find('EventFiltering')

    configuration = []
    # Keep getchildren()/iterchildren(): plain iteration over an objectify
    # element walks same-tag siblings rather than children.
    for rule in event_filtering.getchildren():
        entry = {
            'rule_type': rule.tag,
            'on_match': rule.get('onmatch'),
            'conditions': [],
        }
        entry['conditions'].extend(
            {
                'operator': condition.get('condition'),
                'content': condition.text,
                'condition_type': condition.tag,
            }
            for condition in rule.iterchildren()
        )
        configuration.append(entry)
    return configuration
Example #20
Source File: parser.py From avacity-2.0 with MIT License | 5 votes |
def __init__(self):
    """Create the comment-stripping XML parser and the ``apprnc_map`` key list."""
    self.parser = etree.XMLParser(remove_comments=True)
    self.apprnc_map = [
        "sc", "et", "brt", "at",
        "ht", "bt", "sh", "rg",
        "ss", "pt", "fat", "fft",
    ]
Example #21
Source File: xml.py From ansible-xml with GNU General Public License v3.0 | 5 votes |
def child_to_element(module, child, in_type):
    """Build an XML element from a child description.

    Args:
        module: Object whose ``fail_json(msg=...)`` is used to report
            errors (presumably this aborts the module run - verify against
            the caller, since some branches fall through after it).
        child: For ``in_type == 'xml'``, XML text to parse. For
            ``in_type == 'yaml'``, either a string (used as the tag name)
            or a single-key mapping of tag -> value.
        in_type: ``'xml'`` or ``'yaml'``.

    Returns:
        The element that was parsed or created.
    """
    if in_type == 'xml':
        infile = BytesIO(to_bytes(child, errors='surrogate_or_strict'))
        try:
            parser = etree.XMLParser()
            node = etree.parse(infile, parser)
            return node.getroot()
        except etree.XMLSyntaxError as e:
            module.fail_json(msg="Error while parsing child element: %s" % e)
    elif in_type == 'yaml':
        if isinstance(child, string_types):
            # A bare string is just a tag name.
            return etree.Element(child)
        elif isinstance(child, MutableMapping):
            if len(child) > 1:
                module.fail_json(msg="Can only create children from hashes with one key")
            (key, value) = next(iteritems(child))
            if isinstance(value, MutableMapping):
                # '_' holds nested children; it is popped (mutating the
                # caller's mapping) so that only attributes remain in value.
                children = value.pop('_', None)
                node = etree.Element(key, value)
                if children is not None:
                    if not isinstance(children, list):
                        module.fail_json(msg="Invalid children type: %s, must be list." % type(children))
                    subnodes = children_to_nodes(module, children)
                    node.extend(subnodes)
            else:
                # A non-mapping value becomes the element's text.
                node = etree.Element(key)
                node.text = value
            return node
        else:
            module.fail_json(msg="Invalid child type: %s. Children must be either strings or hashes." % type(child))
    else:
        module.fail_json(msg="Invalid child input type: %s. Type must be either xml or yaml." % in_type)
Example #22
Source File: _lxml.py From nzb-subliminal with GNU General Public License v3.0 | 5 votes |
def default_parser(self, encoding):
    """Return the parser to use for a document in ``encoding``.

    This can either return a parser object or a class, which will be
    instantiated with default arguments. A preconfigured
    ``_default_parser`` wins; otherwise a recovering parser that targets
    this object and keeps CDATA sections is created.
    """
    if self._default_parser is None:
        return etree.XMLParser(
            target=self,
            strip_cdata=False,
            recover=True,
            encoding=encoding,
        )
    return self._default_parser
Example #23
Source File: _lxml.py From B.E.N.J.I. with MIT License | 5 votes |
def default_parser(self, encoding):
    """Return the parser to use for a document in ``encoding``.

    This can either return a parser object or a class, which will be
    instantiated with default arguments. A preconfigured
    ``_default_parser`` wins; otherwise a recovering parser that targets
    this object and keeps CDATA sections is created.
    """
    configured = self._default_parser
    if configured is not None:
        return configured
    parser_options = dict(target=self, strip_cdata=False, recover=True, encoding=encoding)
    return etree.XMLParser(**parser_options)
Example #24
Source File: conftest.py From pycon with MIT License | 5 votes |
def sample_invoice_xml():
    """Load the sample invoice XML with all element text stripped.

    The file ``../data/IT01234567890_FPA01.xml`` (relative to this module)
    is parsed with blank text removed, then leading and trailing whitespace
    is trimmed from the text of every element that has any.

    Returns:
        The parsed element tree.
    """
    xml_parser = etree.XMLParser(remove_blank_text=True)
    invoice_path = os.path.join(
        os.path.dirname(__file__), "../data/IT01234567890_FPA01.xml"
    )
    tree = etree.parse(invoice_path, parser=xml_parser)

    for node in tree.iter("*"):
        text = node.text
        if text is None:
            continue
        node.text = text.strip()

    return tree
Example #25
Source File: client.py From nsxramlclient with MIT License | 5 votes |
def get_xml_example_by_displayname(self, display_name, method, remove_content=None, remove_comments=None):
    """Return the XML body example of a RAML resource as an element tree.

    Args:
        display_name: displayName of the resource to look up.
        method: One of ``'read'``, ``'create'``, ``'delete'``, ``'update'``
            (mapped to the HTTP verbs get/post/delete/put).
        remove_content: When true, clear ``text`` and ``tail`` of every
            child element. Defaults to ``True`` when omitted (``None``).
        remove_comments: Passed to the XML parser to drop comments.
            Defaults to ``True`` when omitted (``None``).

    Returns:
        The parsed example element.

    Raises:
        AssertionError: If the resource is not found, does not support the
            method, or has no body schema.
        KeyError: If ``method`` is not one of the supported names.
        Exception: If the example is not well-formed XML.
    """
    # Only fill in the default when the argument was omitted. The previous
    # ``if not x: x = True`` also turned an explicit False back into True,
    # so neither option could ever be switched off.
    if remove_content is None:
        remove_content = True
    if remove_comments is None:
        remove_comments = True

    method_options = {'read': 'get', 'create': 'post', 'delete': 'delete', 'update': 'put'}

    matched_resource = self.find_resource_recursively(display_name)
    assert matched_resource, 'The searched displayName could not be found in RAML File'
    assert method_options[method] in matched_resource[1].methods, 'the resource does not support ' \
                                                                  'the {} method'.format(method)
    assert matched_resource[1].methods[method_options[method]].body, 'the resource does not have a ' \
                                                                     'body schema in the RAML File'

    matched_resource_body = matched_resource[1].methods[method_options[method]].body
    example = matched_resource_body['application/xml'].example

    try:
        parser = et.XMLParser(remove_comments=remove_comments)
        example_et = et.fromstring(example, parser=parser)
    except et.XMLSyntaxError as e:
        raise Exception('The parsing of the body example XML failed, please check the format in the RAML file,'
                        'the execption is:\n{}'.format(e))

    if remove_content:
        for parent, child in self._iterparent(example_et):
            child.text = None
            child.tail = None

    return example_et
Example #26
Source File: conftest.py From dissemin with GNU Affero General Public License v3.0 | 5 votes |
def dissemin_xml_1_0():
    '''
    Loads the dissemin v1.0 sample XML document (blank text removed) and
    returns its root element, ready to be manipulated and validated
    '''
    here = os.path.dirname(os.path.abspath(__file__))
    sample_path = os.path.join(here, 'schema', 'test_data', 'dissemin_v1.0.xml')
    blank_stripping_parser = etree.XMLParser(remove_blank_text=True)
    document = etree.parse(sample_path, blank_stripping_parser)
    return document.getroot()
Example #27
Source File: test_romeo.py From dissemin with GNU Affero General Public License v3.0 | 5 votes |
def perform_romeo_query(self, search_terms):
    """Serve a RoMEO query from a file in ``self.datadir``, fetching on a miss.

    The file name is derived from the sorted ``key-value`` pairs of
    ``search_terms`` (spaces in values replaced by underscores). If the
    file cannot be read, the parent implementation is called and its
    result is written to that file before being returned.
    """
    name_parts = sorted(
        '{}-{}'.format(key, val.replace(' ','_'))
        for key, val in search_terms.items()
    )
    filename = '_'.join(name_parts) + '.xml'
    cache_path = os.path.join(self.datadir, filename)

    try:
        with open(cache_path, 'rb') as response_file:
            parser = etree.XMLParser(encoding='ISO-8859-1')
            return etree.parse(response_file, parser)
    except IOError:
        xml = super(RomeoAPIStub, self).perform_romeo_query(search_terms)
        with open(cache_path, 'wb') as response_file:
            xml.write(response_file)
        return xml

# SHERPA/RoMEO interface
Example #28
Source File: romeo.py From dissemin with GNU Affero General Public License v3.0 | 5 votes |
def get_romeo_latest_update_date(self):
    """
    Fetches the dates of the latest updates on the RoMEO service.

    This returns a dict: the dates can be accessed via the 'publishers'
    and 'journals' keys.

    Raises ``requests.exceptions.RequestException`` (including a timeout
    after 20 seconds) if the download fails.
    """
    # Bound the wait: without a timeout, requests can block indefinitely on
    # an unresponsive server. 20 s matches perform_romeo_query.
    r = requests.get('http://www.sherpa.ac.uk/downloads/download-dates.php',
                     {'ak': self.api_key, 'format': 'xml'},
                     timeout=20)
    parser = ET.XMLParser(encoding='ISO-8859-1')
    root = ET.parse(BytesIO(r.content), parser)
    return {
        'publishers': self._get_romeo_date(root, './publisherspolicies/latestupdate'),
        'journals': self._get_romeo_date(root, './journals/latestupdate')
    }
Example #29
Source File: cctop.py From ssbio with MIT License | 5 votes |
def parse_cctop_full(infile):
    """Parse a CCTOP XML results file and return a list of the consensus TM domains in the format::

        [(1, inside_outside_or_tm),
         (2, inside_outside_or_tm),
         ...]

    Where the first value of a tuple is the sequence residue number, and the second is the predicted location with the
    values 'I' (inside), 'O' (outside), or 'M' (membrane).

    Args:
        infile (str): Path to CCTOP XML file

    Returns:
        list: List of tuples in the format described above. Empty when the file has no ``Topology`` element.

    """
    parser = etree.XMLParser(ns_clean=True)
    # Read bytes, not text: lxml refuses unicode strings that carry an XML
    # encoding declaration, and bytes let the parser honour that declaration.
    with open(infile, 'rb') as f:
        tree = etree.fromstring(f.read(), parser)

    all_info = []
    topology = tree.find('Topology')
    if topology is not None:
        for r in topology.findall('Region'):
            region_start = int(r.attrib['from'])
            region_end = int(r.attrib['to'])
            region = r.attrib['loc']
            # 'to' is inclusive, hence the + 1.
            all_info.extend((i, region) for i in range(region_start, region_end + 1))

    return all_info
Example #30
Source File: XnatUtils.py From dax with MIT License | 5 votes |
def get_resource_lastdate_modified(intf, resource_obj):
    """
    Get the last modified date for a resource on XNAT.
     (NOT WORKING: bug on XNAT side for version<1.6.5)

    The resource XML is fetched and the greatest createdTime/modifiedTime
    attribute found is used; the DCMCatalog paths are tried when the
    Catalog paths match nothing. If no time is found at all, the current
    local time is returned instead.

    :param intf: pyxnat.Interface object
    :param resource_obj: resource pyxnat Eobject
    :return: date of last modified data with the format %Y%m%d%H%M%S
    """
    # xpaths for times in resource xml
    created_dcm_xpath = "/cat:DCMCatalog/cat:entries/cat:entry/@createdTime"
    modified_dcm_xpath = "/cat:DCMCatalog/cat:entries/cat:entry/@modifiedTime"
    created_xpath = "/cat:Catalog/cat:entries/cat:entry/@createdTime"
    modified_xpath = "/cat:Catalog/cat:entries/cat:entry/@modifiedTime"

    # Get the XML for resource
    res_xml_uri = '%s?format=xml' % (resource_obj._uri)
    xmlstr = intf._exec(res_xml_uri, 'GET')

    # Parse out the times, falling back to the DCM catalog paths
    root = etree.fromstring(xmlstr, parser=etree.XMLParser(huge_tree=True))
    create_times = (root.xpath(created_xpath, namespaces=root.nsmap)
                    or root.xpath(created_dcm_xpath, namespaces=root.nsmap))
    mod_times = (root.xpath(modified_xpath, namespaces=root.nsmap)
                 or root.xpath(modified_dcm_xpath, namespaces=root.nsmap))

    # Find the most recent time
    all_times = create_times + mod_times
    if not all_times:
        return '{:%Y%m%d%H%M%S}'.format(datetime.now())

    # Drop fractional seconds, then squeeze "YYYY-MM-DDTHH:MM:SS" together
    date = max(all_times).split('.')[0]
    halves = date.split('T')
    return halves[0].replace('-', '') + halves[1].replace(':', '')