Python lxml.etree.fromstring() Examples
The following are 30 code examples of lxml.etree.fromstring().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module lxml.etree, or try the search function.
Example #1
Source File: bugowner.py From openSUSE-release-tools with GNU General Public License v2.0 | 6 votes |
def add_bugowner(self, package, owner):
    """Give ``owner`` the bugowner role in the package's ``_meta`` document.

    The ``_meta`` of ``package`` in ``self.project`` is fetched and parsed.
    If an entry for the same person/group already carries the ``bugowner``
    role, this is logged and nothing is sent. Otherwise a new element is
    appended and the whole document is PUT back to the same URL.

    :param package: package name used to build the ``_meta`` URL
    :param owner: object exposing ``kind`` and ``name``
    """
    meta_url = self.makeurl(['source', self.project, package, '_meta'])
    root = ET.fromstring(self.cached_GET(meta_url))
    idname = 'groupid' if owner.kind != 'person' else 'userid'

    # XXX: can't use 'and' here to filter for bugowner too
    query = './{}[@{}="{}"]'.format(owner.kind, idname, owner.name)
    if any(entry.get('role') == 'bugowner' for entry in root.findall(query)):
        logger.debug("%s/%s already has %s %s", self.project, package, owner.kind, owner.name)
        return

    new_entry = ET.SubElement(root, owner.kind)
    new_entry.set(idname, owner.name)
    new_entry.set('role', 'bugowner')

    payload = ET.tostring(root)
    logger.debug(payload)
    self.http_PUT(meta_url, data=payload)
Example #2
Source File: verification.py From Penny-Dreadful-Tools with GNU General Public License v3.0 | 6 votes |
def main() -> None:
    """Record the current MTGO client version and ensure a project column for it.

    Downloads the MTGO application manifest, reads the ``version`` attribute
    of its ``assemblyIdentity`` element, writes it to ``mtgo_version.json``
    and creates a column named after the version in the verification
    project when no column with that name exists yet.
    """
    response = requests.get('http://mtgoclientdepot.onlinegaming.wizards.com/MTGO.application')
    manifest_root = etree.fromstring(response.content)
    identity = manifest_root.find('{urn:schemas-microsoft-com:asm.v1}assemblyIdentity')
    version = identity.attrib['version']

    print('Current MTGO Version is {0}'.format(version))

    with open('mtgo_version.json', mode='w') as f:
        json.dump({'version': version}, f)

    project = repo.get_verification_project()
    # Walk every column (as the original list comprehension did) before deciding.
    column_names = {column.name for column in project.get_columns()}
    if version not in column_names:
        print(f'Creating column for {version}')
        project.create_column(version)
Example #3
Source File: workarounds.py From pulseaudio-dlna with GNU General Public License v3.0 | 6 votes |
def _parse_xml(self, xml):
    """Extract IP, port and control URL from a MediaRenderer description.

    :param xml: the device description XML to parse
    :returns: ``True`` when ``self.ip``, ``self.port`` and
        ``self.control_url`` were set, ``False`` when the URL base or
        control URL element is missing or the URL base has no usable
        ``host:port`` network location.
    """
    # Parse MediaRenderer description XML
    xml_root = etree.fromstring(xml)
    namespaces = xml_root.nsmap
    # The default (prefix-less) namespace is dropped before the map is
    # handed to find().
    namespaces.pop(None, None)

    # Determine AVRC URL
    url_base = xml_root.find(self.MR_YAMAHA_URLBASE_PATH, namespaces)
    control_url = xml_root.find(self.MR_YAMAHA_CONTROLURL_PATH, namespaces)
    if url_base is None or control_url is None:
        return False

    # Split on the LAST colon. A netloc without a port yields an empty
    # ``ip`` and falls into the guard below instead of raising ValueError
    # from tuple unpacking, which the previous split(':') did.
    netloc = urlparse.urlparse(url_base.text).netloc
    ip, _, port = netloc.rpartition(':')
    if not ip or not port:
        return False

    self.ip = ip
    self.port = port
    self.control_url = control_url.text
    return True
Example #4
Source File: repochecks.py From openSUSE-release-tools with GNU General Public License v2.0 | 6 votes |
def mirrorRepomd(cachedir, url):
    """Mirror a repository's ``primary.xml.gz`` into ``cachedir``.

    :param cachedir: directory holding the cached primary file
    :param url: base URL of the repository
    :returns: path of the cached primary file inside ``cachedir``
    :raises Exception: when the primary location does not end in ``.xml.gz``
    """
    # Use repomd.xml to get the location of primary.xml.gz
    repomd_response = requests.get('{}/repodata/repomd.xml'.format(url))
    repoindex = ETL.fromstring(repomd_response.content)
    primarypath = repoindex.xpath(
        "string(./repo:data[@type='primary']/repo:location/@href)",
        namespaces={'repo': 'http://linux.duke.edu/metadata/repo'})
    if not primarypath.endswith(".xml.gz"):
        raise Exception('unsupported primary format')

    primarydest = os.path.join(cachedir, os.path.basename(primarypath))
    if os.path.exists(primarydest):
        return primarydest

    # Delete the old files first
    stale_pattern = glob.escape(cachedir) + "/*.xml.gz"
    for stale in glob.glob(stale_pattern):
        os.unlink(stale)

    # Download into a temporary file in the same directory and hard-link it
    # to the final name while the temporary file is still open.
    with tempfile.NamedTemporaryFile(dir=cachedir) as download:
        download.write(requests.get(url + '/' + primarypath).content)
        os.link(download.name, primarydest)
    return primarydest
Example #5
Source File: belvaParseXML.py From Basic-Expression-Lexicon-Variation-Algorithms-BELVA with GNU General Public License v3.0 | 6 votes |
def parseHTMLxpathSearch(http_source, xpathString):
    """Run an XPath query against HTML and return the serialized matches.

    This is a best-effort helper: any parsing, XPath or serialization error
    stops processing, and whatever was collected so far is returned.

    :param http_source: HTML source; converted with ``str()`` and stripped
        of NUL characters before parsing
    :param xpathString: XPath expression evaluated on the parsed document
    :returns: list of ``etree.tostring`` results for the matches
    """
    #---------------------------------------------------------------------------------
    return_values = []

    http_source = str(http_source).replace('\x00', '')
    try:
        html = lxml.html.fromstring(http_source)
        for data in html.xpath(xpathString):
            # NOTE(review): ``data.content`` looks suspicious; lxml elements
            # normally expose ``text_content()`` rather than ``content`` --
            # confirm whether ``etree.tostring(data)`` was intended.
            return_values.append(etree.tostring(data.content))
            data.clear()
    except Exception:
        # Deliberately best-effort, but no longer a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        pass

    return return_values

#---------------------------------------------------------------------------------
# parse HTML and return value asked
Example #6
Source File: stagingapi.py From openSUSE-release-tools with GNU General Public License v2.0 | 6 votes |
def create_package_container(self, project, package, meta=None, disable_build=False):
    """
    Create an (otherwise empty) package container in project/package.

    :param project: project to create it in
    :param package: package name
    :param meta: package metadata; a minimal document is generated when falsy
    :param disable_build: when true, add a ``<build><disable/></build>``
        section to the metadata before uploading
    """
    if not meta:
        meta = '<package name="{}"><title/><description/></package>'.format(package)

    if disable_build:
        package_root = ET.fromstring(meta)
        build_section = ET.SubElement(package_root, 'build')
        ET.SubElement(build_section, 'disable')
        meta = ET.tostring(package_root)

    meta_url = self.makeurl(['source', project, package, '_meta'])
    http_PUT(meta_url, data=meta)
Example #7
Source File: tools.py From ec2-api with Apache License 2.0 | 6 votes |
def parse_xml(xml_string):
    """Convert an XML byte string into nested dicts and lists.

    The input is decoded as UTF-8, ``_xml_scheme`` matches are removed and
    the document is parsed. Each element becomes a ``tag -> value`` entry:

    * children whose first tag is ``item`` become a list of child values,
    * other children become a dict keyed by child tag,
    * a childless element whose tag ends in ``Set`` becomes ``[]``,
    * any other childless element yields its converted text.

    :returns: a one-entry dict keyed by the root tag
    """
    cleaned = _xml_scheme.sub('', xml_string.decode("utf-8"))
    root = etree.fromstring(cleaned)

    def convert_node(node):
        children = list(node)
        if children:
            converted = [convert_node(child) for child in children]
            if children[0].tag == 'item':
                value = [child_value for _child_tag, child_value in converted]
            else:
                value = dict(converted)
        elif node.tag.endswith('Set'):
            value = []
        elif node.text:
            # TODO(ft): do not use private function
            value = ec2utils._try_convert(node.text)
        else:
            # Empty or missing text is passed through unchanged.
            value = node.text
        return node.tag, value

    root_tag, root_value = convert_node(root)
    return {root_tag: root_value}
Example #8
Source File: Packets.py From Timeline with GNU General Public License v3.0 | 6 votes |
def tryParseXML(self, xml_data):
    """Try to interpret ``xml_data`` as an XML packet.

    :returns: ``True`` when packet receiving is disabled on the penguin,
        ``[t, body]`` (the ``t`` attribute and the list of ``//body``
        elements) for a recognised packet, and ``None`` when the type is
        not in ``AVAILABLE_XML_PACKET_TYPES`` or anything fails.
    """
    try:
        if not self.penguin.ReceivePacketEnabled:
            return True

        XMLdata = parseXML(str(xml_data))

        t = XMLdata.get('t')
        if t not in AVAILABLE_XML_PACKET_TYPES:
            return None

        body = XMLdata.xpath('//body')
        for b in body:
            # Just to make sure `action` exists!
            # NOTE(review): ``get`` returns None for a missing attribute
            # rather than raising, so this only fails for non-elements.
            b.get("action")

        return [t, body]
    except Exception:
        # Malformed packets are rejected; KeyboardInterrupt / SystemExit are
        # no longer swallowed as they were by the former bare ``except``.
        return None
Example #9
Source File: repochecks.py From openSUSE-release-tools with GNU General Public License v2.0 | 6 votes |
def mirrorRepomd(cachedir, url):
    """Ensure the repository's primary metadata file is cached locally.

    :param cachedir: directory used as the cache
    :param url: base URL of the repository
    :returns: path to the cached ``*.xml.gz`` primary file
    :raises Exception: if the advertised primary file is not ``.xml.gz``
    """
    # Use repomd.xml to get the location of primary.xml.gz
    index_bytes = requests.get('{}/repodata/repomd.xml'.format(url)).content
    repoindex = ETL.fromstring(index_bytes)
    repo_ns = {'repo': 'http://linux.duke.edu/metadata/repo'}
    primarypath = repoindex.xpath(
        "string(./repo:data[@type='primary']/repo:location/@href)",
        namespaces=repo_ns)
    if not primarypath.endswith(".xml.gz"):
        raise Exception('unsupported primary format')

    primarydest = os.path.join(cachedir, os.path.basename(primarypath))
    if os.path.exists(primarydest):
        # Already mirrored; nothing to download.
        return primarydest

    # Delete the old files first
    for outdated in glob.glob(glob.escape(cachedir) + "/*.xml.gz"):
        os.unlink(outdated)

    with tempfile.NamedTemporaryFile(dir=cachedir) as staging:
        staging.write(requests.get(url + '/' + primarypath).content)
        # Give the staged file its final name before the temporary name is
        # removed on close.
        os.link(staging.name, primarydest)
    return primarydest
Example #10
Source File: test_examples.py From dataflows with MIT License | 6 votes |
def country_population():
    """Yield ``{'name', 'population'}`` rows scraped from Wikipedia.

    Downloads the "List of countries and dependencies by population" page,
    walks every table whose ``class`` contains ``wikitable`` and yields one
    dict per row that has more than three ``td`` cells.
    """
    from lxml import etree
    from urllib.request import urlopen

    page = urlopen('https://en.wikipedia.org/wiki/List_of_countries_and_dependencies_by_population').read()
    tree = etree.fromstring(page, parser=etree.HTMLParser())

    for table in tree.findall('.//table'):
        if 'wikitable' not in table.attrib.get('class', ''):
            continue
        for row in table.findall('.//tr'):
            cells = row.findall('td')
            if len(cells) <= 3:
                continue
            # The name is the ``title`` of the first link in the second
            # cell; the population is the text of the third cell.
            link = cells[1].find('.//a')
            yield {
                'name': link.attrib.get('title'),
                'population': cells[2].text,
            }
Example #11
Source File: Feed.py From python-in-practice with GNU General Public License v3.0 | 6 votes |
def _parse(data, limit): tree = etree.fromstring(data) output = [] # RSS prefix = "" tag = "*/item" if tree.find(tag) is None: prefix = "{http://purl.org/rss/1.0/}" tag = prefix + "item" for element in tree.findall(tag): title = element.find(prefix + "title") link = element.find(prefix + "link") if link is None: link = element.find("guid") _maybe_append(output, title, link) if limit and len(output) == limit: break if output: return ["<ul>"] + output + ["</ul>"]
Example #12
Source File: stagingapi.py From openSUSE-release-tools with GNU General Public License v2.0 | 6 votes |
def create_package_container(self, project, package, meta=None, disable_build=False):
    """
    Upload package metadata so that project/package exists as a container.

    :param project: project to create it in
    :param package: package name
    :param meta: package metadata; when falsy a minimal ``<package>``
        document naming ``package`` is used
    :param disable_build: should the package be created with build flag disabled
    """
    if not meta:
        template = '<package name="{}"><title/><description/></package>'
        meta = template.format(package)

    if disable_build:
        root = ET.fromstring(meta)
        # Append <build><disable/></build> to the metadata.
        ET.SubElement(ET.SubElement(root, 'build'), 'disable')
        meta = ET.tostring(root)

    http_PUT(self.makeurl(['source', project, package, '_meta']), data=meta)
Example #13
Source File: OBSLocal.py From openSUSE-release-tools with GNU General Public License v2.0 | 6 votes |
def create_group(self, name, users=None):
    """Create (or overwrite) the group ``name`` via the API.

    :param name: group name; also recorded in ``self.groups`` if new
    :param users: optional iterable of user ids to list as members.
        Defaults to no members; ``None`` replaces the former mutable
        ``[]`` default.
    """
    meta = """ <group> <title>{}</title> </group> """.format(name)

    if users:
        root = ET.fromstring(meta)
        # Members are nested as <person userid=.../> inside a wrapping
        # <person> element.
        persons = ET.SubElement(root, 'person')
        for user in users:
            ET.SubElement(persons, 'person', {'userid': user})
        meta = ET.tostring(root)

    if name not in self.groups:
        self.groups.append(name)
    url = osc.core.makeurl(APIURL, ['group', name])
    osc.core.http_PUT(url, data=meta)
Example #14
Source File: animal.py From yui with GNU Affero General Public License v3.0 | 6 votes |
async def get_cat_image_url(timeout: float) -> str:
    """Return the URL of a cat image that responds with HTTP 200.

    Declared ``async`` because the body uses ``await`` and ``async with``,
    which are a SyntaxError inside a plain ``def``.

    The API is queried for an image URL, then the URL is fetched to confirm
    it works. The loop retries when the API server disconnects, when the
    image request cannot connect or times out, or when the image does not
    answer with status 200.

    :param timeout: seconds allowed for the image check request
    :raises APIServerError: when the API answers with a non-200 status
    """
    api_url = 'http://thecatapi.com/api/images/get'
    async with aiohttp.ClientSession() as session:
        while True:
            try:
                async with session.get(
                    api_url, params={'format': 'xml', 'type': 'jpg,png'}
                ) as res:
                    if res.status != 200:
                        raise APIServerError
                    xml_result = await res.read()
                    tree = etree.fromstring(xml_result)
                    url = tree.find('data/images/image/url').text
            except aiohttp.client_exceptions.ServerDisconnectedError:
                # Brief pause before asking the API again.
                await asyncio.sleep(0.1)
                continue
            try:
                async with async_timeout.timeout(timeout=timeout):
                    async with session.get(url) as res:
                        async with res:
                            if res.status == 200:
                                return url
            except (aiohttp.ClientConnectorError, asyncio.TimeoutError):
                continue
Example #15
Source File: create_pascal_tf_record.py From ros_people_object_detection_tensorflow with Apache License 2.0 | 5 votes |
def main(_):
    """Convert PASCAL VOC annotations into a single TFRecord file.

    Validates ``FLAGS.set`` and ``FLAGS.year``, then for every selected year
    reads the example list, parses each example's annotation XML and writes
    the resulting ``tf.Example`` to ``FLAGS.output_path``.

    :raises ValueError: if ``FLAGS.set`` or ``FLAGS.year`` is not allowed
    """
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))
    if FLAGS.year not in YEARS:
        raise ValueError('year must be in : {}'.format(YEARS))

    data_dir = FLAGS.data_dir
    # 'merged' means both datasets; any other value selects just that year.
    years = ['VOC2007', 'VOC2012'] if FLAGS.year == 'merged' else [FLAGS.year]

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    for year in years:
        logging.info('Reading from PASCAL %s dataset.', year)
        year_dir = os.path.join(data_dir, year)
        examples_path = os.path.join(year_dir, 'ImageSets', 'Main',
                                     'aeroplane_' + FLAGS.set + '.txt')
        annotations_dir = os.path.join(year_dir, FLAGS.annotations_dir)
        examples_list = dataset_util.read_examples_list(examples_path)

        for idx, example in enumerate(examples_list):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples_list))
            annotation_path = os.path.join(annotations_dir, example + '.xml')
            with tf.gfile.GFile(annotation_path, 'r') as fid:
                xml_str = fid.read()
            annotation_root = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(annotation_root)['annotation']

            tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                            FLAGS.ignore_difficult_instances)
            writer.write(tf_example.SerializeToString())

    writer.close()
Example #16
Source File: OBSLocal.py From openSUSE-release-tools with GNU General Public License v2.0 | 5 votes |
def update_meta(self, reviewer=None, maintainer=None, project_links=None, with_repo=False):
    """Build the project meta document and pass it to ``self.custom_meta``.

    :param reviewer: mapping with optional ``'groups'`` / ``'users'`` lists
        that receive the ``reviewer`` role
    :param maintainer: same shape as ``reviewer``, for the ``maintainer`` role
    :param project_links: iterable of project names added as ``<link>``
    :param with_repo: when true, add a ``standard`` repository with an
        ``x86_64`` arch

    ``None`` defaults replace the former mutable ``{}`` / ``[]`` defaults;
    omitting an argument behaves exactly as before.
    """
    meta = """ <project name="{0}"> <title></title> <description></description> </project>""".format(self.name)
    root = ET.fromstring(meta)

    # Reviewer entries come first, then maintainer entries; within a role
    # groups precede users. This keeps the original element order.
    for role, members in (('reviewer', reviewer or {}),
                          ('maintainer', maintainer or {})):
        for group in members.get('groups', []):
            ET.SubElement(root, 'group', {'groupid': group, 'role': role})
        for user in members.get('users', []):
            ET.SubElement(root, 'person', {'userid': user, 'role': role})

    for link in project_links or []:
        ET.SubElement(root, 'link', {'project': link})

    if with_repo:
        repo = ET.SubElement(root, 'repository', {'name': 'standard'})
        ET.SubElement(repo, 'arch').text = 'x86_64'

    self.custom_meta(ET.tostring(root))
Example #17
Source File: wiki_parse.py From adam_qas with GNU General Public License v3.0 | 5 votes |
def __init__(self, html_data, is_file):
    """Parse ``html_data`` into ``self.html_tree``.

    :param html_data: a source accepted by ``etree.parse`` when ``is_file``
        is true, otherwise markup passed to ``etree.fromstring``
    :param is_file: selects between ``etree.parse`` and ``etree.fromstring``
    """
    self.es_ops = ElasticSearchOperate()
    self.html_data = html_data
    self.new_line_non_break_pattern = re.compile(self.new_line_non_break_regex)

    # Blank text and comments are dropped while parsing.
    html_parser = etree.HTMLParser(remove_blank_text=True, remove_comments=True)
    load = etree.parse if is_file else etree.fromstring
    self.html_tree = load(self.html_data, html_parser)
Example #18
Source File: animal.py From yui with GNU Affero General Public License v3.0 | 5 votes |
async def get_fox_image_url(timeout: float) -> str:
    """Return the ``src`` of the first gallery thumbnail on the fox page.

    Declared ``async`` because the body uses ``await`` and ``async with``,
    which are a SyntaxError inside a plain ``def``.

    :param timeout: seconds allowed for downloading the gallery page
    :raises APIServerError: when the page contains no matching image
    """
    url = 'http://fox-info.net/fox-gallery'
    async with async_timeout.timeout(timeout=timeout):
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as resp:
                data = await resp.text()

    h = html.fromstring(data)
    image_els = h.cssselect('#gallery-1 img.attachment-thumbnail')
    try:
        return str(image_els[0].get('src'))
    except IndexError:
        # No thumbnail matched the selector.
        raise APIServerError
Example #19
Source File: _utils.py From sec-edgar-downloader with MIT License | 5 votes |
def extract_elements_from_xml(xml_byte_object, xpath_selector):
    """Parse ``xml_byte_object`` and return the XPath result for the selector.

    The expression is evaluated with the ``W3_NAMESPACE`` prefix mapping.
    """
    root = etree.fromstring(xml_byte_object)
    matches = root.xpath(xpath_selector, namespaces=W3_NAMESPACE)
    return matches
Example #20
Source File: create_pascal_tf_record.py From ros_people_object_detection_tensorflow with Apache License 2.0 | 5 votes |
def main(_):
    """Write the selected PASCAL VOC examples to ``FLAGS.output_path``.

    After validating the ``set`` and ``year`` flags, every example listed
    for each selected year has its annotation XML parsed and serialized as
    a ``tf.Example`` into one TFRecord file.

    :raises ValueError: if ``FLAGS.set`` or ``FLAGS.year`` is not allowed
    """
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))
    if FLAGS.year not in YEARS:
        raise ValueError('year must be in : {}'.format(YEARS))

    data_dir = FLAGS.data_dir
    if FLAGS.year == 'merged':
        years = ['VOC2007', 'VOC2012']
    else:
        years = [FLAGS.year]

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    for year in years:
        logging.info('Reading from PASCAL %s dataset.', year)
        list_name = 'aeroplane_' + FLAGS.set + '.txt'
        examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main', list_name)
        annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
        examples_list = dataset_util.read_examples_list(examples_path)

        for idx, example in enumerate(examples_list):
            # Progress is reported every 100 examples.
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples_list))
            path = os.path.join(annotations_dir, example + '.xml')
            with tf.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            parsed = dataset_util.recursive_parse_xml_to_dict(etree.fromstring(xml_str))
            data = parsed['annotation']

            tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                            FLAGS.ignore_difficult_instances)
            writer.write(tf_example.SerializeToString())

    writer.close()
Example #21
Source File: utils.py From rtcclient with Apache License 2.0 | 5 votes |
def remove_empty_elements(docs):
    """Remove elements that have no child nodes from an XML string.

    Elements whose serialization contains ``rdf:resource`` are kept even
    when they have no child nodes.

    :param docs: XML document as ``str``
    :returns: the pruned document as returned by ``etree.tostring``
    """
    root = etree.fromstring(bytes(docs, 'utf-8'))
    # ``not(node())`` selects elements with no children of any kind.
    for candidate in root.xpath("//*[not(node())]"):
        serialized = str(etree.tostring(candidate))
        if "rdf:resource" in serialized:
            continue
        candidate.getparent().remove(candidate)
    return etree.tostring(root)
Example #22
Source File: wiki_parse.py From adam_qas with GNU General Public License v3.0 | 5 votes |
def __init__(self, pageid):
    """Load the stored article for ``pageid`` and parse its raw HTML.

    ``self.html_data`` and ``self.html_tree`` are only set when an article
    is found and contains the ``__wiki_raw__`` key.
    """
    self.page_id = pageid
    self.new_line_non_break_pattern = re.compile(self.new_line_non_break_regex)
    self.es_ops = ElasticSearchOperate()

    wiki_data = self.es_ops.get_wiki_article(pageid)
    if wiki_data is None or __wiki_raw__ not in wiki_data:
        return

    self.html_data = wiki_data[__wiki_raw__]
    html_parser = etree.HTMLParser(remove_blank_text=True, remove_comments=True)
    self.html_tree = etree.fromstring(self.html_data, html_parser)
Example #23
Source File: create_pet_tf_record.py From ros_people_object_detection_tensorflow with Apache License 2.0 | 5 votes |
def create_tf_record(output_filename, label_map_dict, annotations_dir, image_dir, examples):
    """Write a TFRecord file built from the given examples.

    Args:
      output_filename: Path of the TFRecord file to write.
      label_map_dict: The label map dictionary.
      annotations_dir: Directory containing the ``xmls`` annotation folder.
      image_dir: Directory where image files are stored.
      examples: Example names to parse and save; examples without an
        annotation file are skipped with a warning.
    """
    writer = tf.python_io.TFRecordWriter(output_filename)
    xml_dir = os.path.join(annotations_dir, 'xmls')
    for idx, example in enumerate(examples):
        if idx % 100 == 0:
            logging.info('On image %d of %d', idx, len(examples))
        path = os.path.join(xml_dir, example + '.xml')

        if not os.path.exists(path):
            logging.warning('Could not find %s, ignoring example.', path)
            continue
        with tf.gfile.GFile(path, 'r') as fid:
            xml_str = fid.read()
        annotation_root = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(annotation_root)['annotation']

        tf_example = dict_to_tf_example(data, label_map_dict, image_dir)
        writer.write(tf_example.SerializeToString())

    writer.close()


# TODO: Add test for pet/PASCAL main files.
Example #24
Source File: company.py From py-edgar with GNU General Public License v3.0 | 5 votes |
def get_all_filings(self, filing_type="", prior_to="", ownership="include", no_of_entries=100):
    """Fetch the filings page for the given filters and return it parsed.

    The arguments are forwarded unchanged to ``self.get_filings_url``; the
    response content is parsed with ``html.fromstring``.
    """
    filings_url = self.get_filings_url(filing_type, prior_to, ownership, no_of_entries)
    response = self._get(filings_url)
    tree = html.fromstring(response.content)
    return tree
Example #25
Source File: test_middleware.py From ec2-api with Apache License 2.0 | 5 votes |
def _extract_code(self, result): tree = etree.fromstring(result.body) return tree.findall('./Errors')[0].find('Error/Code').text
Example #26
Source File: bugowner.py From openSUSE-release-tools with GNU General Public License v2.0 | 5 votes |
def is_release_manager(self, name):
    """Tell whether ``name`` is one of the release managers.

    The member list is fetched from the ``sle-release-managers`` group on
    first use and kept in ``self.release_managers`` afterwards.
    """
    if self.release_managers is None:
        # Assign the set before fetching, exactly as the original did.
        managers = self.release_managers = set()
        group_url = self.makeurl(['group', 'sle-release-managers'])
        group_root = ET.fromstring(self.cached_GET(group_url))
        for person in group_root.findall('.//person[@userid]'):
            managers.add(person.get('userid'))
        # XXX: hardcoded bot
        managers.add('leaper')
        logger.debug("release managers %s", self.release_managers)

    return name in self.release_managers
Example #27
Source File: check_tests.py From openSUSE-release-tools with GNU General Public License v2.0 | 5 votes |
def test_check_command_single(self):
    """Validate json conversion for a single project."""
    workflow = OBSLocal.StagingWorkflow()
    workflow.create_staging('H')
    self.checkcommand = CheckCommand(workflow.api)

    fixture_path = 'tests/fixtures/project/staging_projects/openSUSE:Factory/H.xml'
    with open(fixture_path, encoding='utf-8') as f:
        status_xml = etree.fromstring(f.read())
    # The API's project_status is replaced so the check reads the fixture.
    workflow.api.project_status = MagicMock(return_value=status_xml)

    report = self.checkcommand._check_project('openSUSE:Factory:Staging:H')
    actual = '\n'.join(report).strip()
    self.assertMultiLineEqual(actual, H_REPORT.strip())
Example #28
Source File: test_middleware.py From ec2-api with Apache License 2.0 | 5 votes |
def _extract_message(self, result): tree = etree.fromstring(result.body) return tree.findall('./Errors')[0].find('Error/Message').text
Example #29
Source File: test_apirequest.py From ec2-api with Apache License 2.0 | 5 votes |
def _compare_aws_xml(self, root_tag, xmlns, request_id, dict_data, observed):
    """Assert that ``observed`` XML has the expected namespace and content.

    The default namespace of the parsed document must equal ``xmlns``, and
    the dict produced by ``tools.parse_xml`` must match ``dict_data``
    extended with ``requestId`` under the ``root_tag`` key.
    """
    # NOTE(ft): we can't use matchers.XMLMatches since it makes comparison
    # based on the order of tags
    parsed = etree.fromstring(observed)
    default_ns = parsed.nsmap.get(None)
    self.assertEqual(xmlns, default_ns)

    observed_data = tools.parse_xml(observed)
    payload = tools.update_dict(dict_data, {'requestId': request_id})
    expected = {root_tag: payload}
    self.assertThat(observed_data, matchers.DictMatches(expected))
Example #30
Source File: SamlSigner.py From ADFSpoof with Apache License 2.0 | 5 votes |
def sign_XML(self, params, id_attribute, algorithm, digest):
    """Fill in the SAML template, sign it and return the compacted bytes.

    :param params: mapping substituted into ``self.saml_template``; its
        ``AssertionID`` entry is used as the signature reference URI
    :param id_attribute: passed through to the signer as ``id_attribute``
    :param algorithm: signature algorithm passed to ``XMLSigner``
    :param digest: digest algorithm passed to ``XMLSigner``
    :returns: signed document as bytes with newlines and the
        ``BEGIN``/``END CERTIFICATE`` marker lines removed
    """
    template = string.Template(self.saml_template)
    document = etree.fromstring(template.substitute(params))

    signer = XMLSigner(
        c14n_algorithm="http://www.w3.org/2001/10/xml-exc-c14n#",
        signature_algorithm=algorithm,
        digest_algorithm=digest,
    )
    signed_xml = signer.sign(
        document,
        key=self.key,
        cert=[self.cert],
        reference_uri=params.get('AssertionID'),
        id_attribute=id_attribute,
    )

    compact = etree.tostring(signed_xml).replace(b'\n', b'')
    return re.sub(b'-----(BEGIN|END) CERTIFICATE-----', b'', compact)