Python lxml.etree.fromstring() Examples

The following are 30 code examples of lxml.etree.fromstring(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module lxml.etree, or try the search function.
Example #1
Source File: bugowner.py    From openSUSE-release-tools with GNU General Public License v2.0 6 votes vote down vote up
def add_bugowner(self, package, owner):
        """Add ``owner`` as bugowner of ``package`` unless already present.

        Fetches the package ``_meta`` document, looks for an existing
        person/group entry for ``owner`` that already carries the
        ``bugowner`` role and, if none is found, appends one and uploads the
        modified document.

        :param package: package name inside ``self.project``
        :param owner: object exposing ``kind`` (``'person'`` selects the
                      ``userid`` attribute, anything else ``groupid``) and
                      ``name``
        """
        url = self.makeurl(['source', self.project, package, '_meta'])
        root = ET.fromstring(self.cached_GET(url))
        idname = 'userid' if owner.kind == 'person' else 'groupid'
        # XXX: can't use 'and' here to filter for bugowner too
        exists = root.findall('./{}[@{}="{}"]'.format(owner.kind, idname, owner.name))
        for node in exists:
            if node.get('role') == 'bugowner':
                logger.debug("%s/%s already has %s %s", self.project, package, owner.kind, owner.name)
                # Only bail out when the bugowner role itself is present; an
                # entry with another role (e.g. maintainer) must not prevent
                # the bugowner entry from being added.
                return

        node = ET.SubElement(root, owner.kind)
        node.set(idname, owner.name)
        node.set('role', 'bugowner')

        data = ET.tostring(root)
        logger.debug(data)
        self.http_PUT(url, data=data)
Example #2
Source File: verification.py    From Penny-Dreadful-Tools with GNU General Public License v3.0 6 votes vote down vote up
def main() -> None:
    """Record the current MTGO client version and track it on the project board.

    Downloads the MTGO application manifest, reads the ``version`` attribute
    of its ``assemblyIdentity`` element, writes it to ``mtgo_version.json``
    and creates a verification-project column named after the version when
    no column with that name exists yet.
    """
    response = requests.get('http://mtgoclientdepot.onlinegaming.wizards.com/MTGO.application')
    manifest_root = etree.fromstring(response.content)
    assembly = manifest_root.find('{urn:schemas-microsoft-com:asm.v1}assemblyIdentity')
    version = assembly.attrib['version']

    print('Current MTGO Version is {0}'.format(version))

    with open('mtgo_version.json', mode='w') as version_file:
        json.dump({'version': version}, version_file)

    project = repo.get_verification_project()
    matching_columns = [column for column in project.get_columns() if column.name == version]
    if matching_columns:
        # A column for this version already exists; nothing left to do.
        return
    print(f'Creating column for {version}')
    project.create_column(version)
Example #3
Source File: workarounds.py    From pulseaudio-dlna with GNU General Public License v3.0 6 votes vote down vote up
def _parse_xml(self, xml):
        """Extract the device address and control URL from a description XML.

        On success stores ``self.ip``, ``self.port`` and ``self.control_url``
        and returns True. Returns False when either element is missing, when
        the URL base carries no text, or when its network location does not
        contain both a host and a port.

        :param xml: MediaRenderer description document to parse
        """
        # Parse MediaRenderer description XML
        xml_root = etree.fromstring(xml)
        namespaces = xml_root.nsmap
        # Drop the default-namespace entry (key None) before using the map
        # for prefixed lookups.
        namespaces.pop(None, None)

        # Determine AVRC URL
        url_base = xml_root.find(self.MR_YAMAHA_URLBASE_PATH, namespaces)
        control_url = xml_root.find(self.MR_YAMAHA_CONTROLURL_PATH, namespaces)
        if url_base is None or control_url is None:
            return False
        if not url_base.text:
            # An empty URL base element cannot yield an address.
            return False

        # Split on the last colon so a location without a port yields an
        # empty host and is rejected below instead of raising ValueError
        # on tuple unpacking.
        netloc = urlparse.urlparse(url_base.text).netloc
        ip, _, port = netloc.rpartition(':')
        if not ip or not port:
            return False

        self.ip = ip
        self.port = port
        self.control_url = control_url.text
        return True
Example #4
Source File: repochecks.py    From openSUSE-release-tools with GNU General Public License v2.0 6 votes vote down vote up
def mirrorRepomd(cachedir, url):
    """Mirror the repository's primary metadata archive into ``cachedir``.

    Reads ``repodata/repomd.xml`` below ``url`` to locate the primary
    ``.xml.gz`` file. When a file with that basename is not yet present in
    ``cachedir``, every ``*.xml.gz`` file there is removed and the new one is
    downloaded.

    :param cachedir: directory holding the cached archive
    :param url: base URL of the repository
    :returns: path of the cached primary archive
    :raises Exception: if the primary location does not end in ``.xml.gz``
    """
    # Use repomd.xml to get the location of primary.xml.gz
    repomd_response = requests.get('{}/repodata/repomd.xml'.format(url))
    repoindex = ETL.fromstring(repomd_response.content)
    repo_ns = {'repo': 'http://linux.duke.edu/metadata/repo'}
    primarypath = repoindex.xpath(
        "string(./repo:data[@type='primary']/repo:location/@href)",
        namespaces=repo_ns)
    if not primarypath.endswith(".xml.gz"):
        raise Exception('unsupported primary format')

    primarydest = os.path.join(cachedir, os.path.basename(primarypath))
    if os.path.exists(primarydest):
        return primarydest

    # Delete the old files first
    stale_pattern = glob.escape(cachedir) + "/*.xml.gz"
    for oldfile in glob.glob(stale_pattern):
        os.unlink(oldfile)

    # Download into a temporary file in the same directory and hard-link it
    # to its final name.
    with tempfile.NamedTemporaryFile(dir=cachedir) as primarytemp:
        payload = requests.get(url + '/' + primarypath).content
        primarytemp.write(payload)
        os.link(primarytemp.name, primarydest)
    return primarydest
Example #5
Source File: belvaParseXML.py    From Basic-Expression-Lexicon-Variation-Algorithms-BELVA with GNU General Public License v3.0 6 votes vote down vote up
def parseHTMLxpathSearch(http_source, xpathString):
#---------------------------------------------------------------------------------
    """Run an XPath query against HTML text and collect serialized matches.

    NUL characters are stripped from the stringified source before parsing.
    This is a best-effort helper: any error raised while parsing or querying
    is suppressed and whatever was collected so far is returned.

    :param http_source: HTML document (converted with ``str()``)
    :param xpathString: XPath expression to evaluate
    :returns: list of serialized results, possibly empty
    """
    return_values = []

    http_source = str(http_source).replace('\x00', '')
    try:
        html = lxml.html.fromstring(http_source)

        for data in html.xpath(xpathString):
            # NOTE(review): lxml elements do not appear to expose a
            # ``content`` attribute; if so this raises, is suppressed below
            # and the result stays empty -- confirm the intended attribute.
            return_values.append(etree.tostring(data.content))
            data.clear()

    except Exception:
        # Deliberately best-effort, but no longer a bare ``except`` so that
        # KeyboardInterrupt and SystemExit still propagate.
        pass

    return return_values



#---------------------------------------------------------------------------------
# parse HTML and return value asked 
Example #6
Source File: stagingapi.py    From openSUSE-release-tools with GNU General Public License v2.0 6 votes vote down vote up
def create_package_container(self, project, package, meta=None, disable_build=False):
        """
        Create a package container by uploading its ``_meta`` document.

        When no metadata is supplied, a minimal document with an empty title
        and description is generated from the package name.
        :param project: project to create it
        :param package: package name
        :param meta: package metadata
        :param disable_build: should the package be created with build
                              flag disabled
        """
        if not meta:
            meta = '<package name="{}"><title/><description/></package>'.format(package)

        if disable_build:
            # Append <build><disable/></build> to the metadata root.
            package_root = ET.fromstring(meta)
            build_node = ET.SubElement(package_root, 'build')
            ET.SubElement(build_node, 'disable')
            meta = ET.tostring(package_root)

        meta_url = self.makeurl(['source', project, package, '_meta'])
        http_PUT(meta_url, data=meta)
Example #7
Source File: tools.py    From ec2-api with Apache License 2.0 6 votes vote down vote up
def parse_xml(xml_string):
    """Convert an XML byte string into a nested dict keyed by tag name.

    The input is decoded as UTF-8 and every match of ``_xml_scheme`` is
    removed before parsing. Conversion rules per element:

    * children whose first tag is ``item`` become a list of converted values;
    * other children become a dict of ``tag -> value``;
    * a childless element whose tag ends in ``Set`` becomes an empty list;
    * any other childless element yields its converted text (or the falsy
      text unchanged).

    :returns: single-entry dict ``{root_tag: converted_value}``
    """
    decoded = xml_string.decode("utf-8")
    root = etree.fromstring(_xml_scheme.sub('', decoded))

    def convert_node(node):
        children = list(node)
        if children:
            if children[0].tag == 'item':
                return node.tag, [convert_node(child)[1] for child in children]
            return node.tag, dict(convert_node(child) for child in children)
        if node.tag.endswith('Set'):
            return node.tag, []
        text = node.text
        if text:
            # TODO(ft): do not use private function
            text = ec2utils._try_convert(text)
        return node.tag, text

    root_tag, root_value = convert_node(root)
    return {root_tag: root_value}
Example #8
Source File: Packets.py    From Timeline with GNU General Public License v3.0 6 votes vote down vote up
def tryParseXML(self, xml_data):
		"""Try to interpret ``xml_data`` as an XML packet.

		Returns True when packet reception is disabled on ``self.penguin``,
		``[t, body]`` (the packet's ``t`` attribute and its ``body`` elements)
		when ``t`` is an allowed type, and None when the type is not allowed
		or any error occurs while parsing.
		"""
		try:
			if not self.penguin.ReceivePacketEnabled:
				return True

			XMLdata = parseXML(str(xml_data))

			t = XMLdata.get('t')
			if t not in AVAILABLE_XML_PACKET_TYPES:
				return None

			body = XMLdata.xpath('//body')
			for b in body:
				# Just to make sure `action` exists!
				# NOTE(review): ``get`` presumably returns None rather than
				# raising when the attribute is missing, so this does not
				# reject such packets -- confirm the intent.
				b.get("action")

			return [t, body]

		except Exception:
			# Malformed packets are reported as None; unlike a bare
			# ``except`` this lets KeyboardInterrupt/SystemExit propagate.
			return None
Example #9
Source File: repochecks.py    From openSUSE-release-tools with GNU General Public License v2.0 6 votes vote down vote up
def mirrorRepomd(cachedir, url):
    """Ensure the primary metadata archive of ``url`` is cached locally.

    The location of the primary archive is read from
    ``repodata/repomd.xml``. If ``cachedir`` does not already contain a file
    with the archive's basename, existing ``*.xml.gz`` files are deleted and
    the archive is fetched.

    :param cachedir: cache directory for the archive
    :param url: repository base URL
    :returns: path of the archive inside ``cachedir``
    :raises Exception: when the primary location is not an ``.xml.gz`` file
    """
    # Use repomd.xml to get the location of primary.xml.gz
    index_url = '{}/repodata/repomd.xml'.format(url)
    repoindex = ETL.fromstring(requests.get(index_url).content)
    primarypath = repoindex.xpath(
        "string(./repo:data[@type='primary']/repo:location/@href)",
        namespaces={'repo': 'http://linux.duke.edu/metadata/repo'},
    )
    if not primarypath.endswith(".xml.gz"):
        raise Exception('unsupported primary format')

    primarydest = os.path.join(cachedir, os.path.basename(primarypath))
    already_cached = os.path.exists(primarydest)
    if not already_cached:
        # Delete the old files first
        old_archives = glob.glob(glob.escape(cachedir) + "/*.xml.gz")
        for oldfile in old_archives:
            os.unlink(oldfile)

        # The temporary file lives in cachedir so it can be hard-linked to
        # the destination name.
        with tempfile.NamedTemporaryFile(dir=cachedir) as primarytemp:
            archive = requests.get(url + '/' + primarypath)
            primarytemp.write(archive.content)
            os.link(primarytemp.name, primarydest)
    return primarydest
Example #10
Source File: test_examples.py    From dataflows with MIT License 6 votes vote down vote up
def country_population():
    """Yield ``{'name', 'population'}`` dicts scraped from Wikipedia.

    Downloads the "List of countries and dependencies by population" page
    and walks every table whose ``class`` contains ``wikitable``. Rows with
    more than three ``td`` cells contribute one record: the ``title`` of the
    first link in the second cell and the text of the third cell. Rows whose
    second cell contains no link are skipped.
    """
    from lxml import etree
    from urllib.request import urlopen
    # Close the HTTP response deterministically once the body is read.
    with urlopen('https://en.wikipedia.org/wiki/List_of_countries_and_dependencies_by_population') as response:
        page = response.read()
    parser = etree.HTMLParser()
    tree = etree.fromstring(page, parser=parser)
    for table in tree.findall('.//table'):
        if 'wikitable' not in table.attrib.get('class', ''):
            continue
        for row in table.findall('.//tr'):
            cells = row.findall('td')
            if len(cells) <= 3:
                continue
            anchor = cells[1].find('.//a')
            if anchor is None:
                # No link to take the name from; skip the row instead of
                # failing with AttributeError on None.
                continue
            yield dict(
                name=anchor.attrib.get('title'),
                population=cells[2].text,
            )
Example #11
Source File: Feed.py    From python-in-practice with GNU General Public License v3.0 6 votes vote down vote up
def _parse(data, limit):
        """Build an HTML list from the items of a feed document.

        Items are first looked up as ``*/item``; when none exist, the
        ``http://purl.org/rss/1.0/`` namespace is used for item, title and
        link tags instead. Each item's title and link (falling back to
        ``guid``) are handed to ``_maybe_append``. Collection stops once
        ``limit`` entries exist (a falsy ``limit`` means no cap).

        :returns: ``["<ul>", ..., "</ul>"]`` or None when nothing was collected
        """
        tree = etree.fromstring(data)
        # RSS
        if tree.find("*/item") is not None:
            prefix = ""
            tag = "*/item"
        else:
            prefix = "{http://purl.org/rss/1.0/}"
            tag = prefix + "item"

        output = []
        for element in tree.findall(tag):
            title = element.find(prefix + "title")
            link = element.find(prefix + "link")
            if link is None:
                link = element.find("guid")
            _maybe_append(output, title, link)
            if limit and len(output) == limit:
                break

        if not output:
            return None
        return ["<ul>"] + output + ["</ul>"]
Example #12
Source File: stagingapi.py    From openSUSE-release-tools with GNU General Public License v2.0 6 votes vote down vote up
def create_package_container(self, project, package, meta=None, disable_build=False):
        """
        Upload the ``_meta`` document that creates a package container.

        A minimal document (empty title and description) is generated when
        ``meta`` is not given.
        :param project: project to create it
        :param package: package name
        :param meta: package metadata
        :param disable_build: should the package be created with build
                              flag disabled
        """
        default_meta = '<package name="{}"><title/><description/></package>'
        if not meta:
            meta = default_meta.format(package)

        if disable_build:
            # Nest an empty <disable/> inside a new <build> element.
            root = ET.fromstring(meta)
            ET.SubElement(ET.SubElement(root, 'build'), 'disable')
            meta = ET.tostring(root)

        http_PUT(self.makeurl(['source', project, package, '_meta']), data=meta)
Example #13
Source File: OBSLocal.py    From openSUSE-release-tools with GNU General Public License v2.0 6 votes vote down vote up
def create_group(self, name, users=[]):
        """Create a group via the API and remember its name locally.

        The group document contains the name as title. When ``users`` is
        non-empty, a ``person`` element holding one ``person`` child per
        user id is appended. The name is added to ``self.groups`` if it is
        not already there.

        :param name: group name
        :param users: user ids to list as members (never modified here)
        """
        meta = """
        <group>
          <title>{}</title>
        </group>
        """.format(name)

        if len(users) > 0:
            group_root = ET.fromstring(meta)
            members = ET.SubElement(group_root, 'person')
            for userid in users:
                ET.SubElement(members, 'person', {'userid': userid})
            meta = ET.tostring(group_root)

        if name not in self.groups:
            self.groups.append(name)
        group_url = osc.core.makeurl(APIURL, ['group', name])
        osc.core.http_PUT(group_url, data=meta)
Example #14
Source File: animal.py    From yui with GNU Affero General Public License v3.0 6 votes vote down vote up
async def get_cat_image_url(timeout: float) -> str:
    """Return the URL of a cat image that answers with HTTP 200.

    Declared with ``async def`` because the body uses ``await`` and
    ``async with``, which are a syntax error inside a plain ``def``.

    Loops until a usable image is found: asks the cat API for an XML
    response, extracts the image URL, then requests that URL within
    ``timeout`` seconds. A disconnect from the API is retried after a short
    sleep; a connection error or timeout on the image restarts the loop.

    :param timeout: time limit, in seconds, for checking the image URL
    :raises APIServerError: when the API answers with a non-200 status
    """
    api_url = 'http://thecatapi.com/api/images/get'
    async with aiohttp.ClientSession() as session:
        while True:
            try:
                async with session.get(
                    api_url, params={'format': 'xml', 'type': 'jpg,png'}
                ) as res:
                    if res.status != 200:
                        raise APIServerError
                    xml_result = await res.read()
                    tree = etree.fromstring(xml_result)
                    url = tree.find('data/images/image/url').text
            except aiohttp.client_exceptions.ServerDisconnectedError:
                await asyncio.sleep(0.1)
                continue
            try:
                async with async_timeout.timeout(timeout=timeout):
                    async with session.get(url) as res:
                        async with res:
                            if res.status == 200:
                                return url
            except (aiohttp.ClientConnectorError, asyncio.TimeoutError):
                continue
Example #15
Source File: create_pascal_tf_record.py    From ros_people_object_detection_tensorflow with Apache License 2.0 5 votes vote down vote up
def main(_):
  """Convert PASCAL VOC annotations into a single TFRecord file.

  Validates ``FLAGS.set`` and ``FLAGS.year``, then for each selected year
  reads the example list, parses every annotation XML into a dict and writes
  the resulting tf.Example to ``FLAGS.output_path``.

  Raises:
    ValueError: if ``FLAGS.set`` or ``FLAGS.year`` is not an allowed value.
  """
  if FLAGS.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))
  if FLAGS.year not in YEARS:
    raise ValueError('year must be in : {}'.format(YEARS))

  data_dir = FLAGS.data_dir
  # 'merged' selects both datasets; otherwise only the requested year.
  if FLAGS.year == 'merged':
    years = ['VOC2007', 'VOC2012']
  else:
    years = [FLAGS.year]

  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

  for year in years:
    logging.info('Reading from PASCAL %s dataset.', year)
    list_filename = 'aeroplane_' + FLAGS.set + '.txt'
    examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                 list_filename)
    annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
    examples_list = dataset_util.read_examples_list(examples_path)
    for idx, example in enumerate(examples_list):
      if idx % 100 == 0:
        logging.info('On image %d of %d', idx, len(examples_list))
      annotation_path = os.path.join(annotations_dir, example + '.xml')
      with tf.gfile.GFile(annotation_path, 'r') as fid:
        xml_str = fid.read()
      annotation = dataset_util.recursive_parse_xml_to_dict(
          etree.fromstring(xml_str))
      data = annotation['annotation']

      tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                      FLAGS.ignore_difficult_instances)
      writer.write(tf_example.SerializeToString())

  writer.close()
Example #16
Source File: OBSLocal.py    From openSUSE-release-tools with GNU General Public License v2.0 5 votes vote down vote up
def update_meta(self, reviewer={}, maintainer={}, project_links=[], with_repo=False):
        """Build the project meta document and hand it to ``custom_meta``.

        :param reviewer: mapping with optional ``groups`` / ``users`` lists
                         added with role ``reviewer``
        :param maintainer: same shape, added with role ``maintainer``
        :param project_links: project names, each added as a ``link`` element
        :param with_repo: when true, add a ``standard`` repository with an
                          ``x86_64`` arch

        None of the default arguments is modified.
        """
        meta = """
            <project name="{0}">
              <title></title>
              <description></description>
            </project>""".format(self.name)

        root = ET.fromstring(meta)
        # Reviewer entries first, then maintainer entries; within each role
        # groups precede users.
        for role, members in (('reviewer', reviewer), ('maintainer', maintainer)):
            for groupid in members.get('groups', []):
                ET.SubElement(root, 'group', {'groupid': groupid, 'role': role})
            for userid in members.get('users', []):
                ET.SubElement(root, 'person', {'userid': userid, 'role': role})

        for linked_project in project_links:
            ET.SubElement(root, 'link', {'project': linked_project})

        if with_repo:
            repository = ET.SubElement(root, 'repository', {'name': 'standard'})
            arch = ET.SubElement(repository, 'arch')
            arch.text = 'x86_64'

        self.custom_meta(ET.tostring(root))
Example #17
Source File: wiki_parse.py    From adam_qas with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, html_data, is_file):
        """Parse ``html_data`` into ``self.html_tree``.

        :param html_data: HTML markup, or a file source when ``is_file`` is true
        :param is_file: selects ``etree.parse`` over ``etree.fromstring``
        """
        self.es_ops = ElasticSearchOperate()
        self.html_data = html_data
        self.new_line_non_break_pattern = re.compile(self.new_line_non_break_regex)
        # Blank text nodes and comments are dropped while parsing.
        parser = etree.HTMLParser(remove_blank_text=True, remove_comments=True)
        load = etree.parse if is_file else etree.fromstring
        self.html_tree = load(self.html_data, parser)
Example #18
Source File: animal.py    From yui with GNU Affero General Public License v3.0 5 votes vote down vote up
async def get_fox_image_url(timeout: float) -> str:
    """Return the ``src`` of the first thumbnail in the fox gallery page.

    Declared with ``async def`` because the body uses ``await`` and
    ``async with``, which are a syntax error inside a plain ``def``.

    :param timeout: time limit, in seconds, for downloading the gallery page
    :raises APIServerError: when the page contains no matching image element
    """
    url = 'http://fox-info.net/fox-gallery'
    async with async_timeout.timeout(timeout=timeout):
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as resp:
                data = await resp.text()
    h = html.fromstring(data)
    image_els = h.cssselect('#gallery-1 img.attachment-thumbnail')
    try:
        return str(image_els[0].get('src'))
    except IndexError:
        # No thumbnail matched the selector.
        raise APIServerError
Example #19
Source File: _utils.py    From sec-edgar-downloader with MIT License 5 votes vote down vote up
def extract_elements_from_xml(xml_byte_object, xpath_selector):
    """Parse ``xml_byte_object`` and evaluate ``xpath_selector`` on its root.

    The query is evaluated with the ``W3_NAMESPACE`` prefix mapping.
    """
    return etree.fromstring(xml_byte_object).xpath(
        xpath_selector, namespaces=W3_NAMESPACE)
Example #20
Source File: create_pascal_tf_record.py    From ros_people_object_detection_tensorflow with Apache License 2.0 5 votes vote down vote up
def main(_):
  """Write PASCAL VOC examples to the TFRecord file at ``FLAGS.output_path``.

  After checking ``FLAGS.set`` and ``FLAGS.year``, iterates over the selected
  dataset years, loads each listed annotation XML, converts it to a
  tf.Example and appends its serialized form to the record file.

  Raises:
    ValueError: if ``FLAGS.set`` or ``FLAGS.year`` is not an allowed value.
  """
  if FLAGS.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))
  if FLAGS.year not in YEARS:
    raise ValueError('year must be in : {}'.format(YEARS))

  data_dir = FLAGS.data_dir
  # Any year other than 'merged' restricts the run to that single dataset.
  years = [FLAGS.year] if FLAGS.year != 'merged' else ['VOC2007', 'VOC2012']

  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

  for year in years:
    logging.info('Reading from PASCAL %s dataset.', year)
    year_dir = (data_dir, year)
    examples_path = os.path.join(*year_dir, 'ImageSets', 'Main',
                                 'aeroplane_' + FLAGS.set + '.txt')
    annotations_dir = os.path.join(*year_dir, FLAGS.annotations_dir)
    examples_list = dataset_util.read_examples_list(examples_path)
    for idx, example in enumerate(examples_list):
      if idx % 100 == 0:
        logging.info('On image %d of %d', idx, len(examples_list))
      xml_path = os.path.join(annotations_dir, example + '.xml')
      with tf.gfile.GFile(xml_path, 'r') as fid:
        xml_str = fid.read()
      xml_root = etree.fromstring(xml_str)
      data = dataset_util.recursive_parse_xml_to_dict(xml_root)['annotation']

      tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                      FLAGS.ignore_difficult_instances)
      serialized = tf_example.SerializeToString()
      writer.write(serialized)

  writer.close()
Example #21
Source File: utils.py    From rtcclient with Apache License 2.0 5 votes vote down vote up
def remove_empty_elements(docs):
    """Strip content-less elements from an XML string.

    Every element without any child node is removed from its parent, except
    those whose serialization mentions ``rdf:resource``.

    :param docs: XML document as ``str`` (encoded to UTF-8 for parsing)
    :returns: the serialized tree after removal
    """
    root = etree.fromstring(bytes(docs, encoding='utf-8'))
    for element in root.xpath("//*[not(node())]"):
        serialized = str(etree.tostring(element))
        if "rdf:resource" in serialized:
            # Empty, but carries a resource reference: keep it.
            continue
        element.getparent().remove(element)

    return etree.tostring(root)
Example #22
Source File: wiki_parse.py    From adam_qas with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, pageid):
        """Load the stored article for ``pageid`` and parse its raw HTML.

        ``self.html_data`` and ``self.html_tree`` are only set when the
        lookup returns data containing the ``__wiki_raw__`` key.
        """
        self.page_id = pageid
        self.new_line_non_break_pattern = re.compile(self.new_line_non_break_regex)
        self.es_ops = ElasticSearchOperate()
        wiki_data = self.es_ops.get_wiki_article(pageid)
        if wiki_data is None or __wiki_raw__ not in wiki_data:
            return
        self.html_data = wiki_data[__wiki_raw__]
        # Blank text nodes and comments are dropped while parsing.
        html_parser = etree.HTMLParser(remove_blank_text=True, remove_comments=True)
        self.html_tree = etree.fromstring(self.html_data, html_parser)
Example #23
Source File: create_pet_tf_record.py    From ros_people_object_detection_tensorflow with Apache License 2.0 5 votes vote down vote up
def create_tf_record(output_filename,
                     label_map_dict,
                     annotations_dir,
                     image_dir,
                     examples):
  """Writes one tf.Example per annotated example to a TFRecord file.

  Examples whose annotation XML (``<annotations_dir>/xmls/<example>.xml``)
  does not exist are skipped with a warning.

  Args:
    output_filename: Path to where output file is saved.
    label_map_dict: The label map dictionary.
    annotations_dir: Directory where annotation files are stored.
    image_dir: Directory where image files are stored.
    examples: Examples to parse and save to tf record.
  """
  writer = tf.python_io.TFRecordWriter(output_filename)
  for idx, example in enumerate(examples):
    if idx % 100 == 0:
      logging.info('On image %d of %d', idx, len(examples))
    xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')

    if os.path.exists(xml_path):
      with tf.gfile.GFile(xml_path, 'r') as fid:
        xml_str = fid.read()
      parsed = dataset_util.recursive_parse_xml_to_dict(
          etree.fromstring(xml_str))
      data = parsed['annotation']

      tf_example = dict_to_tf_example(data, label_map_dict, image_dir)
      writer.write(tf_example.SerializeToString())
    else:
      logging.warning('Could not find %s, ignoring example.', xml_path)

  writer.close()


# TODO: Add test for pet/PASCAL main files. 
Example #24
Source File: company.py    From py-edgar with GNU General Public License v3.0 5 votes vote down vote up
def get_all_filings(self, filing_type="", prior_to="", ownership="include", no_of_entries=100):
      """Fetch the filings page for the given filters and return it parsed.

      The four arguments are forwarded unchanged to ``get_filings_url``; the
      content of the fetched page is parsed with ``html.fromstring``.
      """
      filings_url = self.get_filings_url(filing_type, prior_to, ownership, no_of_entries)
      response = self._get(filings_url)
      parsed_page = html.fromstring(response.content)
      return parsed_page
Example #25
Source File: test_middleware.py    From ec2-api with Apache License 2.0 5 votes vote down vote up
def _extract_code(self, result):
        """Return the text of ``Error/Code`` under the first ``Errors`` element of ``result.body``."""
        root = etree.fromstring(result.body)
        first_errors = root.findall('./Errors')[0]
        code_node = first_errors.find('Error/Code')
        return code_node.text
Example #26
Source File: bugowner.py    From openSUSE-release-tools with GNU General Public License v2.0 5 votes vote down vote up
def is_release_manager(self, name):
        """Tell whether ``name`` belongs to the release managers.

        The member set is loaded once from the ``sle-release-managers`` group
        document and kept in ``self.release_managers`` for later calls; the
        ``leaper`` account is always part of it.
        """
        if self.release_managers is None:
            # Assign the (still empty) set first, then fill it in place.
            self.release_managers = managers = set()
            group_url = self.makeurl(['group', 'sle-release-managers'])
            group_root = ET.fromstring(self.cached_GET(group_url))
            managers.update(
                person.get('userid')
                for person in group_root.findall('.//person[@userid]'))
            # XXX: hardcoded bot
            managers.add('leaper')
            logger.debug("release managers %s", self.release_managers)

        return name in self.release_managers
Example #27
Source File: check_tests.py    From openSUSE-release-tools with GNU General Public License v2.0 5 votes vote down vote up
def test_check_command_single(self):
        """Check the report produced for the single staging project H."""

        workflow = OBSLocal.StagingWorkflow()
        workflow.create_staging('H')
        self.checkcommand = CheckCommand(workflow.api)

        fixture_path = 'tests/fixtures/project/staging_projects/openSUSE:Factory/H.xml'
        with open(fixture_path, encoding='utf-8') as fixture:
            status_xml = etree.fromstring(fixture.read())
            # Serve the fixture instead of querying the project status.
            workflow.api.project_status = MagicMock(return_value=status_xml)
        report = self.checkcommand._check_project('openSUSE:Factory:Staging:H')
        actual = '\n'.join(report).strip()
        self.assertMultiLineEqual(actual, H_REPORT.strip())
Example #28
Source File: test_middleware.py    From ec2-api with Apache License 2.0 5 votes vote down vote up
def _extract_message(self, result):
        """Return the text of ``Error/Message`` under the first ``Errors`` element of ``result.body``."""
        root = etree.fromstring(result.body)
        first_errors = root.findall('./Errors')[0]
        message_node = first_errors.find('Error/Message')
        return message_node.text
Example #29
Source File: test_apirequest.py    From ec2-api with Apache License 2.0 5 votes vote down vote up
def _compare_aws_xml(self, root_tag, xmlns, request_id, dict_data,
                         observed):
        """Assert that ``observed`` XML matches the expected response.

        Checks that the default namespace of the root equals ``xmlns`` and
        that the parsed document equals ``dict_data`` updated with
        ``requestId`` and wrapped under ``root_tag``.
        """
        # NOTE(ft): we can't use matchers.XMLMatches since it makes comparison
        # based on the order of tags
        observed_root = etree.fromstring(observed)
        default_namespace = observed_root.nsmap.get(None)
        self.assertEqual(xmlns, default_namespace)
        observed_data = tools.parse_xml(observed)
        expected_body = tools.update_dict(dict_data, {'requestId': request_id})
        expected = {root_tag: expected_body}
        self.assertThat(observed_data, matchers.DictMatches(expected))
Example #30
Source File: SamlSigner.py    From ADFSpoof with Apache License 2.0 5 votes vote down vote up
def sign_XML(self, params, id_attribute, algorithm, digest):
        """Fill the SAML template with ``params``, sign it and return the bytes.

        The serialized result has newlines and the certificate BEGIN/END
        marker lines removed.

        :param params: template substitutions; ``AssertionID`` is used as the
                       reference URI
        :param id_attribute: passed to the signer as ``id_attribute``
        :param algorithm: signature algorithm passed to ``XMLSigner``
        :param digest: digest algorithm passed to ``XMLSigner``
        """
        template = string.Template(self.saml_template)
        document = etree.fromstring(template.substitute(params))

        signer = XMLSigner(
            c14n_algorithm="http://www.w3.org/2001/10/xml-exc-c14n#",
            signature_algorithm=algorithm,
            digest_algorithm=digest,
        )
        signed_xml = signer.sign(
            document,
            key=self.key,
            cert=[self.cert],
            reference_uri=params.get('AssertionID'),
            id_attribute=id_attribute,
        )
        flattened = etree.tostring(signed_xml).replace(b'\n', b'')
        return re.sub(b'-----(BEGIN|END) CERTIFICATE-----', b'', flattened)