Python Examples of lxml.etree.fromstring

Source File: bugowner.py From openSUSE-release-tools with GNU General Public License v2.0

6 votes

def add_bugowner(self, package, owner):
        """Register ``owner`` as bugowner in the ``_meta`` of ``package``.

        The package meta of ``self.project`` is fetched and, unless it
        already lists ``owner`` with role ``bugowner``, a new element is
        appended and the whole document is PUT back.

        :param package: name of the package inside ``self.project``
        :param owner: object exposing ``kind`` and ``name``; a ``kind`` of
                      'person' is keyed by ``userid``, anything else by
                      ``groupid``
        """
        url = self.makeurl(['source', self.project, package, '_meta'])
        root = ET.fromstring(self.cached_GET(url))
        idname = 'userid' if owner.kind == 'person' else 'groupid'
        # XXX: can't use 'and' here to filter for bugowner too
        exists = root.findall('./{}[@{}="{}"]'.format(owner.kind, idname, owner.name))
        for node in exists:
            # Only an existing *bugowner* entry makes the update redundant;
            # the same owner may already be listed under another role, and
            # in that case the bugowner element still has to be added.
            if node.get('role') == 'bugowner':
                logger.debug("%s/%s already has %s %s", self.project, package, owner.kind, owner.name)
                return

        node = ET.SubElement(root, owner.kind)
        node.set(idname, owner.name)
        node.set('role', 'bugowner')

        data = ET.tostring(root)
        logger.debug(data)
        self.http_PUT(url, data=data)

Source File: verification.py From Penny-Dreadful-Tools with GNU General Public License v3.0

6 votes

def main() -> None:
    """Record the current MTGO client version.

    Downloads the MTGO application manifest, reads the ``version`` attribute
    of its ``assemblyIdentity`` element, prints it, writes it to
    ``mtgo_version.json`` and creates a column named after the version in
    the verification project when no column with that name exists yet.
    """
    response = requests.get('http://mtgoclientdepot.onlinegaming.wizards.com/MTGO.application')
    manifest_root = etree.fromstring(response.content)
    assembly = manifest_root.find('{urn:schemas-microsoft-com:asm.v1}assemblyIdentity')
    version = assembly.attrib['version']

    print('Current MTGO Version is {0}'.format(version))

    with open('mtgo_version.json', mode='w') as out:
        json.dump({'version': version}, out)

    project = repo.get_verification_project()
    matching = [col for col in project.get_columns() if col.name == version]
    if matching:
        # A column for this version is already there; nothing to create.
        return
    print(f'Creating column for {version}')
    project.create_column(version)

Source File: workarounds.py From pulseaudio-dlna with GNU General Public License v3.0

6 votes

def _parse_xml(self, xml):
        """Extract the control endpoint from a MediaRenderer description.

        On success ``self.ip``, ``self.port`` and ``self.control_url`` are
        set from the document.

        :param xml: MediaRenderer description XML
        :return: True when both the URL base and the control URL were found
                 and the URL base carries a non-empty host and port,
                 False otherwise
        """
        # Parse MediaRenderer description XML
        xml_root = etree.fromstring(xml)
        namespaces = xml_root.nsmap
        # Drop the default-namespace entry (key None) before handing the
        # mapping to find().
        namespaces.pop(None, None)

        # Determine AVRC URL
        url_base = xml_root.find(self.MR_YAMAHA_URLBASE_PATH, namespaces)
        control_url = xml_root.find(self.MR_YAMAHA_CONTROLURL_PATH, namespaces)
        if ((url_base is None) or (control_url is None)):
            return False

        # A network location without an explicit port (or with additional
        # colons) cannot be split into exactly host and port; report it as
        # unusable instead of failing with ValueError on unpacking.
        netloc_parts = urlparse.urlparse(url_base.text).netloc.split(':')
        if len(netloc_parts) != 2:
            return False
        ip, port = netloc_parts
        if ((not ip) or (not port)):
            return False

        self.ip = ip
        self.port = port
        self.control_url = control_url.text
        return True

Source File: repochecks.py From openSUSE-release-tools with GNU General Public License v2.0

6 votes

def mirrorRepomd(cachedir, url):
    """Mirror the primary metadata file of a repository into ``cachedir``.

    :param cachedir: directory holding the cached ``*.xml.gz`` file
    :param url: base URL of the repository
    :return: path of the primary file inside ``cachedir``
    :raises Exception: when the primary location does not end in ``.xml.gz``
    """
    # repomd.xml is the index that names the location of primary.xml.gz
    repomd_response = requests.get('{}/repodata/repomd.xml'.format(url))
    repoindex = ETL.fromstring(repomd_response.content)
    repo_ns = {'repo': 'http://linux.duke.edu/metadata/repo'}
    primarypath = repoindex.xpath(
        "string(./repo:data[@type='primary']/repo:location/@href)",
        namespaces=repo_ns)
    if not primarypath.endswith(".xml.gz"):
        raise Exception('unsupported primary format')

    primarydest = os.path.join(cachedir, os.path.basename(primarypath))
    if os.path.exists(primarydest):
        # Already mirrored under this name.
        return primarydest

    # Delete the old files first
    stale_pattern = glob.escape(cachedir) + "/*.xml.gz"
    for stale in glob.glob(stale_pattern):
        os.unlink(stale)

    # Download into a temporary file in the same directory and hard-link it
    # to its final name.
    with tempfile.NamedTemporaryFile(dir=cachedir) as scratch:
        scratch.write(requests.get(url + '/' + primarypath).content)
        os.link(scratch.name, primarydest)
    return primarydest

Source File: belvaParseXML.py From Basic-Expression-Lexicon-Variation-Algorithms-BELVA with GNU General Public License v3.0

6 votes

def parseHTMLxpathSearch(http_source, xpathString):
    """Run an XPath query against HTML text on a best-effort basis.

    :param http_source: HTML document; it is converted with ``str()`` and
                        stripped of NUL characters before parsing
    :param xpathString: XPath expression to evaluate
    :return: list of serialized matches; whatever was collected before a
             failure (possibly nothing) when parsing or evaluation fails
    """
    return_values = []

    http_source = str(http_source).replace('\x00', '')
    try:
        html = lxml.html.fromstring(http_source)

        for data in html.xpath(xpathString):
            # NOTE(review): ``data.content`` is kept exactly as found; lxml
            # elements presumably have no ``content`` attribute, in which
            # case this line raises and the handler below ends the loop.
            # Confirm whether ``data`` itself was meant.
            return_values.append(etree.tostring(data.content))
            data.clear()

    except Exception:
        # Deliberately best effort: malformed input or a bad expression
        # yields what was gathered so far. Unlike a bare ``except``, this
        # lets KeyboardInterrupt and SystemExit propagate.
        pass

    return return_values



#---------------------------------------------------------------------------------
# parse HTML and return value asked

Source File: stagingapi.py From openSUSE-release-tools with GNU General Public License v2.0

6 votes

def create_package_container(self, project, package, meta=None, disable_build=False):
        """
        Upload the ``_meta`` of a package container in project/package.

        :param project: project to create it in
        :param package: package name
        :param meta: package metadata; a minimal document with empty title
                     and description is generated when omitted or empty
        :param disable_build: when true, a ``<build><disable/></build>``
                              element is appended to the metadata
        """
        meta = meta or '<package name="{}"><title/><description/></package>'.format(package)

        if disable_build:
            tree = ET.fromstring(meta)
            ET.SubElement(ET.SubElement(tree, 'build'), 'disable')
            meta = ET.tostring(tree)

        http_PUT(self.makeurl(['source', project, package, '_meta']), data=meta)

Source File: tools.py From ec2-api with Apache License 2.0

6 votes

def parse_xml(xml_string):
    """Convert an XML byte string into nested dicts and lists.

    The input is decoded as UTF-8 and ``_xml_scheme`` matches are removed
    before parsing. Each element becomes a ``tag -> value`` entry:
    a node whose first child is tagged ``item`` maps to a list of the
    children's values, any other node with children maps to a dict, a leaf
    whose tag ends in ``Set`` maps to an empty list, and any other leaf
    maps to its converted text (or the falsy text itself).

    :param xml_string: XML document as bytes
    :return: dict with a single entry for the root element
    """
    cleaned = _xml_scheme.sub('', xml_string.decode("utf-8"))
    root = etree.fromstring(cleaned)

    def convert_node(node):
        tag = node.tag
        children = list(node)
        if children:
            converted = (convert_node(child) for child in children)
            if children[0].tag == 'item':
                return tag, [value for _, value in converted]
            return tag, dict(converted)
        if tag.endswith('Set'):
            return tag, []
        text = node.text
        # TODO(ft): do not use private function
        return tag, (ec2utils._try_convert(text) if text else text)

    root_tag, root_value = convert_node(root)
    return {root_tag: root_value}

Source File: Packets.py From Timeline with GNU General Public License v3.0

6 votes

def tryParseXML(self, xml_data):
		"""Parse an incoming XML packet.

		Returns True when packet receiving is disabled for the penguin,
		``[t, body]`` (the root's ``t`` attribute and the list of ``//body``
		nodes) when ``t`` is one of AVAILABLE_XML_PACKET_TYPES, and None
		when the type is not recognised or anything goes wrong.
		"""
		try:
			if not self.penguin.ReceivePacketEnabled:
				return True

			XMLdata = parseXML(str(xml_data))

			t = XMLdata.get('t')
			if t not in AVAILABLE_XML_PACKET_TYPES:
				return None

			body = XMLdata.xpath('//body')
			for b in body:
				# Kept from the original, which did this "just to make sure
				# `action` exists". NOTE(review): a plain attribute lookup
				# presumably yields None rather than failing when the
				# attribute is absent -- confirm whether a real check is wanted.
				b.get("action")

			return [t, body]

		except Exception:
			# Malformed packets are reported as None; KeyboardInterrupt and
			# SystemExit are no longer swallowed.
			return None

Source File: repochecks.py From openSUSE-release-tools with GNU General Public License v2.0

6 votes

def mirrorRepomd(cachedir, url):
    """Mirror the primary metadata file of a repository into ``cachedir``.

    :param cachedir: directory holding the cached ``*.xml.gz`` file
    :param url: base URL of the repository
    :return: path of the primary file inside ``cachedir``
    :raises Exception: when the primary location does not end in ``.xml.gz``
    """
    # repomd.xml is the index that names the location of primary.xml.gz
    repomd_response = requests.get('{}/repodata/repomd.xml'.format(url))
    repoindex = ETL.fromstring(repomd_response.content)
    repo_ns = {'repo': 'http://linux.duke.edu/metadata/repo'}
    primarypath = repoindex.xpath(
        "string(./repo:data[@type='primary']/repo:location/@href)",
        namespaces=repo_ns)
    if not primarypath.endswith(".xml.gz"):
        raise Exception('unsupported primary format')

    primarydest = os.path.join(cachedir, os.path.basename(primarypath))
    if os.path.exists(primarydest):
        # Already mirrored under this name.
        return primarydest

    # Delete the old files first
    stale_pattern = glob.escape(cachedir) + "/*.xml.gz"
    for stale in glob.glob(stale_pattern):
        os.unlink(stale)

    # Download into a temporary file in the same directory and hard-link it
    # to its final name.
    with tempfile.NamedTemporaryFile(dir=cachedir) as scratch:
        scratch.write(requests.get(url + '/' + primarypath).content)
        os.link(scratch.name, primarydest)
    return primarydest

Source File: test_examples.py From dataflows with MIT License

6 votes

def country_population():
    """Yield ``{'name': ..., 'population': ...}`` dicts scraped from Wikipedia.

    Every ``wikitable`` on the "List of countries and dependencies by
    population" page is scanned; rows with more than three ``td`` cells
    contribute the ``title`` of the first link in the second cell and the
    text of the third cell.
    """
    from lxml import etree
    from urllib.request import urlopen
    page = urlopen('https://en.wikipedia.org/wiki/List_of_countries_and_dependencies_by_population').read()
    tree = etree.fromstring(page, parser=etree.HTMLParser())
    for table in tree.findall('.//table'):
        if 'wikitable' not in table.attrib.get('class', ''):
            continue
        for row in table.findall('.//tr'):
            cells = row.findall('td')
            if len(cells) <= 3:
                continue
            link = cells[1].find('.//a')
            yield dict(
                name=link.attrib.get('title'),
                population=cells[2].text,
            )

Source File: Feed.py From python-in-practice with GNU General Public License v3.0

6 votes

def _parse(data, limit):
        tree = etree.fromstring(data)
        output = []
        # RSS
        prefix = ""
        tag = "*/item"
        if tree.find(tag) is None:
            prefix = "{http://purl.org/rss/1.0/}"
            tag = prefix + "item"
        for element in tree.findall(tag):
            title = element.find(prefix + "title")
            link = element.find(prefix + "link")
            if link is None:
                link = element.find("guid")
            _maybe_append(output, title, link)
            if limit and len(output) == limit:
                break
        if output:
            return ["<ul>"] + output + ["</ul>"]

Source File: stagingapi.py From openSUSE-release-tools with GNU General Public License v2.0

6 votes

def create_package_container(self, project, package, meta=None, disable_build=False):
        """
        Upload the ``_meta`` of a package container in project/package.

        :param project: project to create it in
        :param package: package name
        :param meta: package metadata; a minimal document with empty title
                     and description is generated when omitted or empty
        :param disable_build: when true, a ``<build><disable/></build>``
                              element is appended to the metadata
        """
        meta = meta or '<package name="{}"><title/><description/></package>'.format(package)

        if disable_build:
            tree = ET.fromstring(meta)
            ET.SubElement(ET.SubElement(tree, 'build'), 'disable')
            meta = ET.tostring(tree)

        http_PUT(self.makeurl(['source', project, package, '_meta']), data=meta)

Source File: OBSLocal.py From openSUSE-release-tools with GNU General Public License v2.0

6 votes

def create_group(self, name, users=[]):
        """Upload a group definition and remember its name in ``self.groups``.

        :param name: group name, used as title and in the target URL
        :param users: user ids; when non-empty, one nested ``person``
                      element per user is added under a wrapping ``person``
                      element (the default list is only read, never changed)
        """
        meta = """
        <group>
          <title>{}</title>
        </group>
        """.format(name)

        if len(users) > 0:
            group_root = ET.fromstring(meta)
            members = ET.SubElement(group_root, 'person')
            for userid in users:
                ET.SubElement(members, 'person', {'userid': userid})
            meta = ET.tostring(group_root)

        if name not in self.groups:
            self.groups.append(name)
        osc.core.http_PUT(osc.core.makeurl(APIURL, ['group', name]), data=meta)

Source File: animal.py From yui with GNU Affero General Public License v3.0

6 votes

async def get_cat_image_url(timeout: float) -> str:
    """Return the URL of a cat image that can actually be fetched.

    The function is a coroutine: its body uses ``async with`` and ``await``,
    which are only valid inside ``async def``.

    It keeps asking the cat API for an image URL and probing that URL until
    a probe answers with status 200. A server disconnect while talking to
    the API is retried after a short sleep; a connection error or timeout
    while probing the image leads to a fresh API request.

    :param timeout: timeout handed to ``async_timeout`` for probing an image
    :return: the first image URL whose probe returned status 200
    :raises APIServerError: when the API itself answers with a status other
                            than 200
    """
    api_url = 'http://thecatapi.com/api/images/get'
    async with aiohttp.ClientSession() as session:
        while True:
            try:
                async with session.get(
                    api_url, params={'format': 'xml', 'type': 'jpg,png'}
                ) as res:
                    if res.status != 200:
                        raise APIServerError
                    xml_result = await res.read()
                    tree = etree.fromstring(xml_result)
                    url = tree.find('data/images/image/url').text
            except aiohttp.client_exceptions.ServerDisconnectedError:
                await asyncio.sleep(0.1)
                continue
            try:
                async with async_timeout.timeout(timeout=timeout):
                    async with session.get(url) as res:
                        async with res:
                            if res.status == 200:
                                return url
            except (aiohttp.ClientConnectorError, asyncio.TimeoutError):
                continue

Source File: create_pascal_tf_record.py From ros_people_object_detection_tensorflow with Apache License 2.0

5 votes

def main(_):
  """Convert PASCAL VOC annotations into a single TFRecord file.

  ``FLAGS.set`` and ``FLAGS.year`` are validated against ``SETS`` and
  ``YEARS``; a year of 'merged' processes both VOC2007 and VOC2012. Every
  example listed for the chosen set is parsed from its annotation XML,
  converted with ``dict_to_tf_example`` and written to ``FLAGS.output_path``.

  Raises:
    ValueError: if ``FLAGS.set`` or ``FLAGS.year`` is not an allowed value.
  """
  if FLAGS.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))
  if FLAGS.year not in YEARS:
    raise ValueError('year must be in : {}'.format(YEARS))

  data_dir = FLAGS.data_dir
  years = [FLAGS.year] if FLAGS.year != 'merged' else ['VOC2007', 'VOC2012']

  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

  for year in years:
    logging.info('Reading from PASCAL %s dataset.', year)
    set_file = 'aeroplane_' + FLAGS.set + '.txt'
    examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main', set_file)
    annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
    examples_list = dataset_util.read_examples_list(examples_path)
    for idx, example in enumerate(examples_list):
      if idx % 100 == 0:
        logging.info('On image %d of %d', idx, len(examples_list))
      annotation_path = os.path.join(annotations_dir, example + '.xml')
      with tf.gfile.GFile(annotation_path, 'r') as fid:
        xml_str = fid.read()
      annotation = etree.fromstring(xml_str)
      data = dataset_util.recursive_parse_xml_to_dict(annotation)['annotation']

      record = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                  FLAGS.ignore_difficult_instances)
      writer.write(record.SerializeToString())

  writer.close()

Source File: OBSLocal.py From openSUSE-release-tools with GNU General Public License v2.0

5 votes

def update_meta(self, reviewer={}, maintainer={}, project_links=[], with_repo=False):
        """Build the project meta document and pass it to ``self.custom_meta``.

        :param reviewer: mapping with optional 'groups' and 'users' lists that
                         become ``group``/``person`` elements with role
                         'reviewer'
        :param maintainer: same shape as ``reviewer``, with role 'maintainer'
        :param project_links: project names, one ``link`` element each
        :param with_repo: when true, a 'standard' repository with an
                          ``x86_64`` arch is added

        The default arguments are only read, never modified.
        """
        meta = """
            <project name="{0}">
              <title></title>
              <description></description>
            </project>""".format(self.name)

        root = ET.fromstring(meta)

        # Emission order: reviewer before maintainer, and within each role
        # groups before users.
        member_kinds = (('group', 'groupid', 'groups'), ('person', 'userid', 'users'))
        for role, members in (('reviewer', reviewer), ('maintainer', maintainer)):
            for tag, id_attr, key in member_kinds:
                for member in members.get(key, []):
                    ET.SubElement(root, tag, {id_attr: member, 'role': role})

        for linked_project in project_links:
            ET.SubElement(root, 'link', {'project': linked_project})

        if with_repo:
            repository = ET.SubElement(root, 'repository', {'name': 'standard'})
            arch = ET.SubElement(repository, 'arch')
            arch.text = 'x86_64'

        self.custom_meta(ET.tostring(root))

Source File: wiki_parse.py From adam_qas with GNU General Public License v3.0

5 votes

def __init__(self, html_data, is_file):
        """Parse ``html_data`` into ``self.html_tree``.

        :param html_data: the HTML source; handed to ``etree.parse`` when
                          ``is_file`` is true, to ``etree.fromstring``
                          otherwise
        :param is_file: selects which of the two parse entry points is used
        """
        self.es_ops = ElasticSearchOperate()
        self.html_data = html_data
        self.new_line_non_break_pattern = re.compile(self.new_line_non_break_regex)
        # Blank text and comments are dropped by the parser.
        html_parser = etree.HTMLParser(remove_blank_text=True, remove_comments=True)
        build_tree = etree.parse if is_file else etree.fromstring
        self.html_tree = build_tree(self.html_data, html_parser)

Source File: animal.py From yui with GNU Affero General Public License v3.0

5 votes

async def get_fox_image_url(timeout: float) -> str:
    """Return the ``src`` of the first thumbnail in the fox gallery page.

    The function is a coroutine: its body uses ``async with`` and ``await``,
    which are only valid inside ``async def``.

    :param timeout: timeout handed to ``async_timeout`` for the download
    :return: ``src`` attribute of the first ``#gallery-1
             img.attachment-thumbnail`` element, converted with ``str()``
    :raises APIServerError: when the page contains no matching image
    """
    url = 'http://fox-info.net/fox-gallery'
    async with async_timeout.timeout(timeout=timeout):
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as resp:
                data = await resp.text()
    h = html.fromstring(data)
    image_els = h.cssselect('#gallery-1 img.attachment-thumbnail')
    try:
        return str(image_els[0].get('src'))
    except IndexError:
        raise APIServerError

Source File: _utils.py From sec-edgar-downloader with MIT License

5 votes

def extract_elements_from_xml(xml_byte_object, xpath_selector):
    """Parse ``xml_byte_object`` and evaluate ``xpath_selector`` on its root.

    The expression is evaluated with ``W3_NAMESPACE`` as namespace map and
    the XPath result is returned unchanged.
    """
    return etree.fromstring(xml_byte_object).xpath(
        xpath_selector, namespaces=W3_NAMESPACE
    )

Source File: create_pascal_tf_record.py From ros_people_object_detection_tensorflow with Apache License 2.0

5 votes

def main(_):
  """Convert PASCAL VOC annotations into a single TFRecord file.

  ``FLAGS.set`` and ``FLAGS.year`` are validated against ``SETS`` and
  ``YEARS``; a year of 'merged' processes both VOC2007 and VOC2012. Every
  example listed for the chosen set is parsed from its annotation XML,
  converted with ``dict_to_tf_example`` and written to ``FLAGS.output_path``.

  Raises:
    ValueError: if ``FLAGS.set`` or ``FLAGS.year`` is not an allowed value.
  """
  if FLAGS.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))
  if FLAGS.year not in YEARS:
    raise ValueError('year must be in : {}'.format(YEARS))

  data_dir = FLAGS.data_dir
  years = [FLAGS.year] if FLAGS.year != 'merged' else ['VOC2007', 'VOC2012']

  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

  for year in years:
    logging.info('Reading from PASCAL %s dataset.', year)
    set_file = 'aeroplane_' + FLAGS.set + '.txt'
    examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main', set_file)
    annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
    examples_list = dataset_util.read_examples_list(examples_path)
    for idx, example in enumerate(examples_list):
      if idx % 100 == 0:
        logging.info('On image %d of %d', idx, len(examples_list))
      annotation_path = os.path.join(annotations_dir, example + '.xml')
      with tf.gfile.GFile(annotation_path, 'r') as fid:
        xml_str = fid.read()
      annotation = etree.fromstring(xml_str)
      data = dataset_util.recursive_parse_xml_to_dict(annotation)['annotation']

      record = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                  FLAGS.ignore_difficult_instances)
      writer.write(record.SerializeToString())

  writer.close()

Source File: utils.py From rtcclient with Apache License 2.0

5 votes

def remove_empty_elements(docs):
    """Strip childless, textless elements from an XML document.

    Every element matched by ``//*[not(node())]`` is removed from its
    parent unless its serialization contains ``rdf:resource``.

    :param docs: XML document as ``str`` (encoded to UTF-8 for parsing)
    :return: the serialized document after removal
    """
    root = etree.fromstring(bytes(docs, 'utf-8'))
    empties = root.xpath("//*[not(node())]")
    for node in empties:
        serialized = str(etree.tostring(node))
        if "rdf:resource" in serialized:
            # Elements that only carry an rdf:resource reference are kept.
            continue
        node.getparent().remove(node)

    return etree.tostring(root)

Source File: wiki_parse.py From adam_qas with GNU General Public License v3.0

5 votes

def __init__(self, pageid):
        """Load the stored article for ``pageid`` and parse its raw HTML.

        ``self.html_data`` and ``self.html_tree`` are only set when the
        article lookup returns data containing the ``__wiki_raw__`` key.
        """
        self.page_id = pageid
        self.new_line_non_break_pattern = re.compile(self.new_line_non_break_regex)
        self.es_ops = ElasticSearchOperate()
        wiki_data = self.es_ops.get_wiki_article(pageid)
        if wiki_data is None or __wiki_raw__ not in wiki_data:
            return
        self.html_data = wiki_data[__wiki_raw__]
        # Blank text and comments are dropped by the parser.
        html_parser = etree.HTMLParser(remove_blank_text=True, remove_comments=True)
        self.html_tree = etree.fromstring(self.html_data, html_parser)

Source File: create_pet_tf_record.py From ros_people_object_detection_tensorflow with Apache License 2.0

5 votes

def create_tf_record(output_filename,
                     label_map_dict,
                     annotations_dir,
                     image_dir,
                     examples):
  """Writes the given examples to a TFRecord file.

  Each example is read from ``<annotations_dir>/xmls/<example>.xml``;
  examples whose annotation file does not exist are logged and skipped.

  Args:
    output_filename: Path the TFRecord file is written to.
    label_map_dict: The label map dictionary, passed to dict_to_tf_example.
    annotations_dir: Directory containing the ``xmls`` annotation folder.
    image_dir: Directory where image files are stored, passed to
      dict_to_tf_example.
    examples: Names of the examples to parse and save.
  """
  writer = tf.python_io.TFRecordWriter(output_filename)
  for idx, example in enumerate(examples):
    if idx % 100 == 0:
      logging.info('On image %d of %d', idx, len(examples))
    annotation_path = os.path.join(annotations_dir, 'xmls', example + '.xml')

    if os.path.exists(annotation_path):
      with tf.gfile.GFile(annotation_path, 'r') as fid:
        xml_str = fid.read()
      annotation = etree.fromstring(xml_str)
      data = dataset_util.recursive_parse_xml_to_dict(annotation)['annotation']

      record = dict_to_tf_example(data, label_map_dict, image_dir)
      writer.write(record.SerializeToString())
    else:
      logging.warning('Could not find %s, ignoring example.', annotation_path)

  writer.close()


# TODO: Add test for pet/PASCAL main files.

Source File: company.py From py-edgar with GNU General Public License v3.0

5 votes

def get_all_filings(self, filing_type="", prior_to="", ownership="include", no_of_entries=100):
      """Fetch the filings page for the given filters and return it parsed.

      The arguments are forwarded unchanged to ``self.get_filings_url``; the
      content of the fetched page is parsed with ``html.fromstring``.
      """
      filings_url = self.get_filings_url(filing_type, prior_to, ownership, no_of_entries)
      return html.fromstring(self._get(filings_url).content)

Source File: test_middleware.py From ec2-api with Apache License 2.0

5 votes

def _extract_code(self, result):
        tree = etree.fromstring(result.body)
        return tree.findall('./Errors')[0].find('Error/Code').text

Source File: bugowner.py From openSUSE-release-tools with GNU General Public License v2.0

5 votes

def is_release_manager(self, name):
        """Tell whether ``name`` belongs to the release managers.

        The member set is built on first use from the ``userid`` of every
        ``person`` element in the 'sle-release-managers' group document and
        kept in ``self.release_managers`` for later calls.
        """
        if self.release_managers is None:
            managers = self.release_managers = set()
            url = self.makeurl(['group', 'sle-release-managers'])
            root = ET.fromstring(self.cached_GET(url))
            managers.update(
                person.get('userid')
                for person in root.findall('.//person[@userid]'))
            # XXX: hardcoded bot
            managers.add('leaper')
            logger.debug("release managers %s", self.release_managers)

        return name in self.release_managers

Source File: check_tests.py From openSUSE-release-tools with GNU General Public License v2.0

5 votes

def test_check_command_single(self):
        """Validate json conversion for a single project."""

        wf = OBSLocal.StagingWorkflow()
        wf.create_staging('H')
        self.checkcommand = CheckCommand(wf.api)

        # Serve the stored fixture in place of the real project status.
        fixture = 'tests/fixtures/project/staging_projects/openSUSE:Factory/H.xml'
        with open(fixture, encoding='utf-8') as f:
            status_xml = etree.fromstring(f.read())
        wf.api.project_status = MagicMock(return_value=status_xml)

        report = self.checkcommand._check_project('openSUSE:Factory:Staging:H')
        actual = '\n'.join(report).strip()
        self.assertMultiLineEqual(actual, H_REPORT.strip())

Source File: test_middleware.py From ec2-api with Apache License 2.0

5 votes

def _extract_message(self, result):
        tree = etree.fromstring(result.body)
        return tree.findall('./Errors')[0].find('Error/Message').text

Source File: test_apirequest.py From ec2-api with Apache License 2.0

5 votes

def _compare_aws_xml(self, root_tag, xmlns, request_id, dict_data,
                         observed):
        """Assert that ``observed`` XML carries the expected namespace and data.

        The default namespace of the root must equal ``xmlns``, and the
        parsed document must match ``dict_data`` extended with the
        ``requestId``, nested under ``root_tag``.
        """
        # NOTE(ft): matchers.XMLMatches cannot be used here because it
        # compares tags in document order
        default_ns = etree.fromstring(observed).nsmap.get(None)
        self.assertEqual(xmlns, default_ns)
        observed_data = tools.parse_xml(observed)
        expected_body = tools.update_dict(dict_data, {'requestId': request_id})
        expected = {root_tag: expected_body}
        self.assertThat(observed_data, matchers.DictMatches(expected))

Source File: SamlSigner.py From ADFSpoof with Apache License 2.0

5 votes

def sign_XML(self, params, id_attribute, algorithm, digest):
        """Fill the SAML template, sign it and return the signed bytes.

        ``params`` is substituted into ``self.saml_template``; the resulting
        document is signed with ``self.key``/``self.cert`` using exclusive
        C14N and the given signature and digest algorithms, referencing
        ``params['AssertionID']``. Newlines and the BEGIN/END CERTIFICATE
        markers are removed from the serialized result.
        """
        saml_string = string.Template(self.saml_template).substitute(params)
        document = etree.fromstring(saml_string)

        signer = XMLSigner(
            c14n_algorithm="http://www.w3.org/2001/10/xml-exc-c14n#",
            signature_algorithm=algorithm,
            digest_algorithm=digest,
        )
        signed_xml = signer.sign(
            document,
            key=self.key,
            cert=[self.cert],
            reference_uri=params.get('AssertionID'),
            id_attribute=id_attribute,
        )
        serialized = etree.tostring(signed_xml).replace(b'\n', b'')
        return re.sub(b'-----(BEGIN|END) CERTIFICATE-----', b'', serialized)

Python lxml.etree.fromstring() Examples