Python lxml() Examples

The following are 30 code examples of lxml(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module lxml.etree, or try the search function.
Example #1
Source File: tags.py    From maas with GNU Affero General Public License v3.0 6 votes vote down vote up
def _details_prepare_merge(details):
    """Copy `details` and build the empty root element for a merge.

    :param details: mapping whose keys are namespace names.
    :return: a ``(details_copy, root)`` tuple, where ``root`` is a new
        ``list`` element whose nsmap maps every key of `details` to
        itself as both prefix and namespace.
    """
    # Work on a shallow copy so that later mutation cannot leak back
    # into the mapping owned by the caller.
    copied = details.copy()

    # Insert namespaces in sorted order into an OrderedDict so that the
    # namespace declarations are serialized in a stable order.
    namespaces = OrderedDict()
    for ns in sorted(copied):
        namespaces[ns] = ns

    # The root is a namespace-less "list" element; declaring the nsmap
    # here keeps the prefixes when the document is dumped later.
    return copied, etree.Element("list", nsmap=namespaces)
Example #2
Source File: tags.py    From maas with GNU Affero General Public License v3.0 6 votes vote down vote up
def _details_do_merge(details, root):
    """Parse each detail document and append it beneath `root`.

    :param details: mapping of namespace name to XML data (or ``None``).
    :param root: element that receives one child per parsed detail.
    :return: a new ``etree.ElementTree`` rooted at `root`.

    Entries whose value is ``None`` are skipped. Entries that fail to
    parse are logged as a warning and skipped as well.
    """
    for namespace in sorted(details):
        xmldata = details[namespace]
        if xmldata is None:
            continue
        try:
            detail = etree.fromstring(xmldata)
        except etree.XMLSyntaxError as e:
            maaslog.warning("Invalid %s details: %s", namespace, e)
            continue
        # Move every unqualified element into this detail's namespace.
        for elem in detail.iter("{}*"):
            elem.tag = etree.QName(namespace, elem.tag)
        root.append(detail)

    # Wrap `root` in its own tree so that absolute XPath expressions
    # such as "/some-tag" are evaluated starting from `root`.
    return etree.ElementTree(root)
Example #3
Source File: tags.py    From maas with GNU Affero General Public License v3.0 6 votes vote down vote up
def merge_details_cleanly(details):
    """Combine node details into one XML document.

    `details` is expected to look like::

      {"name": xml-as-bytes, "name2": xml-as-bytes, ...}

    Each key is used as the namespace (and prefix) under which that
    detail's XML is placed in the combined document; elements of a
    detail document that carry no namespace are moved into it.

    Unlike `merge_details`, the ``lshw`` detail gets no special
    treatment here, so the result is not compatible with XPath
    expressions written for old releases of MAAS.

    The document returned always has a ``list`` element as its root.
    """
    # The prepare step yields exactly the (details, root) pair that the
    # merge step takes as its two positional arguments.
    return _details_do_merge(*_details_prepare_merge(details))
Example #4
Source File: lxmlGramplet.py    From addons-source with GNU General Public License v2.0 6 votes vote down vote up
def post(self, html):
        """
        Read a local HTML file through ``urllib2`` and log its text nodes.

        :param html: path of the HTML file; it is interpolated into a
            ``file://`` URL.
        """

        import urllib2
        from contextlib import closing

        # closing() guarantees the response handle is released, even when
        # read() raises, and works whether or not the response object
        # supports the with-statement on its own.
        with closing(urllib2.urlopen('file://%s' % html)) as response:
            data = response.read()

        post = etree.HTML(data)

        # Compiled expression that selects every text node of the document.
        find_text = etree.XPath("//text()", smart_strings=False)

        LOG.info(find_text(post))

        # Drop the parsed content once it has been logged.
        post.clear()
Example #5
Source File: test_config_forms.py    From maas with GNU Affero General Public License v3.0 6 votes vote down vote up
def test_DictCharWidget_renders_with_empty_string_as_input_data(self):
        """Rendering with "" as the value still emits names and labels."""
        field_names = [factory.make_string(), factory.make_string()]
        field_initials = []
        field_labels = [factory.make_string(), factory.make_string()]
        widget_classes = [
            widgets.TextInput,
            widgets.TextInput,
            widgets.CheckboxInput,
        ]
        widget = DictCharWidget(
            widget_classes,
            field_names,
            field_initials,
            field_labels,
            skip_check=True,
        )
        name = factory.make_string()
        # Wrap the rendered markup in a single root so it parses as one
        # document.
        rendered = "<root>" + widget.render(name, "") + "</root>"
        html_widget = fromstring(rendered)
        found_names = XPath("fieldset/input/@name")(html_widget)
        found_labels = XPath("fieldset/label/text()")(html_widget)
        expected_names = []
        for widget_name in field_names:
            expected_names.append("%s_%s" % (name, widget_name))
        self.assertEqual(
            [expected_names, field_labels], [found_names, found_labels]
        )
Example #6
Source File: populate_tags.py    From maas with GNU Affero General Public License v3.0 6 votes vote down vote up
def populate_tag_for_multiple_nodes(tag, nodes, batch_size=DEFAULT_BATCH_SIZE):
    """Reevaluate a single tag against multiple nodes.

    Presumably this tag's expression has recently changed. Use
    `populate_tags` when many nodes need reevaluating AND there are rack
    controllers available to which to farm out work. Use this only when
    many nodes need reevaluating locally, i.e. when there are no rack
    controllers connected.

    :param tag: the tag whose ``definition`` is evaluated and whose
        ``node_set`` is updated.
    :param nodes: the nodes to evaluate.
    :param batch_size: how many nodes are handled per batch.
    """
    # One expression, many documents: compile it once and bind it into
    # the matcher that is reused for every batch.
    compiled = etree.XPath(tag.definition, namespaces=tag_nsmap)
    matcher = partial(try_match_xpath, compiled, logger=maaslog)
    # The XML details documents can be large, so work in batches.
    for batch in gen_batches(nodes, batch_size):
        details_by_system_id = get_probed_details(batch)
        docs_by_node = {}
        for node in batch:
            docs_by_node[node] = merge_details(
                details_by_system_id[node.system_id]
            )
        matching, nonmatching = classify(matcher, docs_by_node.items())
        tag.node_set.remove(*nonmatching)
        tag.node_set.add(*matching)
Example #7
Source File: xpath.py    From maas with GNU Affero General Public License v3.0 6 votes vote down vote up
def match_xpath(xpath, doc):
    """Evaluate expression `xpath` against document `doc`.

    :type xpath: Either `unicode` or `etree.XPath`
    :type doc: Either `etree._ElementTree` or `etree.XPathDocumentEvaluator`

    :return: whatever the underlying evaluation produces; the value is
        passed through unchanged and is not coerced to a boolean.
    """
    xpath_is_compiled = is_compiled_xpath(xpath)

    if is_compiled_doc(doc):
        # A document evaluator is called with the plain expression
        # text, so unwrap a compiled expression first.
        expression = xpath.path if xpath_is_compiled else xpath
        return doc(expression)

    if xpath_is_compiled:
        return xpath(doc)

    return doc.xpath(xpath)
Example #8
Source File: test_parsing.py    From bellybutton with MIT License 6 votes vote down vote up
def test_parse_rule():
    """Check that parse_rule builds the Rule described by its inputs."""
    expr = XPath("//Num")
    rule_values = {
        "description": '',
        "expr": expr,
        "example": "a = 1",
        "instead": "a = int('1')",
        "settings": Settings(included=[], excluded=[], allow_ignore=True),
    }
    parsed = parse_rule(rule_name='', rule_values=rule_values)
    expected = Rule(
        name='',
        description='',
        expr=expr,
        example="a = 1",
        instead="a = int('1')",
        settings=Settings(included=[], excluded=[], allow_ignore=True),
    )
    assert parsed == expected
Example #9
Source File: xpath.py    From maas with GNU Affero General Public License v3.0 6 votes vote down vote up
def try_match_xpath(xpath, doc, logger=logging):
    """Report whether the XPath expression matches the XML document.

    An expression that fails to evaluate is logged as a warning on
    `logger` and treated as a non-match.

    :type xpath: Either `unicode` or `etree.XPath`
    :type doc: Either `etree._ElementTree` or `etree.XPathDocumentEvaluator`
    :param logger: object whose ``warning`` method receives the message.

    :rtype: bool
    """
    try:
        # Evaluation may yield a list, a string, or other types;
        # truthiness is used as the match test for all of them.
        matched = bool(match_xpath(xpath, doc))
    except etree.XPathEvalError as error:
        # Log the plain-text form of the expression.
        if is_compiled_xpath(xpath):
            expr = xpath.path
        else:
            expr = xpath
        logger.warning("Invalid expression '%s': %s", expr, str(error))
        return False
    return matched
Example #10
Source File: test_xpath.py    From maas with GNU Affero General Public License v3.0 6 votes vote down vote up
def scenario(name, xpath, doc, expected_result, expected_log=""):
        """Build one `testscenarios` scenario for `try_match_xpath`.

        This helper cuts the boilerplate needed when listing scenario
        inputs.

        :param name: the scenario name.
        :param xpath: the XPath expression under test.
        :param doc: XML source; it is parsed and stored as a tree.
        :param expected_result: whether the expression should match.
        :param expected_log: expected log output; it is dedented.
        :return: a ``(name, parameters)`` tuple.
        """
        tree = etree.fromstring(doc).getroottree()
        parameters = {
            "xpath": xpath,
            "doc": tree,
            "expected_result": expected_result,
            "expected_log": dedent(expected_log),
        }
        return name, parameters

    # Exercise try_match_xpath with a variety of different inputs. 
Example #11
Source File: test_tags.py    From maas with GNU Affero General Public License v3.0 5 votes vote down vote up
def test_merges_into_new_tree(self):
        """The merged result is a tree whose root can be found as /list."""
        details = {
            "lshw": b"<list><foo>Hello</foo></list>",
            "lldp": b"<node><foo>Hello</foo></node>",
        }
        xml = self.do_merge_details(details)
        # Only tree objects have a getroot() method; elements do not.
        self.assertThat(xml, MatchesStructure(getroot=IsCallable()))
        # An absolute XPath expression evaluated on the tree must find
        # the list tag.
        found_tags = []
        for elem in xml.xpath("/list"):
            found_tags.append(elem.tag)
        self.assertSequenceEqual(["list"], found_tags)
Example #12
Source File: tags.py    From maas with GNU Affero General Public License v3.0 5 votes vote down vote up
def process_node_tags(
    rack_id,
    nodes,
    tag_name,
    tag_definition,
    tag_nsmap,
    client,
    batch_size=None,
):
    """Update the nodes for a new or changed tag definition.

    :param rack_id: System ID for the rack controller.
    :param nodes: List of nodes to process tags for; each entry is
        indexed by ``"system_id"``.
    :param tag_name: Name of the tag to update nodes for.
    :param tag_definition: Tag definition (an XPath expression).
    :param tag_nsmap: Namespace mapping used to compile the definition.
    :param client: A `MAASClient` used to fetch the node's details via
        calls to the web API.
    :param batch_size: Size of batch.
    """
    # Compile first: a bad definition fails here, before any data has
    # been sent to the server.
    compiled_definition = etree.XPath(tag_definition, namespaces=tag_nsmap)
    process_all(
        client,
        rack_id,
        tag_name,
        tag_definition,
        [node["system_id"] for node in nodes],
        compiled_definition,
        batch_size=batch_size,
    )
Example #13
Source File: xpath.py    From maas with GNU Affero General Public License v3.0 5 votes vote down vote up
def is_compiled_xpath(xpath):
    """Return True when `xpath` is an instance of `etree.XPath`."""
    compiled_type = etree.XPath
    return isinstance(xpath, compiled_type)
Example #14
Source File: test_xpath.py    From maas with GNU Affero General Public License v3.0 5 votes vote down vote up
def test_logs_to_specified_logger(self):
        """Warnings go to the logger passed in, not to the root logger."""
        xpath = etree.XPath("/foo:bar")
        doc = etree.XML("<foo/>")
        root_logger = self.useFixture(FakeLogger())
        callers_logger = Mock()
        try_match_xpath(xpath, doc, callers_logger)
        # Nothing may reach the root logger.
        self.assertEqual("", root_logger.output)
        expected_call = MockCalledOnceWith(
            "Invalid expression '%s': %s",
            "/foo:bar",
            "Undefined namespace prefix",
        )
        self.assertThat(callers_logger.warning, expected_call)
Example #15
Source File: svg2tikz.py    From svg2tikz with GNU Lesser General Public License v3.0 5 votes vote down vote up
def escape_text(self, txt):
        """Return `txt` with every key of `TiKZMaker.escapes` replaced.

        Replacements are applied one after another, in the iteration
        order of the `TiKZMaker.escapes` mapping.
        """
        for needle, replacement in TiKZMaker.escapes.items():
            txt = txt.replace(needle, replacement)
        return txt

    # get_all_text = etree.XPath('.//text()') 
Example #16
Source File: __init__.py    From maas with GNU Affero General Public License v3.0 5 votes vote down vote up
def clean_definition(self):
        """Validate the ``definition`` field as an XPath expression.

        :return: the definition unchanged when it compiles, or ``""``
            when the field is empty.
        :raises ValidationError: when the definition is not valid XPath.
        """
        value = self.cleaned_data["definition"]
        if value:
            try:
                # Compile purely to validate; the result is discarded.
                etree.XPath(value)
            except etree.XPathSyntaxError as error:
                raise ValidationError(
                    "Invalid xpath expression: %s" % (error,))
            return value
        return ""
Example #17
Source File: tag.py    From maas with GNU Affero General Public License v3.0 5 votes vote down vote up
def clean_definition(self):
        """Validate ``self.definition`` as XPath when the tag is defined.

        :raises ValidationError: keyed on ``"definition"`` when the
            expression does not compile.
        """
        if not self.is_defined:
            return
        try:
            # Compile purely to validate; the result is discarded.
            etree.XPath(self.definition)
        except etree.XPathSyntaxError as error:
            message = "Invalid XPath expression: %s" % (error,)
            raise ValidationError({"definition": [message]})
Example #18
Source File: test_config_forms.py    From maas with GNU Affero General Public License v3.0 5 votes vote down vote up
def test_DictCharWidget_renders_fieldset_with_label_and_field_names(self):
        """Rendering emits the expected input names, labels and values."""
        field_names = [factory.make_string(), factory.make_string()]
        field_initials = []
        field_labels = [factory.make_string(), factory.make_string()]
        field_values = [factory.make_string(), factory.make_string()]
        widget_classes = [
            widgets.TextInput,
            widgets.TextInput,
            widgets.CheckboxInput,
        ]
        widget = DictCharWidget(
            widget_classes,
            field_names,
            field_initials,
            field_labels,
            skip_check=True,
        )
        name = factory.make_string()
        # Wrap the rendered markup in a single root so it parses as one
        # document.
        rendered = "<root>" + widget.render(name, field_values) + "</root>"
        html_widget = fromstring(rendered)
        found_names = XPath("fieldset/input/@name")(html_widget)
        found_labels = XPath("fieldset/label/text()")(html_widget)
        found_values = XPath("fieldset/input/@value")(html_widget)
        expected_names = []
        for widget_name in field_names:
            expected_names.append("%s_%s" % (name, widget_name))
        self.assertEqual(
            [expected_names, field_labels, field_values],
            [found_names, found_labels, found_values],
        )
Example #19
Source File: xmlclass.py    From OpenPLC_Editor with GNU General Public License v3.0 5 votes vote down vote up
def GenerateContentInfos(factory, name, choices):
    """Build the content description for a set of choices.

    :param factory: object providing ``TargetNamespace`` and ``NSMAP``.
    :param name: passed through to the recursive and helper calls.
    :param choices: sequence of ``(choice_name, infos)`` pairs; the first
        pair is used to compute the initial content.
    :return: dict with keys ``"type"``, ``"choices_xpath"`` (a compiled
        XPath matching any choice name) and ``"initial"`` (a callable
        returning the initial content value).
    :raises ValueError: when the same element name is registered twice.
    """
    choices_dict = {}
    for choice_name, infos in choices:
        if choice_name == "sequence":
            # Every element of a sequence is registered under its own
            # name and points at the infos of the whole sequence.
            for element in infos["elements"]:
                if element["type"] == CHOICE:
                    element["elmt_type"] = GenerateContentInfos(factory, name, ComputeContentChoices(factory, name, element))
                elif element["name"] in choices_dict:
                    # Name the element that is actually duplicated;
                    # choice_name is always "sequence" in this branch.
                    raise ValueError("'%s' element defined two times in choice" % element["name"])
                else:
                    choices_dict[element["name"]] = infos
        else:
            if choice_name in choices_dict:
                raise ValueError("'%s' element defined two times in choice" % choice_name)
            choices_dict[choice_name] = infos
    prefix = ("%s:" % factory.TargetNamespace
              if factory.TargetNamespace is not None else "")
    # Union of all registered names, each qualified with the prefix.
    choices_xpath = "|".join(prefix + choice for choice in choices_dict)

    def GetContentInitial():
        """Return the initial content value for the first choice."""
        content_name, infos = choices[0]
        if content_name == "sequence":
            content_value = []
            # Repeat the elements of the sequence minOccurs times.
            for dummy in xrange(infos["minOccurs"]):
                for element_infos in infos["elements"]:
                    content_value.extend(GetElementInitialValue(factory, element_infos))
        else:
            content_value = GetElementInitialValue(factory, infos)
        return content_value

    return {
        "type": COMPLEXTYPE,
        "choices_xpath": etree.XPath(choices_xpath, namespaces=factory.NSMAP),
        "initial": GetContentInitial,
    }

# -------------------------------------------------------------------------------
#                           Structure extraction functions
# ------------------------------------------------------------------------------- 
Example #20
Source File: etherlab.py    From OpenPLC_Editor with GNU General Public License v3.0 5 votes vote down vote up
def EtherCATInfo_XPath(xpath):
    """Compile `xpath` into an `etree.XPath` with no namespace mapping."""
    compiled = etree.XPath(xpath)
    return compiled
Example #21
Source File: plcopen.py    From OpenPLC_Editor with GNU General Public License v3.0 5 votes vote down vote up
def PLCOpen_XPath(xpath):
    """Compile `xpath` using the namespace map of `PLCOpenParser`."""
    namespace_map = PLCOpenParser.NSMAP
    return etree.XPath(xpath, namespaces=namespace_map)
Example #22
Source File: samlauthenticator.py    From jupyterhub-samlauthenticator with MIT License 5 votes vote down vote up
def _get_roles_from_saml_etree(self, signed_xml):
        if self.xpath_role_location:
            xpath_with_namespaces = self._make_xpath_builder()
            xpath_fun = xpath_with_namespaces(self.xpath_role_location)
            xpath_result = xpath_fun(signed_xml)

            if xpath_result:
                return xpath_result

            self.log.warning('Could not find role from role XPath')
        else:
            self.log.warning('Role XPath not set')

        return [] 
Example #23
Source File: request_splitter.py    From openSUSE-release-tools with GNU General Public License v2.0 5 votes vote down vote up
def group_by(self, xpath, required=False):
        """Register `xpath` as a grouping expression.

        :param xpath: XPath text; it is compiled before being stored.
        :param required: when true, the same expression is also added
            as a filter.
        """
        compiled = ET.XPath(xpath)
        self.groups.append(compiled)
        if not required:
            return
        self.filter_add(xpath)
Example #24
Source File: request_splitter.py    From openSUSE-release-tools with GNU General Public License v2.0 5 votes vote down vote up
def filter_add(self, xpath):
        """Compile `xpath` and append it to ``self.filters``."""
        compiled = ET.XPath(xpath)
        self.filters.append(compiled)
Example #25
Source File: request_splitter.py    From openSUSE-release-tools with GNU General Public License v2.0 5 votes vote down vote up
def group_by(self, xpath, required=False):
        """Register `xpath` as a grouping expression.

        :param xpath: XPath text; it is compiled before being stored.
        :param required: when true, the same expression is also added
            as a filter.
        """
        compiled = ET.XPath(xpath)
        self.groups.append(compiled)
        if not required:
            return
        self.filter_add(xpath)
Example #26
Source File: dumpgenerator.py    From wikiteam with GNU General Public License v3.0 5 votes vote down vote up
def makeXmlPageFromRaw(xml):
    """Serialize the first <page> element found in a <mediawiki> string.

    The metadata surrounding the page is discarded. An input without
    any ``page`` element raises ``IndexError``.
    """
    document = etree.XML(xml)
    # Match on the local name so the search works whatever namespace
    # the export declares.
    page_finder = etree.XPath("//*[local-name() = 'page']")
    first_page = page_finder(document)[0]
    # The serialized tag inherits the namespace declarations, like:
    # <page xmlns="http://www.mediawiki.org/xml/export-0.10/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    # FIXME: pretty_print doesn't seem to work, only adds a newline
    return etree.tostring(first_page, pretty_print=True)
Example #27
Source File: naive_util.py    From Ossian with Apache License 2.0 5 votes vote down vote up
def final_attribute_name(xpath):
    """
    Return the final text fragment of an xpath, which is assumed to be
    the name of an attribute.

    :param xpath: an XPath expression, either as a string or as a
        compiled `XPath` object (whose ``path`` is used).
    :return: the text after the last run of ``/``, ``:``, ``@``, ``(``
        or ``)`` characters; this is empty when the expression ends
        with one of them.

    TODO: find a better and less error-prone way to do this!
    """
    if isinstance(xpath, XPath):  # in case compiled
        pathstring = xpath.path
    else:
        pathstring = xpath
    # Raw string: inside a character class the parentheses need no
    # escaping, and this avoids invalid "\(" escapes in a normal literal.
    fragments = re.split(r"[/:@()]+", pathstring)
    return fragments[-1]
Example #28
Source File: samlauthenticator.py    From jupyterhub-samlauthenticator with MIT License 5 votes vote down vote up
def _make_xpath_builder(self):
        """Return a function that compiles XPath text with SAML prefixes.

        The returned callable takes an XPath string and returns an
        `etree.XPath` that knows the ``ds``, ``md``, ``saml`` and
        ``samlp`` prefixes.
        """
        saml_namespaces = {}
        saml_namespaces['ds'] = 'http://www.w3.org/2000/09/xmldsig#'
        saml_namespaces['md'] = 'urn:oasis:names:tc:SAML:2.0:metadata'
        saml_namespaces['saml'] = 'urn:oasis:names:tc:SAML:2.0:assertion'
        saml_namespaces['samlp'] = 'urn:oasis:names:tc:SAML:2.0:protocol'

        def xpath_with_namespaces(xpath_str):
            compiled = etree.XPath(xpath_str, namespaces=saml_namespaces)
            return compiled

        return xpath_with_namespaces
Example #29
Source File: samlauthenticator.py    From jupyterhub-samlauthenticator with MIT License 5 votes vote down vote up
def _get_username_from_saml_etree(self, signed_xml):
        """Evaluate the configured username XPath against `signed_xml`.

        :param signed_xml: the document handed to the compiled XPath.
        :return: the result itself when it is a single
            `etree._ElementUnicodeResult`, the first item when it is a
            non-empty list, or ``None`` (after a warning) otherwise.
        """
        build_xpath = self._make_xpath_builder()
        find_username = build_xpath(self.xpath_username_location)
        found = find_username(signed_xml)

        if isinstance(found, etree._ElementUnicodeResult):
            return found

        # Only an exact, non-empty list counts as a usable result.
        if type(found) is not list or not found:
            self.log.warning('Could not find name from name XPath')
            return None

        return found[0]
Example #30
Source File: html.py    From capybara.py with MIT License 5 votes vote down vote up
def css(self, css):
        """Translate a CSS selector to XPath and run it on ``self.tree``."""
        xpath_expression = HTMLTranslator().css_to_xpath(css)
        finder = etree.XPath(xpath_expression)
        return finder(self.tree)