Python lxml.etree.Comment() Examples
The following are 30 code examples of lxml.etree.Comment().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module lxml.etree, or try the search function.
Example #1
Source File: reference.py From pyfixmsg with Apache License 2.0 | 6 votes |
def _extract_composition(element, spec):
    """
    Parse XML spec to extract the composition of a nested structure
    (Component, Group or MsgType).

    Returns a list of ``(member, required)`` tuples, one per processed
    child of ``element``; ``required`` is True when the child's
    ``required`` attribute equals ``"Y"``.

    Raises ValueError for a child whose tag is not handled.
    """
    composition = []
    for child in list(element):
        tag = child.tag
        if tag == "field":
            member = spec.tags.by_name(child.get('name'))
        elif tag == 'component':
            member = Component(child, spec)
        elif tag == 'group':
            member = Group.from_element(child, spec)
        elif (parse.__module__ == 'lxml.etree') and (tag == Comment):
            # Comment nodes are only skipped when the lxml parser is in use.
            continue
        else:
            raise ValueError("Could not process element '{}'".format(tag))
        composition.append((member, child.get('required') == "Y"))
    return composition
Example #2
Source File: process_includes.py From nyoka with Apache License 2.0 | 6 votes |
def collect_inserts_aux(child, params, inserts, options):
    """Resolve the reference in ``child`` and collect the nodes to insert.

    When ``resolve_ref`` yields content, it is parsed and every non-comment
    top-level node whose tag is not one of the skipped tags is appended to
    ``inserts``, preceded by a comment holding the serialized ``child``.
    The parsed root is then passed to ``collect_inserts``.

    ``params.base_url`` is restored to its entry value before returning.
    Returns the list of parsed roots (this one plus any nested ones).
    """
    saved_base_url = params.base_url
    roots = []
    content = resolve_ref(child, params, options)
    if content is not None:
        root = etree.fromstring(content, base_url=params.base_url)
        roots.append(root)
        for node in root:
            if isinstance(node, etree._Comment):
                continue
            namespace = node.nsmap[node.prefix]
            # NOTE(review): the second pattern has no closing brace or local
            # name, so it looks truncated -- confirm the intended tag.
            skipped_tags = (
                '{%s}include' % (namespace, ),
                '{%s' % (namespace, ),
            )
            if node.tag not in skipped_tags:
                marker = etree.Comment(etree.tostring(child))
                marker.tail = '\n'
                inserts.append(marker)
                inserts.append(node)
        roots.extend(collect_inserts(root, params, inserts, options))
    params.base_url = saved_base_url
    return roots
Example #3
Source File: etree_lxml.py From stopstalk-deployment with MIT License | 5 votes |
def getNodeDetails(self, node):
    """Return the details tuple describing ``node``.

    The first item of the returned tuple is one of the ``base`` node-type
    constants (TEXT, DOCUMENT, DOCTYPE, COMMENT, ENTITY or ELEMENT); the
    remaining items depend on that type.
    """
    # Text is passed as an (element, "text" | "tail") pair.
    if isinstance(node, tuple):
        node, key = node
        assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
        return base.TEXT, ensure_str(getattr(node, key))

    if isinstance(node, Root):
        return (base.DOCUMENT,)

    if isinstance(node, Doctype):
        return base.DOCTYPE, node.name, node.public_id, node.system_id

    if isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
        return base.TEXT, ensure_str(node.obj)

    if node.tag == etree.Comment:
        return base.COMMENT, ensure_str(node.text)

    if node.tag == etree.Entity:
        # Drop the leading "&" and the trailing ";".
        return base.ENTITY, ensure_str(node.text)[1:-1]

    # Anything else is assumed to be an ordinary element.
    qualified = tag_regexp.match(ensure_str(node.tag))
    if qualified:
        namespace, tag = qualified.groups()
    else:
        namespace, tag = None, ensure_str(node.tag)

    attrs = {}
    for raw_name, raw_value in list(node.attrib.items()):
        attr_name = ensure_str(raw_name)
        attr_value = ensure_str(raw_value)
        parts = tag_regexp.match(attr_name)
        if parts:
            attr_key = (parts.group(1), parts.group(2))
        else:
            attr_key = (None, attr_name)
        attrs[attr_key] = attr_value

    return (base.ELEMENT, namespace, self.filter.fromXmlName(tag),
            attrs, len(node) > 0 or node.text)
Example #4
Source File: _html5builder.py From learn_python3_spider with MIT License | 5 votes |
def __init__(self, *args, **kwargs):
    """Select the element and comment classes, then run the base initializer.

    Elements come from the builder for ``html``; comments come from the
    builder for ``etree``. All arguments are forwarded unchanged to
    ``_base.TreeBuilder.__init__``.
    """
    html_module = etree_builders.getETreeModule(html, fullTree=False)
    xml_module = etree_builders.getETreeModule(etree, fullTree=False)
    self.elementClass = html_module.Element
    self.commentClass = xml_module.Comment
    _base.TreeBuilder.__init__(self, *args, **kwargs)
Example #5
Source File: _html5builder.py From stopstalk-deployment with MIT License | 5 votes |
def __init__(self, *args, **kwargs):
    """Select the element and comment classes, then run the base initializer.

    Elements come from the builder for ``html``; comments come from the
    builder for ``etree``. All arguments are forwarded unchanged to
    ``_base.TreeBuilder.__init__``.
    """
    html_module = etree_builders.getETreeModule(html, fullTree=False)
    xml_module = etree_builders.getETreeModule(etree, fullTree=False)
    self.elementClass = html_module.Element
    self.commentClass = xml_module.Comment
    _base.TreeBuilder.__init__(self, *args, **kwargs)
Example #6
Source File: clean.py From learn_python3_spider with MIT License | 5 votes |
def kill_conditional_comments(self, doc):
    """
    IE conditional comments basically embed HTML that the parser
    doesn't normally see.  We can't allow anything like that, so
    we'll kill any comments that could be conditional.

    Delegates to ``self._kill_elements``, restricted to ``etree.Comment``
    nodes, with a predicate that searches each comment's text using
    ``_conditional_comment_re``.
    """
    # The unused ``bad`` list from the previous version has been removed.
    self._kill_elements(
        doc, lambda el: _conditional_comment_re.search(el.text),
        etree.Comment)
Example #7
Source File: markup.py From ChemDataExtractor with MIT License | 5 votes |
def _parse_element_r(self, el, specials, refs, id=None, element_cls=Paragraph):
    """Recursively parse HTML/XML element and its children into a list of Document elements.

    Comments and processing instructions produce an empty list. An element
    found in ``specials`` returns its stored value as-is. Otherwise the
    element's text, each child's parsed elements and each child's tail are
    collected in document order, merging adjacent inline Text/Sentence
    elements of the same type.
    """
    non_elements = {etree.Comment, etree.ProcessingInstruction}
    if el.tag in non_elements:
        return []
    if el in specials:
        return specials[el]

    id = el.get('id', id)
    references = refs.get(el, [])

    elements = []
    if el.text is not None:
        elements.append(element_cls(six.text_type(el.text), id=id, references=references))
    elif references:
        elements.append(element_cls('', id=id, references=references))

    for child in el:
        # br is a special case - technically inline, but we want to split
        if child.tag not in non_elements and child.tag.lower() == 'br':
            elements.append(element_cls(''))

        child_elements = self._parse_element_r(
            child, specials=specials, refs=refs, id=id, element_cls=element_cls)

        # Fuse the first child element into the preceding one when the child
        # is inline and both are Text/Sentence instances of the same type.
        if self._is_inline(child) and len(elements) > 0 and len(child_elements) > 0:
            if (isinstance(elements[-1], (Text, Sentence))
                    and isinstance(child_elements[0], (Text, Sentence))
                    and type(elements[-1]) == type(child_elements[0])):
                elements[-1] += child_elements.pop(0)
        elements.extend(child_elements)

        if child.tail is None:
            continue
        if self._is_inline(child) and len(elements) > 0 and isinstance(elements[-1], element_cls):
            elements[-1] += element_cls(six.text_type(child.tail), id=id)
        else:
            elements.append(element_cls(six.text_type(child.tail), id=id))
    return elements
Example #8
Source File: markup.py From ChemDataExtractor with MIT License | 5 votes |
def _is_inline(self, element):
    """Return True if an element is inline.

    Comments and processing instructions are never inline; any other
    element is inline when its lower-cased tag is in
    ``self.inline_elements``.
    """
    tag = element.tag
    if tag in {etree.Comment, etree.ProcessingInstruction}:
        return False
    return tag.lower() in self.inline_elements
Example #9
Source File: etree_lxml.py From python with Apache License 2.0 | 5 votes |
def getNodeDetails(self, node):
    """Return the details tuple describing ``node``.

    The first item of the returned tuple is one of the ``base`` node-type
    constants (TEXT, DOCUMENT, DOCTYPE, COMMENT, ENTITY or ELEMENT); the
    remaining items depend on that type.
    """
    # Text is passed as an (element, "text" | "tail") pair.
    if isinstance(node, tuple):
        node, key = node
        assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
        return base.TEXT, ensure_str(getattr(node, key))

    if isinstance(node, Root):
        return (base.DOCUMENT,)

    if isinstance(node, Doctype):
        return base.DOCTYPE, node.name, node.public_id, node.system_id

    if isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
        return base.TEXT, ensure_str(node.obj)

    if node.tag == etree.Comment:
        return base.COMMENT, ensure_str(node.text)

    if node.tag == etree.Entity:
        # Drop the leading "&" and the trailing ";".
        return base.ENTITY, ensure_str(node.text)[1:-1]

    # Anything else is assumed to be an ordinary element.
    qualified = tag_regexp.match(ensure_str(node.tag))
    if qualified:
        namespace, tag = qualified.groups()
    else:
        namespace, tag = None, ensure_str(node.tag)

    attrs = {}
    for raw_name, raw_value in list(node.attrib.items()):
        attr_name = ensure_str(raw_name)
        attr_value = ensure_str(raw_value)
        parts = tag_regexp.match(attr_name)
        if parts:
            attr_key = (parts.group(1), parts.group(2))
        else:
            attr_key = (None, attr_name)
        attrs[attr_key] = attr_value

    return (base.ELEMENT, namespace, self.filter.fromXmlName(tag),
            attrs, len(node) > 0 or node.text)
Example #10
Source File: lxmletree.py From Flask-P2P with MIT License | 5 votes |
def getNodeDetails(self, node):
    """Return the details tuple describing ``node``.

    The first item of the returned tuple is one of the ``_base`` node-type
    constants (TEXT, DOCUMENT, DOCTYPE, COMMENT, ENTITY or ELEMENT); the
    remaining items depend on that type.
    """
    # Text is passed as an (element, "text" | "tail") pair.
    if isinstance(node, tuple):
        node, key = node
        assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
        return _base.TEXT, ensure_str(getattr(node, key))

    if isinstance(node, Root):
        return (_base.DOCUMENT,)

    if isinstance(node, Doctype):
        return _base.DOCTYPE, node.name, node.public_id, node.system_id

    if isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
        # The wrapped object is returned as-is, without ensure_str.
        return _base.TEXT, node.obj

    if node.tag == etree.Comment:
        return _base.COMMENT, ensure_str(node.text)

    if node.tag == etree.Entity:
        # Drop the leading "&" and the trailing ";".
        return _base.ENTITY, ensure_str(node.text)[1:-1]

    # Anything else is assumed to be an ordinary element.
    qualified = tag_regexp.match(ensure_str(node.tag))
    if qualified:
        namespace, tag = qualified.groups()
    else:
        namespace, tag = None, ensure_str(node.tag)

    attrs = {}
    for raw_name, raw_value in list(node.attrib.items()):
        attr_name = ensure_str(raw_name)
        attr_value = ensure_str(raw_value)
        parts = tag_regexp.match(attr_name)
        if parts:
            attr_key = (parts.group(1), parts.group(2))
        else:
            attr_key = (None, attr_name)
        attrs[attr_key] = attr_value

    return (_base.ELEMENT, namespace, self.filter.fromXmlName(tag),
            attrs, len(node) > 0 or node.text)
Example #11
Source File: pcbdraw.py From PcbDraw with MIT License | 5 votes |
def component_from_library(lib, name, value, ref, pos, comp, highlight, silent):
    """Place the SVG model of one component into ``comp["container"]``.

    Does nothing when ``name`` is empty or when ``comp["filter"]`` is set
    and does not contain ``ref``. When no model file is found, optionally
    prints a warning (unless ``silent``) and, if ``comp["placeholder"]`` is
    truthy, adds a red 300x300 rectangle around the position instead.

    Otherwise the model content is added inside a new ``g`` element that is
    transformed to the component position and rotation, and a highlight is
    built when ``ref`` is listed in ``highlight["items"]``.
    """
    if not name:
        return
    allowed_refs = comp["filter"]
    if allowed_refs is not None and ref not in allowed_refs:
        return

    model_file = get_model_file(comp["libraries"], lib, name, ref, comp["remapping"])
    if not model_file:
        if not silent:
            print("Warning: component '{}' for footprint '{}' from library '{}' was not found".format(name, ref, lib))
        if comp["placeholder"]:
            etree.SubElement(comp["container"], "rect",
                             x=str(ki2dmil(pos[0]) - 150),
                             y=str(ki2dmil(pos[1]) - 150),
                             width="300", height="300",
                             style="fill:red;")
        return

    comp["container"].append(etree.Comment("{}:{}".format(lib, name)))
    group = etree.SubElement(comp["container"], "g")
    svg_tree = read_svg_unique(model_file)
    for node in extract_svg_content(svg_tree):
        group.append(node)

    # The model may mark its anchor with an element whose id is "origin";
    # that marker is removed once its position has been read.
    origin_x, origin_y = 0, 0
    origin = group.find(".//*[@id='origin']")
    if origin is None:
        print("Warning: component '{}' from library '{}' has no ORIGIN".format(name, lib))
    else:
        origin_x, origin_y = element_position(origin, root=group)
        origin.getparent().remove(origin)

    group.attrib["transform"] = "translate({} {}) scale(393.700787402) rotate({}) translate({}, {})".format(
        ki2dmil(pos[0]), ki2dmil(pos[1]),
        -math.degrees(pos[2]),
        -origin_x, -origin_y)

    if ref not in highlight["items"]:
        return

    # The highlight size comes from width/height when both are present,
    # otherwise from the last two fields of the viewBox.
    attributes = svg_tree.attrib
    if "width" in attributes and "height" in attributes:
        w = to_user_units(attributes["width"])
        h = to_user_units(attributes["height"])
    elif "viewBox" in attributes:
        viewbox = re.split(" |,", attributes["viewBox"])
        w = to_user_units(viewbox[2])
        h = to_user_units(viewbox[3])
    else:
        print("Warning: component '{}' from library '{}' has no viewBox. Cannot highlight".format(name, lib))
        return
    build_highlight(highlight, w, h, pos, (origin_x, origin_y), ref)
Example #12
Source File: etree_lxml.py From Weapon-Detection-And-Classification with MIT License | 5 votes |
def getNodeDetails(self, node):
    """Return the details tuple describing ``node``.

    The first item of the returned tuple is one of the ``base`` node-type
    constants (TEXT, DOCUMENT, DOCTYPE, COMMENT, ENTITY or ELEMENT); the
    remaining items depend on that type.
    """
    # Text is passed as an (element, "text" | "tail") pair.
    if isinstance(node, tuple):
        node, key = node
        assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
        return base.TEXT, ensure_str(getattr(node, key))

    if isinstance(node, Root):
        return (base.DOCUMENT,)

    if isinstance(node, Doctype):
        return base.DOCTYPE, node.name, node.public_id, node.system_id

    if isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
        return base.TEXT, ensure_str(node.obj)

    if node.tag == etree.Comment:
        return base.COMMENT, ensure_str(node.text)

    if node.tag == etree.Entity:
        # Drop the leading "&" and the trailing ";".
        return base.ENTITY, ensure_str(node.text)[1:-1]

    # Anything else is assumed to be an ordinary element.
    qualified = tag_regexp.match(ensure_str(node.tag))
    if qualified:
        namespace, tag = qualified.groups()
    else:
        namespace, tag = None, ensure_str(node.tag)

    attrs = {}
    for raw_name, raw_value in list(node.attrib.items()):
        attr_name = ensure_str(raw_name)
        attr_value = ensure_str(raw_value)
        parts = tag_regexp.match(attr_name)
        if parts:
            attr_key = (parts.group(1), parts.group(2))
        else:
            attr_key = (None, attr_name)
        attrs[attr_key] = attr_value

    return (base.ELEMENT, namespace, self.filter.fromXmlName(tag),
            attrs, len(node) > 0 or node.text)
Example #13
Source File: etree_lxml.py From planespotter with MIT License | 5 votes |
def getNodeDetails(self, node):
    """Return the details tuple describing ``node``.

    The first item of the returned tuple is one of the ``base`` node-type
    constants (TEXT, DOCUMENT, DOCTYPE, COMMENT, ENTITY or ELEMENT); the
    remaining items depend on that type.
    """
    # Text is passed as an (element, "text" | "tail") pair.
    if isinstance(node, tuple):
        node, key = node
        assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
        return base.TEXT, ensure_str(getattr(node, key))

    if isinstance(node, Root):
        return (base.DOCUMENT,)

    if isinstance(node, Doctype):
        return base.DOCTYPE, node.name, node.public_id, node.system_id

    if isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
        return base.TEXT, ensure_str(node.obj)

    if node.tag == etree.Comment:
        return base.COMMENT, ensure_str(node.text)

    if node.tag == etree.Entity:
        # Drop the leading "&" and the trailing ";".
        return base.ENTITY, ensure_str(node.text)[1:-1]

    # Anything else is assumed to be an ordinary element.
    qualified = tag_regexp.match(ensure_str(node.tag))
    if qualified:
        namespace, tag = qualified.groups()
    else:
        namespace, tag = None, ensure_str(node.tag)

    attrs = {}
    for raw_name, raw_value in list(node.attrib.items()):
        attr_name = ensure_str(raw_name)
        attr_value = ensure_str(raw_value)
        parts = tag_regexp.match(attr_name)
        if parts:
            attr_key = (parts.group(1), parts.group(2))
        else:
            attr_key = (None, attr_name)
        attrs[attr_key] = attr_value

    return (base.ELEMENT, namespace, self.filter.fromXmlName(tag),
            attrs, len(node) > 0 or node.text)
Example #14
Source File: lxmletree.py From MARA_Framework with GNU Lesser General Public License v3.0 | 5 votes |
def getNodeDetails(self, node):
    """Return the details tuple describing ``node``.

    The first item of the returned tuple is one of the ``_base`` node-type
    constants (TEXT, DOCUMENT, DOCTYPE, COMMENT, ENTITY or ELEMENT); the
    remaining items depend on that type.
    """
    # Text is passed as an (element, "text" | "tail") pair.
    if isinstance(node, tuple):
        node, key = node
        assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
        return _base.TEXT, ensure_str(getattr(node, key))

    if isinstance(node, Root):
        return (_base.DOCUMENT,)

    if isinstance(node, Doctype):
        return _base.DOCTYPE, node.name, node.public_id, node.system_id

    if isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
        # The wrapped object is returned as-is, without ensure_str.
        return _base.TEXT, node.obj

    if node.tag == etree.Comment:
        return _base.COMMENT, ensure_str(node.text)

    if node.tag == etree.Entity:
        # Drop the leading "&" and the trailing ";".
        return _base.ENTITY, ensure_str(node.text)[1:-1]

    # Anything else is assumed to be an ordinary element.
    qualified = tag_regexp.match(ensure_str(node.tag))
    if qualified:
        namespace, tag = qualified.groups()
    else:
        namespace, tag = None, ensure_str(node.tag)

    attrs = {}
    for raw_name, raw_value in list(node.attrib.items()):
        attr_name = ensure_str(raw_name)
        attr_value = ensure_str(raw_value)
        parts = tag_regexp.match(attr_name)
        if parts:
            attr_key = (parts.group(1), parts.group(2))
        else:
            attr_key = (None, attr_name)
        attrs[attr_key] = attr_value

    return (_base.ELEMENT, namespace, self.filter.fromXmlName(tag),
            attrs, len(node) > 0 or node.text)
Example #15
Source File: lxmletree.py From Financial-Portfolio-Flask with MIT License | 5 votes |
def getNodeDetails(self, node):
    """Return the details tuple describing ``node``.

    The first item of the returned tuple is one of the ``_base`` node-type
    constants (TEXT, DOCUMENT, DOCTYPE, COMMENT, ENTITY or ELEMENT); the
    remaining items depend on that type.
    """
    # Text is passed as an (element, "text" | "tail") pair.
    if isinstance(node, tuple):
        node, key = node
        assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
        return _base.TEXT, ensure_str(getattr(node, key))

    if isinstance(node, Root):
        return (_base.DOCUMENT,)

    if isinstance(node, Doctype):
        return _base.DOCTYPE, node.name, node.public_id, node.system_id

    if isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
        # The wrapped object is returned as-is, without ensure_str.
        return _base.TEXT, node.obj

    if node.tag == etree.Comment:
        return _base.COMMENT, ensure_str(node.text)

    if node.tag == etree.Entity:
        # Drop the leading "&" and the trailing ";".
        return _base.ENTITY, ensure_str(node.text)[1:-1]

    # Anything else is assumed to be an ordinary element.
    qualified = tag_regexp.match(ensure_str(node.tag))
    if qualified:
        namespace, tag = qualified.groups()
    else:
        namespace, tag = None, ensure_str(node.tag)

    attrs = {}
    for raw_name, raw_value in list(node.attrib.items()):
        attr_name = ensure_str(raw_name)
        attr_value = ensure_str(raw_value)
        parts = tag_regexp.match(attr_name)
        if parts:
            attr_key = (parts.group(1), parts.group(2))
        else:
            attr_key = (None, attr_name)
        attrs[attr_key] = attr_value

    return (_base.ELEMENT, namespace, self.filter.fromXmlName(tag),
            attrs, len(node) > 0 or node.text)
Example #16
Source File: etree_lxml.py From learn_python3_spider with MIT License | 5 votes |
def getNodeDetails(self, node):
    """Return the details tuple describing ``node``.

    The first item of the returned tuple is one of the ``base`` node-type
    constants (TEXT, DOCUMENT, DOCTYPE, COMMENT, ENTITY or ELEMENT); the
    remaining items depend on that type.
    """
    # Text is passed as an (element, "text" | "tail") pair.
    if isinstance(node, tuple):
        node, key = node
        assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
        return base.TEXT, ensure_str(getattr(node, key))

    if isinstance(node, Root):
        return (base.DOCUMENT,)

    if isinstance(node, Doctype):
        return base.DOCTYPE, node.name, node.public_id, node.system_id

    if isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
        return base.TEXT, ensure_str(node.obj)

    if node.tag == etree.Comment:
        return base.COMMENT, ensure_str(node.text)

    if node.tag == etree.Entity:
        # Drop the leading "&" and the trailing ";".
        return base.ENTITY, ensure_str(node.text)[1:-1]

    # Anything else is assumed to be an ordinary element.
    qualified = tag_regexp.match(ensure_str(node.tag))
    if qualified:
        namespace, tag = qualified.groups()
    else:
        namespace, tag = None, ensure_str(node.tag)

    attrs = {}
    for raw_name, raw_value in list(node.attrib.items()):
        attr_name = ensure_str(raw_name)
        attr_value = ensure_str(raw_value)
        parts = tag_regexp.match(attr_name)
        if parts:
            attr_key = (parts.group(1), parts.group(2))
        else:
            attr_key = (None, attr_name)
        attrs[attr_key] = attr_value

    return (base.ELEMENT, namespace, self.filter.fromXmlName(tag),
            attrs, len(node) > 0 or node.text)
Example #17
Source File: etree_lxml.py From scylla with Apache License 2.0 | 5 votes |
def getNodeDetails(self, node):
    """Return the details tuple describing ``node``.

    The first item of the returned tuple is one of the ``base`` node-type
    constants (TEXT, DOCUMENT, DOCTYPE, COMMENT, ENTITY or ELEMENT); the
    remaining items depend on that type.
    """
    # Text is passed as an (element, "text" | "tail") pair.
    if isinstance(node, tuple):
        node, key = node
        assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
        return base.TEXT, ensure_str(getattr(node, key))

    if isinstance(node, Root):
        return (base.DOCUMENT,)

    if isinstance(node, Doctype):
        return base.DOCTYPE, node.name, node.public_id, node.system_id

    if isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
        return base.TEXT, ensure_str(node.obj)

    if node.tag == etree.Comment:
        return base.COMMENT, ensure_str(node.text)

    if node.tag == etree.Entity:
        # Drop the leading "&" and the trailing ";".
        return base.ENTITY, ensure_str(node.text)[1:-1]

    # Anything else is assumed to be an ordinary element.
    qualified = tag_regexp.match(ensure_str(node.tag))
    if qualified:
        namespace, tag = qualified.groups()
    else:
        namespace, tag = None, ensure_str(node.tag)

    attrs = {}
    for raw_name, raw_value in list(node.attrib.items()):
        attr_name = ensure_str(raw_name)
        attr_value = ensure_str(raw_value)
        parts = tag_regexp.match(attr_name)
        if parts:
            attr_key = (parts.group(1), parts.group(2))
        else:
            attr_key = (None, attr_name)
        attrs[attr_key] = attr_value

    return (base.ELEMENT, namespace, self.filter.fromXmlName(tag),
            attrs, len(node) > 0 or node.text)
Example #18
Source File: clean.py From aws-lambda-lxml with GNU General Public License v3.0 | 5 votes |
def kill_conditional_comments(self, doc):
    """
    IE conditional comments basically embed HTML that the parser
    doesn't normally see.  We can't allow anything like that, so
    we'll kill any comments that could be conditional.

    Delegates to ``self._kill_elements``, restricted to ``etree.Comment``
    nodes, with a predicate that searches each comment's text using
    ``_conditional_comment_re``.
    """
    # The unused ``bad`` list from the previous version has been removed.
    self._kill_elements(
        doc, lambda el: _conditional_comment_re.search(el.text),
        etree.Comment)
Example #19
Source File: _html5builder.py From aws-lambda-lxml with GNU General Public License v3.0 | 5 votes |
def insertRoot(self, name):
    """Create the document root and register it with the builder.

    Parses a minimal ``<html></html>`` document (preceded by a DOCTYPE when
    ``self.doctype`` has a name), places ``self.initialComments`` before the
    root, stores the resulting tree on a new document object, and pushes a
    wrapper element for ``name`` onto the child and open-element lists.
    """
    doctype = self.doctype
    markup = ''
    if doctype and doctype.name:
        markup += '<!DOCTYPE %s' % doctype.name
        # NOTE(review): when only one of the two ids is set, the other is
        # formatted as the text "None" -- confirm this is intended.
        if not (doctype.publicId is None and doctype.systemId is None):
            markup += ' PUBLIC "%s" "%s"' % (doctype.publicId, doctype.systemId)
        markup += '>'
    markup += '<html></html>'
    root = html.fromstring(markup)

    # Initial comments become siblings preceding the root element.
    for text in self.initialComments:
        root.addprevious(etree.Comment(text))

    # The document object carries the ElementTree that owns the root.
    document = self.documentClass()
    self.document = document
    document._elementTree = root.getroottree()

    # Wrap the parsed root and register it in the child/open structures.
    wrapper = self.elementClass(name)
    wrapper._element = root
    document.childNodes.append(wrapper)
    self.openElements.append(wrapper)

    self.rootInserted = True
Example #20
Source File: _html5builder.py From aws-lambda-lxml with GNU General Public License v3.0 | 5 votes |
def __init__(self, *args, **kwargs):
    """Select the element and comment classes, then run the base initializer.

    Elements come from the builder for ``html``; comments come from the
    builder for ``etree``. All arguments are forwarded unchanged to
    ``_base.TreeBuilder.__init__``.
    """
    html_module = etree_builders.getETreeModule(html, fullTree=False)
    xml_module = etree_builders.getETreeModule(etree, fullTree=False)
    self.elementClass = html_module.Element
    self.commentClass = xml_module.Comment
    _base.TreeBuilder.__init__(self, *args, **kwargs)
Example #21
Source File: _html5builder.py From aws-lambda-lxml with GNU General Public License v3.0 | 5 votes |
def insertRoot(self, name):
    """Create the document root and register it with the builder.

    Parses a minimal ``<html></html>`` document (preceded by a DOCTYPE when
    ``self.doctype`` has a name), places ``self.initialComments`` before the
    root, stores the resulting tree on a new document object, and pushes a
    wrapper element for ``name`` onto the child and open-element lists.
    """
    doctype = self.doctype
    markup = ''
    if doctype and doctype.name:
        markup += '<!DOCTYPE %s' % doctype.name
        # NOTE(review): when only one of the two ids is set, the other is
        # formatted as the text "None" -- confirm this is intended.
        if not (doctype.publicId is None and doctype.systemId is None):
            markup += ' PUBLIC "%s" "%s"' % (doctype.publicId, doctype.systemId)
        markup += '>'
    markup += '<html></html>'
    root = html.fromstring(markup)

    # Initial comments become siblings preceding the root element.
    for text in self.initialComments:
        root.addprevious(etree.Comment(text))

    # The document object carries the ElementTree that owns the root.
    document = self.documentClass()
    self.document = document
    document._elementTree = root.getroottree()

    # Wrap the parsed root and register it in the child/open structures.
    wrapper = self.elementClass(name)
    wrapper._element = root
    document.childNodes.append(wrapper)
    self.openElements.append(wrapper)

    self.rootInserted = True
Example #22
Source File: _html5builder.py From aws-lambda-lxml with GNU General Public License v3.0 | 5 votes |
def __init__(self, *args, **kwargs):
    """Select the element and comment classes, then run the base initializer.

    Elements come from the builder for ``html``; comments come from the
    builder for ``etree``. All arguments are forwarded unchanged to
    ``_base.TreeBuilder.__init__``.
    """
    html_module = etree_builders.getETreeModule(html, fullTree=False)
    xml_module = etree_builders.getETreeModule(etree, fullTree=False)
    self.elementClass = html_module.Element
    self.commentClass = xml_module.Comment
    _base.TreeBuilder.__init__(self, *args, **kwargs)
Example #23
Source File: clean.py From aws-lambda-lxml with GNU General Public License v3.0 | 5 votes |
def kill_conditional_comments(self, doc):
    """
    IE conditional comments basically embed HTML that the parser
    doesn't normally see.  We can't allow anything like that, so
    we'll kill any comments that could be conditional.

    Delegates to ``self._kill_elements``, restricted to ``etree.Comment``
    nodes, with a predicate that searches each comment's text using
    ``_conditional_comment_re``.
    """
    # The unused ``bad`` list from the previous version has been removed.
    self._kill_elements(
        doc, lambda el: _conditional_comment_re.search(el.text),
        etree.Comment)
Example #24
Source File: _html5builder.py From aws-lambda-lxml with GNU General Public License v3.0 | 5 votes |
def insertRoot(self, name):
    """Create the document root and register it with the builder.

    Parses a minimal ``<html></html>`` document (preceded by a DOCTYPE when
    ``self.doctype`` has a name), places ``self.initialComments`` before the
    root, stores the resulting tree on a new document object, and pushes a
    wrapper element for ``name`` onto the child and open-element lists.
    """
    doctype = self.doctype
    markup = ''
    if doctype and doctype.name:
        markup += '<!DOCTYPE %s' % doctype.name
        # NOTE(review): when only one of the two ids is set, the other is
        # formatted as the text "None" -- confirm this is intended.
        if not (doctype.publicId is None and doctype.systemId is None):
            markup += ' PUBLIC "%s" "%s"' % (doctype.publicId, doctype.systemId)
        markup += '>'
    markup += '<html></html>'
    root = html.fromstring(markup)

    # Initial comments become siblings preceding the root element.
    for text in self.initialComments:
        root.addprevious(etree.Comment(text))

    # The document object carries the ElementTree that owns the root.
    document = self.documentClass()
    self.document = document
    document._elementTree = root.getroottree()

    # Wrap the parsed root and register it in the child/open structures.
    wrapper = self.elementClass(name)
    wrapper._element = root
    document.childNodes.append(wrapper)
    self.openElements.append(wrapper)

    self.rootInserted = True
Example #25
Source File: clean.py From aws-lambda-lxml with GNU General Public License v3.0 | 5 votes |
def kill_conditional_comments(self, doc):
    """
    IE conditional comments basically embed HTML that the parser
    doesn't normally see.  We can't allow anything like that, so
    we'll kill any comments that could be conditional.

    Delegates to ``self._kill_elements``, restricted to ``etree.Comment``
    nodes, with a predicate that searches each comment's text using
    ``_conditional_comment_re``.
    """
    # The unused ``bad`` list from the previous version has been removed.
    self._kill_elements(
        doc, lambda el: _conditional_comment_re.search(el.text),
        etree.Comment)
Example #26
Source File: _html5builder.py From aws-lambda-lxml with GNU General Public License v3.0 | 5 votes |
def insertRoot(self, name):
    """Create the document root and register it with the builder.

    Parses a minimal ``<html></html>`` document (preceded by a DOCTYPE when
    ``self.doctype`` has a name), places ``self.initialComments`` before the
    root, stores the resulting tree on a new document object, and pushes a
    wrapper element for ``name`` onto the child and open-element lists.
    """
    doctype = self.doctype
    markup = ''
    if doctype and doctype.name:
        markup += '<!DOCTYPE %s' % doctype.name
        # NOTE(review): when only one of the two ids is set, the other is
        # formatted as the text "None" -- confirm this is intended.
        if not (doctype.publicId is None and doctype.systemId is None):
            markup += ' PUBLIC "%s" "%s"' % (doctype.publicId, doctype.systemId)
        markup += '>'
    markup += '<html></html>'
    root = html.fromstring(markup)

    # Initial comments become siblings preceding the root element.
    for text in self.initialComments:
        root.addprevious(etree.Comment(text))

    # The document object carries the ElementTree that owns the root.
    document = self.documentClass()
    self.document = document
    document._elementTree = root.getroottree()

    # Wrap the parsed root and register it in the child/open structures.
    wrapper = self.elementClass(name)
    wrapper._element = root
    document.childNodes.append(wrapper)
    self.openElements.append(wrapper)

    self.rootInserted = True
Example #27
Source File: _html5builder.py From aws-lambda-lxml with GNU General Public License v3.0 | 5 votes |
def __init__(self, *args, **kwargs):
    """Select the element and comment classes, then run the base initializer.

    Elements come from the builder for ``html``; comments come from the
    builder for ``etree``. All arguments are forwarded unchanged to
    ``_base.TreeBuilder.__init__``.
    """
    html_module = etree_builders.getETreeModule(html, fullTree=False)
    xml_module = etree_builders.getETreeModule(etree, fullTree=False)
    self.elementClass = html_module.Element
    self.commentClass = xml_module.Comment
    _base.TreeBuilder.__init__(self, *args, **kwargs)
Example #28
Source File: etree_lxml.py From Building-Recommendation-Systems-with-Python with MIT License | 5 votes |
def getNodeDetails(self, node):
    """Return the details tuple describing ``node``.

    The first item of the returned tuple is one of the ``base`` node-type
    constants (TEXT, DOCUMENT, DOCTYPE, COMMENT, ENTITY or ELEMENT); the
    remaining items depend on that type.
    """
    # Text is passed as an (element, "text" | "tail") pair.
    if isinstance(node, tuple):
        node, key = node
        assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
        return base.TEXT, ensure_str(getattr(node, key))

    if isinstance(node, Root):
        return (base.DOCUMENT,)

    if isinstance(node, Doctype):
        return base.DOCTYPE, node.name, node.public_id, node.system_id

    if isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
        return base.TEXT, ensure_str(node.obj)

    if node.tag == etree.Comment:
        return base.COMMENT, ensure_str(node.text)

    if node.tag == etree.Entity:
        # Drop the leading "&" and the trailing ";".
        return base.ENTITY, ensure_str(node.text)[1:-1]

    # Anything else is assumed to be an ordinary element.
    qualified = tag_regexp.match(ensure_str(node.tag))
    if qualified:
        namespace, tag = qualified.groups()
    else:
        namespace, tag = None, ensure_str(node.tag)

    attrs = {}
    for raw_name, raw_value in list(node.attrib.items()):
        attr_name = ensure_str(raw_name)
        attr_value = ensure_str(raw_value)
        parts = tag_regexp.match(attr_name)
        if parts:
            attr_key = (parts.group(1), parts.group(2))
        else:
            attr_key = (None, attr_name)
        attrs[attr_key] = attr_value

    return (base.ELEMENT, namespace, self.filter.fromXmlName(tag),
            attrs, len(node) > 0 or node.text)
Example #29
Source File: etree_lxml.py From pySINDy with MIT License | 5 votes |
def getNodeDetails(self, node):
    """Return the details tuple describing ``node``.

    The first item of the returned tuple is one of the ``base`` node-type
    constants (TEXT, DOCUMENT, DOCTYPE, COMMENT, ENTITY or ELEMENT); the
    remaining items depend on that type.
    """
    # Text is passed as an (element, "text" | "tail") pair.
    if isinstance(node, tuple):
        node, key = node
        assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
        return base.TEXT, ensure_str(getattr(node, key))

    if isinstance(node, Root):
        return (base.DOCUMENT,)

    if isinstance(node, Doctype):
        return base.DOCTYPE, node.name, node.public_id, node.system_id

    if isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
        return base.TEXT, ensure_str(node.obj)

    if node.tag == etree.Comment:
        return base.COMMENT, ensure_str(node.text)

    if node.tag == etree.Entity:
        # Drop the leading "&" and the trailing ";".
        return base.ENTITY, ensure_str(node.text)[1:-1]

    # Anything else is assumed to be an ordinary element.
    qualified = tag_regexp.match(ensure_str(node.tag))
    if qualified:
        namespace, tag = qualified.groups()
    else:
        namespace, tag = None, ensure_str(node.tag)

    attrs = {}
    for raw_name, raw_value in list(node.attrib.items()):
        attr_name = ensure_str(raw_name)
        attr_value = ensure_str(raw_value)
        parts = tag_regexp.match(attr_name)
        if parts:
            attr_key = (parts.group(1), parts.group(2))
        else:
            attr_key = (None, attr_name)
        attrs[attr_key] = attr_value

    return (base.ELEMENT, namespace, self.filter.fromXmlName(tag),
            attrs, len(node) > 0 or node.text)
Example #30
Source File: diff.py From xmldiff with MIT License | 5 votes |
def node_ratio(self, left, right):
    """Return a similarity ratio for the nodes ``left`` and ``right``.

    Two comments are compared by their text through the sequence matcher;
    a comment against a non-comment scores 0. Otherwise the first applicable
    entry of ``self.uniqueattrs`` decides the result alone (1 when both
    nodes carry the same value, else 0). Failing that, the leaf ratio is
    returned, averaged with the child ratio when one is available.
    """
    left_is_comment = left.tag is etree.Comment
    right_is_comment = right.tag is etree.Comment
    if left_is_comment and right_is_comment:
        self._sequencematcher.set_seqs(left.text, right.text)
        return self._sequence_ratio()
    if left_is_comment or right_is_comment:
        # One is a comment, the other is not.
        return 0

    for unique in self.uniqueattrs:
        if isinstance(unique, str):
            attr = unique
        else:
            # A (tag, attr) pair only applies when both nodes have that tag.
            required_tag, attr = unique
            if not (required_tag == left.tag and required_tag == right.tag):
                continue

        if attr in left.attrib or attr in right.attrib:
            # At least one node has the unique attribute, so it alone
            # decides; if only one node has it, the nodes differ.
            return int(left.attrib.get(attr) == right.attrib.get(attr))

    ratio = self.leaf_ratio(left, right)
    children = self.child_ratio(left, right)
    if children is None:
        return ratio
    return (ratio + children) / 2