Python lxml.etree.iselement() Examples
The following are 30 code examples of lxml.etree.iselement().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module lxml.etree, or try the search function.
Example #1
Source File: diff.py From stopstalk-deployment with MIT License | 6 votes |
def tokenize(html, include_hrefs=True):
    """Convert HTML into token objects (words with their tags attached).

    Only page content is considered: anything inside ``<head>`` is ignored,
    and both ``<head>`` and ``<body>`` may be absent.  Input that is not
    already an element is parsed with lxml, which yields a valid document
    (although lxml may guess wrongly when the markup is particularly bad).
    ``<ins>`` and ``<del>`` tags are dropped from the document because
    they would be confusing in a diff.

    When ``include_hrefs`` is true, the ``href`` attribute of ``<a>`` tags
    becomes a special kind of diffable token.
    """
    # An already-parsed element is used directly; anything else goes
    # through parse_html with cleanup enabled.
    body_el = html if etree.iselement(html) else parse_html(html, cleanup=True)
    # Flatten the tree into one chunk per tag, word and end tag.
    pieces = flatten_el(body_el, skip_tag=True, include_hrefs=include_hrefs)
    # Stitch the chunks back together as token objects.
    return fixup_chunks(pieces)
Example #2
Source File: diff.py From lambda-text-extractor with Apache License 2.0 | 6 votes |
def tokenize(html, include_hrefs=True):
    """Convert HTML into token objects (words with their tags attached).

    Only page content is considered: anything inside ``<head>`` is ignored,
    and both ``<head>`` and ``<body>`` may be absent.  Input that is not
    already an element is parsed with lxml, which yields a valid document
    (although lxml may guess wrongly when the markup is particularly bad).
    ``<ins>`` and ``<del>`` tags are dropped from the document because
    they would be confusing in a diff.

    When ``include_hrefs`` is true, the ``href`` attribute of ``<a>`` tags
    becomes a special kind of diffable token.
    """
    # An already-parsed element is used directly; anything else goes
    # through parse_html with cleanup enabled.
    body_el = html if etree.iselement(html) else parse_html(html, cleanup=True)
    # Flatten the tree into one chunk per tag, word and end tag.
    pieces = flatten_el(body_el, skip_tag=True, include_hrefs=include_hrefs)
    # Stitch the chunks back together as token objects.
    return fixup_chunks(pieces)
Example #3
Source File: diff.py From lambda-text-extractor with Apache License 2.0 | 6 votes |
def tokenize(html, include_hrefs=True):
    """Convert HTML into token objects (words with their tags attached).

    Only page content is considered: anything inside ``<head>`` is ignored,
    and both ``<head>`` and ``<body>`` may be absent.  Input that is not
    already an element is parsed with lxml, which yields a valid document
    (although lxml may guess wrongly when the markup is particularly bad).
    ``<ins>`` and ``<del>`` tags are dropped from the document because
    they would be confusing in a diff.

    When ``include_hrefs`` is true, the ``href`` attribute of ``<a>`` tags
    becomes a special kind of diffable token.
    """
    # An already-parsed element is used directly; anything else goes
    # through parse_html with cleanup enabled.
    body_el = html if etree.iselement(html) else parse_html(html, cleanup=True)
    # Flatten the tree into one chunk per tag, word and end tag.
    pieces = flatten_el(body_el, skip_tag=True, include_hrefs=include_hrefs)
    # Stitch the chunks back together as token objects.
    return fixup_chunks(pieces)
Example #4
Source File: service.py From gdata-python3 with Apache License 2.0 | 6 votes |
def CalculateDataLength(data):
    """Attempts to determine the length of the data to send.

    A length is reported for strings, byte strings and ElementTree
    elements.  Lists and file-like objects (anything with a ``read``
    attribute) yield None because their length is not guessed here.  Any
    other object is measured through its ``str()`` form.

    Args:
      data: object The payload whose length should be determined.

    Returns:
      The length as an int, or None when it cannot be determined.
    """
    if isinstance(data, (str, bytes, bytearray)):
        # bytes/bytearray must be measured directly: str(b'abc') is the
        # repr "b'abc'", whose length is not the payload length.
        return len(data)
    elif isinstance(data, list):
        return None
    elif ElementTree.iselement(data):
        return len(ElementTree.tostring(data))
    elif hasattr(data, 'read'):
        # If this is a file-like object, don't try to guess the length.
        return None
    else:
        return len(str(data))
Example #5
Source File: service.py From gdata-python3 with Apache License 2.0 | 6 votes |
def __SendDataPart(data, connection):
    """This method is deprecated, use atom.http._send_data_part

    Sends ``data`` over ``connection`` using ``connection.send``:

    * a str is sent as-is,
    * an ElementTree element is serialized with ``ElementTree.tostring``,
    * an object with a ``read`` attribute is streamed in chunks,
    * anything else is converted with ``str()`` and sent.
    """
    deprecated('call to deprecated function __SendDataPart')
    if isinstance(data, str):
        # TODO add handling for unicode.
        connection.send(data)
        return
    elif ElementTree.iselement(data):
        connection.send(ElementTree.tostring(data))
        return
    # Check to see if data is a file-like object that has a read method.
    elif hasattr(data, 'read'):
        # Read the file and send it a chunk at a time.
        while 1:
            binarydata = data.read(100000)
            # An empty chunk marks end of file.  Test truthiness rather
            # than equality with '' so that b'' from a binary stream also
            # ends the loop instead of spinning forever.
            if not binarydata:
                break
            connection.send(binarydata)
        return
    else:
        # The data object was not a file.
        # Try to convert to a string and send the data.
        connection.send(str(data))
        return
Example #6
Source File: __init__.py From gdata-python3 with Apache License 2.0 | 6 votes |
def SetXmlBlob(self, blob):
    """Store ``blob`` as the single XML child of the extendedProperty.

    The extendedProperty may hold only one child element as an XML blob,
    so any extension elements already on this object are discarded first.

    Args:
      blob: str, ElementTree Element or atom.ExtensionElement representing
          the XML blob stored in the extendedProperty.
    """
    # Drop whatever child nodes the extendedProperty had before.
    self.extension_elements = []
    if isinstance(blob, atom.ExtensionElement):
        converted = blob
    elif ElementTree.iselement(blob):
        converted = atom._ExtensionElementFromElementTree(blob)
    else:
        # Anything else is parsed as an XML string.
        converted = atom.ExtensionElementFromString(blob)
    self.extension_elements.append(converted)
Example #7
Source File: client.py From pyvas with MIT License | 6 votes |
def _send_request(self, request):
    """Send XML data to OpenVAS Manager and get results

    ``request`` may be an element, which is written to the socket as a
    UTF-8 encoded tree, or a string, which is sent after text is encoded
    to UTF-8.  The reply is read in fixed-size blocks, fed to an XML tree
    builder, and the resulting root is returned.
    """
    block_size = 1024

    if etree.iselement(request):
        root = etree.ElementTree(request)
        root.write(self.socket, encoding="utf-8")
    else:
        is_text = isinstance(request, six.text_type)
        payload = request.encode("utf-8") if is_text else request
        self.socket.send(payload)

    builder = etree.XMLTreeBuilder()
    # A block shorter than block_size is treated as the end of the reply.
    # NOTE(review): a reply whose size is an exact multiple of block_size
    # triggers one more recv() call -- confirm this cannot stall.
    while True:
        block = self.socket.recv(block_size)
        builder.feed(block)
        if len(block) < block_size:
            break
    return builder.close()
Example #8
Source File: xml.py From peach with Mozilla Public License 2.0 | 6 votes |
def handleElement(self, node, parent):
    """
    Handle an XML element, children and attributes. Returns an XmlElement
    object.

    The new XmlElement is created with ``parent`` as its parent and is
    populated, in order, with the node's attributes, its leading text and
    its children (each child followed by that child's tail text).
    """
    # Element
    element = XmlElement(None, parent)
    ns, tag = split_ns(node.tag)
    if ns is not None:
        element.xmlNamespace = ns
    element.elementName = tag

    # Element attributes
    for attrib in node.keys():
        attribElement = self.handleAttribute(attrib, node.get(attrib), element)
        element.append(attribElement)

    # Element children
    self._handleText(node.text, element)
    for child in node.iterchildren():
        if etree.iselement(child):  # TODO: skip comments
            childElement = self.handleElement(child, element)
            element.append(childElement)
        # Text following the child belongs to this element, not the child.
        self._handleText(child.tail, element)
    return element
Example #9
Source File: xml.py From peach with Mozilla Public License 2.0 | 6 votes |
def handleElement(self, node, parent):
    """
    Mirror an XML element, its attributes and its children under ``parent``.

    An ``XmlElement`` etree element describing ``node`` is appended to
    ``parent`` and returned.  Returns None without doing anything when
    ``parent`` is None.
    """
    if parent is None:
        return None

    # Describe the element itself and attach it to its parent.
    mirrored = etree.Element("XmlElement")
    namespace, local_name = split_ns(node.tag)
    mirrored.set("elementName", local_name)
    if namespace is not None:
        mirrored.set("ns", namespace)
    parent.append(mirrored)

    # Attributes of the element.
    for key in node.keys():
        mirrored.append(self.handleAttribute(key, node.get(key), mirrored))

    # Leading text, then each child followed by its tail text.
    self._handleText(node.text, mirrored)
    for sub in node.iterchildren():
        if etree.iselement(sub):  # TODO: skip comments
            self.handleElement(sub, mirrored)
        self._handleText(sub.tail, mirrored)
    return mirrored
Example #10
Source File: xml.py From python-gvm with GNU General Public License v3.0 | 6 votes |
def pretty_print(xml):
    """Print XML in a readable, indented form.

    Accepts a string containing XML, a single lxml element, or a list of
    items.  List items that are elements are pretty-printed; other list
    items are printed unchanged.  Input of any other type prints nothing.

    Arguments:
        xml (str, List[lxml.etree.Element] or lxml.etree.Element):
            xml as string, List[lxml.etree.Element] or directly a lxml
            element.
    """

    def _render(element):
        # Serialize with indentation and decode for printing.
        return etree.tostring(element, pretty_print=True).decode("utf-8")

    if isinstance(xml, list):
        for entry in xml:
            print(_render(entry) if etree.iselement(entry) else entry)
        return

    if etree.iselement(xml):
        print(_render(xml))
        return

    if isinstance(xml, str):
        # Strings are parsed with secET before being re-serialized.
        print(_render(secET.fromstring(xml)))
Example #11
Source File: diff.py From learn_python3_spider with MIT License | 6 votes |
def tokenize(html, include_hrefs=True):
    """Convert HTML into token objects (words with their tags attached).

    Only page content is considered: anything inside ``<head>`` is ignored,
    and both ``<head>`` and ``<body>`` may be absent.  Input that is not
    already an element is parsed with lxml, which yields a valid document
    (although lxml may guess wrongly when the markup is particularly bad).
    ``<ins>`` and ``<del>`` tags are dropped from the document because
    they would be confusing in a diff.

    When ``include_hrefs`` is true, the ``href`` attribute of ``<a>`` tags
    becomes a special kind of diffable token.
    """
    # An already-parsed element is used directly; anything else goes
    # through parse_html with cleanup enabled.
    body_el = html if etree.iselement(html) else parse_html(html, cleanup=True)
    # Flatten the tree into one chunk per tag, word and end tag.
    pieces = flatten_el(body_el, skip_tag=True, include_hrefs=include_hrefs)
    # Stitch the chunks back together as token objects.
    return fixup_chunks(pieces)
Example #12
Source File: diff.py From aws-lambda-lxml with GNU General Public License v3.0 | 6 votes |
def tokenize(html, include_hrefs=True):
    """Convert HTML into token objects (words with their tags attached).

    Only page content is considered: anything inside ``<head>`` is ignored,
    and both ``<head>`` and ``<body>`` may be absent.  Input that is not
    already an element is parsed with lxml, which yields a valid document
    (although lxml may guess wrongly when the markup is particularly bad).
    ``<ins>`` and ``<del>`` tags are dropped from the document because
    they would be confusing in a diff.

    When ``include_hrefs`` is true, the ``href`` attribute of ``<a>`` tags
    becomes a special kind of diffable token.
    """
    # An already-parsed element is used directly; anything else goes
    # through parse_html with cleanup enabled.
    body_el = html if etree.iselement(html) else parse_html(html, cleanup=True)
    # Flatten the tree into one chunk per tag, word and end tag.
    pieces = flatten_el(body_el, skip_tag=True, include_hrefs=include_hrefs)
    # Stitch the chunks back together as token objects.
    return fixup_chunks(pieces)
Example #13
Source File: diff.py From xmldiff with MIT License | 6 votes |
def set_trees(self, left, right):
    """Reset the differ and store the two trees to compare.

    Either argument may be an lxml ElementTree, in which case its root
    element is used.  Raises TypeError unless both end up being lxml
    elements.  ``left`` is stored as a deep copy; ``right`` is stored
    as given.
    """
    self.clear()

    # Accept whole trees by reducing them to their root element.
    left, right = [
        tree.getroot() if isinstance(tree, etree._ElementTree) else tree
        for tree in (left, right)
    ]

    if not etree.iselement(left) or not etree.iselement(right):
        raise TypeError("The 'left' and 'right' parameters must be "
                        "lxml Elements.")

    # The left tree is modified while diffing, so work on a private copy.
    self.left = deepcopy(left)
    self.right = right
Example #14
Source File: diff.py From aws-lambda-lxml with GNU General Public License v3.0 | 6 votes |
def tokenize(html, include_hrefs=True):
    """Convert HTML into token objects (words with their tags attached).

    Only page content is considered: anything inside ``<head>`` is ignored,
    and both ``<head>`` and ``<body>`` may be absent.  Input that is not
    already an element is parsed with lxml, which yields a valid document
    (although lxml may guess wrongly when the markup is particularly bad).
    ``<ins>`` and ``<del>`` tags are dropped from the document because
    they would be confusing in a diff.

    When ``include_hrefs`` is true, the ``href`` attribute of ``<a>`` tags
    becomes a special kind of diffable token.
    """
    # An already-parsed element is used directly; anything else goes
    # through parse_html with cleanup enabled.
    body_el = html if etree.iselement(html) else parse_html(html, cleanup=True)
    # Flatten the tree into one chunk per tag, word and end tag.
    pieces = flatten_el(body_el, skip_tag=True, include_hrefs=include_hrefs)
    # Stitch the chunks back together as token objects.
    return fixup_chunks(pieces)
Example #15
Source File: diff.py From aws-lambda-lxml with GNU General Public License v3.0 | 6 votes |
def tokenize(html, include_hrefs=True):
    """Convert HTML into token objects (words with their tags attached).

    Only page content is considered: anything inside ``<head>`` is ignored,
    and both ``<head>`` and ``<body>`` may be absent.  Input that is not
    already an element is parsed with lxml, which yields a valid document
    (although lxml may guess wrongly when the markup is particularly bad).
    ``<ins>`` and ``<del>`` tags are dropped from the document because
    they would be confusing in a diff.

    When ``include_hrefs`` is true, the ``href`` attribute of ``<a>`` tags
    becomes a special kind of diffable token.
    """
    # An already-parsed element is used directly; anything else goes
    # through parse_html with cleanup enabled.
    body_el = html if etree.iselement(html) else parse_html(html, cleanup=True)
    # Flatten the tree into one chunk per tag, word and end tag.
    pieces = flatten_el(body_el, skip_tag=True, include_hrefs=include_hrefs)
    # Stitch the chunks back together as token objects.
    return fixup_chunks(pieces)
Example #16
Source File: test_response.py From pyvas with MIT License | 5 votes |
def test_response_init(response):
    """Check the attributes and dict-style access of a response object."""
    # attributes
    assert response.ok
    # Compare by value: ``is`` against an int literal tests object identity,
    # which only works by accident of small-int caching and triggers a
    # SyntaxWarning on Python 3.8+.
    assert response.status_code == 200
    assert response.command == "test"
    assert iselement(response.xml)

    # data dict elements
    assert response["@test_id"] == "1234"
    assert response["child"]["@id"] == "1234"
Example #17
Source File: test_etree_check_command_transform.py From python-gvm with GNU General Public License v3.0 | 5 votes |
def test_success_response(self):
    # Build a <foo_response status="200"/> document and serialize it to text.
    source = etree.Element('foo_response')
    source.set('status', '200')
    xml_text = etree.tostring(source).decode('utf-8')

    checker = EtreeCheckCommandTransform()
    parsed = checker(xml_text)

    # The transform must hand back an element that mirrors the input.
    self.assertTrue(etree.iselement(parsed))
    self.assertEqual(parsed.tag, 'foo_response')
    self.assertEqual(parsed.get('status'), '200')
Example #18
Source File: builder.py From lambda-text-extractor with Apache License 2.0 | 5 votes |
def __call__(self, tag, *children, **attrib):
    """Create an element named ``tag`` and populate it from the arguments.

    ``tag`` is prefixed with the configured namespace unless it already
    starts with ``{``.  Keyword arguments are applied through the handler
    registered for ``dict``.  Each positional child is called first if it
    is callable, then dispatched through the type map; elements without a
    handler are appended directly.

    Raises:
        TypeError: if a child is neither an element nor of a type (or
            base type) known to the type map.
    """
    handlers = self._typemap

    if self._namespace is not None and tag[0] != '{':
        tag = self._namespace + tag
    elem = self._makeelement(tag, nsmap=self._nsmap)
    if attrib:
        handlers[dict](elem, attrib)

    for child in children:
        if callable(child):
            child = child()
        handler = handlers.get(type(child))
        if handler is None:
            # No handler for the exact type: elements are appended as-is.
            if ET.iselement(child):
                elem.append(child)
                continue
            # See if the typemap knows of any of this type's bases.
            for base in type(child).__mro__:
                handler = handlers.get(base)
                if handler is not None:
                    break
            else:
                raise TypeError("bad argument type: %s(%r)" %
                                (type(child).__name__, child))
        outcome = handler(elem, child)
        if outcome:
            # A truthy handler result is dispatched once more by its type.
            handlers.get(type(outcome))(elem, outcome)
    return elem
Example #19
Source File: test_etree_transform.py From python-gvm with GNU General Public License v3.0 | 5 votes |
def test_transform_response(self):
    # NOTE(review): the input '<foo/' has no closing '>' -- confirm whether
    # this is deliberate or a typo for '<foo/>'.
    to_etree = EtreeTransform()
    parsed = to_etree('<foo/')

    self.assertTrue(etree.iselement(parsed))
Example #20
Source File: builder.py From lambda-text-extractor with Apache License 2.0 | 5 votes |
def __call__(self, tag, *children, **attrib):
    """Create an element named ``tag`` and populate it from the arguments.

    ``tag`` is prefixed with the configured namespace unless it already
    starts with ``{``.  Keyword arguments are applied through the handler
    registered for ``dict``.  Each positional child is called first if it
    is callable, then dispatched through the type map; elements without a
    handler are appended directly.

    Raises:
        TypeError: if a child is neither an element nor of a type (or
            base type) known to the type map.
    """
    handlers = self._typemap

    if self._namespace is not None and tag[0] != '{':
        tag = self._namespace + tag
    elem = self._makeelement(tag, nsmap=self._nsmap)
    if attrib:
        handlers[dict](elem, attrib)

    for child in children:
        if callable(child):
            child = child()
        handler = handlers.get(type(child))
        if handler is None:
            # No handler for the exact type: elements are appended as-is.
            if ET.iselement(child):
                elem.append(child)
                continue
            # See if the typemap knows of any of this type's bases.
            for base in type(child).__mro__:
                handler = handlers.get(base)
                if handler is not None:
                    break
            else:
                raise TypeError("bad argument type: %s(%r)" %
                                (type(child).__name__, child))
        outcome = handler(elem, child)
        if outcome:
            # A truthy handler result is dispatched once more by its type.
            handlers.get(type(outcome))(elem, outcome)
    return elem
Example #21
Source File: test_etree_transform.py From python-gvm with GNU General Public License v3.0 | 5 votes |
def test_transform_more_complex_response(self):
    # A root with one attribute and two empty children.
    to_etree = EtreeTransform()
    parsed = to_etree('<foo id="bar"><lorem/><ipsum/></foo>')

    self.assertTrue(etree.iselement(parsed))
    self.assertEqual(parsed.tag, 'foo')
    self.assertEqual(parsed.get('id'), 'bar')
    # len() of an element counts its direct children.
    self.assertEqual(len(parsed), 2)
Example #22
Source File: builder.py From aws-lambda-lxml with GNU General Public License v3.0 | 5 votes |
def __call__(self, tag, *children, **attrib):
    """Create an element named ``tag`` and populate it from the arguments.

    ``tag`` is prefixed with the configured namespace unless it already
    starts with ``{``.  Keyword arguments are applied through the handler
    registered for ``dict``.  Each positional child is called first if it
    is callable, then dispatched through the type map; elements without a
    handler are appended directly.

    Raises:
        TypeError: if a child is neither an element nor of a type (or
            base type) known to the type map.
    """
    lookup = self._typemap.get

    if self._namespace is not None and tag[0] != '{':
        tag = self._namespace + tag
    elem = self._makeelement(tag, nsmap=self._nsmap)
    if attrib:
        lookup(dict)(elem, attrib)

    for child in children:
        if callable(child):
            child = child()
        handler = lookup(type(child))
        if handler is None:
            # No handler for the exact type: elements are appended as-is.
            if ET.iselement(child):
                elem.append(child)
                continue
            # See if the typemap knows of any of this type's bases.
            for base in type(child).__mro__:
                handler = lookup(base)
                if handler is not None:
                    break
            else:
                raise TypeError("bad argument type: %s(%r)" %
                                (type(child).__name__, child))
        outcome = handler(elem, child)
        if outcome:
            # A truthy handler result is dispatched once more by its type.
            lookup(type(outcome))(elem, outcome)
    return elem
Example #23
Source File: test_client.py From pyvas with MIT License | 5 votes |
def test_download_report_with_xml_format(self, client, report):
    """Downloading a report by id yields an element carrying that id."""
    downloaded = client.download_report(uuid=report["@id"])

    assert etree.iselement(downloaded)
    assert downloaded.attrib["id"] == report["@id"]
Example #24
Source File: test_client.py From pyvas with MIT License | 5 votes |
def test_client_send_request(client):
    """A raw XML request sent through the client comes back as an element."""
    reply = client._send_request("<describe_auth/>")

    assert etree.iselement(reply)
Example #25
Source File: parser.py From peach with Mozilla Public License 2.0 | 5 votes |
def StripComments(self, node):
    """Recursively delete every child of ``node`` that is not an element.

    Children for which ``etree.iselement`` is false are removed in place;
    the remaining children are processed recursively.
    """
    # NOTE(review): if ``etree`` is lxml, iselement() is also true for
    # comment nodes, so nothing would be removed here -- confirm.
    index = 0
    while index < len(node):
        if etree.iselement(node[index]):
            self.StripComments(node[index])
            index += 1
        else:
            # The next child slides into this slot, so the index stays put.
            del node[index]  # may not preserve text, don't care
Example #26
Source File: builder.py From aws-lambda-lxml with GNU General Public License v3.0 | 5 votes |
def __call__(self, tag, *children, **attrib):
    """Create an element named ``tag`` and populate it from the arguments.

    ``tag`` is prefixed with the configured namespace unless it already
    starts with ``{``.  Keyword arguments are applied through the handler
    registered for ``dict``.  Each positional child is called first if it
    is callable, then dispatched through the type map; elements without a
    handler are appended directly.

    Raises:
        TypeError: if a child is neither an element nor of a type (or
            base type) known to the type map.
    """
    lookup = self._typemap.get

    if self._namespace is not None and tag[0] != '{':
        tag = self._namespace + tag
    elem = self._makeelement(tag, nsmap=self._nsmap)
    if attrib:
        lookup(dict)(elem, attrib)

    for child in children:
        if callable(child):
            child = child()
        handler = lookup(type(child))
        if handler is None:
            # No handler for the exact type: elements are appended as-is.
            if ET.iselement(child):
                elem.append(child)
                continue
            # See if the typemap knows of any of this type's bases.
            for base in type(child).__mro__:
                handler = lookup(base)
                if handler is not None:
                    break
            else:
                raise TypeError("bad argument type: %s(%r)" %
                                (type(child).__name__, child))
        outcome = handler(elem, child)
        if outcome:
            # A truthy handler result is dispatched once more by its type.
            lookup(type(outcome))(elem, outcome)
    return elem
Example #27
Source File: builder.py From aws-lambda-lxml with GNU General Public License v3.0 | 5 votes |
def __call__(self, tag, *children, **attrib):
    """Create an element named ``tag`` and populate it from the arguments.

    ``tag`` is prefixed with the configured namespace unless it already
    starts with ``{``.  Keyword arguments are applied through the handler
    registered for ``dict``.  Each positional child is called first if it
    is callable, then dispatched through the type map; elements without a
    handler are appended directly.

    Raises:
        TypeError: if a child is neither an element nor of a type (or
            base type) known to the type map.
    """
    lookup = self._typemap.get

    if self._namespace is not None and tag[0] != '{':
        tag = self._namespace + tag
    elem = self._makeelement(tag, nsmap=self._nsmap)
    if attrib:
        lookup(dict)(elem, attrib)

    for child in children:
        if callable(child):
            child = child()
        handler = lookup(type(child))
        if handler is None:
            # No handler for the exact type: elements are appended as-is.
            if ET.iselement(child):
                elem.append(child)
                continue
            # See if the typemap knows of any of this type's bases.
            for base in type(child).__mro__:
                handler = lookup(base)
                if handler is not None:
                    break
            else:
                raise TypeError("bad argument type: %s(%r)" %
                                (type(child).__name__, child))
        outcome = handler(elem, child)
        if outcome:
            # A truthy handler result is dispatched once more by its type.
            lookup(type(outcome))(elem, outcome)
    return elem
Example #28
Source File: builder.py From stopstalk-deployment with MIT License | 5 votes |
def __call__(self, tag, *children, **attrib):
    """Create an element named ``tag`` and populate it from the arguments.

    ``tag`` is prefixed with the configured namespace unless it already
    starts with ``{``.  Keyword arguments are applied through the handler
    registered for ``dict``.  Each positional child is called first if it
    is callable, then dispatched through the type map; elements without a
    handler are appended directly.

    Raises:
        TypeError: if a child is neither an element nor of a type (or
            base type) known to the type map.
    """
    lookup = self._typemap.get

    if self._namespace is not None and tag[0] != '{':
        tag = self._namespace + tag
    elem = self._makeelement(tag, nsmap=self._nsmap)
    if attrib:
        lookup(dict)(elem, attrib)

    for child in children:
        if callable(child):
            child = child()
        handler = lookup(type(child))
        if handler is None:
            # No handler for the exact type: elements are appended as-is.
            if ET.iselement(child):
                elem.append(child)
                continue
            # See if the typemap knows of any of this type's bases.
            for base in type(child).__mro__:
                handler = lookup(base)
                if handler is not None:
                    break
            else:
                raise TypeError("bad argument type: %s(%r)" %
                                (type(child).__name__, child))
        outcome = handler(elem, child)
        if outcome:
            # A truthy handler result is dispatched once more by its type.
            lookup(type(outcome))(elem, outcome)
    return elem
Example #29
Source File: builder.py From aws-lambda-lxml with GNU General Public License v3.0 | 5 votes |
def __call__(self, tag, *children, **attrib):
    """Create an element named ``tag`` and populate it from the arguments.

    ``tag`` is prefixed with the configured namespace unless it already
    starts with ``{``.  Keyword arguments are applied through the handler
    registered for ``dict``.  Each positional child is called first if it
    is callable, then dispatched through the type map; elements without a
    handler are appended directly.

    Raises:
        TypeError: if a child is neither an element nor of a type (or
            base type) known to the type map.
    """
    lookup = self._typemap.get

    if self._namespace is not None and tag[0] != '{':
        tag = self._namespace + tag
    elem = self._makeelement(tag, nsmap=self._nsmap)
    if attrib:
        lookup(dict)(elem, attrib)

    for child in children:
        if callable(child):
            child = child()
        handler = lookup(type(child))
        if handler is None:
            # No handler for the exact type: elements are appended as-is.
            if ET.iselement(child):
                elem.append(child)
                continue
            # See if the typemap knows of any of this type's bases.
            for base in type(child).__mro__:
                handler = lookup(base)
                if handler is not None:
                    break
            else:
                raise TypeError("bad argument type: %s(%r)" %
                                (type(child).__name__, child))
        outcome = handler(elem, child)
        if outcome:
            # A truthy handler result is dispatched once more by its type.
            lookup(type(outcome))(elem, outcome)
    return elem
Example #30
Source File: builder.py From learn_python3_spider with MIT License | 5 votes |
def __call__(self, tag, *children, **attrib):
    """Create an element named ``tag`` and populate it from the arguments.

    ``tag`` is prefixed with the configured namespace unless it already
    starts with ``{``.  Keyword arguments are applied through the handler
    registered for ``dict``.  Each positional child is called first if it
    is callable, then dispatched through the type map; elements without a
    handler are appended directly.

    Raises:
        TypeError: if a child is neither an element nor of a type (or
            base type) known to the type map.
    """
    handlers = self._typemap

    if self._namespace is not None and tag[0] != '{':
        tag = self._namespace + tag
    elem = self._makeelement(tag, nsmap=self._nsmap)
    if attrib:
        handlers[dict](elem, attrib)

    for child in children:
        if callable(child):
            child = child()
        handler = handlers.get(type(child))
        if handler is None:
            # No handler for the exact type: elements are appended as-is.
            if ET.iselement(child):
                elem.append(child)
                continue
            # See if the typemap knows of any of this type's bases.
            for base in type(child).__mro__:
                handler = handlers.get(base)
                if handler is not None:
                    break
            else:
                raise TypeError("bad argument type: %s(%r)" %
                                (type(child).__name__, child))
        outcome = handler(elem, child)
        if outcome:
            # A truthy handler result is dispatched once more by its type.
            handlers.get(type(outcome))(elem, outcome)
    return elem