Python lxml.etree.ProcessingInstruction() Examples
The following are 23 code examples of lxml.etree.ProcessingInstruction().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module lxml.etree, or try the search function.
Example #1
Source File: sax.py From learn_python3_spider with MIT License | 6 votes |
def saxify(self):
    """Emit SAX events for the wrapped element and its neighbouring PIs.

    Calls ``startDocument()`` on ``self._content_handler``, then hands the
    following nodes to ``self._recursive_saxify`` (each with a fresh empty
    dict): the unbroken run of processing-instruction siblings reached via
    ``getprevious()``, ``self._element`` itself, and the unbroken run of
    processing-instruction siblings reached via ``getnext()``.  Ends with
    ``endDocument()``.  Sibling lookups are skipped when the element lacks
    the corresponding method.
    """
    self._content_handler.startDocument()

    root = self._element
    if hasattr(root, 'getprevious'):
        # Walking backwards visits the nearest sibling first; collect the
        # run and replay it reversed so the farthest one is emitted first.
        leading = []
        node = root.getprevious()
        while getattr(node, 'tag', None) is ProcessingInstruction:
            leading.append(node)
            node = node.getprevious()
        for pi in reversed(leading):
            self._recursive_saxify(pi, {})

    self._recursive_saxify(root, {})

    if hasattr(root, 'getnext'):
        node = root.getnext()
        while getattr(node, 'tag', None) is ProcessingInstruction:
            self._recursive_saxify(node, {})
            node = node.getnext()

    self._content_handler.endDocument()
Example #2
Source File: sax.py From lambda-text-extractor with Apache License 2.0 | 6 votes |
def saxify(self):
    """Emit SAX events for the wrapped element and its neighbouring PIs.

    Calls ``startDocument()`` on ``self._content_handler``, then hands the
    following nodes to ``self._recursive_saxify`` (each with a fresh empty
    dict): the unbroken run of processing-instruction siblings reached via
    ``getprevious()``, ``self._element`` itself, and the unbroken run of
    processing-instruction siblings reached via ``getnext()``.  Ends with
    ``endDocument()``.  Sibling lookups are skipped when the element lacks
    the corresponding method.
    """
    self._content_handler.startDocument()

    root = self._element
    if hasattr(root, 'getprevious'):
        # Walking backwards visits the nearest sibling first; collect the
        # run and replay it reversed so the farthest one is emitted first.
        leading = []
        node = root.getprevious()
        while getattr(node, 'tag', None) is ProcessingInstruction:
            leading.append(node)
            node = node.getprevious()
        for pi in reversed(leading):
            self._recursive_saxify(pi, {})

    self._recursive_saxify(root, {})

    if hasattr(root, 'getnext'):
        node = root.getnext()
        while getattr(node, 'tag', None) is ProcessingInstruction:
            self._recursive_saxify(node, {})
            node = node.getnext()

    self._content_handler.endDocument()
Example #3
Source File: sax.py From lambda-text-extractor with Apache License 2.0 | 6 votes |
def saxify(self):
    """Emit SAX events for the wrapped element and its neighbouring PIs.

    Calls ``startDocument()`` on ``self._content_handler``, then hands the
    following nodes to ``self._recursive_saxify`` (each with a fresh empty
    dict): the unbroken run of processing-instruction siblings reached via
    ``getprevious()``, ``self._element`` itself, and the unbroken run of
    processing-instruction siblings reached via ``getnext()``.  Ends with
    ``endDocument()``.  Sibling lookups are skipped when the element lacks
    the corresponding method.
    """
    self._content_handler.startDocument()

    root = self._element
    if hasattr(root, 'getprevious'):
        # Walking backwards visits the nearest sibling first; collect the
        # run and replay it reversed so the farthest one is emitted first.
        leading = []
        node = root.getprevious()
        while getattr(node, 'tag', None) is ProcessingInstruction:
            leading.append(node)
            node = node.getprevious()
        for pi in reversed(leading):
            self._recursive_saxify(pi, {})

    self._recursive_saxify(root, {})

    if hasattr(root, 'getnext'):
        node = root.getnext()
        while getattr(node, 'tag', None) is ProcessingInstruction:
            self._recursive_saxify(node, {})
            node = node.getnext()

    self._content_handler.endDocument()
Example #4
Source File: sax.py From stopstalk-deployment with MIT License | 6 votes |
def saxify(self):
    """Emit SAX events for the wrapped element and its neighbouring PIs.

    Calls ``startDocument()`` on ``self._content_handler``, then hands the
    following nodes to ``self._recursive_saxify`` (each with a fresh empty
    dict): the unbroken run of processing-instruction siblings reached via
    ``getprevious()``, ``self._element`` itself, and the unbroken run of
    processing-instruction siblings reached via ``getnext()``.  Ends with
    ``endDocument()``.  Sibling lookups are skipped when the element lacks
    the corresponding method.
    """
    self._content_handler.startDocument()

    root = self._element
    if hasattr(root, 'getprevious'):
        # Walking backwards visits the nearest sibling first; collect the
        # run and replay it reversed so the farthest one is emitted first.
        leading = []
        node = root.getprevious()
        while getattr(node, 'tag', None) is ProcessingInstruction:
            leading.append(node)
            node = node.getprevious()
        for pi in reversed(leading):
            self._recursive_saxify(pi, {})

    self._recursive_saxify(root, {})

    if hasattr(root, 'getnext'):
        node = root.getnext()
        while getattr(node, 'tag', None) is ProcessingInstruction:
            self._recursive_saxify(node, {})
            node = node.getnext()

    self._content_handler.endDocument()
Example #5
Source File: sax.py From aws-lambda-lxml with GNU General Public License v3.0 | 6 votes |
def saxify(self):
    """Emit SAX events for the wrapped element and its neighbouring PIs.

    Calls ``startDocument()`` on ``self._content_handler``, then hands the
    following nodes to ``self._recursive_saxify`` (each with a fresh empty
    dict): the unbroken run of processing-instruction siblings reached via
    ``getprevious()``, ``self._element`` itself, and the unbroken run of
    processing-instruction siblings reached via ``getnext()``.  Ends with
    ``endDocument()``.  Sibling lookups are skipped when the element lacks
    the corresponding method.
    """
    self._content_handler.startDocument()

    root = self._element
    if hasattr(root, 'getprevious'):
        # Walking backwards visits the nearest sibling first; collect the
        # run and replay it reversed so the farthest one is emitted first.
        leading = []
        node = root.getprevious()
        while getattr(node, 'tag', None) is ProcessingInstruction:
            leading.append(node)
            node = node.getprevious()
        for pi in reversed(leading):
            self._recursive_saxify(pi, {})

    self._recursive_saxify(root, {})

    if hasattr(root, 'getnext'):
        node = root.getnext()
        while getattr(node, 'tag', None) is ProcessingInstruction:
            self._recursive_saxify(node, {})
            node = node.getnext()

    self._content_handler.endDocument()
Example #6
Source File: sax.py From aws-lambda-lxml with GNU General Public License v3.0 | 6 votes |
def saxify(self):
    """Emit SAX events for the wrapped element and its neighbouring PIs.

    Calls ``startDocument()`` on ``self._content_handler``, then hands the
    following nodes to ``self._recursive_saxify`` (each with a fresh empty
    dict): the unbroken run of processing-instruction siblings reached via
    ``getprevious()``, ``self._element`` itself, and the unbroken run of
    processing-instruction siblings reached via ``getnext()``.  Ends with
    ``endDocument()``.  Sibling lookups are skipped when the element lacks
    the corresponding method.
    """
    self._content_handler.startDocument()

    root = self._element
    if hasattr(root, 'getprevious'):
        # Walking backwards visits the nearest sibling first; collect the
        # run and replay it reversed so the farthest one is emitted first.
        leading = []
        node = root.getprevious()
        while getattr(node, 'tag', None) is ProcessingInstruction:
            leading.append(node)
            node = node.getprevious()
        for pi in reversed(leading):
            self._recursive_saxify(pi, {})

    self._recursive_saxify(root, {})

    if hasattr(root, 'getnext'):
        node = root.getnext()
        while getattr(node, 'tag', None) is ProcessingInstruction:
            self._recursive_saxify(node, {})
            node = node.getnext()

    self._content_handler.endDocument()
Example #7
Source File: sax.py From aws-lambda-lxml with GNU General Public License v3.0 | 6 votes |
def saxify(self):
    """Emit SAX events for the wrapped element and its neighbouring PIs.

    Calls ``startDocument()`` on ``self._content_handler``, then hands the
    following nodes to ``self._recursive_saxify`` (each with a fresh empty
    dict): the unbroken run of processing-instruction siblings reached via
    ``getprevious()``, ``self._element`` itself, and the unbroken run of
    processing-instruction siblings reached via ``getnext()``.  Ends with
    ``endDocument()``.  Sibling lookups are skipped when the element lacks
    the corresponding method.
    """
    self._content_handler.startDocument()

    root = self._element
    if hasattr(root, 'getprevious'):
        # Walking backwards visits the nearest sibling first; collect the
        # run and replay it reversed so the farthest one is emitted first.
        leading = []
        node = root.getprevious()
        while getattr(node, 'tag', None) is ProcessingInstruction:
            leading.append(node)
            node = node.getprevious()
        for pi in reversed(leading):
            self._recursive_saxify(pi, {})

    self._recursive_saxify(root, {})

    if hasattr(root, 'getnext'):
        node = root.getnext()
        while getattr(node, 'tag', None) is ProcessingInstruction:
            self._recursive_saxify(node, {})
            node = node.getnext()

    self._content_handler.endDocument()
Example #8
Source File: sax.py From aws-lambda-lxml with GNU General Public License v3.0 | 6 votes |
def saxify(self):
    """Emit SAX events for the wrapped element and its neighbouring PIs.

    Calls ``startDocument()`` on ``self._content_handler``, then hands the
    following nodes to ``self._recursive_saxify`` (each with a fresh empty
    dict): the unbroken run of processing-instruction siblings reached via
    ``getprevious()``, ``self._element`` itself, and the unbroken run of
    processing-instruction siblings reached via ``getnext()``.  Ends with
    ``endDocument()``.  Sibling lookups are skipped when the element lacks
    the corresponding method.
    """
    self._content_handler.startDocument()

    root = self._element
    if hasattr(root, 'getprevious'):
        # Walking backwards visits the nearest sibling first; collect the
        # run and replay it reversed so the farthest one is emitted first.
        leading = []
        node = root.getprevious()
        while getattr(node, 'tag', None) is ProcessingInstruction:
            leading.append(node)
            node = node.getprevious()
        for pi in reversed(leading):
            self._recursive_saxify(pi, {})

    self._recursive_saxify(root, {})

    if hasattr(root, 'getnext'):
        node = root.getnext()
        while getattr(node, 'tag', None) is ProcessingInstruction:
            self._recursive_saxify(node, {})
            node = node.getnext()

    self._content_handler.endDocument()
Example #9
Source File: sax.py From aws-lambda-lxml with GNU General Public License v3.0 | 5 votes |
def processingInstruction(self, target, data):
    """Build ``ProcessingInstruction(target, data)`` and store it.

    While ``self._root`` is None the node is appended to
    ``self._root_siblings``; otherwise it is appended to the last entry of
    ``self._element_stack``.
    """
    node = ProcessingInstruction(target, data)
    if self._root is not None:
        self._element_stack[-1].append(node)
        return
    self._root_siblings.append(node)
Example #10
Source File: sax.py From lambda-text-extractor with Apache License 2.0 | 5 votes |
def processingInstruction(self, target, data):
    """Build ``ProcessingInstruction(target, data)`` and store it.

    While ``self._root`` is None the node is appended to
    ``self._root_siblings``; otherwise it is appended to the last entry of
    ``self._element_stack``.
    """
    node = ProcessingInstruction(target, data)
    if self._root is not None:
        self._element_stack[-1].append(node)
        return
    self._root_siblings.append(node)
Example #11
Source File: sax.py From lambda-text-extractor with Apache License 2.0 | 5 votes |
def processingInstruction(self, target, data):
    """Build ``ProcessingInstruction(target, data)`` and store it.

    While ``self._root`` is None the node is appended to
    ``self._root_siblings``; otherwise it is appended to the last entry of
    ``self._element_stack``.
    """
    node = ProcessingInstruction(target, data)
    if self._root is not None:
        self._element_stack[-1].append(node)
        return
    self._root_siblings.append(node)
Example #12
Source File: markup.py From ChemDataExtractor with MIT License | 5 votes |
def _is_inline(self, element):
    """Return True if ``element`` counts as an inline element.

    Comment and processing-instruction nodes are never inline.  For any other
    node the lower-cased tag is looked up in ``self.inline_elements``.
    """
    tag = element.tag
    # Comment / PI nodes are rejected before ``.lower()`` is ever called on
    # their tag.
    if tag in {etree.Comment, etree.ProcessingInstruction}:
        return False
    return tag.lower() in self.inline_elements
Example #13
Source File: markup.py From ChemDataExtractor with MIT License | 5 votes |
def _parse_element_r(self, el, specials, refs, id=None, element_cls=Paragraph):
    """Recursively turn ``el`` and its descendants into a list of document elements.

    :param el: Node to parse; comment and processing-instruction nodes yield ``[]``.
    :param specials: Mapping from nodes to pre-built result lists; a hit is
        returned as-is without descending into the node.
    :param refs: Mapping from nodes to reference lists attached to the
        element created for the node's own text.
    :param id: Inherited id, overridden by the node's own ``id`` attribute
        when present.
    :param element_cls: Class used to wrap each piece of text.
    :returns: List of elements built from the node's text, its children and
        their tail text.
    """
    non_content_tags = {etree.Comment, etree.ProcessingInstruction}
    if el.tag in non_content_tags:
        return []
    if el in specials:
        return specials[el]

    current_id = el.get('id', id)
    el_references = refs.get(el, [])
    parsed = []

    # Leading text of the node; if there is none but references exist, an
    # empty element is created to carry them.
    if el.text is not None:
        parsed.append(element_cls(six.text_type(el.text), id=current_id, references=el_references))
    elif el_references:
        parsed.append(element_cls('', id=current_id, references=el_references))

    for child in el:
        # br is a special case - technically inline, but we want to split
        if child.tag not in non_content_tags and child.tag.lower() == 'br':
            parsed.append(element_cls(''))

        child_parsed = self._parse_element_r(
            child, specials=specials, refs=refs, id=current_id, element_cls=element_cls
        )

        # An inline child whose first element has the same Text/Sentence type
        # as our last element is glued onto it instead of starting a new one.
        mergeable = (
            self._is_inline(child)
            and len(parsed) > 0
            and len(child_parsed) > 0
            and isinstance(parsed[-1], (Text, Sentence))
            and isinstance(child_parsed[0], (Text, Sentence))
            and type(parsed[-1]) == type(child_parsed[0])
        )
        if mergeable:
            # NOTE(review): pop(0) mutates child_parsed in place; when the
            # child came from ``specials`` this is the stored list itself.
            parsed[-1] += child_parsed.pop(0)
        parsed.extend(child_parsed)

        if child.tail is None:
            continue
        # Tail text after an inline child continues the current element;
        # otherwise it starts a new one.
        if self._is_inline(child) and len(parsed) > 0 and isinstance(parsed[-1], element_cls):
            parsed[-1] += element_cls(six.text_type(child.tail), id=current_id)
        else:
            parsed.append(element_cls(six.text_type(child.tail), id=current_id))

    return parsed
Example #14
Source File: sax.py From aws-lambda-lxml with GNU General Public License v3.0 | 5 votes |
def processingInstruction(self, target, data):
    """Build ``ProcessingInstruction(target, data)`` and store it.

    While ``self._root`` is None the node is appended to
    ``self._root_siblings``; otherwise it is appended to the last entry of
    ``self._element_stack``.
    """
    node = ProcessingInstruction(target, data)
    if self._root is not None:
        self._element_stack[-1].append(node)
        return
    self._root_siblings.append(node)
Example #15
Source File: sax.py From stopstalk-deployment with MIT License | 5 votes |
def processingInstruction(self, target, data):
    """Build ``ProcessingInstruction(target, data)`` and store it.

    While ``self._root`` is None the node is appended to
    ``self._root_siblings``; otherwise it is appended to the last entry of
    ``self._element_stack``.
    """
    node = ProcessingInstruction(target, data)
    if self._root is not None:
        self._element_stack[-1].append(node)
        return
    self._root_siblings.append(node)
Example #16
Source File: sax.py From aws-lambda-lxml with GNU General Public License v3.0 | 5 votes |
def processingInstruction(self, target, data):
    """Build ``ProcessingInstruction(target, data)`` and store it.

    While ``self._root`` is None the node is appended to
    ``self._root_siblings``; otherwise it is appended to the last entry of
    ``self._element_stack``.
    """
    node = ProcessingInstruction(target, data)
    if self._root is not None:
        self._element_stack[-1].append(node)
        return
    self._root_siblings.append(node)
Example #17
Source File: sax.py From learn_python3_spider with MIT License | 5 votes |
def processingInstruction(self, target, data):
    """Build ``ProcessingInstruction(target, data)`` and store it.

    While ``self._root`` is None the node is appended to
    ``self._root_siblings``; otherwise it is appended to the last entry of
    ``self._element_stack``.
    """
    node = ProcessingInstruction(target, data)
    if self._root is not None:
        self._element_stack[-1].append(node)
        return
    self._root_siblings.append(node)
Example #18
Source File: sax.py From aws-lambda-lxml with GNU General Public License v3.0 | 5 votes |
def processingInstruction(self, target, data):
    """Build ``ProcessingInstruction(target, data)`` and store it.

    While ``self._root`` is None the node is appended to
    ``self._root_siblings``; otherwise it is appended to the last entry of
    ``self._element_stack``.
    """
    node = ProcessingInstruction(target, data)
    if self._root is not None:
        self._element_stack[-1].append(node)
        return
    self._root_siblings.append(node)
Example #19
Source File: sax.py From stopstalk-deployment with MIT License | 4 votes |
def _recursive_saxify(self, element, prefixes):
    """Emit SAX events for ``element`` and, recursively, for its children.

    A processing instruction is reported through
    ``processingInstruction(target, text)``; a comment produces no event of
    its own.  In both cases a non-empty tail is sent as ``characters`` and the
    method returns.  Any other element produces, in order:
    ``startPrefixMapping`` calls, ``startElementNS``, its text, its children,
    ``endElementNS``, ``endPrefixMapping`` calls and its tail text.

    ``prefixes`` is passed through unchanged to ``self._build_qname`` and to
    the recursive calls for the children.
    """
    handler = self._content_handler
    tag = element.tag

    if tag is Comment or tag is ProcessingInstruction:
        if tag is ProcessingInstruction:
            handler.processingInstruction(element.target, element.text)
        trailing = element.tail
        if trailing:
            handler.characters(trailing)
        return

    # Starts empty and is only ever handed to _build_qname, which presumably
    # appends (prefix, uri) pairs for prefixes that need declaring here.
    declared = []
    qname_for = self._build_qname

    attribute_items = element.items()
    if not attribute_items:
        sax_attributes = self._empty_attributes
    else:
        values = {}
        qnames = {}
        for raw_name, value in attribute_items:
            key = _getNsTag(raw_name)
            attr_ns, attr_local = key
            values[key] = value
            qnames[key] = qname_for(attr_ns, attr_local, prefixes, declared)
        sax_attributes = self._attr_class(values, qnames)

    ns_uri, local_name = _getNsTag(tag)
    qname = qname_for(ns_uri, local_name, prefixes, declared)
    name = (ns_uri, local_name)

    for prefix, uri in declared:
        handler.startPrefixMapping(prefix, uri)
    handler.startElementNS(name, qname, sax_attributes)

    text = element.text
    if text:
        handler.characters(text)
    for child in element:
        self._recursive_saxify(child, prefixes)

    handler.endElementNS(name, qname)
    for prefix, _uri in declared:
        handler.endPrefixMapping(prefix)

    trailing = element.tail
    if trailing:
        handler.characters(trailing)
Example #20
Source File: sax.py From learn_python3_spider with MIT License | 4 votes |
def _recursive_saxify(self, element, parent_nsmap):
    """Emit SAX events for ``element`` and, recursively, for its children.

    A processing instruction is reported through
    ``processingInstruction(target, text)``; a comment produces no event of
    its own.  In both cases a non-empty tail is sent as ``characters`` and the
    method returns.  Any other element produces, in order:
    ``startPrefixMapping`` for every prefix whose URI differs from
    ``parent_nsmap``, ``startElementNS``, its text, its children (which
    receive this element's ``nsmap`` as their parent map), ``endElementNS``,
    the matching ``endPrefixMapping`` calls and its tail text.
    """
    handler = self._content_handler
    tag = element.tag

    if tag is Comment or tag is ProcessingInstruction:
        if tag is ProcessingInstruction:
            handler.processingInstruction(element.target, element.text)
        trailing = element.tail
        if trailing:
            handler.characters(trailing)
        return

    nsmap = element.nsmap
    if nsmap != parent_nsmap:
        # Only prefixes that are new or rebound relative to the parent are
        # announced to the handler.
        declared = [
            (prefix, uri)
            for prefix, uri in nsmap.items()
            if parent_nsmap.get(prefix) != uri
        ]
    else:
        declared = []

    attribute_items = element.items()
    if not attribute_items:
        sax_attributes = self._empty_attributes
    else:
        values = {}
        qnames = {}
        for raw_name, value in attribute_items:
            key = _getNsTag(raw_name)
            attr_ns, attr_local = key
            values[key] = value
            qnames[key] = self._build_qname(
                attr_ns, attr_local, nsmap,
                preferred_prefix=None, is_attribute=True)
        sax_attributes = self._attr_class(values, qnames)

    ns_uri, local_name = _getNsTag(tag)
    qname = self._build_qname(
        ns_uri, local_name, nsmap, element.prefix, is_attribute=False)
    name = (ns_uri, local_name)

    for prefix, uri in declared:
        handler.startPrefixMapping(prefix, uri)
    handler.startElementNS(name, qname, sax_attributes)

    text = element.text
    if text:
        handler.characters(text)
    for child in element:
        self._recursive_saxify(child, nsmap)

    handler.endElementNS(name, qname)
    for prefix, _uri in declared:
        handler.endPrefixMapping(prefix)

    trailing = element.tail
    if trailing:
        handler.characters(trailing)
Example #21
Source File: sax.py From aws-lambda-lxml with GNU General Public License v3.0 | 4 votes |
def _recursive_saxify(self, element, prefixes):
    """Emit SAX events for ``element`` and, recursively, for its children.

    A processing instruction is reported through
    ``processingInstruction(target, text)``; a comment produces no event of
    its own.  In both cases a non-empty tail is sent as ``characters`` and the
    method returns.  Any other element produces, in order:
    ``startPrefixMapping`` calls, ``startElementNS``, its text, its children,
    ``endElementNS``, ``endPrefixMapping`` calls and its tail text.

    ``prefixes`` is passed through unchanged to ``self._build_qname`` and to
    the recursive calls for the children.
    """
    handler = self._content_handler
    tag = element.tag

    if tag is Comment or tag is ProcessingInstruction:
        if tag is ProcessingInstruction:
            handler.processingInstruction(element.target, element.text)
        trailing = element.tail
        if trailing:
            handler.characters(trailing)
        return

    # Starts empty and is only ever handed to _build_qname, which presumably
    # appends (prefix, uri) pairs for prefixes that need declaring here.
    declared = []
    qname_for = self._build_qname

    attribute_items = element.items()
    if not attribute_items:
        sax_attributes = self._empty_attributes
    else:
        values = {}
        qnames = {}
        for raw_name, value in attribute_items:
            key = _getNsTag(raw_name)
            attr_ns, attr_local = key
            values[key] = value
            qnames[key] = qname_for(attr_ns, attr_local, prefixes, declared)
        sax_attributes = self._attr_class(values, qnames)

    ns_uri, local_name = _getNsTag(tag)
    qname = qname_for(ns_uri, local_name, prefixes, declared)
    name = (ns_uri, local_name)

    for prefix, uri in declared:
        handler.startPrefixMapping(prefix, uri)
    handler.startElementNS(name, qname, sax_attributes)

    text = element.text
    if text:
        handler.characters(text)
    for child in element:
        self._recursive_saxify(child, prefixes)

    handler.endElementNS(name, qname)
    for prefix, _uri in declared:
        handler.endPrefixMapping(prefix)

    trailing = element.tail
    if trailing:
        handler.characters(trailing)
Example #22
Source File: sax.py From lambda-text-extractor with Apache License 2.0 | 4 votes |
def _recursive_saxify(self, element, prefixes):
    """Emit SAX events for ``element`` and, recursively, for its children.

    A processing instruction is reported through
    ``processingInstruction(target, text)``; a comment produces no event of
    its own.  In both cases a non-empty tail is sent as ``characters`` and the
    method returns.  Any other element produces, in order:
    ``startPrefixMapping`` calls, ``startElementNS``, its text, its children,
    ``endElementNS``, ``endPrefixMapping`` calls and its tail text.

    ``prefixes`` is passed through unchanged to ``self._build_qname`` and to
    the recursive calls for the children.
    """
    handler = self._content_handler
    tag = element.tag

    if tag is Comment or tag is ProcessingInstruction:
        if tag is ProcessingInstruction:
            handler.processingInstruction(element.target, element.text)
        trailing = element.tail
        if trailing:
            handler.characters(trailing)
        return

    # Starts empty and is only ever handed to _build_qname, which presumably
    # appends (prefix, uri) pairs for prefixes that need declaring here.
    declared = []
    qname_for = self._build_qname

    attribute_items = element.items()
    if not attribute_items:
        sax_attributes = self._empty_attributes
    else:
        values = {}
        qnames = {}
        for raw_name, value in attribute_items:
            key = _getNsTag(raw_name)
            attr_ns, attr_local = key
            values[key] = value
            qnames[key] = qname_for(attr_ns, attr_local, prefixes, declared)
        sax_attributes = self._attr_class(values, qnames)

    ns_uri, local_name = _getNsTag(tag)
    qname = qname_for(ns_uri, local_name, prefixes, declared)
    name = (ns_uri, local_name)

    for prefix, uri in declared:
        handler.startPrefixMapping(prefix, uri)
    handler.startElementNS(name, qname, sax_attributes)

    text = element.text
    if text:
        handler.characters(text)
    for child in element:
        self._recursive_saxify(child, prefixes)

    handler.endElementNS(name, qname)
    for prefix, _uri in declared:
        handler.endPrefixMapping(prefix)

    trailing = element.tail
    if trailing:
        handler.characters(trailing)
Example #23
Source File: sax.py From aws-lambda-lxml with GNU General Public License v3.0 | 4 votes |
def _recursive_saxify(self, element, prefixes):
    """Emit SAX events for ``element`` and, recursively, for its children.

    A processing instruction is reported through
    ``processingInstruction(target, text)``; a comment produces no event of
    its own.  In both cases a non-empty tail is sent as ``characters`` and the
    method returns.  Any other element produces, in order:
    ``startPrefixMapping`` calls, ``startElementNS``, its text, its children,
    ``endElementNS``, ``endPrefixMapping`` calls and its tail text.

    ``prefixes`` is passed through unchanged to ``self._build_qname`` and to
    the recursive calls for the children.
    """
    handler = self._content_handler
    tag = element.tag

    if tag is Comment or tag is ProcessingInstruction:
        if tag is ProcessingInstruction:
            handler.processingInstruction(element.target, element.text)
        trailing = element.tail
        if trailing:
            handler.characters(trailing)
        return

    # Starts empty and is only ever handed to _build_qname, which presumably
    # appends (prefix, uri) pairs for prefixes that need declaring here.
    declared = []
    qname_for = self._build_qname

    attribute_items = element.items()
    if not attribute_items:
        sax_attributes = self._empty_attributes
    else:
        values = {}
        qnames = {}
        for raw_name, value in attribute_items:
            key = _getNsTag(raw_name)
            attr_ns, attr_local = key
            values[key] = value
            qnames[key] = qname_for(attr_ns, attr_local, prefixes, declared)
        sax_attributes = self._attr_class(values, qnames)

    ns_uri, local_name = _getNsTag(tag)
    qname = qname_for(ns_uri, local_name, prefixes, declared)
    name = (ns_uri, local_name)

    for prefix, uri in declared:
        handler.startPrefixMapping(prefix, uri)
    handler.startElementNS(name, qname, sax_attributes)

    text = element.text
    if text:
        handler.characters(text)
    for child in element:
        self._recursive_saxify(child, prefixes)

    handler.endElementNS(name, qname)
    for prefix, _uri in declared:
        handler.endPrefixMapping(prefix)

    trailing = element.tail
    if trailing:
        handler.characters(trailing)