Python lxml.etree.tounicode() Examples
The following are 27 code examples of lxml.etree.tounicode().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module lxml.etree, or try the search function.
Example #1
Source File: test_formatting.py From xmldiff with MIT License | 6 votes |
def test_do_undo_element_double_format(self):
    """Nested formatting tags survive a do/undo placeholder round trip."""
    source_xml = u'<p>This is <u>doubly <b>formatted</b></u> text.</p>'
    maker = formatting.PlaceholderMaker(['p'], ['b', 'u'])
    node = etree.fromstring(source_xml)

    # Formatting tags get replaced, and the content remains
    maker.do_element(node)
    expected_text = (
        u'This is \ue006doubly \ue008formatted\ue007'
        u'\ue005 text.'
    )
    self.assertEqual(node.text, expected_text)

    # Undoing the replacement must give back the original markup.
    maker.undo_element(node)
    self.assertEqual(etree.tounicode(node), source_xml)
Example #2
Source File: main.py From python-examples with MIT License | 6 votes |
def execute(url):
    """Print the child texts of every ``pozycja`` element under the root.

    ``url`` is handed straight to ``etree.parse``. For each direct child of
    the document root whose tag is ``pozycja``, the ``text`` of each of its
    own children is collected into a list and printed.
    """
    root = etree.parse(url).getroot()
    for tag in root:
        if tag.tag != 'pozycja':
            continue
        # The check above already fixes the tag name, so the comprehension
        # needs no filter of its own.
        print([subtag.text for subtag in tag])
Example #3
Source File: base.py From PySIGNFe with GNU Lesser General Public License v2.1 | 6 votes |
def _le_xml(self, arquivo):
    """Parse ``arquivo`` and store the resulting element in ``self._xml``.

    ``arquivo`` may be:

    * ``None`` -- nothing is parsed and ``False`` is returned;
    * a non-string object -- it is first serialized with
      ``etree.tounicode``;
    * a string containing ``<`` -- treated as XML text;
    * any other string -- treated as a path whose contents are read.

    Returns ``True`` once ``self._xml`` has been set.
    """
    if arquivo is None:
        return False

    # After this point ``arquivo`` is always a string, which is why no
    # separate "parse a file object" branch is needed.
    if not isinstance(arquivo, basestring):
        arquivo = etree.tounicode(arquivo)

    if NAMESPACE_NFSE in arquivo:
        arquivo = por_acentos(arquivo)

    if u'<' in arquivo:
        texto = arquivo
    else:
        # No markup in the string: treat it as a file name.  The context
        # manager closes the file even if reading fails.
        with open(arquivo) as arq:
            texto = arq.read()

    self._xml = etree.fromstring(tira_abertura(texto))
    return True
Example #4
Source File: tests.py From yournextrepresentative with GNU Affero General Public License v3.0 | 5 votes |
def test_all_basic_feed_with_one_item(self):
    """The basic Atom feed describes the single stored result event."""
    feed_response = self.app.get('/results/all-basic.atom')
    feed_root = etree.XML(feed_response.content)
    actual_xml = etree.tounicode(feed_root, pretty_print=True)

    event = ResultEvent.objects.first()
    # The template text is kept exactly as it appears in the source listing.
    template = '''<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-gb"> <title>Election results from example.com</title> <link href="http://example.com/" rel="alternate"/> <link href="http://example.com/results/all-basic.atom" rel="self"/> <id>http://example.com/</id> <updated>{updated}</updated> <entry> <title>Tessa Jowell (Labour Party) won in Member of Parliament for Dulwich and West Norwood</title> <link href="http://example.com/#{item_id}" rel="alternate"/> <published>{updated}</published> <updated>{updated}</updated> <author> <name>john</name> </author> <id>http://example.com/#{item_id}</id> <summary type="html">A example.com volunteer recorded at {space_separated} that Tessa Jowell (Labour Party) won the ballot in Member of Parliament for Dulwich and West Norwood, quoting the source 'Seen on the BBC news').</summary> </entry> </feed> '''
    expected_xml = template.format(
        updated=rfc3339_date(event.created),
        space_separated=event.created.strftime("%Y-%m-%d %H:%M:%S"),
        item_id=event.id,
    )

    self.compare_xml(expected_xml, actual_xml)
Example #5
Source File: rpc_response.py From voltha with Apache License 2.0 | 5 votes |
def build_yang_response(self, root, request, yang_options=None, custom_rpc=False):
    """Convert ``root`` to YANG XML and wrap it in an XML response.

    Returns whatever ``self.build_xml_response`` returns.  If any exception
    is raised along the way it is logged, ``self.rpc_response`` is flagged
    as an error carrying ``ncerror.BadMsg(request)``, and ``None`` is
    returned.
    """
    try:
        self.custom_rpc = custom_rpc
        yang_xml = self.to_yang_xml(root, request, yang_options, custom_rpc)
        pretty_xml = etree.tounicode(yang_xml, pretty_print=True)
        log.info('yang-xml', yang_xml=pretty_xml)
        response = self.build_xml_response(request, yang_xml, custom_rpc)
    except Exception:
        log.exception('error-building-yang-response',
                      request=request,
                      xml=etree.tostring(root))
        self.rpc_response.is_error = True
        self.rpc_response.node = ncerror.BadMsg(request)
        return None
    return response
Example #6
Source File: nc_protocol_handler.py From voltha with Apache License 2.0 | 5 votes |
def send_custom_rpc_reply(self, rpc_reply, origmsg):
    """Wrap the children of ``rpc_reply`` in an rpc-reply element and send it.

    The wrapper copies the attributes of ``origmsg`` and the namespace map
    of ``rpc_reply``.  The serialized wrapper is logged and then passed to
    ``self.send_message``.
    """
    envelope = etree.Element(
        qmap(C.NC) + C.RPC_REPLY,
        attrib=origmsg.attrib,
        nsmap=rpc_reply.nsmap,
    )
    try:
        envelope.extend(rpc_reply.getchildren())
    except AttributeError:
        # Fall back to extending with ``rpc_reply`` itself.
        envelope.extend(rpc_reply)

    serialized = etree.tounicode(envelope, pretty_print=True)
    log.info("Custom-RPC-Reply", reply=serialized)
    self.send_message(serialized)
Example #7
Source File: nc_protocol_handler.py From voltha with Apache License 2.0 | 5 votes |
def send_rpc_reply(self, rpc_reply, origmsg):
    """Wrap ``rpc_reply`` in an rpc-reply element and send it.

    The wrapper copies the attributes and namespace map of ``origmsg``.  An
    ``rpc_reply`` that has a ``getchildren`` attribute is appended as one
    child; otherwise the wrapper is extended with its items.
    """
    envelope = etree.Element(
        qmap(C.NC) + C.RPC_REPLY,
        attrib=origmsg.attrib,
        nsmap=origmsg.nsmap,
    )
    try:
        # Attribute probe only: raises AttributeError when absent.
        getattr(rpc_reply, 'getchildren')
        envelope.append(rpc_reply)
    except AttributeError:
        envelope.extend(rpc_reply)

    serialized = etree.tounicode(envelope, pretty_print=True)
    log.info("RPC-Reply", reply=serialized)
    self.send_message(serialized)
Example #8
Source File: error.py From voltha with Apache License 2.0 | 5 votes |
def get_xml_reply(self):
    """Return ``self.reply`` serialized by ``etree.tounicode``."""
    serialized = etree.tounicode(self.reply)
    return serialized
Example #9
Source File: error.py From voltha with Apache License 2.0 | 5 votes |
def get_xml_reply(self):
    """Return ``self.reply`` serialized by ``etree.tounicode``."""
    serialized = etree.tounicode(self.reply)
    return serialized
Example #10
Source File: error.py From voltha with Apache License 2.0 | 5 votes |
def get_xml_reply(self):
    """Return ``self.reply`` serialized by ``etree.tounicode``."""
    serialized = etree.tounicode(self.reply)
    return serialized
Example #11
Source File: error.py From voltha with Apache License 2.0 | 5 votes |
def get_xml_reply(self):
    """Return ``self.reply`` serialized by ``etree.tounicode``."""
    serialized = etree.tounicode(self.reply)
    return serialized
Example #12
Source File: error.py From voltha with Apache License 2.0 | 5 votes |
def get_xml_reply(self):
    """Return ``self.reply`` serialized by ``etree.tounicode``."""
    serialized = etree.tounicode(self.reply)
    return serialized
Example #13
Source File: error.py From voltha with Apache License 2.0 | 5 votes |
def get_xml_reply(self):
    """Return ``self.reply`` serialized by ``etree.tounicode``."""
    serialized = etree.tounicode(self.reply)
    return serialized
Example #14
Source File: dataset.py From calamari with Apache License 2.0 | 5 votes |
def store(self, extension):
    """Write page XML back to disk.

    If ``self._last_page_id`` is set, only that page is written (through
    ``self._store_page``) and the id is cleared.  Otherwise every entry of
    ``self.pages`` is serialized next to its file in ``self.xmlfiles``,
    using the file name with all extensions replaced by ``extension``.
    """
    # Local import keeps this block self-contained; ``io.open`` accepts
    # ``encoding`` on both Python 2 and Python 3.
    import io

    if self._last_page_id:
        self._store_page(extension, self._last_page_id)
        self._last_page_id = None
        return

    for xml, page in tqdm(zip(self.xmlfiles, self.pages),
                          desc="Writing PageXML files",
                          total=len(self.xmlfiles)):
        # ``tounicode`` emits no XML declaration, so readers will assume
        # UTF-8.  Write UTF-8 explicitly instead of the platform default.
        with io.open(split_all_ext(xml)[0] + extension, 'w', encoding='utf-8') as f:
            f.write(etree.tounicode(page.getroottree()))
Example #15
Source File: tests.py From yournextrepresentative with GNU Affero General Public License v3.0 | 5 votes |
def test_all_feed_with_one_item(self):
    """The full Atom feed describes the single stored result event."""
    feed_response = self.app.get('/results/all.atom')
    feed_root = etree.XML(feed_response.content)
    actual_xml = etree.tounicode(feed_root, pretty_print=True)

    event = ResultEvent.objects.first()
    # The template text is kept exactly as it appears in the source listing.
    template = '''<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-gb"> <title>Election results from example.com (with extra data)</title> <link href="http://example.com/" rel="alternate"/> <link href="http://example.com/results/all.atom" rel="self"/> <id>http://example.com/</id> <updated>{updated}</updated> <entry> <title>Tessa Jowell (Labour Party) won in Member of Parliament for Dulwich and West Norwood</title> <link href="http://example.com/#{item_id}" rel="alternate"/> <published>{updated}</published> <updated>{updated}</updated> <author> <name>john</name> </author> <id>http://example.com/#{item_id}</id> <summary type="html">A example.com volunteer recorded at {space_separated} that Tessa Jowell (Labour Party) won the ballot in Member of Parliament for Dulwich and West Norwood, quoting the source 'Seen on the BBC news').</summary> <post_id>65808</post_id> <winner_person_id>4322</winner_person_id> <winner_person_name>Tessa Jowell</winner_person_name> <winner_party_id>party:53</winner_party_id> <winner_party_name>Labour Party</winner_party_name> <user_id>{user_id}</user_id> <post_name>Member of Parliament for Dulwich and West Norwood</post_name> <information_source>Seen on the BBC news</information_source> <parlparse_id>uk.org.publicwhip/person/123456</parlparse_id> </entry> </feed> '''
    expected_xml = template.format(
        updated=rfc3339_date(event.created),
        space_separated=event.created.strftime("%Y-%m-%d %H:%M:%S"),
        item_id=event.id,
        user_id=self.user.id,
    )

    self.compare_xml(expected_xml, actual_xml)
Example #16
Source File: test_patch.py From xmldiff with MIT License | 5 votes |
def _test(self, start, action, end):
    """Apply ``action`` to the tree parsed from ``start`` and compare to ``end``."""
    root = etree.fromstring(start)
    self.patcher.handle_action(action, root)
    serialized = etree.tounicode(root)
    self.assertEqual(serialized, end)
Example #17
Source File: test_formatting.py From xmldiff with MIT License | 5 votes |
def test_do_element(self):
    """Placeholders are substituted for child tags of a text element."""
    maker = formatting.PlaceholderMaker(['p'], ['b'])

    # Formatting tags get replaced, and the content remains
    source_xml = u'<p>This is a tag with <b>formatted</b> text.</p>'
    node = etree.fromstring(source_xml)
    maker.do_element(node)
    self.assertEqual(
        etree.tounicode(node),
        u'<p>This is a tag with \ue006formatted\ue005 text.</p>',
    )
    maker.undo_element(node)
    self.assertEqual(etree.tounicode(node), source_xml)

    # Non formatting tags get replaced with content
    source_xml = u'<p>This is a tag with <foo>formatted</foo> text.</p>'
    node = etree.fromstring(source_xml)
    maker.do_element(node)
    self.assertEqual(
        etree.tounicode(node),
        u'<p>This is a tag with \ue007 text.</p>',
    )

    # Single formatting tags still get two placeholders.
    source_xml = u'<p>This is a <b/> with <foo/> text.</p>'
    node = etree.fromstring(source_xml)
    maker.do_element(node)
    self.assertEqual(
        etree.tounicode(node),
        u'<p>This is a \ue009\ue008 with \ue00a text.</p>',
    )
Example #18
Source File: formatting.py From xmldiff with MIT License | 5 votes |
def render(self, result):
    """Serialize ``result`` with ``etree.tounicode``, honoring ``self.pretty_print``."""
    pretty = self.pretty_print
    return etree.tounicode(result, pretty_print=pretty)
Example #19
Source File: formatting.py From xmldiff with MIT License | 5 votes |
def get_placeholder(self, element, ttype, close_ph):
    """Return the placeholder character for ``(element, ttype, close_ph)``.

    The lookup key uses the serialized form of ``element``.  An unseen key
    gets the next code point after ``self.placeholder`` and is recorded in
    both ``self.placeholder2tag`` and ``self.tag2placeholder``.
    """
    key = (etree.tounicode(element), ttype, close_ph)
    placeholder = self.tag2placeholder.get(key)
    if placeholder is None:
        # First time this key is seen: allocate a fresh character.
        self.placeholder += 1
        placeholder = six.unichr(self.placeholder)
        self.placeholder2tag[placeholder] = PlaceholderEntry(
            element, ttype, close_ph)
        self.tag2placeholder[key] = placeholder
    return placeholder
Example #20
Source File: main.py From xmldiff with MIT License | 5 votes |
def patch_file(actions, tree):
    """Apply a diff to an XML document and return the patched XML text.

    ``tree`` is passed to ``etree.parse``.  ``actions`` is either a string,
    taken as the name of a file holding the diff, or an object with a
    ``read()`` method.
    """
    parsed_tree = etree.parse(tree)

    if isinstance(actions, six.string_types):
        # It's a string, so it's a filename
        with open(actions) as diff_file:
            diff_text = diff_file.read()
    else:
        # We assume it's a stream
        diff_text = actions.read()

    parsed_actions = patch.DiffParser().parse(diff_text)
    patched = patch_tree(parsed_actions, parsed_tree)
    return etree.tounicode(patched)
Example #21
Source File: main.py From xmldiff with MIT License | 5 votes |
def patch_text(actions, tree):
    """Apply diff text ``actions`` to XML text ``tree``; return the patched XML text."""
    root = etree.fromstring(tree)
    parsed_actions = patch.DiffParser().parse(actions)
    patched = patch_tree(parsed_actions, root)
    return etree.tounicode(patched)
Example #22
Source File: content.py From xiachufang-api with Apache License 2.0 | 5 votes |
def clean_steps(self, nodes):
    """Build a list of step dictionaries from ``nodes``.

    Each dictionary has:

    * ``step`` -- 1-based position of the node;
    * ``desc`` -- the serialized ``<p>`` child with ``<p ...>``/``</p>``
      tags removed and ``<br>`` tags turned into newlines, stripped;
    * ``img``  -- the ``src`` attribute of the ``<img>`` child, or ``''``
      when the node has no such child.
    """
    # Raw strings keep ``\s`` a regex escape rather than an (invalid)
    # string escape.
    # HTML tag <p/>
    # NOTE(review): this pattern also matches other tags starting with
    # "p" (e.g. <pre>) -- confirm that is acceptable.
    re_p = re.compile(r'</?p[^>]*>')
    # HTML tag <br/>
    re_br = re.compile(r'<br\s*?/?>')

    steps = []
    for idx, node in enumerate(nodes):
        paragraph_html = etree.tounicode(node.find('p')).strip()
        desc = re_br.sub('\n', re_p.sub('', paragraph_html)).strip()
        # Look the image up once instead of twice.
        img = node.find('img')
        steps.append({
            'step': idx + 1,
            'desc': desc,
            'img': img.get('src') if img is not None else '',
        })
    return steps
Example #23
Source File: dataset.py From calamari with Apache License 2.0 | 5 votes |
def _store_page(self, extension, page_id):
    """Serialize the page belonging to ``page_id`` to disk.

    The page is found at the position ``page_id`` has in ``self.xmlfiles``.
    The output path is ``page_id`` with all extensions replaced by
    ``extension``.
    """
    # Local import keeps this block self-contained; ``io.open`` accepts
    # ``encoding`` on both Python 2 and Python 3.
    import io

    page = self.pages[self.xmlfiles.index(page_id)]
    # ``tounicode`` emits no XML declaration, so readers will assume UTF-8.
    # Write UTF-8 explicitly instead of the platform default.
    with io.open(split_all_ext(page_id)[0] + extension, 'w', encoding='utf-8') as f:
        f.write(etree.tounicode(page.getroottree()))
Example #24
Source File: bmi_wrapper.py From indra with BSD 2-Clause "Simplified" License | 4 votes |
def make_repository_component(self):
    """Return an XML string representing this BMI in a workflow.

    This description is required by EMELI to discover and load models.

    Returns
    -------
    xml : str
        String serialized XML representation of the component in the
        model repository.
    """
    # (tag, text) pairs in the order they appear in the output.
    fields = [
        ('comp_name', self.model.name),
        ('module_path', os.getcwd()),
        ('module_name', self.model.name),
        ('class_name', 'model_class'),
        ('model_name', self.model.name),
        ('language', 'python'),
        ('version', self.get_attribute('version')),
        ('author', self.get_attribute('author_name')),
        ('help_url', 'http://github.com/sorgerlab/indra'),
    ]
    # The remaining elements carry their own tag name as text.
    fields.extend(
        (tag, tag)
        for tag in ('cfg_template', 'time_step_type', 'time_units',
                    'grid_type', 'description', 'comp_type', 'uses_types')
    )

    component = etree.Element('component')
    for tag, text in fields:
        child = etree.Element(tag)
        child.text = text
        component.append(child)
    return etree.tounicode(component, pretty_print=True)
Example #25
Source File: agenda.py From legco-watch with MIT License | 4 votes |
def __init__(self, elements, english=True):
    """Parse one agenda question out of ``elements``.

    Sets ``number``, ``asker`` and ``type`` from the first element's text
    (all ``None`` if the question pattern does not match), ``replier``
    from the last two elements (``None`` if no responder is found), and
    ``body`` as the HTML of the elements in between.

    ``english`` selects ``QUESTION_PATTERN_E`` over ``QUESTION_PATTERN_C``.
    """
    self._elements = elements

    # Get the asker
    text = elements[0].text_content().strip()
    pattern = QUESTION_PATTERN_E if english else QUESTION_PATTERN_C
    match = re.match(pattern, text)
    if match is not None:
        self.number = match.group(1)
        self.asker = match.group(2)
        # Get question type
        # Can be oral or written.  Could also be urgent, but have not yet
        # seen how these are indicated
        if text.startswith('*'):
            self.type = self.QTYPE_WRITTEN
        else:
            self.type = self.QTYPE_ORAL
    else:
        logger.warning(u'Could not find asker of question in element: {}'.format(text))
        self.number = None
        self.asker = None
        self.type = None

    # Get the responder
    # If the question is the last question, then there may be a note
    # that begins with an asterisk that says which questions were
    # for written reply
    # As a heuristic, just search the last two elements, and keep track
    # of which is the last index of the body of the question
    # In other cases, if there is more than one public officer to reply,
    # then the list of public officers could be split across two elements.
    # See, for example, the agenda from June 18, 2014, question 1
    ending_index = -2
    for e in elements[-2:]:
        text = e.text_content().strip()
        match = re.search(AgendaQuestion.RESPONDER_PATTERN, text)
        if match is not None:
            self.replier = match.group(1)
            break
        ending_index += 1
    else:
        logger.warning(u'Could not find responder of question in element: {}'.format(text))
        self.replier = None

    # Store the rest of the elements into the body as html.
    # If no responder was found among two candidates, ending_index has
    # advanced to 0; slicing with a literal 0 would drop every element, so
    # treat it as "no trailing elements to cut off".
    body_end = ending_index if ending_index < 0 else None
    self.body = ''.join([etree.tounicode(xx, method='html') for xx in elements[1:body_end]])
Example #26
Source File: conftest.py From docxcompose with MIT License | 4 votes |
def pytest_assertrepr_compare(config, op, left, right):
    """Explain a failed ``==`` between two ``ComparableDocument`` objects.

    Returns a list of explanation lines: either a summary of differing part
    names, or unified diffs of the pretty-printed XML parts that differ.
    Returns ``None`` for any other comparison, or when there is nothing to
    report.
    """
    both_documents = (isinstance(left, ComparableDocument)
                      and isinstance(right, ComparableDocument))
    if not (both_documents and op == "=="):
        return None

    left.post_compare_failed(right)
    right.post_compare_failed(left)

    if left.has_neq_partnames:
        only_right = [name for name in right.partnames
                      if name not in left.partnames]
        only_left = [name for name in left.partnames
                     if name not in right.partnames]
        lines = ['documents contain same parts']
        if right.doc is None:
            lines.append('Right document is None')
        if left.doc is None:
            lines.append('Left document is None')
        if only_left:
            lines.append(
                'Left contains extra parts {}'.format(', '.join(only_left)))
        if only_right:
            lines.append(
                'Right contains extra parts {}'.format(', '.join(only_right)))
        return lines

    report = []
    for lpart, rpart in left.neq_parts:
        if not lpart.partname.endswith('.xml'):
            # Non-XML parts are only named, not diffed.
            report.extend(
                ['Binary parts differ {}'.format(lpart.partname), ''])
            continue
        left_xml = etree.tounicode(
            etree.fromstring(lpart.blob), pretty_print=True)
        right_xml = etree.tounicode(
            etree.fromstring(rpart.blob), pretty_print=True)
        report.extend(unified_diff(
            left_xml.splitlines(),
            right_xml.splitlines(),
            fromfile=lpart.partname,
            tofile=lpart.partname))
        report.append('')

    if not report:
        return None

    names = [pair[0].partname for pair in left.neq_parts]
    report.insert(
        0, 'document parts are equal. Not equal parts: {}'.format(
            ', '.join(names)))
    return report
Example #27
Source File: test_formatting.py From xmldiff with MIT License | 4 votes |
# Test for PlaceholderMaker on a namespaced document.
# Steps, as visible in the code below:
#   1. register the diff namespace prefix with etree;
#   2. run replacer.do_tree() on the "before" document so that placeholder
#      entries are created for its tags;
#   3. set a "delete-format" attribute (in the diff namespace) on the elements
#      recorded for placeholders \ue006 and \ue007;
#   4. parse the "after" document, which contains placeholder characters and
#      <insert> elements, and run replacer.undo_tree() on it;
#   5. assert that the serialized tree equals the expected markup.
# NOTE(review): the triple-quoted literals appear on one line in this listing;
# the assertion compares text exactly, so their whitespace must match whatever
# undo_tree() produces -- confirm against the upstream test file.
def test_rml_bug(self): etree.register_namespace(formatting.DIFF_PREFIX, formatting.DIFF_NS) before_diff = u"""<document xmlns:diff="http://namespaces.shoobx.com/diff"> <section> <para> <ref>4</ref>. <u><b>At Will Employment</b></u> .\u201cText\u201d </para> </section> </document>""" tree = etree.fromstring(before_diff) replacer = formatting.PlaceholderMaker( text_tags=('para',), formatting_tags=('b', 'u', 'i',)) replacer.do_tree(tree) after_diff = u"""<document xmlns:diff="http://namespaces.shoobx.com/diff"> <section> <para> <insert>\ue005</insert>. \ue007\ue009At Will Employment\ue008\ue006 .\u201c<insert>New </insert>Text\u201d </para> </section> </document>""" # The diff formatting will find some text to insert. delete_attrib = u'{%s}delete-format' % formatting.DIFF_NS replacer.placeholder2tag[u'\ue006' ].element.attrib[delete_attrib] = '' replacer.placeholder2tag[u'\ue007' ].element.attrib[delete_attrib] = '' tree = etree.fromstring(after_diff) replacer.undo_tree(tree) result = etree.tounicode(tree) expected = u"""<document xmlns:diff="http://namespaces.shoobx.com/diff"> <section> <para> <insert><ref>4</ref></insert>. <u diff:delete-format=""><b>At Will Employment</b></u> .\u201c<insert>New </insert>Text\u201d </para> </section> </document>""" self.assertEqual(result, expected)