Python htmlentitydefs.entitydefs() Examples
The following are 18 code examples that use htmlentitydefs.entitydefs, the Python 2 dictionary that maps HTML entity names to their replacement text.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module htmlentitydefs, or try the search function.
Example #1
Source File: feedparser.py From pyrobotlab with Apache License 2.0 | 6 votes |
def handle_entityref(self, ref):
    """Handle one named entity reference and append its text to the output.

    Called for each entity reference, e.g. for '&copy;', ref will be 'copy'.

    The five XML-predefined names (lt, gt, quot, amp, apos) are kept as
    entity references.  Any other known name is replaced by the UTF-8
    encoding of its character; an unknown name is kept as '&name;'.
    Nothing happens when ``self.elementstack`` is empty.

    :param ref: entity name without the leading '&' and trailing ';'.
    """
    if not self.elementstack:
        return
    if _debug:
        sys.stderr.write('entering handle_entityref with %s\n' % ref)
    if ref in ('lt', 'gt', 'quot', 'amp', 'apos'):
        text = '&%s;' % ref
    else:
        # entity resolution graciously donated by Aaron Swartz
        def name2cp(k):
            import htmlentitydefs
            if hasattr(htmlentitydefs, 'name2codepoint'):  # requires Python 2.3
                return htmlentitydefs.name2codepoint[k]
            k = htmlentitydefs.entitydefs[k]
            if k.startswith('&#') and k.endswith(';'):
                return int(k[2:-1])  # not in latin-1
            return ord(k)

        # Resolve once and reuse the result instead of looking it up twice.
        try:
            codepoint = name2cp(ref)
        except KeyError:
            text = '&%s;' % ref
        else:
            text = unichr(codepoint).encode('utf-8')
    # The collected text pieces live in slot 2 of the top stack entry.
    self.elementstack[-1][2].append(text)
Example #2
Source File: relextract.py From luscan-devel with GNU General Public License v2.0 | 6 votes |
def descape_entity(m, defs=htmlentitydefs.entitydefs):
    """
    Translate one entity to its ISO Latin value.
    Inspired by example from effbot.org

    Intended as the replacement callback for ``re.sub``: group 1 of the
    match is the entity name and group 0 is the full matched text.

    :param m: match object whose group 1 holds the entity name.
    :param defs: mapping from entity name to replacement text.
    :return: ``defs[name]`` when the name is known, otherwise the matched
        text unchanged.
    """
    # Usage sketch kept from the original author's notes:
    #   s = 'mcglashan_&amp;_sarrail'
    #   pattern = re.compile("&(\w+?);")
    #   s = pattern.sub(descape_entity, s)
    name = m.group(1)
    try:
        return defs[name]
    except KeyError:
        return m.group(0)  # use as is
Example #3
Source File: SgmlopXMLTreeBuilder.py From ru with GNU General Public License v2.0 | 6 votes |
def __init__(self, html=0):
    """Set up the tree builder and an sgmlop parser that reports to it.

    :param html: when true, merge the HTML named entities from
        ``htmlentitydefs.entitydefs`` into ``self.entitydefs``.
    :raises RuntimeError: if the ``sgmlop`` module cannot be imported.
    """
    try:
        import sgmlop
    except ImportError:
        raise RuntimeError("sgmlop parser not available")
    self.__builder = ElementTree.TreeBuilder()
    if html:
        import htmlentitydefs
        # NOTE(review): update() mutates the existing dict in place; if
        # entitydefs is a class-level attribute, every instance sees the
        # added entries -- confirm this is intended.
        self.entitydefs.update(htmlentitydefs.entitydefs)
    self.__parser = sgmlop.XMLParser()
    # Register this object as the receiver of the parser's events.
    self.__parser.register(self)

##
# Feeds data to the parser.
#
# @param data Encoded data.
Example #4
Source File: xxsdefense.py From d4rkc0de with GNU General Public License v2.0 | 5 votes |
def handle_entityref(self, ref):
    """Append a named entity reference to ``self.result``.

    A name found in ``entitydefs`` is emitted verbatim as ``&name;``.
    Any other name is emitted as ``xssescape('&' + name)``; note that no
    trailing ';' is added on that path.

    :param ref: entity name without the leading '&' and trailing ';'.
    """
    if ref in entitydefs:
        piece = '&%s;' % ref
    else:
        piece = xssescape('&%s' % ref)
    self.result += piece
Example #5
Source File: SimpleXMLTreeBuilder.py From canape with GNU General Public License v3.0 | 5 votes |
def __init__(self, html=0, target=None, encoding=None):
    """Create the tree builder and initialise the underlying XML parser.

    :param html: when true, merge the HTML named entities from
        ``htmlentitydefs.entitydefs`` into ``self.entitydefs``.
    :param target: accepted but not used in this method.
    :param encoding: accepted but not used in this method.
    """
    self.__builder = ElementTree.TreeBuilder()
    if html:
        import htmlentitydefs
        # NOTE(review): update() mutates the existing dict in place; if
        # entitydefs is a class-level attribute, every instance sees the
        # added entries -- confirm this is intended.
        self.entitydefs.update(htmlentitydefs.entitydefs)
    # Base-class initialisation runs after the entity table is extended.
    xmllib.XMLParser.__init__(self)

##
# Feeds data to the parser.
#
# @param data Encoded data.
Example #6
Source File: SimpleXMLTreeBuilder.py From unity-python with MIT License | 5 votes |
def __init__(self, html=0, target=None, encoding=None):
    """Create the tree builder and initialise the underlying XML parser.

    :param html: when true, merge the HTML named entities from
        ``htmlentitydefs.entitydefs`` into ``self.entitydefs``.
    :param target: accepted but not used in this method.
    :param encoding: accepted but not used in this method.
    """
    self.__builder = ElementTree.TreeBuilder()
    if html:
        import htmlentitydefs
        # NOTE(review): update() mutates the existing dict in place; if
        # entitydefs is a class-level attribute, every instance sees the
        # added entries -- confirm this is intended.
        self.entitydefs.update(htmlentitydefs.entitydefs)
    # Base-class initialisation runs after the entity table is extended.
    xmllib.XMLParser.__init__(self)

##
# Feeds data to the parser.
#
# @param data Encoded data.
Example #7
Source File: SimpleXMLTreeBuilder.py From RevitBatchProcessor with GNU General Public License v3.0 | 5 votes |
def __init__(self, html=0, target=None, encoding=None):
    """Create the tree builder and initialise the underlying XML parser.

    :param html: when true, merge the HTML named entities from
        ``htmlentitydefs.entitydefs`` into ``self.entitydefs``.
    :param target: accepted but not used in this method.
    :param encoding: accepted but not used in this method.
    """
    self.__builder = ElementTree.TreeBuilder()
    if html:
        import htmlentitydefs
        # NOTE(review): update() mutates the existing dict in place; if
        # entitydefs is a class-level attribute, every instance sees the
        # added entries -- confirm this is intended.
        self.entitydefs.update(htmlentitydefs.entitydefs)
    # Base-class initialisation runs after the entity table is extended.
    xmllib.XMLParser.__init__(self)

##
# Feeds data to the parser.
#
# @param data Encoded data.
Example #8
Source File: html2text.py From RedditBots with MIT License | 5 votes |
def name2cp(k):
    """Return the Unicode code point for the HTML entity name ``k``.

    ``'apos'`` is answered directly; every other name is looked up in
    ``htmlentitydefs``.  An unknown name raises ``KeyError`` from the
    underlying dictionary lookup.

    :param k: entity name without the leading '&' and trailing ';'.
    :return: the code point as an int.
    """
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"):  # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]
    # Fallback without name2codepoint: derive the code point from the
    # entitydefs replacement text, which is either a numeric character
    # reference ("&#NNN;") or a single latin-1 character.
    value = htmlentitydefs.entitydefs[k]
    if value.startswith("&#") and value.endswith(";"):
        return int(value[2:-1])  # not in latin-1
    return ord(codecs.latin_1_decode(value)[0])
Example #9
Source File: sanitizer.py From termite-visualizations with BSD 3-Clause "New" or "Revised" License | 5 votes |
def handle_entityref(self, ref):
    """Append a named entity reference to ``self.result``.

    Nothing is appended while ``self.in_disallowed`` is true.  Otherwise a
    name found in ``entitydefs`` is emitted verbatim as ``&name;`` and any
    other name is emitted as ``xssescape('&' + name)``; note that no
    trailing ';' is added on that path.

    :param ref: entity name without the leading '&' and trailing ';'.
    """
    if self.in_disallowed:
        return
    if ref in entitydefs:
        piece = '&%s;' % ref
    else:
        piece = xssescape('&%s' % ref)
    self.result += piece
Example #10
Source File: xxsdefense.py From darkc0de-old-stuff with GNU General Public License v3.0 | 5 votes |
def handle_entityref(self, ref):
    """Append a named entity reference to ``self.result``.

    A name found in ``entitydefs`` is emitted verbatim as ``&name;``.
    Any other name is emitted as ``xssescape('&' + name)``; note that no
    trailing ';' is added on that path.

    :param ref: entity name without the leading '&' and trailing ';'.
    """
    if ref in entitydefs:
        piece = '&%s;' % ref
    else:
        piece = xssescape('&%s' % ref)
    self.result += piece
Example #11
Source File: SimpleXMLTreeBuilder.py From ru with GNU General Public License v2.0 | 5 votes |
def __init__(self, html=0):
    """Create the tree builder and initialise the underlying XML parser.

    :param html: when true, merge the HTML named entities from
        ``htmlentitydefs.entitydefs`` into ``self.entitydefs``.
    """
    self.__builder = ElementTree.TreeBuilder()
    if html:
        import htmlentitydefs
        # NOTE(review): update() mutates the existing dict in place; if
        # entitydefs is a class-level attribute, every instance sees the
        # added entries -- confirm this is intended.
        self.entitydefs.update(htmlentitydefs.entitydefs)
    # Base-class initialisation runs after the entity table is extended.
    xmllib.XMLParser.__init__(self)

##
# Feeds data to the parser.
#
# @param data Encoded data.
Example #12
Source File: SimpleXMLTreeBuilder.py From meddle with MIT License | 5 votes |
def __init__(self, html=0, target=None, encoding=None):
    """Create the tree builder and initialise the underlying XML parser.

    :param html: when true, merge the HTML named entities from
        ``htmlentitydefs.entitydefs`` into ``self.entitydefs``.
    :param target: accepted but not used in this method.
    :param encoding: accepted but not used in this method.
    """
    self.__builder = ElementTree.TreeBuilder()
    if html:
        import htmlentitydefs
        # NOTE(review): update() mutates the existing dict in place; if
        # entitydefs is a class-level attribute, every instance sees the
        # added entries -- confirm this is intended.
        self.entitydefs.update(htmlentitydefs.entitydefs)
    # Base-class initialisation runs after the entity table is extended.
    xmllib.XMLParser.__init__(self)

##
# Feeds data to the parser.
#
# @param data Encoded data.
Example #13
Source File: html2text.py From PyDataset with MIT License | 5 votes |
def name2cp(k):
    """Return the Unicode code point for the HTML entity name ``k``.

    ``'apos'`` is answered directly; every other name is looked up in
    ``htmlentitydefs``.  An unknown name raises ``KeyError`` from the
    underlying dictionary lookup.

    :param k: entity name without the leading '&' and trailing ';'.
    :return: the code point as an int.
    """
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"):  # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]
    # Fallback without name2codepoint: derive the code point from the
    # entitydefs replacement text, which is either a numeric character
    # reference ("&#NNN;") or a single latin-1 character.
    value = htmlentitydefs.entitydefs[k]
    if value.startswith("&#") and value.endswith(";"):
        return int(value[2:-1])  # not in latin-1
    return ord(codecs.latin_1_decode(value)[0])
Example #14
Source File: html2text.py From arlo with Apache License 2.0 | 5 votes |
def name2cp(k):
    """Return the Unicode code point for the HTML entity name ``k``.

    ``'apos'`` is answered directly; every other name is looked up in
    ``htmlentitydefs``.  An unknown name raises ``KeyError`` from the
    underlying dictionary lookup.

    :param k: entity name without the leading '&' and trailing ';'.
    :return: the code point as an int.
    """
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"):  # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]
    # Fallback without name2codepoint: derive the code point from the
    # entitydefs replacement text, which is either a numeric character
    # reference ("&#NNN;") or a single latin-1 character.
    value = htmlentitydefs.entitydefs[k]
    if value.startswith("&#") and value.endswith(";"):
        return int(value[2:-1])  # not in latin-1
    return ord(codecs.latin_1_decode(value)[0])
Example #15
Source File: sanitizer.py From yatl with BSD 3-Clause "New" or "Revised" License | 5 votes |
def handle_entityref(self, ref):
    """Append a named entity reference to ``self.result``.

    Nothing is appended while the last item of ``self.in_disallowed`` is
    true.  Otherwise a name found in ``entitydefs`` is emitted verbatim as
    ``&name;`` and any other name is emitted as ``xmlescape('&' + name)``;
    note that no trailing ';' is added on that path.

    :param ref: entity name without the leading '&' and trailing ';'.
    """
    if self.in_disallowed[-1]:
        return
    if ref in entitydefs:
        piece = '&%s;' % ref
    else:
        piece = xmlescape('&%s' % ref)
    self.result += piece
Example #16
Source File: DialogReviews.py From repository.evgen_dev.xbmc-addons with GNU General Public License v2.0 | 5 votes |
def html_entity_decode_char(m, defs=htmlentitydefs.entitydefs):
    """Return the replacement text for one matched entity reference.

    Group 1 of the match is used as the entity name and group 0 as the
    full matched text.

    :param m: match object whose group 1 holds the entity name.
    :param defs: mapping from entity name to replacement text.
    :return: ``defs[name]`` when the name is known, otherwise the matched
        text unchanged.
    """
    name = m.group(1)
    try:
        return defs[name]
    except KeyError:
        # Unknown entity: leave the original text as it was.
        return m.group(0)
Example #17
Source File: SimpleXMLTreeBuilder.py From ironpython2 with Apache License 2.0 | 5 votes |
def __init__(self, html=0, target=None, encoding=None):
    """Create the tree builder and initialise the underlying XML parser.

    :param html: when true, merge the HTML named entities from
        ``htmlentitydefs.entitydefs`` into ``self.entitydefs``.
    :param target: accepted but not used in this method.
    :param encoding: accepted but not used in this method.
    """
    self.__builder = ElementTree.TreeBuilder()
    if html:
        import htmlentitydefs
        # NOTE(review): update() mutates the existing dict in place; if
        # entitydefs is a class-level attribute, every instance sees the
        # added entries -- confirm this is intended.
        self.entitydefs.update(htmlentitydefs.entitydefs)
    # Base-class initialisation runs after the entity table is extended.
    xmllib.XMLParser.__init__(self)

##
# Feeds data to the parser.
#
# @param data Encoded data.
Example #18
Source File: extract_recipe.py From extract_recipe with Apache License 2.0 | 4 votes |
def name2cp(k):
    """Return the Unicode code point for the HTML entity name ``k``.

    ``'apos'`` is answered directly; every other name is looked up in
    ``htmlentitydefs``.  An unknown name raises ``KeyError`` from the
    underlying dictionary lookup.

    :param k: entity name without the leading '&' and trailing ';'.
    :return: the code point as an int.
    """
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"):  # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]
    # Fallback without name2codepoint: derive the code point from the
    # entitydefs replacement text, which is either a numeric character
    # reference ("&#NNN;") or a single latin-1 character.
    value = htmlentitydefs.entitydefs[k]
    if value.startswith("&#") and value.endswith(";"):
        return int(value[2:-1])  # not in latin-1
    return ord(codecs.latin_1_decode(value)[0])