Python htmlentitydefs.entitydefs Examples

The following are 18 code examples of htmlentitydefs.entitydefs (a dictionary, not a callable). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module htmlentitydefs, or try the search function.

Example #1

Source File: feedparser.py From pyrobotlab with Apache License 2.0

6 votes

def handle_entityref(self, ref):
        """Append the text for a named entity reference to the open element.

        Called for each entity reference; e.g. for '&copy;', ``ref`` is
        'copy'.  Nothing is done when ``self.elementstack`` is empty.

        The names lt, gt, quot, amp and apos are re-emitted unchanged as
        '&name;', and so is any name whose lookup raises KeyError.  Every
        other name is replaced by its character, encoded as UTF-8.  The
        resulting text is appended to ``self.elementstack[-1][2]``.
        """
        if not self.elementstack:
            return
        if _debug:
            sys.stderr.write('entering handle_entityref with %s\n' % ref)

        def name2cp(k):
            # entity resolution graciously donated by Aaron Swartz
            import htmlentitydefs
            if not hasattr(htmlentitydefs, 'name2codepoint'):
                # Fallback for interpreters older than Python 2.3, which
                # only provide the entitydefs table.
                value = htmlentitydefs.entitydefs[k]
                if value.startswith('&#') and value.endswith(';'):
                    return int(value[2:-1])  # not in latin-1
                return ord(value)
            return htmlentitydefs.name2codepoint[k]

        if ref in ('lt', 'gt', 'quot', 'amp', 'apos'):
            text = '&%s;' % ref
        else:
            try:
                codepoint = name2cp(ref)
            except KeyError:
                # Unknown entity name: pass the reference through as written.
                text = '&%s;' % ref
            else:
                text = unichr(codepoint).encode('utf-8')
        self.elementstack[-1][2].append(text)

Example #2

Source File: relextract.py From luscan-devel with GNU General Public License v2.0

6 votes

def descape_entity(m, defs=htmlentitydefs.entitydefs):
    """
    Translate one entity to its ISO Latin value.
    Inspired by example from effbot.org

    ``m`` is a match object whose group 1 is the entity name and whose
    group 0 is the whole entity text; ``defs`` maps entity names to their
    replacement text.  An entity whose name is not in ``defs`` is returned
    as is.

    Intended as a substitution callback, e.g.::

        pattern = re.compile("&(\w+?);")
        s = pattern.sub(descape_entity, 'mcglashan_&amp;_sarrail')
    """
    try:
        translated = defs[m.group(1)]
    except KeyError:
        return m.group(0)  # use as is
    else:
        return translated

Example #3

Source File: SgmlopXMLTreeBuilder.py From ru with GNU General Public License v2.0

6 votes

def __init__(self, html=0):
        """Create the tree builder and an sgmlop XML parser bound to self.

        html -- when true, the entries of htmlentitydefs.entitydefs are
                merged into self.entitydefs.

        Raises RuntimeError if the sgmlop module cannot be imported.
        """
        try:
            import sgmlop
        except ImportError:
            raise RuntimeError("sgmlop parser not available")
        self.__builder = ElementTree.TreeBuilder()
        if html:
            from htmlentitydefs import entitydefs as html_entities
            # NOTE(review): update() changes self.entitydefs in place; if
            # that attribute lives on the class it is shared -- confirm.
            self.entitydefs.update(html_entities)
        parser = sgmlop.XMLParser()
        self.__parser = parser
        # Register this object with the parser it just created.
        parser.register(self)

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.

Example #4

Source File: xxsdefense.py From d4rkc0de with GNU General Public License v2.0

5 votes

def handle_entityref(self, ref):
        """Append the entity reference ``ref`` to ``self.result``.

        A name found in ``entitydefs`` is appended as '&name;'.  Any other
        name is appended as the result of ``xssescape`` applied to '&name'
        (no trailing semicolon).
        """
        if ref in entitydefs:
            fragment = '&%s;' % ref
        else:
            fragment = xssescape('&%s' % ref)
        self.result += fragment

Example #5

Source File: SimpleXMLTreeBuilder.py From canape with GNU General Public License v3.0

5 votes

def __init__(self, html=0, target=None, encoding=None):
        """Create the tree builder and initialise the xmllib parser base.

        html     -- when true, the entries of htmlentitydefs.entitydefs are
                    merged into self.entitydefs before the base initialiser
                    runs.
        target   -- accepted but not used in this constructor.
        encoding -- accepted but not used in this constructor.
        """
        self.__builder = ElementTree.TreeBuilder()
        if html:
            from htmlentitydefs import entitydefs as html_entities
            # NOTE(review): update() changes self.entitydefs in place; if
            # that attribute lives on the class it is shared -- confirm.
            self.entitydefs.update(html_entities)
        xmllib.XMLParser.__init__(self)

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.

Example #6

Source File: SimpleXMLTreeBuilder.py From unity-python with MIT License

5 votes

def __init__(self, html=0, target=None, encoding=None):
        """Create the tree builder and initialise the xmllib parser base.

        html     -- when true, the entries of htmlentitydefs.entitydefs are
                    merged into self.entitydefs before the base initialiser
                    runs.
        target   -- accepted but not used in this constructor.
        encoding -- accepted but not used in this constructor.
        """
        self.__builder = ElementTree.TreeBuilder()
        if html:
            from htmlentitydefs import entitydefs as html_entities
            # NOTE(review): update() changes self.entitydefs in place; if
            # that attribute lives on the class it is shared -- confirm.
            self.entitydefs.update(html_entities)
        xmllib.XMLParser.__init__(self)

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.

Example #7

Source File: SimpleXMLTreeBuilder.py From RevitBatchProcessor with GNU General Public License v3.0

5 votes

def __init__(self, html=0, target=None, encoding=None):
        """Create the tree builder and initialise the xmllib parser base.

        html     -- when true, the entries of htmlentitydefs.entitydefs are
                    merged into self.entitydefs before the base initialiser
                    runs.
        target   -- accepted but not used in this constructor.
        encoding -- accepted but not used in this constructor.
        """
        self.__builder = ElementTree.TreeBuilder()
        if html:
            from htmlentitydefs import entitydefs as html_entities
            # NOTE(review): update() changes self.entitydefs in place; if
            # that attribute lives on the class it is shared -- confirm.
            self.entitydefs.update(html_entities)
        xmllib.XMLParser.__init__(self)

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.

Example #8

Source File: html2text.py From RedditBots with MIT License

5 votes

def name2cp(k):
    """Return the code point (an int) for the HTML entity name ``k``.

    'apos' is answered directly with ord("'").  Otherwise the name is
    looked up in htmlentitydefs.name2codepoint when the module has it
    (Python 2.3+); older interpreters fall back to decoding the
    htmlentitydefs.entitydefs entry.  A failed lookup is not caught here.
    """
    if k == 'apos':
        return ord("'")
    if not hasattr(htmlentitydefs, "name2codepoint"):
        # Pre-2.3 fallback: the entry is either a '&#NNN;' reference or a
        # single Latin-1 encoded character.
        value = htmlentitydefs.entitydefs[k]
        if value.startswith("&#") and value.endswith(";"):
            return int(value[2:-1])  # not in latin-1
        return ord(codecs.latin_1_decode(value)[0])
    return htmlentitydefs.name2codepoint[k]

Example #9

Source File: sanitizer.py From termite-visualizations with BSD 3-Clause "New" or "Revised" License

5 votes

def handle_entityref(self, ref):
        """Append the entity reference ``ref`` to ``self.result``.

        Nothing is appended while ``self.in_disallowed`` is true.
        Otherwise a name found in ``entitydefs`` is appended as '&name;',
        and any other name is appended as the result of ``xssescape``
        applied to '&name' (no trailing semicolon).
        """
        if self.in_disallowed:
            return
        if ref in entitydefs:
            fragment = '&%s;' % ref
        else:
            fragment = xssescape('&%s' % ref)
        self.result += fragment

Example #10

Source File: xxsdefense.py From darkc0de-old-stuff with GNU General Public License v3.0

5 votes

def handle_entityref(self, ref):
        """Append the entity reference ``ref`` to ``self.result``.

        A name found in ``entitydefs`` is appended as '&name;'.  Any other
        name is appended as the result of ``xssescape`` applied to '&name'
        (no trailing semicolon).
        """
        if ref in entitydefs:
            fragment = '&%s;' % ref
        else:
            fragment = xssescape('&%s' % ref)
        self.result += fragment

Example #11

Source File: SimpleXMLTreeBuilder.py From ru with GNU General Public License v2.0

5 votes

def __init__(self, html=0):
        """Create the tree builder and initialise the xmllib parser base.

        html -- when true, the entries of htmlentitydefs.entitydefs are
                merged into self.entitydefs before the base initialiser
                runs.
        """
        self.__builder = ElementTree.TreeBuilder()
        if html:
            from htmlentitydefs import entitydefs as html_entities
            # NOTE(review): update() changes self.entitydefs in place; if
            # that attribute lives on the class it is shared -- confirm.
            self.entitydefs.update(html_entities)
        xmllib.XMLParser.__init__(self)

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.

Example #12

Source File: SimpleXMLTreeBuilder.py From meddle with MIT License

5 votes

def __init__(self, html=0, target=None, encoding=None):
        """Create the tree builder and initialise the xmllib parser base.

        html     -- when true, the entries of htmlentitydefs.entitydefs are
                    merged into self.entitydefs before the base initialiser
                    runs.
        target   -- accepted but not used in this constructor.
        encoding -- accepted but not used in this constructor.
        """
        self.__builder = ElementTree.TreeBuilder()
        if html:
            from htmlentitydefs import entitydefs as html_entities
            # NOTE(review): update() changes self.entitydefs in place; if
            # that attribute lives on the class it is shared -- confirm.
            self.entitydefs.update(html_entities)
        xmllib.XMLParser.__init__(self)

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.

Example #13

Source File: html2text.py From PyDataset with MIT License

5 votes

def name2cp(k):
    """Return the code point (an int) for the HTML entity name ``k``.

    'apos' is answered directly with ord("'").  Otherwise the name is
    looked up in htmlentitydefs.name2codepoint when the module has it
    (Python 2.3+); older interpreters fall back to decoding the
    htmlentitydefs.entitydefs entry.  A failed lookup is not caught here.
    """
    if k == 'apos':
        return ord("'")
    if not hasattr(htmlentitydefs, "name2codepoint"):
        # Pre-2.3 fallback: the entry is either a '&#NNN;' reference or a
        # single Latin-1 encoded character.
        value = htmlentitydefs.entitydefs[k]
        if value.startswith("&#") and value.endswith(";"):
            return int(value[2:-1])  # not in latin-1
        return ord(codecs.latin_1_decode(value)[0])
    return htmlentitydefs.name2codepoint[k]

Example #14

Source File: html2text.py From arlo with Apache License 2.0

5 votes

def name2cp(k):
    """Return the code point (an int) for the HTML entity name ``k``.

    'apos' is answered directly with ord("'").  Otherwise the name is
    looked up in htmlentitydefs.name2codepoint when the module has it
    (Python 2.3+); older interpreters fall back to decoding the
    htmlentitydefs.entitydefs entry.  A failed lookup is not caught here.
    """
    if k == 'apos':
        return ord("'")
    if not hasattr(htmlentitydefs, "name2codepoint"):
        # Pre-2.3 fallback: the entry is either a '&#NNN;' reference or a
        # single Latin-1 encoded character.
        value = htmlentitydefs.entitydefs[k]
        if value.startswith("&#") and value.endswith(";"):
            return int(value[2:-1])  # not in latin-1
        return ord(codecs.latin_1_decode(value)[0])
    return htmlentitydefs.name2codepoint[k]

Example #15

Source File: sanitizer.py From yatl with BSD 3-Clause "New" or "Revised" License

5 votes

def handle_entityref(self, ref):
        """Append the entity reference ``ref`` to ``self.result``.

        Nothing is appended while the last item of ``self.in_disallowed``
        is true.  Otherwise a name found in ``entitydefs`` is appended as
        '&name;', and any other name is appended as the result of
        ``xmlescape`` applied to '&name' (no trailing semicolon).
        """
        if self.in_disallowed[-1]:
            return
        if ref in entitydefs:
            fragment = '&%s;' % ref
        else:
            fragment = xmlescape('&%s' % ref)
        self.result += fragment

Example #16

Source File: DialogReviews.py From repository.evgen_dev.xbmc-addons with GNU General Public License v2.0

5 votes

def html_entity_decode_char(m, defs=htmlentitydefs.entitydefs):
    """Return the replacement text for the entity matched by ``m``.

    Group 1 of ``m`` is used as the entity name and looked up in ``defs``.
    When the name is missing, the whole matched text (group 0) is
    returned unchanged.
    """
    try:
        replacement = defs[m.group(1)]
    except KeyError:
        replacement = m.group(0)
    return replacement

Example #17

Source File: SimpleXMLTreeBuilder.py From ironpython2 with Apache License 2.0

5 votes

def __init__(self, html=0, target=None, encoding=None):
        """Create the tree builder and initialise the xmllib parser base.

        html     -- when true, the entries of htmlentitydefs.entitydefs are
                    merged into self.entitydefs before the base initialiser
                    runs.
        target   -- accepted but not used in this constructor.
        encoding -- accepted but not used in this constructor.
        """
        self.__builder = ElementTree.TreeBuilder()
        if html:
            from htmlentitydefs import entitydefs as html_entities
            # NOTE(review): update() changes self.entitydefs in place; if
            # that attribute lives on the class it is shared -- confirm.
            self.entitydefs.update(html_entities)
        xmllib.XMLParser.__init__(self)

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.

Example #18

Source File: extract_recipe.py From extract_recipe with Apache License 2.0

4 votes

def name2cp(k):
    """Return the code point (an int) for the HTML entity name ``k``.

    'apos' is answered directly with ord("'").  Otherwise the name is
    looked up in htmlentitydefs.name2codepoint when the module has it
    (Python 2.3+); older interpreters fall back to decoding the
    htmlentitydefs.entitydefs entry.  A failed lookup is not caught here.
    """
    if k == 'apos':
        return ord("'")
    if not hasattr(htmlentitydefs, "name2codepoint"):
        # Pre-2.3 fallback: the entry is either a '&#NNN;' reference or a
        # single Latin-1 encoded character.
        value = htmlentitydefs.entitydefs[k]
        if value.startswith("&#") and value.endswith(";"):
            return int(value[2:-1])  # not in latin-1
        return ord(codecs.latin_1_decode(value)[0])
    return htmlentitydefs.name2codepoint[k]