Python html5lib.constants.namespaces() Examples

The following are 30 code examples of html5lib.constants.namespaces(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module html5lib.constants, or try the search function.
Example #1
Source File: chunk.py    From budou with Apache License 2.0 5 votes vote down vote up
def wbr_serialize(self):
    """Returns concatenated HTML code with WBR tag. This is still experimental.

    Builds a ``<span style="word-break: keep-all">`` element from the chunks
    in ``self``. A ``<wbr>`` element is inserted before each CJK chunk once
    the span already has leading text; every other chunk is appended to the
    text that precedes it. The markup is then re-parsed with html5lib and
    rendered through the sanitizer filter with the ``wbr`` element and the
    ``word-break`` CSS property explicitly allowed.

    Returns:
      The organized HTML code. (str)
    """
    doc = ET.Element('span')
    doc.attrib['style'] = 'word-break: keep-all'
    for chunk in self:
        if chunk.has_cjk() and doc.text:
            # Break opportunity before the CJK word; the word itself is the
            # text that trails the new <wbr> element.
            wbr = ET.Element('wbr')
            doc.append(wbr)
            wbr.tail = chunk.word
        elif len(doc):
            # add word without span tag for non-CJK text (e.g. English)
            # by appending it after the last element.
            # NOTE: ``Element.getchildren()`` no longer exists in the standard
            # library ElementTree (removed in Python 3.9), so the children
            # are reached through len()/indexing instead.
            last = doc[-1]
            if last.tail is None:
                last.tail = chunk.word
            else:
                last.tail += chunk.word
        else:
            # No child element yet: the word belongs to the span's own text.
            if doc.text is None:
                doc.text = chunk.word
            else:
                doc.text += chunk.word
    content = ET.tostring(doc, encoding='utf-8').decode('utf-8')
    dom = html5lib.parseFragment(content)
    treewalker = getTreeWalker('etree')
    stream = treewalker(dom)
    serializer = html5lib.serializer.HTMLSerializer(
        quote_attr_values='always')
    # Extend copies of the sanitizer's allow-lists so the module-level
    # collections are left untouched.
    allowed_elements = set(sanitizer.allowed_elements)
    allowed_elements.add((namespaces['html'], 'wbr'))
    allowed_css_properties = set(sanitizer.allowed_css_properties)
    allowed_css_properties.add('word-break')
    result = serializer.render(sanitizer.Filter(
        stream, allowed_elements=allowed_elements,
        allowed_css_properties=allowed_css_properties,
    ))
    return result
Example #2
Source File: _html5lib.py    From moviegrabber with GNU General Public License v3.0 5 votes vote down vote up
def getNameTuple(self):
        """Return this node's ``(namespace, name)`` pair.

        When ``self.namespace`` is ``None`` the HTML namespace taken from
        ``namespaces["html"]`` is used in its place.
        """
        # Identity check: ``== None`` would defer to a custom ``__eq__``.
        if self.namespace is None:
            return namespaces["html"], self.name
        return self.namespace, self.name
Example #3
Source File: test_parser2.py    From bazarr with GNU General Public License v3.0 5 votes vote down vote up
def test_namespace_html_elements_0_etree():
    """Check the etree root tag is qualified with the HTML namespace.

    Parses a minimal document with ``namespaceHTMLElements=True`` and
    compares the resulting tag against ``{<html namespace>}html``.
    """
    parse_options = dict(treebuilder="etree", namespaceHTMLElements=True)
    root = parse("<html></html>", **parse_options)
    expected_tag = "{%s}html" % (namespaces["html"],)
    assert root.tag == expected_tag
Example #4
Source File: test_parser2.py    From bazarr with GNU General Public License v3.0 5 votes vote down vote up
def test_namespace_html_elements_0_dom():
    """Check the first DOM child carries the HTML namespace URI.

    Parses a minimal document with the "dom" tree builder and
    ``namespaceHTMLElements=True``.
    """
    parse_options = dict(treebuilder="dom", namespaceHTMLElements=True)
    document = parse("<html></html>", **parse_options)
    first_child = document.childNodes[0]
    assert first_child.namespaceURI == namespaces["html"]
Example #5
Source File: _html5lib.py    From Crunchyroll-XML-Decoder with GNU General Public License v2.0 5 votes vote down vote up
def getNameTuple(self):
        """Return this node's ``(namespace, name)`` pair.

        When ``self.namespace`` is ``None`` the HTML namespace taken from
        ``namespaces["html"]`` is used in its place.
        """
        # Identity check: ``== None`` would defer to a custom ``__eq__``.
        if self.namespace is None:
            return namespaces["html"], self.name
        return self.namespace, self.name
Example #6
Source File: _html5lib.py    From Crunchyroll-XML-Decoder with GNU General Public License v2.0 5 votes vote down vote up
def reparentChildren(self, newParent):
        """Move every child of ``self.element`` under ``newParent``.

        Each child is detached with ``extract()`` and then appended to
        ``newParent``: ``Tag`` children are wrapped in ``Element`` (with the
        HTML namespace), anything else in ``TextNode``.
        """
        while True:
            remaining = self.element.contents
            if not remaining:
                break
            node = remaining[0]
            # Detach first so the loop always sees a shrinking child list.
            node.extract()
            if isinstance(node, Tag):
                wrapped = Element(node, self.soup, namespaces["html"])
            else:
                wrapped = TextNode(node, self.soup)
            newParent.appendChild(wrapped)
Example #7
Source File: _html5lib.py    From POC-EXP with GNU General Public License v3.0 5 votes vote down vote up
def getNameTuple(self):
        """Return this node's ``(namespace, name)`` pair.

        When ``self.namespace`` is ``None`` the HTML namespace taken from
        ``namespaces["html"]`` is used in its place.
        """
        # Identity check: ``== None`` would defer to a custom ``__eq__``.
        if self.namespace is None:
            return namespaces["html"], self.name
        return self.namespace, self.name
Example #8
Source File: _html5lib.py    From python-for-android with Apache License 2.0 5 votes vote down vote up
def getNameTuple(self):
        """Return this node's ``(namespace, name)`` pair.

        When ``self.namespace`` is ``None`` the HTML namespace taken from
        ``namespaces["html"]`` is used in its place.
        """
        # Identity check: ``== None`` would defer to a custom ``__eq__``.
        if self.namespace is None:
            return namespaces["html"], self.name
        return self.namespace, self.name
Example #9
Source File: _html5lib.py    From python-for-android with Apache License 2.0 5 votes vote down vote up
def reparentChildren(self, newParent):
        """Move every child of ``self.element`` under ``newParent``.

        Each child is detached with ``extract()`` and then appended to
        ``newParent``: ``Tag`` children are wrapped in ``Element`` (with the
        HTML namespace), anything else in ``TextNode``.
        """
        while True:
            remaining = self.element.contents
            if not remaining:
                break
            node = remaining[0]
            # Detach first so the loop always sees a shrinking child list.
            node.extract()
            if isinstance(node, Tag):
                wrapped = Element(node, self.soup, namespaces["html"])
            else:
                wrapped = TextNode(node, self.soup)
            newParent.appendChild(wrapped)
Example #10
Source File: _html5lib.py    From ru with GNU General Public License v2.0 5 votes vote down vote up
def getNameTuple(self):
        """Return this node's ``(namespace, name)`` pair.

        When ``self.namespace`` is ``None`` the HTML namespace taken from
        ``namespaces["html"]`` is used in its place.
        """
        # Identity check: ``== None`` would defer to a custom ``__eq__``.
        if self.namespace is None:
            return namespaces["html"], self.name
        return self.namespace, self.name
Example #11
Source File: test_parser2.py    From MARA_Framework with GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_namespace_html_elements_0_etree(self):
        """Check the first parsed node's tag is qualified with the HTML namespace.

        Uses the parser's default tree with ``namespaceHTMLElements=True``.
        """
        parser = html5parser.HTMLParser(namespaceHTMLElements=True)
        doc = parser.parse("<html></html>")
        # assertEqual reports both values on failure, unlike assertTrue(a == b).
        # NOTE(review): assumes ``self`` is a unittest.TestCase -- confirm.
        self.assertEqual(list(doc)[0].tag,
                         "{%s}html" % (namespaces["html"],))
Example #12
Source File: test_parser2.py    From MARA_Framework with GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_namespace_html_elements_0_dom(self):
        """Check the first DOM child carries the HTML namespace URI.

        Uses ``self.dom_tree`` as the tree builder with
        ``namespaceHTMLElements=True``.
        """
        parser = html5parser.HTMLParser(tree=self.dom_tree,
                                        namespaceHTMLElements=True)
        doc = parser.parse("<html></html>")
        # assertEqual reports both values on failure, unlike assertTrue(a == b).
        # NOTE(review): assumes ``self`` is a unittest.TestCase -- confirm.
        self.assertEqual(doc.childNodes[0].namespaceURI, namespaces["html"])
Example #13
Source File: _html5lib.py    From MARA_Framework with GNU Lesser General Public License v3.0 5 votes vote down vote up
def getNameTuple(self):
        """Return this node's ``(namespace, name)`` pair.

        When ``self.namespace`` is ``None`` the HTML namespace taken from
        ``namespaces["html"]`` is used in its place.
        """
        # Identity check: ``== None`` would defer to a custom ``__eq__``.
        if self.namespace is None:
            return namespaces["html"], self.name
        return self.namespace, self.name
Example #14
Source File: _html5lib.py    From CrisisMappingToolkit with Apache License 2.0 5 votes vote down vote up
def getNameTuple(self):
        """Return this node's ``(namespace, name)`` pair.

        When ``self.namespace`` is ``None`` the HTML namespace taken from
        ``namespaces["html"]`` is used in its place.
        """
        # Identity check: ``== None`` would defer to a custom ``__eq__``.
        if self.namespace is None:
            return namespaces["html"], self.name
        return self.namespace, self.name
Example #15
Source File: _html5lib.py    From nbaplus-server with Apache License 2.0 5 votes vote down vote up
def getNameTuple(self):
        """Return this node's ``(namespace, name)`` pair.

        When ``self.namespace`` is ``None`` the HTML namespace taken from
        ``namespaces["html"]`` is used in its place.
        """
        # Identity check: ``== None`` would defer to a custom ``__eq__``.
        if self.namespace is None:
            return namespaces["html"], self.name
        return self.namespace, self.name
Example #16
Source File: _html5lib.py    From pledgeservice with Apache License 2.0 5 votes vote down vote up
def getNameTuple(self):
        """Return this node's ``(namespace, name)`` pair.

        When ``self.namespace`` is ``None`` the HTML namespace taken from
        ``namespaces["html"]`` is used in its place.
        """
        # Identity check: ``== None`` would defer to a custom ``__eq__``.
        if self.namespace is None:
            return namespaces["html"], self.name
        return self.namespace, self.name
Example #17
Source File: _html5lib.py    From stopstalk-deployment with MIT License 5 votes vote down vote up
def getNameTuple(self):
        """Return this node's ``(namespace, name)`` pair.

        When ``self.namespace`` is ``None`` the HTML namespace taken from
        ``namespaces["html"]`` is used in its place.
        """
        # Identity check: ``== None`` would defer to a custom ``__eq__``.
        if self.namespace is None:
            return namespaces["html"], self.name
        return self.namespace, self.name
Example #18
Source File: soup.py    From nzb-subliminal with GNU General Public License v3.0 5 votes vote down vote up
def reparentChildren(self, newParent):
        """Move every child of ``self.element`` under ``newParent``.

        Each child is detached with ``extract()`` and then appended to
        ``newParent``: ``Tag`` children are wrapped in ``Element`` (with the
        HTML namespace), anything else in ``TextNode``.
        """
        while True:
            remaining = self.element.contents
            if not remaining:
                break
            node = remaining[0]
            # Detach first so the loop always sees a shrinking child list.
            node.extract()
            if isinstance(node, Tag):
                wrapped = Element(node, self.soup, namespaces["html"])
            else:
                wrapped = TextNode(node, self.soup)
            newParent.appendChild(wrapped)
Example #19
Source File: simpletree.py    From nzb-subliminal with GNU General Public License v3.0 5 votes vote down vote up
def getNameTuple(self):
        """Return this node's ``(namespace, name)`` pair.

        When ``self.namespace`` is ``None`` the HTML namespace taken from
        ``namespaces["html"]`` is used in its place.
        """
        # Identity check: ``== None`` would defer to a custom ``__eq__``.
        if self.namespace is None:
            return namespaces["html"], self.name
        return self.namespace, self.name
Example #20
Source File: _base.py    From nzb-subliminal with GNU General Public License v3.0 5 votes vote down vote up
def elementInScope(self, target, variant=None):
        """Report whether ``target`` is in scope on the open-elements stack.

        ``self.openElements`` is walked from the most recently opened node
        backwards. The search succeeds when a matching node is found and
        fails as soon as a scope boundary for ``variant`` is reached.

        target -- either a node (anything with a ``nameTuple`` attribute),
                  matched by equality, or a string, matched against each
                  node's ``name``.
        variant -- one of ``None``, "button", "list", "table" or "select";
                   selects the set of boundary elements. For "select" the
                   set is inverted: any element *not* in it is a boundary.

        Returns True or False. Raises KeyError for an unknown ``variant``
        and AssertionError if the stack is exhausted without a decision.
        """
        # If we pass a node in we match that. If we pass a string
        # match any node with that name.
        exactNode = hasattr(target, "nameTuple")

        listElementsMap = {
            None: (scopingElements, False),
            "button": (scopingElements | set([(namespaces["html"], "button")]),
                       False),
            "list": (scopingElements | set([(namespaces["html"], "ol"),
                                            (namespaces["html"], "ul")]),
                     False),
            "table": (set([(namespaces["html"], "html"),
                           (namespaces["html"], "table")]), False),
            "select": (set([(namespaces["html"], "optgroup"),
                            (namespaces["html"], "option")]), True)
        }
        listElements, invert = listElementsMap[variant]

        for node in reversed(self.openElements):
            if exactNode:
                matched = node == target
            else:
                matched = node.name == target
            if matched:
                return True
            elif (invert ^ (node.nameTuple in listElements)):
                return False

        # We should never reach this point. Raise explicitly rather than
        # ``assert False`` so the check survives ``python -O`` instead of
        # silently returning None.
        raise AssertionError(
            "elementInScope: no scope boundary found in openElements")
Example #21
Source File: soup.py    From nzb-subliminal with GNU General Public License v3.0 5 votes vote down vote up
def getNodeDetails(self, node):
        """Classify ``node`` and return a tuple describing it.

        The first item of the result is one of the ``_base`` node-type
        constants (DOCUMENT, DOCTYPE, COMMENT, TEXT, ELEMENT, UNKNOWN);
        the remaining items depend on that type. The ``isinstance`` checks
        run in a fixed order and the first match wins.
        """
        # Document or DocumentFragment
        if isinstance(node, BeautifulSoup):
            return (_base.DOCUMENT,)

        # DocumentType
        if isinstance(node, Declaration):
            text = unicode(node.string)
            # Slice needed to remove markup added during unicode conversion,
            # but only in some versions of BeautifulSoup/Python
            if text.startswith('<!') and text.endswith('>'):
                text = text[2:-1]
            # This regexp approach seems wrong and fragile, but Beautiful
            # Soup stores the doctype as a single thing and we want the
            # separate bits. It should work as long as the tree is created
            # by html5lib itself but may be wrong if it's been modified at
            # all. We could just feed it to an html5lib tokenizer, I guess...
            match = self.doctype_regexp.match(text)
            assert match is not None, "DOCTYPE did not match expected format"

            name = match.group('name')
            publicId = match.group('publicId')
            # The system id is captured by a different group depending on
            # whether a public id was present.
            system_group = 'systemId1' if publicId is not None else 'systemId2'
            systemId = match.group(system_group)
            return _base.DOCTYPE, name, publicId or "", systemId or ""

        if isinstance(node, Comment):
            text = unicode(node.string)
            # Strip the comment delimiters when they are present.
            if text.startswith('<!--') and text.endswith('-->'):
                text = text[4:-3]
            return _base.COMMENT, text

        # TextNode
        if isinstance(node, unicode):
            return _base.TEXT, node

        # Element
        if isinstance(node, Tag):
            return (_base.ELEMENT, namespaces["html"], node.name,
                    dict(node.attrs).items(), node.contents)

        return _base.UNKNOWN, node.__class__.__name__
Example #22
Source File: _html5lib.py    From nzb-subliminal with GNU General Public License v3.0 5 votes vote down vote up
def getNameTuple(self):
        """Return this node's ``(namespace, name)`` pair.

        When ``self.namespace`` is ``None`` the HTML namespace taken from
        ``namespaces["html"]`` is used in its place.
        """
        # Identity check: ``== None`` would defer to a custom ``__eq__``.
        if self.namespace is None:
            return namespaces["html"], self.name
        return self.namespace, self.name
Example #23
Source File: _html5lib.py    From Gank-Alfred-Workflow with MIT License 5 votes vote down vote up
def getNameTuple(self):
        """Return this node's ``(namespace, name)`` pair.

        When ``self.namespace`` is ``None`` the HTML namespace taken from
        ``namespaces["html"]`` is used in its place.
        """
        # Identity check: ``== None`` would defer to a custom ``__eq__``.
        if self.namespace is None:
            return namespaces["html"], self.name
        return self.namespace, self.name
Example #24
Source File: _html5lib.py    From Gank-Alfred-Workflow with MIT License 5 votes vote down vote up
def reparentChildren(self, newParent):
        """Move every child of ``self.element`` under ``newParent``.

        Each child is detached with ``extract()`` and then appended to
        ``newParent``: ``Tag`` children are wrapped in ``Element`` (with the
        HTML namespace), anything else in ``TextNode``.
        """
        while True:
            remaining = self.element.contents
            if not remaining:
                break
            node = remaining[0]
            # Detach first so the loop always sees a shrinking child list.
            node.extract()
            if isinstance(node, Tag):
                wrapped = Element(node, self.soup, namespaces["html"])
            else:
                wrapped = TextNode(node, self.soup)
            newParent.appendChild(wrapped)
Example #25
Source File: _html5lib.py    From svg-animation-tools with MIT License 5 votes vote down vote up
def getNameTuple(self):
        """Return this node's ``(namespace, name)`` pair.

        When ``self.namespace`` is ``None`` the HTML namespace taken from
        ``namespaces["html"]`` is used in its place.
        """
        # Identity check: ``== None`` would defer to a custom ``__eq__``.
        if self.namespace is None:
            return namespaces["html"], self.name
        return self.namespace, self.name
Example #26
Source File: _html5lib.py    From svg-animation-tools with MIT License 5 votes vote down vote up
def getNameTuple(self):
        """Return this node's ``(namespace, name)`` pair.

        When ``self.namespace`` is ``None`` the HTML namespace taken from
        ``namespaces["html"]`` is used in its place.
        """
        # Identity check: ``== None`` would defer to a custom ``__eq__``.
        if self.namespace is None:
            return namespaces["html"], self.name
        return self.namespace, self.name
Example #27
Source File: _html5lib.py    From crunchy-xml-decoder with GNU General Public License v2.0 5 votes vote down vote up
def getNameTuple(self):
        """Return this node's ``(namespace, name)`` pair.

        When ``self.namespace`` is ``None`` the HTML namespace taken from
        ``namespaces["html"]`` is used in its place.
        """
        # Identity check: ``== None`` would defer to a custom ``__eq__``.
        if self.namespace is None:
            return namespaces["html"], self.name
        return self.namespace, self.name
Example #28
Source File: _html5lib.py    From crunchy-xml-decoder with GNU General Public License v2.0 5 votes vote down vote up
def reparentChildren(self, newParent):
        """Move every child of ``self.element`` under ``newParent``.

        Each child is detached with ``extract()`` and then appended to
        ``newParent``: ``Tag`` children are wrapped in ``Element`` (with the
        HTML namespace), anything else in ``TextNode``.
        """
        while True:
            remaining = self.element.contents
            if not remaining:
                break
            node = remaining[0]
            # Detach first so the loop always sees a shrinking child list.
            node.extract()
            if isinstance(node, Tag):
                wrapped = Element(node, self.soup, namespaces["html"])
            else:
                wrapped = TextNode(node, self.soup)
            newParent.appendChild(wrapped)
Example #29
Source File: _html5lib.py    From weeman with GNU General Public License v3.0 5 votes vote down vote up
def getNameTuple(self):
        """Return this node's ``(namespace, name)`` pair.

        When ``self.namespace`` is ``None`` the HTML namespace taken from
        ``namespaces["html"]`` is used in its place.
        """
        # Identity check: ``== None`` would defer to a custom ``__eq__``.
        if self.namespace is None:
            return namespaces["html"], self.name
        return self.namespace, self.name
Example #30
Source File: _html5lib.py    From fuzzdb-collect with GNU General Public License v3.0 5 votes vote down vote up
def getNameTuple(self):
        """Return this node's ``(namespace, name)`` pair.

        When ``self.namespace`` is ``None`` the HTML namespace taken from
        ``namespaces["html"]`` is used in its place.
        """
        # Identity check: ``== None`` would defer to a custom ``__eq__``.
        if self.namespace is None:
            return namespaces["html"], self.name
        return self.namespace, self.name