Python Examples of html5lib.constants.namespaces

Source File: chunk.py From budou with Apache License 2.0

5 votes

def wbr_serialize(self):
    """Returns concatenated HTML code with WBR tag. This is still experimental.

    Every chunk is written into one ``<span style="word-break: keep-all">``.
    A ``<wbr>`` element is inserted in front of a chunk when
    ``chunk.has_cjk()`` is true and the span already has leading text; any
    other chunk is appended to the text that precedes it. The markup is then
    re-parsed with html5lib and rendered through the sanitizer filter, with
    ``wbr`` and ``word-break`` added to the allowed sets.

    Returns:
      The organized HTML code. (str)
    """
    doc = ET.Element('span')
    doc.attrib['style'] = 'word-break: keep-all'
    for chunk in self:
        if chunk.has_cjk() and doc.text:
            # Offer a line-break opportunity, then hang the word off the
            # new <wbr> as its tail text.
            ele = ET.Element('wbr')
            doc.append(ele)
            ele.tail = chunk.word
        elif len(doc):
            # add word without span tag for non-CJK text (e.g. English)
            # by appending it after the last element.
            # NOTE: Element.getchildren() was removed in Python 3.9; the
            # element's own sequence protocol (len / indexing) is used instead.
            last = doc[-1]
            if last.tail is None:
                last.tail = chunk.word
            else:
                last.tail += chunk.word
        elif doc.text is None:
            # No child element yet: the word becomes the span's leading text.
            doc.text = chunk.word
        else:
            doc.text += chunk.word
    content = ET.tostring(doc, encoding='utf-8').decode('utf-8')
    dom = html5lib.parseFragment(content)
    treewalker = getTreeWalker('etree')
    stream = treewalker(dom)
    serializer = html5lib.serializer.HTMLSerializer(
        quote_attr_values='always')
    # Extend copies of the sanitizer's allow-lists so the <wbr> element and
    # the word-break style survive filtering.
    allowed_elements = set(sanitizer.allowed_elements)
    allowed_elements.add((namespaces['html'], 'wbr'))
    allowed_css_properties = set(sanitizer.allowed_css_properties)
    allowed_css_properties.add('word-break')
    result = serializer.render(sanitizer.Filter(
        stream, allowed_elements=allowed_elements,
        allowed_css_properties=allowed_css_properties,
        ))
    return result

Source File: _html5lib.py From moviegrabber with GNU General Public License v3.0

5 votes

def getNameTuple(self):
    """Return the ``(namespace, name)`` pair identifying this element.

    When no namespace is set on the element, the HTML namespace from
    ``namespaces["html"]`` is used in its place.
    """
    # Compare against the None singleton by identity; ``== None`` can be
    # overridden by a custom ``__eq__``.
    if self.namespace is None:
        return namespaces["html"], self.name
    return self.namespace, self.name

Source File: test_parser2.py From bazarr with GNU General Public License v3.0

5 votes

def test_namespace_html_elements_0_etree():
    """The etree root tag is qualified with the HTML namespace.

    Parses a bare ``<html>`` document with ``namespaceHTMLElements=True`` and
    checks that the resulting tag is ``{<html namespace>}html``.
    """
    root = parse(
        "<html></html>",
        treebuilder="etree",
        namespaceHTMLElements=True,
    )
    expected_tag = "{%s}html" % (namespaces["html"],)
    assert root.tag == expected_tag

Source File: test_parser2.py From bazarr with GNU General Public License v3.0

5 votes

def test_namespace_html_elements_0_dom():
    """The first DOM child node reports the HTML namespace URI.

    Parses a bare ``<html>`` document with the ``dom`` tree builder and
    ``namespaceHTMLElements=True``.
    """
    document = parse(
        "<html></html>",
        treebuilder="dom",
        namespaceHTMLElements=True,
    )
    first_child = document.childNodes[0]
    assert first_child.namespaceURI == namespaces["html"]

Source File: _html5lib.py From Crunchyroll-XML-Decoder with GNU General Public License v2.0

5 votes

def getNameTuple(self):
    """Return the ``(namespace, name)`` pair identifying this element.

    When no namespace is set on the element, the HTML namespace from
    ``namespaces["html"]`` is used in its place.
    """
    # Compare against the None singleton by identity; ``== None`` can be
    # overridden by a custom ``__eq__``.
    if self.namespace is None:
        return namespaces["html"], self.name
    return self.namespace, self.name

Source File: _html5lib.py From Crunchyroll-XML-Decoder with GNU General Public License v2.0

5 votes

def reparentChildren(self, newParent):
    """Move every child of the wrapped element under ``newParent``.

    Children are detached one at a time from the front of
    ``self.element.contents``. ``Tag`` children are wrapped as ``Element``
    in the HTML namespace; anything else is wrapped as ``TextNode``.
    """
    while self.element.contents:
        node = self.element.contents[0]
        node.extract()
        if isinstance(node, Tag):
            wrapped = Element(node, self.soup, namespaces["html"])
        else:
            wrapped = TextNode(node, self.soup)
        newParent.appendChild(wrapped)

Source File: _html5lib.py From POC-EXP with GNU General Public License v3.0

5 votes

def getNameTuple(self):
    """Return the ``(namespace, name)`` pair identifying this element.

    When no namespace is set on the element, the HTML namespace from
    ``namespaces["html"]`` is used in its place.
    """
    # Compare against the None singleton by identity; ``== None`` can be
    # overridden by a custom ``__eq__``.
    if self.namespace is None:
        return namespaces["html"], self.name
    return self.namespace, self.name

Source File: _html5lib.py From python-for-android with Apache License 2.0

5 votes

def getNameTuple(self):
    """Return the ``(namespace, name)`` pair identifying this element.

    When no namespace is set on the element, the HTML namespace from
    ``namespaces["html"]`` is used in its place.
    """
    # Compare against the None singleton by identity; ``== None`` can be
    # overridden by a custom ``__eq__``.
    if self.namespace is None:
        return namespaces["html"], self.name
    return self.namespace, self.name

Source File: _html5lib.py From python-for-android with Apache License 2.0

5 votes

def reparentChildren(self, newParent):
    """Move every child of the wrapped element under ``newParent``.

    Children are detached one at a time from the front of
    ``self.element.contents``. ``Tag`` children are wrapped as ``Element``
    in the HTML namespace; anything else is wrapped as ``TextNode``.
    """
    while self.element.contents:
        node = self.element.contents[0]
        node.extract()
        if isinstance(node, Tag):
            wrapped = Element(node, self.soup, namespaces["html"])
        else:
            wrapped = TextNode(node, self.soup)
        newParent.appendChild(wrapped)

Source File: _html5lib.py From ru with GNU General Public License v2.0

5 votes

def getNameTuple(self):
    """Return the ``(namespace, name)`` pair identifying this element.

    When no namespace is set on the element, the HTML namespace from
    ``namespaces["html"]`` is used in its place.
    """
    # Compare against the None singleton by identity; ``== None`` can be
    # overridden by a custom ``__eq__``.
    if self.namespace is None:
        return namespaces["html"], self.name
    return self.namespace, self.name

Source File: test_parser2.py From MARA_Framework with GNU Lesser General Public License v3.0

5 votes

def test_namespace_html_elements_0_etree(self):
    """The first element of the parsed tree is tagged ``{<html namespace>}html``.

    Uses an ``HTMLParser`` created with ``namespaceHTMLElements=True``.
    """
    tree = html5parser.HTMLParser(namespaceHTMLElements=True).parse(
        "<html></html>")
    root_tag = list(tree)[0].tag
    expected_tag = "{%s}html" % (namespaces["html"],)
    self.assertTrue(root_tag == expected_tag)

Source File: test_parser2.py From MARA_Framework with GNU Lesser General Public License v3.0

5 votes

def test_namespace_html_elements_0_dom(self):
    """The first DOM child node reports the HTML namespace URI.

    Uses an ``HTMLParser`` built on ``self.dom_tree`` with
    ``namespaceHTMLElements=True``.
    """
    dom_parser = html5parser.HTMLParser(
        tree=self.dom_tree,
        namespaceHTMLElements=True,
    )
    document = dom_parser.parse("<html></html>")
    first_child = document.childNodes[0]
    self.assertTrue(first_child.namespaceURI == namespaces["html"])

Source File: _html5lib.py From MARA_Framework with GNU Lesser General Public License v3.0

5 votes

def getNameTuple(self):
    """Return the ``(namespace, name)`` pair identifying this element.

    When no namespace is set on the element, the HTML namespace from
    ``namespaces["html"]`` is used in its place.
    """
    # Compare against the None singleton by identity; ``== None`` can be
    # overridden by a custom ``__eq__``.
    if self.namespace is None:
        return namespaces["html"], self.name
    return self.namespace, self.name

Source File: _html5lib.py From CrisisMappingToolkit with Apache License 2.0

5 votes

def getNameTuple(self):
    """Return the ``(namespace, name)`` pair identifying this element.

    When no namespace is set on the element, the HTML namespace from
    ``namespaces["html"]`` is used in its place.
    """
    # Compare against the None singleton by identity; ``== None`` can be
    # overridden by a custom ``__eq__``.
    if self.namespace is None:
        return namespaces["html"], self.name
    return self.namespace, self.name

Source File: _html5lib.py From nbaplus-server with Apache License 2.0

5 votes

def getNameTuple(self):
    """Return the ``(namespace, name)`` pair identifying this element.

    When no namespace is set on the element, the HTML namespace from
    ``namespaces["html"]`` is used in its place.
    """
    # Compare against the None singleton by identity; ``== None`` can be
    # overridden by a custom ``__eq__``.
    if self.namespace is None:
        return namespaces["html"], self.name
    return self.namespace, self.name

Source File: _html5lib.py From pledgeservice with Apache License 2.0

5 votes

def getNameTuple(self):
    """Return the ``(namespace, name)`` pair identifying this element.

    When no namespace is set on the element, the HTML namespace from
    ``namespaces["html"]`` is used in its place.
    """
    # Compare against the None singleton by identity; ``== None`` can be
    # overridden by a custom ``__eq__``.
    if self.namespace is None:
        return namespaces["html"], self.name
    return self.namespace, self.name

Source File: _html5lib.py From stopstalk-deployment with MIT License

5 votes

def getNameTuple(self):
    """Return the ``(namespace, name)`` pair identifying this element.

    When no namespace is set on the element, the HTML namespace from
    ``namespaces["html"]`` is used in its place.
    """
    # Compare against the None singleton by identity; ``== None`` can be
    # overridden by a custom ``__eq__``.
    if self.namespace is None:
        return namespaces["html"], self.name
    return self.namespace, self.name

Source File: soup.py From nzb-subliminal with GNU General Public License v3.0

5 votes

def reparentChildren(self, newParent):
    """Move every child of the wrapped element under ``newParent``.

    Children are detached one at a time from the front of
    ``self.element.contents``. ``Tag`` children are wrapped as ``Element``
    in the HTML namespace; anything else is wrapped as ``TextNode``.
    """
    while self.element.contents:
        node = self.element.contents[0]
        node.extract()
        if isinstance(node, Tag):
            wrapped = Element(node, self.soup, namespaces["html"])
        else:
            wrapped = TextNode(node, self.soup)
        newParent.appendChild(wrapped)

Source File: simpletree.py From nzb-subliminal with GNU General Public License v3.0

5 votes

def getNameTuple(self):
    """Return the ``(namespace, name)`` pair identifying this element.

    When no namespace is set on the element, the HTML namespace from
    ``namespaces["html"]`` is used in its place.
    """
    # Compare against the None singleton by identity; ``== None`` can be
    # overridden by a custom ``__eq__``.
    if self.namespace is None:
        return namespaces["html"], self.name
    return self.namespace, self.name

Source File: _base.py From nzb-subliminal with GNU General Public License v3.0

5 votes

def elementInScope(self, target, variant=None):
    """Report whether ``target`` is in scope among the open elements.

    ``target`` is either a node (anything exposing ``nameTuple``), matched
    by equality, or a tag name, matched against each node's ``name``.

    ``variant`` selects the boundary set: ``None``, ``"button"``, ``"list"``,
    ``"table"`` or ``"select"``. The open elements are walked from the most
    recently opened one; the search returns ``True`` on a match and ``False``
    as soon as a boundary element is met. For ``"select"`` the boundary test
    is inverted: any element *other than* ``optgroup``/``option`` ends the
    search.
    """
    # If we pass a node in we match that. If we pass a string,
    # match any node with that name.
    exactNode = hasattr(target, "nameTuple")
    html = namespaces["html"]

    # variant -> (set of (namespace, name) tuples, invert flag)
    boundaryTable = {
        None: (scopingElements, False),
        "button": (scopingElements | set([(html, "button")]), False),
        "list": (scopingElements | set([(html, "ol"), (html, "ul")]), False),
        "table": (set([(html, "html"), (html, "table")]), False),
        "select": (set([(html, "optgroup"), (html, "option")]), True),
    }
    listElements, invert = boundaryTable[variant]

    for node in reversed(self.openElements):
        if exactNode:
            matched = node == target
        else:
            matched = node.name == target
        if matched:
            return True

        listed = node.nameTuple in listElements
        if invert != listed:
            return False

    assert False  # We should never reach this point

Source File: soup.py From nzb-subliminal with GNU General Public License v3.0

5 votes

def getNodeDetails(self, node):
    """Classify a BeautifulSoup node and return its details tuple.

    The first item of the returned tuple is one of the ``_base`` node-type
    constants (``DOCUMENT``, ``DOCTYPE``, ``COMMENT``, ``TEXT``, ``ELEMENT``
    or ``UNKNOWN``); the remaining items depend on the type:

    * document: nothing further
    * doctype: name, public id, system id (ids default to ``""``)
    * comment: the comment text
    * text: the node itself
    * element: HTML namespace, tag name, attribute items, child contents
    * anything else: the node's class name
    """
    if isinstance(node, BeautifulSoup):  # Document or DocumentFragment
        return (_base.DOCUMENT,)

    if isinstance(node, Declaration):  # DocumentType
        text = unicode(node.string)
        # Slice needed to remove markup added during unicode conversion,
        # but only in some versions of BeautifulSoup/Python
        if text.startswith('<!') and text.endswith('>'):
            text = text[2:-1]
        match = self.doctype_regexp.match(text)
        # This regexp approach seems wrong and fragile, but Beautiful Soup
        # stores the doctype as a single thing and we want the separate bits.
        # It should work as long as the tree is created by html5lib itself
        # but may be wrong if it has been modified at all.
        # We could just feed it to a html5lib tokenizer, I guess...
        assert match is not None, "DOCTYPE did not match expected format"

        name = match.group('name')
        publicId = match.group('publicId')
        # The pattern has two system-id groups; which one applies depends on
        # whether a public id was captured.
        systemGroup = 'systemId1' if publicId is not None else 'systemId2'
        systemId = match.group(systemGroup)
        return _base.DOCTYPE, name, publicId or "", systemId or ""

    if isinstance(node, Comment):
        text = unicode(node.string)
        # Drop the comment delimiters if the conversion kept them.
        if text.startswith('<!--') and text.endswith('-->'):
            text = text[4:-3]
        return _base.COMMENT, text

    if isinstance(node, unicode):  # TextNode
        return _base.TEXT, node

    if isinstance(node, Tag):  # Element
        attributes = dict(node.attrs).items()
        return (_base.ELEMENT, namespaces["html"], node.name,
                attributes, node.contents)

    return _base.UNKNOWN, node.__class__.__name__

Source File: _html5lib.py From nzb-subliminal with GNU General Public License v3.0

5 votes

def getNameTuple(self):
    """Return the ``(namespace, name)`` pair identifying this element.

    When no namespace is set on the element, the HTML namespace from
    ``namespaces["html"]`` is used in its place.
    """
    # Compare against the None singleton by identity; ``== None`` can be
    # overridden by a custom ``__eq__``.
    if self.namespace is None:
        return namespaces["html"], self.name
    return self.namespace, self.name

Source File: _html5lib.py From Gank-Alfred-Workflow with MIT License

5 votes

def getNameTuple(self):
    """Return the ``(namespace, name)`` pair identifying this element.

    When no namespace is set on the element, the HTML namespace from
    ``namespaces["html"]`` is used in its place.
    """
    # Compare against the None singleton by identity; ``== None`` can be
    # overridden by a custom ``__eq__``.
    if self.namespace is None:
        return namespaces["html"], self.name
    return self.namespace, self.name

Source File: _html5lib.py From Gank-Alfred-Workflow with MIT License

5 votes

def reparentChildren(self, newParent):
    """Move every child of the wrapped element under ``newParent``.

    Children are detached one at a time from the front of
    ``self.element.contents``. ``Tag`` children are wrapped as ``Element``
    in the HTML namespace; anything else is wrapped as ``TextNode``.
    """
    while self.element.contents:
        node = self.element.contents[0]
        node.extract()
        if isinstance(node, Tag):
            wrapped = Element(node, self.soup, namespaces["html"])
        else:
            wrapped = TextNode(node, self.soup)
        newParent.appendChild(wrapped)

Source File: _html5lib.py From svg-animation-tools with MIT License

5 votes

def getNameTuple(self):
    """Return the ``(namespace, name)`` pair identifying this element.

    When no namespace is set on the element, the HTML namespace from
    ``namespaces["html"]`` is used in its place.
    """
    # Compare against the None singleton by identity; ``== None`` can be
    # overridden by a custom ``__eq__``.
    if self.namespace is None:
        return namespaces["html"], self.name
    return self.namespace, self.name

Source File: _html5lib.py From svg-animation-tools with MIT License

5 votes

def getNameTuple(self):
    """Return the ``(namespace, name)`` pair identifying this element.

    When no namespace is set on the element, the HTML namespace from
    ``namespaces["html"]`` is used in its place.
    """
    # Compare against the None singleton by identity; ``== None`` can be
    # overridden by a custom ``__eq__``.
    if self.namespace is None:
        return namespaces["html"], self.name
    return self.namespace, self.name

Source File: _html5lib.py From crunchy-xml-decoder with GNU General Public License v2.0

5 votes

def getNameTuple(self):
    """Return the ``(namespace, name)`` pair identifying this element.

    When no namespace is set on the element, the HTML namespace from
    ``namespaces["html"]`` is used in its place.
    """
    # Compare against the None singleton by identity; ``== None`` can be
    # overridden by a custom ``__eq__``.
    if self.namespace is None:
        return namespaces["html"], self.name
    return self.namespace, self.name

Source File: _html5lib.py From crunchy-xml-decoder with GNU General Public License v2.0

5 votes

def reparentChildren(self, newParent):
    """Move every child of the wrapped element under ``newParent``.

    Children are detached one at a time from the front of
    ``self.element.contents``. ``Tag`` children are wrapped as ``Element``
    in the HTML namespace; anything else is wrapped as ``TextNode``.
    """
    while self.element.contents:
        node = self.element.contents[0]
        node.extract()
        if isinstance(node, Tag):
            wrapped = Element(node, self.soup, namespaces["html"])
        else:
            wrapped = TextNode(node, self.soup)
        newParent.appendChild(wrapped)

Source File: _html5lib.py From weeman with GNU General Public License v3.0

5 votes

def getNameTuple(self):
    """Return the ``(namespace, name)`` pair identifying this element.

    When no namespace is set on the element, the HTML namespace from
    ``namespaces["html"]`` is used in its place.
    """
    # Compare against the None singleton by identity; ``== None`` can be
    # overridden by a custom ``__eq__``.
    if self.namespace is None:
        return namespaces["html"], self.name
    return self.namespace, self.name

Source File: _html5lib.py From fuzzdb-collect with GNU General Public License v3.0

5 votes

def getNameTuple(self):
    """Return the ``(namespace, name)`` pair identifying this element.

    When no namespace is set on the element, the HTML namespace from
    ``namespaces["html"]`` is used in its place.
    """
    # Compare against the None singleton by identity; ``== None`` can be
    # overridden by a custom ``__eq__``.
    if self.namespace is None:
        return namespaces["html"], self.name
    return self.namespace, self.name

Python html5lib.constants.namespaces Examples