Python bs4.element.NavigableString() Examples

The following are 8 code examples of bs4.element.NavigableString(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module bs4.element, or try the search function.
Example #1
Source File: plugin.py    From mkdocstrings with ISC License 6 votes vote down vote up
def replace_code_tags(self, soup: BeautifulSoup) -> None:
        """
        Swap every ``code`` node in the tree for a placeholder string.

        Each child named ``code`` is serialized with ``str()`` and handed to
        ``self.store``; whatever ``self.store`` returns is wrapped in a
        ``NavigableString`` that takes the node's slot in its parent's
        ``contents`` list. All other children are visited recursively.

        Arguments:
            soup: The root tag of a BeautifulSoup HTML tree.
        """

        def walk(node):
            # Nodes without a ``contents`` attribute have no children to visit.
            if not hasattr(node, "contents"):
                return
            for position, kid in enumerate(node.contents):
                if kid.name == "code":
                    # Overwrite the slot in place: the list length never changes,
                    # so assigning while iterating is safe.
                    node.contents[position] = NavigableString(self.store(str(kid)))
                else:
                    walk(kid)

        walk(soup)
Example #2
Source File: queries.py    From RTFMbot with Mozilla Public License 2.0 6 votes vote down vote up
def get_content(self, tag):
        """Return the text of the elements that follow ``tag``, up to the next ``h2``.

        Walks ``tag.next_siblings`` in order, skips every string node, stops at
        the first element named ``h2`` and joins the ``.text`` of the elements
        seen so far with newlines.

        Arguments:
            tag: The node whose following siblings are collected.

        Returns:
            The joined text. A result of 1024 characters or more is cut to its
            first 1021 characters followed by ``...``.
        """
        pieces = []
        for sibling in tag.next_siblings:
            # Keep tag elements only. isinstance (rather than an exact type
            # comparison) also rejects NavigableString subclasses such as
            # comments, which are string nodes as well.
            if isinstance(sibling, NavigableString):
                continue
            # It's a tag: an h2 marks the start of the next section.
            if sibling.name == 'h2':
                break
            pieces.append(sibling.text)

        content = '\n'.join(pieces)
        if len(content) >= 1024:
            content = content[:1021] + '...'

        return content
Example #3
Source File: EasyLogin.py    From cc98 with MIT License 5 votes vote down vote up
def text(self, target=None, ignore_pureascii_words=False):
        """
        Collect the text fragments of an HTML tree, skipping script and comment content.

        Doctype and comment nodes are ignored, as is any string whose parent is a
        ``script`` or ``style`` tag or whose parent's ``style`` attribute contains
        ``none`` or ``font-size:0px``.
        :param target: the BeautifulSoup object to walk, default self.b
        :param ignore_pureascii_words: if set True, only return fragments that contain at least one character above code point 127, e.g. Chinese characters (may be useful for English version website)
        :return: list of stripped, non-empty fragments (each passed through ``.encode()`` when ``PY2`` is true)
        """
        from bs4 import Comment
        from bs4.element import NavigableString,Doctype
        if target is None:
            target = self.b

        collected = []
        for node in target.descendants:
            # Only string nodes carry text; tags themselves are skipped.
            if not isinstance(node, NavigableString):
                continue
            if isinstance(node, Doctype):
                continue
            holder = node.parent
            if holder.name in ["script", "style"]:
                continue
            if isinstance(node, Comment):
                continue
            # Substring checks on the parent's inline style attribute.
            inline_style = holder.get("style", "")
            if "none" in inline_style or "font-size:0px" in inline_style:
                continue

            fragment = node.strip()
            if len(fragment) == 0:
                continue
            if ignore_pureascii_words and not any(ord(ch) > 127 for ch in fragment):
                continue
            collected.append(fragment.encode() if PY2 else fragment)
        return collected
Example #4
Source File: create_official_data.py    From converse_reading_cmr with MIT License 5 votes vote down vote up
def insert_escaped_tags(tags, label=None):
    """Wrap the text of each tag in literal ``<name>`` / ``</name>`` markers.

    For each tag in "tags", insert contextual tags (e.g., <p> </p>) as escaped
    text so that these tags are still there when html markup is stripped out.
    The opening marker is inserted at position 0 of the parent of the tag's
    first string, and the closing marker is appended to the parent of its last
    string. Tags that contain no strings are left untouched.

    Args:
        tags: Iterable of tag objects exposing ``strings`` and ``name``.
        label: Marker name to use instead of each tag's own ``name``.

    Returns:
        True if at least one tag received markers, otherwise False.
    """
    found = False
    for tag in tags:
        strs = list(tag.strings)
        if not strs:
            continue
        # Identity test: only a missing label falls back to the tag's own name.
        marker = label if label is not None else tag.name
        strs[0].parent.insert(0, NavigableString("<" + marker + ">"))
        strs[-1].parent.append(NavigableString("</" + marker + ">"))
        found = True
    return found
Example #5
Source File: fetch_realtime_grounding.py    From converse_reading_cmr with MIT License 5 votes vote down vote up
def insert_escaped_tags(self, tags):
		"""Surround the text of every tag in "tags" with its own name as escaped text.

		The literal markers (e.g., <p> </p>) are added as strings, so they are
		still there when html markup is stripped out. Returns True when at
		least one tag contained text and received markers, False otherwise.
		"""
		found = False
		for tag in tags:
			text_nodes = list(tag.strings)
			# Tags without any string have nowhere to place the markers.
			if not text_nodes:
				continue
			tag_name = tag.name
			first_node, last_node = text_nodes[0], text_nodes[-1]
			first_node.parent.insert(0, NavigableString("<" + tag_name + ">"))
			last_node.parent.append(NavigableString("</" + tag_name + ">"))
			found = True
		return found
Example #6
Source File: create_trial_data.py    From icecaps with MIT License 5 votes vote down vote up
def insert_escaped_tags(tags, label=None):
    """Insert escaped contextual markers around the text of each tag.

    For each tag in "tags", insert contextual tags (e.g., <p> </p>) as escaped
    text so that these tags are still there when html markup is stripped out.
    The opening marker goes in at index 0 of the parent of the tag's first
    string; the closing marker is appended to the parent of its last string.
    A tag with no strings is skipped.

    Args:
        tags: Iterable of tag objects exposing ``strings`` and ``name``.
        label: Optional marker name; when omitted, each tag's ``name`` is used.

    Returns:
        True if markers were inserted for at least one tag, otherwise False.
    """
    found = False
    for tag in tags:
        strs = list(tag.strings)
        if not strs:
            continue
        # Compare with ``is not None`` so only an absent label triggers the
        # fallback to the tag's own name.
        marker = label if label is not None else tag.name
        strs[0].parent.insert(0, NavigableString("<" + marker + ">"))
        strs[-1].parent.append(NavigableString("</" + marker + ">"))
        found = True
    return found
Example #7
Source File: create_official_data.py    From icecaps with MIT License 5 votes vote down vote up
def insert_escaped_tags(tags, label=None):
    """Mark the text of each tag with escaped opening and closing markers.

    For each tag in "tags", insert contextual tags (e.g., <p> </p>) as escaped
    text so that these tags are still there when html markup is stripped out.
    The first string's parent receives the opening marker at position 0 and
    the last string's parent receives the closing marker at its end. Tags
    holding no strings are not modified.

    Args:
        tags: Iterable of tag objects exposing ``strings`` and ``name``.
        label: Name to put inside the markers; defaults to each tag's ``name``.

    Returns:
        True when at least one tag was marked, otherwise False.
    """
    found = False
    for tag in tags:
        strs = list(tag.strings)
        if not strs:
            continue
        # ``is not None`` keeps the fallback limited to a label that was not given.
        marker = label if label is not None else tag.name
        strs[0].parent.insert(0, NavigableString("<" + marker + ">"))
        strs[-1].parent.append(NavigableString("</" + marker + ">"))
        found = True
    return found
Example #8
Source File: xss_utils.py    From ITWSV with MIT License 4 votes vote down vote up
def study(bs_node, parent=None, keyword=""):
    """Report where ``keyword`` occurs inside a parsed HTML node.

    The node is inspected only if ``keyword`` appears in its lower-cased
    string form. Tags are checked for the keyword in attribute values,
    attribute names and (when it is not found in the attributes) the tag
    name, and their children are then examined recursively. Comment and
    text nodes produce a single entry naming the parent tag.

    Args:
        bs_node: The node to inspect.
        parent: The node that contains ``bs_node``; its ``name`` is read for
            comment and text nodes.
        keyword: The string to look for.

    Returns:
        A list of distinct dicts. Each has a ``type`` of ``attrval``,
        ``attrname``, ``tag``, ``comment`` or ``text``, plus a ``noscript``
        value taken from ``close_noscript(bs_node)``.
    """
    entries = []

    # Nothing to report when the serialized node does not mention the keyword.
    if keyword not in str(bs_node).lower():
        return entries

    def remember(record):
        # Keep the result free of duplicates while preserving discovery order.
        if record not in entries:
            entries.append(record)

    if isinstance(bs_node, element.Tag):
        if keyword in str(bs_node.attrs):
            for attr_name, attr_value in bs_node.attrs.items():
                # NOTE(review): for multi-valued attributes the value may be a
                # list, in which case ``in`` tests membership rather than
                # substring - confirm this is intended.
                if keyword in attr_value:
                    noscript = close_noscript(bs_node)
                    remember({"type": "attrval", "name": attr_name, "tag": bs_node.name, "noscript": noscript})

                if keyword in attr_name:
                    noscript = close_noscript(bs_node)
                    remember({"type": "attrname", "name": attr_name, "tag": bs_node.name, "noscript": noscript})

        elif keyword in bs_node.name:
            noscript = close_noscript(bs_node)
            remember({"type": "tag", "value": bs_node.name, "noscript": noscript})

        # recursively search injection points for the same variable
        for child in bs_node.contents:
            for nested in study(child, parent=bs_node, keyword=keyword):
                remember(nested)

    # Comment is tested before NavigableString, as in the original ordering.
    elif isinstance(bs_node, element.Comment):
        noscript = close_noscript(bs_node)
        remember({"type": "comment", "parent": parent.name, "noscript": noscript})

    elif isinstance(bs_node, element.NavigableString):
        noscript = close_noscript(bs_node)
        remember({"type": "text", "parent": parent.name, "noscript": noscript})

    return entries


# generate a list of payloads based on where in the webpage the js-code will be injected