Python bs4.element.NavigableString() Examples
The following are 8 code examples of bs4.element.NavigableString().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module bs4.element, or try the search function.
Example #1
Source File: plugin.py From mkdocstrings with ISC License | 6 votes |
def replace_code_tags(self, soup: BeautifulSoup) -> None:
    """
    Swap every ``code`` node in the tree for a placeholder string.

    Each ``code`` node is serialized with ``str()`` and passed to ``self.store``;
    whatever ``self.store`` returns is written back in the node's place as a
    ``NavigableString``. The tree is modified in place.

    Arguments:
        soup: The root tag of a BeautifulSoup HTML tree.
    """

    def walk(node):
        # Nodes without a ``contents`` attribute have nothing to descend into.
        if not hasattr(node, "contents"):
            return
        for index, child in enumerate(node.contents):
            if child.name == "code":
                # Overwrite the slot directly so sibling positions stay unchanged.
                node.contents[index] = NavigableString(self.store(str(child)))
            else:
                walk(child)

    walk(soup)
Example #2
Source File: queries.py From RTFMbot with Mozilla Public License 2.0 | 6 votes |
def get_content(self, tag):
    """Return the text found between ``tag`` and the next ``h2`` element.

    Walks ``tag.next_siblings`` in order, collects the ``.text`` of every
    tag element until a sibling named ``h2`` is reached, and joins the
    collected pieces with newlines.

    Args:
        tag: The element whose following siblings are scanned
            (presumably an ``h2`` heading -- confirm against callers).

    Returns:
        The joined text. When it would be 1024 characters or longer it is
        cut to its first 1021 characters followed by ``'...'``.
    """
    pieces = []
    for elem in tag.next_siblings:
        # Only tag elements are wanted. isinstance() (instead of an exact
        # type comparison) also skips NavigableString subclasses such as
        # Comment, whose text would otherwise leak into the result.
        if isinstance(elem, NavigableString):
            continue
        # The next h2 marks the end of this section.
        if elem.name == 'h2':
            break
        pieces.append(elem.text)

    content = '\n'.join(pieces)
    if len(content) >= 1024:
        content = content[:1021] + '...'
    return content
Example #3
Source File: EasyLogin.py From cc98 with MIT License | 5 votes |
def text(self, target=None, ignore_pureascii_words=False):
    """
    Collect the text fragments of an HTML tree, skipping script, style,
    comment and doctype content as well as text whose parent carries an
    inline ``style`` attribute containing ``none`` or ``font-size:0px``.

    :param target: the BeautifulSoup object to walk, default self.b
    :param ignore_pureascii_words: if set True, only return fragments that
        contain at least one character above code point 127, e.g. Chinese
        characters (may be useful for English version websites)
    :return: list of str
    """
    if target is None:
        target = self.b
    from bs4 import Comment
    from bs4.element import NavigableString, Doctype

    collected = []
    for node in target.descendants:
        # The checks below run in a fixed order; each one rules out a node kind.
        if not isinstance(node, NavigableString):
            continue
        if isinstance(node, Doctype):
            continue
        if node.parent.name in ["script", "style"]:
            continue
        if isinstance(node, Comment):
            continue
        inline_style = node.parent.get("style", "")
        if "none" in inline_style or "font-size:0px" in inline_style:
            continue

        stripped = node.strip()
        if not stripped:
            continue
        if ignore_pureascii_words and not any(ord(ch) > 127 for ch in stripped):
            continue
        collected.append(stripped.encode() if PY2 else stripped)
    return collected
Example #4
Source File: create_official_data.py From converse_reading_cmr with MIT License | 5 votes |
def insert_escaped_tags(tags, label=None):
    """Insert literal open/close markers around the text of each tag.

    For every tag in ``tags`` that contains at least one string, a
    ``NavigableString`` ``"<name>"`` is inserted at the front of the parent
    of its first string and ``"</name>"`` is appended to the parent of its
    last string, so that these markers are still there as text when the
    HTML markup is stripped out.

    Args:
        tags: Iterable of tags to annotate; their trees are modified in place.
        label: Name to put inside the markers. When ``None``, each tag's own
            ``name`` is used.

    Returns:
        True if at least one tag contained text and was annotated,
        otherwise False.
    """
    found = False
    for tag in tags:
        strs = list(tag.strings)
        if not strs:
            # Tags without any text get no markers.
            continue
        # Identity check: only a missing label falls back to the tag name.
        marker = label if label is not None else tag.name
        strs[0].parent.insert(0, NavigableString("<" + marker + ">"))
        strs[-1].parent.append(NavigableString("</" + marker + ">"))
        found = True
    return found
Example #5
Source File: fetch_realtime_grounding.py From converse_reading_cmr with MIT License | 5 votes |
def insert_escaped_tags(self, tags):
    """Surround the text of each tag with literal ``<name>``/``</name>`` markers.

    For each tag in ``tags`` that holds at least one string, the markers are
    added as ``NavigableString`` objects: the opening one at the front of the
    parent of the first string, the closing one at the end of the parent of
    the last string. This keeps the markers as text when the HTML markup is
    stripped out.

    Args:
        tags: Iterable of tags to annotate; their trees are modified in place.

    Returns:
        True if at least one tag was annotated, otherwise False.
    """
    annotated = False
    for tag in tags:
        text_nodes = list(tag.strings)
        if len(text_nodes) == 0:
            continue
        tag_name = tag.name
        first_parent = text_nodes[0].parent
        first_parent.insert(0, NavigableString("<" + tag_name + ">"))
        last_parent = text_nodes[-1].parent
        last_parent.append(NavigableString("</" + tag_name + ">"))
        annotated = True
    return annotated
Example #6
Source File: create_trial_data.py From icecaps with MIT License | 5 votes |
def insert_escaped_tags(tags, label=None):
    """Insert literal open/close markers around the text of each tag.

    For every tag in ``tags`` that contains at least one string, a
    ``NavigableString`` ``"<name>"`` is inserted at the front of the parent
    of its first string and ``"</name>"`` is appended to the parent of its
    last string, so that these markers are still there as text when the
    HTML markup is stripped out.

    Args:
        tags: Iterable of tags to annotate; their trees are modified in place.
        label: Name to put inside the markers. When ``None``, each tag's own
            ``name`` is used.

    Returns:
        True if at least one tag contained text and was annotated,
        otherwise False.
    """
    found = False
    for tag in tags:
        strs = list(tag.strings)
        if not strs:
            # Tags without any text get no markers.
            continue
        # Identity check: only a missing label falls back to the tag name.
        marker = label if label is not None else tag.name
        strs[0].parent.insert(0, NavigableString("<" + marker + ">"))
        strs[-1].parent.append(NavigableString("</" + marker + ">"))
        found = True
    return found
Example #7
Source File: create_official_data.py From icecaps with MIT License | 5 votes |
def insert_escaped_tags(tags, label=None):
    """Insert literal open/close markers around the text of each tag.

    For every tag in ``tags`` that contains at least one string, a
    ``NavigableString`` ``"<name>"`` is inserted at the front of the parent
    of its first string and ``"</name>"`` is appended to the parent of its
    last string, so that these markers are still there as text when the
    HTML markup is stripped out.

    Args:
        tags: Iterable of tags to annotate; their trees are modified in place.
        label: Name to put inside the markers. When ``None``, each tag's own
            ``name`` is used.

    Returns:
        True if at least one tag contained text and was annotated,
        otherwise False.
    """
    found = False
    for tag in tags:
        strs = list(tag.strings)
        if not strs:
            # Tags without any text get no markers.
            continue
        # Identity check: only a missing label falls back to the tag name.
        marker = label if label is not None else tag.name
        strs[0].parent.insert(0, NavigableString("<" + marker + ">"))
        strs[-1].parent.append(NavigableString("</" + marker + ">"))
        found = True
    return found
Example #8
Source File: xss_utils.py From ITWSV with MIT License | 4 votes |
def study(bs_node, parent=None, keyword=""):
    """Locate the places where ``keyword`` occurs inside a parsed HTML node.

    The node is only examined when ``keyword`` appears in its lowercased
    string form. Depending on the node type, one of these entries is recorded
    (each also carries a ``"noscript"`` value obtained from ``close_noscript``):

    * ``{"type": "attrval", "name", "tag"}``  - keyword found in an attribute value
    * ``{"type": "attrname", "name", "tag"}`` - keyword found in an attribute name
    * ``{"type": "tag", "value"}``            - keyword found in the tag name
    * ``{"type": "comment", "parent"}``       - node is a comment
    * ``{"type": "text", "parent"}``          - node is a text string

    Tag nodes are additionally searched recursively through their contents.

    Args:
        bs_node: The node to inspect.
        parent: The node containing ``bs_node``; its ``name`` is read for
            comment and text entries.
        keyword: The string to look for.

    Returns:
        A list of entry dicts without duplicates, in discovery order.
    """
    entries = []

    def record(entry):
        # Keep the result free of duplicates while preserving discovery order.
        if entry not in entries:
            entries.append(entry)

    # Nothing to report when the keyword is absent from the node's markup.
    if keyword not in str(bs_node).lower():
        return entries

    if isinstance(bs_node, element.Tag):
        if keyword in str(bs_node.attrs):
            for attr_name, attr_value in bs_node.attrs.items():
                if keyword in attr_value:
                    # Keyword found in the value of this attribute.
                    noscript = close_noscript(bs_node)
                    record({"type": "attrval", "name": attr_name, "tag": bs_node.name, "noscript": noscript})
                if keyword in attr_name:
                    # Keyword found in the name of this attribute.
                    noscript = close_noscript(bs_node)
                    record({"type": "attrname", "name": attr_name, "tag": bs_node.name, "noscript": noscript})
        elif keyword in bs_node.name:
            # Keyword found in the tag name itself.
            noscript = close_noscript(bs_node)
            record({"type": "tag", "value": bs_node.name, "noscript": noscript})

        # Recursively search injection points for the same variable.
        for child in bs_node.contents:
            for entry in study(child, parent=bs_node, keyword=keyword):
                record(entry)
    elif isinstance(bs_node, element.Comment):
        # Keyword found inside a comment.
        noscript = close_noscript(bs_node)
        record({"type": "comment", "parent": parent.name, "noscript": noscript})
    elif isinstance(bs_node, element.NavigableString):
        # Keyword found inside plain text.
        noscript = close_noscript(bs_node)
        record({"type": "text", "parent": parent.name, "noscript": noscript})

    return entries


# generate a list of payloads based on where in the webpage the js-code will be injected