Python bs4.element.Comment() Examples

The following are 16 code examples of bs4.element.Comment(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module bs4.element, or try the search function.
Example #1
Source File: crawler.py    From ITWSV with MIT License 6 votes vote down vote up
def text_only(self):
        """Return the text a reader would see on the page, with markup removed.

        An empty string is returned unless ``self.type`` contains ``"text"``
        and ``self.size`` is truthy. Otherwise every string node found in
        ``self.soup`` is kept when it is non-blank, is not a ``Comment`` and
        does not sit directly under one of the non-rendered parents listed
        below. The survivors are joined with single spaces and line breaks
        are turned into spaces.
        """
        if not ("text" in self.type and self.size):
            return ""

        # Parents whose direct string children are not part of the page text.
        hidden_parents = ["style", "script", "[document]", "head", "title"]

        def is_visible(element):
            # Blank strings are rejected first, then comments, then strings
            # that live under a non-rendered parent.
            if not element.strip():
                return False
            if isinstance(element, Comment):
                return False
            return element.parent.name not in hidden_parents

        visible = [node for node in self.soup.findAll(text=True) if is_visible(node)]
        joined = " ".join(visible)
        return joined.replace("\r\n", " ").replace("\n", " ")
Example #2
Source File: _lxml.py    From pledgeservice with Apache License 2.0 5 votes vote down vote up
def comment(self, content):
        """Handle a comment by storing its text as a Comment object.

        Calls ``endData()`` on the soup, feeds ``content`` to ``handle_data``
        and then calls ``endData(Comment)``.
        """
        soup = self.soup
        # Close out whatever data was pending before the comment text is fed in.
        soup.endData()
        soup.handle_data(content)
        # Finish the just-fed text, passing the Comment class to endData.
        soup.endData(Comment)
Example #3
Source File: _lxml.py    From locality-sensitive-hashing with MIT License 5 votes vote down vote up
def comment(self, content):
        """Handle a comment by storing its text as a Comment object.

        Calls ``endData()`` on the soup, feeds ``content`` to ``handle_data``
        and then calls ``endData(Comment)``.
        """
        soup = self.soup
        # Close out whatever data was pending before the comment text is fed in.
        soup.endData()
        soup.handle_data(content)
        # Finish the just-fed text, passing the Comment class to endData.
        soup.endData(Comment)
Example #4
Source File: _lxml.py    From fuzzdb-collect with GNU General Public License v3.0 5 votes vote down vote up
def comment(self, content):
        """Handle a comment by storing its text as a Comment object.

        Calls ``endData()`` on the soup, feeds ``content`` to ``handle_data``
        and then calls ``endData(Comment)``.
        """
        soup = self.soup
        # Close out whatever data was pending before the comment text is fed in.
        soup.endData()
        soup.handle_data(content)
        # Finish the just-fed text, passing the Comment class to endData.
        soup.endData(Comment)
Example #5
Source File: _lxml.py    From crunchy-xml-decoder with GNU General Public License v2.0 5 votes vote down vote up
def comment(self, content):
        """Handle a comment by storing its text as a Comment object.

        Calls ``endData()`` on the soup, feeds ``content`` to ``handle_data``
        and then calls ``endData(Comment)``.
        """
        soup = self.soup
        # Close out whatever data was pending before the comment text is fed in.
        soup.endData()
        soup.handle_data(content)
        # Finish the just-fed text, passing the Comment class to endData.
        soup.endData(Comment)
Example #6
Source File: _lxml.py    From Gank-Alfred-Workflow with MIT License 5 votes vote down vote up
def comment(self, content):
        """Handle a comment by storing its text as a Comment object.

        Calls ``endData()`` on the soup, feeds ``content`` to ``handle_data``
        and then calls ``endData(Comment)``.
        """
        soup = self.soup
        # Close out whatever data was pending before the comment text is fed in.
        soup.endData()
        soup.handle_data(content)
        # Finish the just-fed text, passing the Comment class to endData.
        soup.endData(Comment)
Example #7
Source File: _lxml.py    From nzb-subliminal with GNU General Public License v3.0 5 votes vote down vote up
def comment(self, content):
        """Handle a comment by storing its text as a Comment object.

        Calls ``endData()`` on the soup, feeds ``content`` to ``handle_data``
        and then calls ``endData(Comment)``.
        """
        soup = self.soup
        # Close out whatever data was pending before the comment text is fed in.
        soup.endData()
        soup.handle_data(content)
        # Finish the just-fed text, passing the Comment class to endData.
        soup.endData(Comment)
Example #8
Source File: _lxml.py    From nbaplus-server with Apache License 2.0 5 votes vote down vote up
def comment(self, content):
        """Handle a comment by storing its text as a Comment object.

        Calls ``endData()`` on the soup, feeds ``content`` to ``handle_data``
        and then calls ``endData(Comment)``.
        """
        soup = self.soup
        # Close out whatever data was pending before the comment text is fed in.
        soup.endData()
        soup.handle_data(content)
        # Finish the just-fed text, passing the Comment class to endData.
        soup.endData(Comment)
Example #9
Source File: _lxml.py    From MARA_Framework with GNU Lesser General Public License v3.0 5 votes vote down vote up
def comment(self, content):
        """Handle a comment by storing its text as a Comment object.

        Calls ``endData()`` on the soup, feeds ``content`` to ``handle_data``
        and then calls ``endData(Comment)``.
        """
        soup = self.soup
        # Close out whatever data was pending before the comment text is fed in.
        soup.endData()
        soup.handle_data(content)
        # Finish the just-fed text, passing the Comment class to endData.
        soup.endData(Comment)
Example #10
Source File: _lxml.py    From ru with GNU General Public License v2.0 5 votes vote down vote up
def comment(self, content):
        """Handle a comment by storing its text as a Comment object.

        Calls ``endData()`` on the soup, feeds ``content`` to ``handle_data``
        and then calls ``endData(Comment)``.
        """
        soup = self.soup
        # Close out whatever data was pending before the comment text is fed in.
        soup.endData()
        soup.handle_data(content)
        # Finish the just-fed text, passing the Comment class to endData.
        soup.endData(Comment)
Example #11
Source File: moodleCrawler.py    From Moodle-Downloader with GNU General Public License v3.0 5 votes vote down vote up
def tag_visible(element):
    """Tell whether a string node counts as visible page text.

    Returns ``False`` when the node's parent is one of the non-rendered
    tags listed below, or when the node is a ``Comment``; ``True`` otherwise.
    """
    # The parent check runs first; the Comment check is only reached when
    # the parent is not in the list.
    under_hidden_parent = element.parent.name in [
        'style', 'script', 'head', 'title', 'meta', '[document]'
    ]
    return not (under_hidden_parent or isinstance(element, Comment))
Example #12
Source File: _lxml.py    From python-for-android with Apache License 2.0 5 votes vote down vote up
def comment(self, content):
        """Handle a comment by storing its text as a Comment object.

        Calls ``endData()`` on the soup, feeds ``content`` to ``handle_data``
        and then calls ``endData(Comment)``.
        """
        soup = self.soup
        # Close out whatever data was pending before the comment text is fed in.
        soup.endData()
        soup.handle_data(content)
        # Finish the just-fed text, passing the Comment class to endData.
        soup.endData(Comment)
Example #13
Source File: _lxml.py    From POC-EXP with GNU General Public License v3.0 5 votes vote down vote up
def comment(self, content):
        """Handle a comment by storing its text as a Comment object.

        Calls ``endData()`` on the soup, feeds ``content`` to ``handle_data``
        and then calls ``endData(Comment)``.
        """
        soup = self.soup
        # Close out whatever data was pending before the comment text is fed in.
        soup.endData()
        soup.handle_data(content)
        # Finish the just-fed text, passing the Comment class to endData.
        soup.endData(Comment)
Example #14
Source File: _lxml.py    From Crunchyroll-XML-Decoder with GNU General Public License v2.0 5 votes vote down vote up
def comment(self, content):
        """Handle a comment by storing its text as a Comment object.

        Calls ``endData()`` on the soup, feeds ``content`` to ``handle_data``
        and then calls ``endData(Comment)``.
        """
        soup = self.soup
        # Close out whatever data was pending before the comment text is fed in.
        soup.endData()
        soup.handle_data(content)
        # Finish the just-fed text, passing the Comment class to endData.
        soup.endData(Comment)
Example #15
Source File: _lxml.py    From moviegrabber with GNU General Public License v3.0 5 votes vote down vote up
def comment(self, content):
        """Handle a comment by storing its text as a Comment object.

        Calls ``endData()`` on the soup, feeds ``content`` to ``handle_data``
        and then calls ``endData(Comment)``.
        """
        soup = self.soup
        # Close out whatever data was pending before the comment text is fed in.
        soup.endData()
        soup.handle_data(content)
        # Finish the just-fed text, passing the Comment class to endData.
        soup.endData(Comment)
Example #16
Source File: xss_utils.py    From ITWSV with MIT License 4 votes vote down vote up
def study(bs_node, parent=None, keyword=""):
    """Collect the places inside ``bs_node`` where ``keyword`` appears.

    The node is only inspected when ``keyword`` occurs in its lowercased
    string form. Depending on the node type the result holds dicts of these
    shapes, each carrying the value of ``close_noscript(bs_node)`` under
    ``"noscript"``:

    * ``{"type": "attrval", "name": ..., "tag": ...}`` - keyword found in an
      attribute value of a tag;
    * ``{"type": "attrname", "name": ..., "tag": ...}`` - keyword found in an
      attribute name of a tag;
    * ``{"type": "tag", "value": ...}`` - keyword found in the tag name (only
      checked when it is not in the stringified attributes);
    * ``{"type": "comment", "parent": ...}`` - keyword found in a comment;
    * ``{"type": "text", "parent": ...}`` - keyword found in a text node.

    Tags are searched recursively through ``bs_node.contents``.

    Args:
        bs_node: The node to inspect.
        parent: The enclosing tag. Its ``name`` is read for comment and text
            nodes, so it must not be ``None`` for those.
        keyword: The string to look for.

    Returns:
        A list of entry dicts, without duplicates, in discovery order.
    """
    found = []

    def record(entry):
        # Entries are de-duplicated but keep their discovery order.
        if entry not in found:
            found.append(entry)

    # Nothing to report unless the keyword occurs in the lowercased markup.
    if keyword not in str(bs_node).lower():
        return found

    if isinstance(bs_node, element.Tag):
        if keyword in str(bs_node.attrs):
            for attr_name, attr_value in bs_node.attrs.items():
                if keyword in attr_value:
                    noscript = close_noscript(bs_node)
                    record({"type": "attrval", "name": attr_name, "tag": bs_node.name, "noscript": noscript})

                if keyword in attr_name:
                    noscript = close_noscript(bs_node)
                    record({"type": "attrname", "name": attr_name, "tag": bs_node.name, "noscript": noscript})

        elif keyword in bs_node.name:
            noscript = close_noscript(bs_node)
            record({"type": "tag", "value": bs_node.name, "noscript": noscript})

        # Look for further injection points of the same keyword in the children.
        for child in bs_node.contents:
            for entry in study(child, parent=bs_node, keyword=keyword):
                record(entry)
        return found

    # Comment is tested before NavigableString, as in the original ordering.
    if isinstance(bs_node, element.Comment):
        noscript = close_noscript(bs_node)
        record({"type": "comment", "parent": parent.name, "noscript": noscript})
        return found

    if isinstance(bs_node, element.NavigableString):
        noscript = close_noscript(bs_node)
        record({"type": "text", "parent": parent.name, "noscript": noscript})

    return found


# generate a list of payloads based on where in the webpage the js-code will be injected