Python Examples of HTMLParser.HTMLParseError

Source File: _htmlparser.py From svg-animation-tools with MIT License

6 votes

def feed(self, markup):
        args, kwargs = self.parser_args
        parser = BeautifulSoupHTMLParser(*args, **kwargs)
        parser.soup = self.soup
        try:
            parser.feed(markup)
        except HTMLParseError, e:
            warnings.warn(RuntimeWarning(
                "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
            raise e

# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
# string.
#
# XXX This code can be removed once most Python 3 users are on 3.2.3.

Source File: BeautifulSoup.py From python-for-android with Apache License 2.0

6 votes

def parse_declaration(self, i):
        """Treat a bogus SGML declaration as raw data. Treat a CDATA
        declaration as a CData object."""
        j = None
        if self.rawdata[i:i+9] == '<![CDATA[':
             k = self.rawdata.find(']]>', i)
             if k == -1:
                 k = len(self.rawdata)
             data = self.rawdata[i+9:k]
             j = k+3
             self._toStringSubclass(data, CData)
        else:
            try:
                j = HTMLParser.parse_declaration(self, i)
            except HTMLParseError:
                toHandle = self.rawdata[i:]
                self.handle_data(toHandle)
                j = i + len(toHandle)
        return j

Source File: _htmlparser.py From CrisisMappingToolkit with Apache License 2.0

6 votes

def feed(self, markup):
        args, kwargs = self.parser_args
        parser = BeautifulSoupHTMLParser(*args, **kwargs)
        parser.soup = self.soup
        try:
            parser.feed(markup)
        except HTMLParseError, e:
            warnings.warn(RuntimeWarning(
                "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
            raise e

# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
# string.
#
# XXX This code can be removed once most Python 3 users are on 3.2.3.

Source File: _htmlparser.py From stopstalk-deployment with MIT License

6 votes

def feed(self, markup):
        args, kwargs = self.parser_args
        parser = BeautifulSoupHTMLParser(*args, **kwargs)
        parser.soup = self.soup
        try:
            parser.feed(markup)
        except HTMLParseError, e:
            warnings.warn(RuntimeWarning(
                "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
            raise e

# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
# string.
#
# XXX This code can be removed once most Python 3 users are on 3.2.3.

Source File: BeautifulSoup.py From dirigible-spreadsheet with MIT License

6 votes

def parse_declaration(self, i):
        """Treat a bogus SGML declaration as raw data. Treat a CDATA
        declaration as a CData object."""
        j = None
        if self.rawdata[i:i+9] == '<![CDATA[':
             k = self.rawdata.find(']]>', i)
             if k == -1:
                 k = len(self.rawdata)
             data = self.rawdata[i+9:k]
             j = k+3
             self._toStringSubclass(data, CData)
        else:
            try:
                j = HTMLParser.parse_declaration(self, i)
            except HTMLParseError:
                toHandle = self.rawdata[i:]
                self.handle_data(toHandle)
                j = i + len(toHandle)
        return j

Source File: _http.py From BruteXSS with GNU General Public License v3.0

6 votes

def http_response(self, request, response):
        if not hasattr(response, "seek"):
            response = response_seek_wrapper(response)
        http_message = response.info()
        url = response.geturl()
        ct_hdrs = http_message.getheaders("content-type")
        if is_html(ct_hdrs, url, self._allow_xhtml):
            try:
                try:
                    html_headers = parse_head(response,
                                              self.head_parser_class())
                finally:
                    response.seek(0)
            except (HTMLParser.HTMLParseError,
                    sgmllib.SGMLParseError):
                pass
            else:
                for hdr, val in html_headers:
                    # add a header
                    http_message.dict[hdr.lower()] = val
                    text = hdr + ": " + val
                    for line in text.split("\n"):
                        http_message.headers.append(line + "\n")
        return response

Source File: _htmlparser.py From svg-animation-tools with MIT License

6 votes

def feed(self, markup):
        args, kwargs = self.parser_args
        parser = BeautifulSoupHTMLParser(*args, **kwargs)
        parser.soup = self.soup
        try:
            parser.feed(markup)
        except HTMLParseError, e:
            warnings.warn(RuntimeWarning(
                "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
            raise e

# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
# string.
#
# XXX This code can be removed once most Python 3 users are on 3.2.3.

Source File: _http.py From pelisalacarta-ce with GNU General Public License v3.0

6 votes

def http_response(self, request, response):
        if not hasattr(response, "seek"):
            response = response_seek_wrapper(response)
        http_message = response.info()
        url = response.geturl()
        ct_hdrs = http_message.getheaders("content-type")
        if is_html(ct_hdrs, url, self._allow_xhtml):
            try:
                try:
                    html_headers = parse_head(response,
                                              self.head_parser_class())
                finally:
                    response.seek(0)
            except (HTMLParser.HTMLParseError,
                    sgmllib.SGMLParseError):
                pass
            else:
                for hdr, val in html_headers:
                    # add a header
                    http_message.dict[hdr.lower()] = val
                    text = hdr + ": " + val
                    for line in text.split("\n"):
                        http_message.headers.append(line + "\n")
        return response

Source File: _htmlparser.py From weeman with GNU General Public License v3.0

6 votes

def feed(self, markup):
        args, kwargs = self.parser_args
        parser = BeautifulSoupHTMLParser(*args, **kwargs)
        parser.soup = self.soup
        try:
            parser.feed(markup)
        except HTMLParseError, e:
            warnings.warn(RuntimeWarning(
                "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
            raise e

# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
# string.
#
# XXX This code can be removed once most Python 3 users are on 3.2.3.

Source File: test_htmlparser.py From CTFCrackTools with GNU General Public License v3.0

5 votes

def _parse_error(self, source):
        def parse(source=source):
            parser = HTMLParser.HTMLParser()
            parser.feed(source)
            parser.close()
        self.assertRaises(HTMLParser.HTMLParseError, parse)

Source File: _form.py From pelisalacarta-ce with GNU General Public License v3.0

5 votes

def feed(self, data):
        try:
            HTMLParser.HTMLParser.feed(self, data)
        except HTMLParser.HTMLParseError, exc:
            raise ParseError(exc)

Source File: _htmlparser.py From FastWordQuery with GNU General Public License v3.0

5 votes

def feed(self, markup):
        args, kwargs = self.parser_args
        parser = BeautifulSoupHTMLParser(*args, **kwargs)
        parser.soup = self.soup
        try:
            parser.feed(markup)
        except HTMLParseError, e:
            warnings.warn(RuntimeWarning(
                "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
            raise e

Source File: clientform.py From POC-EXP with GNU General Public License v3.0

5 votes

def feed(self, data):
            try:
                HTMLParser.HTMLParser.feed(self, data)
            except HTMLParser.HTMLParseError, exc:
                raise ParseError(exc)

Source File: test_htmlparser.py From CTFCrackTools-V2 with GNU General Public License v3.0

5 votes

def _parse_error(self, source):
        def parse(source=source):
            parser = HTMLParser.HTMLParser()
            parser.feed(source)
            parser.close()
        self.assertRaises(HTMLParser.HTMLParseError, parse)

Source File: _htmlparser.py From bazarr with GNU General Public License v3.0

5 votes

def feed(self, markup):
        args, kwargs = self.parser_args
        parser = BeautifulSoupHTMLParser(*args, **kwargs)
        parser.soup = self.soup
        try:
            parser.feed(markup)
        except HTMLParseError, e:
            warnings.warn(RuntimeWarning(
                "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
            raise e

Source File: clientform.py From EasY_HaCk with Apache License 2.0

5 votes

def feed(self, data):
            try:
                HTMLParser.HTMLParser.feed(self, data)
            except HTMLParser.HTMLParseError, exc:
                raise ParseError(exc)

Source File: html.py From sync-engine with GNU Affero General Public License v3.0

5 votes

def strip_tags(html):
    s = HTMLTagStripper()
    try:
        s.feed(html)
    except HTMLParseError:
        get_logger().error('error stripping tags', raw_html=html)
    return s.get_data()

# https://djangosnippets.org/snippets/19/

Source File: _form.py From BruteXSS with GNU General Public License v3.0

5 votes

def feed(self, data):
        try:
            HTMLParser.HTMLParser.feed(self, data)
        except HTMLParser.HTMLParseError, exc:
            raise ParseError(exc)

Source File: _htmlparser.py From ServerlessCrawler-VancouverRealState with MIT License

5 votes

def feed(self, markup):
        args, kwargs = self.parser_args
        parser = BeautifulSoupHTMLParser(*args, **kwargs)
        parser.soup = self.soup
        try:
            parser.feed(markup)
        except HTMLParseError, e:
            warnings.warn(RuntimeWarning(
                "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
            raise e

Source File: test_htmlparser.py From medicare-demo with Apache License 2.0

5 votes

def _parse_error(self, source):
        def parse(source=source):
            parser = HTMLParser.HTMLParser()
            parser.feed(source)
            parser.close()
        self.assertRaises(HTMLParser.HTMLParseError, parse)

Source File: test_htmlparser.py From gcblue with BSD 3-Clause "New" or "Revised" License

5 votes

def _parse_error(self, source):
        def parse(source=source):
            parser = HTMLParser.HTMLParser()
            parser.feed(source)
            parser.close()
        self.assertRaises(HTMLParser.HTMLParseError, parse)

Source File: clientform.py From NoobSec-Toolkit with GNU General Public License v2.0

5 votes

def feed(self, data):
            try:
                HTMLParser.HTMLParser.feed(self, data)
            except HTMLParser.HTMLParseError, exc:
                raise ParseError(exc)

Source File: clientform.py From NoobSec-Toolkit with GNU General Public License v2.0

5 votes

def feed(self, data):
            try:
                HTMLParser.HTMLParser.feed(self, data)
            except HTMLParser.HTMLParseError, exc:
                raise ParseError(exc)

Source File: clientform.py From NoobSec-Toolkit with GNU General Public License v2.0

5 votes

def feed(self, data):
            try:
                HTMLParser.HTMLParser.feed(self, data)
            except HTMLParser.HTMLParseError, exc:
                raise ParseError(exc)

Source File: clientform.py From NoobSec-Toolkit with GNU General Public License v2.0

5 votes

def feed(self, data):
            try:
                HTMLParser.HTMLParser.feed(self, data)
            except HTMLParser.HTMLParseError, exc:
                raise ParseError(exc)

Source File: test_htmlparser.py From oss-ftp with MIT License

5 votes

def _parse_error(self, source):
        def parse(source=source):
            parser = HTMLParser.HTMLParser()
            parser.feed(source)
            parser.close()
        self.assertRaises(HTMLParser.HTMLParseError, parse)

Source File: test_htmlparser.py From BinderFilter with MIT License

5 votes

def _parse_error(self, source):
        def parse(source=source):
            parser = HTMLParser.HTMLParser()
            parser.feed(source)
            parser.close()
        self.assertRaises(HTMLParser.HTMLParseError, parse)

Source File: htmlutil.py From closure-linter with Apache License 2.0

5 votes

def StripTags(str):
  """Returns the string with HTML tags stripped.

  Args:
    str: An html string.

  Returns:
    The html string with all tags stripped. If there was a parse error, returns
    the text successfully parsed so far.
  """
  # Brute force approach to stripping as much HTML as possible. If there is a
  # parsing error, don't strip text before parse error position, and continue
  # trying from there.
  final_text = ''
  finished = False
  while not finished:
    try:
      strip = _HtmlStripper()
      strip.feed(str)
      strip.close()
      str = strip.get_output()
      final_text += str
      finished = True
    except HTMLParser.HTMLParseError, e:
      final_text += str[:e.offset]
      str = str[e.offset + 1:]

Source File: test_htmlparser.py From ironpython2 with Apache License 2.0

5 votes

def _parse_error(self, source):
        def parse(source=source):
            parser = HTMLParser.HTMLParser()
            parser.feed(source)
            parser.close()
        self.assertRaises(HTMLParser.HTMLParseError, parse)

Source File: _htmlparser.py From ServerlessCrawler-VancouverRealState with MIT License

5 votes

def feed(self, markup):
        args, kwargs = self.parser_args
        parser = BeautifulSoupHTMLParser(*args, **kwargs)
        parser.soup = self.soup
        try:
            parser.feed(markup)
        except HTMLParseError, e:
            warnings.warn(RuntimeWarning(
                "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
            raise e

Python HTMLParser.HTMLParseError() Examples