Python HTMLParser.HTMLParseError() Examples
The following are 30
code examples of HTMLParser.HTMLParseError().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
HTMLParser
, or try the search function
.
Example #1
Source File: _htmlparser.py From svg-animation-tools with MIT License | 6 votes |
def feed(self, markup): args, kwargs = self.parser_args parser = BeautifulSoupHTMLParser(*args, **kwargs) parser.soup = self.soup try: parser.feed(markup) except HTMLParseError, e: warnings.warn(RuntimeWarning( "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help.")) raise e # Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some # 3.2.3 code. This ensures they don't treat markup like <p></p> as a # string. # # XXX This code can be removed once most Python 3 users are on 3.2.3.
Example #2
Source File: BeautifulSoup.py From python-for-android with Apache License 2.0 | 6 votes |
def parse_declaration(self, i): """Treat a bogus SGML declaration as raw data. Treat a CDATA declaration as a CData object.""" j = None if self.rawdata[i:i+9] == '<![CDATA[': k = self.rawdata.find(']]>', i) if k == -1: k = len(self.rawdata) data = self.rawdata[i+9:k] j = k+3 self._toStringSubclass(data, CData) else: try: j = HTMLParser.parse_declaration(self, i) except HTMLParseError: toHandle = self.rawdata[i:] self.handle_data(toHandle) j = i + len(toHandle) return j
Example #3
Source File: _htmlparser.py From CrisisMappingToolkit with Apache License 2.0 | 6 votes |
def feed(self, markup): args, kwargs = self.parser_args parser = BeautifulSoupHTMLParser(*args, **kwargs) parser.soup = self.soup try: parser.feed(markup) except HTMLParseError, e: warnings.warn(RuntimeWarning( "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help.")) raise e # Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some # 3.2.3 code. This ensures they don't treat markup like <p></p> as a # string. # # XXX This code can be removed once most Python 3 users are on 3.2.3.
Example #4
Source File: _htmlparser.py From stopstalk-deployment with MIT License | 6 votes |
def feed(self, markup): args, kwargs = self.parser_args parser = BeautifulSoupHTMLParser(*args, **kwargs) parser.soup = self.soup try: parser.feed(markup) except HTMLParseError, e: warnings.warn(RuntimeWarning( "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help.")) raise e # Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some # 3.2.3 code. This ensures they don't treat markup like <p></p> as a # string. # # XXX This code can be removed once most Python 3 users are on 3.2.3.
Example #5
Source File: BeautifulSoup.py From dirigible-spreadsheet with MIT License | 6 votes |
def parse_declaration(self, i): """Treat a bogus SGML declaration as raw data. Treat a CDATA declaration as a CData object.""" j = None if self.rawdata[i:i+9] == '<![CDATA[': k = self.rawdata.find(']]>', i) if k == -1: k = len(self.rawdata) data = self.rawdata[i+9:k] j = k+3 self._toStringSubclass(data, CData) else: try: j = HTMLParser.parse_declaration(self, i) except HTMLParseError: toHandle = self.rawdata[i:] self.handle_data(toHandle) j = i + len(toHandle) return j
Example #6
Source File: _http.py From BruteXSS with GNU General Public License v3.0 | 6 votes |
def http_response(self, request, response): if not hasattr(response, "seek"): response = response_seek_wrapper(response) http_message = response.info() url = response.geturl() ct_hdrs = http_message.getheaders("content-type") if is_html(ct_hdrs, url, self._allow_xhtml): try: try: html_headers = parse_head(response, self.head_parser_class()) finally: response.seek(0) except (HTMLParser.HTMLParseError, sgmllib.SGMLParseError): pass else: for hdr, val in html_headers: # add a header http_message.dict[hdr.lower()] = val text = hdr + ": " + val for line in text.split("\n"): http_message.headers.append(line + "\n") return response
Example #7
Source File: _htmlparser.py From svg-animation-tools with MIT License | 6 votes |
def feed(self, markup): args, kwargs = self.parser_args parser = BeautifulSoupHTMLParser(*args, **kwargs) parser.soup = self.soup try: parser.feed(markup) except HTMLParseError, e: warnings.warn(RuntimeWarning( "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help.")) raise e # Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some # 3.2.3 code. This ensures they don't treat markup like <p></p> as a # string. # # XXX This code can be removed once most Python 3 users are on 3.2.3.
Example #8
Source File: _http.py From pelisalacarta-ce with GNU General Public License v3.0 | 6 votes |
def http_response(self, request, response): if not hasattr(response, "seek"): response = response_seek_wrapper(response) http_message = response.info() url = response.geturl() ct_hdrs = http_message.getheaders("content-type") if is_html(ct_hdrs, url, self._allow_xhtml): try: try: html_headers = parse_head(response, self.head_parser_class()) finally: response.seek(0) except (HTMLParser.HTMLParseError, sgmllib.SGMLParseError): pass else: for hdr, val in html_headers: # add a header http_message.dict[hdr.lower()] = val text = hdr + ": " + val for line in text.split("\n"): http_message.headers.append(line + "\n") return response
Example #9
Source File: _htmlparser.py From weeman with GNU General Public License v3.0 | 6 votes |
def feed(self, markup): args, kwargs = self.parser_args parser = BeautifulSoupHTMLParser(*args, **kwargs) parser.soup = self.soup try: parser.feed(markup) except HTMLParseError, e: warnings.warn(RuntimeWarning( "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help.")) raise e # Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some # 3.2.3 code. This ensures they don't treat markup like <p></p> as a # string. # # XXX This code can be removed once most Python 3 users are on 3.2.3.
Example #10
Source File: test_htmlparser.py From CTFCrackTools with GNU General Public License v3.0 | 5 votes |
def _parse_error(self, source): def parse(source=source): parser = HTMLParser.HTMLParser() parser.feed(source) parser.close() self.assertRaises(HTMLParser.HTMLParseError, parse)
Example #11
Source File: _form.py From pelisalacarta-ce with GNU General Public License v3.0 | 5 votes |
def feed(self, data): try: HTMLParser.HTMLParser.feed(self, data) except HTMLParser.HTMLParseError, exc: raise ParseError(exc)
Example #12
Source File: _htmlparser.py From FastWordQuery with GNU General Public License v3.0 | 5 votes |
def feed(self, markup): args, kwargs = self.parser_args parser = BeautifulSoupHTMLParser(*args, **kwargs) parser.soup = self.soup try: parser.feed(markup) except HTMLParseError, e: warnings.warn(RuntimeWarning( "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help.")) raise e
Example #13
Source File: clientform.py From POC-EXP with GNU General Public License v3.0 | 5 votes |
def feed(self, data): try: HTMLParser.HTMLParser.feed(self, data) except HTMLParser.HTMLParseError, exc: raise ParseError(exc)
Example #14
Source File: test_htmlparser.py From CTFCrackTools-V2 with GNU General Public License v3.0 | 5 votes |
def _parse_error(self, source): def parse(source=source): parser = HTMLParser.HTMLParser() parser.feed(source) parser.close() self.assertRaises(HTMLParser.HTMLParseError, parse)
Example #15
Source File: _htmlparser.py From bazarr with GNU General Public License v3.0 | 5 votes |
def feed(self, markup): args, kwargs = self.parser_args parser = BeautifulSoupHTMLParser(*args, **kwargs) parser.soup = self.soup try: parser.feed(markup) except HTMLParseError, e: warnings.warn(RuntimeWarning( "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help.")) raise e
Example #16
Source File: clientform.py From EasY_HaCk with Apache License 2.0 | 5 votes |
def feed(self, data): try: HTMLParser.HTMLParser.feed(self, data) except HTMLParser.HTMLParseError, exc: raise ParseError(exc)
Example #17
Source File: html.py From sync-engine with GNU Affero General Public License v3.0 | 5 votes |
def strip_tags(html): s = HTMLTagStripper() try: s.feed(html) except HTMLParseError: get_logger().error('error stripping tags', raw_html=html) return s.get_data() # https://djangosnippets.org/snippets/19/
Example #18
Source File: _form.py From BruteXSS with GNU General Public License v3.0 | 5 votes |
def feed(self, data): try: HTMLParser.HTMLParser.feed(self, data) except HTMLParser.HTMLParseError, exc: raise ParseError(exc)
Example #19
Source File: _htmlparser.py From ServerlessCrawler-VancouverRealState with MIT License | 5 votes |
def feed(self, markup): args, kwargs = self.parser_args parser = BeautifulSoupHTMLParser(*args, **kwargs) parser.soup = self.soup try: parser.feed(markup) except HTMLParseError, e: warnings.warn(RuntimeWarning( "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help.")) raise e
Example #20
Source File: test_htmlparser.py From medicare-demo with Apache License 2.0 | 5 votes |
def _parse_error(self, source): def parse(source=source): parser = HTMLParser.HTMLParser() parser.feed(source) parser.close() self.assertRaises(HTMLParser.HTMLParseError, parse)
Example #21
Source File: test_htmlparser.py From gcblue with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _parse_error(self, source): def parse(source=source): parser = HTMLParser.HTMLParser() parser.feed(source) parser.close() self.assertRaises(HTMLParser.HTMLParseError, parse)
Example #22
Source File: clientform.py From NoobSec-Toolkit with GNU General Public License v2.0 | 5 votes |
def feed(self, data): try: HTMLParser.HTMLParser.feed(self, data) except HTMLParser.HTMLParseError, exc: raise ParseError(exc)
Example #23
Source File: clientform.py From NoobSec-Toolkit with GNU General Public License v2.0 | 5 votes |
def feed(self, data): try: HTMLParser.HTMLParser.feed(self, data) except HTMLParser.HTMLParseError, exc: raise ParseError(exc)
Example #24
Source File: clientform.py From NoobSec-Toolkit with GNU General Public License v2.0 | 5 votes |
def feed(self, data): try: HTMLParser.HTMLParser.feed(self, data) except HTMLParser.HTMLParseError, exc: raise ParseError(exc)
Example #25
Source File: clientform.py From NoobSec-Toolkit with GNU General Public License v2.0 | 5 votes |
def feed(self, data): try: HTMLParser.HTMLParser.feed(self, data) except HTMLParser.HTMLParseError, exc: raise ParseError(exc)
Example #26
Source File: test_htmlparser.py From oss-ftp with MIT License | 5 votes |
def _parse_error(self, source): def parse(source=source): parser = HTMLParser.HTMLParser() parser.feed(source) parser.close() self.assertRaises(HTMLParser.HTMLParseError, parse)
Example #27
Source File: test_htmlparser.py From BinderFilter with MIT License | 5 votes |
def _parse_error(self, source): def parse(source=source): parser = HTMLParser.HTMLParser() parser.feed(source) parser.close() self.assertRaises(HTMLParser.HTMLParseError, parse)
Example #28
Source File: htmlutil.py From closure-linter with Apache License 2.0 | 5 votes |
def StripTags(str): """Returns the string with HTML tags stripped. Args: str: An html string. Returns: The html string with all tags stripped. If there was a parse error, returns the text successfully parsed so far. """ # Brute force approach to stripping as much HTML as possible. If there is a # parsing error, don't strip text before parse error position, and continue # trying from there. final_text = '' finished = False while not finished: try: strip = _HtmlStripper() strip.feed(str) strip.close() str = strip.get_output() final_text += str finished = True except HTMLParser.HTMLParseError, e: final_text += str[:e.offset] str = str[e.offset + 1:]
Example #29
Source File: test_htmlparser.py From ironpython2 with Apache License 2.0 | 5 votes |
def _parse_error(self, source): def parse(source=source): parser = HTMLParser.HTMLParser() parser.feed(source) parser.close() self.assertRaises(HTMLParser.HTMLParseError, parse)
Example #30
Source File: _htmlparser.py From ServerlessCrawler-VancouverRealState with MIT License | 5 votes |
def feed(self, markup): args, kwargs = self.parser_args parser = BeautifulSoupHTMLParser(*args, **kwargs) parser.soup = self.soup try: parser.feed(markup) except HTMLParseError, e: warnings.warn(RuntimeWarning( "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help.")) raise e