Python lxml.etree.ParserError() Examples
The following are 30
code examples of lxml.etree.ParserError().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
lxml.etree
, or try the search function
.
Example #1
Source File: _lxml.py From bazarr with GNU General Public License v3.0 | 6 votes |
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, unicode): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError), e: raise ParserRejectedMarkup(str(e))
Example #2
Source File: _lxml.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International | 6 votes |
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, str): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError) as e: raise ParserRejectedMarkup(e)
Example #3
Source File: _lxml.py From ServerlessCrawler-VancouverRealState with MIT License | 6 votes |
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, unicode): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError), e: raise ParserRejectedMarkup(str(e))
Example #4
Source File: _lxml.py From svg-animation-tools with MIT License | 6 votes |
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, unicode): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError), e: raise ParserRejectedMarkup(str(e))
Example #5
Source File: _lxml.py From ServerlessCrawler-VancouverRealState with MIT License | 6 votes |
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, unicode): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError), e: raise ParserRejectedMarkup(str(e))
Example #6
Source File: _lxml.py From B.E.N.J.I. with MIT License | 6 votes |
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, str): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError) as e: raise ParserRejectedMarkup(str(e))
Example #7
Source File: _lxml.py From pledgeservice with Apache License 2.0 | 6 votes |
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, unicode): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError), e: raise ParserRejectedMarkup(str(e))
Example #8
Source File: _lxml.py From Tautulli with GNU General Public License v3.0 | 6 votes |
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, str): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError) as e: raise ParserRejectedMarkup(e)
Example #9
Source File: _lxml.py From weeman with GNU General Public License v3.0 | 6 votes |
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, unicode): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError), e: raise ParserRejectedMarkup(str(e))
Example #10
Source File: _lxml.py From moviegrabber with GNU General Public License v3.0 | 6 votes |
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, unicode): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError), e: raise ParserRejectedMarkup(str(e))
Example #11
Source File: _lxml.py From fuzzdb-collect with GNU General Public License v3.0 | 6 votes |
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, unicode): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError), e: raise ParserRejectedMarkup(str(e))
Example #12
Source File: _lxml.py From locality-sensitive-hashing with MIT License | 6 votes |
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, unicode): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError), e: raise ParserRejectedMarkup(str(e))
Example #13
Source File: _lxml.py From svg-animation-tools with MIT License | 6 votes |
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, unicode): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError), e: raise ParserRejectedMarkup(str(e))
Example #14
Source File: _lxml.py From nzb-subliminal with GNU General Public License v3.0 | 6 votes |
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, unicode): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError), e: raise ParserRejectedMarkup(str(e))
Example #15
Source File: _lxml.py From bazarr with GNU General Public License v3.0 | 6 votes |
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, str): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError) as e: raise ParserRejectedMarkup(str(e))
Example #16
Source File: _lxml.py From ServerlessCrawler-VancouverRealState with MIT License | 6 votes |
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, unicode): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError), e: raise ParserRejectedMarkup(str(e))
Example #17
Source File: _lxml.py From MIA-Dictionary-Addon with GNU General Public License v3.0 | 6 votes |
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, str): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError) as e: raise ParserRejectedMarkup(str(e))
Example #18
Source File: conferences.py From sportsreference with MIT License | 6 votes |
def _pull_conference_page(self, conference_abbreviation, year): """ Download the conference page. Download the conference page for the requested conference and season and create a PyQuery object. Parameters ---------- conference_abbreviation : string A string of the requested conference's abbreviation, such as 'big-12'. year : string A string of the requested year to pull conference information from. """ try: return pq(CONFERENCE_URL % (conference_abbreviation, year)) except (HTTPError, ParserError): return None
Example #19
Source File: roster.py From sportsreference with MIT License | 6 votes |
def _retrieve_html_page(self): """ Download the requested player's stats page. Download the requested page and strip all of the comment tags before returning a pyquery object which will be used to parse the data. Returns ------- PyQuery object The requested page is returned as a queriable PyQuery object with the comment tags removed. """ url = PLAYER_URL % self._player_id try: url_data = pq(url) except (HTTPError, ParserError): return None return pq(utils._remove_html_comment_tags(url_data))
Example #20
Source File: roster.py From sportsreference with MIT License | 6 votes |
def _retrieve_html_page(self): """ Download the requested player's stats page. Download the requested page and strip all of the comment tags before returning a pyquery object which will be used to parse the data. Returns ------- PyQuery object The requested page is returned as a queriable PyQuery object with the comment tags removed. """ url = self._build_url() try: url_data = pq(url) except (HTTPError, ParserError): return None return pq(utils._remove_html_comment_tags(url_data))
Example #21
Source File: roster.py From sportsreference with MIT License | 6 votes |
def _retrieve_html_page(self): """ Download the requested player's stats page. Download the requested page and strip all of the comment tags before returning a PyQuery object which will be used to parse the data. Oftentimes, important data is contained in tables which are hidden in HTML comments and not accessible via PyQuery. Returns ------- PyQuery object The requested page is returned as a queriable PyQuery object with the comment tags removed. """ url = self._build_url() try: url_data = pq(url) except (HTTPError, ParserError): return None # For NFL, a 404 page doesn't actually raise a 404 error, so it needs # to be manually checked. if 'Page Not Found (404 error)' in str(url_data): return None return pq(utils._remove_html_comment_tags(url_data))
Example #22
Source File: _lxml.py From POC-EXP with GNU General Public License v3.0 | 6 votes |
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, unicode): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError), e: raise ParserRejectedMarkup(str(e))
Example #23
Source File: _lxml.py From stopstalk-deployment with MIT License | 6 votes |
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, unicode): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError), e: raise ParserRejectedMarkup(str(e))
Example #24
Source File: _lxml.py From plugin.git.browser with GNU General Public License v3.0 | 6 votes |
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, str): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError) as e: raise ParserRejectedMarkup(str(e))
Example #25
Source File: xml.py From ingestors with MIT License | 6 votes |
def ingest(self, file_path): """Ingestor implementation.""" file_size = self.result.size or os.path.getsize(file_path) if file_size > self.MAX_SIZE: raise ProcessingException("XML file is too large.") try: doc = etree.parse(file_path) except (ParserError, ParseError): raise ProcessingException("XML could not be parsed.") text = self.extract_html_text(doc.getroot()) transform = etree.XSLT(self.XSLT) html_doc = transform(doc) html_body = html.tostring(html_doc, encoding=str, pretty_print=True) self.result.flag(self.result.FLAG_HTML) self.result.emit_html_body(html_body, text)
Example #26
Source File: _lxml.py From nbaplus-server with Apache License 2.0 | 6 votes |
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, unicode): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError), e: raise ParserRejectedMarkup(str(e))
Example #27
Source File: html.py From ingestors with MIT License | 6 votes |
def extract_html_content(self, html_body, fix_html=True): """Ingestor implementation.""" if html_body is None: return try: try: doc = html.fromstring(html_body) except ValueError: # Ship around encoding declarations. # https://stackoverflow.com/questions/3402520 html_body = self.RE_XML_ENCODING.sub('', html_body, count=1) doc = html.fromstring(html_body) except (ParserError, ParseError, ValueError): raise ProcessingException("HTML could not be parsed.") self.extract_html_header(doc) self.cleaner(doc) text = self.extract_html_text(doc) self.result.flag(self.result.FLAG_HTML) self.result.emit_html_body(html_body, text)
Example #28
Source File: _lxml.py From CrisisMappingToolkit with Apache License 2.0 | 6 votes |
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, unicode): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError), e: raise ParserRejectedMarkup(str(e))
Example #29
Source File: _lxml.py From MARA_Framework with GNU Lesser General Public License v3.0 | 6 votes |
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, unicode): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError), e: raise ParserRejectedMarkup(str(e))
Example #30
Source File: _lxml.py From ru with GNU General Public License v2.0 | 6 votes |
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, unicode): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError), e: raise ParserRejectedMarkup(str(e))