Python bs4.FeatureNotFound() Examples
The following are 5 code examples of bs4.FeatureNotFound().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module bs4, or try the search function.
Example #1
Source File: formatter.py From pytablereader with MIT License | 6 votes |
def __init__(self, source_data, logger=None):
    """Initialise the formatter and parse ``source_data`` with Beautiful Soup.

    The ``"lxml"`` parser is requested first; if that raises
    ``bs4.FeatureNotFound`` the soup is built with ``"html.parser"`` instead.

    Args:
        source_data: Markup handed to the parent initialiser and then parsed.
        logger: Logger to use. When falsy, ``NullSourceLogger(None)`` is used.

    Raises:
        DataError: If ``typepy.is_null_string(source_data)`` is true.
    """
    super().__init__(source_data)

    self.__logger = logger if logger else NullSourceLogger(None)
    self.__table_id = None

    if typepy.is_null_string(source_data):
        raise DataError

    try:
        self.__soup = bs4.BeautifulSoup(self._source_data, "lxml")
    except bs4.FeatureNotFound:
        self.__soup = bs4.BeautifulSoup(self._source_data, "html.parser")
Example #2
Source File: __init__.py From bazarr with GNU General Public License v3.0 | 6 votes |
def __init__(self, markup, parsers, **kwargs):
    """Build the soup with the first entry of ``parsers`` that is usable.

    Args:
        markup: Markup forwarded to the parent initialiser.
        parsers: Iterable of parser names, tried in order.
        **kwargs: Extra keyword arguments forwarded to the parent
            initialiser; ``features`` and ``builder`` are rejected.

    Raises:
        ValueError: If ``parsers`` contains a feature name rather than a
            parser name, or if ``features``/``builder`` is in ``kwargs``.
        FeatureNotFound: If every parser in ``parsers`` raised
            ``FeatureNotFound`` (or ``parsers`` is empty).
    """
    # reject features
    feature_names = {'fast', 'permissive', 'strict', 'xml', 'html', 'html5'}
    if set(parsers) & feature_names:
        raise ValueError('Features not allowed, only parser names')

    # reject some kwargs
    rejected_kwargs = (
        ('features', 'Cannot use features kwarg'),
        ('builder', 'Cannot use builder kwarg'),
    )
    for key, message in rejected_kwargs:
        if key in kwargs:
            raise ValueError(message)

    # pick the first parser available
    for candidate in parsers:
        try:
            super(ParserBeautifulSoup, self).__init__(markup, candidate, **kwargs)
        except FeatureNotFound:
            continue
        return

    raise FeatureNotFound
Example #3
Source File: test_html.py From Carnets with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_backend_parsers():
    """
    Check that a back-end parser can be selected through ``htmldict`` and
    that an unknown parser name raises ``FeatureNotFound``.
    """
    source = 'data/html2.html'
    read_kwargs = {'format': 'ascii.html', 'guess': False}

    for backend in ('lxml', 'xml', 'html.parser', 'html5lib'):
        try:
            Table.read(source, htmldict={'parser': backend}, **read_kwargs)
        except FeatureNotFound:
            # only 'html.parser' is required to work; for the others,
            # ignore the failure if the dependency isn't present
            if backend == 'html.parser':
                raise

    # reading should fail if the parser is invalid
    with pytest.raises(FeatureNotFound):
        Table.read(source, htmldict={'parser': 'foo'}, **read_kwargs)
Example #4
Source File: htmlark.py From htmlark with MIT License | 5 votes |
def get_available_parsers():
    """Return the entries of ``PARSERS`` that Beautiful Soup can instantiate.

    Each candidate is probed by building a soup from an empty document; a
    candidate that raises ``bs4.FeatureNotFound`` is left out of the result.
    """
    def _is_usable(parser_name):
        # Probe one parser; FeatureNotFound means it cannot be used.
        try:
            bs4.BeautifulSoup("", parser_name)
        except bs4.FeatureNotFound:
            return False
        return True

    return [name for name in PARSERS if _is_usable(name)]
Example #5
Source File: tipue_search.py From ford with GNU General Public License v3.0 | 5 votes |
def create_node(self, html, loc, meta=None):
    """Build a search-index node for one page and append it to ``self.json_nodes``.

    The page is parsed twice (restricted by ``self.only_text`` and
    ``self.only_title``), preferring the ``'lxml'`` parser and falling back
    to ``'html.parser'`` when Beautiful Soup raises ``FeatureNotFound``.

    Args:
        html: Markup of the page.
        loc: Location of the page; joined onto ``self.siteurl`` when that
            is not the empty string, otherwise used as-is.
        meta: Optional mapping of page metadata. Its ``'category'`` entry,
            when present, becomes the node's ``'tags'`` value.

    Raises:
        AttributeError: If the page has no ``<div id="text">`` element
            (``soup.find`` returns ``None`` in that case).
    """
    # ``None`` sentinel instead of a ``{}`` default, which would be a single
    # dict object shared by every call.
    if meta is None:
        meta = {}

    try:
        soup = BeautifulSoup(html, 'lxml', parse_only=self.only_text)
        soup_title = BeautifulSoup(html, 'lxml', parse_only=self.only_title)
    except FeatureNotFound:
        soup = BeautifulSoup(html, 'html.parser', parse_only=self.only_text)
        soup_title = BeautifulSoup(html, 'html.parser', parse_only=self.only_title)

    page_text = soup.find("div", {"id": "text"}).get_text(' ', strip=True)
    # Drop LaTeX-style math delimiters from the indexed text.
    for delimiter in ('\\(', '\\)', '\\[', '\\]', '$$'):
        page_text = page_text.replace(delimiter, '')
    # Escape '^' as an HTML entity. The previous ``replace('^', '^')`` was a
    # no-op, which looks like the entity having been decoded in transit.
    page_text = page_text.replace('^', '&#94;')

    # What happens if there is not a title.
    title_tag = soup_title.title
    page_title = '{0}'.format(title_tag.string) if title_tag is not None else ''

    # Should set default category?
    page_category = meta['category'] if 'category' in meta else ''

    page_url = urljoin(self.siteurl, loc) if self.siteurl != '' else loc

    self.json_nodes.append({
        'title': page_title,
        'text': page_text,
        'tags': page_category,
        'loc': page_url,
    })