Python bs4.__version__ Examples
The following are 17 code examples that use bs4.__version__ (a module attribute holding the version string, not a callable).
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module bs4, or try the search function.
Example #1
Source File: test_html.py From Computable with MIT License | 6 votes |
def _skip_if_none_of(module_names):
    """Skip the current test unless the requested module(s) can be used.

    Parameters
    ----------
    module_names : str or iterable of str
        A single module name is handed to ``_skip_if_no``. For an iterable,
        each name is probed with ``_have_module``.

    Raises
    ------
    nose.SkipTest
        If any name of an iterable is reported missing, or if ``bs4`` was
        requested and the installed release compares equal to 4.2.0.
    """
    if isinstance(module_names, string_types):
        _skip_if_no(module_names)
        wants_bs4 = module_names == 'bs4'
    else:
        missing = [name for name in module_names if not _have_module(name)]
        if set(missing) & set(module_names):
            raise nose.SkipTest("{0!r} not found".format(missing))
        wants_bs4 = 'bs4' in module_names

    # Both input shapes share the same bs4 release check.
    if wants_bs4:
        import bs4
        if bs4.__version__ == LooseVersion('4.2.0'):
            raise nose.SkipTest("Bad version of bs4: 4.2.0")
Example #2
Source File: test_html.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def _skip_if_none_of(module_names):
    """Skip the current test unless the requested module(s) can be used.

    Parameters
    ----------
    module_names : str or iterable of str
        A single module name is handed to ``_skip_if_no``. For an iterable,
        each name is probed with ``_have_module``.

    Notes
    -----
    ``pytest.skip`` is called when any name of an iterable is reported
    missing, or when ``bs4`` was requested and the installed release
    compares equal to 4.2.0.
    """
    if isinstance(module_names, string_types):
        _skip_if_no(module_names)
        wants_bs4 = module_names == 'bs4'
    else:
        missing = [name for name in module_names if not _have_module(name)]
        if set(missing) & set(module_names):
            pytest.skip("{0!r} not found".format(missing))
        wants_bs4 = 'bs4' in module_names

    # Both input shapes share the same bs4 release check.
    if wants_bs4:
        import bs4
        if bs4.__version__ == LooseVersion('4.2.0'):
            pytest.skip("Bad version of bs4: 4.2.0")
Example #3
Source File: utils.py From MechanicalSoup with MIT License | 5 votes |
def mock_post(mocked_adapter, url, expected, reply='Success!'):
    """Register a POST handler on ``mocked_adapter`` that checks the body.

    The handler parses the request text as a query string, asserts that it
    matches ``expected`` and answers with ``reply``.

    Parameters
    ----------
    mocked_adapter : object
        Must provide ``register_uri(method, url, text=callback)``.
    url : str
        URL the handler is registered for.
    expected : list
        Expected ``(name, value)`` pairs of the posted form.
    reply : str
        Text returned by the handler.
    """
    def _verify_and_reply(request, context):
        # Python 2's parse_qsl doesn't like None argument
        if request.text:
            posted = parse_qsl(request.text)
        else:
            posted = []
        # In bs4 4.7.0+, CSS selectors return elements in page order,
        # but did not in earlier versions.
        page_ordered = (
            StrictVersion(bs4.__version__) >= StrictVersion('4.7.0'))
        if page_ordered:
            assert posted == expected
        else:
            assert sorted(posted) == sorted(expected)
        return reply

    mocked_adapter.register_uri('POST', url, text=_verify_and_reply)
Example #4
Source File: diagnose.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International | 5 votes |
def benchmark_parsers(num_elements=100000):
    """Very basic head-to-head performance benchmark.

    Builds one document with ``rdoc(num_elements)``, times ``BeautifulSoup``
    on it with each parser configuration, then times raw lxml and raw
    html5lib on the same data. All results are printed; nothing is returned.

    A parser configuration that raises inside ``BeautifulSoup`` is reported
    with its traceback and the benchmark continues. The raw lxml / html5lib
    timings are skipped with a message when the library cannot be imported,
    so a missing optional dependency does not abort the run.

    Parameters
    ----------
    num_elements : int
        Size argument forwarded to ``rdoc``.
    """
    print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
    data = rdoc(num_elements)
    print("Generated a large invalid HTML document (%d bytes)." % len(data))

    for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
        try:
            started = time.time()
            BeautifulSoup(data, parser)
            elapsed = time.time() - started
        except Exception:
            # Broad on purpose: any failure is reported and the run goes on.
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        else:
            print("BS4+%s parsed the markup in %.2fs." % (parser, elapsed))

    # The raw-library timings need the optional packages themselves; guard
    # the imports so their absence is reported instead of raising.
    try:
        from lxml import etree
    except ImportError:
        print("lxml is not installed or couldn't be imported.")
    else:
        started = time.time()
        etree.HTML(data)
        elapsed = time.time() - started
        print("Raw lxml parsed the markup in %.2fs." % elapsed)

    try:
        import html5lib
    except ImportError:
        print("html5lib is not installed or couldn't be imported.")
    else:
        raw_parser = html5lib.HTMLParser()
        started = time.time()
        raw_parser.parse(data)
        elapsed = time.time() - started
        print("Raw html5lib parsed the markup in %.2fs." % elapsed)
Example #5
Source File: diagnose.py From Tautulli with GNU General Public License v3.0 | 5 votes |
def benchmark_parsers(num_elements=100000):
    """Very basic head-to-head performance benchmark.

    Builds one document with ``rdoc(num_elements)``, times ``BeautifulSoup``
    on it with each parser configuration, then times raw lxml and raw
    html5lib on the same data. All results are printed; nothing is returned.

    A parser configuration that raises inside ``BeautifulSoup`` is reported
    with its traceback and the benchmark continues. The raw lxml / html5lib
    timings are skipped with a message when the library cannot be imported,
    so a missing optional dependency does not abort the run.

    Parameters
    ----------
    num_elements : int
        Size argument forwarded to ``rdoc``.
    """
    print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
    data = rdoc(num_elements)
    print("Generated a large invalid HTML document (%d bytes)." % len(data))

    for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
        try:
            started = time.time()
            BeautifulSoup(data, parser)
            elapsed = time.time() - started
        except Exception:
            # Broad on purpose: any failure is reported and the run goes on.
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        else:
            print("BS4+%s parsed the markup in %.2fs." % (parser, elapsed))

    # The raw-library timings need the optional packages themselves; guard
    # the imports so their absence is reported instead of raising.
    try:
        from lxml import etree
    except ImportError:
        print("lxml is not installed or couldn't be imported.")
    else:
        started = time.time()
        etree.HTML(data)
        elapsed = time.time() - started
        print("Raw lxml parsed the markup in %.2fs." % elapsed)

    try:
        import html5lib
    except ImportError:
        print("html5lib is not installed or couldn't be imported.")
    else:
        raw_parser = html5lib.HTMLParser()
        started = time.time()
        raw_parser.parse(data)
        elapsed = time.time() - started
        print("Raw html5lib parsed the markup in %.2fs." % elapsed)
Example #6
Source File: diagnose.py From bazarr with GNU General Public License v3.0 | 5 votes |
def benchmark_parsers(num_elements=100000):
    """Very basic head-to-head performance benchmark.

    Builds one document with ``rdoc(num_elements)``, times ``BeautifulSoup``
    on it with each parser configuration, then times raw lxml and raw
    html5lib on the same data. All results are printed; nothing is returned.

    A parser configuration that raises inside ``BeautifulSoup`` is reported
    with its traceback and the benchmark continues. The raw lxml / html5lib
    timings are skipped with a message when the library cannot be imported,
    so a missing optional dependency does not abort the run.

    Parameters
    ----------
    num_elements : int
        Size argument forwarded to ``rdoc``.
    """
    print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
    data = rdoc(num_elements)
    print("Generated a large invalid HTML document (%d bytes)." % len(data))

    for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
        try:
            started = time.time()
            BeautifulSoup(data, parser)
            elapsed = time.time() - started
        except Exception:
            # Broad on purpose: any failure is reported and the run goes on.
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        else:
            print("BS4+%s parsed the markup in %.2fs." % (parser, elapsed))

    # The raw-library timings need the optional packages themselves; guard
    # the imports so their absence is reported instead of raising.
    try:
        from lxml import etree
    except ImportError:
        print("lxml is not installed or couldn't be imported.")
    else:
        started = time.time()
        etree.HTML(data)
        elapsed = time.time() - started
        print("Raw lxml parsed the markup in %.2fs." % elapsed)

    try:
        import html5lib
    except ImportError:
        print("html5lib is not installed or couldn't be imported.")
    else:
        raw_parser = html5lib.HTMLParser()
        started = time.time()
        raw_parser.parse(data)
        elapsed = time.time() - started
        print("Raw html5lib parsed the markup in %.2fs." % elapsed)
Example #7
Source File: diagnose.py From MIA-Dictionary-Addon with GNU General Public License v3.0 | 5 votes |
def benchmark_parsers(num_elements=100000):
    """Very basic head-to-head performance benchmark.

    Builds one document with ``rdoc(num_elements)``, times ``BeautifulSoup``
    on it with each parser configuration, then times raw lxml and raw
    html5lib on the same data. All results are printed; nothing is returned.

    A parser configuration that raises inside ``BeautifulSoup`` is reported
    with its traceback and the benchmark continues. The raw lxml / html5lib
    timings are skipped with a message when the library cannot be imported,
    so a missing optional dependency does not abort the run.

    Parameters
    ----------
    num_elements : int
        Size argument forwarded to ``rdoc``.
    """
    print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
    data = rdoc(num_elements)
    print("Generated a large invalid HTML document (%d bytes)." % len(data))

    for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
        try:
            started = time.time()
            BeautifulSoup(data, parser)
            elapsed = time.time() - started
        except Exception:
            # Broad on purpose: any failure is reported and the run goes on.
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        else:
            print("BS4+%s parsed the markup in %.2fs." % (parser, elapsed))

    # The raw-library timings need the optional packages themselves; guard
    # the imports so their absence is reported instead of raising.
    try:
        from lxml import etree
    except ImportError:
        print("lxml is not installed or couldn't be imported.")
    else:
        started = time.time()
        etree.HTML(data)
        elapsed = time.time() - started
        print("Raw lxml parsed the markup in %.2fs." % elapsed)

    try:
        import html5lib
    except ImportError:
        print("html5lib is not installed or couldn't be imported.")
    else:
        raw_parser = html5lib.HTMLParser()
        started = time.time()
        raw_parser.parse(data)
        elapsed = time.time() - started
        print("Raw html5lib parsed the markup in %.2fs." % elapsed)
Example #8
Source File: test_html.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_bs4_version_fails():
    """Expect ``read_html(..., flavor='bs4')`` to fail under bs4 4.2.0.

    Prerequisites are checked by ``_skip_if_none_of``. The assertion itself
    only runs when the installed bs4 release compares equal to 4.2.0;
    otherwise the test does nothing.
    """
    _skip_if_none_of(('bs4', 'html5lib'))
    import bs4

    is_bad_release = bs4.__version__ == LooseVersion('4.2.0')
    if not is_bad_release:
        return

    spam_path = os.path.join(DATA_PATH, "spam.html")
    tm.assert_raises(AssertionError, read_html, spam_path, flavor='bs4')
Example #9
Source File: html.py From recruit with Apache License 2.0 | 5 votes |
def _parser_dispatch(flavor):
    """Return the parser class registered for ``flavor``.

    Parameters
    ----------
    flavor : str
        Name of the parsing backend; must be a key of ``_valid_parsers``.

    Returns
    -------
    cls : _HtmlFrameParser subclass
        The entry of ``_valid_parsers`` for the requested flavor.

    Raises
    ------
    ValueError
        * If `flavor` is not a valid backend.
        * If the installed BeautifulSoup4 is 4.2.0 or older.
    ImportError
        * If a library needed by the requested `flavor` is unavailable.
    """
    known_flavors = list(_valid_parsers.keys())
    if flavor not in known_flavors:
        template = ('{invalid!r} is not a valid flavor, valid flavors '
                    'are {valid}')
        raise ValueError(template.format(invalid=flavor,
                                         valid=known_flavors))

    if flavor not in ('bs4', 'html5lib'):
        # Every other flavor needs lxml.
        if not _HAS_LXML:
            raise ImportError("lxml not found, please install it")
        return _valid_parsers[flavor]

    # 'bs4' and 'html5lib' both require html5lib and BeautifulSoup4.
    if not _HAS_HTML5LIB:
        raise ImportError("html5lib not found, please install it")
    if not _HAS_BS4:
        raise ImportError(
            "BeautifulSoup4 (bs4) not found, please install it")

    import bs4
    installed = LooseVersion(bs4.__version__)
    if installed <= LooseVersion('4.2.0'):
        raise ValueError("A minimum version of BeautifulSoup 4.2.1 "
                         "is required")
    return _valid_parsers[flavor]
Example #10
Source File: html.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def _parser_dispatch(flavor):
    """Return the parser class registered for ``flavor``.

    Parameters
    ----------
    flavor : str
        Name of the parsing backend; must be a key of ``_valid_parsers``.

    Returns
    -------
    cls : _HtmlFrameParser subclass
        The entry of ``_valid_parsers`` for the requested flavor.

    Raises
    ------
    ValueError
        * If `flavor` is not a valid backend.
        * If the installed BeautifulSoup4 is 4.2.0 or older.
    ImportError
        * If a library needed by the requested `flavor` is unavailable.
    """
    known_flavors = list(_valid_parsers.keys())
    if flavor not in known_flavors:
        template = ('{invalid!r} is not a valid flavor, valid flavors '
                    'are {valid}')
        raise ValueError(template.format(invalid=flavor,
                                         valid=known_flavors))

    if flavor not in ('bs4', 'html5lib'):
        # Every other flavor needs lxml.
        if not _HAS_LXML:
            raise ImportError("lxml not found, please install it")
        return _valid_parsers[flavor]

    # 'bs4' and 'html5lib' both require html5lib and BeautifulSoup4.
    if not _HAS_HTML5LIB:
        raise ImportError("html5lib not found, please install it")
    if not _HAS_BS4:
        raise ImportError(
            "BeautifulSoup4 (bs4) not found, please install it")

    import bs4
    installed = LooseVersion(bs4.__version__)
    if installed <= LooseVersion('4.2.0'):
        raise ValueError("A minimum version of BeautifulSoup 4.2.1 "
                         "is required")
    return _valid_parsers[flavor]
Example #11
Source File: diagnose.py From B.E.N.J.I. with MIT License | 5 votes |
def benchmark_parsers(num_elements=100000):
    """Very basic head-to-head performance benchmark.

    Builds one document with ``rdoc(num_elements)``, times ``BeautifulSoup``
    on it with each parser configuration, then times raw lxml and raw
    html5lib on the same data. All results are printed; nothing is returned.

    A parser configuration that raises inside ``BeautifulSoup`` is reported
    with its traceback and the benchmark continues. The raw lxml / html5lib
    timings are skipped with a message when the library cannot be imported,
    so a missing optional dependency does not abort the run.

    Parameters
    ----------
    num_elements : int
        Size argument forwarded to ``rdoc``.
    """
    print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
    data = rdoc(num_elements)
    print("Generated a large invalid HTML document (%d bytes)." % len(data))

    for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
        try:
            started = time.time()
            BeautifulSoup(data, parser)
            elapsed = time.time() - started
        except Exception:
            # Broad on purpose: any failure is reported and the run goes on.
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        else:
            print("BS4+%s parsed the markup in %.2fs." % (parser, elapsed))

    # The raw-library timings need the optional packages themselves; guard
    # the imports so their absence is reported instead of raising.
    try:
        from lxml import etree
    except ImportError:
        print("lxml is not installed or couldn't be imported.")
    else:
        started = time.time()
        etree.HTML(data)
        elapsed = time.time() - started
        print("Raw lxml parsed the markup in %.2fs." % elapsed)

    try:
        import html5lib
    except ImportError:
        print("html5lib is not installed or couldn't be imported.")
    else:
        raw_parser = html5lib.HTMLParser()
        started = time.time()
        raw_parser.parse(data)
        elapsed = time.time() - started
        print("Raw html5lib parsed the markup in %.2fs." % elapsed)
Example #12
Source File: test_html.py From Computable with MIT License | 5 votes |
def test_bs4_version_fails():
    """Expect ``read_html(..., flavor='bs4')`` to fail under bs4 4.2.0.

    Prerequisites are checked by ``_skip_if_none_of``. The assertion itself
    only runs when the installed bs4 release compares equal to 4.2.0;
    otherwise the test does nothing.
    """
    _skip_if_none_of(('bs4', 'html5lib'))
    import bs4

    is_bad_release = bs4.__version__ == LooseVersion('4.2.0')
    if not is_bad_release:
        return

    spam_path = os.path.join(DATA_PATH, "spam.html")
    tm.assert_raises(AssertionError, read_html, spam_path, flavor='bs4')
Example #13
Source File: html.py From vnpy_crypto with MIT License | 5 votes |
def _parser_dispatch(flavor):
    """Return the parser class registered for ``flavor``.

    Parameters
    ----------
    flavor : str
        Name of the parsing backend; must be a key of ``_valid_parsers``.

    Returns
    -------
    cls : _HtmlFrameParser subclass
        The entry of ``_valid_parsers`` for the requested flavor.

    Raises
    ------
    ValueError
        * If `flavor` is not a valid backend.
        * If the installed BeautifulSoup4 is 4.2.0 or older.
    ImportError
        * If a library needed by the requested `flavor` is unavailable.
    """
    known_flavors = list(_valid_parsers.keys())
    if flavor not in known_flavors:
        template = ('{invalid!r} is not a valid flavor, valid flavors '
                    'are {valid}')
        raise ValueError(template.format(invalid=flavor,
                                         valid=known_flavors))

    if flavor not in ('bs4', 'html5lib'):
        # Every other flavor needs lxml.
        if not _HAS_LXML:
            raise ImportError("lxml not found, please install it")
        return _valid_parsers[flavor]

    # 'bs4' and 'html5lib' both require html5lib and BeautifulSoup4.
    if not _HAS_HTML5LIB:
        raise ImportError("html5lib not found, please install it")
    if not _HAS_BS4:
        raise ImportError(
            "BeautifulSoup4 (bs4) not found, please install it")

    import bs4
    installed = LooseVersion(bs4.__version__)
    if installed <= LooseVersion('4.2.0'):
        raise ValueError("A minimum version of BeautifulSoup 4.2.1 "
                         "is required")
    return _valid_parsers[flavor]
Example #14
Source File: html.py From Splunking-Crime with GNU Affero General Public License v3.0 | 4 votes |
def _parser_dispatch(flavor):
    """Return the parser class registered for ``flavor``.

    Parameters
    ----------
    flavor : str
        Name of the parsing backend; must be a key of ``_valid_parsers``.

    Returns
    -------
    cls : _HtmlFrameParser subclass
        The entry of ``_valid_parsers`` for the requested flavor.

    Raises
    ------
    ValueError
        * If `flavor` is not a valid backend.
        * If the installed BeautifulSoup4 release compares equal to 4.2.0.
    ImportError
        * If a library needed by the requested `flavor` is unavailable.
    """
    known_flavors = list(_valid_parsers.keys())
    if flavor not in known_flavors:
        template = ('{invalid!r} is not a valid flavor, valid flavors '
                    'are {valid}')
        raise ValueError(template.format(invalid=flavor,
                                         valid=known_flavors))

    if flavor not in ('bs4', 'html5lib'):
        # Every other flavor needs lxml.
        if not _HAS_LXML:
            raise ImportError("lxml not found, please install it")
        return _valid_parsers[flavor]

    # 'bs4' and 'html5lib' both require html5lib and BeautifulSoup4.
    if not _HAS_HTML5LIB:
        raise ImportError("html5lib not found, please install it")
    if not _HAS_BS4:
        raise ImportError(
            "BeautifulSoup4 (bs4) not found, please install it")

    import bs4
    if bs4.__version__ == LooseVersion('4.2.0'):
        raise ValueError("You're using a version"
                         " of BeautifulSoup4 (4.2.0) that has been"
                         " known to cause problems on certain"
                         " operating systems such as Debian. "
                         "Please install a version of"
                         " BeautifulSoup4 != 4.2.0, both earlier"
                         " and later releases will work.")
    return _valid_parsers[flavor]
Example #15
Source File: html.py From elasticintel with GNU General Public License v3.0 | 4 votes |
def _parser_dispatch(flavor):
    """Return the parser class registered for ``flavor``.

    Parameters
    ----------
    flavor : str
        Name of the parsing backend; must be a key of ``_valid_parsers``.

    Returns
    -------
    cls : _HtmlFrameParser subclass
        The entry of ``_valid_parsers`` for the requested flavor.

    Raises
    ------
    ValueError
        * If `flavor` is not a valid backend.
        * If the installed BeautifulSoup4 release compares equal to 4.2.0.
    ImportError
        * If a library needed by the requested `flavor` is unavailable.
    """
    known_flavors = list(_valid_parsers.keys())
    if flavor not in known_flavors:
        template = ('{invalid!r} is not a valid flavor, valid flavors '
                    'are {valid}')
        raise ValueError(template.format(invalid=flavor,
                                         valid=known_flavors))

    if flavor not in ('bs4', 'html5lib'):
        # Every other flavor needs lxml.
        if not _HAS_LXML:
            raise ImportError("lxml not found, please install it")
        return _valid_parsers[flavor]

    # 'bs4' and 'html5lib' both require html5lib and BeautifulSoup4.
    if not _HAS_HTML5LIB:
        raise ImportError("html5lib not found, please install it")
    if not _HAS_BS4:
        raise ImportError(
            "BeautifulSoup4 (bs4) not found, please install it")

    import bs4
    if bs4.__version__ == LooseVersion('4.2.0'):
        raise ValueError("You're using a version"
                         " of BeautifulSoup4 (4.2.0) that has been"
                         " known to cause problems on certain"
                         " operating systems such as Debian. "
                         "Please install a version of"
                         " BeautifulSoup4 != 4.2.0, both earlier"
                         " and later releases will work.")
    return _valid_parsers[flavor]
Example #16
Source File: diagnose.py From B.E.N.J.I. with MIT License | 4 votes |
def diagnose(data):
    """Diagnostic suite for isolating common problems.

    Prints the Beautiful Soup and Python versions, reports which of the
    basic parsers have no registered builder, then parses ``data`` with each
    remaining parser and prints the prettified result (or the traceback if
    parsing raised).

    ``data`` may be an object with a ``read()`` method, the path of an
    existing file, or markup text. A string starting with ``http:`` or
    ``https:`` is reported as a URL and the function returns without
    parsing.
    """
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # Iterate over a snapshot: removing from the list that is being iterated
    # would skip the entry following each removed name, leaving it unchecked.
    for name in list(basic_parsers):
        for builder in builder_registry.builders:
            if name in builder.features:
                break
        else:
            # No registered builder advertises this parser.
            basic_parsers.remove(name)
            print((
                "I noticed that %s is not installed. Installing it may help." %
                name))

    if 'lxml' in basic_parsers:
        basic_parsers.append(["lxml", "xml"])
        try:
            from lxml import etree
            print("Found lxml version %s" % ".".join(map(str, etree.LXML_VERSION)))
        except ImportError:
            print(
                "lxml is not installed or couldn't be imported.")

    if 'html5lib' in basic_parsers:
        try:
            import html5lib
            print("Found html5lib version %s" % html5lib.__version__)
        except ImportError:
            print(
                "html5lib is not installed or couldn't be imported.")

    if hasattr(data, 'read'):
        data = data.read()
    elif os.path.exists(data):
        print('"%s" looks like a filename. Reading data from the file.' % data)
        with open(data) as fp:
            data = fp.read()
    elif data.startswith("http:") or data.startswith("https:"):
        print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
        print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
        return
    print()

    for parser in basic_parsers:
        print("Trying to parse your markup with %s" % parser)
        success = False
        try:
            soup = BeautifulSoup(data, parser)
            success = True
        except Exception:
            # Broad on purpose: report whatever the parser raised and go on.
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        if success:
            print("Here's what %s did with the markup:" % parser)
            print(soup.prettify())

        print("-" * 80)
Example #17
Source File: html.py From Computable with MIT License | 4 votes |
def _parser_dispatch(flavor):
    """Return the parser class registered for ``flavor``.

    Parameters
    ----------
    flavor : str
        Name of the parsing backend; must be a key of ``_valid_parsers``.

    Returns
    -------
    cls : _HtmlFrameParser subclass
        The entry of ``_valid_parsers`` for the requested flavor.

    Raises
    ------
    ValueError
        * If `flavor` is not a valid backend.
        * If the installed BeautifulSoup4 release compares equal to 4.2.0.
    ImportError
        * If a library needed by the requested `flavor` is unavailable.
    """
    known_flavors = list(_valid_parsers.keys())
    if flavor not in known_flavors:
        raise ValueError('%r is not a valid flavor, valid flavors are %s' %
                         (flavor, known_flavors))

    if flavor not in ('bs4', 'html5lib'):
        # Every other flavor needs lxml.
        if not _HAS_LXML:
            raise ImportError("lxml not found please install it")
        return _valid_parsers[flavor]

    # 'bs4' and 'html5lib' both require html5lib and BeautifulSoup4.
    if not _HAS_HTML5LIB:
        raise ImportError("html5lib not found please install it")
    if not _HAS_BS4:
        raise ImportError("bs4 not found please install it")

    # NOTE(review): ``bs4`` is not imported in this function; it is assumed
    # to be bound at module level whenever _HAS_BS4 is true - confirm.
    if bs4.__version__ == LooseVersion('4.2.0'):
        raise ValueError("You're using a version"
                         " of BeautifulSoup4 (4.2.0) that has been"
                         " known to cause problems on certain"
                         " operating systems such as Debian. "
                         "Please install a version of"
                         " BeautifulSoup4 != 4.2.0, both earlier"
                         " and later releases will work.")
    return _valid_parsers[flavor]