Python bs4.builder() Examples
The following are 19 code examples of bs4.builder().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module bs4, or try the search function.
Example #1
Source File: diagnose.py From ServerlessCrawler-VancouverRealState with MIT License | 6 votes |
def diagnose(data):
    """Diagnostic suite for isolating common problems.

    Prints the Beautiful Soup and Python versions, reports every basic
    parser that no builder in ``builder_registry.builders`` lists among
    its features, and prints the lxml version when lxml is usable.

    :param data: The input being diagnosed; not read by the checks below.
    :return: None; all results are printed.
    """
    # ``print`` is always given one parenthesized argument so the output is
    # identical whether it is a statement (Python 2) or a function (Python 3).
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # Walk a snapshot of the list: removing an entry from the list that is
    # being iterated makes the loop skip the entry right after it, so a
    # missing html5lib would leave "lxml" unchecked.
    for name in list(basic_parsers):
        available = any(
            name in builder.features for builder in builder_registry.builders
        )
        if not available:
            basic_parsers.remove(name)
            print(
                "I noticed that %s is not installed. Installing it may help."
                % name
            )

    if 'lxml' in basic_parsers:
        # lxml is also offered as an XML parser, requested by feature list.
        basic_parsers.append(["lxml", "xml"])
        try:
            from lxml import etree
            print(
                "Found lxml version %s"
                % ".".join(map(str, etree.LXML_VERSION))
            )
        except ImportError:
            print("lxml is not installed or couldn't be imported.")
Example #2
Source File: diagnose.py From ServerlessCrawler-VancouverRealState with MIT License | 6 votes |
def diagnose(data):
    """Diagnostic suite for isolating common problems.

    Prints the Beautiful Soup and Python versions, reports every basic
    parser that no builder in ``builder_registry.builders`` lists among
    its features, and prints the lxml version when lxml is usable.

    :param data: The input being diagnosed; not read by the checks below.
    :return: None; all results are printed.
    """
    # ``print`` is always given one parenthesized argument so the output is
    # identical whether it is a statement (Python 2) or a function (Python 3).
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # Walk a snapshot of the list: removing an entry from the list that is
    # being iterated makes the loop skip the entry right after it, so a
    # missing html5lib would leave "lxml" unchecked.
    for name in list(basic_parsers):
        available = any(
            name in builder.features for builder in builder_registry.builders
        )
        if not available:
            basic_parsers.remove(name)
            print(
                "I noticed that %s is not installed. Installing it may help."
                % name
            )

    if 'lxml' in basic_parsers:
        # lxml is also offered as an XML parser, requested by feature list.
        basic_parsers.append(["lxml", "xml"])
        try:
            from lxml import etree
            print(
                "Found lxml version %s"
                % ".".join(map(str, etree.LXML_VERSION))
            )
        except ImportError:
            print("lxml is not installed or couldn't be imported.")
Example #3
Source File: diagnose.py From ServerlessCrawler-VancouverRealState with MIT License | 6 votes |
def diagnose(data):
    """Diagnostic suite for isolating common problems.

    Prints the Beautiful Soup and Python versions, reports every basic
    parser that no builder in ``builder_registry.builders`` lists among
    its features, and prints the lxml version when lxml is usable.

    :param data: The input being diagnosed; not read by the checks below.
    :return: None; all results are printed.
    """
    # ``print`` is always given one parenthesized argument so the output is
    # identical whether it is a statement (Python 2) or a function (Python 3).
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # Walk a snapshot of the list: removing an entry from the list that is
    # being iterated makes the loop skip the entry right after it, so a
    # missing html5lib would leave "lxml" unchecked.
    for name in list(basic_parsers):
        available = any(
            name in builder.features for builder in builder_registry.builders
        )
        if not available:
            basic_parsers.remove(name)
            print(
                "I noticed that %s is not installed. Installing it may help."
                % name
            )

    if 'lxml' in basic_parsers:
        # lxml is also offered as an XML parser, requested by feature list.
        basic_parsers.append(["lxml", "xml"])
        try:
            from lxml import etree
            print(
                "Found lxml version %s"
                % ".".join(map(str, etree.LXML_VERSION))
            )
        except ImportError:
            print("lxml is not installed or couldn't be imported.")
Example #4
Source File: diagnose.py From bazarr with GNU General Public License v3.0 | 6 votes |
def diagnose(data):
    """Diagnostic suite for isolating common problems.

    Prints the Beautiful Soup and Python versions, reports every basic
    parser that no builder in ``builder_registry.builders`` lists among
    its features, and prints the lxml version when lxml is usable.

    :param data: The input being diagnosed; not read by the checks below.
    :return: None; all results are printed.
    """
    # ``print`` is always given one parenthesized argument so the output is
    # identical whether it is a statement (Python 2) or a function (Python 3).
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # Walk a snapshot of the list: removing an entry from the list that is
    # being iterated makes the loop skip the entry right after it, so a
    # missing html5lib would leave "lxml" unchecked.
    for name in list(basic_parsers):
        available = any(
            name in builder.features for builder in builder_registry.builders
        )
        if not available:
            basic_parsers.remove(name)
            print(
                "I noticed that %s is not installed. Installing it may help."
                % name
            )

    if 'lxml' in basic_parsers:
        # lxml is also offered as an XML parser, requested by feature list.
        basic_parsers.append(["lxml", "xml"])
        try:
            from lxml import etree
            print(
                "Found lxml version %s"
                % ".".join(map(str, etree.LXML_VERSION))
            )
        except ImportError:
            print("lxml is not installed or couldn't be imported.")
Example #5
Source File: diagnose.py From FastWordQuery with GNU General Public License v3.0 | 6 votes |
def diagnose(data):
    """Diagnostic suite for isolating common problems.

    Prints the Beautiful Soup and Python versions, reports every basic
    parser that no builder in ``builder_registry.builders`` lists among
    its features, and prints the lxml version when lxml is usable.

    :param data: The input being diagnosed; not read by the checks below.
    :return: None; all results are printed.
    """
    # ``print`` is always given one parenthesized argument so the output is
    # identical whether it is a statement (Python 2) or a function (Python 3).
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # Walk a snapshot of the list: removing an entry from the list that is
    # being iterated makes the loop skip the entry right after it, so a
    # missing html5lib would leave "lxml" unchecked.
    for name in list(basic_parsers):
        available = any(
            name in builder.features for builder in builder_registry.builders
        )
        if not available:
            basic_parsers.remove(name)
            print(
                "I noticed that %s is not installed. Installing it may help."
                % name
            )

    if 'lxml' in basic_parsers:
        # lxml is also offered as an XML parser, requested by feature list.
        basic_parsers.append(["lxml", "xml"])
        try:
            from lxml import etree
            print(
                "Found lxml version %s"
                % ".".join(map(str, etree.LXML_VERSION))
            )
        except ImportError:
            print("lxml is not installed or couldn't be imported.")
Example #6
Source File: diagnose.py From svg-animation-tools with MIT License | 6 votes |
def diagnose(data):
    """Diagnostic suite for isolating common problems.

    Prints the Beautiful Soup and Python versions, reports every basic
    parser that no builder in ``builder_registry.builders`` lists among
    its features, and prints the lxml version when lxml is usable.

    :param data: The input being diagnosed; not read by the checks below.
    :return: None; all results are printed.
    """
    # ``print`` is always given one parenthesized argument so the output is
    # identical whether it is a statement (Python 2) or a function (Python 3).
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # Walk a snapshot of the list: removing an entry from the list that is
    # being iterated makes the loop skip the entry right after it, so a
    # missing html5lib would leave "lxml" unchecked.
    for name in list(basic_parsers):
        available = any(
            name in builder.features for builder in builder_registry.builders
        )
        if not available:
            basic_parsers.remove(name)
            print(
                "I noticed that %s is not installed. Installing it may help."
                % name
            )

    if 'lxml' in basic_parsers:
        # lxml is also offered as an XML parser, requested by feature list.
        basic_parsers.append(["lxml", "xml"])
        try:
            from lxml import etree
            print(
                "Found lxml version %s"
                % ".".join(map(str, etree.LXML_VERSION))
            )
        except ImportError:
            print("lxml is not installed or couldn't be imported.")
Example #7
Source File: diagnose.py From svg-animation-tools with MIT License | 6 votes |
def diagnose(data):
    """Diagnostic suite for isolating common problems.

    Prints the Beautiful Soup and Python versions, reports every basic
    parser that no builder in ``builder_registry.builders`` lists among
    its features, and prints the lxml version when lxml is usable.

    :param data: The input being diagnosed; not read by the checks below.
    :return: None; all results are printed.
    """
    # ``print`` is always given one parenthesized argument so the output is
    # identical whether it is a statement (Python 2) or a function (Python 3).
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # Walk a snapshot of the list: removing an entry from the list that is
    # being iterated makes the loop skip the entry right after it, so a
    # missing html5lib would leave "lxml" unchecked.
    for name in list(basic_parsers):
        available = any(
            name in builder.features for builder in builder_registry.builders
        )
        if not available:
            basic_parsers.remove(name)
            print(
                "I noticed that %s is not installed. Installing it may help."
                % name
            )

    if 'lxml' in basic_parsers:
        # lxml is also offered as an XML parser, requested by feature list.
        basic_parsers.append(["lxml", "xml"])
        try:
            from lxml import etree
            print(
                "Found lxml version %s"
                % ".".join(map(str, etree.LXML_VERSION))
            )
        except ImportError:
            print("lxml is not installed or couldn't be imported.")
Example #8
Source File: diagnose.py From stopstalk-deployment with MIT License | 6 votes |
def diagnose(data):
    """Diagnostic suite for isolating common problems.

    Prints the Beautiful Soup and Python versions, reports every basic
    parser that no builder in ``builder_registry.builders`` lists among
    its features, and prints the lxml version when lxml is usable.

    :param data: The input being diagnosed; not read by the checks below.
    :return: None; all results are printed.
    """
    # ``print`` is always given one parenthesized argument so the output is
    # identical whether it is a statement (Python 2) or a function (Python 3).
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # Walk a snapshot of the list: removing an entry from the list that is
    # being iterated makes the loop skip the entry right after it, so a
    # missing html5lib would leave "lxml" unchecked.
    for name in list(basic_parsers):
        available = any(
            name in builder.features for builder in builder_registry.builders
        )
        if not available:
            basic_parsers.remove(name)
            print(
                "I noticed that %s is not installed. Installing it may help."
                % name
            )

    if 'lxml' in basic_parsers:
        # lxml is also offered as an XML parser, requested by feature list.
        basic_parsers.append(["lxml", "xml"])
        try:
            from lxml import etree
            print(
                "Found lxml version %s"
                % ".".join(map(str, etree.LXML_VERSION))
            )
        except ImportError:
            print("lxml is not installed or couldn't be imported.")
Example #9
Source File: diagnose.py From CrisisMappingToolkit with Apache License 2.0 | 6 votes |
def diagnose(data):
    """Diagnostic suite for isolating common problems.

    Prints the Beautiful Soup and Python versions, reports every basic
    parser that no builder in ``builder_registry.builders`` lists among
    its features, and prints the lxml version when lxml is usable.

    :param data: The input being diagnosed; not read by the checks below.
    :return: None; all results are printed.
    """
    # ``print`` is always given one parenthesized argument so the output is
    # identical whether it is a statement (Python 2) or a function (Python 3).
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # Walk a snapshot of the list: removing an entry from the list that is
    # being iterated makes the loop skip the entry right after it, so a
    # missing html5lib would leave "lxml" unchecked.
    for name in list(basic_parsers):
        available = any(
            name in builder.features for builder in builder_registry.builders
        )
        if not available:
            basic_parsers.remove(name)
            print(
                "I noticed that %s is not installed. Installing it may help."
                % name
            )

    if 'lxml' in basic_parsers:
        # lxml is also offered as an XML parser, requested by feature list.
        basic_parsers.append(["lxml", "xml"])
        try:
            from lxml import etree
            print(
                "Found lxml version %s"
                % ".".join(map(str, etree.LXML_VERSION))
            )
        except ImportError:
            print("lxml is not installed or couldn't be imported.")
Example #10
Source File: diagnose.py From moviegrabber with GNU General Public License v3.0 | 4 votes |
def diagnose(data):
    """Diagnostic suite for isolating common problems.

    Prints version information, works out which of the basic parsers are
    listed in the features of a builder in ``builder_registry.builders``,
    then parses ``data`` with each remaining parser and prints either the
    prettified result or the traceback of the failure.

    :param data: What to diagnose. An object with a ``read`` attribute is
        read; a string naming an existing path is opened and read; a
        string starting with ``http:`` or ``https:`` is reported as a URL
        and nothing is parsed; anything else is parsed as given.
    :return: None; all results are printed.
    """
    # ``print`` is always given one parenthesized argument so the output is
    # identical whether it is a statement (Python 2) or a function (Python 3).
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # Walk a snapshot of the list: removing an entry from the list that is
    # being iterated makes the loop skip the entry right after it, so a
    # missing html5lib would leave "lxml" unchecked and still listed.
    for name in list(basic_parsers):
        available = any(
            name in builder.features for builder in builder_registry.builders
        )
        if not available:
            basic_parsers.remove(name)
            print(
                "I noticed that %s is not installed. Installing it may help."
                % name
            )

    if 'lxml' in basic_parsers:
        # lxml is also offered as an XML parser, requested by feature list.
        basic_parsers.append(["lxml", "xml"])
        # A failed import must not abort the diagnostic run itself.
        try:
            from lxml import etree
            print(
                "Found lxml version %s"
                % ".".join(map(str, etree.LXML_VERSION))
            )
        except ImportError:
            print("lxml is not installed or couldn't be imported.")

    if 'html5lib' in basic_parsers:
        try:
            import html5lib
            print("Found html5lib version %s" % html5lib.__version__)
        except ImportError:
            print("html5lib is not installed or couldn't be imported.")

    if hasattr(data, 'read'):
        data = data.read()
    elif os.path.exists(data):
        print('"%s" looks like a filename. Reading data from the file.' % data)
        # Close the handle deterministically instead of leaking it.
        with open(data) as fp:
            data = fp.read()
    elif data.startswith(("http:", "https:")):
        print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
        print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
        return
    # An explicit empty string keeps this a blank line on Python 2 too,
    # where ``print()`` would output ``()``.
    print("")

    for parser in basic_parsers:
        print("Trying to parse your markup with %s" % parser)
        try:
            soup = BeautifulSoup(data, parser)
        except Exception:
            # Deliberately broad: any parser failure is reported, and the
            # remaining parsers are still tried.
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        else:
            print("Here's what %s did with the markup:" % parser)
            print(soup.prettify())

        print("-" * 80)
Example #11
Source File: diagnose.py From POC-EXP with GNU General Public License v3.0 | 4 votes |
def diagnose(data):
    """Diagnostic suite for isolating common problems.

    Prints version information, works out which of the basic parsers are
    listed in the features of a builder in ``builder_registry.builders``,
    then parses ``data`` with each remaining parser and prints either the
    prettified result or the traceback of the failure.

    :param data: What to diagnose. An object with a ``read`` attribute is
        read; a string naming an existing path is opened and read; a
        string starting with ``http:`` or ``https:`` is reported as a URL
        and nothing is parsed; anything else is parsed as given.
    :return: None; all results are printed.
    """
    # ``print`` is always given one parenthesized argument so the output is
    # identical whether it is a statement (Python 2) or a function (Python 3).
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # Walk a snapshot of the list: removing an entry from the list that is
    # being iterated makes the loop skip the entry right after it, so a
    # missing html5lib would leave "lxml" unchecked and still listed.
    for name in list(basic_parsers):
        available = any(
            name in builder.features for builder in builder_registry.builders
        )
        if not available:
            basic_parsers.remove(name)
            print(
                "I noticed that %s is not installed. Installing it may help."
                % name
            )

    if 'lxml' in basic_parsers:
        # lxml is also offered as an XML parser, requested by feature list.
        basic_parsers.append(["lxml", "xml"])
        # A failed import must not abort the diagnostic run itself.
        try:
            from lxml import etree
            print(
                "Found lxml version %s"
                % ".".join(map(str, etree.LXML_VERSION))
            )
        except ImportError:
            print("lxml is not installed or couldn't be imported.")

    if 'html5lib' in basic_parsers:
        try:
            import html5lib
            print("Found html5lib version %s" % html5lib.__version__)
        except ImportError:
            print("html5lib is not installed or couldn't be imported.")

    if hasattr(data, 'read'):
        data = data.read()
    elif os.path.exists(data):
        print('"%s" looks like a filename. Reading data from the file.' % data)
        # Close the handle deterministically instead of leaking it.
        with open(data) as fp:
            data = fp.read()
    elif data.startswith(("http:", "https:")):
        print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
        print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
        return
    # An explicit empty string keeps this a blank line on Python 2 too,
    # where ``print()`` would output ``()``.
    print("")

    for parser in basic_parsers:
        print("Trying to parse your markup with %s" % parser)
        try:
            soup = BeautifulSoup(data, parser)
        except Exception:
            # Deliberately broad: any parser failure is reported, and the
            # remaining parsers are still tried.
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        else:
            print("Here's what %s did with the markup:" % parser)
            print(soup.prettify())

        print("-" * 80)
Example #12
Source File: diagnose.py From ru with GNU General Public License v2.0 | 4 votes |
def diagnose(data):
    """Diagnostic suite for isolating common problems.

    Prints version information, works out which of the basic parsers are
    listed in the features of a builder in ``builder_registry.builders``,
    then parses ``data`` with each remaining parser and prints either the
    prettified result or the traceback of the failure.

    :param data: What to diagnose. An object with a ``read`` attribute is
        read; a string naming an existing path is opened and read; a
        string starting with ``http:`` or ``https:`` is reported as a URL
        and nothing is parsed; anything else is parsed as given.
    :return: None; all results are printed.
    """
    # ``print`` is always given one parenthesized argument so the output is
    # identical whether it is a statement (Python 2) or a function (Python 3).
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # Walk a snapshot of the list: removing an entry from the list that is
    # being iterated makes the loop skip the entry right after it, so a
    # missing html5lib would leave "lxml" unchecked and still listed.
    for name in list(basic_parsers):
        available = any(
            name in builder.features for builder in builder_registry.builders
        )
        if not available:
            basic_parsers.remove(name)
            print(
                "I noticed that %s is not installed. Installing it may help."
                % name
            )

    if 'lxml' in basic_parsers:
        # lxml is also offered as an XML parser, requested by feature list.
        basic_parsers.append(["lxml", "xml"])
        # A failed import must not abort the diagnostic run itself.
        try:
            from lxml import etree
            print(
                "Found lxml version %s"
                % ".".join(map(str, etree.LXML_VERSION))
            )
        except ImportError:
            print("lxml is not installed or couldn't be imported.")

    if 'html5lib' in basic_parsers:
        try:
            import html5lib
            print("Found html5lib version %s" % html5lib.__version__)
        except ImportError:
            print("html5lib is not installed or couldn't be imported.")

    if hasattr(data, 'read'):
        data = data.read()
    elif os.path.exists(data):
        print('"%s" looks like a filename. Reading data from the file.' % data)
        # Close the handle deterministically instead of leaking it.
        with open(data) as fp:
            data = fp.read()
    elif data.startswith(("http:", "https:")):
        print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
        print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
        return
    # An explicit empty string keeps this a blank line on Python 2 too,
    # where ``print()`` would output ``()``.
    print("")

    for parser in basic_parsers:
        print("Trying to parse your markup with %s" % parser)
        try:
            soup = BeautifulSoup(data, parser)
        except Exception:
            # Deliberately broad: any parser failure is reported, and the
            # remaining parsers are still tried.
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        else:
            print("Here's what %s did with the markup:" % parser)
            print(soup.prettify())

        print("-" * 80)
Example #13
Source File: diagnose.py From MARA_Framework with GNU Lesser General Public License v3.0 | 4 votes |
def diagnose(data):
    """Diagnostic suite for isolating common problems.

    Prints version information, works out which of the basic parsers are
    listed in the features of a builder in ``builder_registry.builders``,
    then parses ``data`` with each remaining parser and prints either the
    prettified result or the traceback of the failure.

    :param data: What to diagnose. An object with a ``read`` attribute is
        read; a string naming an existing path is opened and read; a
        string starting with ``http:`` or ``https:`` is reported as a URL
        and nothing is parsed; anything else is parsed as given.
    :return: None; all results are printed.
    """
    # ``print`` is always given one parenthesized argument so the output is
    # identical whether it is a statement (Python 2) or a function (Python 3).
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # Walk a snapshot of the list: removing an entry from the list that is
    # being iterated makes the loop skip the entry right after it, so a
    # missing html5lib would leave "lxml" unchecked and still listed.
    for name in list(basic_parsers):
        available = any(
            name in builder.features for builder in builder_registry.builders
        )
        if not available:
            basic_parsers.remove(name)
            print(
                "I noticed that %s is not installed. Installing it may help."
                % name
            )

    if 'lxml' in basic_parsers:
        # lxml is also offered as an XML parser, requested by feature list.
        basic_parsers.append(["lxml", "xml"])
        # A failed import must not abort the diagnostic run itself.
        try:
            from lxml import etree
            print(
                "Found lxml version %s"
                % ".".join(map(str, etree.LXML_VERSION))
            )
        except ImportError:
            print("lxml is not installed or couldn't be imported.")

    if 'html5lib' in basic_parsers:
        try:
            import html5lib
            print("Found html5lib version %s" % html5lib.__version__)
        except ImportError:
            print("html5lib is not installed or couldn't be imported.")

    if hasattr(data, 'read'):
        data = data.read()
    elif os.path.exists(data):
        print('"%s" looks like a filename. Reading data from the file.' % data)
        # Close the handle deterministically instead of leaking it.
        with open(data) as fp:
            data = fp.read()
    elif data.startswith(("http:", "https:")):
        print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
        print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
        return
    # An explicit empty string keeps this a blank line on Python 2 too,
    # where ``print()`` would output ``()``.
    print("")

    for parser in basic_parsers:
        print("Trying to parse your markup with %s" % parser)
        try:
            soup = BeautifulSoup(data, parser)
        except Exception:
            # Deliberately broad: any parser failure is reported, and the
            # remaining parsers are still tried.
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        else:
            print("Here's what %s did with the markup:" % parser)
            print(soup.prettify())

        print("-" * 80)
Example #14
Source File: diagnose.py From nbaplus-server with Apache License 2.0 | 4 votes |
def diagnose(data):
    """Diagnostic suite for isolating common problems.

    Prints version information, works out which of the basic parsers are
    listed in the features of a builder in ``builder_registry.builders``,
    then parses ``data`` with each remaining parser and prints either the
    prettified result or the traceback of the failure.

    :param data: What to diagnose. An object with a ``read`` attribute is
        read; a string naming an existing path is opened and read; a
        string starting with ``http:`` or ``https:`` is reported as a URL
        and nothing is parsed; anything else is parsed as given.
    :return: None; all results are printed.
    """
    # ``print`` is always given one parenthesized argument so the output is
    # identical whether it is a statement (Python 2) or a function (Python 3).
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # Walk a snapshot of the list: removing an entry from the list that is
    # being iterated makes the loop skip the entry right after it, so a
    # missing html5lib would leave "lxml" unchecked and still listed.
    for name in list(basic_parsers):
        available = any(
            name in builder.features for builder in builder_registry.builders
        )
        if not available:
            basic_parsers.remove(name)
            print(
                "I noticed that %s is not installed. Installing it may help."
                % name
            )

    if 'lxml' in basic_parsers:
        # lxml is also offered as an XML parser, requested by feature list.
        basic_parsers.append(["lxml", "xml"])
        # A failed import must not abort the diagnostic run itself.
        try:
            from lxml import etree
            print(
                "Found lxml version %s"
                % ".".join(map(str, etree.LXML_VERSION))
            )
        except ImportError:
            print("lxml is not installed or couldn't be imported.")

    if 'html5lib' in basic_parsers:
        try:
            import html5lib
            print("Found html5lib version %s" % html5lib.__version__)
        except ImportError:
            print("html5lib is not installed or couldn't be imported.")

    if hasattr(data, 'read'):
        data = data.read()
    elif os.path.exists(data):
        print('"%s" looks like a filename. Reading data from the file.' % data)
        # Close the handle deterministically instead of leaking it.
        with open(data) as fp:
            data = fp.read()
    elif data.startswith(("http:", "https:")):
        print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
        print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
        return
    # An explicit empty string keeps this a blank line on Python 2 too,
    # where ``print()`` would output ``()``.
    print("")

    for parser in basic_parsers:
        print("Trying to parse your markup with %s" % parser)
        try:
            soup = BeautifulSoup(data, parser)
        except Exception:
            # Deliberately broad: any parser failure is reported, and the
            # remaining parsers are still tried.
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        else:
            print("Here's what %s did with the markup:" % parser)
            print(soup.prettify())

        print("-" * 80)
Example #15
Source File: diagnose.py From nzb-subliminal with GNU General Public License v3.0 | 4 votes |
def diagnose(data):
    """Diagnostic suite for isolating common problems.

    Prints version information, works out which of the basic parsers are
    listed in the features of a builder in ``builder_registry.builders``,
    then parses ``data`` with each remaining parser and prints either the
    prettified result or the traceback of the failure.

    :param data: What to diagnose. An object with a ``read`` attribute is
        read; a string naming an existing path is opened and read; a
        string starting with ``http:`` or ``https:`` is reported as a URL
        and nothing is parsed; anything else is parsed as given.
    :return: None; all results are printed.
    """
    # ``print`` is always given one parenthesized argument so the output is
    # identical whether it is a statement (Python 2) or a function (Python 3).
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # Walk a snapshot of the list: removing an entry from the list that is
    # being iterated makes the loop skip the entry right after it, so a
    # missing html5lib would leave "lxml" unchecked and still listed.
    for name in list(basic_parsers):
        available = any(
            name in builder.features for builder in builder_registry.builders
        )
        if not available:
            basic_parsers.remove(name)
            print(
                "I noticed that %s is not installed. Installing it may help."
                % name
            )

    if 'lxml' in basic_parsers:
        # lxml is also offered as an XML parser, requested by feature list.
        basic_parsers.append(["lxml", "xml"])
        # A failed import must not abort the diagnostic run itself.
        try:
            from lxml import etree
            print(
                "Found lxml version %s"
                % ".".join(map(str, etree.LXML_VERSION))
            )
        except ImportError:
            print("lxml is not installed or couldn't be imported.")

    if 'html5lib' in basic_parsers:
        try:
            import html5lib
            print("Found html5lib version %s" % html5lib.__version__)
        except ImportError:
            print("html5lib is not installed or couldn't be imported.")

    if hasattr(data, 'read'):
        data = data.read()
    elif os.path.exists(data):
        print('"%s" looks like a filename. Reading data from the file.' % data)
        # Close the handle deterministically instead of leaking it.
        with open(data) as fp:
            data = fp.read()
    elif data.startswith(("http:", "https:")):
        print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
        print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
        return
    # An explicit empty string keeps this a blank line on Python 2 too,
    # where ``print()`` would output ``()``.
    print("")

    for parser in basic_parsers:
        print("Trying to parse your markup with %s" % parser)
        try:
            soup = BeautifulSoup(data, parser)
        except Exception:
            # Deliberately broad: any parser failure is reported, and the
            # remaining parsers are still tried.
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        else:
            print("Here's what %s did with the markup:" % parser)
            print(soup.prettify())

        print("-" * 80)
Example #16
Source File: diagnose.py From B.E.N.J.I. with MIT License | 4 votes |
def diagnose(data):
    """Diagnostic suite for isolating common problems.

    Prints version information, works out which of the basic parsers are
    listed in the features of a builder in ``builder_registry.builders``,
    then parses ``data`` with each remaining parser and prints either the
    prettified result or the traceback of the failure.

    :param data: What to diagnose. An object with a ``read`` attribute is
        read; a string naming an existing path is opened and read; a
        string starting with ``http:`` or ``https:`` is reported as a URL
        and nothing is parsed; anything else is parsed as given.
    :return: None; all results are printed.
    """
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # Walk a snapshot of the list: removing an entry from the list that is
    # being iterated makes the loop skip the entry right after it, so a
    # missing html5lib would leave "lxml" unchecked and still listed.
    for name in list(basic_parsers):
        available = any(
            name in builder.features for builder in builder_registry.builders
        )
        if not available:
            basic_parsers.remove(name)
            print(
                "I noticed that %s is not installed. Installing it may help."
                % name
            )

    if 'lxml' in basic_parsers:
        # lxml is also offered as an XML parser, requested by feature list.
        basic_parsers.append(["lxml", "xml"])
        try:
            from lxml import etree
            print(
                "Found lxml version %s"
                % ".".join(map(str, etree.LXML_VERSION))
            )
        except ImportError:
            print("lxml is not installed or couldn't be imported.")

    if 'html5lib' in basic_parsers:
        try:
            import html5lib
            print("Found html5lib version %s" % html5lib.__version__)
        except ImportError:
            print("html5lib is not installed or couldn't be imported.")

    if hasattr(data, 'read'):
        data = data.read()
    elif os.path.exists(data):
        print('"%s" looks like a filename. Reading data from the file.' % data)
        with open(data) as fp:
            data = fp.read()
    elif data.startswith(("http:", "https:")):
        print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
        print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
        return
    print()

    for parser in basic_parsers:
        print("Trying to parse your markup with %s" % parser)
        try:
            soup = BeautifulSoup(data, parser)
        except Exception:
            # Deliberately broad: any parser failure is reported, and the
            # remaining parsers are still tried.
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        else:
            print("Here's what %s did with the markup:" % parser)
            print(soup.prettify())

        print("-" * 80)
Example #17
Source File: diagnose.py From fuzzdb-collect with GNU General Public License v3.0 | 4 votes |
def diagnose(data):
    """Diagnostic suite for isolating common problems.

    Prints version information, works out which of the basic parsers are
    listed in the features of a builder in ``builder_registry.builders``,
    then parses ``data`` with each remaining parser and prints either the
    prettified result or the traceback of the failure.

    :param data: What to diagnose. An object with a ``read`` attribute is
        read; a string naming an existing path is opened and read; a
        string starting with ``http:`` or ``https:`` is reported as a URL
        and nothing is parsed; anything else is parsed as given.
    :return: None; all results are printed.
    """
    # ``print`` is always given one parenthesized argument so the output is
    # identical whether it is a statement (Python 2) or a function (Python 3).
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # Walk a snapshot of the list: removing an entry from the list that is
    # being iterated makes the loop skip the entry right after it, so a
    # missing html5lib would leave "lxml" unchecked and still listed.
    for name in list(basic_parsers):
        available = any(
            name in builder.features for builder in builder_registry.builders
        )
        if not available:
            basic_parsers.remove(name)
            print(
                "I noticed that %s is not installed. Installing it may help."
                % name
            )

    if 'lxml' in basic_parsers:
        # lxml is also offered as an XML parser, requested by feature list.
        basic_parsers.append(["lxml", "xml"])
        # A failed import must not abort the diagnostic run itself.
        try:
            from lxml import etree
            print(
                "Found lxml version %s"
                % ".".join(map(str, etree.LXML_VERSION))
            )
        except ImportError:
            print("lxml is not installed or couldn't be imported.")

    if 'html5lib' in basic_parsers:
        try:
            import html5lib
            print("Found html5lib version %s" % html5lib.__version__)
        except ImportError:
            print("html5lib is not installed or couldn't be imported.")

    if hasattr(data, 'read'):
        data = data.read()
    elif os.path.exists(data):
        print('"%s" looks like a filename. Reading data from the file.' % data)
        # Close the handle deterministically instead of leaking it.
        with open(data) as fp:
            data = fp.read()
    elif data.startswith(("http:", "https:")):
        print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
        print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
        return
    # An explicit empty string keeps this a blank line on Python 2 too,
    # where ``print()`` would output ``()``.
    print("")

    for parser in basic_parsers:
        print("Trying to parse your markup with %s" % parser)
        try:
            soup = BeautifulSoup(data, parser)
        except Exception:
            # Deliberately broad: any parser failure is reported, and the
            # remaining parsers are still tried.
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        else:
            print("Here's what %s did with the markup:" % parser)
            print(soup.prettify())

        print("-" * 80)
Example #18
Source File: diagnose.py From locality-sensitive-hashing with MIT License | 4 votes |
def diagnose(data):
    """Diagnostic suite for isolating common problems.

    Prints version information, works out which of the basic parsers are
    listed in the features of a builder in ``builder_registry.builders``,
    then parses ``data`` with each remaining parser and prints either the
    prettified result or the traceback of the failure.

    :param data: What to diagnose. An object with a ``read`` attribute is
        read; a string naming an existing path is opened and read; a
        string starting with ``http:`` or ``https:`` is reported as a URL
        and nothing is parsed; anything else is parsed as given.
    :return: None; all results are printed.
    """
    # ``print`` is always given one parenthesized argument so the output is
    # identical whether it is a statement (Python 2) or a function (Python 3).
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # Walk a snapshot of the list: removing an entry from the list that is
    # being iterated makes the loop skip the entry right after it, so a
    # missing html5lib would leave "lxml" unchecked and still listed.
    for name in list(basic_parsers):
        available = any(
            name in builder.features for builder in builder_registry.builders
        )
        if not available:
            basic_parsers.remove(name)
            print(
                "I noticed that %s is not installed. Installing it may help."
                % name
            )

    if 'lxml' in basic_parsers:
        # lxml is also offered as an XML parser, requested by feature list.
        basic_parsers.append(["lxml", "xml"])
        # A failed import must not abort the diagnostic run itself.
        try:
            from lxml import etree
            print(
                "Found lxml version %s"
                % ".".join(map(str, etree.LXML_VERSION))
            )
        except ImportError:
            print("lxml is not installed or couldn't be imported.")

    if 'html5lib' in basic_parsers:
        try:
            import html5lib
            print("Found html5lib version %s" % html5lib.__version__)
        except ImportError:
            print("html5lib is not installed or couldn't be imported.")

    if hasattr(data, 'read'):
        data = data.read()
    elif os.path.exists(data):
        print('"%s" looks like a filename. Reading data from the file.' % data)
        # Close the handle deterministically instead of leaking it.
        with open(data) as fp:
            data = fp.read()
    elif data.startswith(("http:", "https:")):
        print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
        print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
        return
    # An explicit empty string keeps this a blank line on Python 2 too,
    # where ``print()`` would output ``()``.
    print("")

    for parser in basic_parsers:
        print("Trying to parse your markup with %s" % parser)
        try:
            soup = BeautifulSoup(data, parser)
        except Exception:
            # Deliberately broad: any parser failure is reported, and the
            # remaining parsers are still tried.
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        else:
            print("Here's what %s did with the markup:" % parser)
            print(soup.prettify())

        print("-" * 80)
Example #19
Source File: diagnose.py From pledgeservice with Apache License 2.0 | 4 votes |
def diagnose(data):
    """Diagnostic suite for isolating common problems.

    Prints version information, works out which of the basic parsers are
    listed in the features of a builder in ``builder_registry.builders``,
    then parses ``data`` with each remaining parser and prints either the
    prettified result or the traceback of the failure.

    :param data: What to diagnose. An object with a ``read`` attribute is
        read; a string naming an existing path is opened and read; a
        string starting with ``http:`` or ``https:`` is reported as a URL
        and nothing is parsed; anything else is parsed as given.
    :return: None; all results are printed.
    """
    # ``print`` is always given one parenthesized argument so the output is
    # identical whether it is a statement (Python 2) or a function (Python 3).
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # Walk a snapshot of the list: removing an entry from the list that is
    # being iterated makes the loop skip the entry right after it, so a
    # missing html5lib would leave "lxml" unchecked and still listed.
    for name in list(basic_parsers):
        available = any(
            name in builder.features for builder in builder_registry.builders
        )
        if not available:
            basic_parsers.remove(name)
            print(
                "I noticed that %s is not installed. Installing it may help."
                % name
            )

    if 'lxml' in basic_parsers:
        # lxml is also offered as an XML parser, requested by feature list.
        basic_parsers.append(["lxml", "xml"])
        # A failed import must not abort the diagnostic run itself.
        try:
            from lxml import etree
            print(
                "Found lxml version %s"
                % ".".join(map(str, etree.LXML_VERSION))
            )
        except ImportError:
            print("lxml is not installed or couldn't be imported.")

    if 'html5lib' in basic_parsers:
        try:
            import html5lib
            print("Found html5lib version %s" % html5lib.__version__)
        except ImportError:
            print("html5lib is not installed or couldn't be imported.")

    if hasattr(data, 'read'):
        data = data.read()
    elif os.path.exists(data):
        print('"%s" looks like a filename. Reading data from the file.' % data)
        # Close the handle deterministically instead of leaking it.
        with open(data) as fp:
            data = fp.read()
    elif data.startswith(("http:", "https:")):
        print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
        print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
        return
    # An explicit empty string keeps this a blank line on Python 2 too,
    # where ``print()`` would output ``()``.
    print("")

    for parser in basic_parsers:
        print("Trying to parse your markup with %s" % parser)
        try:
            soup = BeautifulSoup(data, parser)
        except Exception:
            # Deliberately broad: any parser failure is reported, and the
            # remaining parsers are still tried.
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        else:
            print("Here's what %s did with the markup:" % parser)
            print(soup.prettify())

        print("-" * 80)