Python sgmllib.SGMLParser() Examples
The following are 30 code examples of sgmllib.SGMLParser().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sgmllib, or try the search function.

Example #1
Source File: From Cloudmare with GNU General Public License v3.0 | 6 votes |
def parse_declaration(self, i):
    """Parse the declaration starting at offset ``i`` of ``self.rawdata``.

    A ``<![CDATA[`` section is passed to ``self._toStringSubclass`` together
    with ``CData``; any other declaration is delegated to
    ``sgmllib.SGMLParser.parse_declaration``.  If that raises
    ``SGMLParseError`` the remainder of the buffer is emitted as plain data.

    Returns the offset at which parsing should resume.
    """
    cdata_open = '<![CDATA['
    if self.rawdata[i:i+9] == cdata_open:
        close_at = self.rawdata.find(']]>', i)
        if close_at == -1:
            # Unterminated section: consume everything up to end of buffer.
            close_at = len(self.rawdata)
        self._toStringSubclass(self.rawdata[i+9:close_at], CData)
        return close_at + 3

    try:
        return sgmllib.SGMLParser.parse_declaration(self, i)
    except sgmllib.SGMLParseError:
        # Bogus declaration: hand the rest of the buffer over as raw data.
        remainder = self.rawdata[i:]
        self.handle_data(remainder)
        return i + len(remainder)
Example #2
Source File: From Cloudmare with GNU General Public License v3.0 | 5 votes |
def convert_charref(self, name):
    """Convert a numeric character reference to a character, ASCII only.

    ``name`` is the reference's numeric text.  Returns ``None`` when it is
    not an integer or falls outside 0..127; otherwise returns whatever
    ``self.convert_codepoint`` yields for the code point.
    """
    try:
        codepoint = int(name)
    except ValueError:
        return None
    # ASCII ends at 127, not 255
    if codepoint < 0 or codepoint > 127:
        return None
    return self.convert_codepoint(codepoint)
Example #3
Source File: From razzy-spinner with GNU General Public License v3.0 | 5 votes |
def __init__(self):
    """Initialise the instance by running the SGMLParser constructor."""
    sgmllib.SGMLParser.__init__(self)
Example #4
Source File: From incremental-reading with ISC License | 5 votes |
def feed(self, data):
    """Pre-process ``data`` and push it through the SGML parser in one go.

    The markup is normalised, optionally encoded, fed to
    ``sgmllib.SGMLParser.feed`` and the parser is then closed.
    """
    # Escape "<!" unless it opens a DOCTYPE, a comment or a "[" section.
    # The replacement must be the entity "&lt;!": a literal "<!" would
    # make this substitution a no-op.
    data = re.compile(r'<!((?!DOCTYPE|--|\[))', re.IGNORECASE).sub(r'&lt;!\1', data)
    # Self-closing short tags are rewritten by the _shorttag_replace callback.
    data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data)
    # Decode the numeric references for the two quote characters.
    data = data.replace('&#39;', "'")
    data = data.replace('&#34;', '"')
    try:
        # NameError (either raised by a missing ``bytes`` builtin or raised
        # explicitly when ``bytes`` is just an alias of ``str``) selects the
        # encode-before-parsing path below.
        bytes
        if bytes is str:
            raise NameError
        self.encoding = self.encoding + '_INVALID_PYTHON_3'
    except NameError:
        if self.encoding and isinstance(data, str):
            data = data.encode(self.encoding)
    sgmllib.SGMLParser.feed(self, data)
    sgmllib.SGMLParser.close(self)
Example #5
Source File: From Cloudmare with GNU General Public License v3.0 | 5 votes |
def __getattr__(self, methodName):
    """Dispatch an attribute lookup to the SGMLParser or Tag superclass.

    Names beginning with ``start_``, ``end_`` or ``do_`` are resolved by
    ``sgmllib.SGMLParser.__getattr__``; other names that do not begin with a
    double underscore are resolved by ``Tag.__getattr__``.  Everything else
    raises ``AttributeError``.
    """
    #print "__getattr__ called on %s.%s" % (self.__class__, methodName)
    if methodName.startswith(('start_', 'end_', 'do_')):
        return sgmllib.SGMLParser.__getattr__(self, methodName)
    if methodName.startswith('__'):
        raise AttributeError
    return Tag.__getattr__(self, methodName)
Example #6
Source File: From Cloudmare with GNU General Public License v3.0 | 5 votes |
def reset(self):
    """Re-initialise the object as a hidden root tag with empty parse state.

    Runs ``Tag.__init__`` with ``ROOT_TAG_NAME``, resets the SGML parser,
    clears the data/tag/quote bookkeeping and finally pushes the object
    itself via ``pushTag``.
    """
    Tag.__init__(self, self, self.ROOT_TAG_NAME)
    self.hidden = 1
    sgmllib.SGMLParser.reset(self)

    # Fresh bookkeeping; each stack gets its own new list.
    self.currentData = []
    self.currentTag = None
    self.tagStack, self.quoteStack = [], []

    self.pushTag(self)
Example #7
Source File: From Cloudmare with GNU General Public License v3.0 | 5 votes |
def _feed(self, inDocumentEncoding=None, isHTML=False):
    """Convert ``self.markup`` to Unicode, massage it and parse it.

    inDocumentEncoding -- encoding candidate passed to UnicodeDammit after
        ``self.fromEncoding``.
    isHTML -- forwarded to UnicodeDammit.

    After parsing, pending data is flushed with ``endData`` and tags are
    popped until the current tag is the root tag.
    """
    # Convert the document to Unicode.
    markup = self.markup
    if isinstance(markup, text_type):
        if not hasattr(self, 'originalEncoding'):
            self.originalEncoding = None
    else:
        dammit = UnicodeDammit(
            markup, [self.fromEncoding, inDocumentEncoding],
            smartQuotesTo=self.smartQuotesTo, isHTML=isHTML)
        markup = dammit.unicode
        self.originalEncoding = dammit.originalEncoding
        self.declaredHTMLEncoding = dammit.declaredHTMLEncoding
    if markup:
        if self.markupMassage:
            # A non-iterable truthy value selects the default massage table.
            if not hasattr(self.markupMassage, "__iter__"):
                self.markupMassage = self.MARKUP_MASSAGE
            for fix, m in self.markupMassage:
                markup = fix.sub(m, markup)
            # TODO: We get rid of markupMassage so that the
            # soup object can be deepcopied later on. Some
            # Python installations can't copy regexes. If anyone
            # was relying on the existence of markupMassage, this
            # might cause problems.
            del self.markupMassage
    self.reset()

    sgmllib.SGMLParser.feed(self, markup)
    # Close out any unfinished strings and close all the open tags.
    self.endData()
    # The comparison needs a left operand: unwind until the current tag is
    # the root tag.
    # NOTE(review): assumes the tag's name is exposed as ``.name`` -- confirm
    # against the Tag class.
    while self.currentTag.name != self.ROOT_TAG_NAME:
        self.popTag()
Example #8
Source File: From POC-EXP with GNU General Public License v3.0 | 5 votes |
def feed(self, data):
    """Feed ``data`` to the SGML parser, translating its parse errors.

    Raises ``ParseError`` (wrapping the original exception) when the
    underlying parser raises ``SGMLLIB_PARSEERROR``.
    """
    try:
        sgmllib.SGMLParser.feed(self, data)
    # "except X as name" is accepted by Python 2.6+ and Python 3, whereas
    # the comma form is a syntax error on Python 3.
    except SGMLLIB_PARSEERROR as exc:
        raise ParseError(exc)
Example #9
Source File: From POC-EXP with GNU General Public License v3.0 | 5 votes |
def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
    """Initialise both base classes.

    The SGMLParser constructor runs first; ``entitydefs`` and ``encoding``
    are then handed to ``_AbstractFormParser.__init__``.
    """
    sgmllib.SGMLParser.__init__(self)
    _AbstractFormParser.__init__(self, entitydefs, encoding)
Example #10
Source File: From incremental-reading with ISC License | 5 votes |
def __init__(self, baseuri, baselang, encoding, entities):
    """Initialise the three base classes in order and store ``entities``.

    ``_BaseHTMLProcessor`` is initialised with the content type
    ``'application/xhtml+xml'``.
    """
    sgmllib.SGMLParser.__init__(self)
    _FeedParserMixin.__init__(self, baseuri, baselang, encoding)
    _BaseHTMLProcessor.__init__(self, encoding, 'application/xhtml+xml')
    self.entities = entities
Example #11
Source File: From incremental-reading with ISC License | 5 votes |
def parse_declaration(self, i):
    """Parse the declaration at offset ``i``, tolerating malformed ones.

    Delegates to ``sgmllib.SGMLParser.parse_declaration``.  When that raises
    ``SGMLParseError`` the opening angle bracket is emitted as escaped text
    and parsing resumes one character further on.

    Returns the offset at which parsing should continue.
    """
    try:
        return sgmllib.SGMLParser.parse_declaration(self, i)
    except sgmllib.SGMLParseError:
        # escape the doctype declaration and continue parsing
        # The escaped entity is required here: emitting a raw '<' would put
        # the unparsable declaration straight back into the output.
        self.handle_data('&lt;')
        return i + 1
Example #12
Source File: From POC-EXP with GNU General Public License v3.0 | 5 votes |
def close(self):
    """Close the SGML parser, then call ``end_body``."""
    sgmllib.SGMLParser.close(self)
    self.end_body()

# sigh, must support mechanize by allowing dynamic creation of classes based on
# its bundled copy of BeautifulSoup (which was necessary because of dependency
# problems)
Example #13
Source File: From medicare-demo with Apache License 2.0 | 5 votes |
def reset(self):
    """Reset the SGML parser and restore this class's own state defaults."""
    sgmllib.SGMLParser.reset(self)
    # Attributes that start out unset.
    self.savedata = self.title = self.base = self.anchor = None
    # Numeric flags start at zero.
    self.isindex = self.nofill = 0
    # Each list attribute gets its own fresh list.
    self.anchorlist = []
    self.list_stack = []

# ------ Methods used internally; some may be overridden

# --- Formatter interface, taking care of 'savedata' mode;
# shouldn't need to be overridden
Example #14
Source File: From medicare-demo with Apache License 2.0 | 5 votes |
def __init__(self, formatter, verbose=0):
    """Create an HTMLParser instance bound to ``formatter``.

    ``verbose`` is passed through to the SGMLParser constructor; the
    formatter is stored on the instance afterwards.
    """
    sgmllib.SGMLParser.__init__(self, verbose)
    self.formatter = formatter
Example #15
Source File: From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def reset(self):
    """Reset the SGML parser and restore this class's own state defaults."""
    sgmllib.SGMLParser.reset(self)
    # Attributes that start out unset.
    self.savedata = self.title = self.base = self.anchor = None
    # Numeric flags start at zero.
    self.isindex = self.nofill = 0
    # Each list attribute gets its own fresh list.
    self.anchorlist = []
    self.list_stack = []

# ------ Methods used internally; some may be overridden

# --- Formatter interface, taking care of 'savedata' mode;
# shouldn't need to be overridden
Example #16
Source File: From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def __init__(self, formatter, verbose=0):
    """Create an HTMLParser instance bound to ``formatter``.

    ``verbose`` is passed through to the SGMLParser constructor; the
    formatter is stored on the instance afterwards.
    """
    sgmllib.SGMLParser.__init__(self, verbose)
    self.formatter = formatter
Example #17
Source File: From flo with MIT License | 5 votes |
def __init__(self, writer):
    """Initialise the SGML parser and the per-document state.

    ``writer`` is stored on the instance; the document id starts unset, the
    in-body flag starts false and the collected text starts empty.
    """
    sgmllib.SGMLParser.__init__(self)
    self.writer = writer
    self.document_id = None
    self.in_body = False
    self.text = ''
Example #18
Source File: From datafari with Apache License 2.0 | 5 votes |
def reset(self):
    """Reset the SGML parser and restore this class's own state defaults."""
    sgmllib.SGMLParser.reset(self)
    # Attributes that start out unset.
    self.savedata = self.title = self.base = self.anchor = None
    # Numeric flags start at zero.
    self.isindex = self.nofill = 0
    # Each list attribute gets its own fresh list.
    self.anchorlist = []
    self.list_stack = []

# ------ Methods used internally; some may be overridden

# --- Formatter interface, taking care of 'savedata' mode;
# shouldn't need to be overridden
Example #19
Source File: From datafari with Apache License 2.0 | 5 votes |
def __init__(self, formatter, verbose=0):
    """Create an HTMLParser instance bound to ``formatter``.

    ``verbose`` is passed through to the SGMLParser constructor; the
    formatter is stored on the instance afterwards.
    """
    sgmllib.SGMLParser.__init__(self, verbose)
    self.formatter = formatter
Example #20
Source File: From datafari with Apache License 2.0 | 5 votes |
def __init__(self, url, verbose=VERBOSE, checker=None):
    """Store the page settings, then initialise the SGML parser.

    ``url`` and ``checker`` are kept on the instance, ``base`` starts unset
    and the link/name collections start empty.  The instance attributes are
    assigned before the SGMLParser constructor runs.
    """
    self.myverbose = verbose  # now unused
    self.checker = checker
    self.base = None
    self.links = {}
    self.names = []
    self.url = url
    sgmllib.SGMLParser.__init__(self)
Example #21
Source File: From d4rkc0de with GNU General Public License v2.0 | 5 votes |
def __init__(self, verbose=0):
    """Initialise the parser and start with an empty hyperlink list.

    ``verbose`` is handed to the SGMLParser constructor.
    """
    sgmllib.SGMLParser.__init__(self, verbose)
    self.hyperlinks = []
Example #22
Source File: From SCDV with MIT License | 5 votes |
def __init__(self, verbose=0):
    """Initialise the SGML parser with ``verbose``, then call ``_reset``."""
    sgmllib.SGMLParser.__init__(self, verbose)
    self._reset()
Example #23
Source File: From xbmc-addons-chinese with GNU General Public License v2.0 | 5 votes |
def __init__(self, baseuri, baselang, encoding, entities):
    """Initialise the three base classes in order and store ``entities``.

    ``_BaseHTMLProcessor`` is initialised with the content type
    ``'application/xhtml+xml'``.
    """
    sgmllib.SGMLParser.__init__(self)
    _FeedParserMixin.__init__(self, baseuri, baselang, encoding)
    _BaseHTMLProcessor.__init__(self, encoding, 'application/xhtml+xml')
    self.entities = entities
Example #24
Source File: From xbmc-addons-chinese with GNU General Public License v2.0 | 5 votes |
def parse_declaration(self, i):
    """Parse the declaration at offset ``i``, tolerating malformed ones.

    Delegates to ``sgmllib.SGMLParser.parse_declaration``.  When that raises
    ``SGMLParseError`` the opening angle bracket is emitted as escaped text
    and parsing resumes one character further on.

    Returns the offset at which parsing should continue.
    """
    try:
        return sgmllib.SGMLParser.parse_declaration(self, i)
    except sgmllib.SGMLParseError:
        # escape the doctype declaration and continue parsing
        # The escaped entity is required here: emitting a raw '<' would put
        # the unparsable declaration straight back into the output.
        self.handle_data('&lt;')
        return i + 1
Example #25
Source File: From xbmc-addons-chinese with GNU General Public License v2.0 | 5 votes |
def feed(self, data):
    """Pre-process ``data`` and push it through the SGML parser in one go.

    The markup is normalised, optionally encoded, fed to
    ``sgmllib.SGMLParser.feed`` and the parser is then closed.
    """
    # Escape "<!" unless it opens a DOCTYPE, a comment or a "[" section.
    # The replacement must be the entity "&lt;!": a literal "<!" would
    # make this substitution a no-op.
    data = re.compile(r'<!((?!DOCTYPE|--|\[))', re.IGNORECASE).sub(r'&lt;!\1', data)
    # Self-closing short tags are rewritten by the _shorttag_replace callback.
    data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data)
    # Decode the numeric references for the two quote characters.
    data = data.replace('&#39;', "'")
    data = data.replace('&#34;', '"')
    try:
        # NameError (either raised by a missing ``bytes`` builtin or raised
        # explicitly when ``bytes`` is just an alias of ``str``) selects the
        # encode-before-parsing path below.
        bytes
        if bytes is str:
            raise NameError
        self.encoding = self.encoding + u'_INVALID_PYTHON_3'
    except NameError:
        if self.encoding and isinstance(data, unicode):
            data = data.encode(self.encoding)
    sgmllib.SGMLParser.feed(self, data)
    sgmllib.SGMLParser.close(self)
Example #26
Source File: From xbmc-addons-chinese with GNU General Public License v2.0 | 5 votes |
def reset(self):
    """Start with an empty ``pieces`` list, then reset the SGML parser."""
    self.pieces = []
    sgmllib.SGMLParser.reset(self)
Example #27
Source File: From xbmc-addons-chinese with GNU General Public License v2.0 | 5 votes |
def __init__(self, encoding, _type):
    """Store ``encoding`` and ``_type``, then initialise the SGML parser.

    Both attributes are assigned before the SGMLParser constructor runs.
    """
    self.encoding = encoding
    self._type = _type
    sgmllib.SGMLParser.__init__(self)
Example #28
Source File: From pyrobotlab with Apache License 2.0 | 5 votes |
def __init__(self, baseuri, baselang, encoding):
    """Initialise the SGML parser, then the feed-parser mixin.

    ``baseuri``, ``baselang`` and ``encoding`` are forwarded to
    ``_FeedParserMixin.__init__``.
    """
    sgmllib.SGMLParser.__init__(self)
    _FeedParserMixin.__init__(self, baseuri, baselang, encoding)
Example #29
Source File: From pyrobotlab with Apache License 2.0 | 5 votes |
def feed(self, data):
    """Pre-process ``data`` and feed it to the SGML parser.

    The markup is normalised, encoded with ``self.encoding`` when it is a
    unicode string and an encoding is set, and then passed to
    ``sgmllib.SGMLParser.feed``.
    """
    # Escape "<!" unless it opens a DOCTYPE, a comment or a "[" section.
    # The replacement must be the entity "&lt;!": a literal "<!" would
    # make this substitution a no-op.
    data = re.compile(r'<!((?!DOCTYPE|--|\[))', re.IGNORECASE).sub(r'&lt;!\1', data)
    #data = re.sub(r'<(\S+?)\s*?/>', self._shorttag_replace, data)
    # bug [ 1399464 ] Bad regexp for _shorttag_replace
    data = re.sub(r'<([^<\s]+?)\s*/>', self._shorttag_replace, data)
    # Decode the numeric references for the two quote characters.
    data = data.replace('&#39;', "'")
    data = data.replace('&#34;', '"')
    # type(u'') names the unicode string type without referring to the
    # ``unicode`` builtin; isinstance also accepts subclasses of it.
    if self.encoding and isinstance(data, type(u'')):
        data = data.encode(self.encoding)
    sgmllib.SGMLParser.feed(self, data)
Example #30
Source File: From pyrobotlab with Apache License 2.0 | 5 votes |
def reset(self):
    """Start with an empty ``pieces`` list, then reset the SGML parser."""
    self.pieces = []
    sgmllib.SGMLParser.reset(self)