Python html.parser.HTMLParser.reset() Examples
The following are 24 code examples of html.parser.HTMLParser.reset().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of html.parser.HTMLParser, or try the search function.
Example #1
Source File: paraparser.py From Fluid-Designer with GNU General Public License v3.0 | 6 votes |
def start_seq(self, attr):
    """Emit the text for a sequence tag through ``self.handle_data``.

    A 'template' attribute takes precedence: it is %-formatted with
    ``self._seq`` and emitted directly.  Otherwise the sequence is selected
    by the 'id' attribute (``None`` when absent) and the optional 'inc'
    attribute controls how the counter moves.
    """
    if 'template' in attr:
        self.handle_data(attr['template'] % self._seq)
        return

    seq_id = attr['id'] if 'id' in attr else None
    step = attr.get('inc', None)

    if not step:
        # no 'inc' given: ask the sequencer for its next formatted value
        text = self._seq.nextf(seq_id)
    elif step.lower() == 'no':
        # "no" means: show the current value without incrementing
        text = self._seq.thisf(seq_id)
    else:
        # anything else must be an integer; emit the current value and
        # then move the counter by that amount
        amount = int(step)
        text = self._seq.thisf(seq_id)
        self._seq.reset(seq_id, self._seq._this() + amount)
    self.handle_data(text)
Example #2
Source File: Formatter.py From AdvancedHTMLParser with GNU Lesser General Public License v3.0 | 5 votes |
def _reset(self):
    '''
        _reset - Reset the base HTMLParser and clear this object's parse state.
    '''
    HTMLParser.reset(self)

    # Parsed results
    self.root = None
    self.doctype = None
    self.parsedData = []

    # Position tracking while parsing
    self._inTag = []
    self.currentIndentLevel = 0
    self.inPreformatted = 0
Example #3
Source File: paraparser.py From Fluid-Designer with GNU General Public License v3.0 | 5 votes |
def _reset(self, style):
    """Reset the parser and remember *style* as ``self._style``."""
    HTMLParser.reset(self)
    self._style = style
    # start over with an empty error list
    self.errors = []
    self._iReset()
#----------------------------------------------------------------
Example #4
Source File: paraparser.py From Fluid-Designer with GNU General Public License v3.0 | 5 votes |
def start_seqreset(self, attr):
    """Reset a sequence counter from a tag's attributes.

    Calls ``self._seq.reset(id, base)`` where *id* is ``attr['id']`` (or
    ``None`` when the attribute is missing) and *base* is ``attr['base']``
    converted with ``int`` (or ``0`` when it is missing or not convertible).
    """
    try:
        seq_id = attr['id']
    except KeyError:
        seq_id = None

    try:
        base = int(attr['base'])
    except (KeyError, TypeError, ValueError):
        # Only the failures a missing/non-numeric 'base' can produce fall
        # back to 0; a bare except would also swallow KeyboardInterrupt
        # and SystemExit.
        base = 0

    self._seq.reset(seq_id, base)
Example #5
Source File: oxford.py From awesometts-anki-addon with GNU General Public License v3.0 | 5 votes |
def reset(self):
    """Reset the base parser, then clear ``sounds`` and ``prev_tag``."""
    HTMLParser.reset(self)
    self.prev_tag = ""
    self.sounds = []
Example #6
Source File: cambridge.py From awesometts-anki-addon with GNU General Public License v3.0 | 5 votes |
def run(self, text, options, path):
    """
    Downloads from Cambridge Dictionary directly to an MP3.

    The dictionary page for *text* is fetched with ``self.net_stream`` and
    fed to a ``CambridgeLister`` constructed with a class prefix chosen
    from ``options['voice']`` ('en-US' selects the 'us' prefix, anything
    else the 'uk' prefix).  If the parser reports a ``sound_file``, it is
    downloaded to *path* with ``self.net_download``.

    Raises:
        IOError: if the parser found no sound file on the page.
    """
    dict_url = 'https://dictionary.cambridge.org/de/worterbuch/englisch/%s' % (
        quote(text.encode('utf-8'))
    )
    html_payload = self.net_stream(dict_url)

    if options['voice'] == 'en-US':
        initial_class = 'us dpron-i '
    else:
        initial_class = 'uk dpron-i '

    parser = CambridgeLister(initial_class)
    parser.feed(html_payload.decode('utf-8'))
    parser.close()

    # Identity comparison is the correct test for None (PEP 8).
    if parser.sound_file is None:
        raise IOError(f"Could not extract audio for voice {options['voice']} from Cambridge dictionary on page {dict_url}. You can try the en-US voice.")

    sound_url = 'https://dictionary.cambridge.org' + parser.sound_file
    self.net_download(
        path,
        sound_url,
        add_padding=True,
        require=dict(mime='audio/mpeg', size=1024),
    )
    parser.reset()
Example #7
Source File: cambridge.py From awesometts-anki-addon with GNU General Public License v3.0 | 5 votes |
def reset(self):
    """Reset the parser by delegating to ``HTMLParser.reset``."""
    HTMLParser.reset(self)
Example #8
Source File: spruce.py From Spruce with GNU General Public License v3.0 | 5 votes |
def reset(self):
    """Reset the base parser and mark that we are outside the version span."""
    HTMLParser.reset(self)
    # No version span has been entered after a reset.
    self.in_version_span = False
Example #9
Source File: parsers.py From riko with MIT License | 5 votes |
def reset(self):
    """Reset the base parser and replace ``self.data`` with a new, empty StringIO."""
    HTMLParser.reset(self)
    fresh_buffer = StringIO()
    self.data = fresh_buffer
Example #10
Source File: autorss.py From riko with MIT License | 5 votes |
def reset(self):
    """Reset the base parser and point ``self.entry`` at an exhausted iterator."""
    HTMLParser.reset(self)
    nothing = iter([])
    self.entry = nothing
Example #11
Source File: htmlparser.py From semeval2019-hyperpartisan-bertha-von-suttner with Apache License 2.0 | 5 votes |
def reset(self):
    """Reset the base parser and start ``data`` and ``p`` as separate empty lists."""
    HTMLParser.reset(self)
    self.p = []
    self.data = []
Example #12
Source File: Formatter.py From AdvancedHTMLParser with GNU Lesser General Public License v3.0 | 5 votes |
def parseStr(self, html):
    '''
        parseStr - Reset this parser, then feed it the given HTML.

            @param html <str> - valid HTML; bytes are decoded with
                                self.encoding before being fed
    '''
    self.reset()
    text = html.decode(self.encoding) if isinstance(html, bytes) else html
    self.feed(text)
Example #13
Source File: util.py From arch-security-tracker with MIT License | 5 votes |
def reset(self):
    """Empty ``elements`` and ``processing``, then reset the base parser."""
    self.elements, self.processing = [], []
    HTMLParser.reset(self)
Example #14
Source File: Formatter.py From AdvancedHTMLParser with GNU Lesser General Public License v3.0 | 5 votes |
def feed(self, contents):
    '''
        feed - Load contents

            @param contents - HTML contents
    '''
    contents = stripIEConditionals(contents)
    try:
        HTMLParser.feed(self, contents)
    except MultipleRootNodeException:
        # Start over and parse again, this time feeding the output of
        # addStartTag(contents, INVISIBLE_ROOT_TAG_START) followed by
        # INVISIBLE_ROOT_TAG_END.
        self.reset()
        wrapped = "%s%s" % (
            addStartTag(contents, INVISIBLE_ROOT_TAG_START),
            INVISIBLE_ROOT_TAG_END,
        )
        HTMLParser.feed(self, wrapped)
Example #15
Source File: Formatter.py From AdvancedHTMLParser with GNU Lesser General Public License v3.0 | 5 votes |
def __init__(self, indent=' ', encoding='utf-8'):
    '''
        Create a pretty formatter.

            @param indent <str/int>, Default ' ' - Stored unchanged as self.indent.
                Per the original documentation: either a space/tab/newline that
                represents one level of indent, or an integer to use that number
                of spaces (that handling is not part of this method).
                NOTE(review): the original documentation describes the default
                as four spaces while the literal here is a single space -
                whitespace may have been collapsed; confirm.

            @param encoding <str/None>, Default 'utf-8' - Use this encoding for
                the document. None to not mess with encoding
    '''
    HTMLParser.__init__(self)
    # Do not automatically convert charrefs in python3
    self.convert_charrefs = False

    # Caller-supplied configuration
    self.indent = indent
    self.encoding = encoding

    # Parse state
    self.parsedData = []
    self.decl = None
    self.currentIndentLevel = 0
    self.inPreformatted = 0
    self.root = None
    self._inTag = []
    self.doctype = None

    # From now on self.reset() resolves to this object's _reset
    self.reset = self._reset
Example #16
Source File: Parser.py From AdvancedHTMLParser with GNU Lesser General Public License v3.0 | 5 votes |
def _reset(self):
    '''
        _reset - Reset this object: run AdvancedHTMLParser.reset on it, then
            call self._resetIndexInternal().

            Per the original note, this is assigned to .reset after the
            __init__ call.
    '''
    AdvancedHTMLParser.reset(self)
    self._resetIndexInternal()

#vim: set ts=4 sw=4 expandtab
Example #17
Source File: Parser.py From AdvancedHTMLParser with GNU Lesser General Public License v3.0 | 5 votes |
def parseStr(self, html):
    '''
        parseStr - Reset the parser and feed it the given HTML.

            @param html <str> - valid HTML; a bytes value is first decoded
                                using self.encoding
    '''
    self.reset()
    if not isinstance(html, bytes):
        self.feed(html)
        return
    self.feed(html.decode(self.encoding))
Example #18
Source File: Parser.py From AdvancedHTMLParser with GNU Lesser General Public License v3.0 | 5 votes |
def feed(self, contents):
    '''
        feed - Feed contents. Use parseStr or parseFile instead.

            @param contents - Contents
    '''
    contents = stripIEConditionals(contents)
    try:
        HTMLParser.feed(self, contents)
    except MultipleRootNodeException:
        # Discard the partial parse, then feed the output of
        # addStartTag(contents, INVISIBLE_ROOT_TAG_START) followed by
        # INVISIBLE_ROOT_TAG_END instead.
        self.reset()
        opened = addStartTag(contents, INVISIBLE_ROOT_TAG_START)
        HTMLParser.feed(self, "%s%s" % (opened, INVISIBLE_ROOT_TAG_END))
Example #19
Source File: Parser.py From AdvancedHTMLParser with GNU Lesser General Public License v3.0 | 5 votes |
def _reset(self):
    '''
        _reset - Reset the base HTMLParser and clear root, doctype and the
            list of open tags. Assigned to .reset after __init__ call.
    '''
    HTMLParser.reset(self)
    self._inTag = []
    self.root = self.doctype = None
Example #20
Source File: Parser.py From AdvancedHTMLParser with GNU Lesser General Public License v3.0 | 5 votes |
def __setstate__(self, state):
    '''
        __setstate__ - Restore state for loading pickle

            @param state <dict> - The state; every key is set as an
                                  attribute on this object
    '''
    for attr_name in state:
        setattr(self, attr_name, state[attr_name])

    # Python2 compat
    self.reset = self._reset
Example #21
Source File: Parser.py From AdvancedHTMLParser with GNU Lesser General Public License v3.0 | 5 votes |
def __getstate__(self):
    '''
        __getstate__ - Get state for pickling

            The instance attributes are returned without the 'reset' entry.
            A shallow copy of the attribute dict is used so that dropping
            'reset' does not remove it from the live object, and so that
            calling this more than once does not fail.

            @return <dict>
    '''
    state = self.__dict__.copy()
    # Python2 compat
    state.pop('reset', None)
    return state
Example #22
Source File: Parser.py From AdvancedHTMLParser with GNU Lesser General Public License v3.0 | 5 votes |
def __init__(self, filename=None, encoding='utf-8'):
    '''
        __init__ - Creates an Advanced HTML parser object. For read-only parsing, consider IndexedAdvancedHTMLParser for faster searching.

            @param filename <str> - Optional filename to parse. Otherwise use parseFile or parseStr methods.
            @param encoding <str> - Specifies the document encoding. Default utf-8
    '''
    HTMLParser.__init__(self)
    # Do not automatically convert charrefs in python3
    self.convert_charrefs = False

    # Encoding to use for this document
    self.encoding = encoding

    # Parse state
    self.root = None
    self.doctype = None
    self._inTag = []

    # Must assign after first call, otherwise members won't yet be present
    self.reset = self._reset

    if filename is None:
        return
    self.parseFile(filename)

###########################################
#####        INTERNAL               #######
###########################################
Example #23
Source File: HTMLDocsParser.py From cFS-GroundSystem with Apache License 2.0 | 5 votes |
def reset(self):
    """Start ``allData`` as an empty list, then reset the base parser."""
    collected = []
    self.allData = collected
    HTMLParser.reset(self)

#
# Appends HTML file content to allData variable
#
Example #24
Source File: CommandParser.py From cFS-GroundSystem with Apache License 2.0 | 5 votes |
def reset(self):
    """Start ``allData`` and ``allhref`` as empty lists, then reset the base parser."""
    self.allData, self.allhref = [], []
    HTMLParser.reset(self)

#
# Appends HTML file content to allData variable
#