Python html.parser.HTMLParser.reset() Examples

The following are 24 code examples of html.parser.HTMLParser.reset(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module html.parser, or try the search function.
Example #1
Source File: paraparser.py    From Fluid-Designer with GNU General Public License v3.0 6 votes vote down vote up
def start_seq(self, attr):
        """Emit the formatted value of a sequence counter.

        A 'template' attribute wins outright: it is %-formatted against
        ``self._seq`` and emitted.  Otherwise the sequence is selected by
        the 'id' attribute (``None`` when absent) and the optional 'inc'
        attribute controls how the counter moves:

        * missing/empty -- emit the next value (``nextf``)
        * "no" (any case) -- emit the current value without incrementing
        * an integer -- emit the current value, then reset the counter to
          ``self._seq._this() + int(inc)``
        """
        if 'template' in attr:
            self.handle_data(attr['template'] % self._seq)
            return

        seq_id = attr['id'] if 'id' in attr else None
        step = attr.get('inc', None)

        if not step:
            text = self._seq.nextf(seq_id)
        elif step.lower() == 'no':
            text = self._seq.thisf(seq_id)
        else:
            # an integer step: report the current value, then move the counter
            delta = int(step)
            text = self._seq.thisf(seq_id)
            self._seq.reset(seq_id, self._seq._this() + delta)

        self.handle_data(text)
Example #2
Source File: Formatter.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 5 votes vote down vote up
def _reset(self):
        '''
            _reset - Reset the underlying HTMLParser and return every
              attribute tracked on this object to its empty starting value.
        '''
        HTMLParser.reset(self)

        # Parsed results
        self.root = None
        self.doctype = None
        self.parsedData = []

        # Bookkeeping counters / stacks
        self._inTag = []
        self.currentIndentLevel = 0
        self.inPreformatted = 0
Example #3
Source File: paraparser.py    From Fluid-Designer with GNU General Public License v3.0 5 votes vote down vote up
def _reset(self, style):
        '''Reset the parser and remember *style* for the next parse.'''
        HTMLParser.reset(self)

        self._style = style
        # start over with an empty list of errors
        self.errors = []

        self._iReset()

    #---------------------------------------------------------------- 
Example #4
Source File: paraparser.py    From Fluid-Designer with GNU General Public License v3.0 5 votes vote down vote up
def start_seqreset(self, attr):
        """Reset a sequence counter to a given base value.

        Reads two optional attributes from *attr* and forwards them to
        ``self._seq.reset``:

        * 'id'   -- which sequence to reset; ``None`` when absent.
        * 'base' -- the new counter value; ``0`` when absent or when it
          cannot be converted to an integer.
        """
        seq_id = attr.get('id', None)
        try:
            base = int(attr['base'])
        except (KeyError, TypeError, ValueError):
            # Missing or non-numeric base: fall back to zero.  Only these
            # errors are swallowed, so KeyboardInterrupt/SystemExit and
            # genuine bugs still propagate.
            base = 0
        self._seq.reset(seq_id, base)
Example #5
Source File: oxford.py    From awesometts-anki-addon with GNU General Public License v3.0 5 votes vote down vote up
def reset(self):
        """Reset the base parser, then clear the collected sounds and the
        remembered previous tag."""
        HTMLParser.reset(self)
        self.sounds, self.prev_tag = [], ""
Example #6
Source File: cambridge.py    From awesometts-anki-addon with GNU General Public License v3.0 5 votes vote down vote up
def run(self, text, options, path):
        """
        Downloads from Cambridge Dictionary directly to an MP3.

        The dictionary page for *text* is fetched and fed to a
        CambridgeLister configured for the requested voice ('en-US' selects
        the US pronunciation block, anything else the UK one).  The
        parser's ``sound_file`` is then appended to the site host and
        downloaded to *path*.

        Raises IOError when the parser found no sound file on the page.
        """

        dict_url = 'https://dictionary.cambridge.org/de/worterbuch/englisch/%s' % (
            quote(text.encode('utf-8'))
        )
        html_payload = self.net_stream(dict_url)

        if options['voice'] == 'en-US':
            initial_class = 'us dpron-i '
        else:
            initial_class = 'uk dpron-i '

        parser = CambridgeLister(initial_class)
        parser.feed(html_payload.decode('utf-8'))
        parser.close()

        # Fail fast when nothing was found; identity comparison is the
        # correct way to test for None.
        if parser.sound_file is None:
            raise IOError(f"Could not extract audio for voice {options['voice']} from Cambridge dictionary on page {dict_url}. You can try the en-US voice.")

        sound_url = 'https://dictionary.cambridge.org' + parser.sound_file

        self.net_download(
            path,
            sound_url,
            add_padding=True,
            require=dict(mime='audio/mpeg', size=1024),
        )
        parser.reset()
Example #7
Source File: cambridge.py    From awesometts-anki-addon with GNU General Public License v3.0 5 votes vote down vote up
def reset(self):
        """Reset the parser; simply defers to HTMLParser.reset()."""
        HTMLParser.reset(self)
Example #8
Source File: spruce.py    From Spruce with GNU General Public License v3.0 5 votes vote down vote up
def reset(self):
        """Reset the parser and clear the "inside the version span" flag."""
        HTMLParser.reset(self)
        # Not inside the version span until parsing says otherwise.
        self.in_version_span = False
Example #9
Source File: parsers.py    From riko with MIT License 5 votes vote down vote up
def reset(self):
        """Reset the parser and start over with a fresh, empty StringIO
        in ``self.data``."""
        HTMLParser.reset(self)
        fresh_buffer = StringIO()
        self.data = fresh_buffer
Example #10
Source File: autorss.py    From riko with MIT License 5 votes vote down vote up
def reset(self):
        """Reset the parser and replace ``self.entry`` with an empty
        iterator."""
        HTMLParser.reset(self)
        nothing = []
        self.entry = iter(nothing)
Example #11
Source File: htmlparser.py    From semeval2019-hyperpartisan-bertha-von-suttner with Apache License 2.0 5 votes vote down vote up
def reset(self):
        """Reset the parser and empty the ``data`` and ``p`` lists."""
        HTMLParser.reset(self)
        self.data, self.p = [], []
Example #12
Source File: Formatter.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 5 votes vote down vote up
def parseStr(self, html):
        '''
            parseStr - Resets this object, then feeds it the given HTML.

                @param html <str/bytes> - valid HTML. Bytes are decoded using
                  self.encoding before being fed.
        '''
        self.reset()

        text = html.decode(self.encoding) if isinstance(html, bytes) else html
        self.feed(text)
Example #13
Source File: util.py    From arch-security-tracker with MIT License 5 votes vote down vote up
def reset(self):
        """Empty the ``elements`` and ``processing`` lists, then reset the
        base parser."""
        self.elements, self.processing = [], []
        HTMLParser.reset(self)
Example #14
Source File: Formatter.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 5 votes vote down vote up
def feed(self, contents):
        '''
            feed - Load contents

            IE conditionals are stripped first. If parsing then raises
            MultipleRootNodeException, this object is reset and the contents
            are fed again wrapped in the invisible root tag.

            @param contents - HTML contents
        '''
        cleaned = stripIEConditionals(contents)
        try:
            HTMLParser.feed(self, cleaned)
        except MultipleRootNodeException:
            # Start over and retry with a single synthetic root.
            self.reset()

            wrapped = "%s%s" %(addStartTag(cleaned, INVISIBLE_ROOT_TAG_START), INVISIBLE_ROOT_TAG_END)
            HTMLParser.feed(self, wrapped)
Example #15
Source File: Formatter.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 5 votes vote down vote up
def __init__(self, indent='  ', encoding='utf-8'):
        '''
            Create a pretty formatter.

            @param indent <str/int>, Default '  ' [2 spaces] - Either a space/tab/newline that represents one level of indent, or an integer to use that number of spaces

            @param encoding <str/None>, Default 'utf-8', - Use this encoding for the document. None to not mess with encoding
        '''
        HTMLParser.__init__(self)

        # Do not automatically convert charrefs in python3
        self.convert_charrefs = False

        # Swap in our own reset only after HTMLParser.__init__ has run
        self.reset = self._reset

        # Caller-supplied settings
        self.indent = indent
        self.encoding = encoding

        # Empty starting state
        self.root = None
        self.decl = None
        self.doctype = None
        self.parsedData = []
        self._inTag = []
        self.currentIndentLevel = 0
        self.inPreformatted = 0
Example #16
Source File: Parser.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 5 votes vote down vote up
def _reset(self):
        '''
            _reset - Reset this object: runs the parent parser's reset and
              then self._resetIndexInternal(). Assigned to .reset after
              __init__ call.
        '''
        # NOTE(review): this calls the class-level ``reset`` rather than
        # ``AdvancedHTMLParser._reset`` -- confirm the parent's own
        # attributes are meant to be left untouched here.
        AdvancedHTMLParser.reset(self)
        self._resetIndexInternal()

#vim: set ts=4 sw=4 expandtab 
Example #17
Source File: Parser.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 5 votes vote down vote up
def parseStr(self, html):
        '''
            parseStr - Resets this parser, then feeds it the given HTML.

                @param html <str/bytes> - valid HTML. Bytes are decoded using
                  self.encoding before being fed.
        '''
        self.reset()

        if not isinstance(html, bytes):
            self.feed(html)
            return

        self.feed(html.decode(self.encoding))
Example #18
Source File: Parser.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 5 votes vote down vote up
def feed(self, contents):
        '''
            feed - Feed contents. Use  parseStr or parseFile instead.

            IE conditionals are stripped first. If parsing then raises
            MultipleRootNodeException, this object is reset and the contents
            are fed again wrapped in the invisible root tag.

            @param contents - Contents
        '''
        cleaned = stripIEConditionals(contents)
        try:
            HTMLParser.feed(self, cleaned)
        except MultipleRootNodeException:
            # Start over and retry with a single synthetic root.
            self.reset()
            wrapped = "%s%s" %(addStartTag(cleaned, INVISIBLE_ROOT_TAG_START), INVISIBLE_ROOT_TAG_END)
            HTMLParser.feed(self, wrapped)
Example #19
Source File: Parser.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 5 votes vote down vote up
def _reset(self):
        '''
            _reset - Reset the underlying HTMLParser and clear root, doctype
              and the open-tag list. Assigned to .reset after __init__ call.
        '''
        HTMLParser.reset(self)

        self._inTag = []
        self.root = self.doctype = None
Example #20
Source File: Parser.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 5 votes vote down vote up
def __setstate__(self, state):
        '''
            __setstate__ - Restore state for loading pickle

                @param state <dict> - The state; each item is set as an
                  attribute on this object
        '''
        for attrName in state:
            setattr(self, attrName, state[attrName])

        # Python2 compat
        self.reset = self._reset
Example #21
Source File: Parser.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 5 votes vote down vote up
def __getstate__(self):
        '''
            __getstate__ - Get state for pickling

                @return <dict> - A shallow copy of this object's attributes
                  with the 'reset' entry left out
        '''
        # Work on a copy: deleting from self.__dict__ itself would strip the
        # live object's .reset attribute as a side effect of pickling.
        state = self.__dict__.copy()

        # Python2 compat
        state.pop('reset', None)

        return state
Example #22
Source File: Parser.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 5 votes vote down vote up
def __init__(self, filename=None, encoding='utf-8'):
        '''
            __init__ - Creates an Advanced HTML parser object. For read-only parsing, consider IndexedAdvancedHTMLParser for faster searching.

                @param filename <str>         - Optional filename to parse. Otherwise use parseFile or parseStr methods.
                @param encoding <str>         - Specifies the document encoding. Default utf-8

        '''
        HTMLParser.__init__(self)

        # Do not automatically convert charrefs in python3
        self.convert_charrefs = False

        # Empty document state
        self.root = None
        self.doctype = None
        self._inTag = []

        # Encoding to use for this document
        self.encoding = encoding

        # Must assign after first call, otherwise members won't yet be present
        self.reset = self._reset

        if filename is None:
            return
        self.parseFile(filename)

###########################################
#####        INTERNAL               #######
########################################### 
Example #23
Source File: HTMLDocsParser.py    From cFS-GroundSystem with Apache License 2.0 5 votes vote down vote up
def reset(self):
        """Start ``allData`` over as an empty list, then reset the base
        parser."""
        collected = []
        self.allData = collected
        HTMLParser.reset(self)

    #
    # Appends HTML file content to allData variable
    # 
Example #24
Source File: CommandParser.py    From cFS-GroundSystem with Apache License 2.0 5 votes vote down vote up
def reset(self):
        """Start ``allData`` and ``allhref`` over as empty lists, then reset
        the base parser."""
        self.allData, self.allhref = [], []
        HTMLParser.reset(self)

    #
    # Appends HTML file content to allData variable
    #