Python HTMLParser.HTMLParser.__init__() Examples

The following are 30 code examples of HTMLParser.HTMLParser.__init__(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module HTMLParser, or try the search function.
Example #1
Source File: scraping.py    From cryptocurrency_arbitrage with GNU General Public License v3.0 6 votes vote down vote up
def __init__(self):
        """Initialise the HTMLParser base and give every attribute its start value.

        The ``message*``, ``reserves*`` and ``*Online`` count attributes start
        as ``None``, ``messages`` starts as an empty list, and all ``in*`` and
        dev/support/admin online flags start as ``False``.
        """
        HTMLParser.__init__(self)

        self.messageId = self.messageTime = None
        self.messageUser = self.messageText = None
        self.messages = []
        self.reserves24change = self.reservesALFAcashier = None
        self.usersOnline = self.botsOnline = None

        # All "in..." flags start out cleared.
        for flag_name in ('inMessageA', 'inMessageSpan', 'in24changeSpan',
                          'inALFAcashierSpan', 'inUsersOnlineDiv'):
            setattr(self, flag_name, False)

        self.devOnline = self.supportOnline = self.adminOnline = False
Example #2
Source File: build_perf.py    From fplutil with Apache License 2.0 6 votes vote down vote up
def __init__(self, error, command, returnstatus, stdout=None, stderr=None):
    """Create the exception and record details of the failed command.

    Args:
      error: Error message; also handed to the base class initializer.
      command: Command that failed.
      returnstatus: Status of the command that failed.
      stdout: Standard output stream of the failed command.
      stderr: Standard error stream of the failed command.
    """
    super(CommandError, self).__init__(error)
    self.command, self.returnstatus = command, returnstatus
    self.stdout, self.stderr = stdout, stderr
    self.error = error
Example #3
Source File: ehp.py    From script.elementum.burst with Do What The F*ck You Want To Public License 6 votes vote down vote up
def __init__(self, data):
        """
        Create a node whose ``data`` attribute holds the characters.

        The node is initialised through Root.__init__ with the DATA
        constant as its name.

        Example:

        html = Html()
        data = '<body><em>alpha</em></body>'
        dom = html.feed(data)
        x = dom.fst('em')
        x.append(Data('\\nbeta'))

        It outputs.

        <body ><em >alpha
        beta</em></body>
        """

        Root.__init__(self, DATA)
        self.data = data
Example #4
Source File: Formatter.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 6 votes vote down vote up
def __init__(self, *args, **kwargs):
        '''
            __init__ - Create an AdvancedTagSlim object.

                @see AdvancedTag

                Extra arguments:

                  slimSelfClosing <bool> default False - If True, self-closing tags use the slim ending,

                    i.e. <br/> instead of <br />

                    This may break xhtml compatibility but modern browsers are okay with it.
        '''
        # Take our own option out of kwargs so AdvancedTag.__init__ never sees it.
        useSlimEnding = kwargs.pop('slimSelfClosing', False)
        AdvancedTag.__init__(self, *args, **kwargs)

        # Stored via object.__setattr__, bypassing any __setattr__ on the class.
        object.__setattr__(self, 'slimSelfClosing', useSlimEnding)
Example #5
Source File: ehp.py    From script.elementum.burst with Do What The F*ck You Want To Public License 5 votes vote down vote up
def __init__(self, data):
        """Initialise through Root.__init__ with the META constant and keep ``data``."""
        Root.__init__(self, META)
        self.data = data
Example #6
Source File: scraping.py    From cryptocurrency_arbitrage with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self):
        """Start with every data attribute ``None`` and the dev/support/admin flags ``False``."""
        self.messages = self.reserves24change = self.reservesALFAcashier = None
        self.usersOnline = self.botsOnline = None
        self.devOnline = self.supportOnline = self.adminOnline = False
Example #7
Source File: prettytable.py    From smod-1 with GNU General Public License v2.0 5 votes vote down vote up
def __init__(self, **kwargs):
        """Initialise the HTMLParser base, keep ``kwargs`` and reset the table state."""
        HTMLParser.__init__(self)
        self.kwargs = kwargs
        # Three separate list objects, one per attribute.
        self.tables, self.last_row, self.rows = [], [], []
        self.max_row_width = 0
        self.active = None
        self.last_content = ""
        self.is_last_row_header = False
Example #8
Source File: paraparser.py    From Fluid-Designer with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self,v):
        """Store ``v`` multiplied by 0.01 in ``self._value``."""
        # presumably v is a percentage being turned into a fraction -- TODO confirm
        self._value = v*0.01
Example #9
Source File: ehp.py    From script.elementum.burst with Do What The F*ck You Want To Public License 5 votes vote down vote up
def __init__(self, data):
        """Initialise through Root.__init__ with the COMMENT constant and keep ``data``."""
        Root.__init__(self, COMMENT)
        self.data = data
Example #10
Source File: ehp.py    From script.elementum.burst with Do What The F*ck You Want To Public License 5 votes vote down vote up
def __init__(self, name, attr=None):
        """
        See help(Tag).
        """
        # A missing attr is replaced by a fresh dict created on each call.
        Root.__init__(self, name, {} if attr is None else attr)
Example #11
Source File: gencon-hotel-check.py    From gencon-hotel-check with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, resp):
        """Parse the content of ``resp`` as part of construction.

        ``resp.read()`` must return bytes; they are decoded as UTF-8, fed to
        the parser, and the parser is then closed. ``self.json`` is set to
        ``None`` before any parsing happens.
        """
        HTMLParser.__init__(self)
        self.json = None
        raw_body = resp.read()
        page_text = raw_body.decode('utf8')
        self.feed(page_text)
        self.close()
Example #12
Source File: ehp.py    From script.elementum.burst with Do What The F*ck You Want To Public License 5 votes vote down vote up
def __init__(self, data):
        """Initialise through Root.__init__ with the AMP constant and keep ``data``."""
        Root.__init__(self, AMP)
        self.data = data
Example #13
Source File: ehp.py    From script.elementum.burst with Do What The F*ck You Want To Public License 5 votes vote down vote up
def __init__(self, data):
        """Initialise through Root.__init__ with the PI constant and keep ``data``."""
        Root.__init__(self, PI)
        self.data = data
Example #14
Source File: ehp.py    From script.elementum.burst with Do What The F*ck You Want To Public License 5 votes vote down vote up
def __init__(self):
        """Initialise the HTMLParser base and store a new ``Tree()`` in ``self.structure``."""
        HTMLParser.__init__(self)
        self.structure = Tree()
Example #15
Source File: recipe-576884.py    From code with MIT License 5 votes vote down vote up
def __init__(self, starting_url, depth, max_span):
        """Initialise the HTMLParser base and the crawl bookkeeping.

        ``starting_url`` is stored as ``url``, entered in ``db`` with the
        value 1 and placed as the only element of ``node``.
        ``depth`` is the maximum recursion depth and ``max_span`` the maximum
        number of links obtained per url; ``links_found`` starts at 0.
        """
        HTMLParser.__init__(self)
        self.url = starting_url
        self.db, self.node = {self.url: 1}, [self.url]

        self.depth, self.max_span = depth, max_span
        self.links_found = 0
Example #16
Source File: build_perf.py    From fplutil with Apache License 2.0 5 votes vote down vote up
def __init__(self):
    """Initialize this instance.

    Sets up the HTMLParser base, creates ``self.tables`` as a
    ``collections.defaultdict(list)`` and then calls
    ``self.reset_parse_state()``.
    """
    HTMLParser.__init__(self)
    # A missing key yields a new empty list on first access.
    self.tables = collections.defaultdict(list)
    self.reset_parse_state()
Example #17
Source File: ehp.py    From script.elementum.burst with Do What The F*ck You Want To Public License 5 votes vote down vote up
def __init__(self, name, attr=None):
        """
        Create a tag; ``name`` is the xml/html tag's name.

        ``attr`` is the attribute mapping; when it is None a new empty
        dict is used instead.

        Example:

        d = {'style': 'background:blue;'}
        x = Tag('p', d)
        """
        attributes = attr if attr is not None else {}
        Root.__init__(self, name, attributes)
Example #18
Source File: prettytable.py    From Hatkey with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, **kwargs):
        """Set up the HTMLParser base, remember ``kwargs`` and clear the table state."""
        HTMLParser.__init__(self)
        self.kwargs = kwargs
        # Each attribute gets its own new list.
        for list_attr in ("tables", "last_row", "rows"):
            setattr(self, list_attr, [])
        self.max_row_width, self.active = 0, None
        self.last_content, self.is_last_row_header = "", False
Example #19
Source File: parser.py    From webtech with GNU Lesser General Public License v3.0 5 votes vote down vote up
def __init__(self):
        """Initialise the HTMLParser base with an empty ``meta`` dict and ``scripts`` list."""
        HTMLParser.__init__(self)
        self.meta = {}
        self.scripts = []
Example #20
Source File: gsearch.py    From Google-Alfred3-Workflow with MIT License 5 votes vote down vote up
def __init__(self):
        """Initialise the HTMLParser base with an empty ``result_info`` list and empty ``link``/``title`` strings."""
        HTMLParser.__init__(self)
        self.result_info = []
        self.link = ''
        self.title = ''
Example #21
Source File: gsearch.py    From Google-Alfred3-Workflow with MIT License 5 votes vote down vote up
def __init__(self, query, port):
        """Prepare the search URL, request header string and proxy settings.

        ``query`` is UTF-8 encoded and stored as ``self.query``; the encoded
        value is URL-encoded as the ``q`` parameter of ``self.url``.
        ``port`` becomes ``SOCKS5_PROXY_PORT``; the proxy host is fixed to
        127.0.0.1.
        """
        encoded_query = query.encode('utf-8')
        self.query = encoded_query

        query_string = urlencode({'q': encoded_query})
        self.url = (u"http://www.google.com/search?"
                    + query_string
                    + u"&pws=0&gl=us&gws_rd=cr")

        # presumably sent as the User-Agent header -- TODO confirm against the caller
        self.header = 'Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101'
        self.SOCKS5_PROXY_HOST = '127.0.0.1'
        self.SOCKS5_PROXY_PORT = port
Example #22
Source File: Formatter.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 5 votes vote down vote up
def __init__(self, encoding='utf-8', slimSelfClosing=False):
        '''
            __init__ - Create an AdvancedHTMLSlimTagMiniFormatter

                @see AdvancedHTMLMiniFormatter

                @param encoding <str> Default 'utf-8' - Passed through to AdvancedHTMLMiniFormatter.__init__

                @param slimSelfClosing <bool> Default False - If True, will use slim self-closing tags,

                    e.g. <br /> becomes <br/>
        '''

        AdvancedHTMLMiniFormatter.__init__(self, encoding=encoding)

        self.slimSelfClosing = slimSelfClosing
Example #23
Source File: Formatter.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 5 votes vote down vote up
def __init__(self, indent='    ', encoding='utf-8', slimSelfClosing=False):
        '''
            __init__ - Construct an AdvancedHTMLSlimTagFormatter

                @see AdvancedHTMLFormatter

                @param indent Default '    ' [4 spaces] - Passed through to AdvancedHTMLFormatter.__init__

                @param encoding <str> Default 'utf-8' - Passed through to AdvancedHTMLFormatter.__init__

                @param slimSelfClosing <bool> Default False - If True, will use slim self-closing tags,

                    e.g. <br /> becomes <br/>
        '''

        AdvancedHTMLFormatter.__init__(self, indent=indent, encoding=encoding)

        self.slimSelfClosing = slimSelfClosing
Example #24
Source File: Formatter.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 5 votes vote down vote up
def __init__(self, indent='  ', encoding='utf-8'):
        '''
            Create a pretty formatter.

            @param indent <str/int>, Default '  ' [2 spaces] - Either a space/tab/newline that represents one level of indent, or an integer to use that number of spaces

            @param encoding <str/None>, Default 'utf-8', - Use this encoding for the document. None to not mess with encoding
        '''
        HTMLParser.__init__(self)

        # Do not automatically convert charrefs in python3
        self.convert_charrefs = False

        self.parsedData = []
        # Rebind reset on the instance to _reset; done only after
        # HTMLParser.__init__ above has already run.
        self.reset = self._reset
        self.decl = None
        self.currentIndentLevel = 0
        self.indent = indent
        self.encoding = encoding

        self.inPreformatted = 0

        self.root = None

        self._inTag = []
        self.doctype = None
Example #25
Source File: Parser.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 5 votes vote down vote up
def _reset(self):
        '''
            _reset - Reset this object. Assigned to .reset after __init__ call.

                Runs AdvancedHTMLParser.reset on this instance, then calls _resetIndexInternal().
        '''
        AdvancedHTMLParser.reset(self)

        self._resetIndexInternal()

#vim: set ts=4 sw=4 expandtab 
Example #26
Source File: Parser.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 5 votes vote down vote up
def __init__(self, filename=None, encoding='utf-8', indexIDs=True, indexNames=True, indexClassNames=True, indexTagNames=True):
        '''
            __init__ - Creates an Advanced HTML parser object, with specific indexing settings.

                For the various index* arguments, if True the index will be collected and used (if useIndex=True [default] on get* function)

                @param filename <str>         - Optional filename to parse. Otherwise use parseFile or parseStr methods.
                @param encoding <str> - Specifies the document encoding. Default utf-8
                @param indexIDs <bool>        - True to create an index for getElementByID method.  <default True>
                @param indexNames <bool>      - True to create an index for getElementsByName method  <default True>
                @param indexClassNames <bool> - True to create an index for getElementsByClassName method. <default True>
                @param indexTagNames <bool>   - True to create an index for tag names. <default True>

                For indexing other attributes, see the more generic addIndexOnAttribute

        '''
        # Index settings are stored before the base initializer runs.
        self.indexFunctions = []
        self.otherAttributeIndexFunctions = {}
        self._otherAttributeIndexes = {}
        self.indexIDs = indexIDs
        self.indexNames = indexNames
        self.indexClassNames = indexClassNames
        self.indexTagNames = indexTagNames

        self._resetIndexInternal()

        AdvancedHTMLParser.__init__(self, filename, encoding)

        # NOTE(review): filename is also passed to AdvancedHTMLParser.__init__ above;
        # if that initializer parses the file itself, this parses it a second
        # time -- confirm against the base class.
        if filename is not None:
            self.parseFile(filename)

###########################################
#####        INTERNAL               #######
########################################### 
Example #27
Source File: Parser.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 5 votes vote down vote up
def _reset(self):
        '''
            _reset - Reset this object. Assigned to .reset after __init__ call.

                Runs HTMLParser.reset, then clears root, doctype and the _inTag list.
        '''
        HTMLParser.reset(self)

        self.root = self.doctype = None
        self._inTag = []
Example #28
Source File: Parser.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 5 votes vote down vote up
def __init__(self, filename=None, encoding='utf-8'):
        '''
            __init__ - Creates an Advanced HTML parser object. For read-only parsing, consider IndexedAdvancedHTMLParser for faster searching.

                @param filename <str>         - Optional filename to parse. Otherwise use parseFile or parseStr methods.
                @param encoding <str>         - Specifies the document encoding. Default utf-8

        '''
        HTMLParser.__init__(self)
        # Do not automatically convert charrefs in python3
        self.convert_charrefs = False

        # Encoding to use for this document
        self.encoding = encoding

        self._inTag = []
        self.root = self.doctype = None

        # Must assign after first call, otherwise members won't yet be present
        self.reset = self._reset

        # Nothing to parse unless a filename was given.
        if filename is None:
            return
        self.parseFile(filename)

###########################################
#####        INTERNAL               #######
########################################### 
Example #29
Source File: prettytable.py    From paper.io.sessdsa with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, **kwargs):
        """Run the HTMLParser initializer, store ``kwargs`` and set the table state to its start values."""
        HTMLParser.__init__(self)
        # (attribute, start value) pairs, applied in this order.
        start_values = (
            ("kwargs", kwargs),
            ("tables", []),
            ("last_row", []),
            ("rows", []),
            ("max_row_width", 0),
            ("active", None),
            ("last_content", ""),
            ("is_last_row_header", False),
        )
        for attr_name, start_value in start_values:
            setattr(self, attr_name, start_value)
Example #30
Source File: fb_parser.py    From facebook-discussion-tk with MIT License 5 votes vote down vote up
def __init__(self, callback_found_name, callback_found_fb_id):
        """Initialise the HTMLParser base and remember the two callbacks.

        ``tag_level_a`` starts at 0 and ``tag_level_a_target`` at ``None``.
        """
        HTMLParser.__init__(self)
        (self.callback_found_name,
         self.callback_found_fb_id) = callback_found_name, callback_found_fb_id
        self.tag_level_a, self.tag_level_a_target = 0, None