Python html.parser.HTMLParser.__init__() Examples

The following are 30 code examples of html.parser.HTMLParser.__init__(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module html.parser, or try the search function.
Example #1
Source File: alerts.py    From elastalert with Apache License 2.0 6 votes vote down vote up
def __init__(self, *args):
        """Read SMTP options from the rule and normalise its recipient fields.

        Runs the parent initialiser with ``*args`` first.  Afterwards:

        * SMTP host, SSL flag, sender address, port, key file and cert file
          are copied from ``self.rule`` onto the instance (host defaults to
          'localhost', SSL to False, sender to 'ElastAlert').
        * If 'smtp_auth_file' is set, it is passed to ``self.get_account``.
        * 'email', and 'cc'/'bcc' when present and non-empty, are wrapped in
          a list if given as a single string.
        * A truthy 'email_add_domain' that lacks a leading '@' gets one.
        """
        super(EmailAlerter, self).__init__(*args)

        self.smtp_host = self.rule.get('smtp_host', 'localhost')
        self.smtp_ssl = self.rule.get('smtp_ssl', False)
        self.from_addr = self.rule.get('from_addr', 'ElastAlert')
        self.smtp_port = self.rule.get('smtp_port')
        auth_file = self.rule.get('smtp_auth_file')
        if auth_file:
            self.get_account(auth_file)
        self.smtp_key_file = self.rule.get('smtp_key_file')
        self.smtp_cert_file = self.rule.get('smtp_cert_file')

        # 'email' is looked up directly; a lone address becomes a one-item list.
        recipients = self.rule['email']
        if isinstance(recipients, str):
            self.rule['email'] = [recipients]

        # 'cc' and 'bcc' are optional and receive the same wrapping.
        for field in ('cc', 'bcc'):
            value = self.rule.get(field)
            if value and isinstance(value, str):
                self.rule[field] = [value]

        domain = self.rule.get('email_add_domain')
        if domain and not domain.startswith('@'):
            self.rule['email_add_domain'] = '@' + domain
Example #2
Source File: parser.py    From html-table-parser-python3 with GNU Affero General Public License v3.0 6 votes vote down vote up
def __init__(
        self,
        decode_html_entities=False,
        data_separator=' ',
    ):
        """Prepare a parser with no tables collected yet.

        :param decode_html_entities: forwarded to ``HTMLParser.__init__`` as
            its ``convert_charrefs`` argument.
        :param data_separator: kept on the instance as ``_data_separator``.
        """
        HTMLParser.__init__(self, convert_charrefs=decode_html_entities)

        # Collected result plus separate buffers for table, row and cell.
        self.tables = []
        self._current_table, self._current_row, self._current_cell = [], [], []

        # Both cell flags start cleared.
        self._in_td = self._in_th = False

        self._data_separator = data_separator
Example #3
Source File: Formatter.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 6 votes vote down vote up
def __init__(self, *args, **kwargs):
        '''
            __init__ - Create an AdvancedTagSlim object.

                @see AdvancedTag

                Extra keyword argument (removed from kwargs before the rest is forwarded to AdvancedTag.__init__):

                  slimSelfClosing <bool> default False - If True, will use slim-endings on self-closing tags,

                    i.e. <br/> instead of <br />

                    This may break xhtml compatibility but modern browsers are okay with it.
        '''
        # A missing key yields False, the documented default.
        slimSelfClosing = kwargs.pop('slimSelfClosing', False)
        AdvancedTag.__init__(self, *args, **kwargs)

        # Stored through object.__setattr__ instead of a plain attribute assignment.
        object.__setattr__(self, 'slimSelfClosing', slimSelfClosing)
Example #4
Source File: spruce.py    From Spruce with GNU General Public License v3.0 6 votes vote down vote up
def __init__(self, obj_type, results, heading, metadata):
        """Store the pieces that make up a report.

        Args:
            obj_type: String object type name (as returned by
                device_type).
            results: List of Result objects that belong in the report.
            heading: String heading that describes the report.
            metadata: Dictionary of additional data to output, mapping a
                heading name to another dictionary that maps a
                subheading name to the string of data to print.
        """
        self.obj_type, self.results = obj_type, results
        self.heading, self.metadata = heading, metadata
Example #5
Source File: HTML.py    From Jtyoui with MIT License 6 votes vote down vote up
def __init__(self, start_tag, start_attr, end_tag, end_attr):
        """Set up the parser; usage is quite simple.

        html = '<div class="declare" id="J-declare">Notice: anyone can edit entries.<a class="declare-details"></a>'
        p = ParseHtml('div', ['class="declare"'], 'a', ['class="declare-details"'])
        p.feed(html)
        print(p.get_data())  # Notice: anyone can edit entries.

        :param start_tag: start tag, required
        :param start_attr: attributes of the start tag; must be a list. Pass an empty list [] if there are none
        :param end_tag: end tag, required
        :param end_attr: attributes of the end tag; must be a list. Pass an empty list [] if there are none
        """
        HTMLParser.__init__(self)
        # No data collected yet and the flag starts cleared.
        self._data, self._flag = '', False
        # Both attribute lists go through self._split before being stored.
        self._start_tag = start_tag
        self._start_attr = self._split(start_attr)
        self._end_tag = end_tag
        self._end_attr = self._split(end_attr)
Example #6
Source File: ehp.py    From ehp with MIT License 6 votes vote down vote up
def __init__(self, data):
        """
        Initialise the Root base with DATA and keep the characters
        passed as data on the instance.

        Example:

        html = Html()
        data = '<body><em>alpha</em></body>'
        dom = html.feed(data)
        x = dom.fst('em')
        x.append(Data('\nbeta'))

        It outputs.

        <body ><em >alpha
        beta</em></body>
        """

        Root.__init__(self, DATA)
        self.data = data
Example #7
Source File: ehp.py    From ehp with MIT License 5 votes vote down vote up
def __init__(self):
        """
        Create outmost, the Root('') struct which will hold all data
        inside the file, and start the stack with it as its only entry.
        """
        self.outmost = Root('')
        self.stack = deque([self.outmost])
Example #8
Source File: alerts.py    From elastalert with Apache License 2.0 5 votes vote down vote up
def __init__(self, rule):
        """Keep *rule*, start without a pipeline and resolve the rule's references."""
        # pipeline object is created by ElastAlerter.send_alert()
        # and attached to each alerters used by a rule before calling alert()
        self.pipeline = None
        self.rule = rule
        self.resolve_rule_references(self.rule)
Example #9
Source File: ehp.py    From ehp with MIT License 5 votes vote down vote up
def __init__(self):
        """Initialise the HTMLParser base and attach a new Tree as ``struct``."""
        HTMLParser.__init__(self)
        self.struct = Tree()
Example #10
Source File: gencon-hotel-check.py    From gencon-hotel-check with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, resp):
		"""Parse the content of *resp* right away.

		The result of ``resp.read()`` is decoded as UTF-8, fed to the
		parser in one call, and the parser is then closed.  ``json`` is
		set to ``None`` before any data is fed.
		"""
		HTMLParser.__init__(self)
		self.json = None
		body = resp.read().decode('utf8')
		self.feed(body)
		self.close()
Example #11
Source File: html.py    From selenium-python-helium with MIT License 5 votes vote down vote up
def __init__(self):
		"""Initialise the HTMLParser base, reset the parser and start with an empty ``fed`` list."""
		HTMLParser.__init__(self)
		# Explicit reset directly after base initialisation, before any data is fed.
		self.reset()
		self.fed = []
Example #12
Source File: help.py    From Fluid-Designer with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, parent, filename):
        """Build the text widget, configure its tags and render *filename*.

        Width and height come from the 'main' / 'EditorWindow' options
        (the height is scaled by 3/4).  The file is read as UTF-8 and fed
        to a HelpParser created for this widget, after which the widget
        state is set to 'disabled'.
        """
        def editor_option(name):
            # Integer option from the 'EditorWindow' section of 'main'.
            return idleConf.GetOption('main', 'EditorWindow', name, type='int')

        uwide = editor_option('width')
        # lines average 4/3 of editor line height
        uhigh = 3 * editor_option('height') // 4
        Text.__init__(self, parent, wrap='word', highlightthickness=0,
                      padx=5, borderwidth=0, width=uwide, height=uhigh)

        normalfont = self.findfont(['TkDefaultFont', 'arial', 'helvetica'])
        fixedfont = self.findfont(['TkFixedFont', 'monaco', 'courier'])
        self['font'] = (normalfont, 12)

        # Tags drawn in the normal font: (tag, size, style).
        for tag, size, style in (('em', 12, 'italic'), ('h1', 20, 'bold'),
                                 ('h2', 18, 'bold'), ('h3', 15, 'bold')):
            self.tag_configure(tag, font=(normalfont, size, style))

        # Tags drawn in the fixed font.
        self.tag_configure('pre', font=(fixedfont, 12), background='#f6f6ff')
        self.tag_configure('preblock', font=(fixedfont, 10), lmargin1=25,
                borderwidth=1, relief='solid', background='#eeffcc')

        # Margin tags: both margins share one value per tag.
        for tag, margin in (('l1', 25), ('l2', 50), ('l3', 75), ('l4', 100)):
            self.tag_configure(tag, lmargin1=margin, lmargin2=margin)

        self.parser = HelpParser(self)
        with open(filename, encoding='utf-8') as f:
            contents = f.read()
        self.parser.feed(contents)
        self['state'] = 'disabled'
Example #13
Source File: formatting.py    From CloudBot with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self):
        """Initialise the HTMLParser base and start with an empty ``result`` list."""
        HTMLParser.__init__(self)
        self.result = []
Example #14
Source File: paraparser.py    From Fluid-Designer with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self,v):
        """Store ``v`` multiplied by 0.01 as ``_value``."""
        self._value = v*0.01
Example #15
Source File: paraparser.py    From Fluid-Designer with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self,verbose=0, caseSensitive=0, ignoreUnknownTags=1):
        """Initialise the HTMLParser base and remember the three option flags.

        ``convert_charrefs=False`` is passed to ``HTMLParser.__init__`` only
        on Python 3.4 and later; on older versions no keyword is sent.
        """
        if sys.version_info >= (3, 4):
            base_kwargs = {'convert_charrefs': False}
        else:
            base_kwargs = {}
        HTMLParser.__init__(self, **base_kwargs)

        self.verbose = verbose
        # HTMLParser is case insensitive anyway, but the rml interface still needs this;
        # all start/end_ methods should have a lower case version for HTMLParser
        self.caseSensitive = caseSensitive
        self.ignoreUnknownTags = ignoreUnknownTags
Example #16
Source File: help.py    From Fluid-Designer with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, text):
        """Create a parser bound to *text* with all rendering state cleared.

        The HTMLParser base is initialised with ``convert_charrefs=True``.
        """
        HTMLParser.__init__(self, convert_charrefs=True)
        self.text = text  # text widget we're rendering into

        # String state, all empty at the start:
        #   tags     - current block level text tags to apply
        #   chartags - current character level text tags
        #   hprefix  - prefix such as '25.5' to strip from headings
        #   header   - text within header tags for toc
        self.tags = self.chartags = self.hprefix = self.header = ''

        # Boolean state, all off at the start:
        #   show       - used so we exclude page navigation
        #   hdrlink    - used so we don't show header links
        #   pre        - displaying preformatted text
        #   nested_dl  - if we're in a nested <dl>
        #   simplelist - simple list (no double spacing)
        self.show = self.hdrlink = self.pre = False
        self.nested_dl = self.simplelist = False

        self.level = 0  # indentation level
        self.toc = []   # pair headers with text indexes for toc
Example #17
Source File: alerts.py    From elastalert with Apache License 2.0 5 votes vote down vote up
def __init__(self, rule, match):
        """Keep the given *rule* and *match* on the instance."""
        self.rule, self.match = rule, match
Example #18
Source File: Parser.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 5 votes vote down vote up
def __init__(self, filename=None, encoding='utf-8', indexIDs=True, indexNames=True, indexClassNames=True, indexTagNames=True):
        '''
            __init__ - Creates an Advanced HTML parser object, with specific indexing settings.

                For the various index* arguments, if True the index will be collected and used (if useIndex=True [default] on get* function)

                @param filename <str>         - Optional filename to parse. Otherwise use parseFile or parseStr methods.
                @param encoding <str>         - Specifies the document encoding. Default utf-8
                @param indexIDs <bool>        - True to create an index for getElementByID method.  <default True>
                @param indexNames <bool>      - True to create an index for getElementsByName method  <default True>
                @param indexClassNames <bool> - True to create an index for getElementsByClassName method. <default True>
                @param indexTagNames <bool>   - True to create an index for tag names. <default True>

                For indexing other attributes, see the more generic addIndexOnAttribute

        '''
        # Index bookkeeping is set up before the internal index reset is invoked.
        self.indexFunctions = []
        self.otherAttributeIndexFunctions, self._otherAttributeIndexes = {}, {}
        self.indexIDs, self.indexNames = indexIDs, indexNames
        self.indexClassNames, self.indexTagNames = indexClassNames, indexTagNames

        self._resetIndexInternal()

        AdvancedHTMLParser.__init__(self, filename, encoding)

        # Nothing to parse unless a filename was supplied.
        if filename is None:
            return
        self.parseFile(filename)

###########################################
#####        INTERNAL               #######
########################################### 
Example #19
Source File: ehp.py    From ehp with MIT License 5 votes vote down vote up
def __init__(self, data):
        """Initialise the Root base with COMMENT and keep *data* on the instance."""
        Root.__init__(self, COMMENT)
        self.data = data
Example #20
Source File: ehp.py    From ehp with MIT License 5 votes vote down vote up
def __init__(self, data):
        """Initialise the Root base with PI and keep *data* on the instance."""
        Root.__init__(self, PI)
        self.data = data
Example #21
Source File: ehp.py    From ehp with MIT License 5 votes vote down vote up
def __init__(self, data):
        """Initialise the Root base with AMP and keep *data* on the instance."""
        Root.__init__(self, AMP)
        self.data = data
Example #22
Source File: ehp.py    From ehp with MIT License 5 votes vote down vote up
def __init__(self, data):
        """Initialise the Root base with META and keep *data* on the instance."""
        Root.__init__(self, META)
        self.data = data
Example #23
Source File: ehp.py    From ehp with MIT License 5 votes vote down vote up
def __init__(self, name, attr=None):
        """
        See help(Tag).

        name is forwarded to Root.__init__ unchanged.  attr is the
        attribute mapping; when it is omitted (or None) a new empty dict
        is created for this call.
        """
        # A literal {} default is a single dict object shared by every call
        # that omits attr; build a fresh one per call instead.
        Root.__init__(self, name, {} if attr is None else attr)
Example #24
Source File: ehp.py    From ehp with MIT License 5 votes vote down vote up
def __init__(self, name, attr=None):
        """
        The parameter name is the xml/html tag's name.

        attr is the attribute mapping; when it is omitted (or None) a new
        empty dict is created for this call.

        Example:

        d = {'style': 'background:blue;'}
        x = Tag('p', d)
        """
        # A literal {} default is a single dict object shared by every call
        # that omits attr; build a fresh one per call instead.
        Root.__init__(self, name, {} if attr is None else attr)
Example #25
Source File: ehp.py    From ehp with MIT License 5 votes vote down vote up
def __init__(self, name=None, attr=None):
        """
        Store name, wrap the attributes in an Attribute and initialise
        the underlying list as empty.

        name defaults to None.  attr is the mapping handed to Attribute();
        when it is omitted (or None) a new empty dict is used for this call.
        """

        self.name = name
        # A literal {} default is a single dict object shared by every call
        # that omits attr; build a fresh one per call instead.
        self.attr = Attribute({} if attr is None else attr)

        list.__init__(self)
Example #26
Source File: Formatter.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 5 votes vote down vote up
def __init__(self, encoding='utf-8', slimSelfClosing=False):
        '''
            __init__ - Create an AdvancedHTMLSlimTagMiniFormatter

                @see AdvancedHTMLMiniFormatter

                @param encoding <str> Default 'utf-8' - Passed on to AdvancedHTMLMiniFormatter.__init__

                @param slimSelfClosing <bool> Default False - If True, will use slim self-closing tags,

                    e.g. <br /> becomes <br/>
        '''

        AdvancedHTMLMiniFormatter.__init__(self, encoding=encoding)

        self.slimSelfClosing = slimSelfClosing
Example #27
Source File: Formatter.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 5 votes vote down vote up
def __init__(self, indent='    ', encoding='utf-8', slimSelfClosing=False):
        '''
            __init__ - Construct an AdvancedHTMLSlimTagFormatter

                @see AdvancedHTMLFormatter

                @param indent Default '    ' [4 spaces] - Passed on to AdvancedHTMLFormatter.__init__

                @param encoding <str> Default 'utf-8' - Passed on to AdvancedHTMLFormatter.__init__

                @param slimSelfClosing <bool> Default False - If True, will use slim self-closing tags,

                    e.g. <br /> becomes <br/>
        '''

        AdvancedHTMLFormatter.__init__(self, indent=indent, encoding=encoding)

        self.slimSelfClosing = slimSelfClosing
Example #28
Source File: Formatter.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 5 votes vote down vote up
def __init__(self, indent='  ', encoding='utf-8'):
        '''
            Create a pretty formatter.

            @param indent <str/int>, Default '  ' [2 spaces] - Either a space/tab/newline that represents one level of indent, or an integer to use that number of spaces (the value is stored as given here)

            @param encoding <str/None>, Default 'utf-8', - Use this encoding for the document. None to not mess with encoding
        '''
        HTMLParser.__init__(self)

        # Do not automatically convert charrefs in python3
        self.convert_charrefs = False

        # From here on, .reset on this instance refers to _reset.
        self.reset = self._reset

        self.indent, self.encoding = indent, encoding

        # Separate empty lists for the parsed data and the open-tag tracking.
        self.parsedData, self._inTag = [], []

        # Nothing seen yet: no declaration, no root, no doctype.
        self.decl = self.root = self.doctype = None

        # Both counters start at zero.
        self.currentIndentLevel = self.inPreformatted = 0
Example #29
Source File: Parser.py    From AdvancedHTMLParser with GNU Lesser General Public License v3.0 5 votes vote down vote up
def _reset(self):
        '''
            _reset - reset this object. Assigned to .reset after __init__ call.

            Calls AdvancedHTMLParser.reset first and then _resetIndexInternal.
        '''
        AdvancedHTMLParser.reset(self)

        self._resetIndexInternal()

#vim: set ts=4 sw=4 expandtab 
Example #30
Source File: epr.py    From epr with MIT License 5 votes vote down vote up
def __init__(self):
        """Initialise the HTMLParser base with empty buffers and cleared state.

        ``text`` starts as a list holding one empty string and ``imgs`` as an
        empty list; every ``is*`` flag is False and every ``id*`` attribute
        is its own empty set.
        """
        HTMLParser.__init__(self)
        self.text, self.imgs = [""], []
        for flag in ("ishead", "isinde", "isbull", "ispref", "ishidden"):
            setattr(self, flag, False)
        # A new set per attribute so that they never share contents.
        for ids in ("idhead", "idinde", "idbull", "idpref"):
            setattr(self, ids, set())