Python Examples of html.parser.HTMLParser.__init__

Source File: alerts.py From elastalert with Apache License 2.0

6 votes

def __init__(self, *args):
        """Read SMTP settings from the rule and normalise its address fields.

        All positional arguments are forwarded unchanged to the parent
        initialiser. Afterwards ``self.rule`` is consulted for the SMTP
        options, and the ``email``, ``cc`` and ``bcc`` entries are wrapped in
        a list when they are given as a single string. ``email`` must be
        present (a missing key raises ``KeyError``); ``cc`` and ``bcc`` are
        optional.
        """
        super(EmailAlerter, self).__init__(*args)

        # Connection settings; only host, SSL flag and sender have defaults.
        self.smtp_host = self.rule.get('smtp_host', 'localhost')
        self.smtp_ssl = self.rule.get('smtp_ssl', False)
        self.from_addr = self.rule.get('from_addr', 'ElastAlert')
        self.smtp_port = self.rule.get('smtp_port')

        # get_account() is only called when an auth file is configured.
        auth_file = self.rule.get('smtp_auth_file')
        if auth_file:
            self.get_account(auth_file)
        self.smtp_key_file = self.rule.get('smtp_key_file')
        self.smtp_cert_file = self.rule.get('smtp_cert_file')

        # A single recipient given as a string becomes a one-item list.
        recipients = self.rule['email']
        if isinstance(recipients, str):
            self.rule['email'] = [recipients]

        # Same normalisation for the optional cc and bcc entries.
        for field in ('cc', 'bcc'):
            addresses = self.rule.get(field)
            if addresses and isinstance(addresses, str):
                self.rule[field] = [addresses]

        # Make sure a configured domain suffix begins with '@'.
        domain = self.rule.get('email_add_domain')
        if domain and not domain.startswith('@'):
            self.rule['email_add_domain'] = '@' + domain

Source File: parser.py From html-table-parser-python3 with GNU Affero General Public License v3.0

6 votes

def __init__(self, decode_html_entities=False, data_separator=' '):
        """Initialise the HTMLParser base and the table-collection state.

        :param decode_html_entities: forwarded to ``HTMLParser.__init__`` as
            its ``convert_charrefs`` argument.
        :param data_separator: stored unchanged on ``self._data_separator``.
        """
        HTMLParser.__init__(self, convert_charrefs=decode_html_entities)

        self._data_separator = data_separator

        # Both cell flags start cleared.
        self._in_td = self._in_th = False
        # Three independent, initially empty accumulators.
        self._current_table, self._current_row, self._current_cell = [], [], []
        # Public list, empty until something is appended to it.
        self.tables = []

Source File: Formatter.py From AdvancedHTMLParser with GNU Lesser General Public License v3.0

6 votes

def __init__(self, *args, **kwargs):
        '''
            __init__ - Create an AdvancedTagSlim object.

                @see AdvancedTag

                Extra arguments:

                  slimSelfClosing <bool> default False - If True, will use slim-endings on self-closing tags,
                    i.e. <br/> instead of <br />

                    This may break xhtml compatibility but modern browsers are okay with it.
        '''
        # Take our own option out of kwargs first, so AdvancedTag.__init__ never receives it.
        slimSelfClosing = kwargs.pop('slimSelfClosing', False)
        AdvancedTag.__init__(self, *args, **kwargs)

        # The flag is stored through object.__setattr__ instead of a plain assignment.
        object.__setattr__(self, 'slimSelfClosing', slimSelfClosing)

Source File: spruce.py From Spruce with GNU General Public License v3.0

6 votes

def __init__(self, obj_type, results, heading, metadata):
        """Store the pieces that make up one report.

        Args:
            obj_type: String object type name (as returned by
                device_type).
            results: A list of Result objects to include in the
                report.
            heading: String heading describing the report.
            metadata: Dictionary of other data you want to output.
                key: Heading name.
                val: Another dictionary, with:
                    key: Subheading name.
                    val: String of data to print.
        """
        # Every argument is kept as-is; nothing is copied or validated.
        self.obj_type, self.results = obj_type, results
        self.heading, self.metadata = heading, metadata

Source File: HTML.py From Jtyoui with MIT License

6 votes

def __init__(self, start_tag, start_attr, end_tag, end_attr):
        """Usage is quite simple.

        html = '<div class="declare" id="J-declare">Notice: encyclopedia entries can be edited by anyone.<a class="declare-details"></a>'
        p = ParseHtml('div', ['class="declare"'], 'a', ['class="declare-details"'])
        p.feed(html)
        print(p.get_data())  # Notice: encyclopedia entries can be edited by anyone.

        :param start_tag: the start tag; required
        :param start_attr: attributes of the start tag; this must be a list. Pass an empty list [] if there are none
        :param end_tag: the end tag; required
        :param end_attr: attributes of the end tag; this must be a list. Pass an empty list [] if there are none
        """
        HTMLParser.__init__(self)
        # Collected data starts as an empty string and the flag starts cleared.
        self._data = ''
        self._flag = False
        self._start_tag = start_tag
        # Attribute lists are stored in whatever form self._split returns.
        self._start_attr = self._split(start_attr)
        self._end_tag = end_tag
        self._end_attr = self._split(end_attr)

Source File: ehp.py From ehp with MIT License

6 votes

def __init__(self, data):
        """
        Initialise the node through Root.__init__ with the DATA constant
        and keep the given characters on self.data.

        Example:

        html = Html()
        data = '<body><em>alpha</em></body>'
        dom = html.feed(data)
        x = dom.fst('em')
        x.append(Data('\nbeta'))

        It outputs:

        <body ><em >alpha
        beta</em></body>
        """

        Root.__init__(self, DATA)
        # The characters are stored unchanged.
        self.data = data

Source File: ehp.py From ehp with MIT License

5 votes

def __init__(self):
        """
        Create outmost, the structure which will hold all data inside
        the file, and a stack whose only initial entry is that structure.
        """

        root = Root('')
        self.outmost = root

        # The stack starts out holding just the outermost node.
        self.stack = deque([root])

Source File: alerts.py From elastalert with Apache License 2.0

5 votes

def __init__(self, rule):
        """Keep the rule, leave the pipeline unset and resolve rule references.

        :param rule: stored on ``self.rule`` and then handed to
            ``self.resolve_rule_references``.
        """
        # The pipeline object is created by ElastAlerter.send_alert()
        # and attached to each alerter used by a rule before calling alert().
        self.rule, self.pipeline = rule, None
        self.resolve_rule_references(rule)

Source File: ehp.py From ehp with MIT License

5 votes

def __init__(self):
        """
        Run HTMLParser.__init__ and attach a new Tree instance as
        self.struct.
        """
        HTMLParser.__init__(self)
        self.struct = Tree()

Source File: gencon-hotel-check.py From gencon-hotel-check with GNU General Public License v3.0

5 votes

def __init__(self, resp):
		"""Parse the complete response body while the parser is being created.

		``resp`` must offer ``read()`` returning bytes; the bytes are decoded
		as UTF-8, fed to the parser in one go, and the parser is then closed.
		``self.json`` is set to ``None`` before any parsing happens.
		"""
		HTMLParser.__init__(self)
		self.json = None
		body = resp.read()
		self.feed(body.decode('utf8'))
		self.close()

Source File: html.py From selenium-python-helium with MIT License

5 votes

def __init__(self):
		"""Initialise the HTMLParser base and start with an empty ``fed`` list."""
		HTMLParser.__init__(self)
		# NOTE(review): reset() is called explicitly right after the base
		# initialiser; the standard HTMLParser initialiser appears to call
		# reset() itself, so this may be redundant - confirm before removing.
		self.reset()
		self.fed = []

Source File: help.py From Fluid-Designer with GNU General Public License v3.0

5 votes

def __init__(self, parent, filename):
        """Configure tags and feed file to parser.

        The widget size comes from the 'main' / 'EditorWindow' options, the
        text tags are configured, and the UTF-8 file *filename* is read and
        fed to a HelpParser bound to this widget. The widget state is set to
        'disabled' at the end.
        """
        width = idleConf.GetOption('main', 'EditorWindow', 'width', type='int')
        height = idleConf.GetOption('main', 'EditorWindow', 'height', type='int')
        # lines average 4/3 of editor line height
        height = 3 * height // 4
        Text.__init__(self, parent, wrap='word', highlightthickness=0,
                      padx=5, borderwidth=0, width=width, height=height)

        normalfont = self.findfont(['TkDefaultFont', 'arial', 'helvetica'])
        fixedfont = self.findfont(['TkFixedFont', 'monaco', 'courier'])
        self['font'] = (normalfont, 12)
        self.tag_configure('em', font=(normalfont, 12, 'italic'))
        # Heading tags differ only in their font size.
        for heading, size in (('h1', 20), ('h2', 18), ('h3', 15)):
            self.tag_configure(heading, font=(normalfont, size, 'bold'))
        self.tag_configure('pre', font=(fixedfont, 12), background='#f6f6ff')
        self.tag_configure('preblock', font=(fixedfont, 10), lmargin1=25,
                borderwidth=1, relief='solid', background='#eeffcc')
        # Indent tags l1..l4 use margins of 25, 50, 75 and 100.
        for depth in range(1, 5):
            margin = 25 * depth
            self.tag_configure('l%d' % depth, lmargin1=margin, lmargin2=margin)

        self.parser = HelpParser(self)
        with open(filename, encoding='utf-8') as source:
            self.parser.feed(source.read())
        self['state'] = 'disabled'

Source File: formatting.py From CloudBot with GNU General Public License v3.0

5 votes

def __init__(self):
        """Initialise the HTMLParser base and start with an empty ``result`` list."""
        HTMLParser.__init__(self)
        self.result = []

Source File: paraparser.py From Fluid-Designer with GNU General Public License v3.0

5 votes

def __init__(self,v):
        """Keep ``v`` multiplied by 0.01 on ``self._value``."""
        # NOTE(review): presumably turns a percentage into a fraction - confirm with callers.
        self._value = v*0.01

Source File: paraparser.py From Fluid-Designer with GNU General Public License v3.0

5 votes

def __init__(self, verbose=0, caseSensitive=0, ignoreUnknownTags=1):
        """Initialise the HTMLParser base and record the three option flags.

        On Python 3.4 and later the base class is created with
        ``convert_charrefs=False``; on older versions no extra keyword is
        passed. The three arguments are stored unchanged under the same
        attribute names.
        """
        parser_options = {}
        if sys.version_info >= (3, 4):
            parser_options['convert_charrefs'] = False
        HTMLParser.__init__(self, **parser_options)

        self.verbose = verbose
        # HTMLParser is case insensitive anyway, but the rml interface still
        # needs this; all start_/end_ methods should have a lower-case
        # version for HTMLParser.
        self.caseSensitive = caseSensitive
        self.ignoreUnknownTags = ignoreUnknownTags

Source File: help.py From Fluid-Designer with GNU General Public License v3.0

5 votes

def __init__(self, text):
        """Initialise the HTMLParser base (charrefs converted) and the render state.

        :param text: the text widget we're rendering into; stored on
            ``self.text``.
        """
        HTMLParser.__init__(self, convert_charrefs=True)
        self.text = text

        # tags: current block level text tags to apply
        # chartags: current character level text tags
        self.tags = self.chartags = ''

        # show: used so we exclude page navigation
        # hdrlink: used so we don't show header links
        self.show = self.hdrlink = False

        # indentation level
        self.level = 0

        # pre: displaying preformatted text
        # nested_dl: if we're in a nested <dl>
        # simplelist: simple list (no double spacing)
        self.pre = self.nested_dl = self.simplelist = False

        # prefix such as '25.5' to strip from headings
        self.hprefix = ''

        # toc: pair headers with text indexes for toc
        # header: text within header tags for toc
        self.toc = []
        self.header = ''

Source File: alerts.py From elastalert with Apache License 2.0

5 votes

def __init__(self, rule, match):
        """Keep the rule and the match on the instance, unchanged."""
        self.rule, self.match = rule, match

Source File: Parser.py From AdvancedHTMLParser with GNU Lesser General Public License v3.0

5 votes

def __init__(self, filename=None, encoding='utf-8', indexIDs=True, indexNames=True, indexClassNames=True, indexTagNames=True):
        '''
            __init__ - Creates an Advanced HTML parser object, with specific indexing settings.

                For the various index* arguments, if True the index will be collected and used (if useIndex=True [default] on get* function)

                @param filename <str>         - Optional filename to parse. Otherwise use parseFile or parseStr methods.
                @param encoding <str>         - Specifies the document encoding. Default utf-8
                @param indexIDs <bool>        - True to create an index for getElementByID method.  <default True>
                @param indexNames <bool>      - True to create an index for getElementsByName method  <default True>
                @param indexClassNames <bool> - True to create an index for getElementsByClassName method. <default True>
                @param indexTagNames <bool>   - True to create an index for tag names. <default True>

                For indexing other attributes, see the more generic addIndexOnAttribute

        '''
        # Index bookkeeping is set up before the parent initialiser runs.
        self.indexFunctions = []
        self.otherAttributeIndexFunctions = {}
        self._otherAttributeIndexes = {}
        self.indexIDs = indexIDs
        self.indexNames = indexNames
        self.indexClassNames = indexClassNames
        self.indexTagNames = indexTagNames

        self._resetIndexInternal()

        AdvancedHTMLParser.__init__(self, filename, encoding)

        # NOTE(review): filename is also passed to the parent initialiser above;
        # confirm whether that already parses the file before this call does.
        if filename is not None:
            self.parseFile(filename)

###########################################
#####        INTERNAL               #######
###########################################

Source File: ehp.py From ehp with MIT License

5 votes

def __init__(self, data):
        """
        Initialise the node through Root.__init__ with the COMMENT
        constant and keep data unchanged on self.data.
        """
        Root.__init__(self, COMMENT)
        self.data = data

Source File: ehp.py From ehp with MIT License

5 votes

def __init__(self, data):
        """
        Initialise the node through Root.__init__ with the PI constant
        and keep data unchanged on self.data.
        """
        Root.__init__(self, PI)
        self.data = data

Source File: ehp.py From ehp with MIT License

5 votes

def __init__(self, data):
        """
        Initialise the node through Root.__init__ with the AMP constant
        and keep data unchanged on self.data.
        """
        Root.__init__(self, AMP)
        self.data = data

Source File: ehp.py From ehp with MIT License

5 votes

def __init__(self, data):
        """
        Initialise the node through Root.__init__ with the META constant
        and keep data unchanged on self.data.
        """
        Root.__init__(self, META)
        self.data = data

Source File: ehp.py From ehp with MIT License

5 votes

def __init__(self, name, attr=None):
        """
        See help(Tag).

        name is forwarded to Root.__init__ unchanged. When attr is omitted
        (or None) a fresh empty dict is forwarded, so separate calls never
        share one mutable default dictionary.
        """
        Root.__init__(self, name, {} if attr is None else attr)

Source File: ehp.py From ehp with MIT License

5 votes

def __init__(self, name, attr=None):
        """
        The parameter name is the xml/html tag's name.

        Example:

        d = {'style': 'background:blue;'}
        x = Tag('p', d)

        When attr is omitted (or None) a fresh empty dict is forwarded to
        Root.__init__, so separate calls never share one mutable default
        dictionary.
        """
        Root.__init__(self, name, {} if attr is None else attr)

Source File: ehp.py From ehp with MIT License

5 votes

def __init__(self, name=None, attr=None):
        """
        Store the node name, wrap the attributes in an Attribute object
        and initialise the underlying list as empty.

        When attr is omitted (or None) a fresh empty dict is used, so
        separate calls never share one mutable default dictionary.
        """

        self.name = name
        self.attr = Attribute({} if attr is None else attr)

        list.__init__(self)

Source File: Formatter.py From AdvancedHTMLParser with GNU Lesser General Public License v3.0

5 votes

def __init__(self, encoding='utf-8', slimSelfClosing=False):
        '''
            __init__ - Create an AdvancedHTMLSlimTagMiniFormatter

                @see AdvancedHTMLMiniFormatter

                @param encoding <str> Default 'utf-8' - Passed through to AdvancedHTMLMiniFormatter.__init__

                @param slimSelfClosing <bool> Default False - If True, will use slim self-closing tags,

                    e.g. <br /> becomes <br/>
        '''

        AdvancedHTMLMiniFormatter.__init__(self, encoding=encoding)

        # Set after the parent initialiser has run.
        self.slimSelfClosing = slimSelfClosing

Source File: Formatter.py From AdvancedHTMLParser with GNU Lesser General Public License v3.0

5 votes

def __init__(self, indent='    ', encoding='utf-8', slimSelfClosing=False):
        '''
            __init__ - Construct an AdvancedHTMLSlimTagFormatter

                @see AdvancedHTMLFormatter

                @param indent <str> Default '    ' [4 spaces] - Passed through to AdvancedHTMLFormatter.__init__

                @param encoding <str> Default 'utf-8' - Passed through to AdvancedHTMLFormatter.__init__

                @param slimSelfClosing <bool> Default False - If True, will use slim self-closing tags,

                    e.g. <br /> becomes <br/>
        '''

        AdvancedHTMLFormatter.__init__(self, indent=indent, encoding=encoding)

        # Set after the parent initialiser has run.
        self.slimSelfClosing = slimSelfClosing

Source File: Formatter.py From AdvancedHTMLParser with GNU Lesser General Public License v3.0

5 votes

def __init__(self, indent='  ', encoding='utf-8'):
        '''
            Create a pretty formatter.

            @param indent <str/int>, Default '  ' [2 spaces] - Either a space/tab/newline that represents one level of indent, or an integer to use that number of spaces

            @param encoding <str/None>, Default 'utf-8', - Use this encoding for the document. None to not mess with encoding
        '''
        HTMLParser.__init__(self)

        # Do not automatically convert charrefs in python3
        self.convert_charrefs = False

        self.parsedData = []
        # From here on, calling .reset() on this instance runs _reset;
        # HTMLParser.__init__ above has already completed by this point.
        self.reset = self._reset
        self.decl = None
        self.currentIndentLevel = 0
        # indent and encoding are stored exactly as given.
        self.indent = indent
        self.encoding = encoding

        self.inPreformatted = 0

        self.root = None

        self._inTag = []
        self.doctype = None

Source File: Parser.py From AdvancedHTMLParser with GNU Lesser General Public License v3.0

5 votes

def _reset(self):
        '''
            _reset - reset this object. Assigned to .reset after __init__ call.

                Runs AdvancedHTMLParser.reset on this object first, then
                resets the indexes through _resetIndexInternal.
        '''
        AdvancedHTMLParser.reset(self)

        self._resetIndexInternal()

#vim: set ts=4 sw=4 expandtab

Source File: epr.py From epr with MIT License

5 votes

def __init__(self):
        """Initialise the HTMLParser base and the collection state.

        ``text`` starts as a list holding one empty string, ``imgs`` as an
        empty list, every ``is*`` flag as False and every ``id*`` attribute
        as its own empty set.
        """
        HTMLParser.__init__(self)
        self.text, self.imgs = [""], []

        # All flags start cleared.
        self.ishead = self.isinde = self.isbull = False
        self.ispref = self.ishidden = False

        # One independent set per attribute; they must not alias each other.
        self.idhead, self.idinde = set(), set()
        self.idbull, self.idpref = set(), set()

Python html.parser.HTMLParser.__init__() Examples

Python html.parser.HTMLParser.__init__() Examples