Python xml.sax.make_parser() Examples

The following are 30 code examples of xml.sax.make_parser(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module xml.sax, or try the search function.
Example #1
Source File: test_sax.py    From ironpython3 with Apache License 2.0 6 votes vote down vote up
def test_make_parser2(self):
        # Creating parsers several times in a row should succeed.
        # Testing this because there have been failures of this kind
        # before.
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()


# ===========================================================================
#
#   saxutils tests
#
# =========================================================================== 
Example #2
Source File: saxparser_xml_stanfordtokenized_boxergraph_traininggraph.py    From Sentence-Simplification-ACL14 with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def parse_to_iterate_probabilitytable(self):
        """Re-parse the training XML once per training iteration.

        One SAX parser, wired to a ``SAX_Handler`` built in ``"iter"`` mode
        around ``self.em_io_handler``, is reused for every pass.  Each of the
        ``self.NUM_TRAINING_ITERATION`` passes resets the count table, parses
        ``self.training_xmlfile`` and then updates the probability table.
        Progress messages are printed to standard output.
        """
        handler = SAX_Handler("iter", self.em_io_handler)
        parser = make_parser()
        parser.setContentHandler(handler)

        for count in range(self.NUM_TRAINING_ITERATION):
            # Every print receives exactly one parenthesised string, so the
            # output is the same whether print is a statement (Python 2) or
            # a function (Python 3).
            print("Starting iteration: "+str(count+1)+" ...")

            print("Resetting all counts to ZERO ...")
            self.em_io_handler.reset_count_table()

            print("Start parsing "+self.training_xmlfile+" ...")
            parser.parse(self.training_xmlfile)
            print("Ending iteration: "+str(count+1)+" ...")

            print("Updating probability table ...")
            self.em_io_handler.update_probability_table()
Example #3
Source File: test_sax.py    From BinderFilter with MIT License 6 votes vote down vote up
def test_make_parser2(self):
        # Creating parsers several times in a row should succeed.
        # Testing this because there have been failures of this kind
        # before.
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()


# ===========================================================================
#
#   saxutils tests
#
# =========================================================================== 
Example #4
Source File: test_sax.py    From oss-ftp with MIT License 6 votes vote down vote up
def test_5027_1(self):
        # The "xml" prefix (used by xml:lang below) is reserved and always
        # bound to http://www.w3.org/XML/1998/namespace.  XMLGenerator used
        # to raise KeyError because that namespace was absent from one of
        # its dictionaries.
        #
        # Reproduce the situation by parsing a document through it.
        document = (
            '<?xml version="1.0"?>'
            '<a:g1 xmlns:a="http://example.com/ns">'
             '<a:g2 xml:lang="en">Hello</a:g2>'
            '</a:g1>')
        source = StringIO(document)

        reader = make_parser()
        reader.setFeature(feature_namespaces, True)
        sink = self.ioclass()
        reader.setContentHandler(XMLGenerator(sink))
        reader.parse(source)

        produced = sink.getvalue()
        expected = start + (
            '<a:g1 xmlns:a="http://example.com/ns">'
             '<a:g2 xml:lang="en">Hello</a:g2>'
            '</a:g1>')
        self.assertEqual(produced, expected)
Example #5
Source File: stylesheet.py    From gprime with GNU General Public License v2.0 6 votes vote down vote up
def parse(self):
        """
        Feed the associated stylesheet file to a SheetParser, if it exists.

        Nothing is done when the path is not an existing file.  I/O errors
        and SAX parse errors raised along the way are swallowed silently.
        """
        try:
            if not os.path.isfile(self.__file):
                return
            sax_reader = make_parser()
            sax_reader.setContentHandler(SheetParser(self))
            with open(self.__file) as stream:
                sax_reader.parse(stream)
        except (IOError, OSError, SAXParseException):
            pass

#------------------------------------------------------------------------
#
# StyleSheet
#
#------------------------------------------------------------------------ 
Example #6
Source File: _book.py    From gprime with GNU General Public License v2.0 6 votes vote down vote up
def parse(self):
        """
        Feed the associated book-list file to a BookParser.

        Any of the listed error types raised while building the parser,
        opening the file or parsing it is logged at debug level (with
        traceback) rather than propagated.
        """
        tolerated = (IOError, OSError, ValueError, SAXParseException,
                     KeyError, AttributeError)
        try:
            sax_reader = make_parser()
            sax_reader.setContentHandler(BookParser(self, self.dbase))
            with open(self.file) as stream:
                sax_reader.parse(stream)
        except tolerated:
            LOG.debug("Failed to parse book list", exc_info=True)


#-------------------------------------------------------------------------
#
# BookParser
#
#------------------------------------------------------------------------- 
Example #7
Source File: _options.py    From gprime with GNU General Public License v2.0 6 votes vote down vote up
def parse(self):
        """
        Feed the associated options file to an :class:`OptionParser`, if the
        file exists.

        Nothing is done when ``self.filename`` is not an existing file.  The
        file is read as UTF-8; I/O errors and SAX parse errors raised along
        the way are swallowed silently.
        """
        try:
            if not os.path.isfile(self.filename):
                return
            sax_reader = make_parser()
            sax_reader.setContentHandler(OptionParser(self))
            with open(self.filename, encoding="utf-8") as stream:
                sax_reader.parse(stream)
        except (IOError, OSError, SAXParseException):
            pass

#-------------------------------------------------------------------------
#
# OptionParser
#
#------------------------------------------------------------------------- 
Example #8
Source File: test_sax.py    From oss-ftp with MIT License 6 votes vote down vote up
def test_make_parser2(self):
        # Creating parsers several times in a row should succeed.
        # Testing this because there have been failures of this kind
        # before.
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()


# ===========================================================================
#
#   saxutils tests
#
# =========================================================================== 
Example #9
Source File: qt5.py    From 802.11ah-ns3 with GNU General Public License v2.0 6 votes vote down vote up
def scan(self):
		"""Parse the first input file and sort the handler's file entries.

		Returns a ``(nodes, names)`` pair.  Each entry of the content
		handler's ``files`` list is looked up with ``find_resource`` on the
		parent of the first input: hits go into ``nodes``, misses are kept
		by name in ``names``.  When ``has_xml`` is false an error is logged
		and two empty lists are returned.
		"""
		if not has_xml:
			Logs.error('no xml support was found, the rcc dependencies will be incomplete!')
			return([],[])
		reader=make_parser()
		collector=XMLHandler()
		reader.setContentHandler(collector)
		stream=open(self.inputs[0].abspath(),'r')
		try:
			reader.parse(stream)
		finally:
			# close the input even when parsing fails
			stream.close()
		found=[]
		missing=[]
		base=self.inputs[0].parent
		for ref in collector.files:
			node=base.find_resource(ref)
			if node:
				found.append(node)
			else:
				missing.append(ref)
		return(found,missing)
Example #10
Source File: qt4.py    From 802.11ah-ns3 with GNU General Public License v2.0 6 votes vote down vote up
def scan(self):
		"""Parse the first input file and sort the handler's file entries.

		Returns a ``(nodes, names)`` pair.  Each entry of the content
		handler's ``files`` list is looked up with ``find_resource`` on the
		parent of the first input: hits go into ``nodes``, misses are kept
		by name in ``names``.  When ``has_xml`` is false an error is logged
		and two empty lists are returned.
		"""
		if not has_xml:
			Logs.error('no xml support was found, the rcc dependencies will be incomplete!')
			return([],[])
		reader=make_parser()
		collector=XMLHandler()
		reader.setContentHandler(collector)
		stream=open(self.inputs[0].abspath(),'r')
		try:
			reader.parse(stream)
		finally:
			# close the input even when parsing fails
			stream.close()
		found=[]
		missing=[]
		base=self.inputs[0].parent
		for ref in collector.files:
			node=base.find_resource(ref)
			if node:
				found.append(node)
			else:
				missing.append(ref)
		return(found,missing)
Example #11
Source File: qt5.py    From royal-chaos with MIT License 6 votes vote down vote up
def scan(self):
		"""Parse the first input file and sort the handler's file entries.

		Returns a ``(nodes, names)`` pair.  Each entry of the content
		handler's ``files`` list is looked up with ``find_resource`` on the
		parent of the first input: hits go into ``nodes``, misses are kept
		by name in ``names``.  When ``has_xml`` is false an error is logged
		and two empty lists are returned.
		"""
		if not has_xml:
			Logs.error('no xml support was found, the rcc dependencies will be incomplete!')
			return([],[])
		reader=make_parser()
		collector=XMLHandler()
		reader.setContentHandler(collector)
		stream=open(self.inputs[0].abspath(),'r')
		try:
			reader.parse(stream)
		finally:
			# close the input even when parsing fails
			stream.close()
		found=[]
		missing=[]
		base=self.inputs[0].parent
		for ref in collector.files:
			node=base.find_resource(ref)
			if node:
				found.append(node)
			else:
				missing.append(ref)
		return(found,missing)
Example #12
Source File: qt4.py    From royal-chaos with MIT License 6 votes vote down vote up
def scan(self):
		"""Parse the first input file and sort the handler's file entries.

		Returns a ``(nodes, names)`` pair.  Each entry of the content
		handler's ``files`` list is looked up with ``find_resource`` on the
		parent of the first input: hits go into ``nodes``, misses are kept
		by name in ``names``.  When ``has_xml`` is false an error is logged
		and two empty lists are returned.
		"""
		if not has_xml:
			Logs.error('no xml support was found, the rcc dependencies will be incomplete!')
			return([],[])
		reader=make_parser()
		collector=XMLHandler()
		reader.setContentHandler(collector)
		stream=open(self.inputs[0].abspath(),'r')
		try:
			reader.parse(stream)
		finally:
			# close the input even when parsing fails
			stream.close()
		found=[]
		missing=[]
		base=self.inputs[0].parent
		for ref in collector.files:
			node=base.find_resource(ref)
			if node:
				found.append(node)
			else:
				missing.append(ref)
		return(found,missing)
Example #13
Source File: test_sax.py    From Fluid-Designer with GNU General Public License v3.0 6 votes vote down vote up
def test_make_parser2(self):
        # Creating parsers several times in a row should succeed.
        # Testing this because there have been failures of this kind
        # before.
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()


# ===========================================================================
#
#   saxutils tests
#
# =========================================================================== 
Example #14
Source File: test_sax.py    From Fluid-Designer with GNU General Public License v3.0 6 votes vote down vote up
def test_5027_1(self):
        # The xml prefix (as in xml:lang below) is reserved and bound by
        # definition to http://www.w3.org/XML/1998/namespace.  XMLGenerator had
        # a bug whereby a KeyError is raised because this namespace is missing
        # from a dictionary.
        #
        # This test demonstrates the bug by parsing a document.
        test_xml = StringIO(
            '<?xml version="1.0"?>'
            '<a:g1 xmlns:a="http://example.com/ns">'
             '<a:g2 xml:lang="en">Hello</a:g2>'
            '</a:g1>')

        parser = make_parser()
        parser.setFeature(feature_namespaces, True)
        result = self.ioclass()
        gen = XMLGenerator(result)
        parser.setContentHandler(gen)
        parser.parse(test_xml)

        self.assertEqual(result.getvalue(),
                         self.xml(
                         '<a:g1 xmlns:a="http://example.com/ns">'
                          '<a:g2 xml:lang="en">Hello</a:g2>'
                         '</a:g1>')) 
Example #15
Source File: xml_sax.py    From flake8-bandit with MIT License 6 votes vote down vote up
def main():  # Fixture body: stdlib xml.sax calls next to their defusedxml counterparts.
    xmlString = "<note>\n<to>Tove</to>\n<from>Jani</from>\n<heading>Reminder</heading>\n<body>Don't forget me this weekend!</body>\n</note>"
    # bad: parsing through the standard-library xml.sax entry points
    xml.sax.parseString(xmlString, ExampleContentHandler())
    xml.sax.parse('notaxmlfilethatexists.xml', ExampleContentHandler())
    sax.parseString(xmlString, ExampleContentHandler())
    sax.parse('notaxmlfilethatexists.xml', ExampleContentHandler)  # NOTE(review): handler class is passed un-instantiated here, unlike the calls above - confirm intentional

    # good: the same parse routed through defusedxml.sax
    defusedxml.sax.parseString(xmlString, ExampleContentHandler())

    # bad: standard-library parser factory
    xml.sax.make_parser()
    sax.make_parser()
    print('nothing')
    # good: defusedxml parser factory
    defusedxml.sax.make_parser() 
Example #16
Source File: template.py    From learn_python3_spider with MIT License 6 votes vote down vote up
def _flatsaxParse(fl):
    """
    Run a SAX parse over an XML document with a L{_ToStan} instance acting
    as content handler, entity resolver and lexical handler.

    @param fl: The XML document to be parsed.
    @type fl: A file object or filename.

    @return: a C{list} of Stan objects.
    """
    reader = make_parser()
    # Namespace processing on; validation and both kinds of external
    # entities off.
    featureSettings = (
        (handler.feature_validation, 0),
        (handler.feature_namespaces, 1),
        (handler.feature_external_ges, 0),
        (handler.feature_external_pes, 0),
    )
    for feature, enabled in featureSettings:
        reader.setFeature(feature, enabled)

    # The handler is told the source name when C{fl} has one.
    stan = _ToStan(getattr(fl, "name", None))
    reader.setContentHandler(stan)
    reader.setEntityResolver(stan)
    reader.setProperty(handler.property_lexical_handler, stan)

    reader.parse(fl)

    return stan.document
Example #17
Source File: test_sax.py    From ironpython3 with Apache License 2.0 6 votes vote down vote up
def test_5027_1(self):
        # The xml prefix (as in xml:lang below) is reserved and bound by
        # definition to http://www.w3.org/XML/1998/namespace.  XMLGenerator had
        # a bug whereby a KeyError is raised because this namespace is missing
        # from a dictionary.
        #
        # This test demonstrates the bug by parsing a document.
        test_xml = StringIO(
            '<?xml version="1.0"?>'
            '<a:g1 xmlns:a="http://example.com/ns">'
             '<a:g2 xml:lang="en">Hello</a:g2>'
            '</a:g1>')

        parser = make_parser()
        parser.setFeature(feature_namespaces, True)
        result = self.ioclass()
        gen = XMLGenerator(result)
        parser.setContentHandler(gen)
        parser.parse(test_xml)

        self.assertEqual(result.getvalue(),
                         self.xml(
                         '<a:g1 xmlns:a="http://example.com/ns">'
                          '<a:g2 xml:lang="en">Hello</a:g2>'
                         '</a:g1>')) 
Example #18
Source File: reader.py    From delft with Apache License 2.0 6 votes vote down vote up
def load_data_and_labels_xml_string(stringXml):
    """
    Load data and labels from an XML string.
    The format is as follows:
    <p> 
        bla bla you are a <rs type="insult">CENSURED</rs>, 
        and I will <rs type="threat">find and kill</rs> you bla bla
    </p>
    Only the insulting expression is labelled, and similarly only the threat
    "action" is tagged.

    Args:
        stringXml: the XML document, as ``str`` or ``bytes``.

    Returns:
        tuple: ``(tokens, labels)`` as returned by the content handler's
        ``getSents()`` and ``getAllLabels()``.

    """
    import io

    # as we have XML mixed content, we need a real XML parser...
    parser = make_parser()
    handler = TEIContentHandler()
    parser.setContentHandler(handler)
    # Reader objects returned by make_parser() only offer parse();
    # parseString() is a module-level function of xml.sax, so calling it on
    # the reader raised AttributeError.  Wrap the text in an in-memory stream
    # and go through parse() instead.
    if isinstance(stringXml, bytes):
        source = io.BytesIO(stringXml)
    else:
        source = io.StringIO(stringXml)
    parser.parse(source)
    tokens = handler.getSents()
    labels = handler.getAllLabels()
    return tokens, labels
Example #19
Source File: reader.py    From delft with Apache License 2.0 6 votes vote down vote up
def load_data_and_labels_xml_file(filepathXml):
    """
    Load data and labels from an XML file.
    The format is as follows:
    <p> 
        bla bla you are a <rs type="insult">CENSURED</rs>, 
        and I will <rs type="threat">find and kill</rs> you bla bla
    </p>
    Only the insulting expression is labelled, and similarly only the threat
    "action" is tagged.

    Returns:
        tuple: ``(tokens, labels)`` as returned by the content handler's
        ``getSents()`` and ``getAllLabels()``.

    """
    # mixed content (text interleaved with elements) calls for a real XML parser
    sax_reader = make_parser()
    tei_handler = TEIContentHandler()
    sax_reader.setContentHandler(tei_handler)
    sax_reader.parse(filepathXml)
    return tei_handler.getSents(), tei_handler.getAllLabels()
Example #20
Source File: reader.py    From delft with Apache License 2.0 6 votes vote down vote up
def load_data_and_labels_lemonde(filepathXml):
    """
    Load data and labels from a Le Monde XML corpus file.
    The format is ENAMEX-style, as follows:
    <sentence id="E14">Les ventes de micro-ordinateurs en <ENAMEX type="Location" sub_type="Country" 
        eid="2000000003017382" name="Republic of France">France</ENAMEX> se sont ralenties en 1991. </sentence>

    Returns:
        tuple: ``(tokens, labels)`` as returned by the content handler's
        ``getSents()`` and ``getAllLabels()``.

    """
    # mixed content (text interleaved with elements) calls for a real XML parser
    sax_reader = make_parser()
    enamex_handler = ENAMEXContentHandler()
    sax_reader.setContentHandler(enamex_handler)
    sax_reader.parse(filepathXml)

    return enamex_handler.getSents(), enamex_handler.getAllLabels()
Example #21
Source File: test_sax.py    From gcblue with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_make_parser2(self):
        # Creating parsers several times in a row should succeed.
        # Testing this because there have been failures of this kind
        # before.
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()


# ===========================================================================
#
#   saxutils tests
#
# =========================================================================== 
Example #22
Source File: test_sax.py    From gcblue with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_5027_1(self):
        # The "xml" prefix (used by xml:lang below) is reserved and always
        # bound to http://www.w3.org/XML/1998/namespace.  XMLGenerator used
        # to raise KeyError because that namespace was absent from one of
        # its dictionaries.
        #
        # Reproduce the situation by parsing a document through it.
        document = (
            '<?xml version="1.0"?>'
            '<a:g1 xmlns:a="http://example.com/ns">'
             '<a:g2 xml:lang="en">Hello</a:g2>'
            '</a:g1>')
        source = StringIO(document)

        reader = make_parser()
        reader.setFeature(feature_namespaces, True)
        sink = self.ioclass()
        reader.setContentHandler(XMLGenerator(sink))
        reader.parse(source)

        produced = sink.getvalue()
        expected = start + (
            '<a:g1 xmlns:a="http://example.com/ns">'
             '<a:g2 xml:lang="en">Hello</a:g2>'
            '</a:g1>')
        self.assertEqual(produced, expected)
Example #23
Source File: test_sax.py    From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0 6 votes vote down vote up
def test_make_parser2(self):
        # Creating parsers several times in a row should succeed.
        # Testing this because there have been failures of this kind
        # before.
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()


# ===========================================================================
#
#   saxutils tests
#
# =========================================================================== 
Example #24
Source File: test_sax.py    From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0 6 votes vote down vote up
def test_5027_1(self):
        # The xml prefix (as in xml:lang below) is reserved and bound by
        # definition to http://www.w3.org/XML/1998/namespace.  XMLGenerator had
        # a bug whereby a KeyError is raised because this namespace is missing
        # from a dictionary.
        #
        # This test demonstrates the bug by parsing a document.
        test_xml = StringIO(
            '<?xml version="1.0"?>'
            '<a:g1 xmlns:a="http://example.com/ns">'
             '<a:g2 xml:lang="en">Hello</a:g2>'
            '</a:g1>')

        parser = make_parser()
        parser.setFeature(feature_namespaces, True)
        result = self.ioclass()
        gen = XMLGenerator(result)
        parser.setContentHandler(gen)
        parser.parse(test_xml)

        self.assertEqual(result.getvalue(),
                         self.xml(
                         '<a:g1 xmlns:a="http://example.com/ns">'
                          '<a:g2 xml:lang="en">Hello</a:g2>'
                         '</a:g1>')) 
Example #25
Source File: test_sax.py    From medicare-demo with Apache License 2.0 6 votes vote down vote up
def test_expat_dtdhandler():
    # Parse a document whose DTD declares one unparsed entity and one
    # notation, then check what the DTD handler recorded.  Returns 0 on an
    # entity mismatch, otherwise the result of the notation comparison.
    reader = make_parser()
    dtd = TestDTDHandler()
    reader.setDTDHandler(dtd)

    reader.parse(StringIO('''<!DOCTYPE doc [
  <!ENTITY img SYSTEM "expat.gif" NDATA GIF>
  <!NOTATION GIF PUBLIC "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN">
]>
<doc></doc>'''))
    entities = dtd._entities
    if len(entities) != 1 or len(entities[0]) != 4:
        return 0
    name, pubId, sysId, ndata = entities[0]
    entity_ok = (name == 'img' and pubId is None
                 and sysId.endswith('expat.gif') and ndata == 'GIF')
    if not entity_ok:
        return 0
    expected_notations = [
        ("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)]
    return dtd._notations == expected_notations

# ===== EntityResolver support 
Example #26
Source File: test_sax.py    From medicare-demo with Apache License 2.0 6 votes vote down vote up
def test_expat_nsattrs_wattr():
    # With namespace processing on, parse an element carrying a single
    # namespaced attribute and check every accessor of the gathered
    # attributes object against the (uri, local name) key.
    reader = make_parser()
    reader.setFeature(handler.feature_namespaces, 1)
    gather = AttrGatherer()
    reader.setContentHandler(gather)

    a_name = "id"
    a_val = "val"
    document = "<doc xmlns:ns='%s' ns:%s='%s'/>" % (ns_uri, a_name, a_val)
    reader.parse(StringIO(document))

    attrs = gather._attrs
    key = (ns_uri, a_name)

    return (attrs.getLength() == 1 and
            attrs.getNames() == [key] and
            attrs.getQNames() == ["ns:%s" % a_name] and
            len(attrs) == 1 and
            attrs.has_key(key) and
            attrs.keys() == [key] and
            attrs.get(key) == a_val and
            attrs.get(key, 25) == a_val and
            attrs.items() == [(key, a_val)] and
            attrs.values() == [a_val] and
            attrs.getValue(key) == a_val and
            attrs[key] == a_val)
Example #27
Source File: test_sax.py    From medicare-demo with Apache License 2.0 6 votes vote down vote up
def test_expat_nsattrs_no_namespace():
    # With namespace processing on, parse an element carrying a single
    # attribute that has no namespace and check every accessor of the
    # gathered attributes object against the (None, name) key.
    reader = make_parser()
    reader.setFeature(handler.feature_namespaces, 1)
    gather = AttrGatherer()
    reader.setContentHandler(gather)

    a_name = "id"
    a_val = "val"
    document = "<doc %s='%s'/>" % (a_name, a_val)
    reader.parse(StringIO(document))

    attrs = gather._attrs
    key = (None, a_name)

    return (attrs.getLength() == 1 and
            attrs.getNames() == [key] and
            attrs.getQNames() == [a_name] and
            len(attrs) == 1 and
            attrs.has_key(key) and
            attrs.keys() == [key] and
            attrs.get(key) == a_val and
            attrs.get(key, 25) == a_val and
            attrs.items() == [(key, a_val)] and
            attrs.values() == [a_val] and
            attrs.getValue(key) == a_val and
            attrs[key] == a_val)

# ===== InputSource support 
Example #28
Source File: test_sax.py    From medicare-demo with Apache License 2.0 6 votes vote down vote up
def test_expat_locator_withinfo():
    # Parse test.xml by file name and check that the recorded locator
    # reports a system id ending in the quoted path and no public id.
    out = StringIO()
    locator_gen = LocatorTest(out)
    reader = make_parser()
    reader.setContentHandler(locator_gen)
    path = findfile("test.xml")
    reader.parse(path)
    if is_jython:
        # Under Jython the system id is a URL using forward slashes, while
        # findfile yields backslashes on Windows; normalise to forward.
        path = path.replace('\\', '/')

    # urllib.quote is not the exact encoder (':' is left unescaped there)
    expected = urllib.quote(path).replace('%3A', ':')
    location = locator_gen.location
    system_id_matches = location.getSystemId().endswith(expected)
    if not system_id_matches:
        return system_id_matches
    return location.getPublicId() is None


# ===========================================================================
#
#   error reporting
#
# =========================================================================== 
Example #29
Source File: dump.py    From evernote-dump with GNU General Public License v3.0 6 votes vote down vote up
def run_parse(settings: Settings, print_fun=None):
    """
    Parse every Evernote enex file listed in ``settings.files``.

    One SAX parser (namespace processing off) is reused for all files; each
    file gets its own ``NoteParser`` content handler.

    :param settings: Settings object; its ``files`` attribute is iterated and
                     the object itself is handed to each ``NoteParser``.
    :param print_fun: func Forwarded unchanged to ``NoteParser``; per the
                      original notes, a callback that is passed strings for
                      printing instead of printing to console.
    """

    # One parser shared by all input files
    sax_reader = make_parser()
    sax_reader.setFeature(handler.feature_namespaces, 0)

    for enex_path in settings.files:
        # Handler name: the file's base name with ".enex" removed
        note_name = os.path.basename(enex_path).replace(".enex", "")
        sax_reader.setContentHandler(NoteParser(note_name, settings, print_fun))
        sax_reader.parse(enex_path)
Example #30
Source File: test_sax.py    From CTFCrackTools-V2 with GNU General Public License v3.0 6 votes vote down vote up
def test_make_parser2(self):
        # Creating parsers several times in a row should succeed.
        # Testing this because there have been failures of this kind
        # before.
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()
        from xml.sax import make_parser
        p = make_parser()


# ===========================================================================
#
#   saxutils tests
#
# ===========================================================================