Python lxml.etree.XMLSyntaxError() Examples
The following are 30 code examples of lxml.etree.XMLSyntaxError().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module lxml.etree, or try the search function.
Example #1
Source File: doctestcompare.py From learn_python3_spider with MIT License | 6 votes |
def check_output(self, want, got, optionflags):
    """Compare expected and actual output as parsed documents.

    If ``self`` carries a ``_temp_override_self`` attribute, that object is
    used as the checker and ``self._temp_call_super_check_output`` as the
    fallback comparison; otherwise ``OutputChecker.check_output`` is the
    fallback.  The fallback is used whenever ``get_parser`` returns nothing.

    Returns False if either text fails to parse, otherwise the result of
    ``compare_docs`` on the two parsed documents.
    """
    override = getattr(self, '_temp_override_self', None)
    if override is None:
        fallback = OutputChecker.check_output
    else:
        # Look up the fallback on the original object before swapping self.
        fallback = self._temp_call_super_check_output
        self = override

    build_doc = self.get_parser(want, got, optionflags)
    if not build_doc:
        return fallback(self, want, got, optionflags)

    # A syntax error in either text means the outputs cannot match.
    try:
        expected_doc = build_doc(want)
        actual_doc = build_doc(got)
    except etree.XMLSyntaxError:
        return False
    return self.compare_docs(expected_doc, actual_doc)
Example #2
Source File: test_tesseract.py From nidaba with GNU General Public License v2.0 | 6 votes |
def test_capi_file_output_jpg(self):
    """
    Test that tesseract CAPI calls create hocr output for jpgs.
    """
    # Skip when the shared library cannot be loaded; ctypes reports that
    # as OSError, so nothing broader needs to be caught.
    try:
        ctypes.cdll.LoadLibrary('libtesseract.so.3')
    except OSError:
        raise unittest.SkipTest
    self.tesseract.setup(tessdata=tessdata, implementation='capi')
    ocr = self.tesseract.ocr_tesseract.run(('test', 'segmentation_jpg.xml'),
                                           languages=['eng'],
                                           extended=False)
    outpath = os.path.join(self.storage_path, *ocr)
    self.assertTrue(os.path.isfile(outpath), msg='Tesseract did not '
                    'output a file!')
    try:
        # Parse from a managed handle so the file is closed afterwards.
        with open(outpath, 'rb') as fp:
            etree.parse(fp)
    except etree.XMLSyntaxError:
        self.fail(msg='The output was not valid html/xml!')
Example #3
Source File: base.py From daf-recipes with GNU General Public License v3.0 | 6 votes |
def _validate_document(self, document_string, harvest_object, validator=None):
    '''
    Validates an XML document with the default, or if present, the
    provided validators.

    It will create a HarvestObjectError for each validation error found,
    so they can be shown properly on the frontend.

    Returns a tuple, with a boolean showing whether the validation passed
    or not, the profile used and a list of errors (tuples with error
    message and error lines if present).
    '''
    if not validator:
        validator = self._get_validator()

    # Remove any XML declaration before parsing (presumably because the
    # parser rejects declared encodings on already-decoded text -- confirm).
    document_string = re.sub(r'<\?xml(.*)\?>', '', document_string)

    try:
        xml = etree.fromstring(document_string)
    except etree.XMLSyntaxError as e:
        self._save_object_error('Could not parse XML file: {0}'.format(str(e)), harvest_object, 'Import')
        return False, None, []
    # NOTE(review): the excerpt ends here; the validation of ``xml`` with
    # ``validator`` described in the docstring is not part of this snippet.
Example #4
Source File: test_tesseract.py From nidaba with GNU General Public License v2.0 | 6 votes |
def test_direct_file_output_png(self):
    """
    Test that direct tesseract calls create hocr output for pngs.
    """
    # Skip when no tesseract executable is found.
    if not spawn.find_executable('tesseract'):
        raise unittest.SkipTest
    self.tesseract.setup(tessdata=tessdata, implementation='direct')
    ocr = self.tesseract.ocr_tesseract.run(('test', 'segmentation_png.xml'),
                                           languages=['eng'],
                                           extended=False)
    outpath = os.path.join(self.storage_path, *ocr)
    self.assertTrue(os.path.isfile(outpath), msg='Tesseract did not '
                    'output a file!')
    try:
        # Parse from a managed handle so the file is closed afterwards.
        with open(outpath, 'rb') as fp:
            etree.parse(fp)
    except etree.XMLSyntaxError:
        self.fail(msg='The output was not valid html/xml!')
Example #5
Source File: test_tesseract.py From nidaba with GNU General Public License v2.0 | 6 votes |
def test_direct_file_output_tiff(self):
    """
    Test that direct tesseract calls create hocr output for tiffs.
    """
    # Skip when no tesseract executable is found.
    if not spawn.find_executable('tesseract'):
        raise unittest.SkipTest
    self.tesseract.setup(tessdata=tessdata, implementation='direct')
    ocr = self.tesseract.ocr_tesseract.run(('test', 'segmentation_tiff.xml'),
                                           languages=['eng'],
                                           extended=False)
    outpath = os.path.join(self.storage_path, *ocr)
    self.assertTrue(os.path.isfile(outpath), msg='Tesseract did not '
                    'output a file!')
    try:
        # Parse from a managed handle so the file is closed afterwards.
        with open(outpath, 'rb') as fp:
            etree.parse(fp)
    except etree.XMLSyntaxError:
        self.fail(msg='The output was not valid html/xml!')
Example #6
Source File: test_tesseract.py From nidaba with GNU General Public License v2.0 | 6 votes |
def test_direct_file_output_jpg(self):
    """
    Test that direct tesseract calls create hocr output for jpgs.
    """
    # Skip when no tesseract executable is found.
    if not spawn.find_executable('tesseract'):
        raise unittest.SkipTest
    self.tesseract.setup(tessdata=tessdata, implementation='direct')
    ocr = self.tesseract.ocr_tesseract.run(('test', 'segmentation_jpg.xml'),
                                           languages=['eng'],
                                           extended=False)
    outpath = os.path.join(self.storage_path, *ocr)
    self.assertTrue(os.path.isfile(outpath), msg='Tesseract did not '
                    'output a file!')
    try:
        # Parse from a managed handle so the file is closed afterwards.
        with open(outpath, 'rb') as fp:
            etree.parse(fp)
    except etree.XMLSyntaxError:
        self.fail(msg='The output was not valid html/xml!')
Example #7
Source File: ironclaw_class.py From warriorframework with Apache License 2.0 | 6 votes |
def xml_to_xsd_validation(file_xml, file_xsd):
    """ Verify that the XML complies with the XSD
    Arguments:
        1. file_xml: Input xml file
        2. file_xsd: xsd file which needs to be validated against xml
    Return:
        True when both files parse and the schema assertion passes,
        False when parsing fails or the XML does not satisfy the schema
    """
    try:
        print_info("Validating:{0}".format(file_xml))
        print_info("xsd_file:{0}".format(file_xsd))
        xml_doc = parse(file_xml)
        xsd_doc = parse(file_xsd)
        xmlschema = XMLSchema(xsd_doc)
        # assert_ raises AssertionError when the document does not validate.
        xmlschema.assert_(xml_doc)
        return True

    except XMLSyntaxError as err:
        print_error("PARSING ERROR:{0}".format(err))
        return False

    except AssertionError as err:
        print_error("Incorrect XML schema: {0}".format(err))
        return False
Example #8
Source File: pyreact.py From pypath with GNU General Public License v3.0 | 6 votes |
def init_etree(self):
    """
    Set up the ``lxml.etree.iterparse`` object for ``self._biopax``.

    The first parse event is consumed to capture the root element in
    ``self.root``.  If the input is not well-formed, ``self.bp`` is set to
    ``None`` instead.  ``self.used_elements`` is reset in either case.

    This method should not be called directly,
    ``BioPaxReader.process()`` calls it.
    """
    parse_events = ('start', 'end')
    try:
        self.bp = etree.iterparse(self._biopax, events=parse_events)
        _event, self.root = next(self.bp)
    except etree.XMLSyntaxError:
        self.bp = None

    self.used_elements = []
Example #9
Source File: __init__.py From python-gvm with GNU General Public License v3.0 | 6 votes |
def import_config(self, config: str) -> Any:
    """Import a scan config from XML

    Arguments:
        config: Scan Config XML as string to import. This XML must
            contain a :code:`<get_configs_response>` root element.

    Returns:
        The response. See :py:meth:`send_command` for details.

    Raises:
        RequiredArgument: if ``config`` is empty.
        InvalidArgument: if ``config`` is not well-formed XML.
    """
    if not config:
        raise RequiredArgument(
            function=self.import_config.__name__, argument='config'
        )

    cmd = XmlCommand("create_config")

    try:
        cmd.append_xml_str(config)
    except etree.XMLSyntaxError as e:
        # Chain explicitly so the parser's message stays attached as the
        # direct cause of the argument error.
        raise InvalidArgument(
            function=self.import_config.__name__, argument='config'
        ) from e

    return self._send_xml_command(cmd)
Example #10
Source File: test_tesseract.py From nidaba with GNU General Public License v2.0 | 6 votes |
def test_capi_file_output_png(self):
    """
    Test that tesseract CAPI calls create hocr output for pngs.
    """
    # Skip when the shared library cannot be loaded; ctypes reports that
    # as OSError, so nothing broader needs to be caught.
    try:
        ctypes.cdll.LoadLibrary('libtesseract.so.3')
    except OSError:
        raise unittest.SkipTest
    self.tesseract.setup(tessdata=tessdata, implementation='capi')
    ocr = self.tesseract.ocr_tesseract.run(('test', 'segmentation_png.xml'),
                                           languages=['eng'],
                                           extended=False)
    outpath = os.path.join(self.storage_path, *ocr)
    self.assertTrue(os.path.isfile(outpath), msg='Tesseract did not '
                    'output a file!')
    try:
        # Parse from a managed handle so the file is closed afterwards.
        with open(outpath, 'rb') as fp:
            etree.parse(fp)
    except etree.XMLSyntaxError:
        self.fail(msg='The output was not valid html/xml!')
Example #11
Source File: test_tesseract.py From nidaba with GNU General Public License v2.0 | 6 votes |
def test_capi_extended(self):
    """
    Test that the CAPI extended output contains character cuts in each
    ocr_line and character confidences in each ocrx_word.
    """
    # Skip when the shared library cannot be loaded; ctypes reports that
    # as OSError, so nothing broader needs to be caught.
    try:
        ctypes.cdll.LoadLibrary('libtesseract.so.3')
    except OSError:
        raise unittest.SkipTest
    self.tesseract.setup(tessdata=tessdata, implementation='capi')
    ocr = self.tesseract.ocr_tesseract.run(('test', 'segmentation_tiff.xml'),
                                           languages=['eng'],
                                           extended=True)
    outpath = os.path.join(self.storage_path, *ocr)
    self.assertTrue(os.path.isfile(outpath), msg='Tesseract did not '
                    'output a file!')
    try:
        # Parse from a managed handle so the file is closed afterwards.
        with open(outpath, 'rb') as fp:
            h = etree.parse(fp)
    except etree.XMLSyntaxError:
        self.fail(msg='The output was not valid html/xml!')
    # NOTE(review): findall() returns a list, never None, so the three
    # checks below pass even when nothing matches.  Left unchanged because
    # tightening them may need namespace-aware paths -- confirm against the
    # real output before switching to assertTrue.
    self.assertIsNotNone(h.findall(".//line"), msg='Tesseract did not write lines.')
    self.assertIsNotNone(h.findall(".//seg"), msg='Tesseract did not write segments.')
    self.assertIsNotNone(h.findall(".//g"), msg='Tesseract did not write graphemes.')
Example #12
Source File: test_tesseract.py From nidaba with GNU General Public License v2.0 | 6 votes |
def test_direct_multiple(self):
    """
    Test that direct tesseract calls create hocr output for multiple
    languages.
    """
    # Skip when no tesseract executable is found.
    if not spawn.find_executable('tesseract'):
        raise unittest.SkipTest
    self.tesseract.setup(tessdata=tessdata, implementation='direct')
    ocr = self.tesseract.ocr_tesseract.run(('test', 'segmentation_tiff.xml'),
                                           languages=['grc', 'eng'],
                                           extended=False)
    outpath = os.path.join(self.storage_path, *ocr)
    self.assertTrue(os.path.isfile(outpath), msg='Tesseract did not '
                    'output a file!')
    try:
        # Parse from a managed handle so the file is closed afterwards.
        with open(outpath, 'rb') as fp:
            etree.parse(fp)
    except etree.XMLSyntaxError:
        self.fail(msg='The output was not valid html/xml!')
Example #13
Source File: xml.py From kraken with Apache License 2.0 | 6 votes |
def parse_xml(filename):
    """
    Parses either a PageXML or ALTO file with autodetermination of the file
    format.

    The root tag decides the format: a tag ending in ``alto`` is handed to
    ``parse_alto``, one ending in ``PcGts`` to ``parse_page``.

    Args:
        filename (str): path to an XML file.

    Returns:
        Whatever the selected format parser returns; as originally
        documented, a dict of the form
        {'image': impath,
         'lines': [{'boundary': [[x0, y0], ...],
                    'baseline': [[x0, y0], ...],
                    'text': 'apdjfqpf',
                    'script': 'script_type'}, ...],
         'regions': {'region_type_0': [[[x0, y0], ...], ...], ...}}

    Raises:
        KrakenInputException: if the file is not well-formed XML or its
            root element is neither ALTO nor PageXML.
    """
    with open(filename, 'rb') as fp:
        try:
            doc = etree.parse(fp)
        except etree.XMLSyntaxError as e:
            raise KrakenInputException(f'Parsing {filename} failed: {e}') from e

    # The document is fully parsed, so the handle is already closed before
    # the format-specific parser is called with the path.
    root_tag = doc.getroot().tag
    if root_tag.endswith('alto'):
        return parse_alto(filename)
    if root_tag.endswith('PcGts'):
        return parse_page(filename)
    raise KrakenInputException(f'Unknown XML format in {filename}')
Example #14
Source File: main.py From parserator with MIT License | 6 votes |
def __call__(self, parser, namespace, string, option_string):
    """Store the given path on the namespace together with its parsed XML root.

    ``namespace.xml`` is the root element of the file, or ``None`` when the
    file cannot be opened or is an empty document.  Any other XML syntax
    error is reported as an ``argparse.ArgumentError``.
    """
    xml = None
    try:
        with open(string, 'r') as handle:
            xml = etree.parse(handle).getroot()
    except (OSError, IOError):
        # An unreadable or missing file is tolerated; xml stays None.
        pass
    except etree.XMLSyntaxError as err:
        # Only an empty document is tolerated; anything else is invalid.
        if 'Document is empty' not in str(err):
            raise argparse.ArgumentError(self,
                                         "%s does not seem to be a valid xml file"
                                         % string)

    setattr(namespace, self.dest, string)
    setattr(namespace, 'xml', xml)
Example #15
Source File: files.py From janeway with GNU Affero General Public License v3.0 | 6 votes |
def transform_with_xsl(xml_path, xsl_path, recover=False):
    """Apply the XSL stylesheet at ``xsl_path`` to the XML at ``xml_path``.

    :param xml_path: path of the XML document to transform
    :param xsl_path: path of the XSL stylesheet
    :param recover: when true, a malformed XML document is re-parsed with a
        recovering parser and a failed transformation is logged instead of
        raised
    :return: the transformed document, or ``None`` when ``recover`` is true
        and the transformation failed
    :raises etree.XMLSyntaxError: if the XML is malformed and ``recover``
        is false
    """
    try:
        xml_dom = etree.parse(xml_path)
    except etree.XMLSyntaxError as e:
        if not recover:
            raise
        logger.error(e)
        parser = etree.XMLParser(recover=True)
        xml_dom = etree.parse(xml_path, parser=parser)

    xsl_transform = etree.XSLT(etree.parse(xsl_path))

    # Pre-bind the result: when the transform fails in recover mode the
    # function must still have something to return.
    transformed_dom = None
    try:
        transformed_dom = xsl_transform(xml_dom)
    except Exception as err:
        logger.error(err)
        for xsl_error in xsl_transform.error_log:
            logger.error(xsl_error)
        if not recover:
            raise

    return transformed_dom
Example #16
Source File: nexpose.py From nexpose-client-python with BSD 3-Clause "New" or "Revised" License | 6 votes |
def Open(self):
    """
    Opens a session to the nexpose appliance by logging in.
    This function will raise an exception on error or if the session is
    already open.

    On a successful ``LoginResponse`` the ``session-id`` attribute of the
    response is stored in ``self._session_id``.
    """
    if self._session_id:
        raise SessionIsNotClosedException("Please close the session first!")
    try:
        response = self._Execute_APIv1d1(self._login_request)
    except NexposeConnectionException as ex:
        # A reply that is not XML at all gets a more helpful error.
        if isinstance(ex.inner_exception, etree.XMLSyntaxError):
            raise NexposeException("Unexpected error! Is the Nexpose appliance activated?")
        # Bare raise keeps the original traceback intact.
        raise
    if response.tag == "LoginResponse" and response.attrib["success"] == "1":
        self._session_id = response.attrib["session-id"]
    if not self._session_id:
        raise NexposeFailureException("Login failure!")
Example #17
Source File: doctestcompare.py From aws-lambda-lxml with GNU General Public License v3.0 | 6 votes |
def check_output(self, want, got, optionflags):
    """Compare expected and actual output as parsed documents.

    If ``self`` carries a ``_temp_override_self`` attribute, that object is
    used as the checker and ``self._temp_call_super_check_output`` as the
    fallback comparison; otherwise ``OutputChecker.check_output`` is the
    fallback.  The fallback is used whenever ``get_parser`` returns nothing.

    Returns False if either text fails to parse, otherwise the result of
    ``compare_docs`` on the two parsed documents.
    """
    override = getattr(self, '_temp_override_self', None)
    if override is None:
        fallback = OutputChecker.check_output
    else:
        # Look up the fallback on the original object before swapping self.
        fallback = self._temp_call_super_check_output
        self = override

    build_doc = self.get_parser(want, got, optionflags)
    if not build_doc:
        return fallback(self, want, got, optionflags)

    # A syntax error in either text means the outputs cannot match.
    try:
        expected_doc = build_doc(want)
        actual_doc = build_doc(got)
    except etree.XMLSyntaxError:
        return False
    return self.compare_docs(expected_doc, actual_doc)
Example #18
Source File: doctestcompare.py From aws-lambda-lxml with GNU General Public License v3.0 | 6 votes |
def check_output(self, want, got, optionflags):
    """Compare expected and actual output as parsed documents.

    If ``self`` carries a ``_temp_override_self`` attribute, that object is
    used as the checker and ``self._temp_call_super_check_output`` as the
    fallback comparison; otherwise ``OutputChecker.check_output`` is the
    fallback.  The fallback is used whenever ``get_parser`` returns nothing.

    Returns False if either text fails to parse, otherwise the result of
    ``compare_docs`` on the two parsed documents.
    """
    override = getattr(self, '_temp_override_self', None)
    if override is None:
        fallback = OutputChecker.check_output
    else:
        # Look up the fallback on the original object before swapping self.
        fallback = self._temp_call_super_check_output
        self = override

    build_doc = self.get_parser(want, got, optionflags)
    if not build_doc:
        return fallback(self, want, got, optionflags)

    # A syntax error in either text means the outputs cannot match.
    try:
        expected_doc = build_doc(want)
        actual_doc = build_doc(got)
    except etree.XMLSyntaxError:
        return False
    return self.compare_docs(expected_doc, actual_doc)
Example #19
Source File: doctestcompare.py From aws-lambda-lxml with GNU General Public License v3.0 | 6 votes |
def check_output(self, want, got, optionflags):
    """Compare expected and actual output as parsed documents.

    If ``self`` carries a ``_temp_override_self`` attribute, that object is
    used as the checker and ``self._temp_call_super_check_output`` as the
    fallback comparison; otherwise ``OutputChecker.check_output`` is the
    fallback.  The fallback is used whenever ``get_parser`` returns nothing.

    Returns False if either text fails to parse, otherwise the result of
    ``compare_docs`` on the two parsed documents.
    """
    override = getattr(self, '_temp_override_self', None)
    if override is None:
        fallback = OutputChecker.check_output
    else:
        # Look up the fallback on the original object before swapping self.
        fallback = self._temp_call_super_check_output
        self = override

    build_doc = self.get_parser(want, got, optionflags)
    if not build_doc:
        return fallback(self, want, got, optionflags)

    # A syntax error in either text means the outputs cannot match.
    try:
        expected_doc = build_doc(want)
        actual_doc = build_doc(got)
    except etree.XMLSyntaxError:
        return False
    return self.compare_docs(expected_doc, actual_doc)
Example #20
Source File: doctestcompare.py From stopstalk-deployment with MIT License | 6 votes |
def check_output(self, want, got, optionflags):
    """Compare expected and actual output as parsed documents.

    If ``self`` carries a ``_temp_override_self`` attribute, that object is
    used as the checker and ``self._temp_call_super_check_output`` as the
    fallback comparison; otherwise ``OutputChecker.check_output`` is the
    fallback.  The fallback is used whenever ``get_parser`` returns nothing.

    Returns False if either text fails to parse, otherwise the result of
    ``compare_docs`` on the two parsed documents.
    """
    override = getattr(self, '_temp_override_self', None)
    if override is None:
        fallback = OutputChecker.check_output
    else:
        # Look up the fallback on the original object before swapping self.
        fallback = self._temp_call_super_check_output
        self = override

    build_doc = self.get_parser(want, got, optionflags)
    if not build_doc:
        return fallback(self, want, got, optionflags)

    # A syntax error in either text means the outputs cannot match.
    try:
        expected_doc = build_doc(want)
        actual_doc = build_doc(got)
    except etree.XMLSyntaxError:
        return False
    return self.compare_docs(expected_doc, actual_doc)
Example #21
Source File: doctestcompare.py From aws-lambda-lxml with GNU General Public License v3.0 | 6 votes |
def check_output(self, want, got, optionflags):
    """Compare expected and actual output as parsed documents.

    If ``self`` carries a ``_temp_override_self`` attribute, that object is
    used as the checker and ``self._temp_call_super_check_output`` as the
    fallback comparison; otherwise ``OutputChecker.check_output`` is the
    fallback.  The fallback is used whenever ``get_parser`` returns nothing.

    Returns False if either text fails to parse, otherwise the result of
    ``compare_docs`` on the two parsed documents.
    """
    override = getattr(self, '_temp_override_self', None)
    if override is None:
        fallback = OutputChecker.check_output
    else:
        # Look up the fallback on the original object before swapping self.
        fallback = self._temp_call_super_check_output
        self = override

    build_doc = self.get_parser(want, got, optionflags)
    if not build_doc:
        return fallback(self, want, got, optionflags)

    # A syntax error in either text means the outputs cannot match.
    try:
        expected_doc = build_doc(want)
        actual_doc = build_doc(got)
    except etree.XMLSyntaxError:
        return False
    return self.compare_docs(expected_doc, actual_doc)
Example #22
Source File: get_ui.py From adbui with MIT License | 6 votes |
def get_uis_by_xpath(self, xpath, is_update=True):
    """
    Find UI nodes by XPath.

    :param xpath: XPath expression evaluated against ``self.xml``
    :param is_update: when true, dump the UI XML again (up to five
        attempts) and re-initialise the XML before searching
    :return: list with one UI object per matching element
    """
    if is_update:
        xml_str = None
        for _attempt in range(5):
            try:
                xml_str = self.adb_ext.dump()  # fetch the XML dump
                self.__init_xml(xml_str)
            except etree.XMLSyntaxError:
                logging.error('etree.XMLSyntaxError:\n')
                if xml_str:
                    logging.error('xml str:{}'.format(xml_str))
            else:
                # Dump parsed cleanly; no further attempts needed.
                break

    # Python 2 passes byte strings; decode them before querying.
    if sys.version_info[0] < 3:
        xpath = xpath.decode('utf-8')
    return [self.get_ui_by_element(node) for node in self.xml.xpath(xpath)]
Example #23
Source File: test_backends.py From xblock-video with GNU General Public License v3.0 | 6 votes |
def test_download_default_transcript(self, backend, download_transcript_mock, params):
    """
    Check default transcript is downloaded from a video platform API.

    For every outcome of the mock, the mock is applied, the transcript is
    downloaded through the backend's player and either the returned text or
    the resulting error message is compared with the mock's expectations.
    """
    player_cls = self.player[backend]
    outcomes = download_transcript_mock.get_outcomes()
    for position, outcome in enumerate(outcomes):
        active_mock = download_transcript_mock(event=outcome)
        self.mocked_objects = active_mock.apply_mock(self.mocked_objects)
        try:
            transcript = player_cls(self.xblock).download_default_transcript(**params[position])
            message = ''
            expected_transcript = active_mock.expected_value[0]
            self.assertIsInstance(transcript, unicode)
            self.assertEqual(transcript, expected_transcript)
        except VideoXBlockException as ex:
            message = ex.message
        except etree.XMLSyntaxError:
            message = 'XMLSyntaxError exception'
        # The last expected value is the message this outcome should yield.
        expected_message = active_mock.expected_value[-1]
        self.assertIn(expected_message, message)
        self.restore_mocked()
Example #24
Source File: test_tesseract.py From nidaba with GNU General Public License v2.0 | 6 votes |
def test_capi_multiple(self):
    """
    Test that tesseract CAPI calls create hocr output for multiple
    languages.
    """
    # Skip when the shared library cannot be loaded; ctypes reports that
    # as OSError, so nothing broader needs to be caught.
    try:
        ctypes.cdll.LoadLibrary('libtesseract.so.3')
    except OSError:
        raise unittest.SkipTest
    self.tesseract.setup(tessdata=tessdata, implementation='capi')
    ocr = self.tesseract.ocr_tesseract.run(('test', 'segmentation_tiff.xml'),
                                           languages=['grc', 'eng'],
                                           extended=False)
    outpath = os.path.join(self.storage_path, *ocr)
    self.assertTrue(os.path.isfile(outpath), msg='Tesseract did not '
                    'output a file!')
    try:
        # Parse from a managed handle so the file is closed afterwards;
        # only well-formedness matters, the tree itself is not used.
        with open(outpath, 'rb') as fp:
            etree.parse(fp)
    except etree.XMLSyntaxError:
        self.fail(msg='The output was not valid html/xml!')
Example #25
Source File: process_forest.py From process-forest with Apache License 2.0 | 5 votes |
def get_entries(evtx):
    """
    Yield an Entry for each (xml, record) pair of the file's XML view,
    skipping pairs whose XML raises a syntax error.

    @rtype: generator of Entry
    """
    header = evtx.get_file_header()
    for xml_text, rec in evtx_file_xml_view(header):
        try:
            yield Entry(xml_text, rec)
        except etree.XMLSyntaxError:
            # Malformed record XML: move on to the next pair.
            continue
Example #26
Source File: doctestcompare.py From stopstalk-deployment with MIT License | 5 votes |
def output_difference(self, example, got, optionflags):
    """Describe how ``got`` differs from the output expected by ``example``.

    When no parser applies, or either text fails to parse, the result is
    the standard ``OutputChecker.output_difference`` text, preceded by any
    parse errors.  Otherwise both documents are formatted and a
    document-level diff is returned.
    """
    want = example.want
    parser = self.get_parser(want, got, optionflags)
    errors = []
    if parser is not None:
        # Parse both sides independently so each error is reported.
        try:
            want_doc = parser(want)
        except etree.XMLSyntaxError as exc:
            errors.append('In example: %s' % exc)
        try:
            got_doc = parser(got)
        except etree.XMLSyntaxError as exc:
            errors.append('In actual output: %s' % exc)

    if parser is None or errors:
        value = OutputChecker.output_difference(
            self, example, got, optionflags)
        if not errors:
            return value
        errors.append(value)
        return '\n'.join(errors)

    html = parser is html_fromstring
    diff_parts = [
        'Expected:',
        self.format_doc(want_doc, html, 2),
        'Got:',
        self.format_doc(got_doc, html, 2),
        'Diff:',
        self.collect_diff(want_doc, got_doc, html, 2),
    ]
    return '\n'.join(diff_parts)
Example #27
Source File: test_ocropus.py From nidaba with GNU General Public License v2.0 | 5 votes |
def test_file_outpath_jpg(self):
    """
    Test that ocropus creates hocr output for jpgs.
    """
    ocr = self.ocropus.ocr_ocropus.run((('test', 'segmentation.xml'),
                                        ('test', 'image_jpg.jpg')),
                                       model='ocropus')
    try:
        parser = etree.HTMLParser()
        # Parse from a managed handle so the file is closed afterwards.
        with open(os.path.join(self.storage_path, *ocr), 'rb') as fp:
            etree.parse(fp, parser)
    except etree.XMLSyntaxError:
        self.fail(msg='The output was not valid html/xml!')
Example #28
Source File: ooyala_player.py From xblock-ooyala with GNU Affero General Public License v3.0 | 5 votes |
def studio_submit(self, submissions, suffix=''):
    """Validate the submitted XML config and store the submitted settings.

    Returns a dict with ``result`` set to ``'error'`` (plus the parser
    ``message``) when ``xml_config`` is not well-formed, in which case
    nothing is stored; otherwise every submitted field is copied onto the
    block and ``result`` is ``'success'``.
    """
    xml_config = submissions['xml_config']
    try:
        etree.parse(StringIO(xml_config))
    except etree.XMLSyntaxError as e:
        # Reject the whole submission; no field is updated.
        return {
            'result': 'error',
            'message': e.message
        }

    response = {
        'result': 'success',
    }
    self.xml_config = xml_config
    self.display_name = submissions['display_name']
    self.content_id = submissions['content_id'].strip()
    self.transcript_file_id = submissions['transcript_file_id'].strip()
    self.enable_player_token = submissions['enable_player_token']
    self.partner_code = submissions['partner_code']
    self.api_key = submissions['api_key']
    self.api_secret_key = submissions['api_secret_key']
    self.api_key_3play = submissions['api_key_3play']
    self.expiration_time = submissions['expiration_time']
    self.width = submissions['width']
    self.height = submissions['height']
    # The form field is named differently from the attribute it feeds.
    self.disable_cc_and_translations = submissions['cc_disable']
    return response
Example #29
Source File: test_metadata.py From pikepdf with Mozilla Public License 2.0 | 5 votes |
def test_truncated_xml(resources, idx):
    """Check metadata handling when the XMP stream is cut off at ``idx``.

    Opening the truncated metadata strictly may fail with a syntax or
    assertion error; opening it non-strictly must allow an edit.
    """
    pdf = Pdf.open(resources / 'sandwich.pdf')
    original = pdf.Root.Metadata.read_bytes()
    assume(idx < len(original))

    # Replace the metadata with only its first idx bytes.
    pdf.Root.Metadata = pdf.make_stream(original[:idx])

    try:
        with pdf.open_metadata(strict=True) as strict_xmp:
            strict_xmp['pdfaid:part'] = '5'
    except (XMLSyntaxError, AssertionError):
        # Strict mode is allowed to reject the truncated data.
        pass

    with pdf.open_metadata(strict=False) as lenient_xmp:
        lenient_xmp['pdfaid:part'] = '7'
Example #30
Source File: _fc.py From pypowervm with Apache License 2.0 | 5 votes |
def _parse_pg83_xml(xml_resp):
    """Parse LUARecovery XML response, looking for pg83 descriptor.

    :param xml_resp: Tuple containing OutputXML and StdOut results of the
                     LUARecovery Job
    :return: pg83 descriptor text, or None if not found.
    """
    # QUERY_INVENTORY response may contain more than one element.  Each will be
    # delimited by its own <?xml?> tag.  etree will only parse one at a time.
    for chunk in xml_resp.split('<?xml version="1.0"?>'):
        if not chunk:
            continue
        try:
            parsed = etree.fromstring(chunk)
        except etree.XMLSyntaxError as e:
            LOG.warning(_('QUERY_INVENTORY produced invalid chunk of XML '
                          '(%(chunk)s).  Error: %(err)s'),
                        {'chunk': chunk, 'err': e.args[0]})
            continue
        # iter() is the supported replacement for the deprecated
        # getiterator(); both walk the tree in document order.
        for elem in parsed.iter():
            if (etree.QName(elem.tag).localname == 'PhysicalVolume_base' and
                    elem.attrib.get('desType') == "NAA"):
                return elem.attrib.get('descriptor')
    LOG.warning(_('Failed to find pg83 descriptor in XML output:\n%s'),
                xml_resp)
    return None