org.apache.poi.ooxml.util.SAXHelper Java Examples

The following examples show how to use org.apache.poi.ooxml.util.SAXHelper. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ReadOnlySharedStringsTable.java (from the myexcel project, Apache License 2.0; 6 votes)
/**
 * Read this shared strings table from an XML file.
 * <p>
 * If the stream yields no bytes at all, nothing is parsed and
 * {@code stringsCache.finished()} is not called.
 *
 * @param is The input stream containing the XML document.
 * @throws IOException      if an error occurs while reading.
 * @throws SAXException     if parsing the XML data fails.
 * @throws RuntimeException if the XML reader cannot be configured; the
 *                          {@link ParserConfigurationException} is attached as the cause.
 */
public void readFrom(InputStream is) throws IOException, SAXException {
    // Peek at the first byte to detect an empty stream; the one-byte pushback
    // buffer lets the byte be returned so the parser still sees the full document.
    PushbackInputStream pis = new PushbackInputStream(is, 1);
    int emptyTest = pis.read();
    if (emptyTest > -1) {
        pis.unread(emptyTest);
        InputSource sheetSource = new InputSource(pis);
        try {
            XMLReader sheetParser = SAXHelper.newXMLReader();
            // This object receives the SAX events itself.
            sheetParser.setContentHandler(this);
            sheetParser.parse(sheetSource);
            stringsCache.finished();
        } catch (ParserConfigurationException e) {
            // Keep the original exception as the cause so its stack trace is not lost.
            throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage(), e);
        }
    }
}
 
Example #2
Source File: SaxExcelReader.java (from the myexcel project, Apache License 2.0; 6 votes)
/**
 * Parses the content of one sheet and reports it to the given handler,
 * resolving shared strings through the supplied table.
 * <p>
 * The first two constructor arguments of {@code XSSFSheetXMLHandler} are passed as
 * {@code null} here, and a fresh {@code DataFormatter} is used for every call.
 *
 * @param strings          The table of strings that may be referenced by cells in the sheet
 * @param sheetHandler     The callback that receives the sheet contents as they are parsed.
 * @param sheetInputStream The stream to read the sheet-data from.
 * @throws java.io.IOException An IO exception from the parser,
 *                             possibly from a byte stream or character stream
 *                             supplied by the application.
 * @throws SAXException        if parsing the XML data fails.
 * @throws RuntimeException    if the XML reader cannot be configured; the
 *                             {@link ParserConfigurationException} is attached as the cause.
 */
private void processSheet(
        SharedStrings strings,
        XSSFSheetXMLHandler.SheetContentsHandler sheetHandler,
        InputStream sheetInputStream) throws IOException, SAXException {
    DataFormatter formatter = new DataFormatter();
    InputSource sheetSource = new InputSource(sheetInputStream);
    try {
        XMLReader sheetParser = SAXHelper.newXMLReader();
        ContentHandler handler = new XSSFSheetXMLHandler(
                null, null, strings, sheetHandler, formatter, false);
        sheetParser.setContentHandler(handler);
        sheetParser.parse(sheetSource);
    } catch (ParserConfigurationException e) {
        // Keep the original exception as the cause so its stack trace is not lost.
        throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage(), e);
    }
}
 
Example #3
Source File: ConvertExcelToCSVProcessor.java (from the nifi project, Apache License 2.0; 4 votes)
/**
 * Handles an individual Excel sheet from the entire Excel document. Each sheet will result in an individual flowfile.
 * <p>
 * A child flowfile is created from {@code originalParentFF}, the sheet is parsed and written to it
 * through a {@code SheetToCSV} handler, and the result is transferred to {@code SUCCESS} after the
 * sheet name, row count, source file name, filename and MIME type attributes are set. If the XML
 * reader cannot be created, the flowfile gets an error attribute and is transferred to
 * {@code FAILURE} instead. The sheet input stream is always closed before this method returns.
 *
 * @param session
 *  The NiFi ProcessSession instance for the current invocation.
 * @param originalParentFF
 *  The flowfile the new per-sheet flowfile is created from; its filename attribute, when present,
 *  is recorded as the source file name.
 * @param sheetInputStream
 *  The stream holding the sheet XML; closed by this method.
 * @param readConfig
 *  Supplies the sheet name, styles, shared strings table and the "format values" switch.
 * @param csvFormat
 *  The CSV format handed to the {@code SheetToCSV} handler.
 * @throws IOException
 *  if writing the flowfile content or closing a stream fails.
 */
private void handleExcelSheet(ProcessSession session, FlowFile originalParentFF, final InputStream sheetInputStream, ExcelSheetReadConfig readConfig,
                              CSVFormat csvFormat) throws IOException {

    FlowFile ff = session.create(originalParentFF);
    try {
        final DataFormatter formatter = new DataFormatter();
        final InputSource sheetSource = new InputSource(sheetInputStream);

        final SheetToCSV sheetHandler = new SheetToCSV(readConfig, csvFormat);

        final XMLReader parser = SAXHelper.newXMLReader();

        //If Value Formatting is set to false then don't pass in the styles table.
        // This will cause the XSSF Handler to return the raw value instead of the formatted one.
        final StylesTable sst = readConfig.getFormatValues()?readConfig.getStyles():null;

        final XSSFSheetXMLHandler handler = new XSSFSheetXMLHandler(
                sst, null, readConfig.getSharedStringsTable(), sheetHandler, formatter, false);

        parser.setContentHandler(handler);

        ff = session.write(ff, new OutputStreamCallback() {
            @Override
            public void process(OutputStream out) throws IOException {
                PrintStream outPrint = new PrintStream(out);
                sheetHandler.setOutput(outPrint);

                try {
                    try {
                        parser.parse(sheetSource);
                    } finally {
                        // Release the handler and the print stream even when parsing fails, so
                        // nothing is leaked. The sheet input stream itself is closed by the
                        // finally block of the enclosing method.
                        sheetHandler.close();
                        outPrint.close();
                    }
                } catch (SAXException se) {
                    // NOTE(review): a parse failure is only logged here, so the flowfile is still
                    // routed to SUCCESS with whatever was written so far - confirm this is intended.
                    getLogger().error("Error occurred while processing Excel sheet {}", new Object[]{readConfig.getSheetName()}, se);
                }
            }
        });

        ff = session.putAttribute(ff, SHEET_NAME, readConfig.getSheetName());
        // Long.toString avoids the deprecated boxing constructor new Long(...).
        ff = session.putAttribute(ff, ROW_NUM, Long.toString(sheetHandler.getRowCount()));

        if (StringUtils.isNotEmpty(originalParentFF.getAttribute(CoreAttributes.FILENAME.key()))) {
            ff = session.putAttribute(ff, SOURCE_FILE_NAME, originalParentFF.getAttribute(CoreAttributes.FILENAME.key()));
        } else {
            ff = session.putAttribute(ff, SOURCE_FILE_NAME, UNKNOWN_SHEET_NAME);
        }

        //Update the CoreAttributes.FILENAME to have the .csv extension now. Also update MIME.TYPE
        ff = session.putAttribute(ff, CoreAttributes.FILENAME.key(), updateFilenameToCSVExtension(ff.getAttribute(CoreAttributes.UUID.key()),
                ff.getAttribute(CoreAttributes.FILENAME.key()), readConfig.getSheetName()));
        ff = session.putAttribute(ff, CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);

        session.transfer(ff, SUCCESS);

    } catch (SAXException | ParserConfigurationException saxE) {
        // Reached when the XML reader cannot be created; parse errors are handled inside the callback.
        getLogger().error("Failed to create instance of Parser.", saxE);
        ff = session.putAttribute(ff,
                ConvertExcelToCSVProcessor.class.getName() + ".error", saxE.getMessage());
        session.transfer(ff, FAILURE);
    } finally {
        sheetInputStream.close();
    }
}