org.apache.poi.hssf.extractor.ExcelExtractor Java Examples
The following examples show how to use
org.apache.poi.hssf.extractor.ExcelExtractor.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: OLE2ExtractorFactory.java From lams with GNU General Public License v2.0 | 5 votes |
/** * Create the Extractor, if possible. Generally needs the Scratchpad jar. * Note that this won't check for embedded OOXML resources either, use * {@link org.apache.poi.extractor.ExtractorFactory} for that. */ public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException { // Look for certain entries in the stream, to figure it // out from for (String workbookName : WORKBOOK_DIR_ENTRY_NAMES) { if (poifsDir.hasEntry(workbookName)) { if (getPreferEventExtractor()) { return new EventBasedExcelExtractor(poifsDir); } return new ExcelExtractor(poifsDir); } } if (poifsDir.hasEntry(OLD_WORKBOOK_DIR_ENTRY_NAME)) { throw new OldExcelFormatException("Old Excel Spreadsheet format (1-95) " + "found. Please call OldExcelExtractor directly for basic text extraction"); } // Ask Scratchpad, or fail trying Class<?> cls = getScratchpadClass(); try { Method m = cls.getDeclaredMethod("createExtractor", DirectoryNode.class); POITextExtractor ext = (POITextExtractor)m.invoke(null, poifsDir); if (ext != null) return ext; } catch (IllegalArgumentException iae) { throw iae; } catch (Exception e) { throw new IllegalArgumentException("Error creating Scratchpad Extractor", e); } throw new IllegalArgumentException("No supported documents found in the OLE2 stream"); }
Example #2
Source File: IndexerTextExtractor.java From eplmp with Eclipse Public License 1.0 | 5 votes |
private String microsoftExcelDocumentToString(InputStream inputStream) throws IOException, OpenXML4JException, XmlException { StringBuilder sb = new StringBuilder(); try (InputStream excelStream = new BufferedInputStream(inputStream)) { if (POIFSFileSystem.hasPOIFSHeader(excelStream)) { // Before 2007 format files POIFSFileSystem excelFS = new POIFSFileSystem(excelStream); ExcelExtractor excelExtractor = new ExcelExtractor(excelFS); sb.append(excelExtractor.getText()); excelExtractor.close(); } else { // New format XSSFWorkbook workBook = new XSSFWorkbook(excelStream); int numberOfSheets = workBook.getNumberOfSheets(); for (int i = 0; i < numberOfSheets; i++) { XSSFSheet sheet = workBook.getSheetAt(0); Iterator<Row> rowIterator = sheet.rowIterator(); while (rowIterator.hasNext()) { XSSFRow row = (XSSFRow) rowIterator.next(); Iterator<Cell> cellIterator = row.cellIterator(); while (cellIterator.hasNext()) { XSSFCell cell = (XSSFCell) cellIterator.next(); sb.append(cell.toString()); sb.append(" "); } sb.append("\n"); } sb.append("\n"); } } } return sb.toString(); }
Example #3
Source File: MyExcelUtils.java From spring-boot with Apache License 2.0 | 5 votes |
/** * 利用 POI 提供的工具,提取文件内容为字符串 * * @param excelFile 待提取的 excel 文件 * @return */ public String excelExtractor(File excelFile) { try { HSSFWorkbook wb = new HSSFWorkbook(new FileInputStream(excelFile)); ExcelExtractor extractor = new ExcelExtractor(wb); extractor.setFormulasNotResults(true); extractor.setIncludeSheetNames(true); return extractor.getText(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); return null; } }
Example #4
Source File: MsExcelTextExtractor.java From document-management-system with GNU General Public License v2.0 | 5 votes |
/**
 * {@inheritDoc}
 * <p>
 * Reads the stream as a POIFS file system and returns the text produced by
 * {@link ExcelExtractor}. Runtime failures are logged and rethrown as
 * {@link IOException}; the stream is always closed.
 */
public String extractText(InputStream stream, String type, String encoding) throws IOException {
    try {
        POIFSFileSystem fileSystem = new POIFSFileSystem(stream);
        ExcelExtractor extractor = new ExcelExtractor(fileSystem);
        String text = extractor.getText();
        return text;
    } catch (RuntimeException failure) {
        logger.warn("Failed to extract Excel text content", failure);
        throw new IOException(failure.getMessage(), failure);
    } finally {
        stream.close();
    }
}
Example #5
Source File: MSExcelIndexerTest.java From carbon-apimgt with Apache License 2.0 | 5 votes |
/**
 * Prepares the fixture: mocked extractors, an indexer wrapper built around
 * them, and a placeholder {@code File2Index} instance.
 */
@Before
public void setup() {
    // Both extractors are mocks so no real document parsing takes place.
    xssfExtractor = Mockito.mock(XSSFExcelExtractor.class);
    excelExtractor = Mockito.mock(ExcelExtractor.class);
    msExcelIndexer = new MSExcelIndexerWrapper(xssfExtractor, excelExtractor);

    byte[] emptyContent = "".getBytes();
    file2Index = new AsyncIndexer.File2Index(emptyContent, "", "", -1234, "");
}
Example #6
Source File: MetadataExtractor.java From document-management-system with GNU General Public License v2.0 | 4 votes |
/**
 * Extract summary metadata from an OLE2 Office document (Word, Excel or
 * PowerPoint, selected by MIME type).
 *
 * @param is       stream containing the document
 * @param mimeType MIME type used to pick the matching extractor; for any
 *                 other value an empty {@link OfficeMetadata} is returned
 * @return the metadata read from the document's summary information; date
 *         fields that the document does not define are left unset
 * @throws IOException if the stream cannot be read as a POIFS file system
 */
public static OfficeMetadata officeExtractor(InputStream is, String mimeType) throws IOException {
    POIFSFileSystem fs = new POIFSFileSystem(is);
    OfficeMetadata md = new OfficeMetadata();
    SummaryInformation si = null;

    if (MimeTypeConfig.MIME_MS_WORD.equals(mimeType)) {
        si = new WordExtractor(fs).getSummaryInformation();
    } else if (MimeTypeConfig.MIME_MS_EXCEL.equals(mimeType)) {
        si = new ExcelExtractor(fs).getSummaryInformation();
    } else if (MimeTypeConfig.MIME_MS_POWERPOINT.equals(mimeType)) {
        si = new PowerPointExtractor(fs).getSummaryInformation();
    }

    if (si != null) {
        md.setTitle(si.getTitle());
        md.setSubject(si.getSubject());
        md.setAuthor(si.getAuthor());
        md.setLastAuthor(si.getLastAuthor());
        md.setKeywords(si.getKeywords());
        md.setComments(si.getComments());
        md.setTemplate(si.getTemplate());
        md.setRevNumber(si.getRevNumber());
        md.setApplicationName(si.getApplicationName());
        md.setEditTime(si.getEditTime());
        md.setPageCount(si.getPageCount());
        md.setWordCount(si.getWordCount());
        md.setCharCount(si.getCharCount());
        md.setSecurity(si.getSecurity());

        // Each date may be absent from the document; Calendar.setTime(null)
        // would throw NullPointerException, so missing dates are skipped.
        Calendar createDateTime = toCalendarOrNull(si.getCreateDateTime());
        if (createDateTime != null) {
            md.setCreateDateTime(createDateTime);
        }

        Calendar lastSaveDateTime = toCalendarOrNull(si.getLastSaveDateTime());
        if (lastSaveDateTime != null) {
            md.setLastSaveDateTime(lastSaveDateTime);
        }

        Calendar lastPrinted = toCalendarOrNull(si.getLastPrinted());
        if (lastPrinted != null) {
            md.setLastPrinted(lastPrinted);
        }
    }

    log.info("officeExtractor: {}", md);
    return md;
}

/**
 * Converts a date to a {@link Calendar}, or returns {@code null} when the date is {@code null}.
 */
private static Calendar toCalendarOrNull(java.util.Date date) {
    if (date == null) {
        return null;
    }
    Calendar calendar = Calendar.getInstance();
    calendar.setTime(date);
    return calendar;
}
Example #7
Source File: ExcelOOXMLDocument.java From olat with Apache License 2.0 | 4 votes |
/**
 * Appends the text of the given header/footer to the buffer, followed by a
 * single space. Nothing is appended when the extracted text is empty.
 *
 * @param buffy target buffer
 * @param hf    header or footer to extract
 */
private void extractHeaderFooter(final StringBuilder buffy, final HeaderFooter hf) {
    final String text = ExcelExtractor._extractHeaderFooter(hf);
    if (text.isEmpty()) {
        return;
    }
    buffy.append(text);
    buffy.append(' ');
}
Example #8
Source File: MSExcelIndexer.java From carbon-apimgt with Apache License 2.0 | 4 votes |
/**
 * Creates an {@link ExcelExtractor} over the in-memory bytes of the given file.
 *
 * @param fileData holder whose {@code data} bytes are read as a POIFS file system
 * @return a new extractor for that content
 * @throws IOException if the bytes cannot be read as a POIFS file system
 */
protected ExcelExtractor getExcelExtractor(File2Index fileData) throws IOException {
    ByteArrayInputStream content = new ByteArrayInputStream(fileData.data);
    POIFSFileSystem fileSystem = new POIFSFileSystem(content);
    return new ExcelExtractor(fileSystem);
}
Example #9
Source File: MSExcelIndexerWrapper.java From carbon-apimgt with Apache License 2.0 | 4 votes |
/**
 * Creates a wrapper that holds the supplied extractors.
 *
 * @param xssfExtractor  extractor stored in {@code xssfExcelExtractor}
 * @param excelExtractor extractor stored in {@code excelExtractor}
 */
public MSExcelIndexerWrapper(XSSFExcelExtractor xssfExtractor, ExcelExtractor excelExtractor) {
    this.excelExtractor = excelExtractor;
    this.xssfExcelExtractor = xssfExtractor;
}
Example #10
Source File: MSExcelIndexerWrapper.java From carbon-apimgt with Apache License 2.0 | 4 votes |
/**
 * Returns the extractor held by this wrapper; {@code fileData} is not consulted.
 *
 * @param fileData ignored
 * @return the stored {@link ExcelExtractor}
 */
@Override
protected ExcelExtractor getExcelExtractor(AsyncIndexer.File2Index fileData) throws IOException {
    return excelExtractor;
}