org.apache.poi.poifs.filesystem.OfficeXmlFileException Java Examples
The following examples show how to use
org.apache.poi.poifs.filesystem.OfficeXmlFileException.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: PresentationFactory.java From Quelea with GNU General Public License v3.0 | 6 votes |
/**
 * Builds a presentation object for the given file.
 * <p>
 * Only files with a "ppt" or "pptx" extension are handled. The file is first
 * opened as a {@code PPTPresentation}; if that raises an
 * {@code OfficeXmlFileException}, it is opened as a {@code PPTXPresentation}
 * instead.
 *
 * @param file the file to generate the presentation from.
 * @return the presentation object, or null if the file does not have a
 *         "ppt"/"pptx" extension (a warning is logged in that case).
 * @throws IOException declared by this method; not raised directly by the
 *         code visible here.
 */
public Presentation getPresentation(File file) throws IOException {
    boolean isPowerPoint = Utils.hasExtension(file, "ppt") || Utils.hasExtension(file, "pptx");
    if (!isPowerPoint) {
        LOGGER.log(Level.WARNING, "Illegal file type: {0}", file.getName());
        return null;
    }

    final String path = file.getAbsolutePath();
    try {
        return new PPTPresentation(path);
    } catch (OfficeXmlFileException notBinaryFormat) {
        // The binary-format reader rejected the file as Office XML; retry with the XML reader.
        return new PPTXPresentation(path);
    }
}
Example #2
Source File: FileBeanParser.java From everywhere with Apache License 2.0 | 6 votes |
/**
 * Extracts the text of a Word document from a stream, choosing the extractor
 * by the stream's detected file magic.
 * <p>
 * OLE2 content is read with {@code WordExtractor}; OOXML content is read with
 * {@code XWPFWordExtractor}. Any other magic leaves the result empty.
 * An {@code OfficeXmlFileException} is logged against {@code filePath} and
 * the text gathered so far (possibly empty) is returned.
 *
 * @param filePath path used only as the log message when extraction fails
 * @param is       stream holding the document; it is always closed before
 *                 this method returns
 * @return the extracted text, or an empty string if nothing was extracted
 * @throws Exception any other failure raised while detecting or reading the document
 */
private static String readDoc(String filePath, InputStream is) throws Exception {
    String text = "";
    is = FileMagic.prepareToCheckMagic(is);
    try {
        // Detect the format once and reuse the result for both branches.
        FileMagic magic = FileMagic.valueOf(is);
        if (magic == FileMagic.OLE2) {
            // try-with-resources closes the extractor even when getText() throws.
            try (WordExtractor ex = new WordExtractor(is)) {
                text = ex.getText();
            }
        } else if (magic == FileMagic.OOXML) {
            XWPFDocument doc = new XWPFDocument(is);
            try (XWPFWordExtractor extractor = new XWPFWordExtractor(doc)) {
                text = extractor.getText();
            }
        }
    } catch (OfficeXmlFileException e) {
        logger.error(filePath, e);
    } finally {
        if (is != null) {
            is.close();
        }
    }
    return text;
}
Example #3
Source File: MyExcelUtil.java From seed with Apache License 2.0 | 5 votes |
/** * @param excelFile Excel文件 * @param modelClass 承载Excel数据的实体类 * @param skipRows 指定跳过的行数:从0开始,传-1表示不跳过 * Comment by 玄玉<https://jadyer.cn/> on 2019/8/15 19:35. */ public static <T> List<T> read(File excelFile, Class<T> modelClass, int skipRows){ List<T> dataList; //初始化SaxExcelReader SaxExcelReader<T> saxExcelReader = SaxExcelReader.of(modelClass); //判断是否需要跳过行 if(-1 < skipRows){ saxExcelReader = saxExcelReader.rowFilter(row -> row.getRowNum() > skipRows); } try{ //读文件 dataList = saxExcelReader.read(excelFile); }catch (OfficeXmlFileException e){ //若读取xls时报告格式错误,那就试试重命名为xlsx再读(有的excel文件头是2007版的,但文件名却是.xls结尾) String newFilePath = "unkonwnFile"; if(excelFile.getName().endsWith(".xls")){ newFilePath = excelFile.getPath() + "x"; } if(excelFile.getName().endsWith(".xlsx")){ newFilePath = excelFile.getPath().substring(0, excelFile.getPath().length()-1); } LogUtil.getLogger().warn("文件读取失败,异常信息为:{}。现尝试修改文件后缀名再重新读取一次,新文件名为:{}", e.getMessage(), newFilePath); File newFile = new File(newFilePath); excelFile.renameTo(newFile); dataList = saxExcelReader.read(newFile); } return dataList; }
Example #4
Source File: VBAMacroReader.java From lams with GNU General Public License v2.0 | 5 votes |
/**
 * Creates a macro reader for the given file.
 * <p>
 * The file is first opened as an {@code NPOIFSFileSystem}. If that raises an
 * {@code OfficeXmlFileException}, the file is handed to {@code openOOXML}
 * as a stream instead.
 *
 * @param file the file to read
 * @throws IOException if opening the file fails
 */
public VBAMacroReader(File file) throws IOException {
    try {
        this.fs = new NPOIFSFileSystem(file);
    } catch (OfficeXmlFileException notOle2) {
        // Not an OLE2 container: fall back to the OOXML path.
        FileInputStream ooxmlStream = new FileInputStream(file);
        openOOXML(ooxmlStream);
    }
}
Example #5
Source File: ExcelPerfModeReader.java From azeroth with Apache License 2.0 | 5 votes |
/**
 * Converts the workbook at {@code path} with {@code XLS2CSV} and returns the result.
 * <p>
 * Failures are translated: format-related exceptions and I/O exceptions become
 * an {@code ExcelOperBaseException} with a user-facing message; anything else
 * is wrapped in a {@code RuntimeException}.
 *
 * @param path location of the workbook to process
 * @return the lines produced by {@code XLS2CSV.process()}
 */
private List<String> readAsXLS(String path) {
    try {
        return new XLS2CSV(path, -1).process();
    } catch (Exception e) {
        // The format check comes first, so it takes precedence over the generic I/O branch.
        boolean wrongFormat = e instanceof NotOLE2FileException
                || e instanceof NotOfficeXmlFileException
                || e instanceof OfficeXmlFileException;
        if (wrongFormat) {
            throw new ExcelOperBaseException("请选择正确格式excel文件");
        } else if (e instanceof IOException) {
            throw new ExcelOperBaseException("文件读取失败");
        } else {
            throw new RuntimeException(e);
        }
    }
}
Example #6
Source File: ExcelPerfModeReader.java From jeesuite-libs with Apache License 2.0 | 5 votes |
/**
 * Runs {@code XLS2CSV} over the workbook at {@code path}.
 * <p>
 * Any exception is mapped as follows: a format-related exception or an
 * {@code IOException} is replaced by an {@code ExcelOperBaseException} carrying
 * a user-facing message; every other exception is wrapped in a
 * {@code RuntimeException}.
 *
 * @param path location of the workbook to process
 * @return the lines produced by {@code XLS2CSV.process()}
 */
private List<String> readAsXLS(String path){
    try {
        XLS2CSV converter = new XLS2CSV(path, -1);
        List<String> lines = converter.process();
        return lines;
    } catch (Exception cause) {
        // Choose the user-facing message; null means "no translation, wrap as-is".
        String message = null;
        if (cause instanceof NotOLE2FileException
                || cause instanceof NotOfficeXmlFileException
                || cause instanceof OfficeXmlFileException) {
            message = "请选择正确格式excel文件";
        } else if (cause instanceof IOException) {
            message = "文件读取失败";
        }

        if (message == null) {
            throw new RuntimeException(cause);
        }
        throw new ExcelOperBaseException(message);
    }
}
Example #7
Source File: MSPowerpointIndexerTest.java From carbon-apimgt with Apache License 2.0 | 5 votes |
/**
 * Expects {@code getIndexedDocument} to fail with a {@code SolrException}
 * when constructing both {@code POIFSFileSystem} and {@code XMLSlideShow}
 * is stubbed to throw.
 */
@Test(expected = SolrException.class)
public void testShouldThrowExceptionWhenFailToReadFile() throws Exception {
    // Constructing a POIFSFileSystem from any InputStream throws OfficeXmlFileException.
    PowerMockito.whenNew(POIFSFileSystem.class).withParameterTypes(InputStream.class)
            .withArguments(Mockito.any(InputStream.class))
            .thenThrow(OfficeXmlFileException.class);
    // Constructing an XMLSlideShow from an InputStream throws IOException.
    PowerMockito.whenNew(XMLSlideShow.class).withParameterTypes(InputStream.class)
            .withArguments(Mockito.any())
            .thenThrow(IOException.class);

    // SolrException is expected
    MSPowerpointIndexer indexer = new MSPowerpointIndexer();
    indexer.getIndexedDocument(file2Index);
}
Example #8
Source File: MSExcelIndexerTest.java From carbon-apimgt with Apache License 2.0 | 5 votes |
/**
 * Calls {@code getIndexedDocument} three times and fails the test if any call
 * throws. {@code excelExtractor.getText()} is stubbed to behave differently on
 * consecutive invocations: return text, then throw
 * {@code OfficeXmlFileException}, then throw {@code Exception}.
 */
@Test
public void testShouldReturnIndexedDocmentWhenParameterCorrect() {
    String excelText = "excel";
    // Consecutive stubbing: 1st call returns text, 2nd throws OfficeXmlFileException,
    // 3rd (and later) throws Exception.
    Mockito.when(excelExtractor.getText())
            .thenReturn(excelText)
            .thenThrow(OfficeXmlFileException.class)
            .thenThrow(Exception.class);
    Mockito.when(xssfExtractor.getText()).thenReturn(excelText);
    try {
        // retrieving indexed document with ExcelExtractor
        msExcelIndexer.getIndexedDocument(file2Index);

        // switching the mediaType null check
        file2Index = new AsyncIndexer.File2Index("".getBytes(), null, "", -1234, "");

        // retrieving indexed document with MSExcelIndexer
        // Note: .thenReturn(excelText).thenThrow(OfficeXmlFileException.class) this switches the indexer
        msExcelIndexer.getIndexedDocument(file2Index);

        // switching to silent Exception catch block
        msExcelIndexer.getIndexedDocument(file2Index);
    } catch (Exception e) {
        // Any exception escaping the indexer is a test failure.
        Assert.fail("Should not throw any exceptions");
    }
}
Example #9
Source File: MSExcelIndexerTest.java From carbon-apimgt with Apache License 2.0 | 5 votes |
/**
 * Expects {@code getIndexedDocument} to fail with a {@code SolrException}
 * when both extractors are stubbed to throw from {@code getText()}.
 */
@Test(expected = SolrException.class)
public void testShouldThrowExceptionWhenErrorOccurs() {
    // excelExtractor throws OfficeXmlFileException; xssfExtractor throws IOException.
    Mockito.when(excelExtractor.getText()).thenThrow(OfficeXmlFileException.class);
    Mockito.when(xssfExtractor.getText()).thenThrow(IOException.class);

    // SolrException is expected
    msExcelIndexer.getIndexedDocument(file2Index);
}
Example #10
Source File: MSWordIndexerTest.java From carbon-apimgt with Apache License 2.0 | 5 votes |
/**
 * Expects {@code getIndexedDocument} to fail with a {@code SolrException}
 * when constructing both {@code POIFSFileSystem} and {@code XWPFDocument}
 * is stubbed to throw.
 */
@Test(expected = SolrException.class)
public void testShouldThrowExceptionWhenFailToReadFile() throws Exception {
    // Constructing a POIFSFileSystem from any InputStream throws OfficeXmlFileException.
    PowerMockito.whenNew(POIFSFileSystem.class).withParameterTypes(InputStream.class)
            .withArguments(Mockito.any(InputStream.class))
            .thenThrow(OfficeXmlFileException.class);
    // Constructing an XWPFDocument from an InputStream throws IOException.
    PowerMockito.whenNew(XWPFDocument.class).withParameterTypes(InputStream.class)
            .withArguments(Mockito.any())
            .thenThrow(IOException.class);

    // SolrException is expected
    MSWordIndexer indexer = new MSWordIndexer();
    indexer.getIndexedDocument(file2Index);
}
Example #11
Source File: HeaderBlock.java From lams with GNU General Public License v2.0 | 4 votes |
private HeaderBlock(byte[] data) throws IOException { this._data = data.clone(); // verify signature FileMagic fm = FileMagic.valueOf(data); switch (fm) { case OLE2: break; case OOXML: throw new OfficeXmlFileException("The supplied data appears to be in the Office 2007+ XML. " + "You are calling the part of POI that deals with OLE2 Office Documents. " + "You need to call a different part of POI to process this data (eg XSSF instead of HSSF)"); case XML: throw new NotOLE2FileException("The supplied data appears to be a raw XML file. " + "Formats such as Office 2003 XML are not supported"); case MSWRITE: throw new NotOLE2FileException("The supplied data appears to be in the old MS Write format. " + "Apache POI doesn't currently support this format"); case BIFF2: case BIFF3: case BIFF4: throw new OldExcelFormatException("The supplied data appears to be in "+fm+" format. " + "HSSF only supports the BIFF8 format, try OldExcelExtractor"); default: // Give a generic error if the OLE2 signature isn't found String exp = HexDump.longToHex(_signature); String act = HexDump.longToHex(LittleEndian.getLong(data, 0)); throw new NotOLE2FileException( "Invalid header signature; read " + act + ", expected " + exp + " - Your file appears not to be a valid OLE2 document"); } // Figure out our block size if (_data[30] == 12) { this.bigBlockSize = POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS; } else if(_data[30] == 9) { this.bigBlockSize = POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS; } else { throw new IOException("Unsupported blocksize (2^"+ _data[30] + "). 
Expected 2^9 or 2^12."); } // Setup the fields to read and write the counts and starts _bat_count = new IntegerField(_bat_count_offset, data).get(); _property_start = new IntegerField(_property_start_offset,_data).get(); _sbat_start = new IntegerField(_sbat_start_offset, _data).get(); _sbat_count = new IntegerField(_sbat_block_count_offset, _data).get(); _xbat_start = new IntegerField(_xbat_start_offset, _data).get(); _xbat_count = new IntegerField(_xbat_count_offset, _data).get(); }
Example #12
Source File: MSPowerpointIndexerTest.java From carbon-apimgt with Apache License 2.0 | 4 votes |
/**
 * Indexes the same input three times with {@code MSPowerpointIndexer} and
 * checks the media-type field of each resulting document.
 * <p>
 * Construction of {@code POIFSFileSystem} is stubbed to behave differently on
 * consecutive calls: throw {@code OfficeXmlFileException}, then return a
 * mock, then throw {@code APIManagementException}.
 */
@Test
public void testShouldReturnIndexedDocumentWhenParameterCorrect() throws Exception {
    POIFSFileSystem ppExtractor = Mockito.mock(POIFSFileSystem.class);
    PowerPointExtractor powerPointExtractor = Mockito.mock(PowerPointExtractor.class);
    XSLFPowerPointExtractor xslfExtractor = Mockito.mock(XSLFPowerPointExtractor.class);
    XMLSlideShow xmlSlideShow = Mockito.mock(XMLSlideShow.class);
    // Consecutive stubbing of "new POIFSFileSystem(InputStream)":
    // 1st throws OfficeXmlFileException, 2nd returns the mock, 3rd throws APIManagementException.
    PowerMockito.whenNew(POIFSFileSystem.class).withParameterTypes(InputStream.class)
            .withArguments(Mockito.any(InputStream.class))
            .thenThrow(OfficeXmlFileException.class)
            .thenReturn(ppExtractor)
            .thenThrow(APIManagementException.class);
    // Route the remaining constructor calls to the mocks created above.
    PowerMockito.whenNew(PowerPointExtractor.class).withParameterTypes(POIFSFileSystem.class)
            .withArguments(ppExtractor).thenReturn(powerPointExtractor);
    PowerMockito.whenNew(XMLSlideShow.class).withParameterTypes(InputStream.class)
            .withArguments(Mockito.any())
            .thenReturn(xmlSlideShow);
    PowerMockito.whenNew(XSLFPowerPointExtractor.class).withArguments(xmlSlideShow).thenReturn(xslfExtractor);
    // Both extractors yield empty text.
    Mockito.when(powerPointExtractor.getText()).thenReturn("");
    Mockito.when(xslfExtractor.getText()).thenReturn("");

    MSPowerpointIndexer indexer = new MSPowerpointIndexer();

    IndexDocument ppDoc = indexer.getIndexedDocument(file2Index);

    // should return the default media type when media type is not defined in file2Index
    if (!"application/vnd.ms-powerpoint".equals(ppDoc.getFields().get(IndexingConstants.FIELD_MEDIA_TYPE).get(0))) {
        Assert.fail();
    }

    // should return the media type we have set in the file2Index
    file2Index.mediaType = "text/html";
    ppDoc = indexer.getIndexedDocument(file2Index);
    if (!"text/html".equals(ppDoc.getFields().get(IndexingConstants.FIELD_MEDIA_TYPE).get(0))) {
        Assert.fail();
    }

    // should return the media type we have set in the file2Index even if exception occurred while reading the file
    ppDoc = indexer.getIndexedDocument(file2Index);
    if (!"text/html".equals(ppDoc.getFields().get(IndexingConstants.FIELD_MEDIA_TYPE).get(0))) {
        Assert.fail();
    }
}
Example #13
Source File: MSWordIndexerTest.java From carbon-apimgt with Apache License 2.0 | 4 votes |
/**
 * Indexes the same input three times with {@code MSWordIndexer} and checks
 * the media-type field of each resulting document.
 * <p>
 * Construction of {@code POIFSFileSystem} is stubbed to behave differently on
 * consecutive calls: throw {@code OfficeXmlFileException}, then return a
 * mock, then throw {@code APIManagementException}.
 */
@Test
public void testShouldReturnIndexedDocumentWhenParameterCorrect() throws Exception {
    POIFSFileSystem poiFS = Mockito.mock(POIFSFileSystem.class);
    WordExtractor wordExtractor = Mockito.mock(WordExtractor.class);
    XWPFWordExtractor xwpfExtractor = Mockito.mock(XWPFWordExtractor.class);
    XWPFDocument xwpfDocument = Mockito.mock(XWPFDocument.class);
    // Consecutive stubbing of "new POIFSFileSystem(InputStream)":
    // 1st throws OfficeXmlFileException, 2nd returns the mock, 3rd throws APIManagementException.
    PowerMockito.whenNew(POIFSFileSystem.class).withParameterTypes(InputStream.class)
            .withArguments(Mockito.any(InputStream.class))
            .thenThrow(OfficeXmlFileException.class)
            .thenReturn(poiFS)
            .thenThrow(APIManagementException.class);
    // Route the remaining constructor calls to the mocks created above.
    PowerMockito.whenNew(WordExtractor.class).withArguments(poiFS).thenReturn(wordExtractor);
    PowerMockito.whenNew(XWPFDocument.class).withParameterTypes(InputStream.class)
            .withArguments(Mockito.any())
            .thenReturn(xwpfDocument);
    PowerMockito.whenNew(XWPFWordExtractor.class).withArguments(xwpfDocument).thenReturn(xwpfExtractor);
    // Both extractors yield empty text.
    Mockito.when(wordExtractor.getText()).thenReturn("");
    Mockito.when(xwpfExtractor.getText()).thenReturn("");

    MSWordIndexer indexer = new MSWordIndexer();

    IndexDocument wordDoc = indexer.getIndexedDocument(file2Index);

    // should return the default media type when media type is not defined in file2Index
    // NOTE(review): the expected default here is "application/pdf" for a Word indexer — confirm
    // this matches the intended behaviour of MSWordIndexer rather than a copy-paste leftover.
    if (!"application/pdf".equals(wordDoc.getFields().get(IndexingConstants.FIELD_MEDIA_TYPE).get(0))) {
        Assert.fail();
    }

    // should return the media type we have set in the file2Index
    file2Index.mediaType = "text/html";
    wordDoc = indexer.getIndexedDocument(file2Index);
    if (!"text/html".equals(wordDoc.getFields().get(IndexingConstants.FIELD_MEDIA_TYPE).get(0))) {
        Assert.fail();
    }

    // should return the media type we have set in the file2Index even if exception occurred while reading the file
    file2Index.mediaType = "text/html";
    wordDoc = indexer.getIndexedDocument(file2Index);
    if (!"text/html".equals(wordDoc.getFields().get(IndexingConstants.FIELD_MEDIA_TYPE).get(0))) {
        Assert.fail();
    }
}