org.apache.poi.poifs.filesystem.OfficeXmlFileException Java Examples

Source File: PresentationFactory.java From Quelea with GNU General Public License v3.0

6 votes

/**
 * Builds the presentation object for a PowerPoint file.
 * <p>
 * The PPT loader is tried first; if it rejects the file with an
 * {@link OfficeXmlFileException}, the PPTX loader is used instead.
 *
 * @param file the file to build the presentation from.
 * @return the presentation object, or null when the file has neither a "ppt"
 * nor a "pptx" extension (a warning is logged in that case).
 * @throws IOException declared by this method; presumably raised by the
 * presentation loaders - TODO confirm.
 */
public Presentation getPresentation(File file) throws IOException {
    boolean powerPointFile = Utils.hasExtension(file, "ppt") || Utils.hasExtension(file, "pptx");
    if(!powerPointFile) {
        // Unsupported extension: report it and hand back "no presentation".
        LOGGER.log(Level.WARNING, "Illegal file type: {0}", file.getName());
        return null;
    }
    try {
        return new PPTPresentation(file.getAbsolutePath());
    }
    catch(OfficeXmlFileException ex) {
        // The content is Office XML regardless of the extension; use the PPTX loader.
        return new PPTXPresentation(file.getAbsolutePath());
    }
}

Source File: FileBeanParser.java From everywhere with Apache License 2.0

6 votes

/**
 * Extracts the text of a Word document supplied as a stream.
 * <p>
 * The format is detected from the stream content: OLE2 data is read with
 * {@link WordExtractor}, OOXML data with {@link XWPFWordExtractor}. Any other
 * format yields an empty string. The stream and the extractor are closed on
 * every path, including when text extraction throws.
 *
 * @param filePath path of the document; only used as the log message when an
 *                 {@link OfficeXmlFileException} is caught
 * @param is       stream positioned at the start of the document; closed by this method
 * @return the extracted text, or an empty string when the format is not
 *         recognised or an {@link OfficeXmlFileException} was logged
 * @throws Exception any other failure raised while detecting or reading the document
 */
private static String readDoc (String filePath, InputStream is) throws Exception {
    String text = "";
    // try-with-resources replaces the manual finally block and closes the prepared stream.
    try (InputStream in = FileMagic.prepareToCheckMagic(is)) {
        // Sniff the format once instead of once per branch.
        FileMagic magic = FileMagic.valueOf(in);
        if (magic == FileMagic.OLE2) {
            // Previously the extractor was only closed on success; now it is closed even if getText() throws.
            try (WordExtractor ex = new WordExtractor(in)) {
                text = ex.getText();
            }
        } else if (magic == FileMagic.OOXML) {
            try (XWPFWordExtractor extractor = new XWPFWordExtractor(new XWPFDocument(in))) {
                text = extractor.getText();
            }
        }
    } catch (OfficeXmlFileException e) {
        // Best effort: log the offending file and return whatever text we have (empty).
        logger.error(filePath, e);
    }
    return text;
}

Source File: MyExcelUtil.java From seed with Apache License 2.0

5 votes

/**
 * Reads an Excel file into a list of model objects.
 * <p>
 * If the first read fails with an {@link OfficeXmlFileException}, the file is
 * renamed with the other Excel extension (".xls" to ".xlsx" or the reverse)
 * and read once more. Some Excel files carry a 2007-format header although
 * their name ends in ".xls". When the file has neither extension, or the
 * rename fails, the original exception is rethrown and no second read is attempted.
 *
 * @param excelFile  the Excel file
 * @param modelClass the entity class that carries the Excel data
 * @param skipRows   number of rows to skip: counted from 0, pass -1 to skip none
 * @return the rows read from the file
 * Comment by 玄玉<https://jadyer.cn/> on 2019/8/15 19:35.
 */
public static <T> List<T> read(File excelFile, Class<T> modelClass, int skipRows){
    // Initialise the SaxExcelReader
    SaxExcelReader<T> saxExcelReader = SaxExcelReader.of(modelClass);
    // Install the row filter only when rows have to be skipped
    if(-1 < skipRows){
        saxExcelReader = saxExcelReader.rowFilter(row -> row.getRowNum() > skipRows);
    }
    try{
        // Read the file
        return saxExcelReader.read(excelFile);
    }catch (OfficeXmlFileException e){
        // The content does not match the extension: work out the name with the other Excel extension
        String fileName = excelFile.getName();
        String filePath = excelFile.getPath();
        String newFilePath;
        if(fileName.endsWith(".xls")){
            newFilePath = filePath + "x";
        }else if(fileName.endsWith(".xlsx")){
            newFilePath = filePath.substring(0, filePath.length()-1);
        }else{
            // Not an Excel extension: never rename the caller's file to an arbitrary name
            throw e;
        }
        LogUtil.getLogger().warn("文件读取失败，异常信息为：{}。现尝试修改文件后缀名再重新读取一次，新文件名为：{}", e.getMessage(), newFilePath);
        File newFile = new File(newFilePath);
        if(!excelFile.renameTo(newFile)){
            // The rename did not happen, so the retry target does not exist; surface the original failure
            throw e;
        }
        return saxExcelReader.read(newFile);
    }
}

Source File: VBAMacroReader.java From lams with GNU General Public License v2.0

5 votes

public VBAMacroReader(File file) throws IOException {
    try {
        this.fs = new NPOIFSFileSystem(file);
    } catch (OfficeXmlFileException e) {
        openOOXML(new FileInputStream(file));
    }
}

Source File: ExcelPerfModeReader.java From azeroth with Apache License 2.0

5 votes

/**
 * Runs {@link XLS2CSV} over the file at the given path and returns its result.
 * <p>
 * Failures are translated as follows: a wrong-format exception becomes an
 * {@link ExcelOperBaseException} asking for a correctly formatted Excel file,
 * an {@link IOException} becomes an {@link ExcelOperBaseException} reporting
 * a read failure, and anything else is wrapped in a {@link RuntimeException}.
 *
 * @param path path of the file to process
 * @return the lines produced by {@code XLS2CSV.process()}
 */
private List<String> readAsXLS(String path) {
    try {
        return new XLS2CSV(path, -1).process();
    } catch (Exception e) {
        // The format check comes first so that it wins over the plain IOException check.
        final boolean wrongFormat = e instanceof NotOLE2FileException
                || e instanceof NotOfficeXmlFileException
                || e instanceof OfficeXmlFileException;
        if (wrongFormat) {
            // Message: "please choose an Excel file in the correct format"
            throw new ExcelOperBaseException("请选择正确格式excel文件");
        } else if (e instanceof IOException) {
            // Message: "failed to read the file"
            throw new ExcelOperBaseException("文件读取失败");
        } else {
            throw new RuntimeException(e);
        }
    }
}

Source File: ExcelPerfModeReader.java From jeesuite-libs with Apache License 2.0

5 votes

/**
 * Processes the file at the given path with {@link XLS2CSV}.
 * <p>
 * A wrong-format exception or an {@link IOException} is rethrown as an
 * {@link ExcelOperBaseException}; any other exception is wrapped in a
 * {@link RuntimeException}.
 *
 * @param path path of the file to process
 * @return the lines produced by {@code XLS2CSV.process()}
 */
private List<String> readAsXLS(String path){
	try {
		final XLS2CSV converter = new XLS2CSV(path, -1);
		return converter.process();
	} catch (Exception e) {
		// Wrong-format exceptions are tested one by one, before the generic I/O check.
		if(e instanceof NotOLE2FileException){
			throw new ExcelOperBaseException("请选择正确格式excel文件");
		}
		if(e instanceof NotOfficeXmlFileException){
			throw new ExcelOperBaseException("请选择正确格式excel文件");
		}
		if(e instanceof OfficeXmlFileException){
			throw new ExcelOperBaseException("请选择正确格式excel文件");
		}
		// Any remaining I/O problem is reported as a read failure.
		if(e instanceof IOException){
			throw new ExcelOperBaseException("文件读取失败");
		}
		throw new RuntimeException(e);
	}
}

Source File: MSPowerpointIndexerTest.java From carbon-apimgt with Apache License 2.0

5 votes

/**
 * Expects a {@link SolrException} from the indexer when constructing
 * {@link POIFSFileSystem} is stubbed to throw {@link OfficeXmlFileException}
 * and constructing {@link XMLSlideShow} is stubbed to throw {@link IOException}.
 */
@Test(expected = SolrException.class)
public void testShouldThrowExceptionWhenFailToReadFile() throws Exception {
    // Stub: constructing a POIFSFileSystem from a stream throws OfficeXmlFileException.
    PowerMockito.whenNew(POIFSFileSystem.class).withParameterTypes(InputStream.class)
            .withArguments(Mockito.any(InputStream.class))
            .thenThrow(OfficeXmlFileException.class);
    // Stub: constructing an XMLSlideShow from a stream throws IOException.
    PowerMockito.whenNew(XMLSlideShow.class).withParameterTypes(InputStream.class)
            .withArguments(Mockito.any())
            .thenThrow(IOException.class);

    // With both constructions failing, this call must raise the expected SolrException.
    new MSPowerpointIndexer().getIndexedDocument(file2Index);
}

Source File: MSExcelIndexerTest.java From carbon-apimgt with Apache License 2.0

5 votes

/**
 * Calls the Excel indexer three times against stubbed extractors and
 * requires that none of the calls throws.
 * <p>
 * {@code excelExtractor.getText()} is stubbed to return text on its first
 * call, throw {@link OfficeXmlFileException} on its second and throw
 * {@link Exception} afterwards; {@code xssfExtractor.getText()} always
 * returns text.
 */
@Test
public void testShouldReturnIndexedDocmentWhenParameterCorrect() {
    String excelText = "excel";
    Mockito.when(excelExtractor.getText())
            .thenReturn(excelText)
            .thenThrow(OfficeXmlFileException.class)
            .thenThrow(Exception.class);
    Mockito.when(xssfExtractor.getText()).thenReturn(excelText);

    try {
        // retrieving indexed document with ExcelExtractor
        msExcelIndexer.getIndexedDocument(file2Index);

        // switching the mediaType null check
        file2Index = new AsyncIndexer.File2Index("".getBytes(),
                null, "", -1234, "");

        // retrieving indexed document with MSExcelIndexer
        // Note: .thenReturn(excelText).thenThrow(OfficeXmlFileException.class) this switches the indexer
        msExcelIndexer.getIndexedDocument(file2Index);

        // switching to silent Exception catch block
        msExcelIndexer.getIndexedDocument(file2Index);
    } catch (Exception e) {
        // Same failure message as before, but with the cause attached so the
        // test report shows which call failed and why.
        throw new AssertionError("Should not throw any exceptions", e);
    }
}

Source File: MSExcelIndexerTest.java From carbon-apimgt with Apache License 2.0

5 votes

/**
 * Expects a {@link SolrException} from the Excel indexer when both stubbed
 * extractors fail on {@code getText()}.
 */
@Test(expected = SolrException.class)
public void testShouldThrowExceptionWhenErrorOccurs() {
    // The two stubs are on separate mocks, so their order is irrelevant.
    Mockito.when(xssfExtractor.getText()).thenThrow(IOException.class);
    Mockito.when(excelExtractor.getText()).thenThrow(OfficeXmlFileException.class);

    // With both extractors failing, the indexer must raise the expected SolrException.
    msExcelIndexer.getIndexedDocument(file2Index);
}

Source File: MSWordIndexerTest.java From carbon-apimgt with Apache License 2.0

5 votes

/**
 * Expects a {@link SolrException} from the indexer when constructing
 * {@link POIFSFileSystem} is stubbed to throw {@link OfficeXmlFileException}
 * and constructing {@link XWPFDocument} is stubbed to throw {@link IOException}.
 */
@Test(expected = SolrException.class)
public void testShouldThrowExceptionWhenFailToReadFile() throws Exception {
    // Stub: constructing a POIFSFileSystem from a stream throws OfficeXmlFileException.
    PowerMockito.whenNew(POIFSFileSystem.class).withParameterTypes(InputStream.class)
            .withArguments(Mockito.any(InputStream.class))
            .thenThrow(OfficeXmlFileException.class);
    // Stub: constructing an XWPFDocument from a stream throws IOException.
    PowerMockito.whenNew(XWPFDocument.class).withParameterTypes(InputStream.class)
            .withArguments(Mockito.any())
            .thenThrow(IOException.class);

    // With both constructions failing, this call must raise the expected SolrException.
    new MSWordIndexer().getIndexedDocument(file2Index);
}

Source File: HeaderBlock.java From lams with GNU General Public License v2.0

4 votes

/**
 * Builds a header block from raw header bytes.
 * <p>
 * The bytes are copied, their signature is checked via {@link FileMagic},
 * the block size is taken from byte 30, and the count/start fields are read
 * from their fixed offsets.
 *
 * @param data the raw header bytes; a clone is stored, the array itself is not retained
 * @throws OfficeXmlFileException  if the data looks like Office 2007+ XML
 * @throws NotOLE2FileException    if the data is raw XML, MS Write, or otherwise lacks the OLE2 signature
 * @throws OldExcelFormatException if the data is in BIFF2, BIFF3 or BIFF4 format
 * @throws IOException             if byte 30 holds a block size exponent other than 9 or 12
 */
private HeaderBlock(byte[] data) throws IOException {
    this._data = data.clone();

    // Verify the signature; only OLE2 data may continue past this switch.
    final FileMagic magic = FileMagic.valueOf(data);
    switch (magic) {
        case OLE2:
            break;
        case OOXML:
            throw new OfficeXmlFileException("The supplied data appears to be in the Office 2007+ XML. "
                + "You are calling the part of POI that deals with OLE2 Office Documents. "
                + "You need to call a different part of POI to process this data (eg XSSF instead of HSSF)");
        case XML:
            throw new NotOLE2FileException("The supplied data appears to be a raw XML file. "
                + "Formats such as Office 2003 XML are not supported");
        case MSWRITE:
            throw new NotOLE2FileException("The supplied data appears to be in the old MS Write format. "
                + "Apache POI doesn't currently support this format");
        case BIFF2:
        case BIFF3:
        case BIFF4:
            throw new OldExcelFormatException("The supplied data appears to be in "+magic+" format. "
                + "HSSF only supports the BIFF8 format, try OldExcelExtractor");
        default:
            // Give a generic error if the OLE2 signature isn't found
            final String expected = HexDump.longToHex(_signature);
            final String actual = HexDump.longToHex(LittleEndian.getLong(data, 0));
            throw new NotOLE2FileException(
                "Invalid header signature; read " + actual + ", expected " + expected +
                " - Your file appears not to be a valid OLE2 document");
    }

    // Figure out our block size: byte 30 holds the exponent (2^9 or 2^12).
    final byte blockSizeExponent = _data[30];
    if (blockSizeExponent == 9) {
        this.bigBlockSize = POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS;
    } else if (blockSizeExponent == 12) {
        this.bigBlockSize = POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS;
    } else {
        throw new IOException("Unsupported blocksize  (2^"+ blockSizeExponent + "). Expected 2^9 or 2^12.");
    }

    // Read the counts and starts from the private copy of the header.
    _bat_count      = new IntegerField(_bat_count_offset, _data).get();
    _property_start = new IntegerField(_property_start_offset, _data).get();
    _sbat_start     = new IntegerField(_sbat_start_offset, _data).get();
    _sbat_count     = new IntegerField(_sbat_block_count_offset, _data).get();
    _xbat_start     = new IntegerField(_xbat_start_offset, _data).get();
    _xbat_count     = new IntegerField(_xbat_count_offset, _data).get();
}

Source File: MSPowerpointIndexerTest.java From carbon-apimgt with Apache License 2.0

4 votes

/**
 * Checks the media type field of the document returned by
 * {@link MSPowerpointIndexer} over three consecutive calls.
 * <p>
 * Construction of {@link POIFSFileSystem} is stubbed to throw
 * {@link OfficeXmlFileException} first, then return a mock, then throw
 * {@link APIManagementException}. Presumably each indexer call constructs it
 * once, so the three calls hit those three outcomes in order - TODO confirm.
 */
@Test
public void testShouldReturnIndexedDocumentWhenParameterCorrect() throws Exception {
    POIFSFileSystem ppExtractor = Mockito.mock(POIFSFileSystem.class);
    PowerPointExtractor powerPointExtractor = Mockito.mock(PowerPointExtractor.class);
    XSLFPowerPointExtractor xslfExtractor = Mockito.mock(XSLFPowerPointExtractor.class);
    XMLSlideShow xmlSlideShow = Mockito.mock(XMLSlideShow.class);
    PowerMockito.whenNew(POIFSFileSystem.class).withParameterTypes(InputStream.class)
            .withArguments(Mockito.any(InputStream.class))
            .thenThrow(OfficeXmlFileException.class)
            .thenReturn(ppExtractor)
            .thenThrow(APIManagementException.class);
    PowerMockito.whenNew(PowerPointExtractor.class).withParameterTypes(POIFSFileSystem.class)
            .withArguments(ppExtractor).thenReturn(powerPointExtractor);
    PowerMockito.whenNew(XMLSlideShow.class).withParameterTypes(InputStream.class)
            .withArguments(Mockito.any())
            .thenReturn(xmlSlideShow);
    PowerMockito.whenNew(XSLFPowerPointExtractor.class).withArguments(xmlSlideShow).thenReturn(xslfExtractor);
    Mockito.when(powerPointExtractor.getText()).thenReturn("");
    Mockito.when(xslfExtractor.getText()).thenReturn("");
    MSPowerpointIndexer indexer = new MSPowerpointIndexer();

    IndexDocument ppDoc = indexer.getIndexedDocument(file2Index);

    // should return the default media type when media type is not defined in file2Index
    // assertEquals reports expected vs. actual on failure, unlike a bare Assert.fail()
    Assert.assertEquals("application/vnd.ms-powerpoint",
            ppDoc.getFields().get(IndexingConstants.FIELD_MEDIA_TYPE).get(0));

    // should return the media type we have set in the file2Index
    file2Index.mediaType = "text/html";
    ppDoc = indexer.getIndexedDocument(file2Index);
    Assert.assertEquals("text/html",
            ppDoc.getFields().get(IndexingConstants.FIELD_MEDIA_TYPE).get(0));

    // should return the media type we have set in the file2Index even if exception occurred while reading the file
    ppDoc = indexer.getIndexedDocument(file2Index);
    Assert.assertEquals("text/html",
            ppDoc.getFields().get(IndexingConstants.FIELD_MEDIA_TYPE).get(0));
}

Source File: MSWordIndexerTest.java From carbon-apimgt with Apache License 2.0

4 votes

/**
 * Checks the media type field of the document returned by
 * {@link MSWordIndexer} over three consecutive calls.
 * <p>
 * Construction of {@link POIFSFileSystem} is stubbed to throw
 * {@link OfficeXmlFileException} first, then return a mock, then throw
 * {@link APIManagementException}. Presumably each indexer call constructs it
 * once, so the three calls hit those three outcomes in order - TODO confirm.
 */
@Test
public void testShouldReturnIndexedDocumentWhenParameterCorrect() throws Exception {
    POIFSFileSystem poiFS = Mockito.mock(POIFSFileSystem.class);
    WordExtractor wordExtractor = Mockito.mock(WordExtractor.class);
    XWPFWordExtractor xwpfExtractor = Mockito.mock(XWPFWordExtractor.class);
    XWPFDocument xwpfDocument = Mockito.mock(XWPFDocument.class);
    PowerMockito.whenNew(POIFSFileSystem.class).withParameterTypes(InputStream.class)
            .withArguments(Mockito.any(InputStream.class))
            .thenThrow(OfficeXmlFileException.class)
            .thenReturn(poiFS)
            .thenThrow(APIManagementException.class);
    PowerMockito.whenNew(WordExtractor.class).withArguments(poiFS).thenReturn(wordExtractor);
    PowerMockito.whenNew(XWPFDocument.class).withParameterTypes(InputStream.class)
            .withArguments(Mockito.any())
            .thenReturn(xwpfDocument);
    PowerMockito.whenNew(XWPFWordExtractor.class).withArguments(xwpfDocument).thenReturn(xwpfExtractor);
    Mockito.when(wordExtractor.getText()).thenReturn("");
    Mockito.when(xwpfExtractor.getText()).thenReturn("");
    MSWordIndexer indexer = new MSWordIndexer();

    IndexDocument wordDoc = indexer.getIndexedDocument(file2Index);

    // should return the default media type when media type is not defined in file2Index
    // NOTE(review): "application/pdf" looks odd as the default for a Word indexer - confirm against MSWordIndexer
    // assertEquals reports expected vs. actual on failure, unlike a bare Assert.fail()
    Assert.assertEquals("application/pdf",
            wordDoc.getFields().get(IndexingConstants.FIELD_MEDIA_TYPE).get(0));

    // should return the media type we have set in the file2Index
    file2Index.mediaType = "text/html";
    wordDoc = indexer.getIndexedDocument(file2Index);
    Assert.assertEquals("text/html",
            wordDoc.getFields().get(IndexingConstants.FIELD_MEDIA_TYPE).get(0));

    // should return the media type we have set in the file2Index even if exception occurred while reading the file
    file2Index.mediaType = "text/html";
    wordDoc = indexer.getIndexedDocument(file2Index);
    Assert.assertEquals("text/html",
            wordDoc.getFields().get(IndexingConstants.FIELD_MEDIA_TYPE).get(0));
}

org.apache.poi.poifs.filesystem.OfficeXmlFileException Java Examples