Java Code Examples for org.apache.poi.util.IOUtils#readFully()
The following examples show how to use
org.apache.poi.util.IOUtils#readFully().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TikaOfficeDetectParser.java From alfresco-repository with GNU Lesser General Public License v3.0 | 5 votes |
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException { byte[] initial4 = new byte[4]; InputStream wrapped; // Preserve TikaInputStreams as TikaInputStreams as they require less memory to process if (stream.markSupported()) { stream.mark(initial4.length); IOUtils.readFully(stream, initial4); stream.reset(); wrapped = stream; } else { PushbackInputStream inp = new PushbackInputStream(stream, 4); IOUtils.readFully(inp, initial4); inp.unread(initial4); wrapped = inp; } // Which is it? if(initial4[0] == POIFSConstants.OOXML_FILE_HEADER[0] && initial4[1] == POIFSConstants.OOXML_FILE_HEADER[1] && initial4[2] == POIFSConstants.OOXML_FILE_HEADER[2] && initial4[3] == POIFSConstants.OOXML_FILE_HEADER[3]) { ooxmlParser.parse(wrapped, handler, metadata, parseContext); } else { ole2Parser.parse(wrapped, handler, metadata, parseContext); } }
Example 2
Source File: HeaderBlock.java From lams with GNU General Public License v2.0 | 5 votes |
/** * create a new HeaderBlockReader from an InputStream * * @param stream the source InputStream * * @exception IOException on errors or bad data */ public HeaderBlock(InputStream stream) throws IOException { // Grab the first 512 bytes // (For 4096 sized blocks, the remaining 3584 bytes are zero) // Then, process the contents this(readFirst512(stream)); // Fetch the rest of the block if needed if(bigBlockSize.getBigBlockSize() != 512) { int rest = bigBlockSize.getBigBlockSize() - 512; byte[] tmp = new byte[rest]; IOUtils.readFully(stream, tmp); } }
Example 3
Source File: HeaderBlock.java From lams with GNU General Public License v2.0 | 5 votes |
private static byte[] readFirst512(InputStream stream) throws IOException { // Grab the first 512 bytes // (For 4096 sized blocks, the remaining 3584 bytes are zero) byte[] data = new byte[512]; int bsCount = IOUtils.readFully(stream, data); if(bsCount != 512) { throw alertShortRead(bsCount, 512); } return data; }
Example 4
Source File: DocumentBlock.java From lams with GNU General Public License v2.0 | 5 votes |
/**
 * Creates a block and fills its data array from the given stream.
 *
 * @param stream       the InputStream delivering the data.
 * @param bigBlockSize forwarded to the single-argument constructor
 *
 * @exception IOException if reading from the stream fails
 */
public DocumentBlock(final InputStream stream, POIFSBigBlockSize bigBlockSize)
        throws IOException {
    this(bigBlockSize);

    final int bytesRead = IOUtils.readFully(stream, _data);
    // A result of -1 is recorded as zero bytes read.
    if (bytesRead == -1) {
        _bytes_read = 0;
    } else {
        _bytes_read = bytesRead;
    }
}
Example 5
Source File: RawDataBlock.java From lams with GNU General Public License v2.0 | 5 votes |
/** * Constructor RawDataBlock * * @param stream the InputStream from which the data will be read * @param blockSize the size of the POIFS blocks, normally 512 bytes * {@link org.apache.poi.poifs.common.POIFSConstants#SMALLER_BIG_BLOCK_SIZE} * * @exception IOException on I/O errors, and if an insufficient * amount of data is read (the InputStream must * be an exact multiple of the block size) */ public RawDataBlock(final InputStream stream, int blockSize) throws IOException { _data = new byte[ blockSize ]; int count = IOUtils.readFully(stream, _data); _hasData = (count > 0); if (count == -1) { _eof = true; } else if (count != blockSize) { // IOUtils.readFully will always read the // requested number of bytes, unless it hits // an EOF _eof = true; String type = " byte" + ((count == 1) ? ("") : ("s")); log.log(POILogger.ERROR, "Unable to read entire block; " + count + type + " read before EOF; expected " + blockSize + " bytes. Your document " + "was either written by software that " + "ignores the spec, or has been truncated!" ); } else { _eof = false; } }
Example 6
Source File: FileBackedDataSource.java From lams with GNU General Public License v2.0 | 5 votes |
@Override public ByteBuffer read(int length, long position) throws IOException { if(position >= size()) { throw new IndexOutOfBoundsException("Position " + position + " past the end of the file"); } // TODO Could we do the read-only case with MapMode.PRIVATE instead? // See https://docs.oracle.com/javase/7/docs/api/java/nio/channels/FileChannel.MapMode.html#PRIVATE // Or should we have 3 modes instead of the current boolean - // read-write, read-only, read-to-write-elsewhere? // Do we read or map (for read/write)? ByteBuffer dst; if (writable) { dst = channel.map(FileChannel.MapMode.READ_WRITE, position, length); // remember this buffer for cleanup buffersToClean.add(dst); } else { // allocate the buffer on the heap if we cannot map the data in directly channel.position(position); dst = ByteBuffer.allocate(length); // Read the contents and check that we could read some data int worked = IOUtils.readFully(channel, dst); if(worked == -1) { throw new IndexOutOfBoundsException("Position " + position + " past the end of the file"); } } // make it ready for reading dst.position(0); // All done return dst; }
Example 7
Source File: MimetypeMapContentTest.java From alfresco-repository with GNU Lesser General Public License v3.0 | 4 votes |
public void testGuessMimetypeForFile() throws Exception { // Correct ones assertEquals( "application/msword", mimetypeService.guessMimetype("something.doc", openQuickTestFile("quick.doc")) ); assertEquals( "application/msword", mimetypeService.guessMimetype("SOMETHING.DOC", openQuickTestFile("quick.doc")) ); // Incorrect ones, Tika spots the mistake assertEquals( "application/msword", mimetypeService.guessMimetype("something.pdf", openQuickTestFile("quick.doc")) ); assertEquals( "application/pdf", mimetypeService.guessMimetype("something.doc", openQuickTestFile("quick.pdf")) ); // Ones where we use a different mimetype to the canonical one assertEquals( "image/bmp", // Officially image/x-ms-bmp mimetypeService.guessMimetype("image.bmp", openQuickTestFile("quick.bmp")) ); // Ones where we know about the parent, and Tika knows about the details assertEquals( "application/dita+xml", // Full version: application/dita+xml;format=concept mimetypeService.guessMimetype("concept.dita", openQuickTestFile("quickConcept.dita")) ); // Alfresco Specific ones, that Tika doesn't know about assertEquals( "application/acp", mimetypeService.guessMimetype("something.acp", openQuickTestFile("quick.acp")) ); // Where the file is corrupted File tmp = File.createTempFile("alfresco", ".tmp"); ContentReader reader = openQuickTestFile("quick.doc"); InputStream inp = reader.getContentInputStream(); byte[] trunc = new byte[512+256]; IOUtils.readFully(inp, trunc); inp.close(); FileOutputStream out = new FileOutputStream(tmp); out.write(trunc); out.close(); ContentReader truncReader = new FileContentReader(tmp); // Because the file is truncated, Tika won't be able to process the contents // of the OLE2 structure // So, it'll fall back to just OLE2, but it won't fail assertEquals( "application/x-tika-msoffice", mimetypeService.guessMimetype(null, truncReader) ); // But with the filename it'll be able to use the .doc extension // to guess at it being a .Doc file assertEquals( "application/msword", 
mimetypeService.guessMimetype("something.doc", truncReader) ); // Lotus notes EML files (ALF-16381 / TIKA-1042) assertEquals( "message/rfc822", mimetypeService.guessMimetype("something.eml", openQuickTestFile("quickLotus.eml")) ); }
Example 8
Source File: NPOIFSFileSystem.java From lams with GNU General Public License v2.0 | 4 votes |
/**
 * Builds a file system by buffering the contents of an <tt>InputStream</tt>
 * in memory. The stream is normally consumed up to EOF, and it is always
 * closed before this constructor returns.<p>
 *
 * Some streams remain usable after EOF (typically those reporting
 * <code>true</code> from <tt>markSupported()</tt>). A caller who needs to
 * keep using such a stream afterwards can wrap it so that the
 * <tt>close()</tt> call is intercepted; <tt>createNonClosingInputStream()</tt>
 * exists for that purpose:
 * <pre>
 * InputStream wrappedStream = POIFSFileSystem.createNonClosingInputStream(is);
 * HSSFWorkbook wb = new HSSFWorkbook(wrappedStream);
 * is.reset();
 * doSomethingElse(is);
 * </pre>
 * <tt>ByteArrayInputStream</tt> is a special case, since its
 * <tt>close()</tt> method does nothing:
 * <pre>
 * ByteArrayInputStream bais = ...
 * HSSFWorkbook wb = new HSSFWorkbook(bais); // calls bais.close() !
 * bais.reset(); // no problem
 * doSomethingElse(bais);
 * </pre>
 *
 * @param stream the InputStream from which to read the data
 *
 * @exception IOException on errors reading, or on invalid data
 */
public NPOIFSFileSystem(InputStream stream) throws IOException {
    this(false);

    ReadableByteChannel source = null;
    boolean completed = false;
    try {
        // Work with the stream through NIO
        source = Channels.newChannel(stream);

        // Read the header block and have it processed
        final ByteBuffer header = ByteBuffer.allocate(POIFSConstants.SMALLER_BIG_BLOCK_SIZE);
        IOUtils.readFully(source, header);
        _header = new HeaderBlock(header);

        // Sanity check the block count
        BlockAllocationTableReader.sanityCheckBlockCount(_header.getBATCount());

        // With an InputStream the whole file has to be held in memory.
        // The largest it can be is when every BAT block entry is in use.
        final long upperBound = BATBlock.calculateMaximumSize(_header);
        if (upperBound > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("Unable read a >2gb file via an InputStream");
        }
        final ByteBuffer contents = ByteBuffer.allocate((int) upperBound);

        // The header goes in first ...
        header.position(0);
        contents.put(header);
        contents.position(header.capacity());

        // ... followed by whatever is left in the stream
        IOUtils.readFully(source, contents);
        completed = true;

        // Expose the buffered bytes as a DataSource
        _data = new ByteArrayBackedDataSource(contents.array(), contents.position());
    } finally {
        // The constructor contract says the stream is always closed
        if (source != null) {
            source.close();
        }
        closeInputStream(stream, completed);
    }

    // Now process the various entries
    readCoreContents();
}
Example 9
Source File: VBAMacroReader.java From lams with GNU General Public License v2.0 | 4 votes |
/**
 * Reads a fixed-length UTF-16LE string from the stream.
 *
 * @param in                      the stream to read from
 * @param unicodeNameRecordLength length of the string in bytes
 * @return the decoded string; empty when the length is zero
 * @throws java.io.EOFException if the stream ends before
 *         {@code unicodeNameRecordLength} bytes could be read
 * @throws IOException if reading from the stream fails
 */
private String readUnicodeString(RLEDecompressingInputStream in, int unicodeNameRecordLength) throws IOException {
    // Nothing to read; avoids mistaking an EOF result on a zero-length
    // read for a truncated record.
    if (unicodeNameRecordLength == 0) {
        return "";
    }

    byte[] buffer = new byte[unicodeNameRecordLength];
    int bytesRead = IOUtils.readFully(in, buffer);
    // A short read would otherwise leave the tail of the buffer as zero
    // bytes, which decode to NUL characters in the returned string.
    if (bytesRead != unicodeNameRecordLength) {
        throw new java.io.EOFException("Expected " + unicodeNameRecordLength
                + " bytes for unicode string but read " + bytesRead);
    }
    return new String(buffer, UTF_16LE);
}