Java Code Examples for org.apache.pdfbox.rendering.PDFRenderer#renderImageWithDPI()

The following examples show how to use org.apache.pdfbox.rendering.PDFRenderer#renderImageWithDPI(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PdfBoxConverter.java    From workable-converter with GNU General Public License v3.0 8 votes vote down vote up
/**
 * Renders every page of the PDF at {@code require.getWaitingFilePath()} as a 300 DPI RGB
 * PNG named {@code <source name without suffix>_<page index>.png} inside
 * {@code require.getDestConvertedPath()}.
 *
 * @param require conversion request supplying the source file path and destination directory
 * @return always {@code true}; every failure is reported through the exception
 * @throws ConvertFailedException if loading, rendering or writing fails for any reason
 */
@Override
public boolean byFileFolder(ConvertRequire require) throws ConvertFailedException {
    logger.info("pdfbox convert by file folder begin(src file must be a pdf file) :");
    try {
        File srcFile = new File(require.getWaitingFilePath());
        // try-with-resources closes the document even when a page fails to render or write;
        // previously close() was only reached on the success path.
        try (PDDocument document = PDDocument.load(srcFile)) {
            PDFRenderer renderer = new PDFRenderer(document);
            for (int page = 0; page < document.getNumberOfPages(); page++) {
                BufferedImage image = renderer.renderImageWithDPI(page, 300, ImageType.RGB);
                String savename = require.getDestConvertedPath() + "/" + SuffixTool.deleteSuffix(srcFile.getName()) + "_" + page + ".png";
                ImageIOUtil.writeImage(image, savename, 300);
            }
        }
    } catch (Exception e) {
        throw new ConvertFailedException(e.getMessage());
    }
    logger.info("pdf box convert by filepath success");
    return true;
}
 
Example 2
Source File: PdfBoxConverter.java    From workable-converter with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Converts a base64-encoded PDF into one base64-encoded PNG per page.
 * <p>
 * The PDF is first written to a uniquely named file under {@code params.getTmpPath()}, each
 * page is rendered at 300 DPI (RGB) to a PNG in the same directory, and the PNGs are read
 * back as base64 and stored on the request via {@code setDestBase64s}.
 * </p>
 * <p>
 * NOTE(review): neither the temporary PDF nor the intermediate PNG files are deleted here;
 * confirm that cleanup happens elsewhere.
 * </p>
 *
 * @param require conversion request supplying the base64 source and receiving the results
 * @return always {@code true}; every failure is reported through the exception
 * @throws ConvertFailedException if decoding, loading, rendering or writing fails
 */
@Override
public boolean byBase64 (ConvertRequire require) throws ConvertFailedException {
    logger.info("pdfbox convert by base64 begin(src file must be a pdf file):");
    try {
        String srcFileTmpName = StrRandomTool.getUuid(true) + ".pdf";
        Base64FileTool.saveBase64File(require.getSrcBase64(), params.getTmpPath() + "/" + srcFileTmpName);
        File srcFile = new File(params.getTmpPath() + "/" + srcFileTmpName);
        List<String> results = new LinkedList<>();
        // try-with-resources closes the document even when a page fails to render or write;
        // previously close() was only reached on the success path.
        try (PDDocument document = PDDocument.load(srcFile)) {
            PDFRenderer renderer = new PDFRenderer(document);
            for (int page = 0; page < document.getNumberOfPages(); page++) {
                BufferedImage image = renderer.renderImageWithDPI(page, 300, ImageType.RGB);
                String savePath = params.getTmpPath() + "/" + SuffixTool.deleteSuffix(srcFile.getName()) + "_" + page + ".png";
                ImageIOUtil.writeImage(image, savePath, 300);
                results.add(Base64FileTool.filePathToBase64(savePath));
            }
        }
        require.setDestBase64s(results);
    } catch (Exception e) {
        throw new ConvertFailedException(e.getMessage());
    }
    logger.info("pdf box convert by base64 success");
    return true;
}
 
Example 3
Source File: ConvertTest.java    From blog-codes with Apache License 2.0 6 votes vote down vote up
/**
 * Renders every page of a fixed sample PDF to a 300 DPI RGB PNG, printing a progress line
 * every 100 pages.
 *
 * @param args ignored
 * @throws InvalidPasswordException if the PDF is encrypted and cannot be opened
 * @throws IOException if the PDF cannot be read or an image cannot be written
 */
public static void main(String[] args) throws InvalidPasswordException, IOException {
		// try-with-resources: the document was previously never closed.
		try (PDDocument document = PDDocument.load(new File("/home/lili/data/testen.pdf"))) {
			PDFRenderer pdfRenderer = new PDFRenderer(document);
			for (int page = 0; page < document.getNumberOfPages(); ++page) {
				if (page > 0 && page % 100 == 0) {
					System.out.println("page: "+page);
				}
				BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);

				// suffix in filename will be used as the file format
				ImageIOUtil.writeImage(bim, "/home/lili/data/testen-" + (page + 1) + ".png", 300);
			}
		}
	}
 
Example 4
Source File: PdfTableReader.java    From pdf-table with MIT License 6 votes vote down vote up
/**
 * Runs table detection with debug-image output enabled over a range of PDF pages, directing
 * the debug output to the given directory under the file name {@code page_<n>}.
 *
 * @param document  PDF document instance
 * @param startPage first page in range to process (first page == 1)
 * @param endPage   last page in range
 * @param outputDir destination directory
 * @throws IOException if a page cannot be rendered
 */
public void savePdfTablePagesDebugImages(PDDocument document, int startPage, int endPage, Path outputDir) throws IOException {
    final TableExtractor debugExtractor = new TableExtractor(settings);
    final PDFRenderer pageRenderer = new PDFRenderer(document);
    // Page numbers are 1-based for callers, 0-based for the renderer.
    int pageIndex = startPage - 1;
    while (pageIndex < endPage) {
        final int pageNumber = pageIndex + 1;
        debugExtractor.setSettings(PdfTableSettings.getBuilder()
                .setDebugImages(true)
                .setDebugFileOutputDir(outputDir)
                .setDebugFilename("page_" + pageNumber)
                .build());
        final BufferedImage rendered;
        // Rendering is serialized on this instance.
        synchronized (this) {
            rendered = pageRenderer.renderImageWithDPI(pageIndex, settings.getPdfRenderingDpi(), ImageType.RGB);
        }
        debugExtractor.getTableBoundingRectangles(bufferedImage2GrayscaleMat(rendered));
        pageIndex++;
    }
}
 
Example 5
Source File: RenderPage.java    From testarea-pdfbox2 with Apache License 2.0 6 votes vote down vote up
/**
 * <a href="https://stackoverflow.com/questions/45831641/read-pdf-written-in-chinese-using-java">
 * read pdf written in chinese using java
 * </a>
 * <br/>
 * <a href="https://drive.google.com/file/d/0B6k7AYGPEth2djFMNVJ0dC1wLVU/view?usp=sharing">
 * sample1.pdf
 * </a>
 * <p>
 * Renders every page of the sample at 96 DPI into {@code RESULT_FOLDER} as
 * {@code sample1-<page index>.png}. The problem cannot be reproduced with the file at hand
 * without concrete code.
 * </p>
 */
@Test
public void testRenderSample1() throws IOException
{
    // The document is declared as a resource too, so it is closed (before the stream) even
    // when rendering fails; previously only the input stream was closed.
    try (   InputStream resource = getClass().getResourceAsStream("sample1.pdf");
            PDDocument document = Loader.loadPDF(resource))
    {
        PDFRenderer renderer = new PDFRenderer(document);

        for (int page = 0; page < document.getNumberOfPages(); page++)
        {
            BufferedImage image = renderer.renderImageWithDPI(page, 96);

            File result = new File(RESULT_FOLDER, String.format("sample1-%s.png", page));
            ImageIO.write(image, "PNG", result);
        }
    }
}
 
Example 6
Source File: PdfService.java    From cs-actions with Apache License 2.0 5 votes vote down vote up
/**
 * Renders one page to an image file and appends that file to {@code fileList}.
 * <p>
 * The target path is the plain concatenation of {@code destination}, {@code fileName},
 * {@code UNDERSCORE}, 15 random upper-cased alphanumeric characters and
 * {@code PNG_EXTENSION}; no path separator is inserted between {@code destination} and
 * {@code fileName}.
 * </p>
 *
 * @param destination prefix of the output path
 * @param dpi         resolution passed to the renderer
 * @param renderer    renderer bound to the source document
 * @param fileList    list that receives the created file
 * @param fileName    base name of the output file
 * @param i           zero-based index of the page to render
 * @throws IOException if rendering or writing the image fails
 */
private static void fileCreation(String destination, Integer dpi, PDFRenderer renderer, List<File> fileList,
                                 String fileName, Integer i) throws IOException {
    final String randomSuffix = RandomStringUtils.randomAlphanumeric(15).toUpperCase();
    final File pageFile = new File(destination + fileName + UNDERSCORE + randomSuffix + PNG_EXTENSION);
    final BufferedImage pageImage = renderer.renderImageWithDPI(i, dpi);
    ImageIO.write(pageImage, PNG, pageFile);
    fileList.add(pageFile);
}
 
Example 7
Source File: Utils.java    From tabula-java with MIT License 5 votes vote down vote up
/**
 * Renders a single page to an image by wrapping it in a temporary one-page document.
 *
 * @param page      page to render; it is added to a fresh, temporary {@code PDDocument}
 * @param dpi       rendering resolution
 * @param imageType colour model of the resulting image
 * @return the rendered page
 * @throws IOException if rendering fails
 */
public static BufferedImage pageConvertToImage(PDPage page, int dpi, ImageType imageType) throws IOException {
	// The temporary document stays open while rendering and is closed exactly once by the
	// try-with-resources; the former explicit close() ran before the page was rendered.
	try (PDDocument document = new PDDocument()) {
		document.addPage(page);
		PDFRenderer renderer = new PDFRenderer(document);
		return renderer.renderImageWithDPI(0, dpi, imageType);
	}
}
 
Example 8
Source File: PDF2ImageExample.java    From tutorials with MIT License 5 votes vote down vote up
/**
 * Renders every page of a PDF at 300 DPI (RGB) to {@code src/output/pdf-<n>.<extension>},
 * where {@code n} is the 1-based page number.
 *
 * @param filename  path of the PDF to read
 * @param extension image file extension, which also selects the image format
 * @throws IOException if the PDF cannot be read or an image cannot be written
 */
private static void generateImageFromPDF(String filename, String extension) throws IOException {
	// try-with-resources closes the document even when rendering or writing a page fails.
	try (PDDocument document = PDDocument.load(new File(filename))) {
		PDFRenderer pdfRenderer = new PDFRenderer(document);
		for (int page = 0; page < document.getNumberOfPages(); ++page) {
			BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
			ImageIOUtil.writeImage(bim, String.format("src/output/pdf-%d.%s", page + 1, extension), 300);
		}
	}
}
 
Example 9
Source File: DefaultDocumentTest.java    From geomajas-project-server with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Runs {@code testRender()} and then renders the first page of {@code target/test.pdf} at
 * 144 DPI into {@code target/test.png}.
 */
@Test
public void testToImage() throws Exception {
	testRender();
	BufferedImage bufferedImage;
	// try-with-resources closes the PDF even when rendering throws.
	try (PDDocument pdf = PDDocument.load(new File("target/test.pdf"), true)) {
		PDFRenderer renderer = new PDFRenderer(pdf);
		bufferedImage = renderer.renderImageWithDPI(0, 144);
	}
	ImageIO.write(bufferedImage, "PNG", new File("target/test.png"));
}
 
Example 10
Source File: SinglePageDocument.java    From geomajas-project-server with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Writes the buffered PDF ({@code baos}) to the given stream in the requested format.
 * <p>
 * For {@code Format.PDF} the bytes are copied as-is. Otherwise the first page is rendered at
 * {@code dpi}. PNG output is written by the first available {@code ImageWriter} whose default
 * metadata is writable and supports the standard metadata format, after {@code setDPI} has
 * been applied to that metadata; if no such writer exists nothing is written. All other
 * formats go through {@code ImageIO.write}.
 * </p>
 *
 * @param outputStream destination stream
 * @param format       requested output format
 * @param dpi          rendering resolution for image formats
 * @throws IOException if rendering or writing fails
 */
private void writeDocument(OutputStream outputStream, Format format, int dpi) throws IOException,
		DocumentException, PrintingException {
	if (format == Format.PDF) {
		baos.writeTo(outputStream);
	} else {
		BufferedImage bufferedImage;
		// try-with-resources closes the PDF even when rendering throws.
		try (PDDocument pdf = PDDocument.load(new ByteArrayInputStream(baos.toByteArray()), true)) {
			PDFRenderer renderer = new PDFRenderer(pdf);
			bufferedImage = renderer.renderImageWithDPI(0, dpi);
		}
		if (format == Format.PNG) {
			final String formatName = format.getExtension();
			for (Iterator<ImageWriter> iw = ImageIO.getImageWritersByFormatName(formatName); iw.hasNext();) {
				ImageWriter writer1 = iw.next();
				try {
					ImageWriteParam writeParam = writer1.getDefaultWriteParam();
					ImageTypeSpecifier typeSpecifier = ImageTypeSpecifier
							.createFromBufferedImageType(BufferedImage.TYPE_INT_ARGB);
					IIOMetadata metadata = writer1.getDefaultImageMetadata(typeSpecifier, writeParam);
					if (metadata.isReadOnly() || !metadata.isStandardMetadataFormatSupported()) {
						// This writer cannot carry the DPI metadata; try the next one.
						continue;
					}

					setDPI(metadata);
					// Write bufferedImage to outputStream; the image stream is closed even
					// when flushing fails.
					try (ImageOutputStream stream = ImageIO.createImageOutputStream(outputStream)) {
						writer1.setOutput(stream);
						writer1.write(metadata, new IIOImage(bufferedImage, null, metadata), writeParam);
						stream.flush();
					}
					break;
				} finally {
					// Release writer resources whether it was used, skipped or failed.
					writer1.dispose();
				}
			}
		} else {
			ImageIO.write(bufferedImage, format.getExtension(), outputStream);
		}

	}
}
 
Example 11
Source File: PdfComparator.java    From pdfcompare with Apache License 2.0 5 votes vote down vote up
/**
 * Renders one page at the environment's DPI and pairs the image with the page's media-box
 * dimensions, swapping width and height when the page is rotated by 90 or 270 degrees.
 *
 * @param document            document containing the page
 * @param expectedPdfRenderer renderer used to produce the image
 * @param pageIndex           zero-based page index
 * @param environment         supplies the rendering DPI
 * @return the rendered image together with its (rotation-adjusted) media-box dimensions
 * @throws IOException if rendering fails
 */
public static ImageWithDimension renderPageAsImage(final PDDocument document, final PDFRenderer expectedPdfRenderer, final int pageIndex, Environment environment)
        throws IOException {
    final BufferedImage rendered = expectedPdfRenderer.renderImageWithDPI(pageIndex, environment.getDPI());
    final PDPage page = document.getPage(pageIndex);
    final PDRectangle mediaBox = page.getMediaBox();
    final int rotation = page.getRotation();
    final boolean sideways = rotation == 90 || rotation == 270;
    if (!sideways) {
        return new ImageWithDimension(rendered, mediaBox.getWidth(), mediaBox.getHeight());
    }
    // A quarter turn exchanges the visible width and height.
    return new ImageWithDimension(rendered, mediaBox.getHeight(), mediaBox.getWidth());
}
 
Example 12
Source File: PdfTableReader.java    From pdf-table with MIT License 5 votes vote down vote up
/**
 * Renders each page in the given range and parses it into a {@code ParsedTablePage}.
 *
 * @param document  PDF document instance
 * @param startPage first page in range to parse (first page == 1)
 * @param endPage   last page in range
 * @return parsed pages, in page order
 * @throws IOException if a page cannot be rendered or parsed
 */
public List<ParsedTablePage> parsePdfTablePages(PDDocument document, int startPage, int endPage) throws IOException {
    final List<ParsedTablePage> parsedPages = new ArrayList<>();
    final PDFRenderer pageRenderer = new PDFRenderer(document);
    // Page numbers are 1-based for callers, 0-based for the renderer.
    int pageIndex = startPage - 1;
    while (pageIndex < endPage) {
        final BufferedImage rendered;
        // Rendering is serialized on this instance.
        synchronized (this) {
            rendered = pageRenderer.renderImageWithDPI(pageIndex, settings.getPdfRenderingDpi(), ImageType.RGB);
        }
        parsedPages.add(parsePdfTablePage(rendered, document.getPage(pageIndex), pageIndex + 1));
        pageIndex++;
    }
    return parsedPages;
}
 
Example 13
Source File: PdfTableReader.java    From pdf-table with MIT License 5 votes vote down vote up
/**
 * Renders one PDF page at the DPI from the settings and writes it to
 * {@code page_<page + 1>.png} inside the given directory.
 *
 * @param renderer  PDF renderer instance
 * @param page      zero-based page index (the file name uses {@code page + 1})
 * @param outputDir output directory
 * @throws IOException if rendering or writing fails
 */
private void savePdfPageAsPNG(PDFRenderer renderer, int page, Path outputDir) throws IOException {
    final BufferedImage rendered;
    // Rendering is serialized on this instance.
    synchronized (this) {
        rendered = renderer.renderImageWithDPI(page, settings.getPdfRenderingDpi(), ImageType.RGB);
    }
    final String fileName = "page_" + (page + 1) + ".png";
    final Path target = outputDir.resolve(Paths.get(fileName));
    ImageIOUtil.writeImage(rendered, target.toString(), settings.getPdfRenderingDpi());
}
 
Example 14
Source File: PdfTools.java    From MyBox with Apache License 2.0 5 votes vote down vote up
/**
 * Renders a single page of a (possibly password-protected) PDF file.
 *
 * @param file      PDF file to open
 * @param password  password used to open the document
 * @param page      zero-based index of the page to render
 * @param dpi       rendering resolution
 * @param imageType colour model of the resulting image
 * @return the rendered page, or {@code null} if anything fails (the failure is logged at
 *         debug level)
 */
public static BufferedImage page2image(File file, String password, int page,
        int dpi, ImageType imageType) {
    // try-with-resources already closes the document, so the former explicit close() and
    // the extra nested try block were redundant.
    try ( PDDocument doc = PDDocument.load(file, password, AppVariables.pdfMemUsage)) {
        PDFRenderer renderer = new PDFRenderer(doc);
        return renderer.renderImageWithDPI(page, dpi, imageType);
    } catch (Exception e) {
        logger.debug(e.toString());
        return null;
    }
}
 
Example 15
Source File: RenderType3Character.java    From testarea-pdfbox2 with Apache License 2.0 4 votes vote down vote up
/**
 * <a href="http://stackoverflow.com/questions/42032729/render-type3-font-character-as-image-using-pdfbox">
 * Render Type3 font character as image using PDFBox
 * </a>
 * <br/>
 * <a href="https://drive.google.com/file/d/0B0f6X4SAMh2KRDJTbm4tb3E1a1U/view">
 * 4700198773.pdf
 * </a>
 * from
 * <a href="http://stackoverflow.com/questions/37754112/extract-text-with-custom-font-result-non-readble">
 * extract text with custom font result non readble
 * </a>
 * <p>
 * This test shows how one can render individual Type 3 font glyphs as bitmaps.
 * Unfortunately PDFBox out-of-the-box does not provide a class to render contents
 * of arbitrary XObjects, merely for rendering pages; thus, we simply create a page
 * with the glyph in question and render that page.
 * </p>
 * <p>
 * As the OP did not provide a sample PDF, we simply use one from another
 * stackoverflow question. There obviously might remain issues with the
 * OP's files.
 * </p>
 * <p>
 * The glyph code is written as a two-digit, zero-padded hex string. A PDF hex string with
 * an odd number of digits is read as if a trailing 0 were appended, so a space-padded
 * single digit would select the wrong character code.
 * </p>
 */
@Test
public void testRenderSdnList() throws IOException, IllegalAccessException, IllegalArgumentException, InvocationTargetException, NoSuchMethodException, SecurityException
{
    // The raw write(String) method is not accessible on PDPageContentStream, hence reflection.
    Method contentStreamWrite = PDPageContentStream.class.getSuperclass().getDeclaredMethod("write", String.class);
    contentStreamWrite.setAccessible(true);

    // Both the stream and the document are closed, even when the test fails midway.
    try (   InputStream resource = getClass().getResourceAsStream("sdnlist.pdf");
            PDDocument document = Loader.loadPDF(resource))
    {
        PDPage page = document.getPage(1);
        PDResources pageResources = page.getResources();
        COSName f1Name = COSName.getPDFName("R144");
        PDType3Font fontF1 = (PDType3Font) pageResources.getFont(f1Name);
        Map<String, Integer> f1NameToCode = fontF1.getEncoding().getNameToCodeMap();

        COSDictionary charProcsDictionary = fontF1.getCharProcs();
        for (COSName key : charProcsDictionary.keySet())
        {
            COSStream stream = (COSStream) charProcsDictionary.getDictionaryObject(key);
            PDType3CharProc charProc = new PDType3CharProc(fontF1, stream);
            PDRectangle bbox = charProc.getGlyphBBox();
            if (bbox == null)
                bbox = charProc.getBBox();
            Integer code = f1NameToCode.get(key.getName());

            // Glyphs without a code in the font's encoding cannot be shown via Tj; skip them.
            if (code != null)
            {
                try (   PDDocument charDocument = new PDDocument())
                {
                    PDPage charPage = new PDPage(bbox);
                    charDocument.addPage(charPage);
                    charPage.setResources(pageResources);
                    // The content stream must be closed before the page is rendered or saved.
                    try (   PDPageContentStream charContentStream = new PDPageContentStream(charDocument, charPage))
                    {
                        charContentStream.beginText();
                        charContentStream.setFont(fontF1, bbox.getHeight());
                        // %02X (zero padding): "<%2X>" produced e.g. "< F>", which PDF reads as F0.
                        contentStreamWrite.invoke(charContentStream, String.format("<%02X> Tj\n", code));
                        charContentStream.endText();
                    }

                    File result = new File(RESULT_FOLDER, String.format("sdnlist-%s-%s.png", key.getName(), code));
                    PDFRenderer renderer = new PDFRenderer(charDocument);
                    BufferedImage image = renderer.renderImageWithDPI(0, 96);
                    ImageIO.write(image, "PNG", result);
                    charDocument.save(new File(RESULT_FOLDER, String.format("sdnlist-%s-%s.pdf", key.getName(), code)));
                }
            }
        }
    }
}
 
Example 16
Source File: PAdESVisibleSignaturePositionTest.java    From dss with GNU Lesser General Public License v2.1 4 votes vote down vote up
/**
 * Loads a PDF from the stream and renders its first page at {@code DPI}.
 *
 * @param inputStream stream containing the PDF
 * @return image of the first page
 * @throws IOException if the PDF cannot be loaded or rendered
 */
private BufferedImage pdfToBufferedImage(InputStream inputStream) throws IOException {
	try (PDDocument loadedPdf = PDDocument.load(inputStream)) {
		final PDFRenderer firstPageRenderer = new PDFRenderer(loadedPdf);
		final BufferedImage firstPage = firstPageRenderer.renderImageWithDPI(0, DPI);
		return firstPage;
	}
}
 
Example 17
Source File: PdfBoxUtilities.java    From tess4j with Apache License 2.0 4 votes vote down vote up
/**
 * Converts PDF to PNG format.
 * <p>
 * Each page is rendered at 300 DPI (RGB) into a fresh temporary directory as
 * {@code workingimage<4-digit page number>.png}. Conversion is best effort: an
 * {@code IOException} while loading, rendering or closing the document is logged and the
 * images produced so far are still returned. If no image was produced the temporary
 * directory is removed and an empty array is returned.
 * </p>
 *
 * @param inputPdfFile input file
 * @return an array of PNG images sorted by file name; empty if none were produced
 * @throws java.io.IOException if the temporary directory cannot be created
 */
public static File[] convertPdf2Png(File inputPdfFile) throws IOException {
    Path path = Files.createTempDirectory("tessimages");
    File imageDir = path.toFile();

    // try-with-resources replaces the manual null check and silently swallowed close().
    try (PDDocument document = PDDocument.load(inputPdfFile)) {
        PDFRenderer pdfRenderer = new PDFRenderer(document);
        for (int page = 0; page < document.getNumberOfPages(); ++page) {
            BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);

            // suffix in filename will be used as the file format
            String filename = String.format("workingimage%04d.png", page + 1);
            ImageIOUtil.writeImage(bim, new File(imageDir, filename).getAbsolutePath(), 300);
        }
    } catch (IOException ioe) {
        logger.error("Error extracting PDF Document => " + ioe);
    } finally {
        // list() returns null when the directory cannot be read; only delete when it is
        // known to be empty.
        String[] produced = imageDir.list();
        if (produced != null && produced.length == 0) {
            imageDir.delete();
        }
    }

    // find working files
    File[] workingFiles = imageDir.listFiles(new FilenameFilter() {

        @Override
        public boolean accept(File dir, String name) {
            return name.toLowerCase().matches("workingimage\\d{4}\\.png$");
        }
    });

    // listFiles() returns null once the empty directory has been deleted above; sorting
    // null used to fail with a NullPointerException.
    if (workingFiles == null) {
        return new File[0];
    }

    Arrays.sort(workingFiles, new Comparator<File>() {
        @Override
        public int compare(File f1, File f2) {
            return f1.getName().compareTo(f2.getName());
        }
    });

    return workingFiles;
}
 
Example 18
Source File: ConvertToImages.java    From blog-codes with Apache License 2.0 4 votes vote down vote up
/**
 * Renders every page of a fixed PDF at 300 DPI (RGB) to {@code <n>.png} files (1-based page
 * number) in a fixed output directory, creating that directory if necessary.
 *
 * @param args ignored
 * @throws InvalidPasswordException if the PDF is encrypted and cannot be opened
 * @throws IOException if the PDF cannot be read or an image cannot be written
 */
public static void main(String[] args) throws InvalidPasswordException, IOException {
	// Select the KCMS colour management provider before any rendering takes place.
	System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider"); 

	String path="/home/lili/下载/books/汪曾祺全集1.pdf";
	String dir="/home/lili/data/wang/book1";
	new File(dir).mkdirs();
	// try-with-resources closes the document even when rendering or writing a page fails.
	try (PDDocument document = PDDocument.load(new File(path))) {
		PDFRenderer pdfRenderer = new PDFRenderer(document);
		for (int page = 0; page < document.getNumberOfPages(); ++page) {
			BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
			ImageIOUtil.writeImage(bim, dir + "/" + (page + 1) + ".png", 300);
		}
	}
}
 
Example 19
Source File: RenderPage.java    From testarea-pdfbox2 with Apache License 2.0 3 votes vote down vote up
/**
 * <a href="http://stackoverflow.com/questions/37724865/pdfbox-2-0-1-hangs-rendering-pdf-page">
 * PDFBox 2.0.1 hangs rendering pdf page
 * </a>
 * <br/>
 * <a href="https://drive.google.com/file/d/0B5zMlyl8rHwsY3Y1WjFVZlllajA/view?usp=sharing">
 * 2E5D18CD314DC6B7E236C8546A2918.pdf
 * </a>
 * <p>
 * The issue can be reproduced in a Java 8 VM. As Tilman already mentioned in his answer,
 * it is an issue introduced by Java 8 using a different color management system than
 * the former Java versions.
 * </p>
 * <p>
 * Analyzing the VM behavior with the new color management system it becomes clear that
 * the issue is not really a memory leak issue (as could be conjectured due to the excessive
 * memory use); instead objects are instantiated faster than garbage collection can collect
 * and free unused objects!
 * </p>
 * <p>
 * One can allow garbage collection to catch up by changing the main loop of page content
 * parsing in PDFStreamEngine.processStreamOperators(PDContentStream):
 * </p>
 * <pre>
 * int i = 1;                         // new
 * while (token != null)
 * {
 *     if (token instanceof COSObject)
 *     {
 *         arguments.add(((COSObject) token).getObject());
 *     }
 *     else if (token instanceof Operator)
 *     {
 *         processOperator((Operator) token, arguments);
 *         arguments = new ArrayList<COSBase>();
 *     }
 *     else
 *     {
 *         arguments.add((COSBase) token);
 *     }
 *     token = parser.parseNextToken();
 *     if (i++ % 1000 == 0)           // new
 *         Runtime.getRuntime().gc(); // new
 * }
 * </pre>
 */
@Test
public void testRender2E5D18CD314DC6B7E236C8546A2918() throws IOException
{
    File result = new File(RESULT_FOLDER, "2E5D18CD314DC6B7E236C8546A2918.png");
    // The document is declared as a resource too, so it is closed (before the stream) even
    // when rendering fails; previously only the input stream was closed.
    try (   InputStream resource = getClass().getResourceAsStream("2E5D18CD314DC6B7E236C8546A2918.pdf");
            PDDocument document = Loader.loadPDF(resource))
    {
        PDFRenderer renderer = new PDFRenderer(document);
        BufferedImage image = renderer.renderImageWithDPI(0, 96); //Gets stuck here
        ImageIO.write(image, "PNG", result);
    }
}
 
Example 20
Source File: CompatibilityHelper.java    From pdfbox-layout with MIT License 2 votes vote down vote up
/**
    * Creates an RGB image of a single page of the document.
    * @param document the document that holds the page.
    * @param pageIndex index of the page to turn into an image.
    * @param resolution resolution passed to the renderer as DPI.
    * @return the image of the rendered page
    * @throws IOException if pdfbox fails to render the page
    */
   public static BufferedImage createImageFromPage(final PDDocument document, int pageIndex, final int resolution) throws IOException {
      // A fresh renderer is created per call.
      return new PDFRenderer(document).renderImageWithDPI(pageIndex, resolution, ImageType.RGB);
   }