org.apache.pdfbox.rendering.PDFRenderer#renderImageWithDPI

Source File: PdfBoxConverter.java From workable-converter with GNU General Public License v3.0

8 votes

/**
 * Renders every page of the PDF named by {@code require.getWaitingFilePath()} to an RGB image
 * at 300 DPI and writes each page as
 * {@code <destConvertedPath>/<source name without suffix>_<pageIndex>.png}.
 *
 * @param require conversion request supplying the source file path and the destination folder
 * @return {@code true} when all pages have been written
 * @throws ConvertFailedException if loading, rendering or writing fails for any reason
 */
@Override
public boolean byFileFolder(ConvertRequire require) throws ConvertFailedException {
    logger.info("pdfbox convert by file folder begin(src file must be a pdf file) :");
    try {
        File srcFile = new File(require.getWaitingFilePath());
        // try-with-resources guarantees the document is released even when a page
        // fails to render or an image cannot be written.
        try (PDDocument document = PDDocument.load(srcFile)) {
            PDFRenderer renderer = new PDFRenderer(document);
            for (int page = 0; page < document.getNumberOfPages(); page++) {
                BufferedImage image = renderer.renderImageWithDPI(page, 300, ImageType.RGB);
                String savename = require.getDestConvertedPath() + "/" + SuffixTool.deleteSuffix(srcFile.getName()) + "_" + page + ".png";
                ImageIOUtil.writeImage(image, savename, 300);
            }
        }
    } catch (Exception e) {
        // NOTE(review): only the message is propagated; attach the cause if
        // ConvertFailedException offers a (String, Throwable) constructor - confirm.
        throw new ConvertFailedException(e.getMessage());
    }
    logger.info("pdf box convert by filepath success");
    return true;
}

Source File: PdfBoxConverter.java From workable-converter with GNU General Public License v3.0

6 votes

/**
 * Converts a base64-encoded PDF into one base64-encoded PNG per page.
 * <p>
 * The PDF is first written to a uniquely named file under {@code params.getTmpPath()}; each
 * page is rendered as an RGB image at 300 DPI, saved next to it as
 * {@code <uuid>_<pageIndex>.png}, read back as base64 and collected. The resulting list is
 * stored on the request via {@code require.setDestBase64s(...)}.
 * </p>
 *
 * @param require conversion request carrying the source base64 and receiving the results
 * @return {@code true} when all pages have been converted
 * @throws ConvertFailedException if saving, loading, rendering or encoding fails for any reason
 */
@Override
public boolean byBase64 (ConvertRequire require) throws ConvertFailedException {
    logger.info("pdfbox convert by base64 begin(src file must be a pdf file):");
    try {
        String srcFileTmpName = StrRandomTool.getUuid(true) + ".pdf";
        Base64FileTool.saveBase64File(require.getSrcBase64(), params.getTmpPath() + "/" + srcFileTmpName);
        File srcFile = new File(params.getTmpPath() + "/" + srcFileTmpName);
        // try-with-resources guarantees the document is released even when a page
        // fails to render or an intermediate file cannot be written or read.
        try (PDDocument document = PDDocument.load(srcFile)) {
            PDFRenderer renderer = new PDFRenderer(document);
            List<String> results = new LinkedList<>();
            for (int page = 0; page < document.getNumberOfPages(); page++) {
                BufferedImage image = renderer.renderImageWithDPI(page, 300, ImageType.RGB);
                String savePath = params.getTmpPath() + "/" + SuffixTool.deleteSuffix(srcFile.getName()) + "_" + page + ".png";
                ImageIOUtil.writeImage(image, savePath, 300);
                results.add(Base64FileTool.filePathToBase64(savePath));
            }
            require.setDestBase64s(results);
        }
        // NOTE(review): the temporary PDF and PNG files are not removed here - confirm
        // whether cleanup happens elsewhere.
    } catch (Exception e) {
        // NOTE(review): only the message is propagated; attach the cause if
        // ConvertFailedException offers a (String, Throwable) constructor - confirm.
        throw new ConvertFailedException(e.getMessage());
    }
    logger.info("pdf box convert by base64 success");
    return true;
}

Source File: ConvertTest.java From blog-codes with Apache License 2.0

6 votes

/**
 * Renders every page of {@code /home/lili/data/testen.pdf} as an RGB image at 300 DPI and
 * writes it to {@code /home/lili/data/testen-<pageNumber>.png} (page numbers start at 1).
 * Progress is printed every 100 pages.
 *
 * @param args unused
 * @throws InvalidPasswordException if the PDF is password protected
 * @throws IOException if the PDF cannot be read or an image cannot be written
 */
public static void main(String[] args) throws InvalidPasswordException, IOException {
	// try-with-resources closes the document even if rendering or writing fails.
	try (PDDocument document = PDDocument.load(new File("/home/lili/data/testen.pdf"))) {
		PDFRenderer pdfRenderer = new PDFRenderer(document);
		for (int page = 0; page < document.getNumberOfPages(); ++page) {
			if (page > 0 && page % 100 == 0) {
				System.out.println("page: "+page);
			}
			BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);

			// suffix in filename will be used as the file format
			ImageIOUtil.writeImage(bim, "/home/lili/data/testen-" + (page + 1) + ".png", 300);
		}
	}
}

Source File: PdfTableReader.java From pdf-table with MIT License

6 votes

/**
 * Runs table detection in debug mode over a range of pages so that the extractor writes its
 * debug images for each page into the given directory.
 * <p>
 * For every page a fresh debug configuration (debug images on, output directory, file name
 * {@code page_<pageNumber>}) is handed to the extractor before the page is rendered and
 * analysed.
 * </p>
 *
 * @param document  PDF document instance
 * @param startPage first page in the range (1-based)
 * @param endPage   last page in the range (1-based, inclusive)
 * @param outputDir directory that receives the debug images
 * @throws IOException if a page cannot be rendered
 */
public void savePdfTablePagesDebugImages(PDDocument document, int startPage, int endPage, Path outputDir) throws IOException {
    TableExtractor extractor = new TableExtractor(settings);
    PDFRenderer pageRenderer = new PDFRenderer(document);
    int pageIndex = startPage - 1;
    while (pageIndex < endPage) {
        String debugName = "page_" + (pageIndex + 1);
        PdfTableSettings perPageSettings = PdfTableSettings.getBuilder()
                .setDebugImages(true)
                .setDebugFileOutputDir(outputDir)
                .setDebugFilename(debugName)
                .build();
        extractor.setSettings(perPageSettings);

        BufferedImage rendered;
        // Rendering is serialised on this instance, exactly as in the other render paths.
        synchronized (this) {
            rendered = pageRenderer.renderImageWithDPI(pageIndex, settings.getPdfRenderingDpi(), ImageType.RGB);
        }
        extractor.getTableBoundingRectangles(bufferedImage2GrayscaleMat(rendered));
        pageIndex++;
    }
}

Source File: RenderPage.java From testarea-pdfbox2 with Apache License 2.0

6 votes

/**
 * <a href="https://stackoverflow.com/questions/45831641/read-pdf-written-in-chinese-using-java">
 * read pdf written in chinese using java
 * </a>
 * <br/>
 * <a href="https://drive.google.com/file/d/0B6k7AYGPEth2djFMNVJ0dC1wLVU/view?usp=sharing">
 * sample1.pdf
 * </a>
 * <p>
 * Renders every page of the sample at 96 DPI and writes it as a PNG into the result folder.
 * The problem cannot be reproduced with the file at hand without concrete
 * code.
 * </p>
 */
@Test
public void testRenderSample1() throws IOException
{
    // Both the stream and the loaded document are managed resources, so the
    // document is closed even if rendering or writing fails.
    try (   InputStream resource = getClass().getResourceAsStream("sample1.pdf");
            PDDocument document = Loader.loadPDF(resource))
    {
        PDFRenderer renderer = new PDFRenderer(document);

        for (int page = 0; page < document.getNumberOfPages(); page++)
        {
            BufferedImage image = renderer.renderImageWithDPI(page, 96);

            File result = new File(RESULT_FOLDER, String.format("sample1-%s.png", page));
            ImageIO.write(image, "PNG", result);
        }
    }
}

Source File: PdfService.java From cs-actions with Apache License 2.0

5 votes

/**
 * Renders one page to a PNG file with a randomised name and records the file.
 * <p>
 * The target path is {@code destination + fileName + UNDERSCORE + <15 random upper-case
 * alphanumerics> + PNG_EXTENSION}.
 * </p>
 *
 * @param destination path prefix the file name is appended to
 * @param dpi         resolution passed to the renderer
 * @param renderer    renderer for the document being converted
 * @param fileList    list that receives the created file
 * @param fileName    base name of the output file
 * @param i           index of the page to render
 * @throws IOException if rendering or writing the image fails
 */
private static void fileCreation(String destination, Integer dpi, PDFRenderer renderer, List<File> fileList,
                                 String fileName, Integer i) throws IOException {
    final String randomPart = RandomStringUtils.randomAlphanumeric(15).toUpperCase();
    final File pageFile = new File(destination + fileName + UNDERSCORE + randomPart + PNG_EXTENSION);

    final BufferedImage rendered = renderer.renderImageWithDPI(i, dpi);
    ImageIO.write(rendered, PNG, pageFile);

    fileList.add(pageFile);
}

Source File: Utils.java From tabula-java with MIT License

5 votes

/**
 * Renders a single page to an image by wrapping it in a temporary one-page document.
 *
 * @param page      page to render
 * @param dpi       rendering resolution
 * @param imageType colour model of the resulting image
 * @return the rendered page
 * @throws IOException if rendering fails
 */
public static BufferedImage pageConvertToImage(PDPage page, int dpi, ImageType imageType) throws IOException {
	// The temporary document stays open while rendering; try-with-resources closes
	// it afterwards, so no explicit close() is issued before the renderer is used.
	try (PDDocument document = new PDDocument()) {
		document.addPage(page);
		PDFRenderer renderer = new PDFRenderer(document);
		return renderer.renderImageWithDPI(0, dpi, imageType);
	}
}

Source File: PDF2ImageExample.java From tutorials with MIT License

5 votes

/**
 * Renders every page of a PDF as an RGB image at 300 DPI and writes it to
 * {@code src/output/pdf-<pageNumber>.<extension>} (page numbers start at 1).
 *
 * @param filename  path of the PDF to convert
 * @param extension image file extension; it also selects the output format
 * @throws IOException if the PDF cannot be read or an image cannot be written
 */
private static void generateImageFromPDF(String filename, String extension) throws IOException {
	// try-with-resources closes the document even if rendering or writing fails.
	try (PDDocument document = PDDocument.load(new File(filename))) {
		PDFRenderer pdfRenderer = new PDFRenderer(document);
		for (int page = 0; page < document.getNumberOfPages(); ++page) {
			BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
			ImageIOUtil.writeImage(bim, String.format("src/output/pdf-%d.%s", page + 1, extension), 300);
		}
	}
}

Source File: DefaultDocumentTest.java From geomajas-project-server with GNU Affero General Public License v3.0

5 votes

/**
 * Runs {@code testRender()}, then renders the first page of {@code target/test.pdf} at
 * 144 DPI and writes it to {@code target/test.png}.
 */
@Test
public void testToImage() throws Exception {
	testRender();
	PDDocument pdf = PDDocument.load(new File("target/test.pdf"), true);
	BufferedImage bufferedImage;
	try {
		PDFRenderer renderer = new PDFRenderer(pdf);
		bufferedImage = renderer.renderImageWithDPI(0, 144);
	} finally {
		// Release the document even when rendering throws.
		pdf.close();
	}
	ImageIO.write(bufferedImage, "PNG", new File("target/test.png"));
}

Source File: SinglePageDocument.java From geomajas-project-server with GNU Affero General Public License v3.0

5 votes

/**
 * Writes the buffered document to the given stream in the requested format.
 * <p>
 * For {@code Format.PDF} the buffered bytes are copied as-is. For any other format the first
 * page of the buffered PDF is rendered at the given DPI; PNG output goes through the first
 * image writer whose metadata is writable and supports the standard metadata format, so that
 * {@code setDPI} can record the resolution. All other formats are written with
 * {@code ImageIO.write}.
 * </p>
 *
 * @param outputStream destination stream
 * @param format       requested output format
 * @param dpi          rendering resolution for image formats
 * @throws IOException if reading, rendering or writing fails
 */
private void writeDocument(OutputStream outputStream, Format format, int dpi) throws IOException,
		DocumentException, PrintingException {
	if (format == Format.PDF) {
		baos.writeTo(outputStream);
	} else {
		PDDocument pdf = PDDocument.load(new ByteArrayInputStream(baos.toByteArray()), true);
		BufferedImage bufferedImage;
		try {
			PDFRenderer renderer = new PDFRenderer(pdf);
			bufferedImage = renderer.renderImageWithDPI(0, dpi);
		} finally {
			// Release the document even when rendering throws.
			pdf.close();
		}
		if (format == Format.PNG) {
			final String formatName = format.getExtension();
			for (Iterator<ImageWriter> iw = ImageIO.getImageWritersByFormatName(formatName); iw.hasNext();) {
				ImageWriter writer1 = iw.next();
				ImageWriteParam writeParam = writer1.getDefaultWriteParam();
				ImageTypeSpecifier typeSpecifier = ImageTypeSpecifier
						.createFromBufferedImageType(BufferedImage.TYPE_INT_ARGB);
				IIOMetadata metadata = writer1.getDefaultImageMetadata(typeSpecifier, writeParam);
				// Skip writers whose metadata cannot carry the DPI information.
				if (metadata.isReadOnly() || !metadata.isStandardMetadataFormatSupported()) {
					continue;
				}

				setDPI(metadata);
				// Write bufferedImage to outputStream
				final ImageOutputStream stream = ImageIO.createImageOutputStream(outputStream);
				try {
					writer1.setOutput(stream);
					writer1.write(metadata, new IIOImage(bufferedImage, null, metadata), writeParam);
				} finally {
					stream.flush();
					stream.close();
				}
				// Only the first suitable writer is used.
				break;
			}
		} else {
			ImageIO.write(bufferedImage, format.getExtension(), outputStream);
		}

	}
}

Source File: PdfComparator.java From pdfcompare with Apache License 2.0

5 votes

/**
 * Renders one page at the environment's DPI and pairs the image with the page's media box
 * dimensions, swapping width and height when the page is rotated by 90 or 270 degrees.
 *
 * @param document            document the page belongs to
 * @param expectedPdfRenderer renderer used to produce the image
 * @param pageIndex           index of the page to render
 * @param environment         supplies the rendering DPI
 * @return the rendered image together with its dimensions
 * @throws IOException if rendering fails
 */
public static ImageWithDimension renderPageAsImage(final PDDocument document, final PDFRenderer expectedPdfRenderer, final int pageIndex, Environment environment)
        throws IOException {
    final BufferedImage rendered = expectedPdfRenderer.renderImageWithDPI(pageIndex, environment.getDPI());
    final PDPage pdPage = document.getPage(pageIndex);
    final PDRectangle box = pdPage.getMediaBox();

    final boolean quarterTurned = pdPage.getRotation() == 90 || pdPage.getRotation() == 270;
    if (!quarterTurned) {
        return new ImageWithDimension(rendered, box.getWidth(), box.getHeight());
    }
    return new ImageWithDimension(rendered, box.getHeight(), box.getWidth());
}

Source File: PdfTableReader.java From pdf-table with MIT License

5 votes

/**
 * Renders each page in the given range and parses it into a {@code ParsedTablePage}.
 *
 * @param document  PDF document instance
 * @param startPage first page in the range to parse (1-based)
 * @param endPage   last page in the range (1-based, inclusive)
 * @return the parsed pages in page order
 * @throws IOException if a page cannot be rendered or parsed
 */
public List<ParsedTablePage> parsePdfTablePages(PDDocument document, int startPage, int endPage) throws IOException {
    List<ParsedTablePage> parsedPages = new ArrayList<>();
    PDFRenderer pageRenderer = new PDFRenderer(document);
    int pageIndex = startPage - 1;
    while (pageIndex < endPage) {
        BufferedImage rendered;
        // Rendering is serialised on this instance.
        synchronized (this) {
            rendered = pageRenderer.renderImageWithDPI(pageIndex, settings.getPdfRenderingDpi(), ImageType.RGB);
        }
        // The parser is given the 1-based page number alongside the page object.
        parsedPages.add(parsePdfTablePage(rendered, document.getPage(pageIndex), pageIndex + 1));
        pageIndex++;
    }
    return parsedPages;
}

Source File: PdfTableReader.java From pdf-table with MIT License

5 votes

/**
 * Renders one page as an RGB image at the DPI configured in the settings and writes it to
 * {@code page_<page + 1>.png} inside the given directory.
 *
 * @param renderer  PDF renderer instance
 * @param page      page index passed to the renderer; the file name uses {@code page + 1}
 * @param outputDir output directory
 * @throws IOException if rendering or writing fails
 */
private void savePdfPageAsPNG(PDFRenderer renderer, int page, Path outputDir) throws IOException {
    BufferedImage rendered;
    // Rendering is serialised on this instance.
    synchronized (this) {
        rendered = renderer.renderImageWithDPI(page, settings.getPdfRenderingDpi(), ImageType.RGB);
    }
    String pngName = "page_" + (page + 1) + ".png";
    Path target = outputDir.resolve(Paths.get(pngName));
    ImageIOUtil.writeImage(rendered, target.toString(), settings.getPdfRenderingDpi());
}

Source File: PdfTools.java From MyBox with Apache License 2.0

5 votes

/**
 * Loads a (possibly password protected) PDF and renders one page to an image.
 *
 * @param file      PDF file to read
 * @param password  password passed to the loader
 * @param page      index of the page to render
 * @param dpi       rendering resolution
 * @param imageType colour model of the resulting image
 * @return the rendered page, or {@code null} if loading or rendering failed
 */
public static BufferedImage page2image(File file, String password, int page,
        int dpi, ImageType imageType) {
    // try-with-resources closes the document on every path; no explicit close() is needed.
    try ( PDDocument doc = PDDocument.load(file, password, AppVariables.pdfMemUsage)) {
        PDFRenderer renderer = new PDFRenderer(doc);
        return renderer.renderImageWithDPI(page, dpi, imageType);
    } catch (Exception e) {
        // Best effort: failures are logged at debug level and reported as null.
        logger.debug(e.toString());
        return null;
    }
}

Source File: RenderType3Character.java From testarea-pdfbox2 with Apache License 2.0

4 votes

/**
 * <a href="http://stackoverflow.com/questions/42032729/render-type3-font-character-as-image-using-pdfbox">
 * Render Type3 font character as image using PDFBox
 * </a>
 * <br/>
 * <a href="https://drive.google.com/file/d/0B0f6X4SAMh2KRDJTbm4tb3E1a1U/view">
 * 4700198773.pdf
 * </a>
 * from
 * <a href="http://stackoverflow.com/questions/37754112/extract-text-with-custom-font-result-non-readble">
 * extract text with custom font result non readble
 * </a>
 * <p>
 * This test shows how one can render individual Type 3 font glyphs as bitmaps.
 * Unfortunately PDFBox out-of-the-box does not provide a class to render contents
 * of arbitrary XObjects, merely for rendering pages; thus, we simply create a page
 * with the glyph in question and render that page.
 * </p>
 * <p>
 * As the OP did not provide a sample PDF, we simply use one from another
 * stackoverflow question. There obviously might remain issues with the
 * OP's files.
 * </p>
 */
@Test
public void testRenderSdnList() throws IOException, IllegalAccessException, IllegalArgumentException, InvocationTargetException, NoSuchMethodException, SecurityException
{
    // The raw write(String) method is looked up on the content stream's superclass and made
    // accessible via reflection so that a hand-built "Tj" operation can be emitted.
    Method contentStreamWrite = PDPageContentStream.class.getSuperclass().getDeclaredMethod("write", String.class);
    contentStreamWrite.setAccessible(true);

    // The stream and the loaded document are both managed, so the document is closed
    // even if a glyph fails to render.
    try (   InputStream resource = getClass().getResourceAsStream("sdnlist.pdf");
            PDDocument document = Loader.loadPDF(resource))
    {
        PDPage page = document.getPage(1);
        PDResources pageResources = page.getResources();
        COSName f1Name = COSName.getPDFName("R144");
        PDType3Font fontF1 = (PDType3Font) pageResources.getFont(f1Name);
        Map<String, Integer> f1NameToCode = fontF1.getEncoding().getNameToCodeMap();

        COSDictionary charProcsDictionary = fontF1.getCharProcs();
        for (COSName key : charProcsDictionary.keySet())
        {
            COSStream stream = (COSStream) charProcsDictionary.getDictionaryObject(key);
            PDType3CharProc charProc = new PDType3CharProc(fontF1, stream);
            PDRectangle bbox = charProc.getGlyphBBox();
            if (bbox == null)
                bbox = charProc.getBBox();
            Integer code = f1NameToCode.get(key.getName());

            // Glyphs without a code in the font's encoding cannot be addressed by a Tj operand.
            if (code != null)
            {
                try (   PDDocument charDocument = new PDDocument())
                {
                    PDPage charPage = new PDPage(bbox);
                    charDocument.addPage(charPage);
                    charPage.setResources(pageResources);
                    try (   PDPageContentStream charContentStream = new PDPageContentStream(charDocument, charPage))
                    {
                        charContentStream.beginText();
                        charContentStream.setFont(fontF1, bbox.getHeight());
                        // %02X zero-pads to two hex digits; a space-padded single digit such as
                        // "< A>" would be read as <A0> because whitespace in PDF hex strings is
                        // ignored and a missing final digit is assumed to be 0.
                        contentStreamWrite.invoke(charContentStream, String.format("<%02X> Tj\n", code));
                        charContentStream.endText();
                    }

                    File result = new File(RESULT_FOLDER, String.format("sdnlist-%s-%s.png", key.getName(), code));
                    PDFRenderer renderer = new PDFRenderer(charDocument);
                    BufferedImage image = renderer.renderImageWithDPI(0, 96);
                    ImageIO.write(image, "PNG", result);
                    charDocument.save(new File(RESULT_FOLDER, String.format("sdnlist-%s-%s.pdf", key.getName(), code)));
                }
            }
        }
    }
}

Source File: PAdESVisibleSignaturePositionTest.java From dss with GNU Lesser General Public License v2.1

4 votes

/**
 * Loads a PDF from the stream and renders its first page at {@code DPI}.
 *
 * @param inputStream stream containing the PDF
 * @return the rendered first page
 * @throws IOException if the PDF cannot be loaded or rendered
 */
private BufferedImage pdfToBufferedImage(InputStream inputStream) throws IOException {
	try (PDDocument pdf = PDDocument.load(inputStream)) {
		final int firstPage = 0;
		return new PDFRenderer(pdf).renderImageWithDPI(firstPage, DPI);
	}
}

Source File: PdfBoxUtilities.java From tess4j with Apache License 2.0

4 votes

/**
 * Converts PDF to PNG format.
 * <p>
 * Every page is rendered as an RGB image at 300 DPI into a freshly created temporary
 * directory as {@code workingimageNNNN.png}. A rendering failure is logged rather than
 * thrown; if no image was produced the temporary directory is removed and an empty array
 * is returned.
 * </p>
 *
 * @param inputPdfFile input file
 * @return the PNG images sorted by file name; empty if nothing could be produced
 * @throws java.io.IOException if the temporary directory cannot be created
 */
public static File[] convertPdf2Png(File inputPdfFile) throws IOException {
    Path path = Files.createTempDirectory("tessimages");
    File imageDir = path.toFile();

    PDDocument document = null;
    try {
        document = PDDocument.load(inputPdfFile);
        PDFRenderer pdfRenderer = new PDFRenderer(document);
        for (int page = 0; page < document.getNumberOfPages(); ++page) {
            BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);

            // suffix in filename will be used as the file format
            String filename = String.format("workingimage%04d.png", page + 1);
            ImageIOUtil.writeImage(bim, new File(imageDir, filename).getAbsolutePath(), 300);
        }
    } catch (IOException ioe) {
        logger.error("Error extracting PDF Document => " + ioe);
    } finally {
        // File.list() returns null on I/O error; only remove a directory known to be empty.
        String[] produced = imageDir.list();
        if (produced != null && produced.length == 0) {
            imageDir.delete();
        }

        if (document != null) {
            try {
                document.close();
            } catch (Exception ignored) {
                // Best effort: a failure to close must not mask the conversion result.
            }
        }
    }

    // find working files
    File[] workingFiles = imageDir.listFiles(new FilenameFilter() {

        @Override
        public boolean accept(File dir, String name) {
            return name.toLowerCase().matches("workingimage\\d{4}\\.png$");
        }
    });

    // listFiles() returns null when the directory was removed above (nothing rendered)
    // or cannot be read; report that as "no images" instead of failing in Arrays.sort.
    if (workingFiles == null) {
        return new File[0];
    }

    Arrays.sort(workingFiles, new Comparator<File>() {
        @Override
        public int compare(File f1, File f2) {
            return f1.getName().compareTo(f2.getName());
        }
    });

    return workingFiles;
}

Source File: ConvertToImages.java From blog-codes with Apache License 2.0

4 votes

/**
 * Renders every page of a fixed PDF as an RGB image at 300 DPI and writes it to
 * {@code <dir>/<pageNumber>.png} (page numbers start at 1), creating the output directory
 * first.
 *
 * @param args unused
 * @throws InvalidPasswordException if the PDF is password protected
 * @throws IOException if the PDF cannot be read or an image cannot be written
 */
public static void main(String[] args) throws InvalidPasswordException, IOException {
	// Selects the KCMS colour management provider before any rendering takes place.
	System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider"); 

	String path="/home/lili/下载/books/汪曾祺全集1.pdf";
	String dir="/home/lili/data/wang/book1";
	new File(dir).mkdirs();
	// try-with-resources closes the document even if rendering or writing fails.
	try (PDDocument document = PDDocument.load(new File(path))) {
		PDFRenderer pdfRenderer = new PDFRenderer(document);
		for (int page = 0; page < document.getNumberOfPages(); ++page) {
			BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
			ImageIOUtil.writeImage(bim, dir + "/" + (page + 1) + ".png", 300);
		}
	}
}

Source File: RenderPage.java From testarea-pdfbox2 with Apache License 2.0

3 votes

/**
 * <a href="http://stackoverflow.com/questions/37724865/pdfbox-2-0-1-hangs-rendering-pdf-page">
 * PDFBox 2.0.1 hangs rendering pdf page
 * </a>
 * <br/>
 * <a href="https://drive.google.com/file/d/0B5zMlyl8rHwsY3Y1WjFVZlllajA/view?usp=sharing">
 * 2E5D18CD314DC6B7E236C8546A2918.pdf
 * </a>
 * <p>
 * The issue can be reproduced in a Java 8 VM. As Tilman already mentioned in his answer,
 * it is an issue introduced by Java 8 using a different color management system than
 * former Java versions.
 * </p>
 * <p>
 * Analyzing the VM behavior with the new color management system, it becomes clear that
 * the issue is not really a memory leak (as could be conjectured from the excessive
 * memory use); instead, objects are instantiated faster than garbage collection can collect
 * and free unused objects!
 * </p>
 * <p>
 * One can allow garbage collection to catch up by changing the main loop of page content
 * parsing in PDFStreamEngine.processStreamOperators(PDContentStream):
 * </p>
 * <pre>
 * int i = 1;                         // new
 * while (token != null)
 * {
 *     if (token instanceof COSObject)
 *     {
 *         arguments.add(((COSObject) token).getObject());
 *     }
 *     else if (token instanceof Operator)
 *     {
 *         processOperator((Operator) token, arguments);
 *         arguments = new ArrayList<COSBase>();
 *     }
 *     else
 *     {
 *         arguments.add((COSBase) token);
 *     }
 *     token = parser.parseNextToken();
 *     if (i++ % 1000 == 0)           // new
 *         Runtime.getRuntime().gc(); // new
 * }
 * </pre>
 */
@Test
public void testRender2E5D18CD314DC6B7E236C8546A2918() throws IOException
{
    File result = new File(RESULT_FOLDER, "2E5D18CD314DC6B7E236C8546A2918.png");
    // The stream and the loaded document are both managed, so the document is closed
    // even if rendering fails.
    try (   InputStream resource = getClass().getResourceAsStream("2E5D18CD314DC6B7E236C8546A2918.pdf");
            PDDocument document = Loader.loadPDF(resource))
    {
        PDFRenderer renderer = new PDFRenderer(document);
        BufferedImage image = renderer.renderImageWithDPI(0, 96); //Gets stuck here
        ImageIO.write(image, "PNG", result);
    }
}

Source File: CompatibilityHelper.java From pdfbox-layout with MIT License

2 votes

/**
 * Produces an RGB bitmap of a single page of a document.
 *
 * @param document   the document that contains the page
 * @param pageIndex  index of the page to render
 * @param resolution rendering resolution, passed to the renderer as DPI
 * @return the rendered page image
 * @throws IOException if PDFBox fails to render the page
 */
public static BufferedImage createImageFromPage(final PDDocument document, int pageIndex, final int resolution) throws IOException {
    return new PDFRenderer(document).renderImageWithDPI(pageIndex, resolution, ImageType.RGB);
}

Java Code Examples for org.apache.pdfbox.rendering.PDFRenderer#renderImageWithDPI()