org.apache.pdfbox.pdmodel.PDDocument#close

Source File: TitleBlockWriterTest.java From eplmp with Eclipse Public License 1.0

6 votes

/**
 * Generates a title block for the part iteration, loads the resulting bytes as a
 * PDF and checks that the extracted text contains the user login, the part number
 * and the part revision description.
 */
@Test
public void createTitleBlockForPartIterationTest() throws Exception {
    PartTitleBlockData partTitleBlockData = new PartTitleBlockData(partIteration, new Locale("en"));
    byte[] titleBlock = new TitleBlockWriter(partTitleBlockData).createTitleBlock();

    String text;
    // try-with-resources releases the document even when text extraction throws
    try (PDDocument loadedDocument = PDDocument.load(titleBlock)) {
        Assert.assertNotNull(loadedDocument);
        text = new PDFTextStripper().getText(loadedDocument);
    }

    Assert.assertFalse(text.isEmpty());
    Assert.assertTrue(text.contains(user.getLogin()));
    Assert.assertTrue(text.contains(partIteration.getNumber()));
    Assert.assertTrue(text.contains(partIteration.getPartRevision().getDescription()));
}

Source File: ColorsProcessor.java From asciidoctorj with Apache License 2.0

6 votes

/**
 * Loads the PDF at the given path and hands the content stream of every page
 * that has one to {@code processStream}, in page order, so the colors of the
 * words specified in the constructor can be extracted. The document is closed
 * before the method returns, whether or not processing succeeded.
 *
 * @param filename PDF document path
 * @throws IOException declared by the load, processing and close calls
 */
public void parse (String filename) throws IOException {
    PDDocument pdf = null;
    try {
        pdf = PDDocument.load(filename, false);
        List pages = pdf.getDocumentCatalog().getAllPages();
        int pageIndex = 0;
        while (pageIndex < pages.size()) {
            PDPage current = (PDPage) pages.get(pageIndex);
            pageIndex++;
            PDStream pageContents = current.getContents();
            if (pageContents == null) {
                // nothing to process on a page without a content stream
                continue;
            }
            processStream(current, current.getResources(), current.getContents().getStream());
        }
    } finally {
        if (pdf != null) {
            pdf.close();
        }
    }
}

Source File: ImageProcessor.java From asciidoctorj-pdf with Apache License 2.0

6 votes

/**
 * Parses a document extracting the images.
 * <p>
 * Every page is passed to {@code processStream} together with its resources and
 * content stream; {@code currentPage} is set to the zero-based page index before
 * the page is processed. Pages without a content stream are skipped instead of
 * failing with a {@link NullPointerException}. The document is closed before the
 * method returns, whether or not processing succeeded.
 * </p>
 *
 * @param filename PDF document path
 * @throws IOException declared by the load, processing and close calls
 */
public void parse(String filename) throws IOException {
    PDDocument document = null;
    try {
        document = PDDocument.load(filename, false);
        List allPages = document.getDocumentCatalog().getAllPages();
        for( int i=0; i<allPages.size(); i++ ) {
            PDPage page = (PDPage)allPages.get( i );
            currentPage = i;
            if (page.getContents() == null) {
                // a page without a content stream has nothing to extract
                continue;
            }
            processStream( page, page.findResources(), page.getContents().getStream() );
        }
    } finally {
        if (document != null) {
            document.close();
        }
    }
}

Source File: PdfBoxUtilities.java From tess4j with Apache License 2.0

6 votes

/**
 * Gets PDF Page Count.
 * <p>
 * An {@link IOException} while loading or counting is logged and reported as
 * {@code -1}. Any exception raised while closing the document is ignored.
 * </p>
 *
 * @param inputPdfFile input file
 * @return number of pages, or {@code -1} if an {@link IOException} occurred
 */
public static int getPdfPageCount(File inputPdfFile) {
    PDDocument pdf = null;
    int pageCount;
    try {
        pdf = PDDocument.load(inputPdfFile);
        pageCount = pdf.getNumberOfPages();
    } catch (IOException ioe) {
        logger.error("Error counting PDF pages => " + ioe);
        pageCount = -1;
    } finally {
        if (pdf != null) {
            try {
                pdf.close();
            } catch (Exception ignored) {
                // best-effort close: the result has already been determined
            }
        }
    }
    return pageCount;
}

Source File: FlattenAndMerge.java From testarea-pdfbox2 with Apache License 2.0

6 votes

/**
 * <a href="https://stackoverflow.com/questions/47140209/pdfbox-files-are-sharing-common-cosstream-after-flatten">
 * PDFBox files are sharing common COSStream after flatten
 * </a>
 * <br/>
 * <a href="https://studentloans.gov/myDirectLoan/downloadForm.action?searchType=library&shortName=general&localeCode=en-us">
 * GeneralForbearance.pdf
 * </a>
 * <p>
 * Indeed, flattening, merging, and early closing of source documents
 * do not mingle well.
 * </p>
 * <p>
 * The test loads the same PDF twice, flattens the form of the second copy,
 * appends it to the first copy, closes the second copy and only then saves the
 * merged result. The statement order is deliberate: per the inline notes, the
 * save only passes when either the flatten call or the early close is removed.
 * </p>
 */
@Test
public void testMergeGovernmentForms() throws IOException {
    // Two independent streams over the same resource: one per loaded document.
    try (   InputStream resource1 = getClass().getResourceAsStream("GeneralForbearance.pdf");
            InputStream resource2 = getClass().getResourceAsStream("GeneralForbearance.pdf")) {
        PDDocument destination = Loader.loadPDF(resource1);

        PDDocument source = Loader.loadPDF(resource2);
        source.getDocumentCatalog().getAcroForm().flatten(); //comment out just this line and the destination.save will pass

        PDFMergerUtility appender = new PDFMergerUtility();

        // Append the flattened source to the destination.
        appender.appendDocument(destination, source);

        // Intentionally closed BEFORE the destination is saved; do not reorder.
        source.close(); //comment out just this line and the destination.save will pass

        destination.save(new File(RESULT_FOLDER, "PrintMergeIssue.pdf"));
        destination.close();
    }
}

Source File: ImageExtractor.java From inception with Apache License 2.0

6 votes

/**
 * Extracts every image found on every page of the given PDF and writes each one
 * as a PNG file into {@code outDir}. Output files are named
 * {@code <baseName>_<n>.png}, where {@code baseName} is the input file name
 * without its last extension and {@code n} counts from 1 across all pages.
 *
 * @param inFile the PDF file to read
 * @param dpi    resolution passed to the {@code RegionExtractor}
 * @param outDir directory the PNG files are written to
 * @throws IOException declared by the load, extraction and image-writing calls
 */
static void processFile(File inFile, int dpi, String outDir) throws IOException
{
    // A name without a dot previously made substring(0, -1) throw; use the whole name then.
    String fileName = inFile.getName();
    int dotIndex = fileName.lastIndexOf('.');
    String baseName = dotIndex < 0 ? fileName : fileName.substring(0, dotIndex);

    // try-with-resources guarantees the document is closed on every path
    try (PDDocument doc = PDDocument.load(inFile)) {
        RegionExtractor regionExt = new RegionExtractor(doc, dpi);
        int count = 1;
        for (int pageIndex = 0; pageIndex < doc.getNumberOfPages(); pageIndex++) {
            for (ImageOperator op : ImageExtractor.extract(doc.getPage(pageIndex))) {
                RenderedImage image = regionExt.extract(pageIndex, op.x, op.y, op.w, op.h);
                String outFileName = baseName + "_" + count + ".png";
                ImageIO.write(image, "png", new File(outDir, outFileName));
                System.out.println(outFileName + " is saved.");
                count++;
            }
        }
    }
}

Source File: FillInForm.java From testarea-pdfbox2 with Apache License 2.0

6 votes

/**
 * <a href="https://stackoverflow.com/questions/52059931/pdfbox-setvalue-for-multiple-pdtextfield">
 * PDFBox setValue for multiple PDTextField
 * </a>
 * <br/>
 * <a href="https://ufile.io/z8jzj">
 * testform.pdf
 * </a>
 * <p>
 * Cannot reproduce the issue.
 * </p>
 * <p>
 * The test sets the values of the text fields "Check1", "Check2" and
 * "HelloWorld" and saves the result to the result folder.
 * </p>
 */
@Test
public void testFillLikeJuvi() throws IOException {
    // The document is a managed resource so it is closed even if a field lookup or the save fails.
    try (   InputStream originalStream = getClass().getResourceAsStream("testform.pdf");
            PDDocument document = Loader.loadPDF(originalStream) ) {
        PDDocumentCatalog docCatalog = document.getDocumentCatalog();
        PDAcroForm acroForm = docCatalog.getAcroForm();

        PDTextField field = (PDTextField) acroForm.getField("Check1");
        field.setValue("1111");

        PDTextField field2 = (PDTextField) acroForm.getField("Check2");
        field2.setValue("2222");

        PDTextField field3 = (PDTextField) acroForm.getField("HelloWorld");
        field3.setValue("HelloWorld");

        document.save(new File(RESULT_FOLDER, "testform-filled.pdf"));
    }
}

Source File: PdfUtils.java From job with MIT License

5 votes

/**
 * Extracts the text of the PDF read from the given stream.
 * <p>
 * The text is collected as characters rather than being encoded to bytes and
 * decoded again with the platform default charset, so characters outside that
 * charset are no longer corrupted. Both the document and {@code input} are
 * closed before the method returns, including when loading or stripping fails.
 * </p>
 *
 * @param input stream providing the PDF bytes; closed by this method
 * @return the text extracted from the document
 * @throws Exception if the document cannot be loaded or its text cannot be extracted
 */
public static String parsePdf2Text(InputStream input) throws Exception {
  try (InputStream in = input; PDDocument doc = PDDocument.load(in)) {
    return new PDFTextStripper().getText(doc);
  }
}

Source File: FillInForm.java From testarea-pdfbox2 with Apache License 2.0

5 votes

/**
 * <a href="http://stackoverflow.com/questions/39720305/ufffd-is-not-available-in-this-fonts-encoding-winansiencoding">
 * U+FFFD is not available in this font's encoding: WinAnsiEncoding
 * </a>
 * <p>
 * The issue cannot be reproduced.
 * </p>
 * <p>
 * The test sets the "Title" field to a value containing a German umlaut, marks
 * it read-only, flattens all top-level fields and saves the result.
 * </p>
 */
@Test
public void testFillLikeStDdt() throws IOException
{
    // The document is a managed resource so it is closed even if filling, flattening or saving fails.
    try (   InputStream originalStream = getClass().getResourceAsStream("FillFormField.pdf");
            PDDocument pdfDocument = Loader.loadPDF(originalStream) )
    {
        PDAcroForm acroForm = pdfDocument.getDocumentCatalog().getAcroForm();

        if (acroForm != null)
        {
            List<PDField> fields = acroForm.getFields();
            for (PDField field : fields) {
                switch (field.getPartialName()) {
                    case "Title" /*"devices"*/:
                        // Unicode escape keeps the umlaut intact regardless of the source file
                        // encoding; a mis-decoded literal turns into U+FFFD.
                        field.setValue("Ger\u00e4t");
                        field.setReadOnly(true);
                        break;
                }
            }
            acroForm.flatten(fields, true);
        }

        pdfDocument.save(new File(RESULT_FOLDER, "FillFormFieldStDdt.pdf"));
    }
}

Source File: PdfTools.java From MyBox with Apache License 2.0

5 votes

/**
 * Creates a PDF through {@code createPDF(file, author)} and closes the returned
 * document when one is returned.
 *
 * @param file   passed through to {@code createPDF}
 * @param author passed through to {@code createPDF}
 * @return {@code true} when no exception occurred; {@code false} after an
 *         exception, which is logged
 */
public static boolean createPdfFile(File file, String author) {
    boolean succeeded;
    try {
        PDDocument created = createPDF(file, author);
        if (null != created) {
            created.close();
        }
        succeeded = true;
    } catch (Exception e) {
        logger.error(e.toString());
        succeeded = false;
    }
    return succeeded;
}

Source File: AddImage.java From testarea-pdfbox2 with Apache License 2.0

5 votes

/**
 * <a href="https://stackoverflow.com/questions/50988007/clip-an-image-with-pdfbox">
 * Clip an image with PDFBOX
 * </a>
 * <p>
 * This test demonstrates how to clip an image and frame the clipping area.
 * </p>
 */
@SuppressWarnings("deprecation")
@Test
public void testImageAddClipped() throws IOException {
    // The document is a managed resource so it is closed even if drawing or saving fails.
    try (   InputStream imageResource = getClass().getResourceAsStream("Willi-1.jpg");
            PDDocument doc = new PDDocument()   )
    {
        PDImageXObject pdImage = PDImageXObject.createFromByteArray(doc, ByteStreams.toByteArray(imageResource), "Willi");

        int w = pdImage.getWidth();
        int h = pdImage.getHeight();

        PDPage page = new PDPage();
        doc.addPage(page);
        PDRectangle cropBox = page.getCropBox();

        // The content stream is closed at the end of this inner block, i.e. before the save below.
        try (PDPageContentStream contentStream = new PDPageContentStream(doc, page)) {
            contentStream.setStrokingColor(25, 200, 25);
            contentStream.setLineWidth(4);
            // Diamond-like path anchored at the lower left corner of the crop box.
            contentStream.moveTo(cropBox.getLowerLeftX(), cropBox.getLowerLeftY() + h/2);
            contentStream.lineTo(cropBox.getLowerLeftX() + w/3, cropBox.getLowerLeftY() + 2*h/3);
            contentStream.lineTo(cropBox.getLowerLeftX() + w, cropBox.getLowerLeftY() + h/2);
            contentStream.lineTo(cropBox.getLowerLeftX() + w/3, cropBox.getLowerLeftY() + h/3);
            contentStream.closePath();
            // Raw "W" is written instead of calling clip() so that the same path
            // can still be stroked by the next call.
            //contentStream.clip();
            contentStream.appendRawCommands("W ");
            contentStream.stroke();

            contentStream.drawImage(pdImage, cropBox.getLowerLeftX(), cropBox.getLowerLeftY(), w, h);
        }

        doc.save(new File(RESULT_FOLDER, "image-clipped.pdf"));
    }
}

Source File: FillInForm.java From testarea-pdfbox2 with Apache License 2.0

5 votes

/**
 * <a href="https://stackoverflow.com/questions/56938135/pdfbox-inconsistent-pdtextfield-autosize-behavior-after-setvalue">
 * PDFBox Inconsistent PDTextField Autosize Behavior after setValue
 * </a>
 * <br/>
 * <a href="http://www.filedropper.com/0postfontload">
 * 0.pdf
 * </a>
 * <p>
 * By resetting the MultiLine flags, too, one eventually gets rid
 * of the problem of the lower part of the field value being cut
 * off in the Care Providers Address fields. This actually should
 * be considered an issue of PDFBox, though, not of the source PDF
 * here.
 * </p>
 * @see #testFill0LikeXenyal()
 * @see #testFill0DropOldAppearance()
 * @see #testFill0DropOldAppearanceNoCombNoMax()
 */
@Test
public void testFill0DropOldAppearanceNoCombNoMaxNoMultiLine() throws IOException {
    // Bit masks cleared in each text field's /Ff entry below.
    final int FLAG_MULTILINE = 1 << 12;
    final int FLAG_COMB = 1 << 24;

    // The document is a managed resource so it is closed even if filling or saving fails.
    try (   InputStream originalStream = getClass().getResourceAsStream("0.pdf");
            InputStream fontStream = getClass().getResourceAsStream("Lato-Regular.ttf");
            PDDocument doc = Loader.loadPDF(originalStream))
    {
        PDAcroForm acroForm = doc.getDocumentCatalog().getAcroForm();

        PDType0Font font = PDType0Font.load(doc, fontStream, false);
        String font_name = acroForm.getDefaultResources().add(font).getName();

        for (PDField field : acroForm.getFieldTree()) {
            if (field instanceof PDTextField) {
                PDTextField textField = (PDTextField) field;
                textField.getCOSObject().removeItem(COSName.MAX_LEN);
                textField.getCOSObject().setFlag(COSName.FF, FLAG_COMB | FLAG_MULTILINE, false);
                // font size 0 in the default appearance
                textField.setDefaultAppearance(String.format("/%s 0 Tf 0 g", font_name));
                // drop the existing normal appearance of every widget before setting the value
                textField.getWidgets().forEach(w -> w.getAppearance().setNormalAppearance((PDAppearanceEntry)null));
                textField.setValue("Test");
            }
        }

        doc.save(new File(RESULT_FOLDER, "0-filledDropOldAppearanceNoCombNoMaxNoMultiLine.pdf"));
    }
}

Source File: TestPdfFontExtractor.java From FontVerter with GNU Lesser General Public License v3.0

5 votes

/**
 * Extracts the fonts of the test PDF into the temporary folder and checks that
 * exactly three files were written and that each one has the "ttf" extension.
 */
@Test
public void givenPdfWith2Fonts_extractFontsToDir_thenDirectoryHasThreeTtfFiles() throws IOException {
    // try-with-resources closes the document even when an assertion fails
    try (PDDocument doc = PDDocument.load(TestUtils.readTestFile("pdf/brno30.pdf"))) {
        PdfFontExtractor extractor = new PdfFontExtractor();

        File extractDir = folder.getRoot();
        extractor.extractFontsToDir(doc, extractDir);
        File[] fontFiles = extractDir.listFiles();

        // listFiles() returns null when the directory cannot be listed
        Assert.assertNotNull(fontFiles);
        Assert.assertEquals(3, fontFiles.length);
        for (File fileOn : fontFiles) {
            Assert.assertEquals("ttf", FilenameUtils.getExtension(fileOn.getPath()));
        }
    }
}

Source File: AddImage.java From testarea-pdfbox2 with Apache License 2.0

5 votes

/**
 * <a href="https://stackoverflow.com/questions/49958604/draw-image-at-mid-position-using-pdfbox-java">
 * Draw image at mid position using pdfbox Java
 * </a>
 * <p>
 * This is a fixed version of the OP's original code, cf.
 * {@link #testImageAppendLikeShanky()}. It does not mirror the image.
 * </p>
 */
@Test
public void testImageAppendNoMirror() throws IOException {
    // The document is a managed resource so it is closed even if drawing or saving fails.
    try (   InputStream resource = getClass().getResourceAsStream("/mkl/testarea/pdfbox2/sign/test.pdf");
            InputStream imageResource = getClass().getResourceAsStream("Willi-1.jpg");
            PDDocument doc = Loader.loadPDF(resource)   )
    {
        PDImageXObject pdImage = PDImageXObject.createFromByteArray(doc, ByteStreams.toByteArray(imageResource), "Willi");

        int w = pdImage.getWidth();
        int h = pdImage.getHeight();

        PDPage page = doc.getPage(0);

        // The content stream is closed at the end of this inner block, i.e. before the save below.
        try (PDPageContentStream contentStream = new PDPageContentStream(doc, page, PDPageContentStream.AppendMode.APPEND, true)) {
            float x_pos = page.getCropBox().getWidth();
            float y_pos = page.getCropBox().getHeight();

            // Center the image in the crop box, offset by the crop box origin.
            float x_adjusted = ( x_pos - w ) / 2 + page.getCropBox().getLowerLeftX();
            float y_adjusted = ( y_pos - h ) / 2 + page.getCropBox().getLowerLeftY();

            contentStream.drawImage(pdImage, x_adjusted, y_adjusted, w, h);
        }

        doc.save(new File(RESULT_FOLDER, "test-with-image-no-mirror.pdf"));
    }
}

Source File: PdfExtractionResource.java From quarkus-pdf-extract with Apache License 2.0

5 votes

/**
 * Extracts the text of the given PDF file with a {@code PDFLayoutTextStripper}.
 * <p>
 * The document is closed before the method returns. With try-with-resources an
 * exception thrown while closing no longer replaces an exception thrown during
 * extraction; it is attached to it as a suppressed exception instead.
 * </p>
 *
 * @param pdfFile the PDF file to read
 * @return the text produced by the stripper
 * @throws IOException declared by the load, extraction and close calls
 */
static String getText(File pdfFile) throws IOException {
    try (PDDocument doc = PDDocument.load(pdfFile)) {
        return new PDFLayoutTextStripper().getText(doc);
    }
}

Source File: ReadXfaForm.java From testarea-pdfbox2 with Apache License 2.0

4 votes

/**
 * Reads the XFA resource of the PDF provided by the given stream.
 * <p>
 * The document is closed on every path, including when reading the XFA bytes
 * throws. An {@link IOException} is printed and reported as {@code null}.
 * </p>
 *
 * @param file stream providing the PDF; may be {@code null}
 * @return the XFA bytes, or {@code null} when {@code file} is {@code null}, the
 *         document has no catalog, AcroForm or XFA resource, or an
 *         {@link IOException} occurred
 */
public static byte[] getParsableXFAForm(InputStream file)
{
    if (file == null)
        return null;

    // try-with-resources replaces the per-branch close() calls and also covers the exceptional paths
    try (PDDocument doc = Loader.loadPDF(file))
    {
        if (doc == null)
            return null;
        doc.setAllSecurityToBeRemoved(true);

        PDDocumentCatalog catalog = doc.getDocumentCatalog();
        if (catalog == null)
            return null;

        PDAcroForm acroForm = catalog.getAcroForm();
        if (acroForm == null)
            return null;

        PDXFAResource xfa = acroForm.getXFA();
        if (xfa == null)
            return null;

        return xfa.getBytes();
    } catch (IOException e)
    {
        // handle IOException
        // happens when the file is corrupt.
        e.printStackTrace();
        System.out.println("XFAUtils-getParsableXFAForm-IOException");
        return null;
    }
}

Source File: RectanglesOverText.java From testarea-pdfbox2 with Apache License 2.0

4 votes

/**
 * <a href="https://stackoverflow.com/questions/46080131/text-coordinates-when-stripping-from-pdfbox">
 * Text coordinates when stripping from PDFBox
 * </a>
 * <br/>
 * <a href="https://download-a.akamaihd.net/files/media_mwb/b7/mwb_I_201711.pdf">
 * mwb_I_201711.pdf
 * </a>
 * <p>
 * This test applies the OP's code to his example PDF file and indeed, there is an offset!
 * This is due to the <code>LegacyPDFStreamEngine</code> method <code>showGlyph</code>
 * which manipulates the text rendering matrix to make the lower left corner of the
 * crop box the origin. In the current version of this test, that offset is corrected,
 * see below.
 * </p>
 */
@Test
public void testCoverTextByRectanglesMwbI201711() throws IOException {
    // The document is a managed resource so it is closed even if stripping, drawing or saving fails.
    try (   InputStream resource = getClass().getResourceAsStream("mwb_I_201711.pdf");
            PDDocument doc = Loader.loadPDF(resource)  ) {
        myStripper stripper = new myStripper();

        stripper.setStartPage(1); // fix it to first page just to test it
        stripper.setEndPage(1);
        stripper.getText(doc);

        TextLine line = stripper.lines.get(1); // the line i want to paint on

        // -1 marks "not set yet" for both extremes
        float minx = -1;
        float maxx = -1;

        for (TextPosition pos: line.textPositions)
        {
            if (pos == null)
                continue;

            if (minx == -1 || pos.getTextMatrix().getTranslateX() < minx) {
                minx = pos.getTextMatrix().getTranslateX();
            }
            if (maxx == -1 || pos.getTextMatrix().getTranslateX() > maxx) {
                maxx = pos.getTextMatrix().getTranslateX();
            }
        }

        TextPosition firstPosition = line.textPositions.get(0);
        TextPosition lastPosition = line.textPositions.get(line.textPositions.size() - 1);

        // corrected x and y
        PDRectangle cropBox = doc.getPage(0).getCropBox();

        float x = minx + cropBox.getLowerLeftX();
        float y = firstPosition.getTextMatrix().getTranslateY() + cropBox.getLowerLeftY();
        float w = (maxx - minx) + lastPosition.getWidth();
        float h = lastPosition.getHeightDir();

        // The content stream is closed at the end of this inner block, i.e. before the save below.
        try (PDPageContentStream contentStream = new PDPageContentStream(doc, doc.getPage(0), PDPageContentStream.AppendMode.APPEND, false, true)) {
            contentStream.setNonStrokingColor(Color.RED);
            contentStream.addRect(x, y, w, h);
            contentStream.fill();
        }

        File fileout = new File(RESULT_FOLDER, "mwb_I_201711-withRectangles.pdf");
        doc.save(fileout);
    }
}

Source File: TestEmptySignatureField.java From testarea-pdfbox2 with Apache License 2.0

4 votes

/**
 * <a href="http://stackoverflow.com/questions/37601092/pdfbox-identify-specific-pages-and-functionalities-recommendations">
 * PDFBox identify specific pages and functionalities recommendations
 * </a>
 *
 * <p>
 * This test shows how to add an empty signature field with a custom appearance
 * to an existing PDF.
 * </p>
 */
@Test
public void testAddEmptySignatureField() throws IOException
{
    // The document is a managed resource so it is closed even if building the field or saving fails.
    try (   InputStream sourceStream = getClass().getResourceAsStream("test.pdf");
            OutputStream output = new FileOutputStream(new File(RESULT_FOLDER, "test-with-empty-sig-field.pdf"));
            PDDocument document = Loader.loadPDF(sourceStream))
    {
        PDFont font = PDType1Font.HELVETICA;
        PDResources resources = new PDResources();
        resources.put(COSName.getPDFName("Helv"), font);

        // Replace the document's AcroForm with a fresh one that uses the resources above.
        PDAcroForm acroForm = new PDAcroForm(document);
        acroForm.setDefaultResources(resources);
        document.getDocumentCatalog().setAcroForm(acroForm);

        PDRectangle rect = new PDRectangle(50, 750, 200, 50);

        PDAppearanceDictionary appearanceDictionary = new PDAppearanceDictionary();
        PDAppearanceStream appearanceStream = new PDAppearanceStream(document);
        appearanceStream.setBBox(rect.createRetranslatedRectangle());
        appearanceStream.setResources(resources);
        appearanceDictionary.setNormalAppearance(appearanceStream);

        // The appearance content is complete once this inner block closes the stream.
        try (PDPageContentStream contentStream = new PDPageContentStream(document, appearanceStream)) {
            contentStream.setStrokingColor(Color.BLACK);
            contentStream.setNonStrokingColor(Color.LIGHT_GRAY);
            contentStream.setLineWidth(2);
            // filled background covering the whole field
            contentStream.addRect(0, 0, rect.getWidth(), rect.getHeight());
            contentStream.fill();
            // two crossing strokes forming an "X", followed by a horizontal line
            contentStream.moveTo(1 * rect.getHeight() / 4, 1 * rect.getHeight() / 4);
            contentStream.lineTo(2 * rect.getHeight() / 4, 3 * rect.getHeight() / 4);
            contentStream.moveTo(1 * rect.getHeight() / 4, 3 * rect.getHeight() / 4);
            contentStream.lineTo(2 * rect.getHeight() / 4, 1 * rect.getHeight() / 4);
            contentStream.moveTo(3 * rect.getHeight() / 4, 1 * rect.getHeight() / 4);
            contentStream.lineTo(rect.getWidth() - rect.getHeight() / 4, 1 * rect.getHeight() / 4);
            contentStream.stroke();
            contentStream.setNonStrokingColor(Color.DARK_GRAY);
            contentStream.beginText();
            contentStream.setFont(font, rect.getHeight() / 5);
            contentStream.newLineAtOffset(3 * rect.getHeight() / 4, -font.getBoundingBox().getLowerLeftY() * rect.getHeight() / 5000);
            contentStream.showText("Customer");
            contentStream.endText();
        }

        PDSignatureField signatureField = new PDSignatureField(acroForm);
        signatureField.setPartialName("SignatureField");
        PDPage page = document.getPage(0);

        PDAnnotationWidget widget = signatureField.getWidgets().get(0);
        widget.setAppearance(appearanceDictionary);
        widget.setRectangle(rect);
        widget.setPage(page);

        // Register the widget on the page and the field in the form.
        page.getAnnotations().add(widget);
        acroForm.getFields().add(signatureField);

        document.save(output);
    }
}

Source File: ExtractText.java From testarea-pdfbox2 with Apache License 2.0

4 votes

/**
 * Extracts the text inside a rectangular region of one page of a PDF file.
 * <p>
 * Only paths ending in ".pdf" are processed; for any other path, or when an
 * {@link IOException} occurs while loading or stripping, a message is printed
 * and the empty string is returned. The document is closed only if it was
 * actually loaded, so those paths no longer fail with a
 * {@link NullPointerException} in the {@code finally} block.
 * </p>
 *
 * @param pdfLocation path of the PDF file
 * @param pageNumber  page selector; the page fetched is at index {@code pageNumber + 1}
 * @param x           x coordinate of the region
 * @param y           y coordinate of the region
 * @param width       width of the region
 * @param height      height of the region
 * @return the text found in the region, or the empty string
 * @throws IOException declared by the close call
 * @see #testUiPathTutorial()
 * @author Venkatachalam Neelakantan
 */
public String getTextUsingPositionsUsingPdf(String pdfLocation, int pageNumber, double x, double y, double width,
        double height) throws IOException {
    String extractedText = "";
    // PDDocument Creates an empty PDF document. You need to add at least
    // one page for the document to be valid.
    // Using load method we can load a PDF document
    PDDocument document = null;
    PDPage page = null;
    try {
        if (pdfLocation.endsWith(".pdf")) {
            document = Loader.loadPDF(new File(pdfLocation));
            int getDocumentPageCount = document.getNumberOfPages();
            System.out.println(getDocumentPageCount);

            // Get specific page. The parameter is the page index, which starts with 0.
            // NOTE(review): getPage takes a zero-based index, so "pageNumber + 1" looks
            // like an off-by-one — confirm against the callers before changing it.
            if (getDocumentPageCount > 0) {
                page = document.getPage(pageNumber + 1);
            } else if (getDocumentPageCount == 0) {
                page = document.getPage(0);
            }
            // To create a rectangle by passing the x axis, y axis, width and height
            Rectangle2D rect = new Rectangle2D.Double(x, y, width, height);
            String regionName = "region1";

            // Strip the text from PDF using PDFTextStripper Area with the
            // help of Rectangle and named need to given for the rectangle
            PDFTextStripperByArea stripper = new PDFTextStripperByArea();
            stripper.setSortByPosition(true);
            stripper.addRegion(regionName, rect);
            stripper.extractRegions(page);
            System.out.println("Region is " + stripper.getTextForRegion("region1"));
            extractedText = stripper.getTextForRegion("region1");
        } else {
            System.out.println("No data return");
        }
    } catch (IOException e) {
        System.out.println("The file  not found" + "");
    } finally {
        // document stays null for non-".pdf" paths and when loading fails
        if (document != null) {
            document.close();
        }
    }
    // Return the extracted text and this can be used for assertion
    return extractedText;
}

Source File: PdfBoxUtilities.java From tess4j with Apache License 2.0

4 votes

/**
 * Converts PDF to PNG format.
 * <p>
 * Every page is rendered at 300 DPI into a newly created temporary directory as
 * {@code workingimageNNNN.png}. An {@link IOException} while loading or rendering
 * is logged, and whatever was rendered up to that point is still returned. When
 * nothing was rendered the temporary directory is deleted and an empty array is
 * returned (previously this case ended in a {@link NullPointerException}).
 * </p>
 *
 * @param inputPdfFile input file
 * @return an array of PNG images sorted by file name; empty if none were produced
 * @throws java.io.IOException if the temporary directory cannot be created
 */
public static File[] convertPdf2Png(File inputPdfFile) throws IOException {
    Path path = Files.createTempDirectory("tessimages");
    File imageDir = path.toFile();

    PDDocument document = null;
    try {
        document = PDDocument.load(inputPdfFile);
        PDFRenderer pdfRenderer = new PDFRenderer(document);
        for (int page = 0; page < document.getNumberOfPages(); ++page) {
            BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);

            // suffix in filename will be used as the file format
            String filename = String.format("workingimage%04d.png", page + 1);
            ImageIOUtil.writeImage(bim, new File(imageDir, filename).getAbsolutePath(), 300);
        }
    } catch (IOException ioe) {
        logger.error("Error extracting PDF Document => " + ioe);
    } finally {
        // list() returns null when the directory cannot be read; treat that like "empty"
        String[] entries = imageDir.list();
        if (entries == null || entries.length == 0) {
            imageDir.delete();
        }

        if (document != null) {
            try {
                document.close();
            } catch (Exception ignored) {
                // best-effort close: the images have already been written
            }
        }
    }

    // find working files
    File[] workingFiles = imageDir.listFiles(new FilenameFilter() {

        @Override
        public boolean accept(File dir, String name) {
            return name.toLowerCase().matches("workingimage\\d{4}\\.png$");
        }
    });

    // listFiles() returns null once the directory has been deleted above
    if (workingFiles == null) {
        return new File[0];
    }

    Arrays.sort(workingFiles, new Comparator<File>() {
        @Override
        public int compare(File f1, File f2) {
            return f1.getName().compareTo(f2.getName());
        }
    });

    return workingFiles;
}

Java Code Examples for org.apache.pdfbox.pdmodel.PDDocument#close()