Java Code Examples for org.apache.pdfbox.pdmodel.PDPageTree#getCount()
The following examples show how to use the method
org.apache.pdfbox.pdmodel.PDPageTree#getCount().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PdfTools.java From MyBox with Apache License 2.0 | 6 votes |
/**
 * Collects the image XObjects referenced by the page resources of a PDF document,
 * scanning from a given page through the last page.
 *
 * <p>Pages that have no resources dictionary are skipped instead of causing a
 * {@link NullPointerException}.
 *
 * @param document  the document to scan; {@code null} yields an empty list
 * @param startPage zero-based index of the first page to scan; {@code null} is treated as 0
 * @return the image XObjects found, in page order; empty when {@code document} is
 *         {@code null} or {@code startPage} is not below the page count
 * @throws Exception if PDFBox fails while resolving an XObject
 */
public static List<PDImageXObject> getImageListFromPDF(PDDocument document, Integer startPage) throws Exception {
    List<PDImageXObject> imageList = new ArrayList<>();
    if (document == null) {
        return imageList;
    }

    PDPageTree pages = document.getPages();
    int firstPage = startPage == null ? 0 : startPage;
    int pageCount = pages.getCount();

    for (int i = firstPage; i < pageCount; i++) {
        PDPage page = pages.get(i);
        // getResources() returns null when the page (and its ancestors) carry no
        // /Resources dictionary; such a page cannot reference any image.
        if (page.getResources() == null) {
            continue;
        }
        for (COSName objectName : page.getResources().getXObjectNames()) {
            if (page.getResources().isImageXObject(objectName)) {
                imageList.add((PDImageXObject) page.getResources().getXObject(objectName));
            }
        }
    }
    return imageList;
}
Example 2
Source File: PdfScreenshotUtils.java From dss with GNU Lesser General Public License v2.1 | 6 votes |
public static void checkPdfSimilarity(PDDocument document1, PDDocument document2, float minSimilarity) throws IOException { PDPageTree samplePageTree = document1.getPages(); PDPageTree checkPageTree = document2.getPages(); assertEquals(checkPageTree.getCount(), samplePageTree.getCount()); PDFRenderer sampleRenderer = new PDFRenderer(document1); PDFRenderer checkRenderer = new PDFRenderer(document2); for (int pageNumber = 0; pageNumber < checkPageTree.getCount(); pageNumber++) { BufferedImage sampleImage = sampleRenderer.renderImageWithDPI(pageNumber, DPI); BufferedImage checkImage = checkRenderer.renderImageWithDPI(pageNumber, DPI); // ImageIO.write(sampleImage, "png", new File("target\\sampleImage.png")); // ImageIO.write(checkImage, "png", new File("target\\checkImage.png")); float checkSimilarity = checkImageSimilarity(sampleImage, checkImage, CHECK_RESOLUTION); assertTrue(checkSimilarity >= minSimilarity, "The image similarity " + checkSimilarity + " is lower the allowed limit " + minSimilarity); } }
Example 3
Source File: PdfContentImagePreprocessor.java From tika-server with Apache License 2.0 | 5 votes |
/**
 * Passes the resources of every page of {@code document} to
 * {@code processImagesFromResources}, in page order.
 *
 * <p>Any exception raised along the way is caught and its stack trace printed;
 * nothing is rethrown, so processing stops at the first failing page.
 */
private void removeImagesAlphaChannelUnsafe() {
    try {
        PDPageTree pageTree = document.getDocumentCatalog().getPages();
        int pageIndex = 0;
        while (pageIndex < pageTree.getCount()) {
            processImagesFromResources(pageTree.get(pageIndex).getResources());
            pageIndex++;
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Example 4
Source File: PdfContentTypeChecker.java From tika-server with Apache License 2.0 | 5 votes |
/**
 * Creates a fresh {@code PDFTextStripper}, records the document's page count, and then,
 * for each page in order, calls {@code readObjectsOnPage} followed by
 * {@code calculateTextLengthOnPage} with the page's 1-based number.
 *
 * <p>Exceptions raised while walking the pages are caught and their stack trace printed;
 * only a failure constructing the text stripper propagates to the caller.
 *
 * @param document the document to analyse
 * @throws IOException if the text stripper cannot be created
 */
private void calculateObjectsInDocument(PDDocument document) throws IOException {
    this.pdfTextStripper = new PDFTextStripper();
    try {
        PDPageTree pageTree = document.getDocumentCatalog().getPages();
        this.pageCount = pageTree.getCount();
        // pageNumber is 1-based; the page tree itself is indexed from 0.
        for (int pageNumber = 1; pageNumber <= pageTree.getCount(); pageNumber++) {
            readObjectsOnPage(pageTree.get(pageNumber - 1));
            calculateTextLengthOnPage(document, pageNumber);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Example 5
Source File: DetermineWidgetPage.java From testarea-pdfbox2 with Apache License 2.0 | 5 votes |
/**
 * Finds the page whose annotation list contains the given widget, by comparing the
 * COS dictionary of every annotation on every page with the widget's COS dictionary.
 *
 * @param document the document whose pages are searched
 * @param widget   the widget annotation to locate
 * @return the zero-based index of the first page holding a matching annotation,
 *         or {@code -1} if no page does
 * @throws IOException if a page's annotations cannot be read
 */
int determineSafe(PDDocument document, PDAnnotationWidget widget) throws IOException {
    COSDictionary target = widget.getCOSObject();
    PDPageTree pageTree = document.getPages();

    int pageIndex = 0;
    while (pageIndex < pageTree.getCount()) {
        for (PDAnnotation candidate : pageTree.get(pageIndex).getAnnotations()) {
            COSDictionary candidateObject = candidate.getCOSObject();
            if (candidateObject.equals(target)) {
                return pageIndex;
            }
        }
        pageIndex++;
    }
    return -1;
}
Example 6
Source File: RemoveStrikeoutComment.java From testarea-pdfbox2 with Apache License 2.0 | 5 votes |
/**
 * <a href="https://stackoverflow.com/questions/45812696/pdfbox-delete-comment-maintain-strikethrough">
 * PDFBox delete comment maintain strikethrough
 * </a>
 * <br/>
 * <a href="https://expirebox.com/files/3d955e6df4ca5874c38dbf92fc43b5af.pdf">
 * only_fields.pdf
 * </a>
 * <a href="https://file.io/DTvqhC">
 * (alternative download)
 * </a>
 * <p>
 * Due to a bug in the <code>COSArrayList</code> usage for page annotations,
 * the indirect reference to the annotation in question is not removed from
 * the actual page annotations array.
 * </p>
 * <p>
 * On every page, the annotations whose contents equal "Sample Strikethrough" are
 * removed from the page's annotation list; the result is then saved to
 * {@code RESULT_FOLDER}. The document is closed once the test finishes.
 * </p>
 */
@Test
public void testRemoveLikeStephan() throws IOException {
    // Both the input stream and the loaded document are released, even if the test fails.
    try (InputStream resource = getClass().getResourceAsStream("only_fields.pdf");
         PDDocument document = Loader.loadPDF(resource)) {
        PDPageTree allPages = document.getDocumentCatalog().getPages();

        for (int i = 0; i < allPages.getCount(); i++) {
            PDPage page = allPages.get(i);
            List<PDAnnotation> annotations = page.getAnnotations();
            if (annotations.isEmpty()) {
                continue;
            }

            List<PDAnnotation> annotationToRemove = new ArrayList<>();
            for (PDAnnotation annotation : annotations) {
                if ("Sample Strikethrough".equals(annotation.getContents())) {
                    annotationToRemove.add(annotation);
                }
            }
            annotations.removeAll(annotationToRemove);
        }

        document.save(new File(RESULT_FOLDER, "only_fields-removeLikeStephan.pdf"));
    }
}
Example 7
Source File: RemoveStrikeoutComment.java From testarea-pdfbox2 with Apache License 2.0 | 5 votes |
/** * <a href="https://stackoverflow.com/questions/45812696/pdfbox-delete-comment-maintain-strikethrough"> * PDFBox delete comment maintain strikethrough * </a> * <br/> * <a href="https://expirebox.com/files/3d955e6df4ca5874c38dbf92fc43b5af.pdf"> * only_fields.pdf * </a> * <a href="https://file.io/DTvqhC"> * (alternative download) * </a> * <p> * The OP only wanted the comment removed, not the strike-through. Thus, we must * not remove the annotation but merely the comment building attributes. * </p> */ @Test public void testRemoveLikeStephanImproved() throws IOException { final COSName POPUP = COSName.getPDFName("Popup"); try (InputStream resource = getClass().getResourceAsStream("only_fields.pdf")) { PDDocument document = Loader.loadPDF(resource); List<PDAnnotation> annotations = new ArrayList<>(); PDPageTree allPages = document.getDocumentCatalog().getPages(); List<COSObjectable> objectsToRemove = new ArrayList<>(); for (int i = 0; i < allPages.getCount(); i++) { PDPage page = allPages.get(i); annotations = page.getAnnotations(); for (PDAnnotation annotation : annotations) { if ("StrikeOut".equals(annotation.getSubtype())) { COSDictionary annotationDict = annotation.getCOSObject(); COSBase popup = annotationDict.getItem(POPUP); annotationDict.removeItem(POPUP); annotationDict.removeItem(COSName.CONTENTS); // plain text comment annotationDict.removeItem(COSName.RC); // rich text comment annotationDict.removeItem(COSName.T); // author if (popup != null) objectsToRemove.add(popup); } } annotations.removeAll(objectsToRemove); } document.save(new File(RESULT_FOLDER, "only_fields-removeImproved.pdf")); } }
Example 8
Source File: DashboardUtil.java From Insights with Apache License 2.0 | 2 votes |
/**
 * Returns the number of pages in the document minus one.
 *
 * <p>NOTE(review): presumably meant as the zero-based index of the last page
 * (or the page before the current last one) — confirm against the callers.
 *
 * @param document the PDF document whose page tree is counted
 * @return {@code document.getPages().getCount() - 1}
 */
private static int getPages(PDDocument document) {
    int pageCount = document.getPages().getCount();
    return pageCount - 1;
}