org.apache.pdfbox.pdmodel.PDPageTree Java Examples
The following examples show how to use
org.apache.pdfbox.pdmodel.PDPageTree.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: PdfTools.java From MyBox with Apache License 2.0 | 6 votes |
/**
 * Collects the image XObjects referenced by the page resources of a document.
 *
 * <p>Pages are visited from {@code startPage} to the last page. For each page,
 * every XObject name in its resources is tested with {@code isImageXObject} and
 * matching objects are appended to the result. Pages that have no resources are
 * skipped instead of failing with a {@link NullPointerException}.
 *
 * @param document  the document to scan; {@code null} yields an empty list
 * @param startPage zero-based index of the first page to scan; {@code null} means 0
 * @return the images found, in page order; never {@code null}
 * @throws Exception if an XObject cannot be loaded from the page resources
 */
public static List<PDImageXObject> getImageListFromPDF(PDDocument document, Integer startPage) throws Exception {
    List<PDImageXObject> imageList = new ArrayList<>();
    if (document == null) {
        return imageList;
    }

    PDPageTree pages = document.getPages();
    int firstPage = startPage == null ? 0 : startPage;
    int len = pages.getCount();

    for (int i = firstPage; i < len; ++i) {
        PDPage page = pages.get(i);
        // A page may carry no resources at all; there is nothing to collect then.
        if (page.getResources() == null) {
            continue;
        }
        for (COSName imageObjectName : page.getResources().getXObjectNames()) {
            if (page.getResources().isImageXObject(imageObjectName)) {
                imageList.add((PDImageXObject) page.getResources().getXObject(imageObjectName));
            }
        }
    }
    return imageList;
}
Example #2
Source File: PDPageDestination.java From gcs with Mozilla Public License 2.0 | 6 votes |
/** * Returns the page number for this destination, regardless of whether this is a page number or * a reference to a page. * * @since Apache PDFBox 1.0.0 * @see org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem * @return page number, or -1 if the destination type is unknown. The page number is 0-based if * it was in the dictionary (for remote destinations), and 1-based if it was computed from a * page reference (for local destinations). * @deprecated This method has inconsistent behavior (see returns), use {@link #retrievePageNumber()} instead. */ @Deprecated public int findPageNumber() { int retval = -1; if( array.size() > 0 ) { COSBase page = array.getObject( 0 ); if( page instanceof COSNumber ) { retval = ((COSNumber)page).intValue(); } else if (page instanceof COSDictionary) { COSBase parent = page; while (((COSDictionary) parent).getDictionaryObject(COSName.PARENT, COSName.P) != null) { parent = ((COSDictionary) parent).getDictionaryObject(COSName.PARENT, COSName.P); } // now parent is the pages node PDPageTree pages = new PDPageTree((COSDictionary) parent); return pages.indexOf(new PDPage((COSDictionary) page)) + 1; } } return retval; }
Example #3
Source File: PdfScreenshotUtils.java From dss with GNU Lesser General Public License v2.1 | 6 votes |
public static void checkPdfSimilarity(PDDocument document1, PDDocument document2, float minSimilarity) throws IOException { PDPageTree samplePageTree = document1.getPages(); PDPageTree checkPageTree = document2.getPages(); assertEquals(checkPageTree.getCount(), samplePageTree.getCount()); PDFRenderer sampleRenderer = new PDFRenderer(document1); PDFRenderer checkRenderer = new PDFRenderer(document2); for (int pageNumber = 0; pageNumber < checkPageTree.getCount(); pageNumber++) { BufferedImage sampleImage = sampleRenderer.renderImageWithDPI(pageNumber, DPI); BufferedImage checkImage = checkRenderer.renderImageWithDPI(pageNumber, DPI); // ImageIO.write(sampleImage, "png", new File("target\\sampleImage.png")); // ImageIO.write(checkImage, "png", new File("target\\checkImage.png")); float checkSimilarity = checkImageSimilarity(sampleImage, checkImage, CHECK_RESOLUTION); assertTrue(checkSimilarity >= minSimilarity, "The image similarity " + checkSimilarity + " is lower the allowed limit " + minSimilarity); } }
Example #4
Source File: PdfContentImagePreprocessor.java From tika-server with Apache License 2.0 | 5 votes |
/**
 * Passes the resources of every page of {@code document} to
 * {@code processImagesFromResources}.
 *
 * <p>Any exception stops the traversal; its stack trace is printed and the
 * exception is not propagated.
 */
private void removeImagesAlphaChannelUnsafe() {
    try {
        final PDPageTree pageTree = document.getDocumentCatalog().getPages();
        int index = 0;
        while (index < pageTree.getCount()) {
            final PDPage current = pageTree.get(index);
            processImagesFromResources(current.getResources());
            index++;
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Example #5
Source File: PdfContentTypeChecker.java From tika-server with Apache License 2.0 | 5 votes |
/**
 * Creates a fresh text stripper, records the page count, and then calls
 * {@code readObjectsOnPage} and {@code calculateTextLengthOnPage} for each page.
 *
 * <p>Exceptions raised while walking the pages are printed and swallowed; only
 * the construction of the text stripper can propagate an {@link IOException}.
 *
 * @param document the document to analyse
 * @throws IOException if the text stripper cannot be created
 */
private void calculateObjectsInDocument(PDDocument document) throws IOException {
    this.pdfTextStripper = new PDFTextStripper();
    try {
        final PDPageTree pageTree = document.getDocumentCatalog().getPages();
        this.pageCount = pageTree.getCount();

        // pageNumber is 1-based because calculateTextLengthOnPage is called with i + 1
        for (int pageNumber = 1; pageNumber <= pageTree.getCount(); pageNumber++) {
            final PDPage current = pageTree.get(pageNumber - 1);
            readObjectsOnPage(current);
            calculateTextLengthOnPage(document, pageNumber);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Example #6
Source File: DashboardUtil.java From Insights with Apache License 2.0 | 5 votes |
/**
 * Appends a title line, and optionally a footer listing the variables selected
 * in Grafana by the user, to every page of the document.
 *
 * <p>The footer text is written only when {@code variables} is neither
 * {@code null} nor empty. Each page's content stream is closed even if writing
 * fails. Any exception is logged and not rethrown, so pages after the failing
 * one are left untouched.
 *
 * @param doc       document whose pages are decorated
 * @param title     text appended after the "OneDevOps Insights" prefix
 * @param variables footer text; may be {@code null} or empty to omit the footer
 * @return the same {@code doc} instance
 * @throws IOException declared for callers; failures inside are logged instead
 */
private PDDocument footer(PDDocument doc, String title, String variables) throws IOException {
    try {
        // The null test must come before the value is dereferenced.
        boolean hasVariables = variables != null && !variables.equals("");
        PDPageTree pages = doc.getPages();
        for (PDPage p : pages) {
            // try-with-resources closes the stream even when a write fails
            try (PDPageContentStream contentStream =
                    new PDPageContentStream(doc, p, AppendMode.APPEND, false)) {
                contentStream.beginText();
                contentStream.newLineAtOffset(220, 780);
                contentStream.setFont(PDType1Font.HELVETICA, 11);
                contentStream.showText("OneDevOps Insights – "+title);
                contentStream.endText();

                if (hasVariables) {
                    contentStream.beginText();
                    contentStream.newLineAtOffset(2, 17);
                    contentStream.setFont(PDType1Font.HELVETICA, 9);
                    contentStream.showText("This Report is generated based on the user selected values as below.");
                    contentStream.endText();

                    contentStream.beginText();
                    contentStream.newLineAtOffset(2, 5);
                    contentStream.setFont(PDType1Font.HELVETICA, 7);
                    contentStream.showText(variables);
                    contentStream.endText();
                }
            }
        }
    } catch (Exception e) {
        Log.error("Error, Failed in Footer.. ", e.getMessage());
    }
    return doc;
}
Example #7
Source File: DetermineWidgetPage.java From testarea-pdfbox2 with Apache License 2.0 | 5 votes |
/**
 * Searches the pages of a document for an annotation whose COS dictionary
 * equals that of the given widget.
 *
 * @param document the document whose pages are searched
 * @param widget   the widget annotation to locate
 * @return the zero-based index of the first page holding a matching annotation,
 *         or -1 if no page does
 * @throws IOException if the annotations of a page cannot be read
 */
int determineSafe(PDDocument document, PDAnnotationWidget widget) throws IOException
{
    final COSDictionary target = widget.getCOSObject();
    final PDPageTree pageTree = document.getPages();

    int pageIndex = 0;
    while (pageIndex < pageTree.getCount())
    {
        for (PDAnnotation candidate : pageTree.get(pageIndex).getAnnotations())
        {
            if (candidate.getCOSObject().equals(target))
            {
                return pageIndex;
            }
        }
        pageIndex++;
    }
    return -1;
}
Example #8
Source File: RemoveStrikeoutComment.java From testarea-pdfbox2 with Apache License 2.0 | 5 votes |
/**
 * <a href="https://stackoverflow.com/questions/45812696/pdfbox-delete-comment-maintain-strikethrough">
 * PDFBox delete comment maintain strikethrough
 * </a>
 * <br/>
 * <a href="https://expirebox.com/files/3d955e6df4ca5874c38dbf92fc43b5af.pdf">
 * only_fields.pdf
 * </a>
 * <a href="https://file.io/DTvqhC">
 * (alternative download)
 * </a>
 * <p>
 * Due to a bug in the <code>COSArrayList</code> usage for page annotations,
 * the indirect reference to the annotation in question is not removed from
 * the actual page annotations array.
 * </p>
 * <p>
 * The document is opened in the try-with-resources header so it is closed
 * after saving, also when an exception is thrown.
 * </p>
 */
@Test
public void testRemoveLikeStephan() throws IOException {
    try (   InputStream resource = getClass().getResourceAsStream("only_fields.pdf");
            PDDocument document = Loader.loadPDF(resource)) {
        PDPageTree allPages = document.getDocumentCatalog().getPages();

        for (int i = 0; i < allPages.getCount(); i++) {
            PDPage page = allPages.get(i);
            List<PDAnnotation> annotations = page.getAnnotations();
            if (annotations.size() < 1) {
                continue;
            }

            // collect first, remove afterwards, to avoid modifying the list while iterating
            List<PDAnnotation> annotationToRemove = new ArrayList<PDAnnotation>();
            for (PDAnnotation annotation : annotations) {
                if (annotation.getContents() != null && annotation.getContents().equals("Sample Strikethrough")) {
                    annotationToRemove.add(annotation);
                }
            }
            annotations.removeAll(annotationToRemove);
        }

        document.save(new File(RESULT_FOLDER, "only_fields-removeLikeStephan.pdf"));
    }
}
Example #9
Source File: RemoveStrikeoutComment.java From testarea-pdfbox2 with Apache License 2.0 | 5 votes |
/** * <a href="https://stackoverflow.com/questions/45812696/pdfbox-delete-comment-maintain-strikethrough"> * PDFBox delete comment maintain strikethrough * </a> * <br/> * <a href="https://expirebox.com/files/3d955e6df4ca5874c38dbf92fc43b5af.pdf"> * only_fields.pdf * </a> * <a href="https://file.io/DTvqhC"> * (alternative download) * </a> * <p> * The OP only wanted the comment removed, not the strike-through. Thus, we must * not remove the annotation but merely the comment building attributes. * </p> */ @Test public void testRemoveLikeStephanImproved() throws IOException { final COSName POPUP = COSName.getPDFName("Popup"); try (InputStream resource = getClass().getResourceAsStream("only_fields.pdf")) { PDDocument document = Loader.loadPDF(resource); List<PDAnnotation> annotations = new ArrayList<>(); PDPageTree allPages = document.getDocumentCatalog().getPages(); List<COSObjectable> objectsToRemove = new ArrayList<>(); for (int i = 0; i < allPages.getCount(); i++) { PDPage page = allPages.get(i); annotations = page.getAnnotations(); for (PDAnnotation annotation : annotations) { if ("StrikeOut".equals(annotation.getSubtype())) { COSDictionary annotationDict = annotation.getCOSObject(); COSBase popup = annotationDict.getItem(POPUP); annotationDict.removeItem(POPUP); annotationDict.removeItem(COSName.CONTENTS); // plain text comment annotationDict.removeItem(COSName.RC); // rich text comment annotationDict.removeItem(COSName.T); // author if (popup != null) objectsToRemove.add(popup); } } annotations.removeAll(objectsToRemove); } document.save(new File(RESULT_FOLDER, "only_fields-removeImproved.pdf")); } }
Example #10
Source File: ShrinkPDF.java From shrink-pdf with MIT License | 5 votes |
/** * Shrink a PDF * @param f {@code File} pointing to the PDF to shrink * @param compQual Compression quality parameter. 0 is * smallest file, 1 is highest quality. * @return The compressed {@code PDDocument} * @throws FileNotFoundException * @throws IOException */ private PDDocument shrinkMe() throws FileNotFoundException, IOException { if(compQual < 0) compQual = compQualDefault; final RandomAccessBufferedFileInputStream rabfis = new RandomAccessBufferedFileInputStream(input); final PDFParser parser = new PDFParser(rabfis); parser.parse(); final PDDocument doc = parser.getPDDocument(); final PDPageTree pages = doc.getPages(); final ImageWriter imgWriter; final ImageWriteParam iwp; if(tiff) { final Iterator<ImageWriter> tiffWriters = ImageIO.getImageWritersBySuffix("png"); imgWriter = tiffWriters.next(); iwp = imgWriter.getDefaultWriteParam(); //iwp.setCompressionMode(ImageWriteParam.MODE_DISABLED); } else { final Iterator<ImageWriter> jpgWriters = ImageIO.getImageWritersByFormatName("jpeg"); imgWriter = jpgWriters.next(); iwp = imgWriter.getDefaultWriteParam(); iwp.setCompressionMode(ImageWriteParam.MODE_EXPLICIT); iwp.setCompressionQuality(compQual); } for(PDPage p : pages) { scanResources(p.getResources(), doc, imgWriter, iwp); } return doc; }
Example #11
Source File: PDDocumentCatalogBleach.java From DocBleach with MIT License | 4 votes |
/**
 * Logs a trace message and then calls {@code sanitizePage} for every page of
 * the given tree, in iteration order.
 *
 * @param pages the page tree to walk
 * @throws IOException if sanitizing a page fails
 */
private void sanitizePageActions(PDPageTree pages) throws IOException
{
    LOGGER.trace("Checking Pages Actions");

    for (PDPage current : pages)
    {
        sanitizePage(current);
    }
}
Example #12
Source File: PDFTextStripper.java From gcs with Mozilla Public License 2.0 | 4 votes |
/** * This will process all of the pages and the text that is in them. * * @param pages The pages object in the document. * * @throws IOException If there is an error parsing the text. */ protected void processPages(PDPageTree pages) throws IOException { PDPage startBookmarkPage = startBookmark == null ? null : startBookmark.findDestinationPage(document); if (startBookmarkPage != null) { startBookmarkPageNumber = pages.indexOf(startBookmarkPage) + 1; } else { // -1 = undefined startBookmarkPageNumber = -1; } PDPage endBookmarkPage = endBookmark == null ? null : endBookmark.findDestinationPage(document); if (endBookmarkPage != null) { endBookmarkPageNumber = pages.indexOf(endBookmarkPage) + 1; } else { // -1 = undefined endBookmarkPageNumber = -1; } if (startBookmarkPageNumber == -1 && startBookmark != null && endBookmarkPageNumber == -1 && endBookmark != null && startBookmark.getCOSObject() == endBookmark.getCOSObject()) { // this is a special case where both the start and end bookmark // are the same but point to nothing. In this case // we will not extract any text. startBookmarkPageNumber = 0; endBookmarkPageNumber = 0; } for (PDPage page : pages) { currentPageNo++; if (page.hasContents()) { processPage(page); } } }
Example #13
Source File: DashboardUtil.java From Insights with Apache License 2.0 | 2 votes |
/**
 * Returns the page count of the document minus one.
 *
 * @param document the document to inspect
 * @return {@code document.getPages().getCount() - 1}
 */
private static int getPages(PDDocument document)
{
    return document.getPages().getCount() - 1;
}