Python PyPDF2.PdfFileMerger() Examples

The following are 12 code examples of PyPDF2.PdfFileMerger(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module PyPDF2, or try the search function.
Example #1
Source File: pdf2pdfocr.py    From pdf2pdfocr with Apache License 2.0 6 votes vote down vote up
def join_ocred_pdf(self):
        """Merge the OCR'ed per-file PDFs into "<tmp_dir><prefix>-ocr.pdf".

        Every file matching "<tmp_dir><prefix>*.pdf" is appended in sorted
        name order. When no such file exists, the error is reported through
        ``eprint``, ``self.cleanup()`` is called and the process exits with
        status 1.
        """
        ocr_pattern = self.tmp_dir + "{0}*.{1}".format(self.prefix, "pdf")
        ocr_pdf_paths = sorted(glob.glob(ocr_pattern))
        self.debug("We have {0} ocr'ed files".format(len(ocr_pdf_paths)))
        if not ocr_pdf_paths:
            eprint("No PDF files generated after OCR. This is not expected. Aborting.")
            self.cleanup()
            exit(1)
        else:
            # Join PDF files into one file that contains all OCR "backgrounds"
            joined = PyPDF2.PdfFileMerger()
            for ocr_pdf_path in ocr_pdf_paths:
                ocr_reader = PyPDF2.PdfFileReader(ocr_pdf_path, strict=False)
                joined.append(ocr_reader)
            joined.write(self.tmp_dir + self.prefix + "-ocr.pdf")
            joined.close()
        self.debug("Joined ocr'ed PDF files")
Example #2
Source File: strokes.py    From strokes with GNU General Public License v3.0 6 votes vote down vote up
def gen_pdfs(pages):
    """Render each page to a PDF and return the merged document as bytes.

    For every item in *pages*, the bytes from ``page.f.getvalue()`` are
    passed to ``gen_pdf``; the returned PDF data is wrapped in an in-memory
    buffer and appended to the merger in iteration order. All intermediate
    buffers are closed before returning, whether or not merging succeeded.
    """
    combined = PdfFileMerger()
    page_buffers = []
    try:
        for page in pages:
            page_buffer = io.BytesIO(gen_pdf(page.f.getvalue()))
            # Track the buffer before appending so it is closed even if
            # the append itself fails.
            page_buffers.append(page_buffer)
            combined.append(page_buffer)

        with io.BytesIO() as merged_out:
            combined.write(merged_out)
            return merged_out.getvalue()
    finally:
        for page_buffer in page_buffers:
            page_buffer.close()
Example #3
Source File: merge_pdfs.py    From TOBIAS with MIT License 6 votes vote down vote up
def run_mergepdf(args):
	"""Merge the PDF files in args.input into the single file args.output.

	The arguments and the read/write status of the files are validated
	through check_required() and check_files() before merging. Inputs with
	a size of zero bytes are skipped. The merger is always closed, even
	when appending or writing fails.
	"""

	check_required(args, ["input", "output"])
	print("Number of input files: {0}".format(len(args.input)))

	#Preliminary checks
	print("Checking read/write status")
	check_files(args.input, action="r")
	check_files([args.output], action="w")

	#Join pdfs
	print("Starting to merge PDFs")
	merger = PdfFileMerger(strict=False)
	try:
		for pdf in args.input:
			if os.stat(pdf).st_size != 0:	#only join files containing plots
				# Parse the input as leniently as the merger itself; a strict
				# reader would reject a file before the merger ever sees it.
				merger.append(PdfFileReader(pdf, strict=False))

		print("Writing merged file: {0}".format(args.output))
		merger.write(args.output)
	finally:
		# Release the merger's buffers/handles regardless of the outcome.
		merger.close()

	print("PDFs merged successfully!")


#--------------------------------------------------------------------------------------------------------# 
Example #4
Source File: merge_pdfs.py    From python-tools with MIT License 5 votes vote down vote up
def merge_pdfs(input_pdfs, output_pdf):
    """Combine multiple pdfs to single pdf.

    The inputs are appended in the order given. The merger is closed once
    the output has been written (or an error occurred), so the input files
    it opened are released.

    Args:
        input_pdfs (list): List of path files.
        output_pdf (str): Output file.

    """
    pdf_merger = PdfFileMerger()
    try:
        for path in input_pdfs:
            pdf_merger.append(path)
        with open(output_pdf, 'wb') as fileobj:
            pdf_merger.write(fileobj)
    finally:
        # The merger opens every input path itself; close() releases them.
        pdf_merger.close()
Example #5
Source File: plot.py    From kicad-automation-scripts with Apache License 2.0 5 votes vote down vote up
def plot_to_directory(pcb, file_format, layers, plot_directory, temp_dir):
    """Plot *layers* of *pcb* and bundle the result into *plot_directory*.

    The board's plot directory is set to *temp_dir* before plotting.

    * ``file_format == 'zip_gerbers'``: every layer is plotted as Gerber,
      the drill file is added when it exists, and everything is written to
      ``<plot_directory>/<pcb.name>_gerbers.zip``.
    * ``file_format == 'pdf'``: every layer is plotted as PDF and merged,
      with one bookmark per layer name plus a 'Drill map' bookmark when a
      drill map exists, into ``<plot_directory>/<pcb.name>.pdf``.

    Any other *file_format* value produces no output.
    """
    output_files = []

    pcb.set_plot_directory(temp_dir)

    logger.debug(file_format)

    if file_format == 'zip_gerbers':
        # In theory not needed since gerber does not support dril marks, but added just to be sure
        pcb.plot_options.SetDrillMarksType(pcbnew.PCB_PLOT_PARAMS.NO_DRILL_SHAPE)

        for layer in layers:
            logger.debug('plotting layer {} ({}) to Gerber'.format(layer.get_name(), layer.layer_id))
            output_filename = layer.plot(pcbnew.PLOT_FORMAT_GERBER)
            output_files.append(output_filename)

        drill_file = pcb.plot_drill()
        if os.path.isfile(drill_file): # No drill file is generated if no holes exist
            output_files.append(drill_file)

        zip_file_name = os.path.join(plot_directory, '{}_gerbers.zip'.format(pcb.name))
        with zipfile.ZipFile(zip_file_name, 'w') as z:
            for f in output_files:
                z.write(f, os.path.relpath(f, plot_directory))

    elif file_format == 'pdf':
        pcb.plot_options.SetDrillMarksType(pcbnew.PCB_PLOT_PARAMS.FULL_DRILL_SHAPE)
        merger = PdfFileMerger()
        # Handles stay open until the merged file is written, then are
        # closed in the finally block below.
        open_pdfs = []
        try:
            for layer in layers:
                logger.debug('plotting layer {} ({}) to PDF'.format(layer.get_name(), layer.layer_id))
                output_filename = layer.plot(pcbnew.PLOT_FORMAT_PDF)
                output_files.append(output_filename)
                logger.debug(output_filename)
                # open() instead of the Python 2-only file() builtin.
                layer_pdf = open(output_filename, 'rb')
                open_pdfs.append(layer_pdf)
                merger.append(PdfFileReader(layer_pdf), bookmark=layer.get_name())

            drill_map_file = pcb.plot_drill_map()
            if os.path.isfile(drill_map_file): # No drill map file is generated if no holes exist
                drill_map_pdf = open(drill_map_file, 'rb')
                open_pdfs.append(drill_map_pdf)
                merger.append(PdfFileReader(drill_map_pdf), bookmark='Drill map')

            merger.write(plot_directory+'/{}.pdf'.format(pcb.name))
        finally:
            merger.close()
            for open_pdf in open_pdfs:
                open_pdf.close()
Example #6
Source File: converter.py    From python-automation-scripts with GNU General Public License v3.0 5 votes vote down vote up
def mergeIntoOnePDF(path):
    """Merge every ``.pdf`` file directly inside *path* into ``merged_full.pdf``.

    The inputs are merged in sorted file-name order and the list of merged
    names is printed. A ``merged_full.pdf`` left over from an earlier run is
    not treated as an input, so re-running does not duplicate content.

    Args:
        path: Directory containing the PDF files; the merged file is
            written into the same directory.
    """
    output_name = "merged_full.pdf"
    # os.listdir() returns names in arbitrary order; sort for a stable result.
    pdf_files = sorted(
        fileName for fileName in os.listdir(path)
        if fileName.endswith('.pdf') and fileName != output_name
    )
    print(pdf_files)
    merger = PdfFileMerger()
    try:
        for filename in pdf_files:
            # PdfFileReader's second positional argument is `strict`, not a
            # file mode, so only the path is passed.
            merger.append(PdfFileReader(os.path.join(path, filename)))
        merger.write(os.path.join(path, output_name))
    finally:
        merger.close()
Example #7
Source File: download.py    From sslibrary-pdf-downloader with MIT License 5 votes vote down vote up
def mergePDF(path, num, name):
    """Merge ``page1.pdf`` .. ``page<num>.pdf`` from *path* into ``<name>.pdf``.

    Merging is best effort: a page that cannot be opened or appended is
    skipped and its page number is printed. The output is written to
    ``<path>/<name>.pdf``. All opened page files and the merger are closed
    afterwards, including when writing fails.

    Args:
        path: Directory holding the page files and receiving the output.
        num: Number of the last page (pages are numbered from 1).
        name: Output file name without the ``.pdf`` extension.
    """
    merger = PdfFileMerger()
    page_files = []
    try:
        for cpage in range(1, num + 1):
            try:
                page_file = open(path + '/page%d.pdf' % cpage, 'rb')
                page_files.append(page_file)
                merger.append(page_file)
            except Exception:
                # Deliberately lenient, but no longer a bare except so that
                # KeyboardInterrupt / SystemExit still propagate.
                print(cpage)
        merger.write(path + '/' + name + '.pdf')
    finally:
        merger.close()
        for page_file in page_files:
            page_file.close()
Example #8
Source File: merge.py    From Python-for-Everyday-Life with MIT License 5 votes vote down vote up
def merge(source_pdf_paths, target_pdf_path):
    """Concatenate the PDFs at *source_pdf_paths* into *target_pdf_path*.

    Sources are appended in the order given; each source file is opened
    only for the duration of its own append.
    """
    combined = PyPDF2.PdfFileMerger()

    # feed every source document to the merger, one open file at a time
    for source_path in source_pdf_paths:
        with open(source_path, 'rb') as source_file:
            combined.append(PyPDF2.PdfFileReader(source_file))

    # emit the merged document
    with open(target_pdf_path, 'wb') as target_file:
        combined.write(target_file)
Example #9
Source File: doTaxes.py    From taxes-2018 with GNU Lesser General Public License v3.0 5 votes vote down vote up
def fill_forms():
    """Fill in every form and merge the filled PDFs into one tax return.

    Calls ``fill_in_form()`` on each form module, then merges the PDFs
    listed below from the ``filled`` directory, in list order, into
    ``filled/Tax_Return.pdf``. The merger is closed afterwards so the input
    files it opened are released.
    """
    forms.s_1040.fill_in_form()
    forms.s1_1040.fill_in_form()
    forms.s3_1040.fill_in_form()
    forms.s4_1040.fill_in_form()
    forms.s5_1040.fill_in_form()
    forms.a_1040.fill_in_form()
    forms.b_1040.fill_in_form()
    forms.se_1040.fill_in_form()
    forms.cez_1040.fill_in_form()
    forms.sep_ira.fill_in_form()
    forms.f_8606.fill_in_form()
    forms.s_1040v.fill_in_form()
    forms.tax_worksheet.fill_in_form()

    # Page order of the merged return follows the order of this list.
    pdfs = [ os.path.join('filled', 'f1040.pdf'),
             os.path.join('filled', 'f1040s1.pdf'),
             os.path.join('filled', 'f1040s3.pdf'),
             os.path.join('filled', 'f1040s4.pdf'),
             os.path.join('filled', 'f1040s5.pdf'),
             os.path.join('filled', 'tax_worksheet.pdf'),
             os.path.join('filled', 'f1040sa.pdf'),
             os.path.join('filled', 'f1040sb.pdf'),
             os.path.join('filled', 'f1040sce.pdf'),
             os.path.join('filled', 'f1040sse.pdf'),
             os.path.join('filled', 'f8606.pdf'),
             os.path.join('filled', 'f1040v.pdf'),
             os.path.join('filled', 'SEP_IRA_Worksheet.pdf')]

    merger = PdfFileMerger()
    try:
        for pdf in pdfs:
            # Pass the path so the merger owns the file handle and closes
            # it in close(), instead of leaking an open() result.
            merger.append(pdf)

        with open( os.path.join('filled', 'Tax_Return.pdf'), 'wb' ) as fd:
            merger.write(fd)
    finally:
        merger.close()
Example #10
Source File: compare_sourcelists.py    From drizzlepac with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def pdf_merger(output_path, input_paths):
    """Merges multiple pdf files into a single multi-page pdf file

    The input files are deleted after the merged file has been written.
    If merging or writing fails, the inputs are left in place.

    Parameters
    ----------
    output_path : str
        name of output multipage pdf file

    input_paths : list
        list of pdf files to combine

    Returns
    -------
    nothing.
    """
    # Local name differs from the function name to avoid shadowing it.
    merger = PdfFileMerger()
    try:
        for path in input_paths:
            merger.append(path)

        with open(output_path, 'wb') as fileobj:
            merger.write(fileobj)
    finally:
        # Release the input files the merger opened before they are
        # deleted; removing a still-open file fails on some platforms.
        merger.close()

    for path in input_paths:
        os.remove(path)


# -~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- 
Example #11
Source File: pdf2pdfocr.py    From pdf2pdfocr with Apache License 2.0 4 votes vote down vote up
def rebuild_and_merge(self):
        """Rebuild the input PDF from its extracted images and merge in the OCR text.

        Steps:
        1. Pick the ImageMagick convert parameters: a named preset
           ("fast", "best", "grayscale", "jpeg", "jpeg2000"), or the raw
           value of ``self.user_convert_params``; an empty value falls back
           to the "best" preset.
        2. Run ``do_rebuild`` for every "<tmp_dir><prefix>*.<extension_images>"
           file in a process pool, logging progress every 5 seconds.
        3. Merge the resulting "REBUILD_<prefix>*.pdf" files, in sorted
           order, into "<prefix>-input_unprotected.pdf". If none were
           produced, report the error, call ``self.cleanup()`` and exit
           with status 1.
        4. Call ``self._merge_ocr`` with the rebuilt PDF, the "-ocr.pdf"
           file and the "-OUTPUT.pdf" target.
        """
        eprint("Warning: metadata wiped from final PDF file (original file is not an unprotected PDF / "
               "forcing rebuild from extracted images / using deskew)")
        # Convert presets
        # Please read http://www.imagemagick.org/Usage/quantize/#colors_two
        presets = {
            "fast": "-threshold 60% -compress Group4",
            "best": "-colors 2 -colorspace gray -normalize -threshold 60% -compress Group4",
            "grayscale": "-threshold 85% -morphology Dilate Diamond -compress Group4",
            "jpeg": "-strip -interlace Plane -gaussian-blur 0.05 -quality 50% -compress JPEG",
            "jpeg2000": "-quality 32% -compress JPEG2000",
        }
        # Anything that is not a preset name is used verbatim as parameters
        convert_params = presets.get(self.user_convert_params, self.user_convert_params)
        # Handle default case
        if convert_params == "":
            convert_params = presets["best"]
        #
        self.log("Rebuilding PDF from images")
        rebuild_list = sorted(glob.glob(self.tmp_dir + self.prefix + "*." + self.extension_images))
        # The context manager shuts the worker processes down once all
        # tasks are done (or an error interrupts the wait loop), so the
        # pool is not leaked.
        with multiprocessing.Pool(self.cpu_to_use) as rebuild_pool:
            rebuild_pool_map = rebuild_pool.starmap_async(do_rebuild,
                                                          zip(rebuild_list,
                                                              itertools.repeat(self.path_convert),
                                                              itertools.repeat(convert_params),
                                                              itertools.repeat(self.tmp_dir),
                                                              itertools.repeat(self.shell_mode)))
            while not rebuild_pool_map.ready():
                # Progress is estimated from the rebuilt files already on disk
                pages_processed = len(glob.glob(self.tmp_dir + "REBUILD_" + self.prefix + "*.pdf"))
                self.log("Waiting for PDF rebuild to complete. {0}/{1} pages completed...".format(pages_processed, self.input_file_number_of_pages))
                time.sleep(5)
        #
        rebuilt_pdf_file_list = sorted(glob.glob(self.tmp_dir + "REBUILD_{0}*.pdf".format(self.prefix)))
        self.debug("We have {0} rebuilt PDF files".format(len(rebuilt_pdf_file_list)))
        if rebuilt_pdf_file_list:
            pdf_merger = PyPDF2.PdfFileMerger()
            for rebuilt_pdf_file in rebuilt_pdf_file_list:
                pdf_merger.append(PyPDF2.PdfFileReader(rebuilt_pdf_file, strict=False))
            pdf_merger.write(self.tmp_dir + self.prefix + "-input_unprotected.pdf")
            pdf_merger.close()
        else:
            eprint("No PDF files generated after image rebuilding. This is not expected. Aborting.")
            self.cleanup()
            exit(1)
        self.debug("PDF rebuilding completed")
        #
        self._merge_ocr((self.tmp_dir + self.prefix + "-input_unprotected.pdf"),
                        (self.tmp_dir + self.prefix + "-ocr.pdf"),
                        (self.tmp_dir + self.prefix + "-OUTPUT.pdf"), "rebuild-merge")
Example #12
Source File: admin.py    From silver with Apache License 2.0 4 votes vote down vote up
def download_selected_documents(self, request, queryset):
        """Return one PDF attachment merging the PDFs of the selected documents.

        Only documents in state ISSUED, CANCELED or PAID that have a pdf are
        considered. Each pdf is downloaded to a local file with
        ``self._download_pdf``, appended to the merged document and the
        local file is removed again. A document whose PDF cannot be read or
        appended is skipped; the outcome for every document is logged at
        debug level. The response is sent as an attachment named
        ``Billing-Documents-<now>.pdf``.
        """
        # NOTE (important): this works only if the pdf is not stored on local
        # disk as it is fetched via HTTP
        now = timezone.now()

        queryset = queryset.filter(
            state__in=[BillingDocumentBase.STATES.ISSUED,
                       BillingDocumentBase.STATES.CANCELED,
                       BillingDocumentBase.STATES.PAID]
        )

        base_path = '/tmp'
        merger = PdfFileMerger()
        # Handles of the downloaded files; kept open until the merged
        # document has been written, then closed in the finally block.
        opened_files = []
        try:
            for document in queryset:
                if not document.pdf:
                    continue

                local_file_path = self._download_pdf(document.pdf.url, base_path)
                try:
                    pdf_file = open(local_file_path, 'rb')
                    opened_files.append(pdf_file)
                    merger.append(PdfFileReader(pdf_file))
                    logging_ctx = {
                        'number': document.series_number,
                        'status': 'ok'
                    }
                except Exception as e:
                    logging_ctx = {
                        'number': document.series_number,
                        'status': 'failed',
                        'error': e
                    }

                logger.debug('Admin aggregate PDF generation: %s', logging_ctx)

                try:
                    os.remove(local_file_path)
                except OSError as e:
                    # A file that is already gone is fine; anything else is not.
                    if e.errno != errno.ENOENT:
                        raise

            response = HttpResponse(content_type='application/pdf')
            filename = 'Billing-Documents-{now}.pdf'.format(now=now)
            # The quoted filename must be terminated by a closing quote.
            content_disposition = 'attachment; filename="{fn}"'.format(fn=filename)
            response['Content-Disposition'] = content_disposition

            merger.write(response)
        finally:
            merger.close()
            for pdf_file in opened_files:
                pdf_file.close()

        return response