: Keep content logic in Jinja, layout in CSS (using `@media print`), and generation in pure Python.

2. Pattern: Zero-Copy PDF Merging with pypdf (formerly PyPDF2)

The Impact: Merge hundreds of PDFs without memory explosion.
from pypdf import PdfReader, PdfWriter

# Fill in the form fields on the first page of form.pdf and save the
# result as filled.pdf.
reader = PdfReader("form.pdf")
writer = PdfWriter()

# Clone the source document into the writer before touching any fields.
writer.clone_document_from_reader(reader)

# Field name -> value to write into the form.
form_values = {"full_name": "Ada Lovelace", "date": "2026-01-15"}
first_page = writer.pages[0]
writer.update_page_form_field_values(first_page, form_values)

with open("filled.pdf", "wb") as out_file:
    writer.write(out_file)
from pypdf import PdfWriter, PdfReader

# Collect every page of every input PDF into a single writer.
writer = PdfWriter()
for pdf_path in list_of_pdfs:
    reader = PdfReader(pdf_path)
    for page in reader.pages:
        writer.add_page(page)
    # reader.metadata is None when the source file has no document
    # information dictionary; add_metadata() needs a mapping, so skip
    # such files instead of crashing the whole merge.
    if reader.metadata is not None:
        writer.add_metadata(reader.metadata)  # preserves source metadata
:
def _generate_report_sync(data: dict) -> bytes:
    """Build the report PDF synchronously and return its raw bytes.

    Args:
        data: Report input passed through from the caller.

    Returns:
        The generated PDF document as bytes.

    NOTE(review): the generation step is elided in this snippet, so
    ``pdf_bytes`` is not defined here — fill in the real pypdf/reportlab
    code before use.
    """
    # heavy PDF generation using pypdf/reportlab
    return pdf_bytes


from pypdf import PdfReader
@app.post("/report")
async def create_report(data: dict, background_tasks: BackgroundTasks):
    """Generate a PDF report for the posted data and return it.

    The synchronous generator runs in a worker thread via
    ``asyncio.to_thread`` so the event loop is not blocked while the PDF
    is built. ``log_pdf_generation`` is then queued as a background task
    with ``data["id"]``.

    Args:
        data: Request payload; must contain an ``"id"`` key.
        background_tasks: Task queue the logging call is added to.

    Returns:
        A response carrying the PDF bytes with media type
        ``application/pdf``.
    """
    # offload to thread pool
    rendered = await asyncio.to_thread(_generate_report_sync, data)

    report_id = data["id"]
    background_tasks.add_task(log_pdf_generation, report_id)

    return Response(rendered, media_type="application/pdf")
# efficiently iterate for page in pdf.pages: if "_summary_" in page.extract_text().lower(): print(page.extract_tables()) {"full_name": "Ada Lovelace"
Each subtask has isolated deps – e.g., `extractors/ocr` uses `pytesseract` + `pdf2image`, while `generators/html2pdf` uses `weasyprint`.