# Source: scripts/file-management/pdf-with-notes.py
# Last commit: c10c98dfda "Converter with notes" by Guillem Hernandez Sola
# Commit date: 2026-04-12 16:35:04 +02:00
# Size: 120 lines, 4.2 KiB (Python)

import os
import shutil
import tempfile
import unicodedata

import fitz  # PyMuPDF
from fpdf import FPDF
from fpdf.enums import XPos, YPos
from pptx import Presentation
# Define the PDF layout and styling
class NotesPDF(FPDF):
    """FPDF subclass that overrides ``header`` and ``footer`` for the handout.

    The header draws a fixed title line; the footer draws the current page
    number. Both are centered and rendered in grey Helvetica.
    """

    def header(self):
        """Draw the bold, centered title line and leave a small gap below it."""
        title_grey = (100, 100, 100)
        self.set_text_color(*title_grey)
        self.set_font("Helvetica", "B", 12)
        self.cell(
            0,
            10,
            "Presentation Slides & Notes",
            align="C",
            new_x=XPos.LMARGIN,
            new_y=YPos.NEXT,
        )
        self.ln(5)

    def footer(self):
        """Draw the italic, centered page number 15 units above the page bottom."""
        footer_grey = (150, 150, 150)
        page_label = f"Page {self.page_no()}"
        # A negative offset positions the cursor relative to the bottom edge.
        self.set_y(-15)
        self.set_text_color(*footer_grey)
        self.set_font("Helvetica", "I", 8)
        self.cell(0, 10, page_label, align="C")
# Explicit ASCII stand-ins for punctuation and symbols that commonly appear in
# slide notes. Most of these have no Latin-1 code point and no useful Unicode
# decomposition, so they must be mapped by hand; the non-breaking space is
# Latin-1 but is deliberately flattened to a plain space.
_LATIN1_FALLBACKS = str.maketrans({
    "\u2018": "'", "\u2019": "'", "\u201a": "'", "\u201b": "'",
    "\u201c": '"', "\u201d": '"', "\u201e": '"', "\u201f": '"',
    "\u2010": "-", "\u2011": "-", "\u2012": "-", "\u2013": "-",
    "\u2014": "--", "\u2212": "-",
    "\u2022": "*", "\u2026": "...",
    "\u00a0": " ", "\u2122": "TM", "\u00a9": "(c)", "\u00ae": "(R)",
    "\u20ac": "EUR",
})


def clean_text_for_fpdf(text):
    """Return *text* reduced to characters that exist in Latin-1.

    The built-in Helvetica font used for the handout only covers Latin-1, so
    anything outside that range has to be substituted before it is written.

    Substitution happens in two stages:

    1. Known typographic characters (curly quotes, dashes, bullets, ellipsis,
       trademark/copyright signs, euro sign, ...) are replaced with explicit
       ASCII equivalents.
    2. Any remaining character above U+00FF is NFKD-decomposed and only the
       Latin-1 parts of the decomposition are kept, so e.g. "\u010d" becomes
       "c" and the ligature "\ufb01" becomes "fi" instead of vanishing.
       Characters with no Latin-1 decomposition are dropped.

    Args:
        text: The string to clean. Any falsy value (``None``, ``""``) is
            accepted.

    Returns:
        A string whose characters are all encodable as Latin-1; ``""`` when
        *text* is falsy.
    """
    if not text:
        return ""

    text = text.translate(_LATIN1_FALLBACKS)

    pieces = []
    for char in text:
        if ord(char) > 0xFF:
            # Keep the base letter(s) and discard combining marks or any
            # other part of the decomposition that Latin-1 cannot represent.
            char = "".join(
                part
                for part in unicodedata.normalize("NFKD", char)
                if ord(part) <= 0xFF
            )
        pieces.append(char)
    return "".join(pieces)
def _render_slide_images(slides_pdf_path, images_folder, dpi=150):
    """Save every page of the slides PDF as ``Slide<N>.png`` in *images_folder*."""
    # The context manager closes the document even if rendering a page fails.
    with fitz.open(slides_pdf_path) as pdf_doc:
        for page_num, page in enumerate(pdf_doc, start=1):
            pix = page.get_pixmap(dpi=dpi)
            pix.save(os.path.join(images_folder, f"Slide{page_num}.png"))


def _slide_notes_text(slide):
    """Return the slide's notes text, or a placeholder when it has none."""
    notes_text = ""
    if slide.has_notes_slide and slide.notes_slide.notes_text_frame:
        notes_text = slide.notes_slide.notes_text_frame.text
    if not notes_text.strip():
        notes_text = "(No notes provided for this slide)"
    return notes_text


def create_handout_pdf(pptx_path, slides_pdf_path, output_pdf):
    """Build a handout PDF with one page per slide: slide image, then notes.

    The slide images come from *slides_pdf_path* (a PDF export of the deck,
    rendered page by page at 150 DPI); the notes come from *pptx_path*.
    Slide N of the presentation is paired with page N of the PDF. When a
    page image is missing for a slide, a red placeholder line is written
    instead.

    Problems are reported with ``print`` rather than raised: if the slides
    PDF does not exist, or the PPTX cannot be loaded, a message is printed
    and the function returns without writing *output_pdf*.

    Args:
        pptx_path: Path to the ``.pptx`` file to read notes from.
        slides_pdf_path: Path to the PDF export of the same slides.
        output_pdf: Path the finished handout PDF is written to.

    Returns:
        None.
    """
    # 1. Check if the exported PDF exists
    if not os.path.exists(slides_pdf_path):
        print(f"⚠️ ERROR: Could not find {slides_pdf_path}.")
        print("Please open OnlyOffice and Save As -> PDF into your Assets folder.")
        return

    # A fresh, uniquely named temp directory guarantees that no stale images
    # from an earlier run are picked up, that an unrelated folder in the
    # working directory is never deleted, and that cleanup happens on every
    # exit path (early return or exception included).
    with tempfile.TemporaryDirectory(prefix="temp_slide_images_") as temp_images_folder:
        # 2. Slice the PDF into images automatically
        print(f"Slicing slides from {slides_pdf_path}...")
        _render_slide_images(slides_pdf_path, temp_images_folder)

        # 3. Read the PPTX to get the notes
        print(f"Reading presentation notes from {pptx_path}...")
        try:
            prs = Presentation(pptx_path)
        except Exception as e:
            # Best-effort script boundary: report the problem and stop.
            print(f"Error loading PPTX: {e}")
            return

        # 4. Build the final PDF
        pdf = NotesPDF()
        pdf.set_auto_page_break(auto=True, margin=15)
        for slide_num, slide in enumerate(prs.slides, start=1):
            pdf.add_page()

            # Place the automatically generated image
            img_path = os.path.join(temp_images_folder, f"Slide{slide_num}.png")
            if os.path.exists(img_path):
                pdf.image(img_path, x=20, w=170)
                pdf.ln(5)
            else:
                pdf.set_font("Helvetica", "B", 12)
                pdf.set_text_color(255, 0, 0)
                pdf.cell(
                    0,
                    10,
                    f"[Slide {slide_num} Image Missing]",
                    new_x=XPos.LMARGIN,
                    new_y=YPos.NEXT,
                )
                pdf.ln(5)

            # Place the Notes
            pdf.set_font("Helvetica", "B", 14)
            pdf.set_text_color(0, 0, 0)
            pdf.cell(
                0,
                10,
                f"Notes - Slide {slide_num}:",
                new_x=XPos.LMARGIN,
                new_y=YPos.NEXT,
            )
            pdf.set_font("Helvetica", "", 11)
            notes_text = clean_text_for_fpdf(_slide_notes_text(slide))
            pdf.multi_cell(0, 6, notes_text)

        # The images must still exist while the PDF is written, so this stays
        # inside the temporary-directory block.
        print("Compiling final PDF...")
        pdf.output(output_pdf)

    # 5. The temporary images folder has been removed by the context manager.
    print(f"✅ Success! Your file is ready: {output_pdf}")
if __name__ == "__main__":
    # Inputs: the presentation and its PDF export, both in Downloads/Assets.
    PPTX_FILE = "/Users/guillemhernandezsola/Downloads/Assets/presentation.pptx"
    SLIDES_PDF = "/Users/guillemhernandezsola/Downloads/Assets/slides.pdf"
    # Output: the finished handout is written to the same Assets folder.
    OUTPUT_FILE = "/Users/guillemhernandezsola/Downloads/Assets/Final_Presentation_Notes.pdf"
    create_handout_pdf(
        pptx_path=PPTX_FILE,
        slides_pdf_path=SLIDES_PDF,
        output_pdf=OUTPUT_FILE,
    )