# scripts/file-management/pdf-with-notes.py

import os
import re
import unicodedata

from fpdf import FPDF
from fpdf.enums import XPos, YPos
from pptx import Presentation

# Define the PDF layout and styling
class NotesPDF(FPDF):
    """FPDF subclass that overrides the ``header`` and ``footer`` hooks.

    The header prints a fixed, centred title; the footer prints the current
    page number.
    """

    def header(self):
        """Write the bold grey title line, then leave a small gap below it."""
        title_rgb = (100, 100, 100)
        self.set_font("Helvetica", "B", 12)
        self.set_text_color(*title_rgb)
        self.cell(
            0,
            10,
            "Presentation Slides & Notes",
            align="C",
            new_x=XPos.LMARGIN,
            new_y=YPos.NEXT,
        )
        # Extra vertical space between the title and whatever follows.
        self.ln(5)

    def footer(self):
        """Write the italic grey ``Page N`` label near the bottom of the page."""
        footer_rgb = (150, 150, 150)
        # NOTE(review): a negative y is presumably measured up from the page
        # bottom, as in the FPDF API -- confirm against the fpdf2 docs.
        self.set_y(-15)
        self.set_font("Helvetica", "I", 8)
        self.set_text_color(*footer_rgb)
        page_label = f"Page {self.page_no()}"
        self.cell(0, 10, page_label, align="C")

def clean_text_for_fpdf(text):
    """Convert ``text`` to a string made only of Latin-1 characters.

    Common typographic characters (curly quotes, dashes, bullet, ellipsis,
    non-breaking space, trademark/copyright/registered signs) are mapped to
    plain look-alikes. Any other character outside Latin-1 is
    compatibility-decomposed so that its base letters survive (for example
    "r with caron" becomes "r" and the "fi" ligature becomes "fi"). Whatever
    still cannot be encoded as Latin-1 after that is dropped.

    Args:
        text: The string to clean. ``None`` and the empty string are accepted.

    Returns:
        The cleaned string, or ``""`` when ``text`` is falsy.
    """
    if not text:
        return ""

    replacements = {
        '\u2018': "'", '\u2019': "'", '\u201c': '"', '\u201d': '"',
        '\u2013': "-", '\u2014': "--", '\u2022': "*", '\u2026': "...",
        '\u00A0': " ", '\u2122': "TM", '\u00a9': "(c)", '\u00ae': "(R)"
    }
    # One pass over the text; no replacement value contains a key, so this is
    # equivalent to applying the replacements one after another.
    text = text.translate(str.maketrans(replacements))

    # Code points below 256 are exactly the Latin-1 range and are kept as-is.
    # Anything above is decomposed (NFKD) into base characters plus combining
    # marks; the marks are then discarded by the 'ignore' encode below, which
    # keeps the base letter instead of deleting the whole character.
    folded = "".join(
        ch if ord(ch) < 256 else unicodedata.normalize("NFKD", ch)
        for ch in text
    )

    return folded.encode('latin-1', 'ignore').decode('latin-1')

def find_slide_image(images_folder, slide_num):
    """Return the path of the image in ``images_folder`` for slide ``slide_num``.

    A file matches when its name ends in ``.png``, ``.jpg`` or ``.jpeg``
    (case-insensitive) and the last run of digits in the name equals
    ``slide_num`` -- so ``Slide_03.png`` matches slide 3.

    Args:
        images_folder: Directory that holds the slide images.
        slide_num: Slide number to look for, as an ``int``.

    Returns:
        ``images_folder`` joined with the first matching file name in sorted
        order, or ``None`` when ``images_folder`` is not a directory or no
        file matches.
    """
    # isdir rather than exists: a path that points at a regular file must
    # return None instead of letting os.listdir raise NotADirectoryError.
    if not os.path.isdir(images_folder):
        return None

    # os.listdir returns entries in arbitrary order; sorting makes the choice
    # reproducible when several files carry the same slide number.
    for filename in sorted(os.listdir(images_folder)):
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue
        # The slide number is taken to be the last group of digits in the name.
        numbers = re.findall(r'\d+', filename)
        if numbers and int(numbers[-1]) == slide_num:
            return os.path.join(images_folder, filename)
    return None

def _notes_text_for(slide):
    """Return the notes text of ``slide``, or a placeholder when there is none."""
    notes_text = ""
    if slide.has_notes_slide and slide.notes_slide.notes_text_frame:
        notes_text = slide.notes_slide.notes_text_frame.text

    # python-pptx documents that a line break inside a paragraph appears in
    # ``.text`` as a vertical tab; convert it to a newline so the break is
    # rendered as a line break in the PDF instead of a stray control character.
    notes_text = notes_text.replace("\v", "\n")

    if not notes_text.strip():
        return "(No notes provided for this slide)"
    return notes_text


def create_handout_pdf(pptx_path, images_folder, output_pdf):
    """Write a PDF handout that pairs each slide's image with its notes.

    For every slide in the presentation a new page is started. The matching
    image from ``images_folder`` is placed first (or a red "image missing"
    line when none is found), followed by a heading and the slide's notes.

    Args:
        pptx_path: Path of the ``.pptx`` file to read.
        images_folder: Directory searched by ``find_slide_image`` for the
            per-slide images.
        output_pdf: Path the finished PDF is written to.

    Returns:
        None. If the presentation cannot be loaded, the error is printed and
        the function returns without writing a PDF.
    """
    print(f"Reading presentation: {pptx_path}...")

    try:
        prs = Presentation(pptx_path)
    except Exception as e:
        # Best-effort script boundary: report the problem and give up cleanly.
        print(f"Error loading PPTX: {e}")
        return

    pdf = NotesPDF()
    pdf.set_auto_page_break(auto=True, margin=15)

    for slide_num, slide in enumerate(prs.slides, start=1):
        pdf.add_page()

        # 1. Find and place the slide image
        img_path = find_slide_image(images_folder, slide_num)

        if img_path:
            # A4 width is ~210mm. 170mm width leaves 20mm margins on each side.
            pdf.image(img_path, x=20, w=170)
            pdf.ln(5)
        else:
            pdf.set_font("Helvetica", "B", 12)
            pdf.set_text_color(255, 0, 0)
            pdf.cell(0, 10, f"[Slide {slide_num} Image Missing - Check 'images' folder]", new_x=XPos.LMARGIN, new_y=YPos.NEXT)
            pdf.ln(5)

        # 2. Heading for the notes section
        pdf.set_font("Helvetica", "B", 14)
        pdf.set_text_color(0, 0, 0)
        pdf.cell(0, 10, f"Notes - Slide {slide_num}:", new_x=XPos.LMARGIN, new_y=YPos.NEXT)

        # 3. The notes themselves, reduced to characters the core font can show
        pdf.set_font("Helvetica", "", 11)
        notes_text = clean_text_for_fpdf(_notes_text_for(slide))
        pdf.multi_cell(0, 6, notes_text)

    print("Compiling PDF...")
    pdf.output(output_pdf)
    print(f"✅ Success! Your file is ready: {output_pdf}")

if __name__ == "__main__":
    # Absolute paths of the input presentation, the folder of per-slide
    # images, and the PDF to produce. Edit these to match your machine.
    PPTX_FILE = "/Users/guillemhernandezsola/Downloads/Assets/presentation.pptx"
    IMAGES_DIR = "/Users/guillemhernandezsola/Downloads/Assets/images"
    OUTPUT_FILE = "/Users/guillemhernandezsola/Downloads/Assets/Final_Presentation_Notes.pdf"

    # Require a real directory: a missing path or a regular file both mean
    # there are no slide images to read.
    if not os.path.isdir(IMAGES_DIR):
        print(f"⚠️ ERROR: The folder '{IMAGES_DIR}' was not found.")
        # Point at the folder's actual parent rather than a hard-coded,
        # unrelated directory.
        print(f"Please create it in: {os.path.dirname(IMAGES_DIR)}/")
        print("And make sure you exported your slide images from OnlyOffice into it!")
    else:
        create_handout_pdf(PPTX_FILE, IMAGES_DIR, OUTPUT_FILE)