Converter with notes

This commit is contained in:
Guillem Hernandez Sola
2026-04-12 16:35:04 +02:00
parent ea7f562785
commit c10c98dfda

View File

@@ -1,5 +1,6 @@
import os import os
import re import shutil
import fitz # PyMuPDF
from pptx import Presentation from pptx import Presentation
from fpdf import FPDF from fpdf import FPDF
from fpdf.enums import XPos, YPos from fpdf.enums import XPos, YPos
@@ -33,30 +34,35 @@ def clean_text_for_fpdf(text):
return text.encode('latin-1', 'ignore').decode('latin-1') return text.encode('latin-1', 'ignore').decode('latin-1')
def find_slide_image(images_folder, slide_num): def create_handout_pdf(pptx_path, slides_pdf_path, output_pdf):
"""Smartly searches for an image file matching the slide number.""" temp_images_folder = "temp_slide_images"
if not os.path.exists(images_folder):
return None
for filename in os.listdir(images_folder):
# Check if it's an image file
if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
# Extract all numbers from the filename
numbers = re.findall(r'\d+', filename)
# If the last number in the filename matches our slide number, we found it!
if numbers and int(numbers[-1]) == slide_num:
return os.path.join(images_folder, filename)
return None
def create_handout_pdf(pptx_path, images_folder, output_pdf):
print(f"Reading presentation: {pptx_path}...")
# 1. Check if the exported PDF exists
if not os.path.exists(slides_pdf_path):
print(f"⚠️ ERROR: Could not find {slides_pdf_path}.")
print("Please open OnlyOffice and Save As -> PDF into your Assets folder.")
return
# 2. Slice the PDF into images automatically
print(f"Slicing slides from {slides_pdf_path}...")
if not os.path.exists(temp_images_folder):
os.makedirs(temp_images_folder)
pdf_doc = fitz.open(slides_pdf_path)
for i in range(len(pdf_doc)):
page = pdf_doc[i]
pix = page.get_pixmap(dpi=150) # 150 DPI is perfect for standard PDF viewing
pix.save(os.path.join(temp_images_folder, f"Slide{i+1}.png"))
# 3. Read the PPTX to get the notes
print(f"Reading presentation notes from {pptx_path}...")
try: try:
prs = Presentation(pptx_path) prs = Presentation(pptx_path)
except Exception as e: except Exception as e:
print(f"Error loading PPTX: {e}") print(f"Error loading PPTX: {e}")
return return
# 4. Build the final PDF
pdf = NotesPDF() pdf = NotesPDF()
pdf.set_auto_page_break(auto=True, margin=15) pdf.set_auto_page_break(auto=True, margin=15)
@@ -64,20 +70,19 @@ def create_handout_pdf(pptx_path, images_folder, output_pdf):
slide_num = i + 1 slide_num = i + 1
pdf.add_page() pdf.add_page()
# 1. Smartly find and place the Slide Image # Place the automatically generated image
img_path = find_slide_image(images_folder, slide_num) img_path = os.path.join(temp_images_folder, f"Slide{slide_num}.png")
if img_path: if os.path.exists(img_path):
# A4 width is ~210mm. 170mm width leaves 20mm margins on each side.
pdf.image(img_path, x=20, w=170) pdf.image(img_path, x=20, w=170)
pdf.ln(5) pdf.ln(5)
else: else:
pdf.set_font("Helvetica", "B", 12) pdf.set_font("Helvetica", "B", 12)
pdf.set_text_color(255, 0, 0) pdf.set_text_color(255, 0, 0)
pdf.cell(0, 10, f"[Slide {slide_num} Image Missing - Check 'images' folder]", new_x=XPos.LMARGIN, new_y=YPos.NEXT) pdf.cell(0, 10, f"[Slide {slide_num} Image Missing]", new_x=XPos.LMARGIN, new_y=YPos.NEXT)
pdf.ln(5) pdf.ln(5)
# 2. Extract and Place the Notes # Place the Notes
pdf.set_font("Helvetica", "B", 14) pdf.set_font("Helvetica", "B", 14)
pdf.set_text_color(0, 0, 0) pdf.set_text_color(0, 0, 0)
pdf.cell(0, 10, f"Notes - Slide {slide_num}:", new_x=XPos.LMARGIN, new_y=YPos.NEXT) pdf.cell(0, 10, f"Notes - Slide {slide_num}:", new_x=XPos.LMARGIN, new_y=YPos.NEXT)
@@ -94,19 +99,21 @@ def create_handout_pdf(pptx_path, images_folder, output_pdf):
notes_text = clean_text_for_fpdf(notes_text) notes_text = clean_text_for_fpdf(notes_text)
pdf.multi_cell(0, 6, notes_text) pdf.multi_cell(0, 6, notes_text)
print(f"Compiling PDF...") print(f"Compiling final PDF...")
pdf.output(output_pdf) pdf.output(output_pdf)
# 5. Clean up the temporary images folder
if os.path.exists(temp_images_folder):
shutil.rmtree(temp_images_folder)
print(f"✅ Success! Your file is ready: {output_pdf}") print(f"✅ Success! Your file is ready: {output_pdf}")
if __name__ == "__main__": if __name__ == "__main__":
# Pointing exactly to your Downloads/Assets folder
PPTX_FILE = "/Users/guillemhernandezsola/Downloads/Assets/presentation.pptx" PPTX_FILE = "/Users/guillemhernandezsola/Downloads/Assets/presentation.pptx"
# Make sure this folder exists in the same directory where you run the script! SLIDES_PDF = "/Users/guillemhernandezsola/Downloads/Assets/slides.pdf"
IMAGES_DIR = "/Users/guillemhernandezsola/Downloads/Assets/images"
# The final file will be saved in your current code directory
OUTPUT_FILE = "/Users/guillemhernandezsola/Downloads/Assets/Final_Presentation_Notes.pdf" OUTPUT_FILE = "/Users/guillemhernandezsola/Downloads/Assets/Final_Presentation_Notes.pdf"
if not os.path.exists(IMAGES_DIR): create_handout_pdf(PPTX_FILE, SLIDES_PDF, OUTPUT_FILE)
print(f"⚠️ ERROR: The folder '{IMAGES_DIR}' was not found.")
print("Please create it in: /Users/guillemhernandezsola/code/scripts/file-management/")
print("And make sure you exported your slide images from OnlyOffice into it!")
else:
create_handout_pdf(PPTX_FILE, IMAGES_DIR, OUTPUT_FILE)