"""Split a PDF document into one single-page PDF file per page.

Usage:
    python split-pdfs.py [INPUT_PDF] [-o OUTPUT_DIR]

When INPUT_PDF is omitted, ``DEFAULT_INPUT_PDF`` is used.
"""
# NOTE: the change that introduced this script (file-management/split-pdfs.py)
# also added `*.pdf` to .gitignore, so PDF files stay out of version control.

import argparse
from pathlib import Path

from pypdf import PdfReader, PdfWriter

# Input used when the script is run without arguments. Kept so that a bare
# `python split-pdfs.py` behaves as it did before CLI arguments were added.
DEFAULT_INPUT_PDF = (
    "/Users/guillemhernandezsola/icloud/agile611/asistencia/"
    "Certificado_Asistencia_Jenkins_Eclekte_signed.pdf"
)


def split_pdf_to_single_pages(input_pdf_path: str, output_dir: str = "split_pages") -> None:
    """Write every page of a PDF to its own single-page PDF file.

    Output files are named ``<input stem>_page_<n>.pdf`` with ``n`` starting
    at 1, and are placed in ``output_dir`` (created if it does not exist).
    A summary line is printed once all pages have been written.

    Args:
        input_pdf_path: Path of the PDF to split.
        output_dir: Directory that receives the single-page PDFs.

    Raises:
        Errors raised while opening or parsing the input (for example when
        the file does not exist) propagate unchanged.
    """
    input_path = Path(input_pdf_path)
    output_path = Path(output_dir)

    # Open the source before touching the filesystem, so an unreadable input
    # does not leave an empty output directory behind.
    reader = PdfReader(str(input_path))
    total_pages = len(reader.pages)

    output_path.mkdir(parents=True, exist_ok=True)

    for i, page in enumerate(reader.pages, start=1):
        # A fresh writer per page keeps each output file to exactly one page.
        writer = PdfWriter()
        writer.add_page(page)

        output_file = output_path / f"{input_path.stem}_page_{i}.pdf"
        with open(output_file, "wb") as f:
            writer.write(f)

    print(f"Done! Split {total_pages} pages into {total_pages} single-page PDFs in '{output_path}'.")


def _parse_args(argv=None):
    """Parse the command-line arguments (``argv=None`` reads ``sys.argv``)."""
    parser = argparse.ArgumentParser(
        description="Split a PDF into one single-page PDF per page."
    )
    parser.add_argument(
        "input_pdf",
        nargs="?",
        default=DEFAULT_INPUT_PDF,
        help="PDF file to split (defaults to DEFAULT_INPUT_PDF)",
    )
    parser.add_argument(
        "-o",
        "--output-dir",
        default="split_pages",
        help="directory for the single-page PDFs (default: split_pages)",
    )
    return parser.parse_args(argv)


if __name__ == "__main__":
    args = _parse_args()
    split_pdf_to_single_pages(args.input_pdf, args.output_dir)