diff --git a/pipeline.py b/pipeline.py index 9625c56..12f22d3 100644 --- a/pipeline.py +++ b/pipeline.py @@ -4,67 +4,40 @@ pipeline.py ─────────────────────────────────────────────────────────────── Translation + render pipeline -Flow per page: - 1) translate_manga_text() -> output.txt + bubbles.json (+ debug_clusters.png if DEBUG) - 2) render_translations() -> page_translated.png - 3) Pack CBZ with originals + rendered pages + text outputs - -Folder structure: - / - ├── 000.png - ├── 001.png - └── translated/ - ├── 000/ - │ ├── output.txt - │ ├── bubbles.json - │ ├── page_translated.png - │ └── debug_clusters.png (optional) - ├── 001/ - │ └── ... - └── ... - -CBZ: - - pages/ - - rendered/_translated.png - - translations/_output.txt +Usage: + python pipeline.py /path/to/chapter/folder """ import os import sys +import argparse import zipfile import importlib.util from pathlib import Path - # ───────────────────────────────────────────── # CONFIG # ───────────────────────────────────────────── -CHAPTER_DIR = "/Users/guillemhernandezsola/Downloads/Dandadan_059_2022_Digital_1r0n" -OUTPUT_CBZ = "/Users/guillemhernandezsola/Downloads/Dandadan_059_2022_Digital_1r0n_translated.cbz" - SOURCE_LANG = "en" TARGET_LANG = "ca" -# translator (NEW signature-compatible) +# Translator Settings CONFIDENCE_THRESHOLD = 0.10 MIN_TEXT_LENGTH = 1 -GAP_PX = "auto" # was cluster/proximity in old version +GAP_PX = "auto" FILTER_SFX = True QUALITY_THRESHOLD = 0.50 READING_MODE = "ltr" DEBUG = True -# renderer +# Renderer Settings RENDER_ENABLED = True RENDER_OUTPUT_NAME = "page_translated.png" - -# optional custom font list for renderer -FONT_CANDIDATES = [ +FONT_CANDIDATES = [ "fonts/ComicNeue-Regular.ttf", "fonts/ComicRelief-Regular.ttf" ] - # ───────────────────────────────────────────── # DYNAMIC MODULE LOADER # ───────────────────────────────────────────── @@ -76,7 +49,6 @@ def load_module(name, filepath): spec.loader.exec_module(module) return module - # ───────────────────────────────────────────── # HELPERS # ───────────────────────────────────────────── @@ -88,80 +60,61 @@ def sorted_pages(chapter_dir): ] return sorted(pages, key=lambda p: p.stem) - def make_page_workdir(chapter_dir, page_stem): workdir = Path(chapter_dir) / "translated" / page_stem workdir.mkdir(parents=True, exist_ok=True) return workdir - def pack_cbz(chapter_dir, translated_dir, output_cbz): exts = {".jpg", ".jpeg", ".png", ".webp"} pages = sorted( - [p for p in Path(chapter_dir).iterdir() - if p.is_file() and p.suffix.lower() in exts], + [p for p in Path(chapter_dir).iterdir() if p.is_file() and p.suffix.lower() in exts], key=lambda p: p.stem ) - txts = sorted( - translated_dir.rglob("output.txt"), - key=lambda p: p.parent.name - ) - - rendered = sorted( - translated_dir.rglob(RENDER_OUTPUT_NAME), - key=lambda p: p.parent.name - ) + txts = sorted(translated_dir.rglob("output.txt"), key=lambda p: p.parent.name) + rendered = sorted(translated_dir.rglob(RENDER_OUTPUT_NAME), key=lambda p: p.parent.name) if not pages: print("⚠️ No original pages found — CBZ not created.") return with zipfile.ZipFile(output_cbz, "w", compression=zipfile.ZIP_STORED) as zf: - # original pages + # Original pages for img in pages: arcname = f"pages/{img.name}" zf.write(img, arcname) - print(f" 🖼 {arcname}") - - # rendered pages + + # Rendered pages for rp in rendered: arcname = f"rendered/{rp.parent.name}_translated.png" zf.write(rp, arcname) - print(f" 🎨 {arcname}") - - # text outputs + + # Text outputs for txt in txts: arcname = f"translations/{txt.parent.name}_output.txt" zf.write(txt, arcname) - print(f" 📄 {arcname}") - - print( - f"\n✅ CBZ saved → {output_cbz} " - f"({len(pages)} original, {len(rendered)} rendered, {len(txts)} text)" - ) + print(f"\n✅ CBZ saved → {output_cbz}") + print(f"📦 Contains: {len(pages)} original, {len(rendered)} rendered, {len(txts)} text files.") # ───────────────────────────────────────────── # PER-PAGE PIPELINE # ───────────────────────────────────────────── def process_page(page_path, workdir, translator_module, renderer_module): - """ - Runs translator + renderer for one page. - All generated files are written inside workdir. - """ print(f"\n{'─' * 70}") print(f"PAGE: {page_path.name}") print(f"{'─' * 70}") orig_dir = os.getcwd() try: + # Isolate execution to the specific page's folder os.chdir(workdir) - # 1) translate + # 1) Translate translator_module.translate_manga_text( - image_path= str(page_path.resolve()), + image_path=str(page_path.resolve()), source_lang=SOURCE_LANG, target_lang=TARGET_LANG, confidence_threshold=CONFIDENCE_THRESHOLD, @@ -174,9 +127,9 @@ def process_page(page_path, workdir, translator_module, renderer_module): reading_mode=READING_MODE, debug=DEBUG ) - print(" ✅ translator done") + print(" ✅ Translator done") - # 2) render + # 2) Render if RENDER_ENABLED: renderer_module.render_translations( input_image=str(page_path.resolve()), @@ -185,7 +138,7 @@ def process_page(page_path, workdir, translator_module, renderer_module): bubbles_file="bubbles.json", font_candidates=FONT_CANDIDATES ) - print(" ✅ renderer done") + print(" ✅ Renderer done") return True @@ -196,44 +149,52 @@ def process_page(page_path, workdir, translator_module, renderer_module): finally: os.chdir(orig_dir) - # ───────────────────────────────────────────── # MAIN # ───────────────────────────────────────────── def main(): - print("Loading modules...") + parser = argparse.ArgumentParser(description="Manga Translation Pipeline") + parser.add_argument("chapter_dir", help="Path to the folder containing manga pages") + args = parser.parse_args() + chapter_dir = Path(args.chapter_dir).resolve() + output_cbz = chapter_dir.parent / f"{chapter_dir.name}_translated.cbz" + + print("Loading modules...") + + # Ensure we are loading from the directory where pipeline.py is located + script_dir = Path(__file__).parent + try: - translator = load_module("manga_translator", "manga-translator.py") + translator = load_module("manga_translator", str(script_dir / "manga-translator.py")) except Exception as e: print(f"❌ Could not load manga-translator.py: {e}") sys.exit(1) try: - renderer = load_module("manga_renderer", "manga-renderer.py") + renderer = load_module("manga_renderer", str(script_dir / "manga-renderer.py")) except Exception as e: print(f"❌ Could not load manga-renderer.py: {e}") sys.exit(1) - pages = sorted_pages(CHAPTER_DIR) + pages = sorted_pages(chapter_dir) if not pages: - print(f"❌ No images found in: {CHAPTER_DIR}") + print(f"❌ No images found in: {chapter_dir}") sys.exit(1) - print(f"\n📖 Chapter : {CHAPTER_DIR}") + print(f"\n📖 Chapter : {chapter_dir}") print(f" Pages : {len(pages)}") print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}") print(f" Render : {'ON' if RENDER_ENABLED else 'OFF'}\n") - translated_dir = Path(CHAPTER_DIR) / "translated" - succeeded = [] - failed = [] + translated_dir = chapter_dir / "translated" + succeeded, failed = [], [] for i, page_path in enumerate(pages, start=1): - print(f"[{i}/{len(pages)}] {page_path.name}") - workdir = make_page_workdir(CHAPTER_DIR, page_path.stem) - ok = process_page(page_path, workdir, translator, renderer) - if ok: + print(f"[{i}/{len(pages)}] Processing...") + workdir = make_page_workdir(chapter_dir, page_path.stem) + + if process_page(page_path, workdir, translator, renderer): succeeded.append(page_path.name) else: failed.append(page_path.name) @@ -248,8 +209,7 @@ def main(): print(f"{'═' * 70}\n") print("Packing CBZ...") - pack_cbz(CHAPTER_DIR, translated_dir, OUTPUT_CBZ) - + pack_cbz(chapter_dir, translated_dir, output_cbz) if __name__ == "__main__": main()