#!/usr/bin/env python3
"""
pipeline-translator.py
───────────────────────────────────────────────────────────────
Translation OCR pipeline (Batch Processing Only)

Usage:
    python pipeline-translator.py /path/to/chapter/folder
"""

import os
import re
import sys
import argparse
import importlib.util
from pathlib import Path


# ─────────────────────────────────────────────
# DYNAMIC MODULE LOADER
# ─────────────────────────────────────────────
def load_module(name, filepath):
    """Load and execute a Python source file as a module object.

    The target file name contains a hyphen, so it cannot be pulled in with a
    regular ``import`` statement; importlib is used instead.

    Args:
        name: Module name to assign to the loaded module.
        filepath: Path of the source file to execute.

    Returns:
        The executed module object.

    Raises:
        FileNotFoundError: If no import spec/loader can be built for
            ``filepath``. Any exception raised while executing the module
            propagates unchanged.
    """
    spec = importlib.util.spec_from_file_location(name, filepath)
    if spec is None or spec.loader is None:
        raise FileNotFoundError(f"Cannot load spec for {filepath}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module


# ─────────────────────────────────────────────
# HELPERS
# ─────────────────────────────────────────────
def _natural_key(text):
    """Return a sort key that orders embedded digit runs by numeric value."""
    # re.split with a capturing group always alternates text / digits / text,
    # so odd positions are digit runs. Using the position (rather than
    # str.isdigit) keeps every key the same shape and therefore comparable.
    parts = re.split(r"(\d+)", text)
    return [int(part) if idx % 2 else part for idx, part in enumerate(parts)]


def sorted_pages(chapter_dir):
    """Return the image files directly inside ``chapter_dir`` in page order.

    Only regular files with a .jpg/.jpeg/.png/.webp suffix (case-insensitive)
    are returned; sub-directories are not searched.

    Stems are compared with a numeric-aware key so that ``2`` sorts before
    ``10``. The full file name breaks ties (e.g. ``01.jpg`` vs ``1.jpg``) so
    the result does not depend on directory listing order.

    Args:
        chapter_dir: Directory containing the page images.

    Returns:
        A list of ``Path`` objects.
    """
    exts = {".jpg", ".jpeg", ".png", ".webp"}
    pages = [
        p for p in Path(chapter_dir).iterdir()
        if p.is_file() and p.suffix.lower() in exts
    ]
    return sorted(pages, key=lambda p: (_natural_key(p.stem), p.name))


def make_page_workdir(chapter_dir, page_stem):
    """Create (if needed) and return ``<chapter_dir>/translated/<page_stem>``.

    Args:
        chapter_dir: Chapter directory that holds the page images.
        page_stem: File name of the page without its extension.

    Returns:
        ``Path`` of the per-page output directory.
    """
    workdir = Path(chapter_dir) / "translated" / page_stem
    workdir.mkdir(parents=True, exist_ok=True)
    return workdir


# ─────────────────────────────────────────────
# PER-PAGE PIPELINE
# ─────────────────────────────────────────────
def process_page(page_path, workdir, translator_module):
    """Run the translator on one page, writing its outputs into ``workdir``.

    The current working directory is switched to ``workdir`` for the duration
    of the call and always restored afterwards, so the relative output names
    passed to the translator land in the page's own folder.

    Args:
        page_path: ``Path`` of the page image.
        workdir: Directory that receives ``output.txt`` and ``bubbles.json``.
        translator_module: Loaded module exposing ``translate_manga_text``.

    Returns:
        True when the translator call returns normally, False when it raises
        (the error is printed, not re-raised, so the batch can continue).
    """
    print(f"\n{'─' * 70}")
    print(f"PAGE: {page_path.name}")
    print(f"{'─' * 70}")

    orig_dir = os.getcwd()

    try:
        # Isolate execution to the specific page's folder
        os.chdir(workdir)

        print(" ⏳ Extracting text and translating...")

        # 1) Translate using ONLY the required path arguments.
        # This forces the function to use its own internal default variables
        # (like source_lang, target_lang, confidence_threshold) directly from
        # manga-translator.py
        translator_module.translate_manga_text(
            image_path=str(page_path.resolve()),
            export_to_file="output.txt",
            export_bubbles_to="bubbles.json"
        )

        print(" ✅ Translation and OCR data saved successfully")
        return True

    except Exception as e:
        # Per-page boundary: report and keep going with the remaining pages.
        print(f" ❌ Failed: {e}")
        return False

    finally:
        os.chdir(orig_dir)


# ─────────────────────────────────────────────
# MAIN
# ─────────────────────────────────────────────
def main():
    """Parse the command line and translate every page of the chapter.

    Exits with status 1 when the chapter path is not a directory, when
    ``manga-translator.py`` (expected next to this script) cannot be loaded,
    or when the directory contains no supported images.
    """
    parser = argparse.ArgumentParser(description="Manga Translation OCR Batch Pipeline")
    parser.add_argument("chapter_dir", help="Path to the folder containing manga pages")
    args = parser.parse_args()

    chapter_dir = Path(args.chapter_dir).resolve()

    # Fail early with a readable message instead of an iterdir() traceback
    # after the translator module has already been loaded.
    if not chapter_dir.is_dir():
        print(f"❌ Not a directory: {chapter_dir}")
        sys.exit(1)

    print("Loading translator module...")
    script_dir = Path(__file__).parent

    try:
        translator = load_module("manga_translator", str(script_dir / "manga-translator.py"))
    except Exception as e:
        print(f"❌ Could not load manga-translator.py: {e}")
        sys.exit(1)

    pages = sorted_pages(chapter_dir)
    if not pages:
        print(f"❌ No images found in: {chapter_dir}")
        sys.exit(1)

    print(f"\n📖 Chapter : {chapter_dir.name}")
    print(f" Pages : {len(pages)}")
    print(" Note : Using translation settings directly from manga-translator.py\n")

    succeeded, failed = [], []

    for i, page_path in enumerate(pages, start=1):
        print(f"[{i}/{len(pages)}] Processing...")
        workdir = make_page_workdir(chapter_dir, page_path.stem)

        if process_page(page_path, workdir, translator):
            succeeded.append(page_path.name)
        else:
            failed.append(page_path.name)

    print(f"\n{'═' * 70}")
    print("PIPELINE COMPLETE")
    print(f"✅ {len(succeeded)} page(s) succeeded")
    if failed:
        print(f"❌ {len(failed)} page(s) failed:")
        for name in failed:
            print(f" • {name}")
    print(f"{'═' * 70}\n")


if __name__ == "__main__":
    main()