diff --git a/pipeline-translator.py b/pipeline-translator.py index 7f7b6f8..c1b9e9d 100644 --- a/pipeline-translator.py +++ b/pipeline-translator.py @@ -1,35 +1,19 @@ #!/usr/bin/env python3 """ -pipeline.py +pipeline-translator.py ─────────────────────────────────────────────────────────────── -Translation OCR pipeline (No Rendering) +Translation OCR pipeline (Batch Processing Only) Usage: - python pipeline.py /path/to/chapter/folder + python pipeline-translator.py /path/to/chapter/folder """ import os import sys import argparse -import zipfile import importlib.util from pathlib import Path -# ───────────────────────────────────────────── -# CONFIG -# ───────────────────────────────────────────── -SOURCE_LANG = "en" -TARGET_LANG = "ca" - -# Translator Settings -CONFIDENCE_THRESHOLD = 0.10 -MIN_TEXT_LENGTH = 1 -GAP_PX = "auto" -FILTER_SFX = True -QUALITY_THRESHOLD = 0.50 -READING_MODE = "ltr" -DEBUG = True - # ───────────────────────────────────────────── # DYNAMIC MODULE LOADER # ───────────────────────────────────────────── @@ -57,40 +41,6 @@ def make_page_workdir(chapter_dir, page_stem): workdir.mkdir(parents=True, exist_ok=True) return workdir -def pack_cbz(chapter_dir, translated_dir, output_cbz): - exts = {".jpg", ".jpeg", ".png", ".webp"} - - pages = sorted( - [p for p in Path(chapter_dir).iterdir() if p.is_file() and p.suffix.lower() in exts], - key=lambda p: p.stem - ) - - txts = sorted(translated_dir.rglob("output.txt"), key=lambda p: p.parent.name) - jsons = sorted(translated_dir.rglob("bubbles.json"), key=lambda p: p.parent.name) - - if not pages: - print("⚠️ No original pages found — CBZ not created.") - return - - with zipfile.ZipFile(output_cbz, "w", compression=zipfile.ZIP_STORED) as zf: - # Original pages - for img in pages: - arcname = f"pages/{img.name}" - zf.write(img, arcname) - - # Text outputs - for txt in txts: - arcname = f"translations/{txt.parent.name}_output.txt" - zf.write(txt, arcname) - - # JSON outputs - for j in jsons: - arcname = f"data/{j.parent.name}_bubbles.json" - zf.write(j, arcname) - - print(f"\n✅ CBZ saved → {output_cbz}") - print(f"📦 Contains: {len(pages)} original pages, {len(txts)} text files, {len(jsons)} JSON files.") - # ───────────────────────────────────────────── # PER-PAGE PIPELINE # ───────────────────────────────────────────── @@ -104,22 +54,17 @@ def process_page(page_path, workdir, translator_module): # Isolate execution to the specific page's folder os.chdir(workdir) - # 1) Translate + print(" ⏳ Extracting text and translating...") + + # 1) Translate using ONLY the required path arguments. + # This forces the function to use its own internal default variables + # (like source_lang, target_lang, confidence_threshold) directly from manga-translator.py translator_module.translate_manga_text( image_path=str(page_path.resolve()), - source_lang=SOURCE_LANG, - target_lang=TARGET_LANG, - confidence_threshold=CONFIDENCE_THRESHOLD, - min_text_length=MIN_TEXT_LENGTH, - gap_px=GAP_PX, - filter_sound_effects=FILTER_SFX, - quality_threshold=QUALITY_THRESHOLD, export_to_file="output.txt", - export_bubbles_to="bubbles.json", - reading_mode=READING_MODE, - debug=DEBUG + export_bubbles_to="bubbles.json" ) - print(" ✅ Translator done") + print(" ✅ Translation and OCR data saved successfully") return True @@ -134,16 +79,13 @@ def process_page(page_path, workdir, translator_module): # MAIN # ───────────────────────────────────────────── def main(): - parser = argparse.ArgumentParser(description="Manga Translation OCR Pipeline") + parser = argparse.ArgumentParser(description="Manga Translation OCR Batch Pipeline") parser.add_argument("chapter_dir", help="Path to the folder containing manga pages") args = parser.parse_args() chapter_dir = Path(args.chapter_dir).resolve() - output_cbz = chapter_dir.parent / f"{chapter_dir.name}_translated.cbz" - print("Loading modules...") - - # Ensure we are loading from the directory where pipeline.py is located + print("Loading translator module...") script_dir = Path(__file__).parent try: @@ -157,11 +99,10 @@ def main(): print(f"❌ No images found in: {chapter_dir}") sys.exit(1) - print(f"\n📖 Chapter : {chapter_dir}") + print(f"\n📖 Chapter : {chapter_dir.name}") print(f" Pages : {len(pages)}") - print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}\n") + print(" Note : Using translation settings directly from manga-translator.py\n") - translated_dir = chapter_dir / "translated" succeeded, failed = [], [] for i, page_path in enumerate(pages, start=1): @@ -182,8 +123,5 @@ def main(): print(f" • {f}") print(f"{'═' * 70}\n") - print("Packing CBZ...") - pack_cbz(chapter_dir, translated_dir, output_cbz) - if __name__ == "__main__": main() \ No newline at end of file