Started pipelines, render not working
This commit is contained in:
189
pipeline-translator.py
Normal file
189
pipeline-translator.py
Normal file
@@ -0,0 +1,189 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
pipeline.py
|
||||
───────────────────────────────────────────────────────────────
|
||||
Translation OCR pipeline (No Rendering)
|
||||
|
||||
Usage:
|
||||
python pipeline.py /path/to/chapter/folder
|
||||
"""
|
||||
|
||||
import argparse
import importlib.util
import os
import re
import sys
import zipfile
from pathlib import Path
|
||||
|
||||
# ─────────────────────────────────────────────
# CONFIG
# ─────────────────────────────────────────────
# Language pair forwarded to translate_manga_text() for every page.
SOURCE_LANG = "en"
TARGET_LANG = "ca"

# Translator Settings — each value is passed through unchanged as the
# corresponding keyword argument of translate_manga_text() (see process_page).
CONFIDENCE_THRESHOLD = 0.10   # forwarded as confidence_threshold
MIN_TEXT_LENGTH = 1           # forwarded as min_text_length
GAP_PX = "auto"               # forwarded as gap_px; "auto" semantics defined by the translator
FILTER_SFX = True             # forwarded as filter_sound_effects
QUALITY_THRESHOLD = 0.50      # forwarded as quality_threshold
READING_MODE = "ltr"          # forwarded as reading_mode
DEBUG = True                  # forwarded as debug
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# DYNAMIC MODULE LOADER
|
||||
# ─────────────────────────────────────────────
|
||||
def load_module(name, filepath):
    """Import and return a module loaded from an arbitrary file path.

    Raises FileNotFoundError when no import spec (or loader) can be
    built for *filepath*.
    """
    spec = importlib.util.spec_from_file_location(name, filepath)
    loader = getattr(spec, "loader", None)
    if loader is None:
        raise FileNotFoundError(f"Cannot load spec for {filepath}")
    module = importlib.util.module_from_spec(spec)
    loader.exec_module(module)
    return module
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# HELPERS
|
||||
# ─────────────────────────────────────────────
|
||||
def sorted_pages(chapter_dir):
    """Return the chapter's image files in natural (human) page order.

    Fix: the previous lexicographic sort on the file stem ordered
    unpadded page numbers incorrectly ("10" before "2").  Digit runs in
    the stem are now compared numerically, so 1 < 2 < 10.
    """
    exts = {".jpg", ".jpeg", ".png", ".webp"}

    def natural_key(p):
        # Split the stem into alternating text/digit tokens; digits
        # compare as ints, text case-insensitively.
        return [int(tok) if tok.isdigit() else tok.lower()
                for tok in re.split(r"(\d+)", p.stem)]

    pages = [
        p for p in Path(chapter_dir).iterdir()
        if p.is_file() and p.suffix.lower() in exts
    ]
    return sorted(pages, key=natural_key)
|
||||
|
||||
def make_page_workdir(chapter_dir, page_stem):
    """Create (if needed) and return ``<chapter_dir>/translated/<page_stem>``."""
    target = Path(chapter_dir, "translated", page_stem)
    target.mkdir(parents=True, exist_ok=True)
    return target
|
||||
|
||||
def pack_cbz(chapter_dir, translated_dir, output_cbz):
    """Bundle original pages plus per-page outputs into a CBZ archive.

    Archive layout (entries stored uncompressed):
        pages/<original image>
        translations/<page>_output.txt
        data/<page>_bubbles.json

    Prints a warning and writes nothing when no pages are found.

    Fix: entries were previously sorted lexicographically, which orders
    unpadded page numbers incorrectly ("10" before "2"); a natural-sort
    key is used now, matching sorted_pages().
    """
    exts = {".jpg", ".jpeg", ".png", ".webp"}

    def natural_key(text):
        # Digit runs compare numerically so "page2" < "page10".
        return [int(tok) if tok.isdigit() else tok.lower()
                for tok in re.split(r"(\d+)", text)]

    pages = sorted(
        [p for p in Path(chapter_dir).iterdir() if p.is_file() and p.suffix.lower() in exts],
        key=lambda p: natural_key(p.stem)
    )

    # Per-page outputs live in <translated_dir>/<page_stem>/, so the
    # parent directory name identifies the page.
    txts = sorted(translated_dir.rglob("output.txt"), key=lambda p: natural_key(p.parent.name))
    jsons = sorted(translated_dir.rglob("bubbles.json"), key=lambda p: natural_key(p.parent.name))

    if not pages:
        print("⚠️ No original pages found — CBZ not created.")
        return

    with zipfile.ZipFile(output_cbz, "w", compression=zipfile.ZIP_STORED) as zf:
        # Original pages
        for img in pages:
            zf.write(img, f"pages/{img.name}")

        # Text outputs
        for txt in txts:
            zf.write(txt, f"translations/{txt.parent.name}_output.txt")

        # JSON outputs
        for j in jsons:
            zf.write(j, f"data/{j.parent.name}_bubbles.json")

    print(f"\n✅ CBZ saved → {output_cbz}")
    print(f"📦 Contains: {len(pages)} original pages, {len(txts)} text files, {len(jsons)} JSON files.")
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# PER-PAGE PIPELINE
|
||||
# ─────────────────────────────────────────────
|
||||
def process_page(page_path, workdir, translator_module):
    """Translate one page inside its own working directory.

    The process chdirs into *workdir* so the translator's relative
    output files ("output.txt", "bubbles.json") land there, and always
    restores the previous CWD afterwards.

    Returns True on success, False when the translator raised (the page
    is skipped, never fatal for the whole chapter run).
    """
    rule = "─" * 70
    print(f"\n{rule}")
    print(f"PAGE: {page_path.name}")
    print(f"{rule}")

    previous_cwd = os.getcwd()
    try:
        # Isolate execution to the specific page's folder
        os.chdir(workdir)

        # 1) Translate
        translator_module.translate_manga_text(
            image_path=str(page_path.resolve()),
            source_lang=SOURCE_LANG,
            target_lang=TARGET_LANG,
            confidence_threshold=CONFIDENCE_THRESHOLD,
            min_text_length=MIN_TEXT_LENGTH,
            gap_px=GAP_PX,
            filter_sound_effects=FILTER_SFX,
            quality_threshold=QUALITY_THRESHOLD,
            export_to_file="output.txt",
            export_bubbles_to="bubbles.json",
            reading_mode=READING_MODE,
            debug=DEBUG,
        )
        print(" ✅ Translator done")
        return True
    except Exception as e:
        print(f" ❌ Failed: {e}")
        return False
    finally:
        os.chdir(previous_cwd)
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# MAIN
|
||||
# ─────────────────────────────────────────────
|
||||
def main():
    """CLI entry point: translate every page of a chapter, then pack a CBZ."""
    cli = argparse.ArgumentParser(description="Manga Translation OCR Pipeline")
    cli.add_argument("chapter_dir", help="Path to the folder containing manga pages")
    opts = cli.parse_args()

    chapter_dir = Path(opts.chapter_dir).resolve()
    output_cbz = chapter_dir.parent / f"{chapter_dir.name}_translated.cbz"

    print("Loading modules...")

    # The translator module lives next to this script, not in the chapter dir.
    script_dir = Path(__file__).parent
    try:
        translator_mod = load_module("manga_translator", str(script_dir / "manga-translator.py"))
    except Exception as e:
        print(f"❌ Could not load manga-translator.py: {e}")
        sys.exit(1)

    pages = sorted_pages(chapter_dir)
    if not pages:
        print(f"❌ No images found in: {chapter_dir}")
        sys.exit(1)

    print(f"\n📖 Chapter : {chapter_dir}")
    print(f" Pages : {len(pages)}")
    print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}\n")

    translated_dir = chapter_dir / "translated"
    ok_pages, bad_pages = [], []

    for page_no, page in enumerate(pages, start=1):
        print(f"[{page_no}/{len(pages)}] Processing...")
        workdir = make_page_workdir(chapter_dir, page.stem)
        # Route each page name into the matching result bucket.
        bucket = ok_pages if process_page(page, workdir, translator_mod) else bad_pages
        bucket.append(page.name)

    print(f"\n{'═' * 70}")
    print("PIPELINE COMPLETE")
    print(f"✅ {len(ok_pages)} page(s) succeeded")
    if bad_pages:
        print(f"❌ {len(bad_pages)} page(s) failed:")
        for name in bad_pages:
            print(f" • {name}")
    print(f"{'═' * 70}\n")

    print("Packing CBZ...")
    pack_cbz(chapter_dir, translated_dir, output_cbz)
|
||||
|
||||
# Script entry point — argument parsing happens inside main().
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user