Added pipeline-translator.py

2026-04-21 20:09:11 +02:00
parent 2fb5e9eb7b
commit ba5f001e75
1 changed files with 14 additions and 76 deletions
--- a/pipeline-translator.py
+++ b/pipeline-translator.py
@@ -1,35 +1,19 @@
 #!/usr/bin/env python3
 """
-pipeline.py
+pipeline-translator.py
 ───────────────────────────────────────────────────────────────
-Translation OCR pipeline (No Rendering)
+Translation OCR pipeline (Batch Processing Only)

 Usage:
-  python pipeline.py /path/to/chapter/folder
+  python pipeline-translator.py /path/to/chapter/folder
 """

 import os
 import sys
 import argparse
-import zipfile
 import importlib.util
 from pathlib import Path

-# ─────────────────────────────────────────────
-#  CONFIG
-# ─────────────────────────────────────────────
-SOURCE_LANG = "en"
-TARGET_LANG = "ca"
-
-# Translator Settings
-CONFIDENCE_THRESHOLD = 0.10
-MIN_TEXT_LENGTH      = 1
-GAP_PX               = "auto"
-FILTER_SFX           = True
-QUALITY_THRESHOLD    = 0.50
-READING_MODE         = "ltr"
-DEBUG                = True
-
 # ─────────────────────────────────────────────
 #  DYNAMIC MODULE LOADER
 # ─────────────────────────────────────────────
@@ -57,40 +41,6 @@ def make_page_workdir(chapter_dir, page_stem):
    workdir.mkdir(parents=True, exist_ok=True)
    return workdir

-def pack_cbz(chapter_dir, translated_dir, output_cbz):
-    exts = {".jpg", ".jpeg", ".png", ".webp"}
-
-    pages = sorted(
-        [p for p in Path(chapter_dir).iterdir() if p.is_file() and p.suffix.lower() in exts],
-        key=lambda p: p.stem
-    )
-
-    txts = sorted(translated_dir.rglob("output.txt"), key=lambda p: p.parent.name)
-    jsons = sorted(translated_dir.rglob("bubbles.json"), key=lambda p: p.parent.name)
-
-    if not pages:
-        print("⚠️  No original pages found — CBZ not created.")
-        return
-
-    with zipfile.ZipFile(output_cbz, "w", compression=zipfile.ZIP_STORED) as zf:
-        # Original pages
-        for img in pages:
-            arcname = f"pages/{img.name}"
-            zf.write(img, arcname)
-            
-        # Text outputs
-        for txt in txts:
-            arcname = f"translations/{txt.parent.name}_output.txt"
-            zf.write(txt, arcname)
-
-        # JSON outputs
-        for j in jsons:
-            arcname = f"data/{j.parent.name}_bubbles.json"
-            zf.write(j, arcname)
-
-    print(f"\n✅ CBZ saved → {output_cbz}")
-    print(f"📦 Contains: {len(pages)} original pages, {len(txts)} text files, {len(jsons)} JSON files.")
-
 # ─────────────────────────────────────────────
 #  PER-PAGE PIPELINE
 # ─────────────────────────────────────────────
@@ -104,22 +54,17 @@ def process_page(page_path, workdir, translator_module):
        # Isolate execution to the specific page's folder
        os.chdir(workdir)

-        # 1) Translate
+        print("  ⏳ Extracting text and translating...")
+        
+        # 1) Translate, passing ONLY the input/output path arguments.
+        # Every other setting (e.g. source_lang, target_lang, confidence_threshold)
+        # is left to the defaults defined in manga-translator.py.
        translator_module.translate_manga_text(
            image_path=str(page_path.resolve()),
-            source_lang=SOURCE_LANG,
-            target_lang=TARGET_LANG,
-            confidence_threshold=CONFIDENCE_THRESHOLD,
-            min_text_length=MIN_TEXT_LENGTH,
-            gap_px=GAP_PX,
-            filter_sound_effects=FILTER_SFX,
-            quality_threshold=QUALITY_THRESHOLD,
            export_to_file="output.txt",
-            export_bubbles_to="bubbles.json",
-            reading_mode=READING_MODE,
-            debug=DEBUG
+            export_bubbles_to="bubbles.json"
        )
-        print("  ✅ Translator done")
+        print("  ✅ Translation and OCR data saved successfully")

        return True

@@ -134,16 +79,13 @@ def process_page(page_path, workdir, translator_module):
 #  MAIN
 # ─────────────────────────────────────────────
 def main():
-    parser = argparse.ArgumentParser(description="Manga Translation OCR Pipeline")
+    parser = argparse.ArgumentParser(description="Manga Translation OCR Batch Pipeline")
    parser.add_argument("chapter_dir", help="Path to the folder containing manga pages")
    args = parser.parse_args()

    chapter_dir = Path(args.chapter_dir).resolve()
-    output_cbz = chapter_dir.parent / f"{chapter_dir.name}_translated.cbz"

-    print("Loading modules...")
-    
-    # Ensure we are loading from the directory where pipeline.py is located
+    print("Loading translator module...")
    script_dir = Path(__file__).parent
    
    try:
@@ -157,11 +99,10 @@ def main():
        print(f"❌ No images found in: {chapter_dir}")
        sys.exit(1)

-    print(f"\n📖 Chapter : {chapter_dir}")
+    print(f"\n📖 Chapter : {chapter_dir.name}")
    print(f"   Pages   : {len(pages)}")
-    print(f"   Source  : {SOURCE_LANG} → Target: {TARGET_LANG}\n")
+    print("   Note    : Using translation settings directly from manga-translator.py\n")

-    translated_dir = chapter_dir / "translated"
    succeeded, failed = [], []

    for i, page_path in enumerate(pages, start=1):
@@ -182,8 +123,5 @@ def main():
            print(f"   • {f}")
    print(f"{'═' * 70}\n")

-    print("Packing CBZ...")
-    pack_cbz(chapter_dir, translated_dir, output_cbz)
-
 if __name__ == "__main__":
    main()